diff options
Diffstat (limited to 'include/linux/sunrpc')
25 files changed, 1542 insertions, 771 deletions
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 98da816b5fc2..61e58327b1aa 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -99,6 +99,7 @@ struct rpc_auth_create_args { /* Flags for rpcauth_lookupcred() */ #define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */ +#define RPCAUTH_LOOKUP_ASYNC 0x02 /* Don't block waiting for memory */ /* * Client authentication ops @@ -119,6 +120,7 @@ struct rpc_authops { struct rpcsec_gss_info *); int (*key_timeout)(struct rpc_auth *, struct rpc_cred *); + int (*ping)(struct rpc_clnt *clnt); }; struct rpc_credops { @@ -143,6 +145,7 @@ struct rpc_credops { extern const struct rpc_authops authunix_ops; extern const struct rpc_authops authnull_ops; +extern const struct rpc_authops authtls_ops; int __init rpc_init_authunix(void); int __init rpcauth_init_module(void); diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index f07c334c599f..f22bf915dcf6 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** (c) 2008 NetApp. All Rights Reserved. -NetApp provides this source code under the GPL v2 License. -The GPL v2 license is available at -https://opensource.org/licenses/gpl-license.php. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ @@ -34,7 +20,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef CONFIG_SUNRPC_BACKCHANNEL struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid); void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); -void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task); +void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task, + const struct rpc_timeout *to); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); @@ -83,4 +70,3 @@ static inline void xprt_free_bc_request(struct rpc_rqst *req) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ #endif /* _LINUX_SUNRPC_BC_XPRT_H */ - diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index d0965e2997b0..e783132e481f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -14,6 +14,7 @@ #include <linux/kref.h> #include <linux/slab.h> #include <linux/atomic.h> +#include <linux/kstrtox.h> #include <linux/proc_fs.h> /* @@ -55,10 +56,14 @@ struct cache_head { struct kref ref; unsigned long flags; }; -#define CACHE_VALID 0 /* Entry contains valid data */ -#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */ -#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/ -#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */ + +/* cache_head.flags */ +enum { + CACHE_VALID, /* Entry contains valid data */ + CACHE_NEGATIVE, /* Negative entry - there is no match for the key */ + CACHE_PENDING, /* An upcall has been sent but no reply received yet*/ + CACHE_CLEANED, /* Entry has been cleaned from cache */ +}; #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ @@ -120,17 +125,17 @@ struct cache_detail { struct net *net; }; - /* this must be embedded in any request structure that * identifies an object that will want a callback on * a cache fill */ struct cache_req { struct cache_deferred_req *(*defer)(struct cache_req *req); - int thread_wait; /* How long (jiffies) we can block the - * current thread to wait for updates. - */ + unsigned long thread_wait; /* How long (jiffies) we can block the + * current thread to wait for updates. + */ }; + /* this must be embedded in a deferred_request that is being * delayed awaiting cache-fill */ @@ -217,6 +222,8 @@ static inline bool cache_is_expired(struct cache_detail *detail, struct cache_he return detail->flush_time >= h->last_refresh; } +extern int cache_check_rcu(struct cache_detail *detail, + struct cache_head *h, struct cache_req *rqstp); extern int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp); extern void cache_flush(void); @@ -299,17 +306,18 @@ static inline int get_time(char **bpp, time64_t *time) return 0; } -static inline time64_t get_expiry(char **bpp) +static inline int get_expiry(char **bpp, time64_t *rvp) { - time64_t rv; + int error; struct timespec64 boot; - if (get_time(bpp, &rv)) - return 0; - if (rv < 0) - return 0; + error = get_time(bpp, rvp); + if (error) + return error; + getboottime64(&boot); - return rv - boot.tv_sec; + (*rvp) -= boot.tv_sec; + return 0; } #endif /* _LINUX_SUNRPC_CACHE_H_ */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 02e7a5863d28..f8b406b0a1af 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -14,6 +14,7 @@ #include <linux/socket.h> #include <linux/in.h> #include <linux/in6.h> +#include <linux/refcount.h> #include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/sched.h> @@ -29,15 +30,23 @@ #include <linux/sunrpc/xprtmultipath.h> struct rpc_inode; +struct rpc_sysfs_client { + struct kobject kobject; + struct net *net; + struct rpc_clnt *clnt; + struct rpc_xprt_switch *xprt_switch; +}; + /* * The high-level client handle */ struct rpc_clnt { - atomic_t cl_count; /* Number of references */ + refcount_t cl_count; /* Number of references */ unsigned int cl_clid; /* client id */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ + atomic_t cl_pid; /* task PID counter */ spinlock_t cl_lock; /* spinlock */ struct rpc_xprt __rcu * cl_xprt; /* transport */ const struct rpc_procinfo *cl_procinfo; /* procedure info */ @@ -54,7 +63,11 @@ struct rpc_clnt { cl_discrtry : 1,/* disconnect before retry */ cl_noretranstimeo: 1,/* No retransmit timeouts */ cl_autobind : 1,/* use getport() */ - cl_chatty : 1;/* be verbose */ + cl_chatty : 1,/* be verbose */ + cl_shutdown : 1,/* rpc immediate -EIO */ + cl_netunreach_fatal : 1; + /* Treat ENETUNREACH errors as fatal */ + struct xprtsec_parms cl_xprtsec; /* transport security policy */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ @@ -71,6 +84,7 @@ struct rpc_clnt { #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *cl_debugfs; /* debugfs directory */ #endif + struct rpc_sysfs_client *cl_sysfs; /* sysfs directory */ /* cl_work is only needed after cl_xpi is no longer used, * and that are of similar size */ @@ -79,6 +93,9 @@ struct rpc_clnt { struct work_struct cl_work; }; const struct cred *cl_cred; + unsigned int cl_max_connect; /* max number of transports not to the same IP */ + struct super_block *pipefs_sb; + atomic_t cl_task_count; }; /* @@ -125,6 +142,7 @@ struct rpc_create_args { const char *servername; const char *nodename; const struct rpc_program *program; + struct rpc_stat *stats; u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; @@ -133,6 +151,10 @@ struct rpc_create_args { char *client_name; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ const struct cred *cred; + unsigned int max_connect; + struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; struct rpc_add_xprt_test { @@ -154,6 +176,8 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) +#define RPC_CLNT_CREATE_CONNECTED (1UL << 12) +#define RPC_CLNT_CREATE_NETUNREACH_FATAL (1UL << 13) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, @@ -227,13 +251,18 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *, struct rpc_xprt_switch *, struct rpc_xprt *, void *); +void rpc_clnt_manage_trunked_xprts(struct rpc_clnt *); +void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *, + struct rpc_add_xprt_test *); const char *rpc_proc_name(const struct rpc_task *task); -void rpc_clnt_xprt_switch_put(struct rpc_clnt *); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); +void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); +void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt); +void rpc_clnt_disconnect(struct rpc_clnt *clnt); void rpc_cleanup_clids(void); static inline int rpc_reply_expected(struct rpc_task *task) diff --git a/include/linux/sunrpc/gss_asn1.h b/include/linux/sunrpc/gss_asn1.h deleted file mode 100644 index 3ccecd0ad229..000000000000 --- a/include/linux/sunrpc/gss_asn1.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * linux/include/linux/sunrpc/gss_asn1.h - * - * minimal asn1 for generic encoding/decoding of gss tokens - * - * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, - * lib/gssapi/krb5/gssapiP_krb5.h, and others - * - * Copyright (c) 2000 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson <andros@umich.edu> - */ - -/* - * Copyright 1995 by the Massachusetts Institute of Technology. - * All Rights Reserved. - * - * Export of this software from the United States of America may - * require a specific license from the United States Government. - * It is the responsibility of any person or organization contemplating - * export to obtain such a license before exporting. - * - * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and - * distribute this software and its documentation for any purpose and - * without fee is hereby granted, provided that the above copyright - * notice appear in all copies and that both that copyright notice and - * this permission notice appear in supporting documentation, and that - * the name of M.I.T. not be used in advertising or publicity pertaining - * to distribution of the software without specific, written prior - * permission. Furthermore if you modify this software you must label - * your software as modified software and not distribute it in such a - * fashion that it might be confused with the original M.I.T. software. - * M.I.T. makes no representations about the suitability of - * this software for any purpose. It is provided "as is" without express - * or implied warranty. - * - */ - - -#include <linux/sunrpc/gss_api.h> - -#define SIZEOF_INT 4 - -/* from gssapi_err_generic.h */ -#define G_BAD_SERVICE_NAME (-2045022976L) -#define G_BAD_STRING_UID (-2045022975L) -#define G_NOUSER (-2045022974L) -#define G_VALIDATE_FAILED (-2045022973L) -#define G_BUFFER_ALLOC (-2045022972L) -#define G_BAD_MSG_CTX (-2045022971L) -#define G_WRONG_SIZE (-2045022970L) -#define G_BAD_USAGE (-2045022969L) -#define G_UNKNOWN_QOP (-2045022968L) -#define G_NO_HOSTNAME (-2045022967L) -#define G_BAD_HOSTNAME (-2045022966L) -#define G_WRONG_MECH (-2045022965L) -#define G_BAD_TOK_HEADER (-2045022964L) -#define G_BAD_DIRECTION (-2045022963L) -#define G_TOK_TRUNC (-2045022962L) -#define G_REFLECT (-2045022961L) -#define G_WRONG_TOKID (-2045022960L) - -#define g_OID_equal(o1,o2) \ - (((o1)->len == (o2)->len) && \ - (memcmp((o1)->data,(o2)->data,(int) (o1)->len) == 0)) - -u32 g_verify_token_header( - struct xdr_netobj *mech, - int *body_size, - unsigned char **buf_in, - int toksize); - -int g_token_size( - struct xdr_netobj *mech, - unsigned int body_size); - -void g_make_token_header( - struct xdr_netobj *mech, - int body_size, - unsigned char **buf); diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 91f43d86879d..43950b5237c8 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -1,6 +1,4 @@ /* - * linux/include/linux/sunrpc/gss_krb5_types.h - * * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, * lib/gssapi/krb5/gssapiP_krb5.h, and others * @@ -36,88 +34,25 @@ * */ +#ifndef _LINUX_SUNRPC_GSS_KRB5_H +#define _LINUX_SUNRPC_GSS_KRB5_H + #include <crypto/skcipher.h> #include <linux/sunrpc/auth_gss.h> #include <linux/sunrpc/gss_err.h> -#include <linux/sunrpc/gss_asn1.h> /* Length of constant used in key derivation */ #define GSS_KRB5_K5CLENGTH (5) -/* Maximum key length (in bytes) for the supported crypto algorithms*/ +/* Maximum key length (in bytes) for the supported crypto algorithms */ #define GSS_KRB5_MAX_KEYLEN (32) -/* Maximum checksum function output for the supported crypto algorithms */ -#define GSS_KRB5_MAX_CKSUM_LEN (20) +/* Maximum checksum function output for the supported enctypes */ +#define GSS_KRB5_MAX_CKSUM_LEN (24) /* Maximum blocksize for the supported crypto algorithms */ #define GSS_KRB5_MAX_BLOCKSIZE (16) -struct krb5_ctx; - -struct gss_krb5_enctype { - const u32 etype; /* encryption (key) type */ - const u32 ctype; /* checksum type */ - const char *name; /* "friendly" name */ - const char *encrypt_name; /* crypto encrypt name */ - const char *cksum_name; /* crypto checksum name */ - const u16 signalg; /* signing algorithm */ - const u16 sealalg; /* sealing algorithm */ - const u32 blocksize; /* encryption blocksize */ - const u32 conflen; /* confounder length - (normally the same as - the blocksize) */ - const u32 cksumlength; /* checksum length */ - const u32 keyed_cksum; /* is it a keyed cksum? */ - const u32 keybytes; /* raw key len, in bytes */ - const u32 keylength; /* final key len, in bytes */ - u32 (*encrypt) (struct crypto_sync_skcipher *tfm, - void *iv, void *in, void *out, - int length); /* encryption function */ - u32 (*decrypt) (struct crypto_sync_skcipher *tfm, - void *iv, void *in, void *out, - int length); /* decryption function */ - u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *in, - struct xdr_netobj *out); /* complete key generation */ - u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, - struct page **pages); /* v2 encryption function */ - u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, u32 len, - struct xdr_buf *buf, u32 *headskip, - u32 *tailskip); /* v2 decryption function */ -}; - -/* krb5_ctx flags definitions */ -#define KRB5_CTX_FLAG_INITIATOR 0x00000001 -#define KRB5_CTX_FLAG_CFX 0x00000002 -#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004 - -struct krb5_ctx { - int initiate; /* 1 = initiating, 0 = accepting */ - u32 enctype; - u32 flags; - const struct gss_krb5_enctype *gk5e; /* enctype-specific info */ - struct crypto_sync_skcipher *enc; - struct crypto_sync_skcipher *seq; - struct crypto_sync_skcipher *acceptor_enc; - struct crypto_sync_skcipher *initiator_enc; - struct crypto_sync_skcipher *acceptor_enc_aux; - struct crypto_sync_skcipher *initiator_enc_aux; - u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ - u8 cksum[GSS_KRB5_MAX_KEYLEN]; - atomic_t seq_send; - atomic64_t seq_send64; - time64_t endtime; - struct xdr_netobj mech_used; - u8 initiator_sign[GSS_KRB5_MAX_KEYLEN]; - u8 acceptor_sign[GSS_KRB5_MAX_KEYLEN]; - u8 initiator_seal[GSS_KRB5_MAX_KEYLEN]; - u8 acceptor_seal[GSS_KRB5_MAX_KEYLEN]; - u8 initiator_integ[GSS_KRB5_MAX_KEYLEN]; - u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; -}; - /* The length of the Kerberos GSS token header */ #define GSS_KRB5_TOK_HDR_LEN (16) @@ -150,6 +85,12 @@ enum seal_alg { SEAL_ALG_DES3KD = 0x0002 }; +/* + * These values are assigned by IANA and published via the + * subregistry at the link below: + * + * https://www.iana.org/assignments/kerberos-parameters/kerberos-parameters.xhtml#kerberos-parameters-2 + */ #define CKSUMTYPE_CRC32 0x0001 #define CKSUMTYPE_RSA_MD4 0x0002 #define CKSUMTYPE_RSA_MD4_DES 0x0003 @@ -160,6 +101,10 @@ enum seal_alg { #define CKSUMTYPE_HMAC_SHA1_DES3 0x000c #define CKSUMTYPE_HMAC_SHA1_96_AES128 0x000f #define CKSUMTYPE_HMAC_SHA1_96_AES256 0x0010 +#define CKSUMTYPE_CMAC_CAMELLIA128 0x0011 +#define CKSUMTYPE_CMAC_CAMELLIA256 0x0012 +#define CKSUMTYPE_HMAC_SHA256_128_AES128 0x0013 +#define CKSUMTYPE_HMAC_SHA384_192_AES256 0x0014 #define CKSUMTYPE_HMAC_MD5_ARCFOUR -138 /* Microsoft md5 hmac cksumtype */ /* from gssapi_err_krb5.h */ @@ -180,6 +125,11 @@ enum seal_alg { /* per Kerberos v5 protocol spec crypto types from the wire. * these get mapped to linux kernel crypto routines. + * + * These values are assigned by IANA and published via the + * subregistry at the link below: + * + * https://www.iana.org/assignments/kerberos-parameters/kerberos-parameters.xhtml#kerberos-parameters-1 */ #define ENCTYPE_NULL 0x0000 #define ENCTYPE_DES_CBC_CRC 0x0001 /* DES cbc mode with CRC-32 */ @@ -193,8 +143,12 @@ enum seal_alg { #define ENCTYPE_DES3_CBC_SHA1 0x0010 #define ENCTYPE_AES128_CTS_HMAC_SHA1_96 0x0011 #define ENCTYPE_AES256_CTS_HMAC_SHA1_96 0x0012 +#define ENCTYPE_AES128_CTS_HMAC_SHA256_128 0x0013 +#define ENCTYPE_AES256_CTS_HMAC_SHA384_192 0x0014 #define ENCTYPE_ARCFOUR_HMAC 0x0017 #define ENCTYPE_ARCFOUR_HMAC_EXP 0x0018 +#define ENCTYPE_CAMELLIA128_CTS_CMAC 0x0019 +#define ENCTYPE_CAMELLIA256_CTS_CMAC 0x001A #define ENCTYPE_UNKNOWN 0x01ff /* @@ -216,103 +170,4 @@ enum seal_alg { #define KG_USAGE_INITIATOR_SEAL (24) #define KG_USAGE_INITIATOR_SIGN (25) -/* - * This compile-time check verifies that we will not exceed the - * slack space allotted by the client and server auth_gss code - * before they call gss_wrap(). - */ -#define GSS_KRB5_MAX_SLACK_NEEDED \ - (GSS_KRB5_TOK_HDR_LEN /* gss token header */ \ - + GSS_KRB5_MAX_CKSUM_LEN /* gss token checksum */ \ - + GSS_KRB5_MAX_BLOCKSIZE /* confounder */ \ - + GSS_KRB5_MAX_BLOCKSIZE /* possible padding */ \ - + GSS_KRB5_TOK_HDR_LEN /* encrypted hdr in v2 token */\ - + GSS_KRB5_MAX_CKSUM_LEN /* encryption hmac */ \ - + 4 + 4 /* RPC verifier */ \ - + GSS_KRB5_TOK_HDR_LEN \ - + GSS_KRB5_MAX_CKSUM_LEN) - -u32 -make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, - struct xdr_buf *body, int body_offset, u8 *cksumkey, - unsigned int usage, struct xdr_netobj *cksumout); - -u32 -make_checksum_v2(struct krb5_ctx *, char *header, int hdrlen, - struct xdr_buf *body, int body_offset, u8 *key, - unsigned int usage, struct xdr_netobj *cksum); - -u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *, - struct xdr_netobj *); - -u32 gss_verify_mic_kerberos(struct gss_ctx *, struct xdr_buf *, - struct xdr_netobj *); - -u32 -gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, - struct xdr_buf *outbuf, struct page **pages); - -u32 -gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, int len, - struct xdr_buf *buf); - - -u32 -krb5_encrypt(struct crypto_sync_skcipher *key, - void *iv, void *in, void *out, int length); - -u32 -krb5_decrypt(struct crypto_sync_skcipher *key, - void *iv, void *in, void *out, int length); - -int -gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *outbuf, - int offset, struct page **pages); - -int -gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *inbuf, - int offset); - -s32 -krb5_make_seq_num(struct krb5_ctx *kctx, - struct crypto_sync_skcipher *key, - int direction, - u32 seqnum, unsigned char *cksum, unsigned char *buf); - -s32 -krb5_get_seq_num(struct krb5_ctx *kctx, - unsigned char *cksum, - unsigned char *buf, int *direction, u32 *seqnum); - -int -xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen); - -u32 -krb5_derive_key(const struct gss_krb5_enctype *gk5e, - const struct xdr_netobj *inkey, - struct xdr_netobj *outkey, - const struct xdr_netobj *in_constant, - gfp_t gfp_mask); - -u32 -gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *randombits, - struct xdr_netobj *key); - -u32 -gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *randombits, - struct xdr_netobj *key); - -u32 -gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, - struct page **pages); - -u32 -gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, - struct xdr_buf *buf, u32 *plainoffset, - u32 *plainlen); - -void -gss_krb5_make_confounder(char *p, u32 conflen); +#endif /* _LINUX_SUNRPC_GSS_KRB5_H */ diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h deleted file mode 100644 index 87eea679d750..000000000000 --- a/include/linux/sunrpc/gss_krb5_enctypes.h +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Define the string that exports the set of kernel-supported - * Kerberos enctypes. This list is sent via upcall to gssd, and - * is also exposed via the nfsd /proc API. The consumers generally - * treat this as an ordered list, where the first item in the list - * is the most preferred. - */ - -#ifndef _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H -#define _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H - -#ifdef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES - -/* - * NB: This list includes DES3_CBC_SHA1, which was deprecated by RFC 8429. - * - * ENCTYPE_AES256_CTS_HMAC_SHA1_96 - * ENCTYPE_AES128_CTS_HMAC_SHA1_96 - * ENCTYPE_DES3_CBC_SHA1 - */ -#define KRB5_SUPPORTED_ENCTYPES "18,17,16" - -#else /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ - -/* - * NB: This list includes encryption types that were deprecated - * by RFC 8429 and RFC 6649. - * - * ENCTYPE_AES256_CTS_HMAC_SHA1_96 - * ENCTYPE_AES128_CTS_HMAC_SHA1_96 - * ENCTYPE_DES3_CBC_SHA1 - * ENCTYPE_DES_CBC_MD5 - * ENCTYPE_DES_CBC_CRC - * ENCTYPE_DES_CBC_MD4 - */ -#define KRB5_SUPPORTED_ENCTYPES "18,17,16,3,1,2" - -#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ - -#endif /* _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H */ diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 43f854487539..c4b0eb2b2f04 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -10,9 +10,6 @@ #define RPC_VERSION 2 -/* size of an XDR encoding unit in bytes, i.e. 32bit */ -#define XDR_UNIT (4) - /* spec defines authentication flavor as an unsigned 32 bit integer */ typedef u32 rpc_authflavor_t; @@ -23,6 +20,7 @@ enum rpc_auth_flavors { RPC_AUTH_DES = 3, RPC_AUTH_KRB = 4, RPC_AUTH_GSS = 6, + RPC_AUTH_TLS = 7, RPC_AUTH_MAXFLAVOR = 8, /* pseudoflavors: */ RPC_AUTH_GSS_KRB5 = 390003, @@ -36,6 +34,11 @@ enum rpc_auth_flavors { RPC_AUTH_GSS_SPKMP = 390011, }; +/* Maximum size (in octets) of the machinename in an AUTH_UNIX + * credential (per RFC 5531 Appendix A) + */ +#define RPC_MAX_MACHINENAME (255) + /* Maximum size (in bytes) of an rpc credential or verifier */ #define RPC_MAX_AUTH_SIZE (400) diff --git a/include/linux/sunrpc/rdma_rn.h b/include/linux/sunrpc/rdma_rn.h new file mode 100644 index 000000000000..7d032ca057af --- /dev/null +++ b/include/linux/sunrpc/rdma_rn.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * * Copyright (c) 2024, Oracle and/or its affiliates. + */ + +#ifndef _LINUX_SUNRPC_RDMA_RN_H +#define _LINUX_SUNRPC_RDMA_RN_H + +#include <rdma/ib_verbs.h> + +/** + * rpcrdma_notification - request removal notification + */ +struct rpcrdma_notification { + void (*rn_done)(struct rpcrdma_notification *rn); + u32 rn_index; +}; + +int rpcrdma_rn_register(struct ib_device *device, + struct rpcrdma_notification *rn, + void (*done)(struct rpcrdma_notification *rn)); +void rpcrdma_rn_unregister(struct ib_device *device, + struct rpcrdma_notification *rn); +int rpcrdma_ib_client_register(void); +void rpcrdma_ib_client_unregister(void); + +#endif /* _LINUX_SUNRPC_RDMA_RN_H */ diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index cd188a527d16..3b35b6f6533a 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, char __user *, size_t); extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); +/* returns true if the msg is in-flight, i.e., already eaten by the peer */ +static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) { + return (msg->copied != 0 && list_empty(&msg->list)); +} + struct rpc_clnt; extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); extern int rpc_remove_client_dir(struct rpc_clnt *); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index df696efdd675..ccba79ebf893 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -38,6 +38,17 @@ struct rpc_wait { }; /* + * This describes a timeout strategy + */ +struct rpc_timeout { + unsigned long to_initval, /* initial timeout */ + to_maxval, /* max timeout */ + to_increment; /* if !exponential */ + unsigned int to_retries; /* max # of retries */ + unsigned char to_exponential; +}; + +/* * This is the RPC task struct */ struct rpc_task { @@ -61,8 +72,6 @@ struct rpc_task { struct rpc_wait tk_wait; /* RPC wait */ } u; - int tk_rpc_status; /* Result of last RPC operation */ - /* * RPC call state */ @@ -82,6 +91,8 @@ struct rpc_task { ktime_t tk_start; /* RPC task init timestamp */ pid_t tk_owner; /* Process id for batching tasks */ + + int tk_rpc_status; /* Result of last RPC operation */ unsigned short tk_flags; /* misc flags */ unsigned short tk_timeouts; /* maj timeouts */ @@ -90,8 +101,7 @@ struct rpc_task { #endif unsigned char tk_priority : 2,/* Task priority */ tk_garb_retry : 2, - tk_cred_retry : 2, - tk_rebind_retry : 2; + tk_cred_retry : 2; }; typedef void (*rpc_action)(struct rpc_task *); @@ -121,9 +131,10 @@ struct rpc_task_setup { */ #define RPC_TASK_ASYNC 0x0001 /* is an async task */ #define RPC_TASK_SWAPPER 0x0002 /* is swapping in/out */ +#define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */ #define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ -#define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ +#define RPC_TASK_NETUNREACH_FATAL 0x0040 /* ENETUNREACH is fatal */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ #define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ @@ -139,38 +150,28 @@ struct rpc_task_setup { #define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT)) #define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) #define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) +#define RPC_IS_MOVEABLE(t) ((t)->tk_flags & RPC_TASK_MOVEABLE) + +enum { + RPC_TASK_RUNNING, + RPC_TASK_QUEUED, + RPC_TASK_ACTIVE, + RPC_TASK_NEED_XMIT, + RPC_TASK_NEED_RECV, + RPC_TASK_MSG_PIN_WAIT, +}; -#define RPC_TASK_RUNNING 0 -#define RPC_TASK_QUEUED 1 -#define RPC_TASK_ACTIVE 2 -#define RPC_TASK_NEED_XMIT 3 -#define RPC_TASK_NEED_RECV 4 -#define RPC_TASK_MSG_PIN_WAIT 5 -#define RPC_TASK_SIGNALLED 6 - -#define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) -#define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_test_and_set_running(t) \ test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) -#define rpc_clear_running(t) \ - do { \ - smp_mb__before_atomic(); \ - clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \ - smp_mb__after_atomic(); \ - } while (0) +#define rpc_clear_running(t) clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define RPC_IS_QUEUED(t) test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) #define rpc_set_queued(t) set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) -#define rpc_clear_queued(t) \ - do { \ - smp_mb__before_atomic(); \ - clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \ - smp_mb__after_atomic(); \ - } while (0) +#define rpc_clear_queued(t) clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) #define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate) -#define RPC_SIGNALLED(t) test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate) +#define RPC_SIGNALLED(t) (READ_ONCE(task->tk_rpc_status) == -ERESTARTSYS) /* * Task priorities. @@ -198,7 +199,7 @@ struct rpc_wait_queue { unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ - unsigned short qlen; /* total # tasks waiting in queue */ + unsigned int qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) const char * name; @@ -217,14 +218,21 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, + struct rpc_timeout *timeout); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); +bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status); +void rpc_task_try_cancel(struct rpc_task *task, int error); void rpc_signal_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_exit(struct rpc_task *, int); void rpc_release_calldata(const struct rpc_call_ops *, void *); void rpc_killall_tasks(struct rpc_clnt *); +unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error, + bool (*fnmatch)(const struct rpc_task *, + const void *), + const void *data); void rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); @@ -263,7 +271,7 @@ int rpc_malloc(struct rpc_task *); void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); -int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *); +int rpc_wait_for_completion_task(struct rpc_task *task); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct net; void rpc_show_tasks(struct net *); @@ -273,11 +281,7 @@ void rpc_destroy_mempool(void); extern struct workqueue_struct *rpciod_workqueue; extern struct workqueue_struct *xprtiod_workqueue; void rpc_prepare_task(struct rpc_task *task); - -static inline int rpc_wait_for_completion_task(struct rpc_task *task) -{ - return __rpc_wait_for_completion_task(task, NULL); -} +gfp_t rpc_task_gfp_mask(void); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) static inline const char * rpc_qname(const struct rpc_wait_queue *q) diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index d94d4f410507..3ce1550d1beb 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -43,22 +43,6 @@ struct net; #ifdef CONFIG_PROC_FS int rpc_proc_init(struct net *); void rpc_proc_exit(struct net *); -#else -static inline int rpc_proc_init(struct net *net) -{ - return 0; -} - -static inline void rpc_proc_exit(struct net *net) -{ -} -#endif - -#ifdef MODULE -void rpc_modcount(struct inode *, int); -#endif - -#ifdef CONFIG_PROC_FS struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *); void rpc_proc_unregister(struct net *,const char *); void rpc_proc_zero(const struct rpc_program *); @@ -69,7 +53,14 @@ void svc_proc_unregister(struct net *, const char *); void svc_seq_show(struct seq_file *, const struct svc_stat *); #else +static inline int rpc_proc_init(struct net *net) +{ + return 0; +} +static inline void rpc_proc_exit(struct net *net) +{ +} static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; } static inline void rpc_proc_unregister(struct net *net, const char *p) {} static inline void rpc_proc_zero(const struct rpc_program *p) {} diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 386628b36bc7..48666b83fe68 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -17,16 +17,11 @@ #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/auth.h> #include <linux/sunrpc/svcauth.h> +#include <linux/lwq.h> #include <linux/wait.h> #include <linux/mm.h> - -/* statistics for svc_pool structures */ -struct svc_pool_stats { - atomic_long_t packets; - unsigned long sockets_queued; - atomic_long_t threads_woken; - atomic_long_t threads_timedout; -}; +#include <linux/pagevec.h> +#include <linux/kthread.h> /* * @@ -39,37 +34,28 @@ struct svc_pool_stats { * node traffic on multi-node NUMA NFS servers. */ struct svc_pool { - unsigned int sp_id; /* pool id; also node id on NUMA */ - spinlock_t sp_lock; /* protects all fields */ - struct list_head sp_sockets; /* pending sockets */ + unsigned int sp_id; /* pool id; also node id on NUMA */ + struct lwq sp_xprts; /* pending transports */ unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ - struct svc_pool_stats sp_stats; /* statistics on pool operation */ -#define SP_TASK_PENDING (0) /* still work to do even if no - * xprt is queued. */ -#define SP_CONGESTED (1) - unsigned long sp_flags; -} ____cacheline_aligned_in_smp; - -struct svc_serv; - -struct svc_serv_ops { - /* Callback to use when last thread exits. */ - void (*svo_shutdown)(struct svc_serv *, struct net *); + struct llist_head sp_idle_threads; /* idle server threads */ - /* function for service threads to run */ - int (*svo_function)(void *); + /* statistics on pool operation */ + struct percpu_counter sp_messages_arrived; + struct percpu_counter sp_sockets_queued; + struct percpu_counter sp_threads_woken; - /* queue up a transport for servicing */ - void (*svo_enqueue_xprt)(struct svc_xprt *); - - /* set up thread (or whatever) execution context */ - int (*svo_setup)(struct svc_serv *, struct svc_pool *, int); + unsigned long sp_flags; +} ____cacheline_aligned_in_smp; - /* optional module to count when adding threads (pooled svcs only) */ - struct module *svo_module; +/* bits for sp_flags */ +enum { + SP_TASK_PENDING, /* still work to do even if no xprt is queued */ + SP_NEED_VICTIM, /* One thread needs to agree to exit */ + SP_VICTIM_REMAINS, /* One thread needs to actually exit */ }; + /* * RPC service. * @@ -81,48 +67,41 @@ struct svc_serv_ops { * We currently do not support more than one RPC program per daemon. */ struct svc_serv { - struct svc_program * sv_program; /* RPC program */ + struct svc_program * sv_programs; /* RPC programs */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; + unsigned int sv_nprogs; /* Number of sv_programs */ unsigned int sv_nrthreads; /* # of server threads */ - unsigned int sv_maxconn; /* max connections allowed or - * '0' causing max to be based - * on number of threads. */ - unsigned int sv_max_payload; /* datagram payload size */ unsigned int sv_max_mesg; /* max_payload + 1 page for overheads */ unsigned int sv_xdrsize; /* XDR buffer size */ struct list_head sv_permsocks; /* all permanent sockets */ struct list_head sv_tempsocks; /* all temporary sockets */ - int sv_tmpcnt; /* count of temporary sockets */ + int sv_tmpcnt; /* count of temporary "valid" sockets */ struct timer_list sv_temptimer; /* timer for aging temporary sockets */ char * sv_name; /* service name */ unsigned int sv_nrpools; /* number of thread pools */ + bool sv_is_pooled; /* is this a pooled service? */ struct svc_pool * sv_pools; /* array of thread pools */ - const struct svc_serv_ops *sv_ops; /* server operations */ + int (*sv_threadfn)(void *data); + #if defined(CONFIG_SUNRPC_BACKCHANNEL) - struct list_head sv_cb_list; /* queue for callback requests + struct lwq sv_cb_list; /* queue for callback requests * that arrive over the same * connection */ - spinlock_t sv_cb_lock; /* protects the svc_cb_list */ - wait_queue_head_t sv_cb_waitq; /* sleep here if there are no - * entries in the svc_cb_list */ bool sv_bc_enabled; /* service uses backchannel */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; -/* - * We use sv_nrthreads as a reference count. svc_destroy() drops - * this refcount, so we need to bump it up around operations that - * change the number of threads. Horrible, but there it is. - * Should be called with the "service mutex" held. - */ -static inline void svc_get(struct svc_serv *serv) -{ - serv->sv_nrthreads++; -} +/* This is used by pool_stats to find and lock an svc */ +struct svc_info { + struct svc_serv *serv; + struct mutex *mutex; +}; + +void svc_destroy(struct svc_serv **svcp); /* * Maximum payload size supported by a kernel RPC server. @@ -140,28 +119,27 @@ static inline void svc_get(struct svc_serv *serv) * Linux limit; someone who cares more about NFS/UDP performance * can test a larger number. * - * For TCP transports we have more freedom. A size of 1MB is - * chosen to match the client limit. Other OSes are known to - * have larger limits, but those numbers are probably beyond - * the point of diminishing returns. + * For non-UDP transports we have more freedom. A size of 4MB is + * chosen to accommodate clients that support larger I/O sizes. */ -#define RPCSVC_MAXPAYLOAD (1*1024*1024u) -#define RPCSVC_MAXPAYLOAD_TCP RPCSVC_MAXPAYLOAD -#define RPCSVC_MAXPAYLOAD_UDP (32*1024u) +enum { + RPCSVC_MAXPAYLOAD = 4 * 1024 * 1024, + RPCSVC_MAXPAYLOAD_TCP = RPCSVC_MAXPAYLOAD, + RPCSVC_MAXPAYLOAD_UDP = 32 * 1024, +}; extern u32 svc_max_payload(const struct svc_rqst *rqstp); /* - * RPC Requsts and replies are stored in one or more pages. + * RPC Requests and replies are stored in one or more pages. * We maintain an array of pages for each server thread. * Requests are copied into these pages as they arrive. Remaining * pages are available to write the reply into. * - * Pages are sent using ->sendpage so each server thread needs to - * allocate more to replace those used in sending. To help keep track - * of these pages we have a receive list where all pages initialy live, - * and a send list where pages are moved to when there are to be part - * of a reply. + * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each server thread + * needs to allocate more to replace those used in sending. To help keep track + * of these pages we have a receive list where all pages initialy live, and a + * send list where pages are moved to when there are to be part of a reply. * * We use xdr_buf for holding responses as it fits well with NFS * read responses (that have a header, and some data pages, and possibly @@ -172,54 +150,23 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); * list. xdr_buf.tail points to the end of the first page. * This assumes that the non-page part of an rpc reply will fit * in a page - NFSd ensures this. lockd also has no trouble. - * - * Each request/reply pair can have at most one "payload", plus two pages, - * one for the request, and one for the reply. - * We using ->sendfile to return read data, we might need one extra page - * if the request is not page-aligned. So add another '1'. */ -#define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \ - + 2 + 1) - -static inline u32 svc_getnl(struct kvec *iov) -{ - __be32 val, *vp; - vp = iov->iov_base; - val = *vp++; - iov->iov_base = (void*)vp; - iov->iov_len -= sizeof(__be32); - return ntohl(val); -} - -static inline void svc_putnl(struct kvec *iov, u32 val) -{ - __be32 *vp = iov->iov_base + iov->iov_len; - *vp = htonl(val); - iov->iov_len += sizeof(__be32); -} - -static inline __be32 svc_getu32(struct kvec *iov) -{ - __be32 val, *vp; - vp = iov->iov_base; - val = *vp++; - iov->iov_base = (void*)vp; - iov->iov_len -= sizeof(__be32); - return val; -} - -static inline void svc_ungetu32(struct kvec *iov) -{ - __be32 *vp = (__be32 *)iov->iov_base; - iov->iov_base = (void *)(vp - 1); - iov->iov_len += sizeof(*vp); -} -static inline void svc_putu32(struct kvec *iov, __be32 val) +/** + * svc_serv_maxpages - maximum count of pages needed for one RPC message + * @serv: RPC service context + * + * Returns a count of pages or vectors that can hold the maximum + * size RPC message for @serv. + * + * Each request/reply pair can have at most one "payload", plus two + * pages, one for the request, and one for the reply. + * nfsd_splice_actor() might need an extra page when a READ payload + * is not page-aligned. + */ +static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv) { - __be32 *vp = iov->iov_base + iov->iov_len; - *vp = val; - iov->iov_len += sizeof(__be32); + return DIV_ROUND_UP(serv->sv_max_mesg, PAGE_SIZE) + 2 + 1; } /* @@ -228,6 +175,7 @@ static inline void svc_putu32(struct kvec *iov, __be32 val) */ struct svc_rqst { struct list_head rq_all; /* all threads list */ + struct llist_node rq_idle; /* On the idle list */ struct rcu_head rq_rcu_head; /* for RCU deferred kfree */ struct svc_xprt * rq_xprt; /* transport ptr */ @@ -245,16 +193,19 @@ struct svc_rqst { void * rq_xprt_ctxt; /* transport specific context ptr */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ - size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; + struct xdr_stream rq_arg_stream; + struct xdr_stream rq_res_stream; + struct page *rq_scratch_page; struct xdr_buf rq_res; - struct page *rq_pages[RPCSVC_MAXPAGES + 1]; + unsigned long rq_maxpages; /* num of entries in rq_pages */ + struct page * *rq_pages; struct page * *rq_respages; /* points into rq_pages */ struct page * *rq_next_page; /* next reply page to use */ struct page * *rq_page_end; /* one past the last page */ - struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ - struct bio_vec rq_bvec[RPCSVC_MAXPAGES]; + struct folio_batch rq_fbatch; + struct bio_vec *rq_bvec; __be32 rq_xid; /* transmission id */ u32 rq_prog; /* program number */ @@ -262,23 +213,14 @@ struct svc_rqst { u32 rq_proc; /* procedure number */ u32 rq_prot; /* IP protocol */ int rq_cachetype; /* catering to nfsd */ -#define RQ_SECURE (0) /* secure port */ -#define RQ_LOCAL (1) /* local request */ -#define RQ_USEDEFERRAL (2) /* use deferral */ -#define RQ_DROPME (3) /* drop current reply */ -#define RQ_SPLICE_OK (4) /* turned off in gss privacy - * to prevent encrypting page - * cache pages */ -#define RQ_VICTIM (5) /* about to be shut down */ -#define RQ_BUSY (6) /* request is busy */ -#define RQ_DATA (7) /* request has data */ -#define RQ_AUTHERR (8) /* Request status is auth error */ unsigned long rq_flags; /* flags field */ ktime_t rq_qtime; /* enqueue time */ void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ + __be32 *rq_accept_statp; void * rq_auth_data; /* flavor-specific data */ + __be32 rq_auth_stat; /* authentication status */ int rq_auth_slack; /* extra space xdr code * should leave in head * for krb5i, krb5p. @@ -294,13 +236,29 @@ struct svc_rqst { /* Catering to nfsd */ struct auth_domain * rq_client; /* RPC peer info */ struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ - struct svc_cacherep * rq_cacherep; /* cache info */ struct task_struct *rq_task; /* service thread */ - spinlock_t rq_lock; /* per-request lock */ struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ + + int rq_err; /* Thread sets this to inidicate + * initialisation success. + */ + + unsigned long bc_to_initval; + unsigned int bc_to_retries; void ** rq_lease_breaker; /* The v4 client breaking a lease */ + unsigned int rq_status_counter; /* RPC processing counter */ +}; + +/* bits for rq_flags */ +enum { + RQ_SECURE, /* secure port */ + RQ_LOCAL, /* local request */ + RQ_USEDEFERRAL, /* use deferral */ + RQ_DROPME, /* drop current reply */ + RQ_VICTIM, /* Have agreed to shut down */ + RQ_DATA, /* request has data */ }; #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net) @@ -338,38 +296,46 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst) return (struct sockaddr *) &rqst->rq_daddr; } -/* - * Check buffer bounds after decoding arguments +/** + * svc_thread_should_stop - check if this thread should stop + * @rqstp: the thread that might need to stop + * + * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS + * are set. The first thread which sees SP_NEED_VICTIM clears it, becoming + * the victim using this function. It should then promptly call + * svc_exit_thread() to complete the process, clearing SP_VICTIM_REMAINS + * so the task waiting for a thread to exit can wake and continue. + * + * Return values: + * %true: caller should invoke svc_exit_thread() + * %false: caller should do nothing */ -static inline int -xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p) -{ - char *cp = (char *)p; - struct kvec *vec = &rqstp->rq_arg.head[0]; - return cp >= (char*)vec->iov_base - && cp <= (char*)vec->iov_base + vec->iov_len; -} - -static inline int -xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p) +static inline bool svc_thread_should_stop(struct svc_rqst *rqstp) { - struct kvec *vec = &rqstp->rq_res.head[0]; - char *cp = (char*)p; - - vec->iov_len = cp - (char*)vec->iov_base; + if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags)) + set_bit(RQ_VICTIM, &rqstp->rq_flags); - return vec->iov_len <= PAGE_SIZE; + return test_bit(RQ_VICTIM, &rqstp->rq_flags); } -static inline void svc_free_res_pages(struct svc_rqst *rqstp) +/** + * svc_thread_init_status - report whether thread has initialised successfully + * @rqstp: the thread in question + * @err: errno code + * + * After performing any initialisation that could fail, and before starting + * normal work, each sunrpc svc_thread must call svc_thread_init_status() + * with an appropriate error, or zero. + * + * If zero is passed, the thread is ready and must continue until + * svc_thread_should_stop() returns true. If a non-zero error is passed + * the call will not return - the thread will exit. + */ +static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err) { - while (rqstp->rq_next_page != rqstp->rq_respages) { - struct page **pp = --rqstp->rq_next_page; - if (*pp) { - put_page(*pp); - *pp = NULL; - } - } + store_release_wake_up(&rqstp->rq_err, err); + if (err) + kthread_exit(1); } struct svc_deferred_req { @@ -379,15 +345,15 @@ struct svc_deferred_req { size_t addrlen; struct sockaddr_storage daddr; /* where reply must come from */ size_t daddrlen; + void *xprt_ctxt; struct cache_deferred_req handle; - size_t xprt_hlen; int argslen; __be32 args[]; }; struct svc_process_info { union { - int (*dispatch)(struct svc_rqst *, __be32 *); + int (*dispatch)(struct svc_rqst *rqstp); struct { unsigned int lovers; unsigned int hivers; @@ -396,10 +362,9 @@ struct svc_process_info { }; /* - * List of RPC programs on the same transport endpoint + * RPC program - an array of these can use the same transport endpoint */ struct svc_program { - struct svc_program * pg_next; /* other programs (same xprt) */ u32 pg_prog; /* program number */ unsigned int pg_lovers; /* lowest version */ unsigned int pg_hivers; /* highest version */ @@ -407,8 +372,7 @@ struct svc_program { const struct svc_version **pg_vers; /* version array */ char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ - struct svc_stat * pg_stats; /* rpc statistics */ - int (*pg_authenticate)(struct svc_rqst *); + enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp); __be32 (*pg_init_request)(struct svc_rqst *, const struct svc_program *, struct svc_process_info *); @@ -426,7 +390,7 @@ struct svc_version { u32 vs_vers; /* version number */ u32 vs_nproc; /* number of procedures */ const struct svc_procedure *vs_proc; /* per-procedure info */ - unsigned int *vs_count; /* call counts */ + unsigned long __percpu *vs_count; /* call counts */ u32 vs_xdrsize; /* xdrsize needed for this version */ /* Don't register with rpcbind */ @@ -438,11 +402,8 @@ struct svc_version { /* Need xprt with congestion control */ bool vs_need_cong_ctrl; - /* Override dispatch function (e.g. when caching replies). - * A return value of 0 means drop the request. - * vs_dispatch == NULL means use default dispatcher. - */ - int (*vs_dispatch)(struct svc_rqst *, __be32 *); + /* Dispatch function */ + int (*vs_dispatch)(struct svc_rqst *rqstp); }; /* @@ -452,83 +413,58 @@ struct svc_procedure { /* process the request: */ __be32 (*pc_func)(struct svc_rqst *); /* XDR decode args: */ - int (*pc_decode)(struct svc_rqst *, __be32 *data); + bool (*pc_decode)(struct svc_rqst *rqstp, + struct xdr_stream *xdr); /* XDR encode result: */ - int (*pc_encode)(struct svc_rqst *, __be32 *data); + bool (*pc_encode)(struct svc_rqst *rqstp, + struct xdr_stream *xdr); /* XDR free result: */ void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ + unsigned int pc_argzero; /* how much of argument to clear */ unsigned int pc_ressize; /* result struct size */ unsigned int pc_cachetype; /* cache info (NFS) */ unsigned int pc_xdrressize; /* maximum size of XDR reply */ + const char * pc_name; /* for display */ }; /* - * Mode for mapping cpus to pools. - */ -enum { - SVC_POOL_AUTO = -1, /* choose one of the others */ - SVC_POOL_GLOBAL, /* no mapping, just a single global pool - * (legacy & UP mode) */ - SVC_POOL_PERCPU, /* one pool per cpu */ - SVC_POOL_PERNODE /* one pool per numa node */ -}; - -struct svc_pool_map { - int count; /* How many svc_servs use us */ - int mode; /* Note: int not enum to avoid - * warnings about "enumeration value - * not handled in switch" */ - unsigned int npools; - unsigned int *pool_to; /* maps pool id to cpu or node */ - unsigned int *to_pool; /* maps cpu or node to pool id */ -}; - -extern struct svc_pool_map svc_pool_map; - -/* * Function prototypes. */ -int svc_rpcb_setup(struct svc_serv *serv, struct net *net); +int sunrpc_set_pool_mode(const char *val); +int sunrpc_get_pool_mode(char *val, size_t size); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - const struct svc_serv_ops *); -struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, - struct svc_pool *pool, int node); -struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, - struct svc_pool *pool, int node); -void svc_rqst_free(struct svc_rqst *); + int (*threadfn)(void *data)); +bool svc_rqst_replace_page(struct svc_rqst *rqstp, + struct page *page); +void svc_rqst_release_pages(struct svc_rqst *rqstp); void svc_exit_thread(struct svc_rqst *); -unsigned int svc_pool_map_get(void); -void svc_pool_map_put(void); -struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - const struct svc_serv_ops *); +struct svc_serv * svc_create_pooled(struct svc_program *prog, + unsigned int nprog, + struct svc_stat *stats, + unsigned int bufsize, + int (*threadfn)(void *data)); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); -int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); -int svc_pool_stats_open(struct svc_serv *serv, struct file *file); -void svc_destroy(struct svc_serv *); -void svc_shutdown_net(struct svc_serv *, struct net *); -int svc_process(struct svc_rqst *); -int bc_svc_process(struct svc_serv *, struct rpc_rqst *, - struct svc_rqst *); +int svc_pool_stats_open(struct svc_info *si, struct file *file); +void svc_process(struct svc_rqst *rqstp); +void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp); int svc_register(const struct svc_serv *, struct net *, const int, const unsigned short, const unsigned short); void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); -struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); +void svc_pool_wake_idle_thread(struct svc_pool *pool); +struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv); char * svc_print_addr(struct svc_rqst *, char *, size_t); -int svc_encode_read_payload(struct svc_rqst *rqstp, - unsigned int offset, - unsigned int length); -unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, - struct page **pages, - struct kvec *first, size_t total); +const char * svc_proc_name(const struct svc_rqst *rqstp); +int svc_encode_result_payload(struct svc_rqst *rqstp, + unsigned int offset, + unsigned int length); char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, void *p, size_t total); -__be32 svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err); __be32 svc_generic_init_request(struct svc_rqst *rqstp, const struct svc_program *progp, struct svc_process_info *procinfo); @@ -537,11 +473,6 @@ int svc_generic_rpcbind_set(struct net *net, u32 version, int family, unsigned short proto, unsigned short port); -int svc_rpcbind_set_version(struct net *net, - const struct svc_program *progp, - u32 version, int family, - unsigned short proto, - unsigned short port); #define RPC_MAX_ADDRBUFLEN (63U) @@ -557,4 +488,108 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) svc_reserve(rqstp, space + rqstp->rq_auth_slack); } +/** + * svcxdr_init_decode - Prepare an xdr_stream for Call decoding + * @rqstp: controlling server RPC transaction context + * + */ +static inline void svcxdr_init_decode(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_arg_stream; + struct xdr_buf *buf = &rqstp->rq_arg; + struct kvec *argv = buf->head; + + WARN_ON(buf->len != buf->head->iov_len + buf->page_len + buf->tail->iov_len); + buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; + + xdr_init_decode(xdr, buf, argv->iov_base, NULL); + xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); +} + +/** + * svcxdr_init_encode - Prepare an xdr_stream for svc Reply encoding + * @rqstp: controlling server RPC transaction context + * + */ +static inline void svcxdr_init_encode(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *resv = buf->head; + + xdr_reset_scratch_buffer(xdr); + + xdr->buf = buf; + xdr->iov = resv; + xdr->p = resv->iov_base + resv->iov_len; + xdr->end = resv->iov_base + PAGE_SIZE; + buf->len = resv->iov_len; + xdr->page_ptr = buf->pages - 1; + buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages); + xdr->rqst = NULL; +} + +/** + * svcxdr_encode_opaque_pages - Insert pages into an xdr_stream + * @xdr: xdr_stream to be updated + * @pages: array of pages to insert + * @base: starting offset of first data byte in @pages + * @len: number of data bytes in @pages to insert + * + * After the @pages are added, the tail iovec is instantiated pointing + * to end of the head buffer, and the stream is set up to encode + * subsequent items into the tail. + */ +static inline void svcxdr_encode_opaque_pages(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct page **pages, + unsigned int base, + unsigned int len) +{ + xdr_write_pages(xdr, pages, base, len); + xdr->page_ptr = rqstp->rq_next_page - 1; +} + +/** + * svcxdr_set_auth_slack - + * @rqstp: RPC transaction + * @slack: buffer space to reserve for the transaction's security flavor + * + * Set the request's slack space requirement, and set aside that much + * space in the rqstp's rq_res.head for use when the auth wraps the Reply. + */ +static inline void svcxdr_set_auth_slack(struct svc_rqst *rqstp, int slack) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *resv = buf->head; + + rqstp->rq_auth_slack = slack; + + xdr->end -= XDR_QUADLEN(slack); + buf->buflen -= rqstp->rq_auth_slack; + + WARN_ON(xdr->iov != resv); + WARN_ON(xdr->p > xdr->end); +} + +/** + * svcxdr_set_accept_stat - Reserve space for the accept_stat field + * @rqstp: RPC transaction context + * + * Return values: + * %true: Success + * %false: No response buffer space was available + */ +static inline bool svcxdr_set_accept_stat(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; + + rqstp->rq_accept_statp = xdr_reserve_space(xdr, XDR_UNIT); + if (unlikely(!rqstp->rq_accept_statp)) + return false; + *rqstp->rq_accept_statp = rpc_success; + return true; +} + #endif /* SUNRPC_SVC_H */ diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 9dc3a3b88391..22704c2e5b9b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -47,6 +47,10 @@ #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/rpc_rdma.h> #include <linux/sunrpc/rpc_rdma_cid.h> +#include <linux/sunrpc/svc_rdma_pcl.h> +#include <linux/sunrpc/rdma_rn.h> + +#include <linux/percpu_counter.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> @@ -62,21 +66,18 @@ extern unsigned int svcrdma_ord; extern unsigned int svcrdma_max_requests; extern unsigned int svcrdma_max_bc_requests; extern unsigned int svcrdma_max_req_size; +extern struct workqueue_struct *svcrdma_wq; -extern atomic_t rdma_stat_recv; -extern atomic_t rdma_stat_read; -extern atomic_t rdma_stat_write; -extern atomic_t rdma_stat_sq_starve; -extern atomic_t rdma_stat_rq_starve; -extern atomic_t rdma_stat_rq_poll; -extern atomic_t rdma_stat_rq_prod; -extern atomic_t rdma_stat_sq_poll; -extern atomic_t rdma_stat_sq_prod; +extern struct percpu_counter svcrdma_stat_read; +extern struct percpu_counter svcrdma_stat_recv; +extern struct percpu_counter svcrdma_stat_sq_starve; +extern struct percpu_counter svcrdma_stat_write; struct svcxprt_rdma { struct svc_xprt sc_xprt; /* SVC transport structure */ struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ struct list_head sc_accept_q; /* Conn. waiting accept */ + struct rpcrdma_notification sc_rn; /* removal notification */ int sc_ord; /* RDMA read limit */ int sc_max_send_sges; bool sc_snd_w_inv; /* OK to use Send With Invalidate */ @@ -92,11 +93,14 @@ struct svcxprt_rdma { struct ib_pd *sc_pd; spinlock_t sc_send_lock; - struct list_head sc_send_ctxts; + struct llist_head sc_send_ctxts; spinlock_t sc_rw_ctxt_lock; - struct list_head sc_rw_ctxts; + struct llist_head sc_rw_ctxts; + u32 sc_pending_recvs; + u32 sc_recv_batch; struct list_head sc_rq_dto_q; + struct list_head sc_read_complete_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; struct ib_cq *sc_rq_cq; @@ -106,7 +110,6 @@ struct svcxprt_rdma { wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ unsigned long sc_flags; - struct list_head sc_read_complete_q; struct work_struct sc_work; struct llist_head sc_recv_ctxts; @@ -116,6 +119,13 @@ struct svcxprt_rdma { /* sc_flags */ #define RDMAXPRT_CONN_PENDING 3 +static inline struct svcxprt_rdma *svc_rdma_rqst_rdma(struct svc_rqst *rqstp) +{ + struct svc_xprt *xprt = rqstp->rq_xprt; + + return container_of(xprt, struct svcxprt_rdma, sc_xprt); +} + /* * Default connection parameters */ @@ -127,6 +137,43 @@ enum { #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD +/** + * svc_rdma_send_cid_init - Initialize a Receive Queue completion ID + * @rdma: controlling transport + * @cid: completion ID to initialize + */ +static inline void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) +{ + cid->ci_queue_id = rdma->sc_rq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); +} + +/** + * svc_rdma_send_cid_init - Initialize a Send Queue completion ID + * @rdma: controlling transport + * @cid: completion ID to initialize + */ +static inline void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) +{ + cid->ci_queue_id = rdma->sc_sq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); +} + +/* + * A chunk context tracks all I/O for moving one Read or Write + * chunk. This is a set of rdma_rw's that handle data movement + * for all segments of one chunk. + */ +struct svc_rdma_chunk_ctxt { + struct rpc_rdma_cid cc_cid; + struct ib_cqe cc_cqe; + struct list_head cc_rwctxts; + ktime_t cc_posttime; + int cc_sqecount; +}; + struct svc_rdma_recv_ctxt { struct llist_node rc_node; struct list_head rc_list; @@ -135,32 +182,71 @@ struct svc_rdma_recv_ctxt { struct rpc_rdma_cid rc_cid; struct ib_sge rc_recv_sge; void *rc_recv_buf; - struct xdr_buf rc_arg; struct xdr_stream rc_stream; - bool rc_temp; u32 rc_byte_len; - unsigned int rc_page_count; - unsigned int rc_hdr_count; u32 rc_inv_rkey; - __be32 *rc_write_list; - __be32 *rc_reply_chunk; - unsigned int rc_read_payload_offset; - unsigned int rc_read_payload_length; - struct page *rc_pages[RPCSVC_MAXPAGES]; + __be32 rc_msgtype; + + /* State for pulling a Read chunk */ + unsigned int rc_pageoff; + unsigned int rc_curpage; + unsigned int rc_readbytes; + struct xdr_buf rc_saved_arg; + struct svc_rdma_chunk_ctxt rc_cc; + + struct svc_rdma_pcl rc_call_pcl; + + struct svc_rdma_pcl rc_read_pcl; + struct svc_rdma_chunk *rc_cur_result_payload; + struct svc_rdma_pcl rc_write_pcl; + struct svc_rdma_pcl rc_reply_pcl; + + unsigned int rc_page_count; + unsigned long rc_maxpages; + struct page *rc_pages[] __counted_by(rc_maxpages); +}; + +/* + * State for sending a Write chunk. + * - Tracks progress of writing one chunk over all its segments + * - Stores arguments for the SGL constructor functions + */ +struct svc_rdma_write_info { + struct svcxprt_rdma *wi_rdma; + + const struct svc_rdma_chunk *wi_chunk; + + /* write state of this chunk */ + unsigned int wi_seg_off; + unsigned int wi_seg_no; + + /* SGL constructor arguments */ + const struct xdr_buf *wi_xdr; + unsigned char *wi_base; + unsigned int wi_next_off; + + struct svc_rdma_chunk_ctxt wi_cc; + struct work_struct wi_work; }; struct svc_rdma_send_ctxt { - struct list_head sc_list; + struct llist_node sc_node; struct rpc_rdma_cid sc_cid; + struct work_struct sc_work; + struct svcxprt_rdma *sc_rdma; struct ib_send_wr sc_send_wr; + struct ib_send_wr *sc_wr_chain; + int sc_sqecount; struct ib_cqe sc_cqe; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; + struct svc_rdma_write_info sc_reply_info; void *sc_xprt_buf; int sc_page_count; int sc_cur_sge_no; - struct page *sc_pages[RPCSVC_MAXPAGES]; + unsigned long sc_maxpages; + struct page **sc_pages; struct ib_sge sc_sges[]; }; @@ -171,24 +257,36 @@ extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, /* svc_rdma_recvfrom.c */ extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma); +extern struct svc_rdma_recv_ctxt * + svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma); extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt); extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); -extern void svc_rdma_release_rqst(struct svc_rqst *rqstp); +extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt); extern int svc_rdma_recvfrom(struct svc_rqst *); /* svc_rdma_rw.c */ +extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc); extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); -extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, - struct svc_rqst *rqstp, - struct svc_rdma_recv_ctxt *head, __be32 *p); -extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - __be32 *wr_ch, struct xdr_buf *xdr, - unsigned int offset, - unsigned long length); -extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, - const struct svc_rdma_recv_ctxt *rctxt, - struct xdr_buf *xdr); +extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc); +extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc, + enum dma_data_direction dir); +extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr); +extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, + const struct svc_rdma_pcl *write_pcl, + const struct svc_rdma_pcl *reply_pcl, + struct svc_rdma_send_ctxt *sctxt, + const struct xdr_buf *xdr); +extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, + struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head); /* svc_rdma_sendto.c */ extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); @@ -196,19 +294,21 @@ extern struct svc_rdma_send_ctxt * svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt); -extern int svc_rdma_send(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_post_send(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, - const struct svc_rdma_recv_ctxt *rctxt, - struct xdr_buf *xdr); + const struct svc_rdma_pcl *write_pcl, + const struct svc_rdma_pcl *reply_pcl, + const struct xdr_buf *xdr); extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, struct svc_rdma_recv_ctxt *rctxt, int status); +extern void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail); extern int svc_rdma_sendto(struct svc_rqst *); -extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, - unsigned int length); +extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length); /* svc_rdma_transport.c */ extern struct svc_xprt_class svc_rdma_class; diff --git a/include/linux/sunrpc/svc_rdma_pcl.h b/include/linux/sunrpc/svc_rdma_pcl.h new file mode 100644 index 000000000000..7516ad0fae80 --- /dev/null +++ b/include/linux/sunrpc/svc_rdma_pcl.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020, Oracle and/or its affiliates + */ + +#ifndef SVC_RDMA_PCL_H +#define SVC_RDMA_PCL_H + +#include <linux/list.h> + +struct svc_rdma_segment { + u32 rs_handle; + u32 rs_length; + u64 rs_offset; +}; + +struct svc_rdma_chunk { + struct list_head ch_list; + + u32 ch_position; + u32 ch_length; + u32 ch_payload_length; + + u32 ch_segcount; + struct svc_rdma_segment ch_segments[]; +}; + +struct svc_rdma_pcl { + unsigned int cl_count; + struct list_head cl_chunks; +}; + +/** + * pcl_init - Initialize a parsed chunk list + * @pcl: parsed chunk list to initialize + * + */ +static inline void pcl_init(struct svc_rdma_pcl *pcl) +{ + INIT_LIST_HEAD(&pcl->cl_chunks); +} + +/** + * pcl_is_empty - Return true if parsed chunk list is empty + * @pcl: parsed chunk list + * + */ +static inline bool pcl_is_empty(const struct svc_rdma_pcl *pcl) +{ + return list_empty(&pcl->cl_chunks); +} + +/** + * pcl_first_chunk - Return first chunk in a parsed chunk list + * @pcl: parsed chunk list + * + * Returns the first chunk in the list, or NULL if the list is empty. + */ +static inline struct svc_rdma_chunk * +pcl_first_chunk(const struct svc_rdma_pcl *pcl) +{ + if (pcl_is_empty(pcl)) + return NULL; + return list_first_entry(&pcl->cl_chunks, struct svc_rdma_chunk, + ch_list); +} + +/** + * pcl_next_chunk - Return next chunk in a parsed chunk list + * @pcl: a parsed chunk list + * @chunk: chunk in @pcl + * + * Returns the next chunk in the list, or NULL if @chunk is already last. + */ +static inline struct svc_rdma_chunk * +pcl_next_chunk(const struct svc_rdma_pcl *pcl, struct svc_rdma_chunk *chunk) +{ + if (list_is_last(&chunk->ch_list, &pcl->cl_chunks)) + return NULL; + return list_next_entry(chunk, ch_list); +} + +/** + * pcl_for_each_chunk - Iterate over chunks in a parsed chunk list + * @pos: the loop cursor + * @pcl: a parsed chunk list + */ +#define pcl_for_each_chunk(pos, pcl) \ + for (pos = list_first_entry(&(pcl)->cl_chunks, struct svc_rdma_chunk, ch_list); \ + &pos->ch_list != &(pcl)->cl_chunks; \ + pos = list_next_entry(pos, ch_list)) + +/** + * pcl_for_each_segment - Iterate over segments in a parsed chunk + * @pos: the loop cursor + * @chunk: a parsed chunk + */ +#define pcl_for_each_segment(pos, chunk) \ + for (pos = &(chunk)->ch_segments[0]; \ + pos <= &(chunk)->ch_segments[(chunk)->ch_segcount - 1]; \ + pos++) + +/** + * pcl_chunk_end_offset - Return offset of byte range following @chunk + * @chunk: chunk in @pcl + * + * Returns starting offset of the region just after @chunk + */ +static inline unsigned int +pcl_chunk_end_offset(const struct svc_rdma_chunk *chunk) +{ + return xdr_align_size(chunk->ch_position + chunk->ch_payload_length); +} + +struct svc_rdma_recv_ctxt; + +extern void pcl_free(struct svc_rdma_pcl *pcl); +extern bool pcl_alloc_call(struct svc_rdma_recv_ctxt *rctxt, __be32 *p); +extern bool pcl_alloc_read(struct svc_rdma_recv_ctxt *rctxt, __be32 *p); +extern bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_pcl *pcl, __be32 *p); +extern int pcl_process_nonpayloads(const struct svc_rdma_pcl *pcl, + const struct xdr_buf *xdr, + int (*actor)(const struct xdr_buf *, + void *), + void *data); + +#endif /* SVC_RDMA_PCL_H */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index aca35ab5cff2..369a89aea186 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -21,13 +21,13 @@ struct svc_xprt_ops { int (*xpo_has_wspace)(struct svc_xprt *); int (*xpo_recvfrom)(struct svc_rqst *); int (*xpo_sendto)(struct svc_rqst *); - int (*xpo_read_payload)(struct svc_rqst *, unsigned int, - unsigned int); - void (*xpo_release_rqst)(struct svc_rqst *); + int (*xpo_result_payload)(struct svc_rqst *, unsigned int, + unsigned int); + void (*xpo_release_ctxt)(struct svc_xprt *xprt, void *ctxt); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); - void (*xpo_secure_port)(struct svc_rqst *rqstp); void (*xpo_kill_temp_xprt)(struct svc_xprt *); + void (*xpo_handshake)(struct svc_xprt *xprt); }; struct svc_xprt_class { @@ -53,23 +53,10 @@ struct svc_xprt { struct svc_xprt_class *xpt_class; const struct svc_xprt_ops *xpt_ops; struct kref xpt_ref; + ktime_t xpt_qtime; struct list_head xpt_list; - struct list_head xpt_ready; + struct lwq_node xpt_ready; unsigned long xpt_flags; -#define XPT_BUSY 0 /* enqueued/receiving */ -#define XPT_CONN 1 /* conn pending */ -#define XPT_CLOSE 2 /* dead or dying */ -#define XPT_DATA 3 /* data pending */ -#define XPT_TEMP 4 /* connected transport */ -#define XPT_DEAD 6 /* transport closed */ -#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */ -#define XPT_DEFERRED 8 /* deferred request pending */ -#define XPT_OLD 9 /* used for xprt aging mark+sweep */ -#define XPT_LISTENER 10 /* listening endpoint */ -#define XPT_CACHE_AUTH 11 /* cache auth info */ -#define XPT_LOCAL 12 /* connection from loopback interface */ -#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ -#define XPT_CONG_CTRL 14 /* has congestion control */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ @@ -88,11 +75,55 @@ struct svc_xprt { struct list_head xpt_users; /* callbacks on free */ struct net *xpt_net; + netns_tracker ns_tracker; const struct cred *xpt_cred; struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */ struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */ }; +/* flag bits for xpt_flags */ +enum { + XPT_BUSY, /* enqueued/receiving */ + XPT_CONN, /* conn pending */ + XPT_CLOSE, /* dead or dying */ + XPT_DATA, /* data pending */ + XPT_TEMP, /* connected transport */ + XPT_DEAD, /* transport closed */ + XPT_CHNGBUF, /* need to change snd/rcv buf sizes */ + XPT_DEFERRED, /* deferred request pending */ + XPT_OLD, /* used for xprt aging mark+sweep */ + XPT_LISTENER, /* listening endpoint */ + XPT_CACHE_AUTH, /* cache auth info */ + XPT_LOCAL, /* connection from loopback interface */ + XPT_KILL_TEMP, /* call xpo_kill_temp_xprt before closing */ + XPT_CONG_CTRL, /* has congestion control */ + XPT_HANDSHAKE, /* xprt requests a handshake */ + XPT_TLS_SESSION, /* transport-layer security established */ + XPT_PEER_AUTH, /* peer has been authenticated */ + XPT_PEER_VALID, /* peer has presented a filehandle that + * it has access to. It is NOT counted + * in ->sv_tmpcnt. + */ +}; + +/* + * Maximum number of "tmp" connections - those without XPT_PEER_VALID - + * permitted on any service. + */ +#define XPT_MAX_TMP_CONN 64 + +static inline void svc_xprt_set_valid(struct svc_xprt *xpt) +{ + if (test_bit(XPT_TEMP, &xpt->xpt_flags) && + !test_and_set_bit(XPT_PEER_VALID, &xpt->xpt_flags)) { + struct svc_serv *serv = xpt->xpt_server; + + spin_lock(&serv->sv_lock); + serv->sv_tmpcnt -= 1; + spin_unlock(&serv->sv_lock); + } +} + static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) { spin_lock(&xpt->xpt_lock); @@ -127,22 +158,30 @@ int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); -int svc_create_xprt(struct svc_serv *, const char *, struct net *, - const int, const unsigned short, int, - const struct cred *); -void svc_xprt_do_enqueue(struct svc_xprt *xprt); +int svc_xprt_create_from_sa(struct svc_serv *serv, const char *xprt_name, + struct net *net, struct sockaddr *sap, + int flags, const struct cred *cred); +int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, + struct net *net, const int family, + const unsigned short port, int flags, + const struct cred *cred); +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); +void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); -void svc_close_xprt(struct svc_xprt *xprt); +void svc_xprt_close(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); +struct svc_xprt *svc_find_listener(struct svc_serv *serv, const char *xcl_name, + struct net *net, const struct sockaddr *sa); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, struct net *net, const sa_family_t af, const unsigned short port); int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen); void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *xprt); void svc_age_temp_xprts_now(struct svc_serv *, struct sockaddr *); +void svc_xprt_deferred_close(struct svc_xprt *xprt); static inline void svc_xprt_get(struct svc_xprt *xprt) { diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index b0003866a249..2e111153f7cd 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -14,6 +14,7 @@ #include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/gss_api.h> +#include <linux/sunrpc/clnt.h> #include <linux/hash.h> #include <linux/stringhash.h> #include <linux/cred.h> @@ -83,6 +84,19 @@ struct auth_domain { struct rcu_head rcu_head; }; +enum svc_auth_status { + SVC_GARBAGE = 1, + SVC_SYSERR, + SVC_VALID, + SVC_NEGATIVE, + SVC_OK, + SVC_DROP, + SVC_CLOSE, + SVC_DENIED, + SVC_PENDING, + SVC_COMPLETE, +}; + /* * Each authentication flavour registers an auth_ops * structure. @@ -98,6 +112,8 @@ struct auth_domain { * is (probably) already in place. Certainly space is * reserved for it. * DROP - simply drop the request. It may have been deferred + * CLOSE - like SVC_DROP, but request is definitely lost. + * If there is a tcp connection, it should be closed. * GARBAGE - rpc garbage_args error * SYSERR - rpc system_err error * DENIED - authp holds reason for denial. @@ -111,60 +127,48 @@ struct auth_domain { * * release() is given a request after the procedure has been run. * It should sign/encrypt the results if needed - * It should return: - * OK - the resbuf is ready to be sent - * DROP - the reply should be quitely dropped - * DENIED - authp holds a reason for MSG_DENIED - * SYSERR - rpc system_err * * domain_release() * This call releases a domain. + * * set_client() - * Givens a pending request (struct svc_rqst), finds and assigns + * Given a pending request (struct svc_rqst), finds and assigns * an appropriate 'auth_domain' as the client. + * + * pseudoflavor() + * Returns RPC_AUTH pseudoflavor in use by @rqstp. */ struct auth_ops { char * name; struct module *owner; int flavour; - int (*accept)(struct svc_rqst *rq, __be32 *authp); - int (*release)(struct svc_rqst *rq); - void (*domain_release)(struct auth_domain *); - int (*set_client)(struct svc_rqst *rq); -}; -#define SVC_GARBAGE 1 -#define SVC_SYSERR 2 -#define SVC_VALID 3 -#define SVC_NEGATIVE 4 -#define SVC_OK 5 -#define SVC_DROP 6 -#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely - * lost so if there is a tcp connection, it - * should be closed - */ -#define SVC_DENIED 8 -#define SVC_PENDING 9 -#define SVC_COMPLETE 10 + enum svc_auth_status (*accept)(struct svc_rqst *rqstp); + int (*release)(struct svc_rqst *rqstp); + void (*domain_release)(struct auth_domain *dom); + enum svc_auth_status (*set_client)(struct svc_rqst *rqstp); + rpc_authflavor_t (*pseudoflavor)(struct svc_rqst *rqstp); +}; struct svc_xprt; -extern int svc_authenticate(struct svc_rqst *rqstp, __be32 *authp); +extern rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp); extern int svc_authorise(struct svc_rqst *rqstp); -extern int svc_set_client(struct svc_rqst *rqstp); +extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp); extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); extern void svc_auth_unregister(rpc_authflavor_t flavor); +extern void svcauth_map_clnt_to_svc_cred_local(struct rpc_clnt *clnt, + const struct cred *, + struct svc_cred *); + extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); -extern int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom); extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); -extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr); -extern int auth_unix_forget_old(struct auth_domain *dom); extern void svcauth_unix_purge(struct net *net); extern void svcauth_unix_info_release(struct svc_xprt *xpt); -extern int svcauth_unix_set_client(struct svc_rqst *rqstp); +extern enum svc_auth_status svcauth_unix_set_client(struct svc_rqst *rqstp); extern int unix_gid_cache_create(struct net *net); extern void unix_gid_cache_destroy(struct net *net); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index b7ac7fe68306..963bbe251e52 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -36,7 +36,13 @@ struct svc_sock { * received so far in the fragments making up this rpc: */ u32 sk_datalen; - struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ + struct page_frag_cache sk_frag_cache; + + struct completion sk_handshake_done; + + /* received data */ + unsigned long sk_maxpages; + struct page * sk_pages[] __counted_by(sk_maxpages); }; static inline u32 svc_sock_reclen(struct svc_sock *svsk) @@ -52,19 +58,13 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk) /* * Function prototypes. */ -void svc_close_net(struct svc_serv *, struct net *); -int svc_recv(struct svc_rqst *, long); -int svc_send(struct svc_rqst *); -void svc_drop(struct svc_rqst *); -void svc_sock_update_bufs(struct svc_serv *serv); -bool svc_alien_sock(struct net *net, int fd); -int svc_addsock(struct svc_serv *serv, const int fd, - char *name_return, const size_t len, - const struct cred *cred); +void svc_recv(struct svc_rqst *rqstp); +void svc_send(struct svc_rqst *rqstp); +int svc_addsock(struct svc_serv *serv, struct net *net, + const int fd, char *name_return, const size_t len, + const struct cred *cred); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); -struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot); -void svc_sock_destroy(struct svc_xprt *); /* * svc_makesock socket characteristics diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 9548d075e06d..a2ab813a9800 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -13,20 +13,26 @@ #include <linux/uio.h> #include <asm/byteorder.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/scatterlist.h> struct bio_vec; struct rpc_rqst; /* + * Size of an XDR encoding unit in bytes, i.e. 32 bits, + * as defined in Section 3 of RFC 4506. All encoded + * XDR data items are aligned on a boundary of 32 bits. + */ +#define XDR_UNIT sizeof(__be32) + +/* * Buffer adjustment */ #define XDR_QUADLEN(l) (((l) + 3) >> 2) /* - * Generic opaque `network object.' At the kernel level, this type - * is used only by lockd. + * Generic opaque `network object.' */ #define XDR_MAX_NETOBJ 1024 struct xdr_netobj { @@ -89,6 +95,7 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) #define rpc_auth_unix cpu_to_be32(RPC_AUTH_UNIX) #define rpc_auth_short cpu_to_be32(RPC_AUTH_SHORT) #define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS) +#define rpc_auth_tls cpu_to_be32(RPC_AUTH_TLS) #define rpc_call cpu_to_be32(RPC_CALL) #define rpc_reply cpu_to_be32(RPC_REPLY) @@ -128,10 +135,12 @@ __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); -void xdr_terminate_string(struct xdr_buf *, const u32); -size_t xdr_buf_pagecount(struct xdr_buf *buf); +void xdr_terminate_string(const struct xdr_buf *, const u32); +size_t xdr_buf_pagecount(const struct xdr_buf *buf); int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); void xdr_free_bvec(struct xdr_buf *buf); +unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size, + const struct xdr_buf *xdr); static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) { @@ -181,15 +190,14 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) /* * XDR buffer helper functions */ -extern void xdr_shift_buf(struct xdr_buf *, size_t); -extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); -extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_from_iov(const struct kvec *, struct xdr_buf *); +extern int xdr_buf_subsegment(const struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); extern void xdr_buf_trim(struct xdr_buf *, unsigned int); -extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); -extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); +extern int read_bytes_from_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int); +extern int write_bytes_to_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int); -extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32); -extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *); +extern int xdr_encode_word(const struct xdr_buf *, unsigned int, u32); +extern int xdr_decode_word(const struct xdr_buf *, unsigned int, u32 *); struct xdr_array2_desc; typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem); @@ -200,9 +208,9 @@ struct xdr_array2_desc { xdr_xcode_elem_t xcode; }; -extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base, +extern int xdr_decode_array2(const struct xdr_buf *buf, unsigned int base, struct xdr_array2_desc *desc); -extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base, +extern int xdr_encode_array2(const struct xdr_buf *buf, unsigned int base, struct xdr_array2_desc *desc); extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len); @@ -218,6 +226,7 @@ struct xdr_stream { struct kvec *iov; /* pointer to the current kvec */ struct kvec scratch; /* Scratch buffer */ struct page **page_ptr; /* pointer to the current page */ + void *page_kaddr; /* kmapped address of the current page */ unsigned int nwords; /* Remaining decode buffer length */ struct rpc_rqst *rqst; /* For debugging */ @@ -233,11 +242,13 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); +extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, struct rpc_rqst *rqst); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); -extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, - size_t nbytes); -extern void xdr_commit_encode(struct xdr_stream *xdr); +extern int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes); +extern void __xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); +extern void xdr_truncate_decode(struct xdr_stream *xdr, size_t len); extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); @@ -247,13 +258,75 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); -extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); +extern void xdr_finish_decode(struct xdr_stream *xdr); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); -extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); -extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); -extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); +extern int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern void xdr_set_pagelen(struct xdr_stream *, unsigned int len); +extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, + unsigned int len); +extern unsigned int xdr_stream_move_subsegment(struct xdr_stream *xdr, unsigned int offset, + unsigned int target, unsigned int length); +extern unsigned int xdr_stream_zero(struct xdr_stream *xdr, unsigned int offset, + unsigned int length); + +/** + * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. + * @xdr: pointer to xdr_stream struct + * @buf: pointer to an empty buffer + * @buflen: size of 'buf' + * + * The scratch buffer is used when decoding from an array of pages. + * If an xdr_inline_decode() call spans across page boundaries, then + * we copy the data into the scratch buffer in order to allow linear + * access. + */ +static inline void +xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) +{ + xdr->scratch.iov_base = buf; + xdr->scratch.iov_len = buflen; +} + +/** + * xdr_set_scratch_page - Attach a scratch buffer for decoding data + * @xdr: pointer to xdr_stream struct + * @page: an anonymous page + * + * See xdr_set_scratch_buffer(). + */ +static inline void +xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) +{ + xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); +} + +/** + * xdr_reset_scratch_buffer - Clear scratch buffer information + * @xdr: pointer to xdr_stream struct + * + * See xdr_set_scratch_buffer(). + */ +static inline void +xdr_reset_scratch_buffer(struct xdr_stream *xdr) +{ + xdr_set_scratch_buffer(xdr, NULL, 0); +} + +/** + * xdr_commit_encode - Ensure all data is written to xdr->buf + * @xdr: pointer to xdr_stream + * + * Handle encoding across page boundaries by giving the caller a + * temporary location to write to, then later copying the data into + * place. __xdr_commit_encode() does that copying. + */ +static inline void xdr_commit_encode(struct xdr_stream *xdr) +{ + if (unlikely(xdr->scratch.iov_len)) + __xdr_commit_encode(xdr); +} /** * xdr_stream_remaining - Return the number of bytes remaining in the stream @@ -276,6 +349,11 @@ ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size); ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); +ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor, + void **body, unsigned int *body_len); +ssize_t xdr_stream_encode_opaque_auth(struct xdr_stream *xdr, u32 flavor, + void *body, unsigned int body_len); + /** * xdr_align_size - Calculate padded size of an object * @n: Size of an object being XDR encoded (in bytes) @@ -286,7 +364,7 @@ ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, static inline size_t xdr_align_size(size_t n) { - const size_t mask = sizeof(__u32) - 1; + const size_t mask = XDR_UNIT - 1; return (n + mask) & ~mask; } @@ -316,7 +394,7 @@ static inline size_t xdr_pad_size(size_t n) */ static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr) { - const size_t len = sizeof(__be32); + const size_t len = XDR_UNIT; __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) @@ -335,7 +413,7 @@ static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr) */ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) { - const size_t len = sizeof(__be32); + const size_t len = XDR_UNIT; __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) @@ -345,6 +423,40 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) } /** + * xdr_encode_bool - Encode a boolean item + * @p: address in a buffer into which to encode + * @n: boolean value to encode + * + * Return value: + * Address of item following the encoded boolean + */ +static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) +{ + *p++ = n ? xdr_one : xdr_zero; + return p; +} + +/** + * xdr_stream_encode_bool - Encode a boolean item + * @xdr: pointer to xdr_stream + * @n: boolean value to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n) +{ + const size_t len = XDR_UNIT; + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + xdr_encode_bool(p, n); + return len; +} + +/** * xdr_stream_encode_u32 - Encode a 32-bit integer * @xdr: pointer to xdr_stream * @n: integer to encode @@ -366,6 +478,27 @@ xdr_stream_encode_u32(struct xdr_stream *xdr, __u32 n) } /** + * xdr_stream_encode_be32 - Encode a big-endian 32-bit integer + * @xdr: pointer to xdr_stream + * @n: integer to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_be32(struct xdr_stream *xdr, __be32 n) +{ + const size_t len = sizeof(n); + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = n; + return len; +} + +/** * xdr_stream_encode_u64 - Encode a 64-bit integer * @xdr: pointer to xdr_stream * @n: 64-bit integer to encode @@ -506,6 +639,27 @@ static inline bool xdr_item_is_present(const __be32 *p) } /** + * xdr_stream_decode_bool - Decode a boolean + * @xdr: pointer to xdr_stream + * @ptr: pointer to a u32 in which to store the result + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = (*p != xdr_zero); + return 0; +} + +/** * xdr_stream_decode_u32 - Decode a 32-bit integer * @xdr: pointer to xdr_stream * @ptr: location to store integer @@ -527,6 +681,48 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) } /** + * xdr_stream_decode_be32 - Decode a big-endian 32-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_be32(struct xdr_stream *xdr, __be32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = *p; + return 0; +} + +/** + * xdr_stream_decode_u64 - Decode a 64-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store 64-bit integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + xdr_decode_hyper(p, ptr); + return 0; +} + +/** * xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data * @xdr: pointer to xdr_stream * @ptr: location to store data @@ -604,6 +800,8 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; + if (U32_MAX >= SIZE_MAX / sizeof(*p) && len > SIZE_MAX / sizeof(*p)) + return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) return -EBADMSG; diff --git a/include/linux/sunrpc/xdrgen/_builtins.h b/include/linux/sunrpc/xdrgen/_builtins.h new file mode 100644 index 000000000000..66ca3ece951a --- /dev/null +++ b/include/linux/sunrpc/xdrgen/_builtins.h @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Oracle and/or its affiliates. + * + * This header defines XDR data type primitives specified in + * Section 4 of RFC 4506, used by RPC programs implemented + * in the Linux kernel. + */ + +#ifndef _SUNRPC_XDRGEN__BUILTINS_H_ +#define _SUNRPC_XDRGEN__BUILTINS_H_ + +#include <linux/sunrpc/xdr.h> + +static inline bool +xdrgen_decode_void(struct xdr_stream *xdr) +{ + return true; +} + +static inline bool +xdrgen_encode_void(struct xdr_stream *xdr) +{ + return true; +} + +static inline bool +xdrgen_decode_bool(struct xdr_stream *xdr, bool *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = (*p != xdr_zero); + return true; +} + +static inline bool +xdrgen_encode_bool(struct xdr_stream *xdr, bool val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = val ? xdr_one : xdr_zero; + return true; +} + +static inline bool +xdrgen_decode_int(struct xdr_stream *xdr, s32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_int(struct xdr_stream *xdr, s32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_unsigned_int(struct xdr_stream *xdr, u32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_int(struct xdr_stream *xdr, u32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_long(struct xdr_stream *xdr, s32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_long(struct xdr_stream *xdr, s32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_unsigned_long(struct xdr_stream *xdr, u32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_long(struct xdr_stream *xdr, u32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_hyper(struct xdr_stream *xdr, s64 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + *ptr = get_unaligned_be64(p); + return true; +} + +static inline bool +xdrgen_encode_hyper(struct xdr_stream *xdr, s64 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + put_unaligned_be64(val, p); + return true; +} + +static inline bool +xdrgen_decode_unsigned_hyper(struct xdr_stream *xdr, u64 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + *ptr = get_unaligned_be64(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_hyper(struct xdr_stream *xdr, u64 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + put_unaligned_be64(val, p); + return true; +} + +static inline bool +xdrgen_decode_string(struct xdr_stream *xdr, string *ptr, u32 maxlen) +{ + __be32 *p; + u32 len; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return false; + if (unlikely(maxlen && len > maxlen)) + return false; + if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return false; + ptr->data = (unsigned char *)p; + } + ptr->len = len; + return true; +} + +static inline bool +xdrgen_encode_string(struct xdr_stream *xdr, string val, u32 maxlen) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(val.len)); + + if (unlikely(!p)) + return false; + xdr_encode_opaque(p, val.data, val.len); + return true; +} + +static inline bool +xdrgen_decode_opaque(struct xdr_stream *xdr, opaque *ptr, u32 maxlen) +{ + __be32 *p; + u32 len; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return false; + if (unlikely(maxlen && len > maxlen)) + return false; + if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return false; + ptr->data = (u8 *)p; + } + ptr->len = len; + return true; +} + +static inline bool +xdrgen_encode_opaque(struct xdr_stream *xdr, opaque val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(val.len)); + + if (unlikely(!p)) + return false; + xdr_encode_opaque(p, val.data, val.len); + return true; +} + +#endif /* _SUNRPC_XDRGEN__BUILTINS_H_ */ diff --git a/include/linux/sunrpc/xdrgen/_defs.h b/include/linux/sunrpc/xdrgen/_defs.h new file mode 100644 index 000000000000..20c7270aa64d --- /dev/null +++ b/include/linux/sunrpc/xdrgen/_defs.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Oracle and/or its affiliates. + * + * This header defines XDR data type primitives specified in + * Section 4 of RFC 4506, used by RPC programs implemented + * in the Linux kernel. + */ + +#ifndef _SUNRPC_XDRGEN__DEFS_H_ +#define _SUNRPC_XDRGEN__DEFS_H_ + +#define TRUE (true) +#define FALSE (false) + +typedef struct { + u32 len; + unsigned char *data; +} string; + +typedef struct { + u32 len; + u8 *data; +} opaque; + +#define XDR_void (0) +#define XDR_bool (1) +#define XDR_int (1) +#define XDR_unsigned_int (1) +#define XDR_long (1) +#define XDR_unsigned_long (1) +#define XDR_hyper (2) +#define XDR_unsigned_hyper (2) + +#endif /* _SUNRPC_XDRGEN__DEFS_H_ */ diff --git a/include/linux/sunrpc/xdrgen/nfs4_1.h b/include/linux/sunrpc/xdrgen/nfs4_1.h new file mode 100644 index 000000000000..cf21a14aa885 --- /dev/null +++ b/include/linux/sunrpc/xdrgen/nfs4_1.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Generated by xdrgen. Manual edits will be lost. */ +/* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */ +/* XDR specification modification time: Mon Oct 14 09:10:13 2024 */ + +#ifndef _LINUX_XDRGEN_NFS4_1_DEF_H +#define _LINUX_XDRGEN_NFS4_1_DEF_H + +#include <linux/types.h> +#include <linux/sunrpc/xdrgen/_defs.h> + +typedef s64 int64_t; + +typedef u32 uint32_t; + +typedef struct { + u32 count; + uint32_t *element; +} bitmap4; + +struct nfstime4 { + int64_t seconds; + uint32_t nseconds; +}; + +typedef bool fattr4_offline; + +enum { FATTR4_OFFLINE = 83 }; + +struct open_arguments4 { + bitmap4 oa_share_access; + bitmap4 oa_share_deny; + bitmap4 oa_share_access_want; + bitmap4 oa_open_claim; + bitmap4 oa_create_mode; +}; + +enum open_args_share_access4 { + OPEN_ARGS_SHARE_ACCESS_READ = 1, + OPEN_ARGS_SHARE_ACCESS_WRITE = 2, + OPEN_ARGS_SHARE_ACCESS_BOTH = 3, +}; +typedef enum open_args_share_access4 open_args_share_access4; + +enum open_args_share_deny4 { + OPEN_ARGS_SHARE_DENY_NONE = 0, + OPEN_ARGS_SHARE_DENY_READ = 1, + OPEN_ARGS_SHARE_DENY_WRITE = 2, + OPEN_ARGS_SHARE_DENY_BOTH = 3, +}; +typedef enum open_args_share_deny4 open_args_share_deny4; + +enum open_args_share_access_want4 { + OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG = 3, + OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG = 4, + OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL = 5, + OPEN_ARGS_SHARE_ACCESS_WANT_SIGNAL_DELEG_WHEN_RESRC_AVAIL = 17, + OPEN_ARGS_SHARE_ACCESS_WANT_PUSH_DELEG_WHEN_UNCONTENDED = 18, + OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 20, + OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 21, +}; +typedef enum open_args_share_access_want4 open_args_share_access_want4; + +enum open_args_open_claim4 { + OPEN_ARGS_OPEN_CLAIM_NULL = 0, + OPEN_ARGS_OPEN_CLAIM_PREVIOUS = 1, + OPEN_ARGS_OPEN_CLAIM_DELEGATE_CUR = 2, + OPEN_ARGS_OPEN_CLAIM_DELEGATE_PREV = 3, + OPEN_ARGS_OPEN_CLAIM_FH = 4, + OPEN_ARGS_OPEN_CLAIM_DELEG_CUR_FH = 5, + OPEN_ARGS_OPEN_CLAIM_DELEG_PREV_FH = 6, +}; +typedef enum open_args_open_claim4 open_args_open_claim4; + +enum open_args_createmode4 { + OPEN_ARGS_CREATEMODE_UNCHECKED4 = 0, + OPEN_ARGS_CREATE_MODE_GUARDED = 1, + OPEN_ARGS_CREATEMODE_EXCLUSIVE4 = 2, + OPEN_ARGS_CREATE_MODE_EXCLUSIVE4_1 = 3, +}; +typedef enum open_args_createmode4 open_args_createmode4; + +typedef struct open_arguments4 fattr4_open_arguments; + +enum { FATTR4_OPEN_ARGUMENTS = 86 }; + +enum { OPEN4_RESULT_NO_OPEN_STATEID = 0x00000010 }; + +typedef struct nfstime4 fattr4_time_deleg_access; + +typedef struct nfstime4 fattr4_time_deleg_modify; + +enum { FATTR4_TIME_DELEG_ACCESS = 84 }; + +enum { FATTR4_TIME_DELEG_MODIFY = 85 }; + +enum { OPEN4_SHARE_ACCESS_WANT_DELEG_MASK = 0xFF00 }; + +enum { OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE = 0x0000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_READ_DELEG = 0x0100 }; + +enum { OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG = 0x0200 }; + +enum { OPEN4_SHARE_ACCESS_WANT_ANY_DELEG = 0x0300 }; + +enum { OPEN4_SHARE_ACCESS_WANT_NO_DELEG = 0x0400 }; + +enum { OPEN4_SHARE_ACCESS_WANT_CANCEL = 0x0500 }; + +enum { OPEN4_SHARE_ACCESS_WANT_SIGNAL_DELEG_WHEN_RESRC_AVAIL = 0x10000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_PUSH_DELEG_WHEN_UNCONTENDED = 0x20000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 0x100000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 0x200000 }; + +enum open_delegation_type4 { + OPEN_DELEGATE_NONE = 0, + OPEN_DELEGATE_READ = 1, + OPEN_DELEGATE_WRITE = 2, + OPEN_DELEGATE_NONE_EXT = 3, + OPEN_DELEGATE_READ_ATTRS_DELEG = 4, + OPEN_DELEGATE_WRITE_ATTRS_DELEG = 5, +}; +typedef enum open_delegation_type4 open_delegation_type4; + +#define NFS4_int64_t_sz \ + (XDR_hyper) +#define NFS4_uint32_t_sz \ + (XDR_unsigned_int) +#define NFS4_bitmap4_sz (XDR_unsigned_int) +#define NFS4_nfstime4_sz \ + (NFS4_int64_t_sz + NFS4_uint32_t_sz) +#define NFS4_fattr4_offline_sz \ + (XDR_bool) +#define NFS4_open_arguments4_sz \ + (NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz) +#define NFS4_open_args_share_access4_sz (XDR_int) +#define NFS4_open_args_share_deny4_sz (XDR_int) +#define NFS4_open_args_share_access_want4_sz (XDR_int) +#define NFS4_open_args_open_claim4_sz (XDR_int) +#define NFS4_open_args_createmode4_sz (XDR_int) +#define NFS4_fattr4_open_arguments_sz \ + (NFS4_open_arguments4_sz) +#define NFS4_fattr4_time_deleg_access_sz \ + (NFS4_nfstime4_sz) +#define NFS4_fattr4_time_deleg_modify_sz \ + (NFS4_nfstime4_sz) +#define NFS4_open_delegation_type4_sz (XDR_int) + +#endif /* _LINUX_XDRGEN_NFS4_1_DEF_H */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a603d48d2b2c..f46d1fb8f71a 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -30,16 +30,7 @@ #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) -/* - * This describes a timeout strategy - */ -struct rpc_timeout { - unsigned long to_initval, /* initial timeout */ - to_maxval, /* max timeout */ - to_increment; /* if !exponential */ - unsigned int to_retries; /* max # of retries */ - unsigned char to_exponential; -}; +#define RPC_GSS_SEQNO_ARRAY_SIZE 3U enum rpc_display_format_t { RPC_DISPLAY_ADDR = 0, @@ -53,9 +44,11 @@ enum rpc_display_format_t { struct rpc_task; struct rpc_xprt; +struct xprt_class; struct seq_file; struct svc_serv; struct net; +#include <linux/lwq.h> /* * This describes a complete RPC request @@ -75,7 +68,8 @@ struct rpc_rqst { struct rpc_cred * rq_cred; /* Bound cred */ __be32 rq_xid; /* request XID */ int rq_cong; /* has incremented xprt->cong */ - u32 rq_seqno; /* gss seq no. used on req. */ + u32 rq_seqnos[RPC_GSS_SEQNO_ARRAY_SIZE]; /* past gss req seq nos. */ + unsigned int rq_seqno_count; /* number of entries in rq_seqnos */ int rq_enc_pages_num; struct page **rq_enc_pages; /* scratch pages for use by gss privacy code */ @@ -120,7 +114,7 @@ struct rpc_rqst { int rq_ntrans; #if defined(CONFIG_SUNRPC_BACKCHANNEL) - struct list_head rq_bc_list; /* Callback service list */ + struct lwq_node rq_bc_list; /* Callback service list */ unsigned long rq_bc_pa_state; /* Backchannel prealloc state */ struct list_head rq_bc_pa_list; /* Backchannel prealloc list */ #endif /* CONFIG_SUNRPC_BACKCHANEL */ @@ -128,6 +122,33 @@ struct rpc_rqst { #define rq_svec rq_snd_buf.head #define rq_slen rq_snd_buf.len +static inline int xprt_rqst_add_seqno(struct rpc_rqst *req, u32 seqno) +{ + if (likely(req->rq_seqno_count < RPC_GSS_SEQNO_ARRAY_SIZE)) + req->rq_seqno_count++; + + /* Shift array to make room for the newest element at the beginning */ + memmove(&req->rq_seqnos[1], &req->rq_seqnos[0], + (RPC_GSS_SEQNO_ARRAY_SIZE - 1) * sizeof(req->rq_seqnos[0])); + req->rq_seqnos[0] = seqno; + return 0; +} + +/* RPC transport layer security policies */ +enum xprtsec_policies { + RPC_XPRTSEC_NONE = 0, + RPC_XPRTSEC_TLS_ANON, + RPC_XPRTSEC_TLS_X509, +}; + +struct xprtsec_parms { + enum xprtsec_policies policy; + + /* authentication material */ + key_serial_t cert_serial; + key_serial_t privkey_serial; +}; + struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); @@ -138,10 +159,15 @@ struct rpc_xprt_ops { void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); + int (*get_srcaddr)(struct rpc_xprt *xprt, char *buf, + size_t buflen); + unsigned short (*get_srcport)(struct rpc_xprt *xprt); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); - void (*prepare_request)(struct rpc_rqst *req); + int (*prepare_request)(struct rpc_rqst *req, + struct xdr_buf *buf); int (*send_request)(struct rpc_rqst *req); + void (*abort_send_request)(struct rpc_rqst *req); void (*wait_for_reply_request)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_request)(struct rpc_task *task); @@ -180,11 +206,14 @@ enum xprt_transports { XPRT_TRANSPORT_RDMA = 256, XPRT_TRANSPORT_BC_RDMA = XPRT_TRANSPORT_RDMA | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_LOCAL = 257, + XPRT_TRANSPORT_TCP_TLS = 258, }; +struct rpc_sysfs_xprt; struct rpc_xprt { struct kref kref; /* Reference count */ const struct rpc_xprt_ops *ops; /* transport methods */ + unsigned int id; /* transport id */ const struct rpc_timeout *timeout; /* timeout parms */ struct sockaddr_storage addr; /* server address */ @@ -222,6 +251,7 @@ struct rpc_xprt { */ unsigned long bind_timeout, reestablish_timeout; + struct xprtsec_parms xprtsec; unsigned int connect_cookie; /* A cookie that gets bumped every time the transport is reconnected */ @@ -247,6 +277,7 @@ struct rpc_xprt { struct rpc_task * snd_task; /* Task blocked in send */ struct list_head xmit_queue; /* Send queue */ + atomic_long_t xmit_queuelen; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) @@ -280,13 +311,16 @@ struct rpc_xprt { } stat; struct net *xprt_net; + netns_tracker ns_tracker; const char *servername; const char *address_strings[RPC_DISPLAY_MAX]; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *debugfs; /* debugfs directory */ - atomic_t inject_disconnect; #endif struct rcu_head rcu; + const struct xprt_class *xprt_class; + struct rpc_sysfs_xprt *xprt_sysfs; + bool main; /*mark if this is the 1st transport */ }; #if defined(CONFIG_SUNRPC_BACKCHANNEL) @@ -322,6 +356,9 @@ struct xprt_create { struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ struct rpc_xprt_switch *bc_xps; unsigned int flags; + struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; struct xprt_class { @@ -330,6 +367,7 @@ struct xprt_class { struct rpc_xprt * (*setup)(struct xprt_create *); struct module *owner; char name[32]; + const char * netid[]; }; /* @@ -347,10 +385,9 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); -void xprt_request_prepare(struct rpc_rqst *req); bool xprt_prepare_transmit(struct rpc_task *task); void xprt_request_enqueue_transmit(struct rpc_task *task); -void xprt_request_enqueue_receive(struct rpc_task *task); +int xprt_request_enqueue_receive(struct rpc_task *task); void xprt_request_wait_receive(struct rpc_task *task); void xprt_request_dequeue_xprt(struct rpc_task *task); bool xprt_request_need_retransmit(struct rpc_task *task); @@ -366,6 +403,9 @@ struct rpc_xprt * xprt_alloc(struct net *net, size_t size, unsigned int num_prealloc, unsigned int max_req); void xprt_free(struct rpc_xprt *); +void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task); +bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req); +void xprt_cleanup_ids(void); static inline int xprt_enable_swap(struct rpc_xprt *xprt) @@ -384,7 +424,7 @@ xprt_disable_swap(struct rpc_xprt *xprt) */ int xprt_register_transport(struct xprt_class *type); int xprt_unregister_transport(struct xprt_class *type); -int xprt_load_transport(const char *); +int xprt_find_transport_ident(const char *); void xprt_wait_for_reply_request_def(struct rpc_task *task); void xprt_wait_for_reply_request_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); @@ -404,6 +444,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); +void xprt_release_write(struct rpc_xprt *, struct rpc_task *); /* * Reserved bit positions in xprt->state @@ -415,9 +456,12 @@ void xprt_unlock_connect(struct rpc_xprt *, void *); #define XPRT_BOUND (4) #define XPRT_BINDING (5) #define XPRT_CLOSING (6) +#define XPRT_OFFLINE (7) +#define XPRT_REMOVE (8) #define XPRT_CONGESTED (9) #define XPRT_CWND_WAIT (10) #define XPRT_WRITE_SPACE (11) +#define XPRT_SND_IS_COOKIE (12) static inline void xprt_set_connected(struct rpc_xprt *xprt) { @@ -488,21 +532,7 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_BINDING, &xprt->state); } -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -extern unsigned int rpc_inject_disconnect; -static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) -{ - if (!rpc_inject_disconnect) - return; - if (atomic_dec_return(&xprt->inject_disconnect)) - return; - atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); - xprt->ops->inject_disconnect(xprt); -} -#else -static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) -{ -} -#endif - +void xprt_set_offline_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps); +void xprt_set_online_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps); +void xprt_delete_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps); #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index c6cce3fbf29d..e4db5022fe92 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -10,12 +10,15 @@ #define _NET_SUNRPC_XPRTMULTIPATH_H struct rpc_xprt_iter_ops; +struct rpc_sysfs_xprt_switch; struct rpc_xprt_switch { spinlock_t xps_lock; struct kref xps_kref; + unsigned int xps_id; unsigned int xps_nxprts; unsigned int xps_nactive; + unsigned int xps_nunique_destaddr_xprts; atomic_long_t xps_queuelen; struct list_head xps_xprt_list; @@ -23,6 +26,7 @@ struct rpc_xprt_switch { const struct rpc_xprt_iter_ops *xps_iter_ops; + struct rpc_sysfs_xprt_switch *xps_sysfs; struct rcu_head xps_rcu; }; @@ -51,7 +55,8 @@ extern void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps); extern void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt); extern void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, - struct rpc_xprt *xprt); + struct rpc_xprt *xprt, bool offline); +extern struct rpc_xprt *rpc_xprt_switch_get_main_xprt(struct rpc_xprt_switch *xps); extern void xprt_iter_init(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps); @@ -59,16 +64,23 @@ extern void xprt_iter_init(struct rpc_xprt_iter *xpi, extern void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps); +extern void xprt_iter_init_listoffline(struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *xps); + extern void xprt_iter_destroy(struct rpc_xprt_iter *xpi); +extern void xprt_iter_rewind(struct rpc_xprt_iter *xpi); + extern struct rpc_xprt_switch *xprt_iter_xchg_switch( struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *newswitch); extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi); -extern struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi); extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi); extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, const struct sockaddr *sap); + +extern void xprt_multipath_cleanup_ids(void); + #endif diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 3c1423ee74b4..700a1e6c047c 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -57,9 +57,11 @@ struct sock_xprt { struct work_struct error_worker; struct work_struct recv_worker; struct mutex recv_mutex; + struct completion handshake_done; struct sockaddr_storage srcaddr; unsigned short srcport; int xprt_err; + struct rpc_clnt *clnt; /* * UDP socket buffer size parameters @@ -88,5 +90,8 @@ struct sock_xprt { #define XPRT_SOCK_WAKE_WRITE (5) #define XPRT_SOCK_WAKE_PENDING (6) #define XPRT_SOCK_WAKE_DISCONNECT (7) +#define XPRT_SOCK_CONNECT_SENT (8) +#define XPRT_SOCK_NOSPACE (9) +#define XPRT_SOCK_IGNORE_RECV (10) #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ |