diff options
Diffstat (limited to '')
31 files changed, 2632 insertions, 2643 deletions
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 57ebb29c26ad..accd35c05577 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -8,6 +8,7 @@ config AF_RXRPC depends on INET select CRYPTO select KEYS + select NET_UDP_TUNNEL help Say Y or M here to include support for RxRPC session sockets (just the transport part, not the presentation part: (un)marshalling is @@ -18,7 +19,9 @@ config AF_RXRPC This module at the moment only supports client operations and is currently incomplete. - See Documentation/networking/rxrpc.txt. + See Documentation/networking/rxrpc.rst. + +if AF_RXRPC config AF_RXRPC_IPV6 bool "IPv6 support for RxRPC" @@ -29,7 +32,6 @@ config AF_RXRPC_IPV6 config AF_RXRPC_INJECT_LOSS bool "Inject packet loss into RxRPC packet stream" - depends on AF_RXRPC help Say Y here to inject packet loss by discarding some received and some transmitted packets. @@ -37,16 +39,14 @@ config AF_RXRPC_INJECT_LOSS config AF_RXRPC_DEBUG bool "RxRPC dynamic debugging" - depends on AF_RXRPC help Say Y here to make runtime controllable debugging messages appear. - See Documentation/networking/rxrpc.txt. + See Documentation/networking/rxrpc.rst. config RXKAD bool "RxRPC Kerberos security" - depends on AF_RXRPC select CRYPTO select CRYPTO_MANAGER select CRYPTO_SKCIPHER @@ -56,4 +56,6 @@ config RXKAD Provide kerberos 4 and AFS kaserver security handling for AF_RXRPC through the use of the key retention service. - See Documentation/networking/rxrpc.txt. + See Documentation/networking/rxrpc.rst. + +endif diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index 6ffb7e9887ce..b11281bed2a4 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -25,8 +25,10 @@ rxrpc-y := \ peer_event.o \ peer_object.o \ recvmsg.o \ + rtt.o \ security.o \ sendmsg.o \ + server_key.o \ skbuff.o \ utils.o diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index fe42f986cd94..ceba28e9dce6 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -246,7 +246,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) ret = 0; break; } - /* Fall through */ + fallthrough; default: ret = -EBUSY; break; @@ -267,7 +267,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue * @upgrade: Request service upgrade for call - * @intr: The call is interruptible + * @interruptibility: The call is interruptible, or can be canceled. * @debug_id: The debug ID for tracing to be assigned to the call * * Allow a kernel service to begin a call on the nominated socket. This just @@ -285,7 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, gfp_t gfp, rxrpc_notify_rx_t notify_rx, bool upgrade, - bool intr, + enum rxrpc_interruptibility interruptibility, unsigned int debug_id) { struct rxrpc_conn_parameters cp; @@ -308,9 +308,10 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, key = NULL; /* a no-security key */ memset(&p, 0, sizeof(p)); - p.user_call_ID = user_call_ID; - p.tx_total_len = tx_total_len; - p.intr = intr; + p.user_call_ID = user_call_ID; + p.tx_total_len = tx_total_len; + p.interruptibility = interruptibility; + p.kernel = true; memset(&cp, 0, sizeof(cp)); cp.local = rx->local; @@ -350,7 +351,7 @@ static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall, */ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { - _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); + _enter("%d{%d}", call->debug_id, refcount_read(&call->ref)); mutex_lock(&call->user_mutex); rxrpc_release_call(rxrpc_sk(sock->sk), call); @@ -371,45 +372,18 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * rxrpc_kernel_check_life - Check to see whether a call is still alive * @sock: The socket the call is on * @call: The call to check - * @_life: Where to store the life value * - * Allow a kernel service to find out whether a call is still alive - ie. we're - * getting ACKs from the server. Passes back in *_life a number representing - * the life state which can be compared to that returned by a previous call and - * return true if the call is still alive. - * - * If the life state stalls, rxrpc_kernel_probe_life() should be called and - * then 2RTT waited. + * Allow a kernel service to find out whether a call is still alive - + * ie. whether it has completed. */ bool rxrpc_kernel_check_life(const struct socket *sock, - const struct rxrpc_call *call, - u32 *_life) + const struct rxrpc_call *call) { - *_life = call->acks_latest; return call->state != RXRPC_CALL_COMPLETE; } EXPORT_SYMBOL(rxrpc_kernel_check_life); /** - * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive - * @sock: The socket the call is on - * @call: The call to check - * - * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to - * find out whether a call is still alive by pinging it. This should cause the - * life state to be bumped in about 2*RTT. - * - * The must be called in TASK_RUNNING state on pain of might_sleep() objecting. - */ -void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call) -{ - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, - rxrpc_propose_ack_ping_for_check_life); - rxrpc_send_ack_packet(call, true, NULL); -} -EXPORT_SYMBOL(rxrpc_kernel_probe_life); - -/** * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call. * @sock: The socket the call is on * @call: The call to query @@ -497,6 +471,7 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr, switch (rx->sk.sk_state) { case RXRPC_UNBOUND: rx->sk.sk_state = RXRPC_CLIENT_UNBOUND; + break; case RXRPC_CLIENT_UNBOUND: case RXRPC_CLIENT_BOUND: break; @@ -572,7 +547,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) rx->local = local; rx->sk.sk_state = RXRPC_CLIENT_BOUND; - /* Fall through */ + fallthrough; case RXRPC_CLIENT_BOUND: if (!m->msg_name && @@ -580,7 +555,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) m->msg_name = &rx->connect_srx; m->msg_namelen = sizeof(rx->connect_srx); } - /* Fall through */ + fallthrough; case RXRPC_SERVER_BOUND: case RXRPC_SERVER_LISTENING: ret = rxrpc_do_sendmsg(rx, m, len); @@ -598,11 +573,24 @@ out: return ret; } +int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val) +{ + if (sk->sk_state != RXRPC_UNBOUND) + return -EISCONN; + if (val > RXRPC_SECURITY_MAX) + return -EINVAL; + lock_sock(sk); + rxrpc_sk(sk)->min_sec_level = val; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(rxrpc_sock_set_min_security_level); + /* * set RxRPC socket options */ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); unsigned int min_sec_level; @@ -653,8 +641,8 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, ret = -EISCONN; if (rx->sk.sk_state != RXRPC_UNBOUND) goto error; - ret = get_user(min_sec_level, - (unsigned int __user *) optval); + ret = copy_from_sockptr(&min_sec_level, optval, + sizeof(unsigned int)); if (ret < 0) goto error; ret = -EINVAL; @@ -672,7 +660,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, if (rx->sk.sk_state != RXRPC_SERVER_BOUND2) goto error; ret = -EFAULT; - if (copy_from_user(service_upgrade, optval, + if (copy_from_sockptr(service_upgrade, optval, sizeof(service_upgrade)) != 0) goto error; ret = -EINVAL; @@ -1003,7 +991,7 @@ static int __init af_rxrpc_init(void) goto error_security; } - ret = register_pernet_subsys(&rxrpc_net_ops); + ret = register_pernet_device(&rxrpc_net_ops); if (ret) goto error_pernet; @@ -1048,7 +1036,7 @@ error_key_type: error_sock: proto_unregister(&rxrpc_proto); error_proto: - unregister_pernet_subsys(&rxrpc_net_ops); + unregister_pernet_device(&rxrpc_net_ops); error_pernet: rxrpc_exit_security(); error_security: @@ -1070,7 +1058,7 @@ static void __exit af_rxrpc_exit(void) unregister_key_type(&key_type_rxrpc); sock_unregister(PF_RXRPC); proto_unregister(&rxrpc_proto); - unregister_pernet_subsys(&rxrpc_net_ops); + unregister_pernet_device(&rxrpc_net_ops); ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7d730c438404..1ad0ec5afb50 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -7,20 +7,14 @@ #include <linux/atomic.h> #include <linux/seqlock.h> +#include <linux/win_minmax.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/sock.h> #include <net/af_rxrpc.h> +#include <keys/rxrpc-type.h> #include "protocol.h" -#if 0 -#define CHECK_SLAB_OKAY(X) \ - BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \ - (POISON_FREE << 8 | POISON_FREE)) -#else -#define CHECK_SLAB_OKAY(X) do {} while (0) -#endif - #define FCRYPT_BSIZE 8 struct rxrpc_crypt { union { @@ -33,6 +27,7 @@ struct rxrpc_crypt { #define rxrpc_queue_delayed_work(WS,D) \ queue_delayed_work(rxrpc_workqueue, (WS), (D)) +struct key_preparsed_payload; struct rxrpc_connection; /* @@ -65,7 +60,7 @@ struct rxrpc_net { struct proc_dir_entry *proc_net; /* Subdir in /proc/net */ u32 epoch; /* Local epoch for detecting local-end reset */ struct list_head calls; /* List of calls active in this namespace */ - rwlock_t call_lock; /* Lock for ->calls */ + spinlock_t call_lock; /* Lock for ->calls */ atomic_t nr_calls; /* Count of allocated calls */ atomic_t nr_conns; @@ -75,19 +70,17 @@ struct rxrpc_net { struct work_struct service_conn_reaper; struct timer_list service_conn_reap_timer; - unsigned int nr_client_conns; - unsigned int nr_active_client_conns; - bool kill_all_client_conns; bool live; + + bool kill_all_client_conns; + atomic_t nr_client_conns; spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ - struct list_head waiting_client_conns; - struct list_head active_client_conns; struct list_head idle_client_conns; struct work_struct client_conn_reaper; struct timer_list client_conn_reap_timer; - struct list_head local_endpoints; + struct hlist_head local_endpoints; struct mutex local_mutex; /* Lock for ->local_endpoints */ DECLARE_HASHTABLE (peer_hash, 10); @@ -217,17 +210,30 @@ struct rxrpc_security { /* Clean up a security service */ void (*exit)(void); + /* Parse the information from a server key */ + int (*preparse_server_key)(struct key_preparsed_payload *); + + /* Clean up the preparse buffer after parsing a server key */ + void (*free_preparse_server_key)(struct key_preparsed_payload *); + + /* Destroy the payload of a server key */ + void (*destroy_server_key)(struct key *); + + /* Describe a server key */ + void (*describe_server_key)(const struct key *, struct seq_file *); + /* initialise a connection's security */ - int (*init_connection_security)(struct rxrpc_connection *); + int (*init_connection_security)(struct rxrpc_connection *, + struct rxrpc_key_token *); - /* prime a connection's packet security */ - int (*prime_packet_security)(struct rxrpc_connection *); + /* Work out how much data we can store in a packet, given an estimate + * of the amount of data remaining. + */ + int (*how_much_data)(struct rxrpc_call *, size_t, + size_t *, size_t *, size_t *); /* impose security on a packet */ - int (*secure_packet)(struct rxrpc_call *, - struct sk_buff *, - size_t, - void *); + int (*secure_packet)(struct rxrpc_call *, struct sk_buff *, size_t); /* verify the security on a received packet */ int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, @@ -265,17 +271,17 @@ struct rxrpc_security { struct rxrpc_local { struct rcu_head rcu; atomic_t active_users; /* Number of users of the local endpoint */ - atomic_t usage; /* Number of references to the structure */ + refcount_t ref; /* Number of references to the structure */ struct rxrpc_net *rxnet; /* The network ns in which this resides */ - struct list_head link; + struct hlist_node link; struct socket *socket; /* my UDP socket */ struct work_struct processor; struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ struct sk_buff_head reject_queue; /* packets awaiting rejection */ struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ - struct rb_root client_conns; /* Client connections by socket params */ - spinlock_t client_conns_lock; /* Lock for client_conns */ + struct rb_root client_bundles; /* Client connection bundles by socket params */ + spinlock_t client_bundles_lock; /* Lock for client_bundles */ spinlock_t lock; /* access lock */ rwlock_t services_lock; /* lock for services list */ int debug_id; /* debug ID for printks */ @@ -290,7 +296,7 @@ struct rxrpc_local { */ struct rxrpc_peer { struct rcu_head rcu; /* This must be first */ - atomic_t usage; + refcount_t ref; unsigned long hash_key; struct hlist_node hash_link; struct rxrpc_local *local; @@ -311,11 +317,14 @@ struct rxrpc_peer { #define RXRPC_RTT_CACHE_SIZE 32 spinlock_t rtt_input_lock; /* RTT lock for input routine */ ktime_t rtt_last_req; /* Time of last RTT request */ - u64 rtt; /* Current RTT estimate (in nS) */ - u64 rtt_sum; /* Sum of cache contents */ - u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ - u8 rtt_cursor; /* next entry at which to insert */ - u8 rtt_usage; /* amount of cache actually used */ + unsigned int rtt_count; /* Number of samples we've got */ + + u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + u32 mdev_us; /* medium deviation */ + u32 mdev_max_us; /* maximal mdev for the last rtt period */ + u32 rttvar_us; /* smoothed mdev_max */ + u32 rto_j; /* Retransmission timeout in jiffies */ + u8 backoff; /* Backoff timeout */ u8 cong_cwnd; /* Congestion window size */ }; @@ -349,10 +358,7 @@ struct rxrpc_conn_parameters { enum rxrpc_conn_flag { RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */ RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */ - RXRPC_CONN_IN_CLIENT_CONNS, /* Conn is in local->client_conns */ - RXRPC_CONN_EXPOSED, /* Conn has extra ref for exposure */ RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ - RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ RXRPC_CONN_FINAL_ACK_0, /* Need final ACK for channel 0 */ RXRPC_CONN_FINAL_ACK_1, /* Need final ACK for channel 1 */ @@ -373,19 +379,6 @@ enum rxrpc_conn_event { }; /* - * The connection cache state. - */ -enum rxrpc_conn_cache_state { - RXRPC_CONN_CLIENT_INACTIVE, /* Conn is not yet listed */ - RXRPC_CONN_CLIENT_WAITING, /* Conn is on wait list, waiting for capacity */ - RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */ - RXRPC_CONN_CLIENT_UPGRADE, /* Conn is on active list, probing for upgrade */ - RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */ - RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */ - RXRPC_CONN__NR_CACHE_STATES -}; - -/* * The connection protocol state. */ enum rxrpc_conn_proto_state { @@ -401,6 +394,23 @@ enum rxrpc_conn_proto_state { }; /* + * RxRPC client connection bundle. + */ +struct rxrpc_bundle { + struct rxrpc_conn_parameters params; + refcount_t ref; + unsigned int debug_id; + bool try_upgrade; /* True if the bundle is attempting upgrade */ + bool alloc_conn; /* True if someone's getting a conn */ + short alloc_error; /* Error from last conn allocation */ + spinlock_t channel_lock; + struct rb_node local_node; /* Node in local->client_conns */ + struct list_head waiting_calls; /* Calls waiting for channels */ + unsigned long avail_chans; /* Mask of available channels */ + struct rxrpc_connection *conns[4]; /* The connections in the bundle (max 4) */ +}; + +/* * RxRPC connection definition * - matched by { local, peer, epoch, conn_id, direction } * - each connection can only handle four simultaneous calls @@ -409,14 +419,11 @@ struct rxrpc_connection { struct rxrpc_conn_proto proto; struct rxrpc_conn_parameters params; - atomic_t usage; + refcount_t ref; struct rcu_head rcu; struct list_head cache_link; - spinlock_t channel_lock; - unsigned char active_chans; /* Mask of active channels */ -#define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1) - struct list_head waiting_calls; /* Calls waiting for channels */ + unsigned char act_chans; /* Mask of active channels */ struct rxrpc_channel { unsigned long final_ack_at; /* Time at which to issue final ACK */ struct rxrpc_call __rcu *call; /* Active call */ @@ -433,33 +440,33 @@ struct rxrpc_connection { struct timer_list timer; /* Conn event timer */ struct work_struct processor; /* connection event processor */ - union { - struct rb_node client_node; /* Node in local->client_conns */ - struct rb_node service_node; /* Node in peer->service_conns */ - }; + struct rxrpc_bundle *bundle; /* Client connection bundle */ + struct rb_node service_node; /* Node in peer->service_conns */ struct list_head proc_link; /* link in procfs list */ struct list_head link; /* link in master connection list */ struct sk_buff_head rx_queue; /* received conn-level packets */ + const struct rxrpc_security *security; /* applied security module */ - struct key *server_key; /* security for this service */ - struct crypto_sync_skcipher *cipher; /* encryption handle */ - struct rxrpc_crypt csum_iv; /* packet checksum base */ + union { + struct { + struct crypto_sync_skcipher *cipher; /* encryption handle */ + struct rxrpc_crypt csum_iv; /* packet checksum base */ + u32 nonce; /* response re-use preventer */ + } rxkad; + }; unsigned long flags; unsigned long events; unsigned long idle_timestamp; /* Time at which last became idle */ spinlock_t state_lock; /* state-change lock */ - enum rxrpc_conn_cache_state cache_state; enum rxrpc_conn_proto_state state; /* current state of connection */ u32 abort_code; /* Abort code of connection abort */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ - u32 security_nonce; /* response re-use preventer */ u32 service_id; /* Service ID, possibly upgraded */ - u8 size_align; /* data size alignment (for security) */ - u8 security_size; /* security header size */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ + u8 bundle_shift; /* Index into bundle->avail_chans */ short error; /* Local error code */ }; @@ -484,13 +491,13 @@ enum rxrpc_call_flag { RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ - RXRPC_CALL_PINGING, /* Ping in process */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */ RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */ RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */ - RXRPC_CALL_IS_INTR, /* The call is interruptible */ RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */ + RXRPC_CALL_KERNEL, /* The call was made by the kernel */ + RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */ }; /* @@ -516,7 +523,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ - RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ @@ -575,7 +581,7 @@ struct rxrpc_call { struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ - struct list_head chan_wait_link; /* Link in conn->waiting_calls */ + struct list_head chan_wait_link; /* Link in conn->bundle->waiting_calls */ struct hlist_node error_link; /* link in error distribution list */ struct list_head accept_link; /* Link in rx->acceptq */ struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ @@ -595,9 +601,10 @@ struct rxrpc_call { int error; /* Local error incurred */ enum rxrpc_call_state state; /* current state of call */ enum rxrpc_call_completion completion; /* Call completion condition */ - atomic_t usage; + refcount_t ref; u16 service_id; /* service ID */ u8 security_ix; /* Security type */ + enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ @@ -661,21 +668,25 @@ struct rxrpc_call { spinlock_t input_lock; /* Lock for packet input to this call */ - /* receive-phase ACK management */ + /* Receive-phase ACK management (ACKs we send). */ u8 ackr_reason; /* reason to ACK */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ - rxrpc_serial_t ackr_first_seq; /* first sequence number received */ - rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ - rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ - rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ - - /* ping management */ - rxrpc_serial_t ping_serial; /* Last ping sent */ - ktime_t ping_time; /* Time last ping sent */ - - /* transmission-phase ACK management */ + rxrpc_seq_t ackr_highest_seq; /* Higest sequence number received */ + atomic_t ackr_nr_unacked; /* Number of unacked packets */ + atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */ + + /* RTT management */ + rxrpc_serial_t rtt_serial[4]; /* Serial number of DATA or PING sent */ + ktime_t rtt_sent_at[4]; /* Time packet sent */ + unsigned long rtt_avail; /* Mask of available slots in bits 0-3, + * Mask of pending samples in 8-11 */ +#define RXRPC_CALL_RTT_AVAIL_MASK 0xf +#define RXRPC_CALL_RTT_PEND_SHIFT 8 + + /* Transmission-phase ACK management (ACKs we've received). */ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ - rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ + rxrpc_seq_t acks_first_seq; /* first sequence number received */ + rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ @@ -708,8 +719,8 @@ struct rxrpc_ack_summary { enum rxrpc_command { RXRPC_CMD_SEND_DATA, /* send data message */ RXRPC_CMD_SEND_ABORT, /* request abort generation */ - RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ + RXRPC_CMD_CHARGE_ACCEPT, /* [server] charge accept preallocation */ }; struct rxrpc_call_params { @@ -721,7 +732,8 @@ struct rxrpc_call_params { u32 normal; /* Max time since last call packet (msec) */ } timeouts; u8 nr_timeouts; /* Number of timeouts specified */ - bool intr; /* The call is interruptible */ + bool kernel; /* T if kernel is making the call */ + enum rxrpc_interruptibility interruptibility; /* How is interruptible is the call? */ }; struct rxrpc_send_params { @@ -749,9 +761,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_sock *, struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); -struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, - rxrpc_notify_rx_t); -int rxrpc_reject_call(struct rxrpc_sock *); +int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long); /* * call_event.c @@ -760,21 +770,18 @@ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); -static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call, - unsigned long expire_at, - unsigned long now, - enum rxrpc_timer_trace why) -{ - trace_rxrpc_timer(call, why, now); - timer_reduce(&call->timer, expire_at); -} +void rxrpc_reduce_call_timer(struct rxrpc_call *call, + unsigned long expire_at, + unsigned long now, + enum rxrpc_timer_trace why); + +void rxrpc_delete_call_timer(struct rxrpc_call *call); /* * call_object.c */ extern const char *const rxrpc_call_states[]; extern const char *const rxrpc_call_completions[]; -extern unsigned int rxrpc_max_call_lifetime; extern struct kmem_cache *rxrpc_call_jar; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); @@ -791,6 +798,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *); bool __rxrpc_queue_call(struct rxrpc_call *); bool rxrpc_queue_call(struct rxrpc_call *); void rxrpc_see_call(struct rxrpc_call *); +bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_cleanup_call(struct rxrpc_call *); @@ -807,114 +815,21 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) } /* - * Transition a call to the complete state. - */ -static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - if (call->state < RXRPC_CALL_COMPLETE) { - call->abort_code = abort_code; - call->error = error; - call->completion = compl, - call->state = RXRPC_CALL_COMPLETE; - trace_rxrpc_call_complete(call); - wake_up(&call->waitq); - return true; - } - return false; -} - -static inline bool rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - bool ret; - - write_lock_bh(&call->state_lock); - ret = __rxrpc_set_call_completion(call, compl, abort_code, error); - write_unlock_bh(&call->state_lock); - return ret; -} - -/* - * Record that a call successfully completed. - */ -static inline bool __rxrpc_call_completed(struct rxrpc_call *call) -{ - return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); -} - -static inline bool rxrpc_call_completed(struct rxrpc_call *call) -{ - bool ret; - - write_lock_bh(&call->state_lock); - ret = __rxrpc_call_completed(call); - write_unlock_bh(&call->state_lock); - return ret; -} - -/* - * Record that a call is locally aborted. - */ -static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, - rxrpc_seq_t seq, - u32 abort_code, int error) -{ - trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, - abort_code, error); - return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, - abort_code, error); -} - -static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, - rxrpc_seq_t seq, u32 abort_code, int error) -{ - bool ret; - - write_lock_bh(&call->state_lock); - ret = __rxrpc_abort_call(why, call, seq, abort_code, error); - write_unlock_bh(&call->state_lock); - return ret; -} - -/* - * Abort a call due to a protocol error. - */ -static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, - struct sk_buff *skb, - const char *eproto_why, - const char *why, - u32 abort_code) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); - return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); -} - -#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ - __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ - (abort_why), (abort_code)) - -/* * conn_client.c */ -extern unsigned int rxrpc_max_client_connections; extern unsigned int rxrpc_reap_client_connections; extern unsigned long rxrpc_conn_idle_client_expiry; extern unsigned long rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); +struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *); +void rxrpc_put_bundle(struct rxrpc_bundle *); int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, gfp_t); void rxrpc_expose_client_call(struct rxrpc_call *); -void rxrpc_disconnect_client_call(struct rxrpc_call *); +void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *); void rxrpc_put_client_conn(struct rxrpc_connection *); void rxrpc_discard_expired_client_conns(struct work_struct *); void rxrpc_destroy_all_client_connections(struct rxrpc_net *); @@ -924,6 +839,7 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *); * conn_event.c */ void rxrpc_process_connection(struct work_struct *); +void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); /* * conn_object.c @@ -940,7 +856,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *); void rxrpc_kill_connection(struct rxrpc_connection *); bool rxrpc_queue_conn(struct rxrpc_connection *); void rxrpc_see_connection(struct rxrpc_connection *); -void rxrpc_get_connection(struct rxrpc_connection *); +struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *); struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *); void rxrpc_put_service_conn(struct rxrpc_connection *); void rxrpc_service_connection_reaper(struct work_struct *); @@ -980,8 +896,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct sk_buff *); struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t); void rxrpc_new_incoming_connection(struct rxrpc_sock *, struct rxrpc_connection *, - const struct rxrpc_security *, struct key *, - struct sk_buff *); + const struct rxrpc_security *, struct sk_buff *); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* @@ -998,10 +913,8 @@ extern const struct rxrpc_security rxrpc_no_security; * key.c */ extern struct key_type key_type_rxrpc; -extern struct key_type key_type_rxrpc_s; -int rxrpc_request_key(struct rxrpc_sock *, char __user *, int); -int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int); +int rxrpc_request_key(struct rxrpc_sock *, sockptr_t , int); int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t, u32); @@ -1042,7 +955,6 @@ extern unsigned long rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; -extern unsigned long rxrpc_resend_timeout; extern const s8 rxrpc_ack_priority[]; @@ -1069,9 +981,8 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); /* * peer_event.c */ +void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); void rxrpc_error_report(struct sock *); -void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, - rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); void rxrpc_peer_keepalive_worker(struct work_struct *); /* @@ -1096,14 +1007,48 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *); extern const struct seq_operations rxrpc_call_seq_ops; extern const struct seq_operations rxrpc_connection_seq_ops; extern const struct seq_operations rxrpc_peer_seq_ops; +extern const struct seq_operations rxrpc_local_seq_ops; /* * recvmsg.c */ void rxrpc_notify_socket(struct rxrpc_call *); +bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); +bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); +bool __rxrpc_call_completed(struct rxrpc_call *); +bool rxrpc_call_completed(struct rxrpc_call *); +bool __rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); +bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* + * Abort a call due to a protocol error. + */ +static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, + struct sk_buff *skb, + const char *eproto_why, + const char *why, + u32 abort_code) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); + return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); +} + +#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ + __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ + (abort_why), (abort_code)) + +/* + * rtt.c + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, int, + rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); +unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool); +void rxrpc_peer_init_rtt(struct rxrpc_peer *); + +/* * rxkad.c */ #ifdef CONFIG_RXKAD @@ -1114,11 +1059,13 @@ extern const struct rxrpc_security rxkad; * security.c */ int __init rxrpc_init_security(void); +const struct rxrpc_security *rxrpc_security_lookup(u8); void rxrpc_exit_security(void); int rxrpc_init_client_conn_security(struct rxrpc_connection *); -bool rxrpc_look_up_server_security(struct rxrpc_local *, struct rxrpc_sock *, - const struct rxrpc_security **, struct key **, - struct sk_buff *); +const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *, + struct sk_buff *); +struct key *rxrpc_look_up_server_security(struct rxrpc_connection *, + struct sk_buff *, u32, u32); /* * sendmsg.c @@ -1126,6 +1073,13 @@ bool rxrpc_look_up_server_security(struct rxrpc_local *, struct rxrpc_sock *, int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* + * server_key.c + */ +extern struct key_type key_type_rxrpc_s; + +int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int); + +/* * skbuff.c */ void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 70e44abf106c..99e10eea3732 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -22,6 +22,11 @@ #include <net/ip.h> #include "ar-internal.h" +static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call, + unsigned long user_call_ID) +{ +} + /* * Preallocate a single service call, connection and peer and, if possible, * give them a user ID and attach the user's side of the ID to them. @@ -34,8 +39,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, unsigned int debug_id) { const void *here = __builtin_return_address(0); - struct rxrpc_call *call; + struct rxrpc_call *call, *xcall; struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); + struct rb_node *parent, **pp; int max, tmp; unsigned int size = RXRPC_BACKLOG_MAX; unsigned int head, tail, call_head, call_tail; @@ -85,11 +91,11 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, (head + 1) & (size - 1)); trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, - atomic_read(&conn->usage), here); + refcount_read(&conn->ref), here); } /* Now it gets complicated, because calls get registered with the - * socket here, particularly if a user ID is preassigned by the user. + * socket here, with a user ID preassigned by the user. */ call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) @@ -98,46 +104,45 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, call->state = RXRPC_CALL_SERVER_PREALLOC; trace_rxrpc_call(call->debug_id, rxrpc_call_new_service, - atomic_read(&call->usage), + refcount_read(&call->ref), here, (const void *)user_call_ID); write_lock(&rx->call_lock); - if (user_attach_call) { - struct rxrpc_call *xcall; - struct rb_node *parent, **pp; - - /* Check the user ID isn't already in use */ - pp = &rx->calls.rb_node; - parent = NULL; - while (*pp) { - parent = *pp; - xcall = rb_entry(parent, struct rxrpc_call, sock_node); - if (user_call_ID < xcall->user_call_ID) - pp = &(*pp)->rb_left; - else if (user_call_ID > xcall->user_call_ID) - pp = &(*pp)->rb_right; - else - goto id_in_use; - } - call->user_call_ID = user_call_ID; - call->notify_rx = notify_rx; + /* Check the user ID isn't already in use */ + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + xcall = rb_entry(parent, struct rxrpc_call, sock_node); + if (user_call_ID < xcall->user_call_ID) + pp = &(*pp)->rb_left; + else if (user_call_ID > xcall->user_call_ID) + pp = &(*pp)->rb_right; + else + goto id_in_use; + } + + call->user_call_ID = user_call_ID; + call->notify_rx = notify_rx; + if (user_attach_call) { rxrpc_get_call(call, rxrpc_call_got_kernel); user_attach_call(call, user_call_ID); - rxrpc_get_call(call, rxrpc_call_got_userid); - rb_link_node(&call->sock_node, parent, pp); - rb_insert_color(&call->sock_node, &rx->calls); - set_bit(RXRPC_CALL_HAS_USERID, &call->flags); } + rxrpc_get_call(call, rxrpc_call_got_userid); + rb_link_node(&call->sock_node, parent, pp); + rb_insert_color(&call->sock_node, &rx->calls); + set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + list_add(&call->sock_link, &rx->sock_calls); write_unlock(&rx->call_lock); rxnet = call->rxnet; - write_lock(&rxnet->call_lock); - list_add_tail(&call->link, &rxnet->calls); - write_unlock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); + list_add_tail_rcu(&call->link, &rxnet->calls); + spin_unlock_bh(&rxnet->call_lock); b->call_backlog[call_head] = call; smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); @@ -152,11 +157,8 @@ id_in_use: } /* - * Preallocate sufficient service connections, calls and peers to cover the - * entire backlog of a socket. When a new call comes in, if we don't have - * sufficient of each available, the call gets rejected as busy or ignored. - * - * The backlog is replenished when a connection is accepted or rejected. + * Allocate the preallocation buffers for incoming service calls. These must + * be charged manually. */ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) { @@ -169,13 +171,6 @@ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) rx->backlog = b; } - if (rx->discard_new_call) - return 0; - - while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp, - atomic_inc_return(&rxrpc_debug_id)) == 0) - ; - return 0; } @@ -202,6 +197,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) tail = b->peer_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_peer *peer = b->peer_backlog[tail]; + rxrpc_put_local(peer->local); kfree(peer); tail = (tail + 1) & (size - 1); } @@ -228,6 +224,8 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) if (rx->discard_new_call) { _debug("discard %lx", call->user_call_ID); rx->discard_new_call(call, call->user_call_ID); + if (call->notify_rx) + call->notify_rx = rxrpc_dummy_notify; rxrpc_put_call(call, rxrpc_call_put_kernel); } rxrpc_call_completed(call); @@ -248,7 +246,7 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_skb_priv *sp = rxrpc_skb(skb); ktime_t now = skb->tstamp; - if (call->peer->rtt_usage < 3 || + if (call->peer->rtt_count < 3 || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, true, true, @@ -264,7 +262,6 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_peer *peer, struct rxrpc_connection *conn, const struct rxrpc_security *sec, - struct key *key, struct sk_buff *skb) { struct rxrpc_backlog *b = rx->backlog; @@ -312,7 +309,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, conn->params.local = rxrpc_get_local(local); conn->params.peer = peer; rxrpc_see_connection(conn); - rxrpc_new_incoming_connection(rx, conn, sec, key, skb); + rxrpc_new_incoming_connection(rx, conn, sec, skb); } else { rxrpc_get_connection(conn); } @@ -326,6 +323,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, rxrpc_see_call(call); call->conn = conn; call->security = conn->security; + call->security_ix = conn->security_ix; call->peer = rxrpc_get_peer(conn->params.peer); call->cong_cwnd = call->peer->cong_cwnd; return call; @@ -355,7 +353,6 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_connection *conn; struct rxrpc_peer *peer = NULL; struct rxrpc_call *call = NULL; - struct key *key = NULL; _enter(""); @@ -376,11 +373,13 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, */ conn = rxrpc_find_connection_rcu(local, skb, &peer); - if (!conn && !rxrpc_look_up_server_security(local, rx, &sec, &key, skb)) - goto no_call; + if (!conn) { + sec = rxrpc_get_incoming_security(rx, skb); + if (!sec) + goto no_call; + } - call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, key, skb); - key_put(key); + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, skb); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; goto no_call; @@ -395,8 +394,6 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, if (rx->notify_new_call) rx->notify_new_call(&rx->sk, call, call->user_call_ID); - else - sk_acceptq_added(&rx->sk); spin_lock(&conn->state_lock); switch (conn->state) { @@ -408,12 +405,8 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, case RXRPC_CONN_SERVICE: write_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - if (rx->discard_new_call) - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - else - call->state = RXRPC_CALL_SERVER_ACCEPTING; - } + if (call->state < RXRPC_CALL_COMPLETE) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; write_unlock(&call->state_lock); break; @@ -433,9 +426,6 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, rxrpc_send_ping(call, skb); - if (call->state == RXRPC_CALL_SERVER_ACCEPTING) - rxrpc_notify_socket(call); - /* We have to discard the prealloc queue's ref here and rely on a * combination of the RCU read lock and refs held either by the socket * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel @@ -453,187 +443,18 @@ no_call: } /* - * handle acceptance of a call by userspace - * - assign the user call ID to the call at the front of the queue - * - called with the socket locked. + * Charge up socket with preallocated calls, attaching user call IDs. */ -struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, - unsigned long user_call_ID, - rxrpc_notify_rx_t notify_rx) - __releases(&rx->sk.sk_lock.slock) - __acquires(call->user_mutex) +int rxrpc_user_charge_accept(struct rxrpc_sock *rx, unsigned long user_call_ID) { - struct rxrpc_call *call; - struct rb_node *parent, **pp; - int ret; - - _enter(",%lx", user_call_ID); - - ASSERT(!irqs_disabled()); - - write_lock(&rx->call_lock); - - if (list_empty(&rx->to_be_accepted)) { - write_unlock(&rx->call_lock); - release_sock(&rx->sk); - kleave(" = -ENODATA [empty]"); - return ERR_PTR(-ENODATA); - } - - /* check the user ID isn't already in use */ - pp = &rx->calls.rb_node; - parent = NULL; - while (*pp) { - parent = *pp; - call = rb_entry(parent, struct rxrpc_call, sock_node); - - if (user_call_ID < call->user_call_ID) - pp = &(*pp)->rb_left; - else if (user_call_ID > call->user_call_ID) - pp = &(*pp)->rb_right; - else - goto id_in_use; - } - - /* Dequeue the first call and check it's still valid. We gain - * responsibility for the queue's reference. - */ - call = list_entry(rx->to_be_accepted.next, - struct rxrpc_call, accept_link); - write_unlock(&rx->call_lock); - - /* We need to gain the mutex from the interrupt handler without - * upsetting lockdep, so we have to release it there and take it here. - * We are, however, still holding the socket lock, so other accepts - * must wait for us and no one can add the user ID behind our backs. - */ - if (mutex_lock_interruptible(&call->user_mutex) < 0) { - release_sock(&rx->sk); - kleave(" = -ERESTARTSYS"); - return ERR_PTR(-ERESTARTSYS); - } - - write_lock(&rx->call_lock); - list_del_init(&call->accept_link); - sk_acceptq_removed(&rx->sk); - rxrpc_see_call(call); - - /* Find the user ID insertion point. */ - pp = &rx->calls.rb_node; - parent = NULL; - while (*pp) { - parent = *pp; - call = rb_entry(parent, struct rxrpc_call, sock_node); - - if (user_call_ID < call->user_call_ID) - pp = &(*pp)->rb_left; - else if (user_call_ID > call->user_call_ID) - pp = &(*pp)->rb_right; - else - BUG(); - } - - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_SERVER_ACCEPTING: - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - break; - case RXRPC_CALL_COMPLETE: - ret = call->error; - goto out_release; - default: - BUG(); - } - - /* formalise the acceptance */ - call->notify_rx = notify_rx; - call->user_call_ID = user_call_ID; - rxrpc_get_call(call, rxrpc_call_got_userid); - rb_link_node(&call->sock_node, parent, pp); - rb_insert_color(&call->sock_node, &rx->calls); - if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags)) - BUG(); - - write_unlock_bh(&call->state_lock); - write_unlock(&rx->call_lock); - rxrpc_notify_socket(call); - rxrpc_service_prealloc(rx, GFP_KERNEL); - release_sock(&rx->sk); - _leave(" = %p{%d}", call, call->debug_id); - return call; - -out_release: - _debug("release %p", call); - write_unlock_bh(&call->state_lock); - write_unlock(&rx->call_lock); - rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); - goto out; - -id_in_use: - ret = -EBADSLT; - write_unlock(&rx->call_lock); -out: - rxrpc_service_prealloc(rx, GFP_KERNEL); - release_sock(&rx->sk); - _leave(" = %d", ret); - return ERR_PTR(ret); -} - -/* - * Handle rejection of a call by userspace - * - reject the call at the front of the queue - */ -int rxrpc_reject_call(struct rxrpc_sock *rx) -{ - struct rxrpc_call *call; - bool abort = false; - int ret; - - _enter(""); - - ASSERT(!irqs_disabled()); - - write_lock(&rx->call_lock); - - if (list_empty(&rx->to_be_accepted)) { - write_unlock(&rx->call_lock); - return -ENODATA; - } - - /* Dequeue the first call and check it's still valid. We gain - * responsibility for the queue's reference. - */ - call = list_entry(rx->to_be_accepted.next, - struct rxrpc_call, accept_link); - list_del_init(&call->accept_link); - sk_acceptq_removed(&rx->sk); - rxrpc_see_call(call); + struct rxrpc_backlog *b = rx->backlog; - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_SERVER_ACCEPTING: - __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, -ECONNABORTED); - abort = true; - /* fall through */ - case RXRPC_CALL_COMPLETE: - ret = call->error; - goto out_discard; - default: - BUG(); - } + if (rx->sk.sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; -out_discard: - write_unlock_bh(&call->state_lock); - write_unlock(&rx->call_lock); - if (abort) { - rxrpc_send_abort_packet(call); - rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); - } - rxrpc_service_prealloc(rx, GFP_KERNEL); - _leave(" = %d", ret); - return ret; + return rxrpc_service_prealloc_one(rx, b, NULL, NULL, user_call_ID, + GFP_KERNEL, + atomic_inc_return(&rxrpc_debug_id)); } /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index cedbbb3a7c2e..2a93e7b5fbd0 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -111,8 +111,8 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, } else { unsigned long now = jiffies, ack_at; - if (call->peer->rtt_usage > 0) - ack_at = nsecs_to_jiffies(call->peer->rtt); + if (call->peer->srtt_us != 0) + ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3); else ack_at = expiry; @@ -159,22 +159,14 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) struct sk_buff *skb; unsigned long resend_at; rxrpc_seq_t cursor, seq, top; - ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo; + ktime_t now, max_age, oldest, ack_ts; int ix; u8 annotation, anno_type, retrans = 0, unacked = 0; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - if (call->peer->rtt_usage > 1) - timeout = ns_to_ktime(call->peer->rtt * 3 / 2); - else - timeout = ms_to_ktime(rxrpc_resend_timeout); - min_timeo = ns_to_ktime((1000000000 / HZ) * 4); - if (ktime_before(timeout, min_timeo)) - timeout = min_timeo; - now = ktime_get_real(); - max_age = ktime_sub(now, timeout); + max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j)); spin_lock_bh(&call->lock); @@ -219,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) } resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rxrpc_resend_timeout; + resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans); WRITE_ONCE(call->resend_at, resend_at); if (unacked) @@ -234,7 +226,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) rxrpc_timer_set_for_resend); spin_unlock_bh(&call->lock); ack_ts = ktime_sub(now, call->acks_latest_ts); - if (ktime_to_ns(ack_ts) < call->peer->rtt) + if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3)) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); @@ -254,7 +246,18 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) if (anno_type != RXRPC_TX_ANNO_RETRANS) continue; + /* We need to reset the retransmission state, but we need to do + * so before we drop the lock as a new ACK/NAK may come in and + * confuse things + */ + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_UNACK | RXRPC_TX_ANNO_RESENT; + call->rxtx_annotations[ix] = annotation; + skb = call->rxtx_buffer[ix]; + if (!skb) + continue; + rxrpc_get_skb(skb, rxrpc_skb_got); spin_unlock_bh(&call->lock); @@ -268,24 +271,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) rxrpc_free_skb(skb, rxrpc_skb_freed); spin_lock_bh(&call->lock); - - /* We need to clear the retransmit state, but there are two - * things we need to be aware of: A new ACK/NAK might have been - * received and the packet might have been hard-ACK'd (in which - * case it will no longer be in the buffer). - */ - if (after(seq, call->tx_hard_ack)) { - annotation = call->rxtx_annotations[ix]; - anno_type = annotation & RXRPC_TX_ANNO_MASK; - if (anno_type == RXRPC_TX_ANNO_RETRANS || - anno_type == RXRPC_TX_ANNO_NAK) { - annotation &= ~RXRPC_TX_ANNO_MASK; - annotation |= RXRPC_TX_ANNO_UNACK; - } - annotation |= RXRPC_TX_ANNO_RESENT; - call->rxtx_annotations[ix] = annotation; - } - if (after(call->tx_hard_ack, seq)) seq = call->tx_hard_ack; } @@ -325,8 +310,7 @@ recheck_state: } if (call->state == RXRPC_CALL_COMPLETE) { - del_timer_sync(&call->timer); - rxrpc_notify_socket(call); + rxrpc_delete_call_timer(call); goto out_put; } @@ -393,9 +377,9 @@ recheck_state: if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && (int)call->conn->hi_serial - (int)call->rx_serial > 0) { trace_rxrpc_call_reset(call); - rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ECONNRESET); + rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET); } else { - rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); } set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; @@ -422,7 +406,8 @@ recheck_state: goto recheck_state; } - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) { + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) && + call->state != RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_resend(call, now); goto recheck_state; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c9f34b0a11df..6401cdf7a624 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -23,7 +23,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", [RXRPC_CALL_SERVER_SECURING] = "SvSecure", - [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", @@ -41,6 +40,11 @@ const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { struct kmem_cache *rxrpc_call_jar; +static struct semaphore rxrpc_call_limiter = + __SEMAPHORE_INITIALIZER(rxrpc_call_limiter, 1000); +static struct semaphore rxrpc_kernel_call_limiter = + __SEMAPHORE_INITIALIZER(rxrpc_kernel_call_limiter, 1000); + static void rxrpc_call_timer_expired(struct timer_list *t) { struct rxrpc_call *call = from_timer(call, t, timer); @@ -49,10 +53,30 @@ static void rxrpc_call_timer_expired(struct timer_list *t) if (call->state < RXRPC_CALL_COMPLETE) { trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); - rxrpc_queue_call(call); + __rxrpc_queue_call(call); + } else { + rxrpc_put_call(call, rxrpc_call_put); + } +} + +void rxrpc_reduce_call_timer(struct rxrpc_call *call, + unsigned long expire_at, + unsigned long now, + enum rxrpc_timer_trace why) +{ + if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) { + trace_rxrpc_timer(call, why, now); + if (timer_reduce(&call->timer, expire_at)) + rxrpc_put_call(call, rxrpc_call_put_notimer); } } +void rxrpc_delete_call_timer(struct rxrpc_call *call) +{ + if (del_timer_sync(&call->timer)) + rxrpc_put_call(call, rxrpc_call_put_timer); +} + static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; /* @@ -88,7 +112,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, found_extant_call: rxrpc_get_call(call, rxrpc_call_got); read_unlock(&rx->call_lock); - _leave(" = %p [%d]", call, atomic_read(&call->usage)); + _leave(" = %p [%d]", call, refcount_read(&call->ref)); return call; } @@ -136,7 +160,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, spin_lock_init(&call->notify_lock); spin_lock_init(&call->input_lock); rwlock_init(&call->state_lock); - atomic_set(&call->usage, 1); + refcount_set(&call->ref, 1); call->debug_id = debug_id; call->tx_total_len = -1; call->next_rx_timo = 20 * HZ; @@ -153,6 +177,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1; call->rxnet = rxnet; + call->rtt_avail = RXRPC_CALL_RTT_AVAIL_MASK; atomic_inc(&rxnet->nr_calls); return call; @@ -209,6 +234,34 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) } /* + * Wait for a call slot to become available. + */ +static struct semaphore *rxrpc_get_call_slot(struct rxrpc_call_params *p, gfp_t gfp) +{ + struct semaphore *limiter = &rxrpc_call_limiter; + + if (p->kernel) + limiter = &rxrpc_kernel_call_limiter; + if (p->interruptibility == RXRPC_UNINTERRUPTIBLE) { + down(limiter); + return limiter; + } + return down_interruptible(limiter) < 0 ? NULL : limiter; +} + +/* + * Release a call slot. + */ +static void rxrpc_put_call_slot(struct rxrpc_call *call) +{ + struct semaphore *limiter = &rxrpc_call_limiter; + + if (test_bit(RXRPC_CALL_KERNEL, &call->flags)) + limiter = &rxrpc_kernel_call_limiter; + up(limiter); +} + +/* * Set up a call for the given parameters. * - Called with the socket lock held, which it must release. * - If it returns a call, the call's lock will need releasing by the caller. @@ -224,25 +277,34 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, { struct rxrpc_call *call, *xcall; struct rxrpc_net *rxnet; + struct semaphore *limiter; struct rb_node *parent, **pp; const void *here = __builtin_return_address(0); int ret; _enter("%p,%lx", rx, p->user_call_ID); + limiter = rxrpc_get_call_slot(p, gfp); + if (!limiter) { + release_sock(&rx->sk); + return ERR_PTR(-ERESTARTSYS); + } + call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id); if (IS_ERR(call)) { release_sock(&rx->sk); + up(limiter); _leave(" = %ld", PTR_ERR(call)); return call; } - if (p->intr) - __set_bit(RXRPC_CALL_IS_INTR, &call->flags); + call->interruptibility = p->interruptibility; call->tx_total_len = p->tx_total_len; trace_rxrpc_call(call->debug_id, rxrpc_call_new_client, - atomic_read(&call->usage), + refcount_read(&call->ref), here, (const void *)p->user_call_ID); + if (p->kernel) + __set_bit(RXRPC_CALL_KERNEL, &call->flags); /* We need to protect a partially set up call against the user as we * will be acting outside the socket lock. @@ -277,9 +339,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); rxnet = call->rxnet; - write_lock(&rxnet->call_lock); - list_add_tail(&call->link, &rxnet->calls); - write_unlock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); + list_add_tail_rcu(&call->link, &rxnet->calls); + spin_unlock_bh(&rxnet->call_lock); /* From this point on, the call is protected by its own lock. */ release_sock(&rx->sk); @@ -289,10 +351,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, */ ret = rxrpc_connect_call(rx, call, cp, srx, gfp); if (ret < 0) - goto error; + goto error_attached_to_socket; trace_rxrpc_call(call->debug_id, rxrpc_call_connected, - atomic_read(&call->usage), here, NULL); + refcount_read(&call->ref), here, NULL); rxrpc_start_call_timer(call); @@ -309,18 +371,29 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, error_dup_user_ID: write_unlock(&rx->call_lock); release_sock(&rx->sk); - ret = -EEXIST; - -error: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); + RX_CALL_DEAD, -EEXIST); trace_rxrpc_call(call->debug_id, rxrpc_call_error, - atomic_read(&call->usage), here, ERR_PTR(ret)); + refcount_read(&call->ref), here, ERR_PTR(-EEXIST)); rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); - _leave(" = %d", ret); - return ERR_PTR(ret); + _leave(" = -EEXIST"); + return ERR_PTR(-EEXIST); + + /* We got an error, but the call is attached to the socket and is in + * need of release. However, we might now race with recvmsg() when + * completing the call queues it. Return 0 from sys_sendmsg() and + * leave the error to recvmsg() to deal with. + */ +error_attached_to_socket: + trace_rxrpc_call(call->debug_id, rxrpc_call_error, + refcount_read(&call->ref), here, ERR_PTR(ret)); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); + _leave(" = c=%08x [err]", call->debug_id); + return call; } /* @@ -341,9 +414,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->call_id = sp->hdr.callNumber; call->service_id = sp->hdr.serviceId; call->cid = sp->hdr.cid; - call->state = RXRPC_CALL_SERVER_ACCEPTING; - if (sp->hdr.securityIndex > 0) - call->state = RXRPC_CALL_SERVER_SECURING; + call->state = RXRPC_CALL_SERVER_SECURING; call->cong_tstamp = skb->tstamp; /* Set the channel for this call. We don't get channel_lock as we're @@ -373,8 +444,9 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, bool rxrpc_queue_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); - int n = atomic_fetch_add_unless(&call->usage, 1, 0); - if (n == 0) + int n; + + if (!__refcount_inc_not_zero(&call->ref, &n)) return false; if (rxrpc_queue_work(&call->processor)) trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1, @@ -390,7 +462,7 @@ bool rxrpc_queue_call(struct rxrpc_call *call) bool __rxrpc_queue_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); - int n = atomic_read(&call->usage); + int n = refcount_read(&call->ref); ASSERTCMP(n, >=, 1); if (rxrpc_queue_work(&call->processor)) trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n, @@ -407,22 +479,34 @@ void rxrpc_see_call(struct rxrpc_call *call) { const void *here = __builtin_return_address(0); if (call) { - int n = atomic_read(&call->usage); + int n = refcount_read(&call->ref); trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n, here, NULL); } } +bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +{ + const void *here = __builtin_return_address(0); + int n; + + if (!__refcount_inc_not_zero(&call->ref, &n)) + return false; + trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); + return true; +} + /* * Note the addition of a ref on a call. */ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&call->usage); + int n; - trace_rxrpc_call(call->debug_id, op, n, here, NULL); + __refcount_inc(&call->ref, &n); + trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); } /* @@ -447,10 +531,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) struct rxrpc_connection *conn = call->conn; bool put = false; - _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); + _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); trace_rxrpc_call(call->debug_id, rxrpc_call_release, - atomic_read(&call->usage), + refcount_read(&call->ref), here, (const void *)call->flags); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); @@ -460,7 +544,8 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) BUG(); spin_unlock_bh(&call->lock); - del_timer_sync(&call->timer); + rxrpc_put_call_slot(call); + rxrpc_delete_call_timer(call); /* Make sure we don't get any more notifications */ write_lock_bh(&rx->recvmsg_lock); @@ -497,8 +582,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) rxrpc_disconnect_call(call); if (call->security) call->security->free_call_crypto(call); - - rxrpc_cleanup_ring(call); _leave(""); } @@ -540,21 +623,21 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) struct rxrpc_net *rxnet = call->rxnet; const void *here = __builtin_return_address(0); unsigned int debug_id = call->debug_id; + bool dead; int n; ASSERT(call != NULL); - n = atomic_dec_return(&call->usage); + dead = __refcount_dec_and_test(&call->ref, &n); trace_rxrpc_call(debug_id, op, n, here, NULL); - ASSERTCMP(n, >=, 0); - if (n == 0) { + if (dead) { _debug("call %d dead", call->debug_id); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); if (!list_empty(&call->link)) { - write_lock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); list_del_init(&call->link); - write_unlock(&rxnet->call_lock); + spin_unlock_bh(&rxnet->call_lock); } rxrpc_cleanup_call(call); @@ -569,6 +652,8 @@ static void rxrpc_destroy_call(struct work_struct *work) struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); struct rxrpc_net *rxnet = call->rxnet; + rxrpc_delete_call_timer(call); + rxrpc_put_connection(call->conn); rxrpc_put_peer(call->peer); kfree(call->rxtx_buffer); @@ -603,8 +688,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); - del_timer_sync(&call->timer); - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); @@ -626,7 +709,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); if (!list_empty(&rxnet->calls)) { - write_lock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); while (!list_empty(&rxnet->calls)) { call = list_entry(rxnet->calls.next, @@ -637,16 +720,16 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) list_del_init(&call->link); pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), + call, refcount_read(&call->ref), rxrpc_call_states[call->state], call->flags, call->events); - write_unlock(&rxnet->call_lock); + spin_unlock_bh(&rxnet->call_lock); cond_resched(); - write_lock(&rxnet->call_lock); + spin_lock_bh(&rxnet->call_lock); } - write_unlock(&rxnet->call_lock); + spin_unlock_bh(&rxnet->call_lock); } atomic_dec(&rxnet->nr_calls); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index ea7d4c21f889..3c9eeb5b750c 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -1,63 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* Client connection-specific management code. * - * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2016, 2020 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * Client connections need to be cached for a little while after they've made a * call so as to handle retransmitted DATA packets in case the server didn't * receive the final ACK or terminating ABORT we sent it. * - * Client connections can be in one of a number of cache states: - * - * (1) INACTIVE - The connection is not held in any list and may not have been - * exposed to the world. If it has been previously exposed, it was - * discarded from the idle list after expiring. - * - * (2) WAITING - The connection is waiting for the number of client conns to - * drop below the maximum capacity. Calls may be in progress upon it from - * when it was active and got culled. - * - * The connection is on the rxrpc_waiting_client_conns list which is kept - * in to-be-granted order. Culled conns with waiters go to the back of - * the queue just like new conns. - * - * (3) ACTIVE - The connection has at least one call in progress upon it, it - * may freely grant available channels to new calls and calls may be - * waiting on it for channels to become available. - * - * The connection is on the rxnet->active_client_conns list which is kept - * in activation order for culling purposes. - * - * rxrpc_nr_active_client_conns is held incremented also. - * - * (4) UPGRADE - As for ACTIVE, but only one call may be in progress and is - * being used to probe for service upgrade. - * - * (5) CULLED - The connection got summarily culled to try and free up - * capacity. Calls currently in progress on the connection are allowed to - * continue, but new calls will have to wait. There can be no waiters in - * this state - the conn would have to go to the WAITING state instead. - * - * (6) IDLE - The connection has no calls in progress upon it and must have - * been exposed to the world (ie. the EXPOSED flag must be set). When it - * expires, the EXPOSED flag is cleared and the connection transitions to - * the INACTIVE state. - * - * The connection is on the rxnet->idle_client_conns list which is kept in - * order of how soon they'll expire. - * * There are flags of relevance to the cache: * - * (1) EXPOSED - The connection ID got exposed to the world. If this flag is - * set, an extra ref is added to the connection preventing it from being - * reaped when it has no calls outstanding. This flag is cleared and the - * ref dropped when a conn is discarded from the idle list. - * - * This allows us to move terminal call state retransmission to the - * connection and to discard the call immediately we think it is done - * with. It also give us a chance to reuse the connection. - * * (2) DONT_REUSE - The connection should be discarded as soon as possible and * should not be reused. This is set when an exclusive connection is used * or a call ID counter overflows. @@ -78,7 +30,6 @@ #include "ar-internal.h" -__read_mostly unsigned int rxrpc_max_client_connections = 1000; __read_mostly unsigned int rxrpc_reap_client_connections = 900; __read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; @@ -89,8 +40,6 @@ __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; DEFINE_IDR(rxrpc_client_conn_ids); static DEFINE_SPINLOCK(rxrpc_conn_id_lock); -static void rxrpc_cull_active_client_conns(struct rxrpc_net *); - /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. The @@ -153,7 +102,7 @@ void rxrpc_destroy_client_conn_ids(void) if (!idr_is_empty(&rxrpc_client_conn_ids)) { idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) { pr_err("AF_RXRPC: Leaked client conn %p {%d}\n", - conn, atomic_read(&conn->usage)); + conn, refcount_read(&conn->ref)); } BUG(); } @@ -162,13 +111,57 @@ void rxrpc_destroy_client_conn_ids(void) } /* + * Allocate a connection bundle. + */ +static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, + gfp_t gfp) +{ + struct rxrpc_bundle *bundle; + + bundle = kzalloc(sizeof(*bundle), gfp); + if (bundle) { + bundle->params = *cp; + rxrpc_get_peer(bundle->params.peer); + refcount_set(&bundle->ref, 1); + spin_lock_init(&bundle->channel_lock); + INIT_LIST_HEAD(&bundle->waiting_calls); + } + return bundle; +} + +struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle) +{ + refcount_inc(&bundle->ref); + return bundle; +} + +static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) +{ + rxrpc_put_peer(bundle->params.peer); + kfree(bundle); +} + +void rxrpc_put_bundle(struct rxrpc_bundle *bundle) +{ + unsigned int d = bundle->debug_id; + bool dead; + int r; + + dead = __refcount_dec_and_test(&bundle->ref, &r); + + _debug("PUT B=%x %d", d, r); + if (dead) + rxrpc_free_bundle(bundle); +} + +/* * Allocate a client connection. */ static struct rxrpc_connection * -rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) +rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) { struct rxrpc_connection *conn; - struct rxrpc_net *rxnet = cp->local->rxnet; + struct rxrpc_net *rxnet = bundle->params.local->rxnet; int ret; _enter(""); @@ -179,16 +172,12 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) return ERR_PTR(-ENOMEM); } - atomic_set(&conn->usage, 1); - if (cp->exclusive) - __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - if (cp->upgrade) - __set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags); - - conn->params = *cp; + refcount_set(&conn->ref, 1); + conn->bundle = bundle; + conn->params = bundle->params; conn->out_clientflag = RXRPC_CLIENT_INITIATED; conn->state = RXRPC_CONN_CLIENT; - conn->service_id = cp->service_id; + conn->service_id = conn->params.service_id; ret = rxrpc_get_client_connection_id(conn, gfp); if (ret < 0) @@ -198,29 +187,25 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) if (ret < 0) goto error_1; - ret = conn->security->prime_packet_security(conn); - if (ret < 0) - goto error_2; - atomic_inc(&rxnet->nr_conns); write_lock(&rxnet->conn_lock); list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); write_unlock(&rxnet->conn_lock); - /* We steal the caller's peer ref. */ - cp->peer = NULL; + rxrpc_get_bundle(bundle); + rxrpc_get_peer(conn->params.peer); rxrpc_get_local(conn->params.local); key_get(conn->params.key); trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client, - atomic_read(&conn->usage), + refcount_read(&conn->ref), __builtin_return_address(0)); + + atomic_inc(&rxnet->nr_client_conns); trace_rxrpc_client(conn, -1, rxrpc_client_alloc); _leave(" = %p", conn); return conn; -error_2: - conn->security->clear(conn); error_1: rxrpc_put_client_connection_id(conn); error_0: @@ -234,13 +219,18 @@ error_0: */ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) { - struct rxrpc_net *rxnet = conn->params.local->rxnet; + struct rxrpc_net *rxnet; int id_cursor, id, distance, limit; + if (!conn) + goto dont_reuse; + + rxnet = conn->params.local->rxnet; if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) goto dont_reuse; - if (conn->proto.epoch != rxnet->epoch) + if (conn->state != RXRPC_CONN_CLIENT || + conn->proto.epoch != rxnet->epoch) goto mark_dont_reuse; /* The IDR tree gets very expensive on memory if the connection IDs are @@ -254,7 +244,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) distance = id - id_cursor; if (distance < 0) distance = -distance; - limit = max(rxrpc_max_client_connections * 4, 1024U); + limit = max_t(unsigned long, atomic_read(&rxnet->nr_conns) * 4, 1024); if (distance > limit) goto mark_dont_reuse; @@ -267,277 +257,247 @@ dont_reuse: } /* - * Create or find a client connection to use for a call. - * - * If we return with a connection, the call will be on its waiting list. It's - * left to the caller to assign a channel and wake up the call. + * Look up the conn bundle that matches the connection parameters, adding it if + * it doesn't yet exist. */ -static int rxrpc_get_client_conn(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *cp, + gfp_t gfp) { - struct rxrpc_connection *conn, *candidate = NULL; + static atomic_t rxrpc_bundle_id; + struct rxrpc_bundle *bundle, *candidate; struct rxrpc_local *local = cp->local; struct rb_node *p, **pp, *parent; long diff; - int ret = -ENOMEM; - _enter("{%d,%lx},", call->debug_id, call->user_call_ID); + _enter("{%px,%x,%u,%u}", + cp->peer, key_serial(cp->key), cp->security_level, cp->upgrade); - cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp); - if (!cp->peer) - goto error; + if (cp->exclusive) + return rxrpc_alloc_bundle(cp, gfp); - call->cong_cwnd = cp->peer->cong_cwnd; - if (call->cong_cwnd >= call->cong_ssthresh) - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; - else - call->cong_mode = RXRPC_CALL_SLOW_START; + /* First, see if the bundle is already there. */ + _debug("search 1"); + spin_lock(&local->client_bundles_lock); + p = local->client_bundles.rb_node; + while (p) { + bundle = rb_entry(p, struct rxrpc_bundle, local_node); - /* If the connection is not meant to be exclusive, search the available - * connections to see if the connection we want to use already exists. - */ - if (!cp->exclusive) { - _debug("search 1"); - spin_lock(&local->client_conns_lock); - p = local->client_conns.rb_node; - while (p) { - conn = rb_entry(p, struct rxrpc_connection, client_node); - -#define cmp(X) ((long)conn->params.X - (long)cp->X) - diff = (cmp(peer) ?: - cmp(key) ?: - cmp(security_level) ?: - cmp(upgrade)); +#define cmp(X) ((long)bundle->params.X - (long)cp->X) + diff = (cmp(peer) ?: + cmp(key) ?: + cmp(security_level) ?: + cmp(upgrade)); #undef cmp - if (diff < 0) { - p = p->rb_left; - } else if (diff > 0) { - p = p->rb_right; - } else { - if (rxrpc_may_reuse_conn(conn) && - rxrpc_get_connection_maybe(conn)) - goto found_extant_conn; - /* The connection needs replacing. It's better - * to effect that when we have something to - * replace it with so that we don't have to - * rebalance the tree twice. - */ - break; - } - } - spin_unlock(&local->client_conns_lock); - } - - /* There wasn't a connection yet or we need an exclusive connection. - * We need to create a candidate and then potentially redo the search - * in case we're racing with another thread also trying to connect on a - * shareable connection. - */ - _debug("new conn"); - candidate = rxrpc_alloc_client_connection(cp, gfp); - if (IS_ERR(candidate)) { - ret = PTR_ERR(candidate); - goto error_peer; + if (diff < 0) + p = p->rb_left; + else if (diff > 0) + p = p->rb_right; + else + goto found_bundle; } + spin_unlock(&local->client_bundles_lock); + _debug("not found"); - /* Add the call to the new connection's waiting list in case we're - * going to have to wait for the connection to come live. It's our - * connection, so we want first dibs on the channel slots. We would - * normally have to take channel_lock but we do this before anyone else - * can see the connection. - */ - list_add(&call->chan_wait_link, &candidate->waiting_calls); - - if (cp->exclusive) { - call->conn = candidate; - call->security = candidate->security; - call->security_ix = candidate->security_ix; - call->service_id = candidate->service_id; - _leave(" = 0 [exclusive %d]", candidate->debug_id); - return 0; - } + /* It wasn't. We need to add one. */ + candidate = rxrpc_alloc_bundle(cp, gfp); + if (!candidate) + return NULL; - /* Publish the new connection for userspace to find. We need to redo - * the search before doing this lest we race with someone else adding a - * conflicting instance. - */ _debug("search 2"); - spin_lock(&local->client_conns_lock); - - pp = &local->client_conns.rb_node; + spin_lock(&local->client_bundles_lock); + pp = &local->client_bundles.rb_node; parent = NULL; while (*pp) { parent = *pp; - conn = rb_entry(parent, struct rxrpc_connection, client_node); + bundle = rb_entry(parent, struct rxrpc_bundle, local_node); -#define cmp(X) ((long)conn->params.X - (long)candidate->params.X) +#define cmp(X) ((long)bundle->params.X - (long)cp->X) diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level) ?: cmp(upgrade)); #undef cmp - if (diff < 0) { + if (diff < 0) pp = &(*pp)->rb_left; - } else if (diff > 0) { + else if (diff > 0) pp = &(*pp)->rb_right; - } else { - if (rxrpc_may_reuse_conn(conn) && - rxrpc_get_connection_maybe(conn)) - goto found_extant_conn; - /* The old connection is from an outdated epoch. */ - _debug("replace conn"); - clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags); - rb_replace_node(&conn->client_node, - &candidate->client_node, - &local->client_conns); - trace_rxrpc_client(conn, -1, rxrpc_client_replace); - goto candidate_published; - } + else + goto found_bundle_free; } - _debug("new conn"); - rb_link_node(&candidate->client_node, parent, pp); - rb_insert_color(&candidate->client_node, &local->client_conns); - -candidate_published: - set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); - call->conn = candidate; - call->security = candidate->security; - call->security_ix = candidate->security_ix; - call->service_id = candidate->service_id; - spin_unlock(&local->client_conns_lock); - _leave(" = 0 [new %d]", candidate->debug_id); - return 0; + _debug("new bundle"); + candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id); + rb_link_node(&candidate->local_node, parent, pp); + rb_insert_color(&candidate->local_node, &local->client_bundles); + rxrpc_get_bundle(candidate); + spin_unlock(&local->client_bundles_lock); + _leave(" = %u [new]", candidate->debug_id); + return candidate; + +found_bundle_free: + rxrpc_free_bundle(candidate); +found_bundle: + rxrpc_get_bundle(bundle); + spin_unlock(&local->client_bundles_lock); + _leave(" = %u [found]", bundle->debug_id); + return bundle; +} - /* We come here if we found a suitable connection already in existence. - * Discard any candidate we may have allocated, and try to get a - * channel on this one. - */ -found_extant_conn: - _debug("found conn"); - spin_unlock(&local->client_conns_lock); +/* + * Create or find a client bundle to use for a call. + * + * If we return with a connection, the call will be on its waiting list. It's + * left to the caller to assign a channel and wake up the call. + */ +static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + gfp_t gfp) +{ + struct rxrpc_bundle *bundle; - if (candidate) { - trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); - rxrpc_put_connection(candidate); - candidate = NULL; - } + _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - spin_lock(&conn->channel_lock); - call->conn = conn; - call->security = conn->security; - call->security_ix = conn->security_ix; - call->service_id = conn->service_id; - list_add_tail(&call->chan_wait_link, &conn->waiting_calls); - spin_unlock(&conn->channel_lock); - _leave(" = 0 [extant %d]", conn->debug_id); - return 0; + cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp); + if (!cp->peer) + goto error; + + call->cong_cwnd = cp->peer->cong_cwnd; + if (call->cong_cwnd >= call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + else + call->cong_mode = RXRPC_CALL_SLOW_START; + if (cp->upgrade) + __set_bit(RXRPC_CALL_UPGRADE, &call->flags); + + /* Find the client connection bundle. */ + bundle = rxrpc_look_up_bundle(cp, gfp); + if (!bundle) + goto error; + + /* Get this call queued. Someone else may activate it whilst we're + * lining up a new connection, but that's fine. + */ + spin_lock(&bundle->channel_lock); + list_add_tail(&call->chan_wait_link, &bundle->waiting_calls); + spin_unlock(&bundle->channel_lock); + + _leave(" = [B=%x]", bundle->debug_id); + return bundle; -error_peer: - rxrpc_put_peer(cp->peer); - cp->peer = NULL; error: - _leave(" = %d", ret); - return ret; + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); } /* - * Activate a connection. + * Allocate a new connection and add it into a bundle. */ -static void rxrpc_activate_conn(struct rxrpc_net *rxnet, - struct rxrpc_connection *conn) +static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) + __releases(bundle->channel_lock) { - if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) { - trace_rxrpc_client(conn, -1, rxrpc_client_to_upgrade); - conn->cache_state = RXRPC_CONN_CLIENT_UPGRADE; - } else { - trace_rxrpc_client(conn, -1, rxrpc_client_to_active); - conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; - } - rxnet->nr_active_client_conns++; - list_move_tail(&conn->cache_link, &rxnet->active_client_conns); -} + struct rxrpc_connection *candidate = NULL, *old = NULL; + bool conflict; + int i; -/* - * Attempt to animate a connection for a new call. - * - * If it's not exclusive, the connection is in the endpoint tree, and we're in - * the conn's list of those waiting to grab a channel. There is, however, a - * limit on the number of live connections allowed at any one time, so we may - * have to wait for capacity to become available. - * - * Note that a connection on the waiting queue might *also* have active - * channels if it has been culled to make space and then re-requested by a new - * call. - */ -static void rxrpc_animate_client_conn(struct rxrpc_net *rxnet, - struct rxrpc_connection *conn) -{ - unsigned int nr_conns; + _enter(""); - _enter("%d,%d", conn->debug_id, conn->cache_state); + conflict = bundle->alloc_conn; + if (!conflict) + bundle->alloc_conn = true; + spin_unlock(&bundle->channel_lock); + if (conflict) { + _leave(" [conf]"); + return; + } - if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE || - conn->cache_state == RXRPC_CONN_CLIENT_UPGRADE) - goto out; + candidate = rxrpc_alloc_client_connection(bundle, gfp); - spin_lock(&rxnet->client_conn_cache_lock); + spin_lock(&bundle->channel_lock); + bundle->alloc_conn = false; - nr_conns = rxnet->nr_client_conns; - if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) { - trace_rxrpc_client(conn, -1, rxrpc_client_count); - rxnet->nr_client_conns = nr_conns + 1; + if (IS_ERR(candidate)) { + bundle->alloc_error = PTR_ERR(candidate); + spin_unlock(&bundle->channel_lock); + _leave(" [err %ld]", PTR_ERR(candidate)); + return; } - switch (conn->cache_state) { - case RXRPC_CONN_CLIENT_ACTIVE: - case RXRPC_CONN_CLIENT_UPGRADE: - case RXRPC_CONN_CLIENT_WAITING: - break; - - case RXRPC_CONN_CLIENT_INACTIVE: - case RXRPC_CONN_CLIENT_CULLED: - case RXRPC_CONN_CLIENT_IDLE: - if (nr_conns >= rxrpc_max_client_connections) - goto wait_for_capacity; - goto activate_conn; + bundle->alloc_error = 0; + + for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) { + unsigned int shift = i * RXRPC_MAXCALLS; + int j; + + old = bundle->conns[i]; + if (!rxrpc_may_reuse_conn(old)) { + if (old) + trace_rxrpc_client(old, -1, rxrpc_client_replace); + candidate->bundle_shift = shift; + bundle->conns[i] = candidate; + for (j = 0; j < RXRPC_MAXCALLS; j++) + set_bit(shift + j, &bundle->avail_chans); + candidate = NULL; + break; + } - default: - BUG(); + old = NULL; } -out_unlock: - spin_unlock(&rxnet->client_conn_cache_lock); -out: - _leave(" [%d]", conn->cache_state); - return; + spin_unlock(&bundle->channel_lock); -activate_conn: - _debug("activate"); - rxrpc_activate_conn(rxnet, conn); - goto out_unlock; - -wait_for_capacity: - _debug("wait"); - trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); - conn->cache_state = RXRPC_CONN_CLIENT_WAITING; - list_move_tail(&conn->cache_link, &rxnet->waiting_client_conns); - goto out_unlock; + if (candidate) { + _debug("discard C=%x", candidate->debug_id); + trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); + rxrpc_put_connection(candidate); + } + + rxrpc_put_connection(old); + _leave(""); } /* - * Deactivate a channel. + * Add a connection to a bundle if there are no usable connections or we have + * connections waiting for extra capacity. */ -static void rxrpc_deactivate_one_channel(struct rxrpc_connection *conn, - unsigned int channel) +static void rxrpc_maybe_add_conn(struct rxrpc_bundle *bundle, gfp_t gfp) { - struct rxrpc_channel *chan = &conn->channels[channel]; + struct rxrpc_call *call; + int i, usable; - rcu_assign_pointer(chan->call, NULL); - conn->active_chans &= ~(1 << channel); + _enter(""); + + spin_lock(&bundle->channel_lock); + + /* See if there are any usable connections. */ + usable = 0; + for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) + if (rxrpc_may_reuse_conn(bundle->conns[i])) + usable++; + + if (!usable && !list_empty(&bundle->waiting_calls)) { + call = list_first_entry(&bundle->waiting_calls, + struct rxrpc_call, chan_wait_link); + if (test_bit(RXRPC_CALL_UPGRADE, &call->flags)) + bundle->try_upgrade = true; + } + + if (!usable) + goto alloc_conn; + + if (!bundle->avail_chans && + !bundle->try_upgrade && + !list_empty(&bundle->waiting_calls) && + usable < ARRAY_SIZE(bundle->conns)) + goto alloc_conn; + + spin_unlock(&bundle->channel_lock); + _leave(""); + return; + +alloc_conn: + return rxrpc_add_conn_to_bundle(bundle, gfp); } /* @@ -549,35 +509,42 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, unsigned int channel) { struct rxrpc_channel *chan = &conn->channels[channel]; - struct rxrpc_call *call = list_entry(conn->waiting_calls.next, + struct rxrpc_bundle *bundle = conn->bundle; + struct rxrpc_call *call = list_entry(bundle->waiting_calls.next, struct rxrpc_call, chan_wait_link); u32 call_id = chan->call_counter + 1; + _enter("C=%x,%u", conn->debug_id, channel); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); /* Cancel the final ACK on the previous call if it hasn't been sent yet * as the DATA packet will implicitly ACK it. */ clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); - - write_lock_bh(&call->state_lock); - call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; - write_unlock_bh(&call->state_lock); + clear_bit(conn->bundle_shift + channel, &bundle->avail_chans); rxrpc_see_call(call); list_del_init(&call->chan_wait_link); - conn->active_chans |= 1 << channel; call->peer = rxrpc_get_peer(conn->params.peer); + call->conn = rxrpc_get_connection(conn); call->cid = conn->proto.cid | channel; call->call_id = call_id; + call->security = conn->security; + call->security_ix = conn->security_ix; + call->service_id = conn->service_id; trace_rxrpc_connect_call(call); _net("CONNECT call %08x:%08x as call %d on conn %d", call->cid, call->call_id, call->debug_id, conn->debug_id); - /* Paired with the read barrier in rxrpc_wait_for_channel(). This - * orders cid and epoch in the connection wrt to call_id without the - * need to take the channel_lock. + write_lock_bh(&call->state_lock); + call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; + write_unlock_bh(&call->state_lock); + + /* Paired with the read barrier in rxrpc_connect_call(). This orders + * cid and epoch in the connection wrt to call_id without the need to + * take the channel_lock. * * We provisionally assign a callNumber at this point, but we don't * confirm it until the call is about to be exposed. @@ -586,94 +553,137 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, * at the call ID through a connection channel. */ smp_wmb(); - chan->call_id = call_id; - chan->call_debug_id = call->debug_id; + + chan->call_id = call_id; + chan->call_debug_id = call->debug_id; rcu_assign_pointer(chan->call, call); wake_up(&call->waitq); } /* + * Remove a connection from the idle list if it's on it. + */ +static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn) +{ + struct rxrpc_net *rxnet = bundle->params.local->rxnet; + bool drop_ref; + + if (!list_empty(&conn->cache_link)) { + drop_ref = false; + spin_lock(&rxnet->client_conn_cache_lock); + if (!list_empty(&conn->cache_link)) { + list_del_init(&conn->cache_link); + drop_ref = true; + } + spin_unlock(&rxnet->client_conn_cache_lock); + if (drop_ref) + rxrpc_put_connection(conn); + } +} + +/* * Assign channels and callNumbers to waiting calls with channel_lock * held by caller. */ -static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn) +static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle) { - u8 avail, mask; - - switch (conn->cache_state) { - case RXRPC_CONN_CLIENT_ACTIVE: - mask = RXRPC_ACTIVE_CHANS_MASK; - break; - case RXRPC_CONN_CLIENT_UPGRADE: - mask = 0x01; - break; - default: - return; - } + struct rxrpc_connection *conn; + unsigned long avail, mask; + unsigned int channel, slot; - while (!list_empty(&conn->waiting_calls) && - (avail = ~conn->active_chans, - avail &= mask, - avail != 0)) - rxrpc_activate_one_channel(conn, __ffs(avail)); + if (bundle->try_upgrade) + mask = 1; + else + mask = ULONG_MAX; + + while (!list_empty(&bundle->waiting_calls)) { + avail = bundle->avail_chans & mask; + if (!avail) + break; + channel = __ffs(avail); + clear_bit(channel, &bundle->avail_chans); + + slot = channel / RXRPC_MAXCALLS; + conn = bundle->conns[slot]; + if (!conn) + break; + + if (bundle->try_upgrade) + set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags); + rxrpc_unidle_conn(bundle, conn); + + channel &= (RXRPC_MAXCALLS - 1); + conn->act_chans |= 1 << channel; + rxrpc_activate_one_channel(conn, channel); + } } /* * Assign channels and callNumbers to waiting calls. */ -static void rxrpc_activate_channels(struct rxrpc_connection *conn) +static void rxrpc_activate_channels(struct rxrpc_bundle *bundle) { - _enter("%d", conn->debug_id); + _enter("B=%x", bundle->debug_id); - trace_rxrpc_client(conn, -1, rxrpc_client_activate_chans); + trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans); - if (conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) + if (!bundle->avail_chans) return; - spin_lock(&conn->channel_lock); - rxrpc_activate_channels_locked(conn); - spin_unlock(&conn->channel_lock); + spin_lock(&bundle->channel_lock); + rxrpc_activate_channels_locked(bundle); + spin_unlock(&bundle->channel_lock); _leave(""); } /* * Wait for a callNumber and a channel to be granted to a call. */ -static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp) +static int rxrpc_wait_for_channel(struct rxrpc_bundle *bundle, + struct rxrpc_call *call, gfp_t gfp) { + DECLARE_WAITQUEUE(myself, current); int ret = 0; _enter("%d", call->debug_id); - if (!call->call_id) { - DECLARE_WAITQUEUE(myself, current); + if (!gfpflags_allow_blocking(gfp)) { + rxrpc_maybe_add_conn(bundle, gfp); + rxrpc_activate_channels(bundle); + ret = bundle->alloc_error ?: -EAGAIN; + goto out; + } - if (!gfpflags_allow_blocking(gfp)) { - ret = -EAGAIN; - goto out; + add_wait_queue_exclusive(&call->waitq, &myself); + for (;;) { + rxrpc_maybe_add_conn(bundle, gfp); + rxrpc_activate_channels(bundle); + ret = bundle->alloc_error; + if (ret < 0) + break; + + switch (call->interruptibility) { + case RXRPC_INTERRUPTIBLE: + case RXRPC_PREINTERRUPTIBLE: + set_current_state(TASK_INTERRUPTIBLE); + break; + case RXRPC_UNINTERRUPTIBLE: + default: + set_current_state(TASK_UNINTERRUPTIBLE); + break; } - - add_wait_queue_exclusive(&call->waitq, &myself); - for (;;) { - if (test_bit(RXRPC_CALL_IS_INTR, &call->flags)) - set_current_state(TASK_INTERRUPTIBLE); - else - set_current_state(TASK_UNINTERRUPTIBLE); - if (call->call_id) - break; - if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) && - signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } - schedule(); + if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_AWAIT_CONN) + break; + if ((call->interruptibility == RXRPC_INTERRUPTIBLE || + call->interruptibility == RXRPC_PREINTERRUPTIBLE) && + signal_pending(current)) { + ret = -ERESTARTSYS; + break; } - remove_wait_queue(&call->waitq, &myself); - __set_current_state(TASK_RUNNING); + schedule(); } - - /* Paired with the write barrier in rxrpc_activate_one_channel(). */ - smp_rmb(); + remove_wait_queue(&call->waitq, &myself); + __set_current_state(TASK_RUNNING); out: _leave(" = %d", ret); @@ -690,52 +700,50 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, struct sockaddr_rxrpc *srx, gfp_t gfp) { + struct rxrpc_bundle *bundle; struct rxrpc_net *rxnet = cp->local->rxnet; - int ret; + int ret = 0; _enter("{%d,%lx},", call->debug_id, call->user_call_ID); rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); - rxrpc_cull_active_client_conns(rxnet); - ret = rxrpc_get_client_conn(rx, call, cp, srx, gfp); - if (ret < 0) + bundle = rxrpc_prep_call(rx, call, cp, srx, gfp); + if (IS_ERR(bundle)) { + ret = PTR_ERR(bundle); goto out; + } - rxrpc_animate_client_conn(rxnet, call->conn); - rxrpc_activate_channels(call->conn); - - ret = rxrpc_wait_for_channel(call, gfp); - if (ret < 0) { - trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed); - rxrpc_disconnect_client_call(call); - goto out; + if (call->state == RXRPC_CALL_CLIENT_AWAIT_CONN) { + ret = rxrpc_wait_for_channel(bundle, call, gfp); + if (ret < 0) + goto wait_failed; } - spin_lock_bh(&call->conn->params.peer->lock); - hlist_add_head_rcu(&call->error_link, - &call->conn->params.peer->error_targets); - spin_unlock_bh(&call->conn->params.peer->lock); +granted_channel: + /* Paired with the write barrier in rxrpc_activate_one_channel(). */ + smp_rmb(); +out_put_bundle: + rxrpc_put_bundle(bundle); out: _leave(" = %d", ret); return ret; -} -/* - * Note that a connection is about to be exposed to the world. Once it is - * exposed, we maintain an extra ref on it that stops it from being summarily - * discarded before it's (a) had a chance to deal with retransmission and (b) - * had a chance at re-use (the per-connection security negotiation is - * expensive). - */ -static void rxrpc_expose_client_conn(struct rxrpc_connection *conn, - unsigned int channel) -{ - if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { - trace_rxrpc_client(conn, channel, rxrpc_client_exposed); - rxrpc_get_connection(conn); +wait_failed: + spin_lock(&bundle->channel_lock); + list_del_init(&call->chan_wait_link); + spin_unlock(&bundle->channel_lock); + + if (call->state != RXRPC_CALL_CLIENT_AWAIT_CONN) { + ret = 0; + goto granted_channel; } + + trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed); + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); + rxrpc_disconnect_client_call(bundle, call); + goto out_put_bundle; } /* @@ -757,7 +765,7 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) chan->call_counter++; if (chan->call_counter >= INT_MAX) set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - rxrpc_expose_client_conn(conn, channel); + trace_rxrpc_client(conn, channel, rxrpc_client_exposed); } } @@ -766,62 +774,56 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) */ static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet) { - unsigned long now = jiffies; - unsigned long reap_at = now + rxrpc_conn_idle_client_expiry; + if (!rxnet->kill_all_client_conns) { + unsigned long now = jiffies; + unsigned long reap_at = now + rxrpc_conn_idle_client_expiry; - if (rxnet->live) - timer_reduce(&rxnet->client_conn_reap_timer, reap_at); + if (rxnet->live) + timer_reduce(&rxnet->client_conn_reap_timer, reap_at); + } } /* * Disconnect a client call. */ -void rxrpc_disconnect_client_call(struct rxrpc_call *call) +void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call *call) { - struct rxrpc_connection *conn = call->conn; + struct rxrpc_connection *conn; struct rxrpc_channel *chan = NULL; - struct rxrpc_net *rxnet = conn->params.local->rxnet; - unsigned int channel = -1; + struct rxrpc_net *rxnet = bundle->params.local->rxnet; + unsigned int channel; + bool may_reuse; u32 cid; - spin_lock(&conn->channel_lock); - set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + _enter("c=%x", call->debug_id); - cid = call->cid; - if (cid) { - channel = cid & RXRPC_CHANNELMASK; - chan = &conn->channels[channel]; - } - trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); + spin_lock(&bundle->channel_lock); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); /* Calls that have never actually been assigned a channel can simply be - * discarded. If the conn didn't get used either, it will follow - * immediately unless someone else grabs it in the meantime. + * discarded. */ - if (!list_empty(&call->chan_wait_link)) { + conn = call->conn; + if (!conn) { _debug("call is waiting"); ASSERTCMP(call->call_id, ==, 0); ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); list_del_init(&call->chan_wait_link); - - trace_rxrpc_client(conn, channel, rxrpc_client_chan_unstarted); - - /* We must deactivate or idle the connection if it's now - * waiting for nothing. - */ - spin_lock(&rxnet->client_conn_cache_lock); - if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING && - list_empty(&conn->waiting_calls) && - !conn->active_chans) - goto idle_connection; goto out; } + cid = call->cid; + channel = cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; + trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); + if (rcu_access_pointer(chan->call) != call) { - spin_unlock(&conn->channel_lock); + spin_unlock(&bundle->channel_lock); BUG(); } + may_reuse = rxrpc_may_reuse_conn(conn); + /* If a client call was exposed to the world, we save the result for * retransmission. * @@ -834,14 +836,21 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { _debug("exposed %u,%u", call->call_id, call->abort_code); __rxrpc_disconnect_call(conn, call); + + if (test_and_clear_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) { + trace_rxrpc_client(conn, channel, rxrpc_client_to_active); + bundle->try_upgrade = false; + if (may_reuse) + rxrpc_activate_channels_locked(bundle); + } + } /* See if we can pass the channel directly to another call. */ - if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE && - !list_empty(&conn->waiting_calls)) { + if (may_reuse && !list_empty(&bundle->waiting_calls)) { trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); rxrpc_activate_one_channel(conn, channel); - goto out_2; + goto out; } /* Schedule the final ACK to be transmitted in a short while so that it @@ -858,196 +867,115 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) rxrpc_reduce_conn_timer(conn, final_ack_at); } - /* Things are more complex and we need the cache lock. We might be - * able to simply idle the conn or it might now be lurking on the wait - * list. It might even get moved back to the active list whilst we're - * waiting for the lock. - */ - spin_lock(&rxnet->client_conn_cache_lock); - - switch (conn->cache_state) { - case RXRPC_CONN_CLIENT_UPGRADE: - /* Deal with termination of a service upgrade probe. */ - if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { - clear_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags); - trace_rxrpc_client(conn, channel, rxrpc_client_to_active); - conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; - rxrpc_activate_channels_locked(conn); - } - /* fall through */ - case RXRPC_CONN_CLIENT_ACTIVE: - if (list_empty(&conn->waiting_calls)) { - rxrpc_deactivate_one_channel(conn, channel); - if (!conn->active_chans) { - rxnet->nr_active_client_conns--; - goto idle_connection; - } - goto out; - } - - trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); - rxrpc_activate_one_channel(conn, channel); - goto out; + /* Deactivate the channel. */ + rcu_assign_pointer(chan->call, NULL); + set_bit(conn->bundle_shift + channel, &conn->bundle->avail_chans); + conn->act_chans &= ~(1 << channel); - case RXRPC_CONN_CLIENT_CULLED: - rxrpc_deactivate_one_channel(conn, channel); - ASSERT(list_empty(&conn->waiting_calls)); - if (!conn->active_chans) - goto idle_connection; - goto out; + /* If no channels remain active, then put the connection on the idle + * list for a short while. Give it a ref to stop it going away if it + * becomes unbundled. + */ + if (!conn->act_chans) { + trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); + conn->idle_timestamp = jiffies; - case RXRPC_CONN_CLIENT_WAITING: - rxrpc_deactivate_one_channel(conn, channel); - goto out; + rxrpc_get_connection(conn); + spin_lock(&rxnet->client_conn_cache_lock); + list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); + spin_unlock(&rxnet->client_conn_cache_lock); - default: - BUG(); + rxrpc_set_client_reap_timer(rxnet); } out: - spin_unlock(&rxnet->client_conn_cache_lock); -out_2: - spin_unlock(&conn->channel_lock); + spin_unlock(&bundle->channel_lock); _leave(""); return; - -idle_connection: - /* As no channels remain active, the connection gets deactivated - * immediately or moved to the idle list for a short while. - */ - if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { - trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); - conn->idle_timestamp = jiffies; - conn->cache_state = RXRPC_CONN_CLIENT_IDLE; - list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); - if (rxnet->idle_client_conns.next == &conn->cache_link && - !rxnet->kill_all_client_conns) - rxrpc_set_client_reap_timer(rxnet); - } else { - trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); - conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; - list_del_init(&conn->cache_link); - } - goto out; } /* - * Clean up a dead client connection. + * Remove a connection from a bundle. */ -static struct rxrpc_connection * -rxrpc_put_one_client_conn(struct rxrpc_connection *conn) +static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) { - struct rxrpc_connection *next = NULL; - struct rxrpc_local *local = conn->params.local; - struct rxrpc_net *rxnet = local->rxnet; - unsigned int nr_conns; - - trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); - - if (test_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) { - spin_lock(&local->client_conns_lock); - if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, - &conn->flags)) - rb_erase(&conn->client_node, &local->client_conns); - spin_unlock(&local->client_conns_lock); + struct rxrpc_bundle *bundle = conn->bundle; + struct rxrpc_local *local = bundle->params.local; + unsigned int bindex; + bool need_drop = false, need_put = false; + int i; + + _enter("C=%x", conn->debug_id); + + if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) + rxrpc_process_delayed_final_acks(conn, true); + + spin_lock(&bundle->channel_lock); + bindex = conn->bundle_shift / RXRPC_MAXCALLS; + if (bundle->conns[bindex] == conn) { + _debug("clear slot %u", bindex); + bundle->conns[bindex] = NULL; + for (i = 0; i < RXRPC_MAXCALLS; i++) + clear_bit(conn->bundle_shift + i, &bundle->avail_chans); + need_drop = true; } + spin_unlock(&bundle->channel_lock); - rxrpc_put_client_connection_id(conn); - - ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE); + /* If there are no more connections, remove the bundle */ + if (!bundle->avail_chans) { + _debug("maybe unbundle"); + spin_lock(&local->client_bundles_lock); - if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) { - trace_rxrpc_client(conn, -1, rxrpc_client_uncount); - spin_lock(&rxnet->client_conn_cache_lock); - nr_conns = --rxnet->nr_client_conns; - - if (nr_conns < rxrpc_max_client_connections && - !list_empty(&rxnet->waiting_client_conns)) { - next = list_entry(rxnet->waiting_client_conns.next, - struct rxrpc_connection, cache_link); - rxrpc_get_connection(next); - rxrpc_activate_conn(rxnet, next); + for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) + if (bundle->conns[i]) + break; + if (i == ARRAY_SIZE(bundle->conns) && !bundle->params.exclusive) { + _debug("erase bundle"); + rb_erase(&bundle->local_node, &local->client_bundles); + need_put = true; } - spin_unlock(&rxnet->client_conn_cache_lock); + spin_unlock(&local->client_bundles_lock); + if (need_put) + rxrpc_put_bundle(bundle); } - rxrpc_kill_connection(conn); - if (next) - rxrpc_activate_channels(next); - - /* We need to get rid of the temporary ref we took upon next, but we - * can't call rxrpc_put_connection() recursively. - */ - return next; + if (need_drop) + rxrpc_put_connection(conn); + _leave(""); } /* - * Clean up a dead client connections. + * Clean up a dead client connection. */ -void rxrpc_put_client_conn(struct rxrpc_connection *conn) +static void rxrpc_kill_client_conn(struct rxrpc_connection *conn) { - const void *here = __builtin_return_address(0); - unsigned int debug_id = conn->debug_id; - int n; + struct rxrpc_local *local = conn->params.local; + struct rxrpc_net *rxnet = local->rxnet; - do { - n = atomic_dec_return(&conn->usage); - trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here); - if (n > 0) - return; - ASSERTCMP(n, >=, 0); + _enter("C=%x", conn->debug_id); + + trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); + atomic_dec(&rxnet->nr_client_conns); - conn = rxrpc_put_one_client_conn(conn); - } while (conn); + rxrpc_put_client_connection_id(conn); + rxrpc_kill_connection(conn); } /* - * Kill the longest-active client connections to make room for new ones. + * Clean up a dead client connections. */ -static void rxrpc_cull_active_client_conns(struct rxrpc_net *rxnet) +void rxrpc_put_client_conn(struct rxrpc_connection *conn) { - struct rxrpc_connection *conn; - unsigned int nr_conns = rxnet->nr_client_conns; - unsigned int nr_active, limit; - - _enter(""); - - ASSERTCMP(nr_conns, >=, 0); - if (nr_conns < rxrpc_max_client_connections) { - _leave(" [ok]"); - return; - } - limit = rxrpc_reap_client_connections; - - spin_lock(&rxnet->client_conn_cache_lock); - nr_active = rxnet->nr_active_client_conns; - - while (nr_active > limit) { - ASSERT(!list_empty(&rxnet->active_client_conns)); - conn = list_entry(rxnet->active_client_conns.next, - struct rxrpc_connection, cache_link); - ASSERTIFCMP(conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE, - conn->cache_state, ==, RXRPC_CONN_CLIENT_UPGRADE); - - if (list_empty(&conn->waiting_calls)) { - trace_rxrpc_client(conn, -1, rxrpc_client_to_culled); - conn->cache_state = RXRPC_CONN_CLIENT_CULLED; - list_del_init(&conn->cache_link); - } else { - trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); - conn->cache_state = RXRPC_CONN_CLIENT_WAITING; - list_move_tail(&conn->cache_link, - &rxnet->waiting_client_conns); - } - - nr_active--; - } + const void *here = __builtin_return_address(0); + unsigned int debug_id = conn->debug_id; + bool dead; + int r; - rxnet->nr_active_client_conns = nr_active; - spin_unlock(&rxnet->client_conn_cache_lock); - ASSERTCMP(nr_active, >=, 0); - _leave(" [culled]"); + dead = __refcount_dec_and_test(&conn->ref, &r); + trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here); + if (dead) + rxrpc_kill_client_conn(conn); } /* @@ -1081,7 +1009,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work) /* We keep an estimate of what the number of conns ought to be after * we've discarded some so that we don't overdo the discarding. */ - nr_conns = rxnet->nr_client_conns; + nr_conns = atomic_read(&rxnet->nr_client_conns); next: spin_lock(&rxnet->client_conn_cache_lock); @@ -1091,7 +1019,6 @@ next: conn = list_entry(rxnet->idle_client_conns.next, struct rxrpc_connection, cache_link); - ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags)); if (!rxnet->kill_all_client_conns) { /* If the number of connections is over the reap limit, we @@ -1113,18 +1040,13 @@ next: } trace_rxrpc_client(conn, -1, rxrpc_client_discard); - if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags)) - BUG(); - conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; list_del_init(&conn->cache_link); spin_unlock(&rxnet->client_conn_cache_lock); - /* When we cleared the EXPOSED flag, we took on responsibility for the - * reference that that had on the usage count. We deal with that here. - * If someone re-sets the flag and re-gets the ref, that's fine. - */ - rxrpc_put_connection(conn); + rxrpc_unbundle_conn(conn); + rxrpc_put_connection(conn); /* Drop the ->cache_link ref */ + nr_conns--; goto next; @@ -1138,8 +1060,7 @@ not_yet_expired: */ _debug("not yet"); if (!rxnet->kill_all_client_conns) - timer_reduce(&rxnet->client_conn_reap_timer, - conn_expires_at); + timer_reduce(&rxnet->client_conn_reap_timer, conn_expires_at); out: spin_unlock(&rxnet->client_conn_cache_lock); @@ -1174,37 +1095,27 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *local) { struct rxrpc_connection *conn, *tmp; struct rxrpc_net *rxnet = local->rxnet; - unsigned int nr_active; LIST_HEAD(graveyard); _enter(""); spin_lock(&rxnet->client_conn_cache_lock); - nr_active = rxnet->nr_active_client_conns; list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns, cache_link) { if (conn->params.local == local) { - ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_IDLE); - trace_rxrpc_client(conn, -1, rxrpc_client_discard); - if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags)) - BUG(); - conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; list_move(&conn->cache_link, &graveyard); - nr_active--; } } - rxnet->nr_active_client_conns = nr_active; spin_unlock(&rxnet->client_conn_cache_lock); - ASSERTCMP(nr_active, >=, 0); while (!list_empty(&graveyard)) { conn = list_entry(graveyard.next, struct rxrpc_connection, cache_link); list_del_init(&conn->cache_link); - + rxrpc_unbundle_conn(conn); rxrpc_put_connection(conn); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 06fcff2ebbba..aab069701398 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -157,12 +157,12 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, _enter("{%d},%x", conn->debug_id, conn->abort_code); - spin_lock(&conn->channel_lock); + spin_lock(&conn->bundle->channel_lock); for (i = 0; i < RXRPC_MAXCALLS; i++) { call = rcu_dereference_protected( conn->channels[i].call, - lockdep_is_held(&conn->channel_lock)); + lockdep_is_held(&conn->bundle->channel_lock)); if (call) { if (compl == RXRPC_CALL_LOCALLY_ABORTED) trace_rxrpc_abort(call->debug_id, @@ -173,14 +173,13 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, else trace_rxrpc_rx_abort(call, serial, conn->abort_code); - if (rxrpc_set_call_completion(call, compl, - conn->abort_code, - conn->error)) - rxrpc_notify_socket(call); + rxrpc_set_call_completion(call, compl, + conn->abort_code, + conn->error); } } - spin_unlock(&conn->channel_lock); + spin_unlock(&conn->bundle->channel_lock); _leave(""); } @@ -211,6 +210,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, conn->error = error; conn->abort_code = abort_code; conn->state = RXRPC_CONN_LOCALLY_ABORTED; + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); spin_unlock_bh(&conn->state_lock); msg.msg_name = &conn->params.peer->srx.transport; @@ -270,7 +270,7 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call) if (call) { write_lock_bh(&call->state_lock); if (call->state == RXRPC_CALL_SERVER_SECURING) { - call->state = RXRPC_CALL_SERVER_ACCEPTING; + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; rxrpc_notify_socket(call); } write_unlock_bh(&call->state_lock); @@ -320,6 +320,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, conn->error = -ECONNABORTED; conn->abort_code = abort_code; conn->state = RXRPC_CONN_REMOTELY_ABORTED; + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); return -ECONNABORTED; @@ -332,30 +333,27 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, if (ret < 0) return ret; - ret = conn->security->init_connection_security(conn); - if (ret < 0) - return ret; - - ret = conn->security->prime_packet_security(conn); + ret = conn->security->init_connection_security( + conn, conn->params.key->payload.data[0]); if (ret < 0) return ret; - spin_lock(&conn->channel_lock); - spin_lock(&conn->state_lock); + spin_lock(&conn->bundle->channel_lock); + spin_lock_bh(&conn->state_lock); if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { conn->state = RXRPC_CONN_SERVICE; - spin_unlock(&conn->state_lock); + spin_unlock_bh(&conn->state_lock); for (loop = 0; loop < RXRPC_MAXCALLS; loop++) rxrpc_call_is_secure( rcu_dereference_protected( conn->channels[loop].call, - lockdep_is_held(&conn->channel_lock))); + lockdep_is_held(&conn->bundle->channel_lock))); } else { - spin_unlock(&conn->state_lock); + spin_unlock_bh(&conn->state_lock); } - spin_unlock(&conn->channel_lock); + spin_unlock(&conn->bundle->channel_lock); return 0; default: @@ -376,7 +374,6 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn) _enter("{%d}", conn->debug_id); ASSERT(conn->security_ix != 0); - ASSERT(conn->server_key); if (conn->security->issue_challenge(conn) < 0) { abort_code = RX_CALL_DEAD; @@ -396,7 +393,7 @@ abort: /* * Process delayed final ACKs that we haven't subsumed into a subsequent call. */ -static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn) +void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn, bool force) { unsigned long j = jiffies, next_j; unsigned int channel; @@ -415,7 +412,7 @@ again: smp_rmb(); /* vs rxrpc_disconnect_client_call */ ack_at = READ_ONCE(chan->final_ack_at); - if (time_before(j, ack_at)) { + if (time_before(j, ack_at) && !force) { if (time_before(ack_at, next_j)) { next_j = ack_at; set = true; @@ -449,7 +446,7 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn) /* Process delayed ACKs whose time has come. */ if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) - rxrpc_process_delayed_final_acks(conn); + rxrpc_process_delayed_final_acks(conn, false); /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 19e141eeed17..22089e37e97f 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -41,8 +41,6 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) conn = kzalloc(sizeof(struct rxrpc_connection), gfp); if (conn) { INIT_LIST_HEAD(&conn->cache_link); - spin_lock_init(&conn->channel_lock); - INIT_LIST_HEAD(&conn->waiting_calls); timer_setup(&conn->timer, &rxrpc_connection_timer, 0); INIT_WORK(&conn->processor, &rxrpc_process_connection); INIT_LIST_HEAD(&conn->proc_link); @@ -51,7 +49,6 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) conn->security = &rxrpc_no_security; spin_lock_init(&conn->state_lock); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); - conn->size_align = 4; conn->idle_timestamp = jiffies; } @@ -107,7 +104,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, goto not_found; *_peer = peer; conn = rxrpc_find_service_conn_rcu(peer, skb); - if (!conn || atomic_read(&conn->usage) == 0) + if (!conn || refcount_read(&conn->ref) == 0) goto not_found; _leave(" = %p", conn); return conn; @@ -117,7 +114,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, */ conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); - if (!conn || atomic_read(&conn->usage) == 0) { + if (!conn || refcount_read(&conn->ref) == 0) { _debug("no conn"); goto not_found; } @@ -186,7 +183,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; default: - chan->last_abort = RX_USER_ABORT; + chan->last_abort = RX_CALL_DEAD; chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; } @@ -212,16 +209,18 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) call->peer->cong_cwnd = call->cong_cwnd; - spin_lock_bh(&conn->params.peer->lock); - hlist_del_rcu(&call->error_link); - spin_unlock_bh(&conn->params.peer->lock); + if (!hlist_unhashed(&call->error_link)) { + spin_lock_bh(&call->peer->lock); + hlist_del_rcu(&call->error_link); + spin_unlock_bh(&call->peer->lock); + } if (rxrpc_is_client_call(call)) - return rxrpc_disconnect_client_call(call); + return rxrpc_disconnect_client_call(conn->bundle, call); - spin_lock(&conn->channel_lock); + spin_lock(&conn->bundle->channel_lock); __rxrpc_disconnect_call(conn, call); - spin_unlock(&conn->channel_lock); + spin_unlock(&conn->bundle->channel_lock); set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); conn->idle_timestamp = jiffies; @@ -264,11 +263,12 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn) bool rxrpc_queue_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); - int n = atomic_fetch_add_unless(&conn->usage, 1, 0); - if (n == 0) + int r; + + if (!__refcount_inc_not_zero(&conn->ref, &r)) return false; if (rxrpc_queue_work(&conn->processor)) - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here); else rxrpc_put_connection(conn); return true; @@ -281,7 +281,7 @@ void rxrpc_see_connection(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); if (conn) { - int n = atomic_read(&conn->usage); + int n = refcount_read(&conn->ref); trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here); } @@ -290,12 +290,14 @@ void rxrpc_see_connection(struct rxrpc_connection *conn) /* * Get a ref on a connection. */ -void rxrpc_get_connection(struct rxrpc_connection *conn) +struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&conn->usage); + int r; - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here); + __refcount_inc(&conn->ref, &r); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here); + return conn; } /* @@ -305,11 +307,11 @@ struct rxrpc_connection * rxrpc_get_connection_maybe(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); + int r; if (conn) { - int n = atomic_fetch_add_unless(&conn->usage, 1, 0); - if (n > 0) - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here); + if (__refcount_inc_not_zero(&conn->ref, &r)) + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here); else conn = NULL; } @@ -333,12 +335,11 @@ void rxrpc_put_service_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); unsigned int debug_id = conn->debug_id; - int n; + int r; - n = atomic_dec_return(&conn->usage); - trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here); - ASSERTCMP(n, >=, 0); - if (n == 1) + __refcount_dec(&conn->ref, &r); + trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here); + if (r - 1 == 1) rxrpc_set_service_reap_timer(conn->params.local->rxnet, jiffies + rxrpc_connection_expiry); } @@ -351,9 +352,9 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) struct rxrpc_connection *conn = container_of(rcu, struct rxrpc_connection, rcu); - _enter("{%d,u=%d}", conn->debug_id, atomic_read(&conn->usage)); + _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref)); - ASSERTCMP(atomic_read(&conn->usage), ==, 0); + ASSERTCMP(refcount_read(&conn->ref), ==, 0); _net("DESTROY CONN %d", conn->debug_id); @@ -362,7 +363,7 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) conn->security->clear(conn); key_put(conn->params.key); - key_put(conn->server_key); + rxrpc_put_bundle(conn->bundle); rxrpc_put_peer(conn->params.peer); if (atomic_dec_and_test(&conn->params.local->rxnet->nr_conns)) @@ -392,8 +393,8 @@ void rxrpc_service_connection_reaper(struct work_struct *work) write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { - ASSERTCMP(atomic_read(&conn->usage), >, 0); - if (likely(atomic_read(&conn->usage) > 1)) + ASSERTCMP(refcount_read(&conn->ref), >, 0); + if (likely(refcount_read(&conn->ref) > 1)) continue; if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; @@ -405,7 +406,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; _debug("reap CONN %d { u=%d,t=%ld }", - conn->debug_id, atomic_read(&conn->usage), + conn->debug_id, refcount_read(&conn->ref), (long)expire_at - (long)now); if (time_before(now, expire_at)) { @@ -418,7 +419,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) /* The usage count sits at 1 whilst the object is unused on the * list; we reduce that to 0 to make the object unavailable. */ - if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) + if (!refcount_dec_if_one(&conn->ref)) continue; trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL); @@ -442,7 +443,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) link); list_del_init(&conn->link); - ASSERTCMP(atomic_read(&conn->usage), ==, 0); + ASSERTCMP(refcount_read(&conn->ref), ==, 0); rxrpc_kill_connection(conn); } @@ -470,7 +471,7 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { pr_err("AF_RXRPC: Leaked conn %p {%d}\n", - conn, atomic_read(&conn->usage)); + conn, refcount_read(&conn->ref)); leak = true; } write_unlock(&rxnet->conn_lock); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 21da48e3d2e5..6e6aa02c6f9e 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -8,6 +8,12 @@ #include <linux/slab.h> #include "ar-internal.h" +static struct rxrpc_bundle rxrpc_service_dummy_bundle = { + .ref = REFCOUNT_INIT(1), + .debug_id = UINT_MAX, + .channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock), +}; + /* * Find a service connection under RCU conditions. * @@ -93,7 +99,7 @@ conn_published: return; found_extant_conn: - if (atomic_read(&cursor->usage) == 0) + if (refcount_read(&cursor->ref) == 0) goto replace_old_connection; write_sequnlock_bh(&peer->service_conn_lock); /* We should not be able to get here. rxrpc_incoming_connection() is @@ -126,7 +132,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn * the rxrpc_connections list. */ conn->state = RXRPC_CONN_SERVICE_PREALLOC; - atomic_set(&conn->usage, 2); + refcount_set(&conn->ref, 2); + conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle); atomic_inc(&rxnet->nr_conns); write_lock(&rxnet->conn_lock); @@ -135,7 +142,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn write_unlock(&rxnet->conn_lock); trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, - atomic_read(&conn->usage), + refcount_read(&conn->ref), __builtin_return_address(0)); } @@ -149,7 +156,6 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, struct rxrpc_connection *conn, const struct rxrpc_security *sec, - struct key *key, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -163,7 +169,6 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, conn->security_ix = sp->hdr.securityIndex; conn->out_clientflag = 0; conn->security = sec; - conn->server_key = key_get(key); if (conn->security_ix) conn->state = RXRPC_CONN_SERVICE_UNSECURED; else diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index ef10fbf71b15..721d847ba92b 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -91,11 +91,11 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, /* We analyse the number of packets that get ACK'd per RTT * period and increase the window if we managed to fill it. */ - if (call->peer->rtt_usage == 0) + if (call->peer->rtt_count == 0) goto out; if (ktime_before(skb->tstamp, - ktime_add_ns(call->cong_tstamp, - call->peer->rtt))) + ktime_add_us(call->cong_tstamp, + call->peer->srtt_us >> 3))) goto out_no_clear_ca; change = rxrpc_cong_rtt_window_end; call->cong_tstamp = skb->tstamp; @@ -275,7 +275,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, case RXRPC_CALL_SERVER_AWAIT_ACK: __rxrpc_call_completed(call); - rxrpc_notify_socket(call); state = call->state; break; @@ -413,8 +412,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); enum rxrpc_call_state state; - unsigned int j, nr_subpackets; - rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; + unsigned int j, nr_subpackets, nr_unacked = 0; + rxrpc_serial_t serial = sp->hdr.serial, ack_serial = serial; rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack; bool immediate_ack = false, jumbo_bad = false; u8 ack = 0; @@ -431,7 +430,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) return; } - if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) { + if (state == RXRPC_CALL_SERVER_RECV_REQUEST) { unsigned long timo = READ_ONCE(call->next_req_timo); unsigned long now, expect_req_by; @@ -454,7 +453,6 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) !rxrpc_receiving_reply(call)) goto unlock; - call->ackr_prev_seq = seq0; hard_ack = READ_ONCE(call->rx_hard_ack); nr_subpackets = sp->nr_subpackets; @@ -535,6 +533,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) ack_serial = serial; } + if (after(seq0, call->ackr_highest_seq)) + call->ackr_highest_seq = seq0; + /* Queue the packet. We use a couple of memory barriers here as need * to make sure that rx_top is perceived to be set after the buffer * pointer and that the buffer pointer is set after the annotation and @@ -568,6 +569,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) sp = NULL; } + nr_unacked++; + if (last) { set_bit(RXRPC_CALL_RX_LAST, &call->flags); if (!ack) { @@ -587,9 +590,14 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) } call->rx_expect_next = seq + 1; } + if (!ack) + ack_serial = serial; } ack: + if (atomic_add_return(nr_unacked, &call->ackr_nr_unacked) > 2 && !ack) + ack = RXRPC_ACK_IDLE; + if (ack) rxrpc_propose_ACK(call, ack, ack_serial, immediate_ack, true, @@ -609,36 +617,57 @@ unlock: } /* - * Process a requested ACK. + * See if there's a cached RTT probe to complete. */ -static void rxrpc_input_requested_ack(struct rxrpc_call *call, - ktime_t resp_time, - rxrpc_serial_t orig_serial, - rxrpc_serial_t ack_serial) +static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t acked_serial, + rxrpc_serial_t ack_serial, + enum rxrpc_rtt_rx_trace type) { - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; + rxrpc_serial_t orig_serial; + unsigned long avail; ktime_t sent_at; - int ix; + bool matched = false; + int i; - for (ix = 0; ix < RXRPC_RXTX_BUFF_SIZE; ix++) { - skb = call->rxtx_buffer[ix]; - if (!skb) - continue; + avail = READ_ONCE(call->rtt_avail); + smp_rmb(); /* Read avail bits before accessing data. */ - sent_at = skb->tstamp; - smp_rmb(); /* Read timestamp before serial. */ - sp = rxrpc_skb(skb); - if (sp->hdr.serial != orig_serial) + for (i = 0; i < ARRAY_SIZE(call->rtt_serial); i++) { + if (!test_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &avail)) continue; - goto found; - } - return; + sent_at = call->rtt_sent_at[i]; + orig_serial = call->rtt_serial[i]; + + if (orig_serial == acked_serial) { + clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); + smp_mb(); /* Read data before setting avail bit */ + set_bit(i, &call->rtt_avail); + if (type != rxrpc_rtt_rx_cancel) + rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial, + sent_at, resp_time); + else + trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_cancel, i, + orig_serial, acked_serial, 0, 0); + matched = true; + } -found: - rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_requested_ack, - orig_serial, ack_serial, sent_at, resp_time); + /* If a later serial is being acked, then mark this slot as + * being available. + */ + if (after(acked_serial, orig_serial)) { + trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_obsolete, i, + orig_serial, acked_serial, 0, 0); + clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); + smp_wmb(); + set_bit(i, &call->rtt_avail); + } + } + + if (!matched) + trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_lost, 9, 0, acked_serial, 0, 0); } /* @@ -683,27 +712,11 @@ static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call) */ static void rxrpc_input_ping_response(struct rxrpc_call *call, ktime_t resp_time, - rxrpc_serial_t orig_serial, + rxrpc_serial_t acked_serial, rxrpc_serial_t ack_serial) { - rxrpc_serial_t ping_serial; - ktime_t ping_time; - - ping_time = call->ping_time; - smp_rmb(); - ping_serial = READ_ONCE(call->ping_serial); - - if (orig_serial == call->acks_lost_ping) + if (acked_serial == call->acks_lost_ping) rxrpc_input_check_for_lost_ack(call); - - if (before(orig_serial, ping_serial) || - !test_and_clear_bit(RXRPC_CALL_PINGING, &call->flags)) - return; - if (after(orig_serial, ping_serial)) - return; - - rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response, - orig_serial, ack_serial, ping_time, resp_time); } /* @@ -723,13 +736,12 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), rwind, ntohl(ackinfo->jumbo_max)); + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; if (call->tx_winsize != rwind) { - if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) - rwind = RXRPC_RXTX_BUFF_SIZE - 1; if (rwind > call->tx_winsize) wake = true; - trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, - ntohl(ackinfo->rwind), wake); + trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, rwind, wake); call->tx_winsize = rwind; } @@ -803,6 +815,30 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, } /* + * Return true if the ACK is valid - ie. it doesn't appear to have regressed + * with respect to the ack state conveyed by preceding ACKs. + */ +static bool rxrpc_is_ack_valid(struct rxrpc_call *call, + rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt) +{ + rxrpc_seq_t base = READ_ONCE(call->acks_first_seq); + + if (after(first_pkt, base)) + return true; /* The window advanced */ + + if (before(first_pkt, base)) + return false; /* firstPacket regressed */ + + if (after_eq(prev_pkt, call->acks_prev_seq)) + return true; /* previousPacket hasn't regressed. */ + + /* Some rx implementations put a serial number in previousPacket. */ + if (after_eq(prev_pkt, base + call->tx_winsize)) + return false; + return true; +} + +/* * Process an ACK packet. * * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet @@ -821,7 +857,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_ackinfo info; u8 acks[RXRPC_MAXACKS]; } buf; - rxrpc_serial_t acked_serial; + rxrpc_serial_t ack_serial, acked_serial; rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; @@ -834,6 +870,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } offset += sizeof(buf.ack); + ack_serial = sp->hdr.serial; acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); prev_pkt = ntohl(buf.ack.previousPacket); @@ -842,32 +879,73 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? buf.ack.reason : RXRPC_ACK__INVALID); - trace_rxrpc_rx_ack(call, sp->hdr.serial, acked_serial, + trace_rxrpc_rx_ack(call, ack_serial, acked_serial, first_soft_ack, prev_pkt, summary.ack_reason, nr_acks); - if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) + switch (buf.ack.reason) { + case RXRPC_ACK_PING_RESPONSE: rxrpc_input_ping_response(call, skb->tstamp, acked_serial, - sp->hdr.serial); - if (buf.ack.reason == RXRPC_ACK_REQUESTED) - rxrpc_input_requested_ack(call, skb->tstamp, acked_serial, - sp->hdr.serial); + ack_serial); + rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, + rxrpc_rtt_rx_ping_response); + break; + case RXRPC_ACK_REQUESTED: + rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, + rxrpc_rtt_rx_requested_ack); + break; + default: + if (acked_serial != 0) + rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, + rxrpc_rtt_rx_cancel); + break; + } if (buf.ack.reason == RXRPC_ACK_PING) { - _proto("Rx ACK %%%u PING Request", sp->hdr.serial); + _proto("Rx ACK %%%u PING Request", ack_serial); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - sp->hdr.serial, true, true, + ack_serial, true, true, rxrpc_propose_ack_respond_to_ping); } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, - sp->hdr.serial, true, true, + ack_serial, true, true, rxrpc_propose_ack_respond_to_ack); } + /* If we get an EXCEEDS_WINDOW ACK from the server, it probably + * indicates that the client address changed due to NAT. The server + * lost the call because it switched to a different peer. + */ + if (unlikely(buf.ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) && + first_soft_ack == 1 && + prev_pkt == 0 && + rxrpc_is_client_call(call)) { + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + 0, -ENETRESET); + return; + } + + /* If we get an OUT_OF_SEQUENCE ACK from the server, that can also + * indicate a change of address. However, we can retransmit the call + * if we still have it buffered to the beginning. + */ + if (unlikely(buf.ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) && + first_soft_ack == 1 && + prev_pkt == 0 && + call->tx_hard_ack == 0 && + rxrpc_is_client_call(call)) { + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + 0, -ENETRESET); + return; + } + /* Discard any out-of-order or duplicate ACKs (outside lock). */ - if (before(first_soft_ack, call->ackr_first_seq) || - before(prev_pkt, call->ackr_prev_seq)) + if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, + first_soft_ack, call->acks_first_seq, + prev_pkt, call->acks_prev_seq); return; + } buf.info.rxMTU = 0; ioffset = offset + nr_acks + 3; @@ -878,14 +956,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) spin_lock(&call->input_lock); /* Discard any out-of-order or duplicate ACKs (inside lock). */ - if (before(first_soft_ack, call->ackr_first_seq) || - before(prev_pkt, call->ackr_prev_seq)) + if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, + first_soft_ack, call->acks_first_seq, + prev_pkt, call->acks_prev_seq); goto out; + } call->acks_latest_ts = skb->tstamp; - call->acks_latest = sp->hdr.serial; - call->ackr_first_seq = first_soft_ack; - call->ackr_prev_seq = prev_pkt; + call->acks_first_seq = first_soft_ack; + call->acks_prev_seq = prev_pkt; /* Parse rwind and mtu sizes if provided. */ if (buf.info.rxMTU) @@ -937,7 +1017,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) RXRPC_TX_ANNO_LAST && summary.nr_acks == call->tx_top - hard_ack && rxrpc_is_client_call(call)) - rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, ack_serial, false, true, rxrpc_propose_ack_ping_for_lost_reply); @@ -984,9 +1064,8 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); - if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, -ECONNABORTED)) - rxrpc_notify_socket(call); + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + abort_code, -ECONNABORTED); } /* @@ -1058,7 +1137,7 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); - /* Fall through */ + fallthrough; case RXRPC_CALL_COMPLETE: break; default: @@ -1073,7 +1152,6 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, spin_lock(&rx->incoming_lock); __rxrpc_disconnect_call(conn, call); spin_unlock(&rx->incoming_lock); - rxrpc_notify_socket(call); } /* @@ -1112,8 +1190,6 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, */ static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) { - CHECK_SLAB_OKAY(&local->usage); - if (rxrpc_get_local_maybe(local)) { skb_queue_tail(&local->reject_queue, skb); rxrpc_queue_local(local); @@ -1218,12 +1294,12 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) case RXRPC_PACKET_TYPE_BUSY: if (rxrpc_to_server(sp)) goto discard; - /* Fall through */ + fallthrough; case RXRPC_PACKET_TYPE_ACK: case RXRPC_PACKET_TYPE_ACKALL: if (sp->hdr.callNumber == 0) goto bad_message; - /* Fall through */ + fallthrough; case RXRPC_PACKET_TYPE_ABORT: break; @@ -1371,7 +1447,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) } } - if (!call || atomic_read(&call->usage) == 0) { + if (!call || refcount_read(&call->ref) == 0) { if (rxrpc_to_client(sp) || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) goto bad_message; diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index f6c59f5fae9d..9aae99d67833 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -8,20 +8,25 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" -static int none_init_connection_security(struct rxrpc_connection *conn) +static int none_init_connection_security(struct rxrpc_connection *conn, + struct rxrpc_key_token *token) { return 0; } -static int none_prime_packet_security(struct rxrpc_connection *conn) +/* + * Work out how much data we can put in an unsecured packet. + */ +static int none_how_much_data(struct rxrpc_call *call, size_t remain, + size_t *_buf_size, size_t *_data_size, size_t *_offset) { + *_buf_size = *_data_size = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); + *_offset = 0; return 0; } -static int none_secure_packet(struct rxrpc_call *call, - struct sk_buff *skb, - size_t data_size, - void *sechdr) +static int none_secure_packet(struct rxrpc_call *call, struct sk_buff *skb, + size_t data_size) { return 0; } @@ -86,8 +91,8 @@ const struct rxrpc_security rxrpc_no_security = { .init = none_init, .exit = none_exit, .init_connection_security = none_init_connection_security, - .prime_packet_security = none_prime_packet_security, .free_call_crypto = none_free_call_crypto, + .how_much_data = none_how_much_data, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, .locate_data = none_locate_data, diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 6c3f35fac42d..8d2073e0e3da 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -5,7 +5,7 @@ * Written by David Howells (dhowells@redhat.com) * * RxRPC keys should have a description of describing their purpose: - * "afs@CAMBRIDGE.REDHAT.COM> + * "afs@example.com" */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -23,15 +23,11 @@ #include <keys/user-type.h> #include "ar-internal.h" -static int rxrpc_vet_description_s(const char *); static int rxrpc_preparse(struct key_preparsed_payload *); -static int rxrpc_preparse_s(struct key_preparsed_payload *); static void rxrpc_free_preparse(struct key_preparsed_payload *); -static void rxrpc_free_preparse_s(struct key_preparsed_payload *); static void rxrpc_destroy(struct key *); -static void rxrpc_destroy_s(struct key *); static void rxrpc_describe(const struct key *, struct seq_file *); -static long rxrpc_read(const struct key *, char __user *, size_t); +static long rxrpc_read(const struct key *, char *, size_t); /* * rxrpc defined keys take an arbitrary string as the description and an @@ -50,38 +46,6 @@ struct key_type key_type_rxrpc = { EXPORT_SYMBOL(key_type_rxrpc); /* - * rxrpc server defined keys take "<serviceId>:<securityIndex>" as the - * description and an 8-byte decryption key as the payload - */ -struct key_type key_type_rxrpc_s = { - .name = "rxrpc_s", - .flags = KEY_TYPE_NET_DOMAIN, - .vet_description = rxrpc_vet_description_s, - .preparse = rxrpc_preparse_s, - .free_preparse = rxrpc_free_preparse_s, - .instantiate = generic_key_instantiate, - .destroy = rxrpc_destroy_s, - .describe = rxrpc_describe, -}; - -/* - * Vet the description for an RxRPC server key - */ -static int rxrpc_vet_description_s(const char *desc) -{ - unsigned long num; - char *p; - - num = simple_strtoul(desc, &p, 10); - if (*p != ':' || num > 65535) - return -EINVAL; - num = simple_strtoul(p + 1, &p, 10); - if (*p || num < 1 || num > 255) - return -EINVAL; - return 0; -} - -/* * parse an RxKAD type XDR format token * - the caller guarantees we have at least 4 words */ @@ -165,402 +129,17 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep, return 0; } -static void rxrpc_free_krb5_principal(struct krb5_principal *princ) -{ - int loop; - - if (princ->name_parts) { - for (loop = princ->n_name_parts - 1; loop >= 0; loop--) - kfree(princ->name_parts[loop]); - kfree(princ->name_parts); - } - kfree(princ->realm); -} - -static void rxrpc_free_krb5_tagged(struct krb5_tagged_data *td) -{ - kfree(td->data); -} - -/* - * free up an RxK5 token - */ -static void rxrpc_rxk5_free(struct rxk5_key *rxk5) -{ - int loop; - - rxrpc_free_krb5_principal(&rxk5->client); - rxrpc_free_krb5_principal(&rxk5->server); - rxrpc_free_krb5_tagged(&rxk5->session); - - if (rxk5->addresses) { - for (loop = rxk5->n_addresses - 1; loop >= 0; loop--) - rxrpc_free_krb5_tagged(&rxk5->addresses[loop]); - kfree(rxk5->addresses); - } - if (rxk5->authdata) { - for (loop = rxk5->n_authdata - 1; loop >= 0; loop--) - rxrpc_free_krb5_tagged(&rxk5->authdata[loop]); - kfree(rxk5->authdata); - } - - kfree(rxk5->ticket); - kfree(rxk5->ticket2); - kfree(rxk5); -} - -/* - * extract a krb5 principal - */ -static int rxrpc_krb5_decode_principal(struct krb5_principal *princ, - const __be32 **_xdr, - unsigned int *_toklen) -{ - const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, n_parts, loop, tmp, paddedlen; - - /* there must be at least one name, and at least #names+1 length - * words */ - if (toklen <= 12) - return -EINVAL; - - _enter(",{%x,%x,%x},%u", - ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), toklen); - - n_parts = ntohl(*xdr++); - toklen -= 4; - if (n_parts <= 0 || n_parts > AFSTOKEN_K5_COMPONENTS_MAX) - return -EINVAL; - princ->n_name_parts = n_parts; - - if (toklen <= (n_parts + 1) * 4) - return -EINVAL; - - princ->name_parts = kcalloc(n_parts, sizeof(char *), GFP_KERNEL); - if (!princ->name_parts) - return -ENOMEM; - - for (loop = 0; loop < n_parts; loop++) { - if (toklen < 4) - return -EINVAL; - tmp = ntohl(*xdr++); - toklen -= 4; - if (tmp <= 0 || tmp > AFSTOKEN_STRING_MAX) - return -EINVAL; - paddedlen = (tmp + 3) & ~3; - if (paddedlen > toklen) - return -EINVAL; - princ->name_parts[loop] = kmalloc(tmp + 1, GFP_KERNEL); - if (!princ->name_parts[loop]) - return -ENOMEM; - memcpy(princ->name_parts[loop], xdr, tmp); - princ->name_parts[loop][tmp] = 0; - toklen -= paddedlen; - xdr += paddedlen >> 2; - } - - if (toklen < 4) - return -EINVAL; - tmp = ntohl(*xdr++); - toklen -= 4; - if (tmp <= 0 || tmp > AFSTOKEN_K5_REALM_MAX) - return -EINVAL; - paddedlen = (tmp + 3) & ~3; - if (paddedlen > toklen) - return -EINVAL; - princ->realm = kmalloc(tmp + 1, GFP_KERNEL); - if (!princ->realm) - return -ENOMEM; - memcpy(princ->realm, xdr, tmp); - princ->realm[tmp] = 0; - toklen -= paddedlen; - xdr += paddedlen >> 2; - - _debug("%s/...@%s", princ->name_parts[0], princ->realm); - - *_xdr = xdr; - *_toklen = toklen; - _leave(" = 0 [toklen=%u]", toklen); - return 0; -} - -/* - * extract a piece of krb5 tagged data - */ -static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td, - size_t max_data_size, - const __be32 **_xdr, - unsigned int *_toklen) -{ - const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, len, paddedlen; - - /* there must be at least one tag and one length word */ - if (toklen <= 8) - return -EINVAL; - - _enter(",%zu,{%x,%x},%u", - max_data_size, ntohl(xdr[0]), ntohl(xdr[1]), toklen); - - td->tag = ntohl(*xdr++); - len = ntohl(*xdr++); - toklen -= 8; - if (len > max_data_size) - return -EINVAL; - paddedlen = (len + 3) & ~3; - if (paddedlen > toklen) - return -EINVAL; - td->data_len = len; - - if (len > 0) { - td->data = kmemdup(xdr, len, GFP_KERNEL); - if (!td->data) - return -ENOMEM; - toklen -= paddedlen; - xdr += paddedlen >> 2; - } - - _debug("tag %x len %x", td->tag, td->data_len); - - *_xdr = xdr; - *_toklen = toklen; - _leave(" = 0 [toklen=%u]", toklen); - return 0; -} - -/* - * extract an array of tagged data - */ -static int rxrpc_krb5_decode_tagged_array(struct krb5_tagged_data **_td, - u8 *_n_elem, - u8 max_n_elem, - size_t max_elem_size, - const __be32 **_xdr, - unsigned int *_toklen) -{ - struct krb5_tagged_data *td; - const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, n_elem, loop; - int ret; - - /* there must be at least one count */ - if (toklen < 4) - return -EINVAL; - - _enter(",,%u,%zu,{%x},%u", - max_n_elem, max_elem_size, ntohl(xdr[0]), toklen); - - n_elem = ntohl(*xdr++); - toklen -= 4; - if (n_elem > max_n_elem) - return -EINVAL; - *_n_elem = n_elem; - if (n_elem > 0) { - if (toklen <= (n_elem + 1) * 4) - return -EINVAL; - - _debug("n_elem %d", n_elem); - - td = kcalloc(n_elem, sizeof(struct krb5_tagged_data), - GFP_KERNEL); - if (!td) - return -ENOMEM; - *_td = td; - - for (loop = 0; loop < n_elem; loop++) { - ret = rxrpc_krb5_decode_tagged_data(&td[loop], - max_elem_size, - &xdr, &toklen); - if (ret < 0) - return ret; - } - } - - *_xdr = xdr; - *_toklen = toklen; - _leave(" = 0 [toklen=%u]", toklen); - return 0; -} - -/* - * extract a krb5 ticket - */ -static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen, - const __be32 **_xdr, unsigned int *_toklen) -{ - const __be32 *xdr = *_xdr; - unsigned int toklen = *_toklen, len, paddedlen; - - /* there must be at least one length word */ - if (toklen <= 4) - return -EINVAL; - - _enter(",{%x},%u", ntohl(xdr[0]), toklen); - - len = ntohl(*xdr++); - toklen -= 4; - if (len > AFSTOKEN_K5_TIX_MAX) - return -EINVAL; - paddedlen = (len + 3) & ~3; - if (paddedlen > toklen) - return -EINVAL; - *_tktlen = len; - - _debug("ticket len %u", len); - - if (len > 0) { - *_ticket = kmemdup(xdr, len, GFP_KERNEL); - if (!*_ticket) - return -ENOMEM; - toklen -= paddedlen; - xdr += paddedlen >> 2; - } - - *_xdr = xdr; - *_toklen = toklen; - _leave(" = 0 [toklen=%u]", toklen); - return 0; -} - -/* - * parse an RxK5 type XDR format token - * - the caller guarantees we have at least 4 words - */ -static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep, - size_t datalen, - const __be32 *xdr, unsigned int toklen) -{ - struct rxrpc_key_token *token, **pptoken; - struct rxk5_key *rxk5; - const __be32 *end_xdr = xdr + (toklen >> 2); - time64_t expiry; - int ret; - - _enter(",{%x,%x,%x,%x},%u", - ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]), - toklen); - - /* reserve some payload space for this subkey - the length of the token - * is a reasonable approximation */ - prep->quotalen = datalen + toklen; - - token = kzalloc(sizeof(*token), GFP_KERNEL); - if (!token) - return -ENOMEM; - - rxk5 = kzalloc(sizeof(*rxk5), GFP_KERNEL); - if (!rxk5) { - kfree(token); - return -ENOMEM; - } - - token->security_index = RXRPC_SECURITY_RXK5; - token->k5 = rxk5; - - /* extract the principals */ - ret = rxrpc_krb5_decode_principal(&rxk5->client, &xdr, &toklen); - if (ret < 0) - goto error; - ret = rxrpc_krb5_decode_principal(&rxk5->server, &xdr, &toklen); - if (ret < 0) - goto error; - - /* extract the session key and the encoding type (the tag field -> - * ENCTYPE_xxx) */ - ret = rxrpc_krb5_decode_tagged_data(&rxk5->session, AFSTOKEN_DATA_MAX, - &xdr, &toklen); - if (ret < 0) - goto error; - - if (toklen < 4 * 8 + 2 * 4) - goto inval; - rxk5->authtime = be64_to_cpup((const __be64 *) xdr); - xdr += 2; - rxk5->starttime = be64_to_cpup((const __be64 *) xdr); - xdr += 2; - rxk5->endtime = be64_to_cpup((const __be64 *) xdr); - xdr += 2; - rxk5->renew_till = be64_to_cpup((const __be64 *) xdr); - xdr += 2; - rxk5->is_skey = ntohl(*xdr++); - rxk5->flags = ntohl(*xdr++); - toklen -= 4 * 8 + 2 * 4; - - _debug("times: a=%llx s=%llx e=%llx rt=%llx", - rxk5->authtime, rxk5->starttime, rxk5->endtime, - rxk5->renew_till); - _debug("is_skey=%x flags=%x", rxk5->is_skey, rxk5->flags); - - /* extract the permitted client addresses */ - ret = rxrpc_krb5_decode_tagged_array(&rxk5->addresses, - &rxk5->n_addresses, - AFSTOKEN_K5_ADDRESSES_MAX, - AFSTOKEN_DATA_MAX, - &xdr, &toklen); - if (ret < 0) - goto error; - - ASSERTCMP((end_xdr - xdr) << 2, ==, toklen); - - /* extract the tickets */ - ret = rxrpc_krb5_decode_ticket(&rxk5->ticket, &rxk5->ticket_len, - &xdr, &toklen); - if (ret < 0) - goto error; - ret = rxrpc_krb5_decode_ticket(&rxk5->ticket2, &rxk5->ticket2_len, - &xdr, &toklen); - if (ret < 0) - goto error; - - ASSERTCMP((end_xdr - xdr) << 2, ==, toklen); - - /* extract the typed auth data */ - ret = rxrpc_krb5_decode_tagged_array(&rxk5->authdata, - &rxk5->n_authdata, - AFSTOKEN_K5_AUTHDATA_MAX, - AFSTOKEN_BDATALN_MAX, - &xdr, &toklen); - if (ret < 0) - goto error; - - ASSERTCMP((end_xdr - xdr) << 2, ==, toklen); - - if (toklen != 0) - goto inval; - - /* attach the payload */ - for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0]; - *pptoken; - pptoken = &(*pptoken)->next) - continue; - *pptoken = token; - expiry = rxrpc_u32_to_time64(token->k5->endtime); - if (expiry < prep->expiry) - prep->expiry = expiry; - - _leave(" = 0"); - return 0; - -inval: - ret = -EINVAL; -error: - rxrpc_rxk5_free(rxk5); - kfree(token); - _leave(" = %d", ret); - return ret; -} - /* * attempt to parse the data as the XDR format * - the caller guarantees we have more than 7 words */ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep) { - const __be32 *xdr = prep->data, *token; + const __be32 *xdr = prep->data, *token, *p; const char *cp; unsigned int len, paddedlen, loop, ntoken, toklen, sec_ix; size_t datalen = prep->datalen; - int ret; + int ret, ret2; _enter(",{%x,%x,%x,%x},%zu", ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]), @@ -610,20 +189,20 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep) goto not_xdr; /* check each token wrapper */ - token = xdr; + p = xdr; loop = ntoken; do { if (datalen < 8) goto not_xdr; - toklen = ntohl(*xdr++); - sec_ix = ntohl(*xdr); + toklen = ntohl(*p++); + sec_ix = ntohl(*p); datalen -= 4; _debug("token: [%x/%zx] %x", toklen, datalen, sec_ix); paddedlen = (toklen + 3) & ~3; if (toklen < 20 || toklen > datalen || paddedlen > datalen) goto not_xdr; datalen -= paddedlen; - xdr += paddedlen >> 2; + p += paddedlen >> 2; } while (--loop > 0); @@ -634,44 +213,50 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep) /* okay: we're going to assume it's valid XDR format * - we ignore the cellname, relying on the key to be correctly named */ + ret = -EPROTONOSUPPORT; do { - xdr = token; toklen = ntohl(*xdr++); - token = xdr + ((toklen + 3) >> 2); - sec_ix = ntohl(*xdr++); + token = xdr; + xdr += (toklen + 3) / 4; + + sec_ix = ntohl(*token++); toklen -= 4; - _debug("TOKEN type=%u [%p-%p]", sec_ix, xdr, token); + _debug("TOKEN type=%x len=%x", sec_ix, toklen); switch (sec_ix) { case RXRPC_SECURITY_RXKAD: - ret = rxrpc_preparse_xdr_rxkad(prep, datalen, xdr, toklen); - if (ret != 0) - goto error; + ret2 = rxrpc_preparse_xdr_rxkad(prep, datalen, token, toklen); break; + default: + ret2 = -EPROTONOSUPPORT; + break; + } - case RXRPC_SECURITY_RXK5: - ret = rxrpc_preparse_xdr_rxk5(prep, datalen, xdr, toklen); + switch (ret2) { + case 0: + ret = 0; + break; + case -EPROTONOSUPPORT: + break; + case -ENOPKG: if (ret != 0) - goto error; + ret = -ENOPKG; break; - default: - ret = -EPROTONOSUPPORT; + ret = ret2; goto error; } } while (--ntoken > 0); - _leave(" = 0"); - return 0; +error: + _leave(" = %d", ret); + return ret; not_xdr: _leave(" = -EPROTO"); return -EPROTO; -error: - _leave(" = %d", ret); - return ret; } /* @@ -805,10 +390,6 @@ static void rxrpc_free_token_list(struct rxrpc_key_token *token) case RXRPC_SECURITY_RXKAD: kfree(token->kad); break; - case RXRPC_SECURITY_RXK5: - if (token->k5) - rxrpc_rxk5_free(token->k5); - break; default: pr_err("Unknown token type %x on rxrpc key\n", token->security_index); @@ -828,45 +409,6 @@ static void rxrpc_free_preparse(struct key_preparsed_payload *prep) } /* - * Preparse a server secret key. - * - * The data should be the 8-byte secret key. - */ -static int rxrpc_preparse_s(struct key_preparsed_payload *prep) -{ - struct crypto_skcipher *ci; - - _enter("%zu", prep->datalen); - - if (prep->datalen != 8) - return -EINVAL; - - memcpy(&prep->payload.data[2], prep->data, 8); - - ci = crypto_alloc_skcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(ci)) { - _leave(" = %ld", PTR_ERR(ci)); - return PTR_ERR(ci); - } - - if (crypto_skcipher_setkey(ci, prep->data, 8) < 0) - BUG(); - - prep->payload.data[0] = ci; - _leave(" = 0"); - return 0; -} - -/* - * Clean up preparse data. - */ -static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) -{ - if (prep->payload.data[0]) - crypto_free_skcipher(prep->payload.data[0]); -} - -/* * dispose of the data dangling from the corpse of a rxrpc key */ static void rxrpc_destroy(struct key *key) @@ -875,80 +417,56 @@ static void rxrpc_destroy(struct key *key) } /* - * dispose of the data dangling from the corpse of a rxrpc key - */ -static void rxrpc_destroy_s(struct key *key) -{ - if (key->payload.data[0]) { - crypto_free_skcipher(key->payload.data[0]); - key->payload.data[0] = NULL; - } -} - -/* * describe the rxrpc key */ static void rxrpc_describe(const struct key *key, struct seq_file *m) { - seq_puts(m, key->description); -} - -/* - * grab the security key for a socket - */ -int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen) -{ - struct key *key; - char *description; + const struct rxrpc_key_token *token; + const char *sep = ": "; - _enter(""); + seq_puts(m, key->description); - if (optlen <= 0 || optlen > PAGE_SIZE - 1) - return -EINVAL; + for (token = key->payload.data[0]; token; token = token->next) { + seq_puts(m, sep); - description = memdup_user_nul(optval, optlen); - if (IS_ERR(description)) - return PTR_ERR(description); + switch (token->security_index) { + case RXRPC_SECURITY_RXKAD: + seq_puts(m, "ka"); + break; + default: /* we have a ticket we can't encode */ + seq_printf(m, "%u", token->security_index); + break; + } - key = request_key_net(&key_type_rxrpc, description, sock_net(&rx->sk), NULL); - if (IS_ERR(key)) { - kfree(description); - _leave(" = %ld", PTR_ERR(key)); - return PTR_ERR(key); + sep = " "; } - - rx->key = key; - kfree(description); - _leave(" = 0 [key %x]", key->serial); - return 0; } /* - * grab the security keyring for a server socket + * grab the security key for a socket */ -int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval, - int optlen) +int rxrpc_request_key(struct rxrpc_sock *rx, sockptr_t optval, int optlen) { struct key *key; char *description; _enter(""); - if (optlen <= 0 || optlen > PAGE_SIZE - 1) + if (optlen <= 0 || optlen > PAGE_SIZE - 1 || rx->securities) return -EINVAL; - description = memdup_user_nul(optval, optlen); + description = memdup_sockptr_nul(optval, optlen); if (IS_ERR(description)) return PTR_ERR(description); - key = request_key_net(&key_type_keyring, description, sock_net(&rx->sk), NULL); + key = request_key_net(&key_type_rxrpc, description, sock_net(&rx->sk), NULL); if (IS_ERR(key)) { kfree(description); _leave(" = %ld", PTR_ERR(key)); return PTR_ERR(key); } - rx->securities = key; + rx->key = key; kfree(description); _leave(" = 0 [key %x]", key->serial); return 0; @@ -1042,15 +560,13 @@ EXPORT_SYMBOL(rxrpc_get_null_key); * - this returns the result in XDR form */ static long rxrpc_read(const struct key *key, - char __user *buffer, size_t buflen) + char *buffer, size_t buflen) { const struct rxrpc_key_token *token; - const struct krb5_principal *princ; size_t size; - __be32 __user *xdr, *oldxdr; + __be32 *xdr, *oldxdr; u32 cnlen, toksize, ntoks, tok, zero; u16 toksizes[AFSTOKEN_MAX]; - int loop; _enter(""); @@ -1073,43 +589,16 @@ static long rxrpc_read(const struct key *key, switch (token->security_index) { case RXRPC_SECURITY_RXKAD: - toksize += 9 * 4; /* viceid, kvno, key*2 + len, begin, + toksize += 8 * 4; /* viceid, kvno, key*2, begin, * end, primary, tktlen */ - toksize += RND(token->kad->ticket_len); - break; - - case RXRPC_SECURITY_RXK5: - princ = &token->k5->client; - toksize += 4 + princ->n_name_parts * 4; - for (loop = 0; loop < princ->n_name_parts; loop++) - toksize += RND(strlen(princ->name_parts[loop])); - toksize += 4 + RND(strlen(princ->realm)); - - princ = &token->k5->server; - toksize += 4 + princ->n_name_parts * 4; - for (loop = 0; loop < princ->n_name_parts; loop++) - toksize += RND(strlen(princ->name_parts[loop])); - toksize += 4 + RND(strlen(princ->realm)); - - toksize += 8 + RND(token->k5->session.data_len); - - toksize += 4 * 8 + 2 * 4; - - toksize += 4 + token->k5->n_addresses * 8; - for (loop = 0; loop < token->k5->n_addresses; loop++) - toksize += RND(token->k5->addresses[loop].data_len); - - toksize += 4 + RND(token->k5->ticket_len); - toksize += 4 + RND(token->k5->ticket2_len); - - toksize += 4 + token->k5->n_authdata * 8; - for (loop = 0; loop < token->k5->n_authdata; loop++) - toksize += RND(token->k5->authdata[loop].data_len); + if (!token->no_leak_key) + toksize += RND(token->kad->ticket_len); break; default: /* we have a ticket we can't encode */ - BUG(); - continue; + pr_err("Unsupported key token type (%u)\n", + token->security_index); + return -ENOPKG; } _debug("token[%u]: toksize=%u", ntoks, toksize); @@ -1124,30 +613,33 @@ static long rxrpc_read(const struct key *key, if (!buffer || buflen < size) return size; - xdr = (__be32 __user *) buffer; + xdr = (__be32 *)buffer; zero = 0; #define ENCODE(x) \ do { \ - __be32 y = htonl(x); \ - if (put_user(y, xdr++) < 0) \ - goto fault; \ + *xdr++ = htonl(x); \ } while(0) #define ENCODE_DATA(l, s) \ do { \ u32 _l = (l); \ ENCODE(l); \ - if (copy_to_user(xdr, (s), _l) != 0) \ - goto fault; \ - if (_l & 3 && \ - copy_to_user((u8 __user *)xdr + _l, &zero, 4 - (_l & 3)) != 0) \ - goto fault; \ + memcpy(xdr, (s), _l); \ + if (_l & 3) \ + memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \ + xdr += (_l + 3) >> 2; \ + } while(0) +#define ENCODE_BYTES(l, s) \ + do { \ + u32 _l = (l); \ + memcpy(xdr, (s), _l); \ + if (_l & 3) \ + memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \ xdr += (_l + 3) >> 2; \ } while(0) #define ENCODE64(x) \ do { \ __be64 y = cpu_to_be64(x); \ - if (copy_to_user(xdr, &y, 8) != 0) \ - goto fault; \ + memcpy(xdr, &y, 8); \ xdr += 8 >> 2; \ } while(0) #define ENCODE_STR(s) \ @@ -1171,58 +663,20 @@ static long rxrpc_read(const struct key *key, case RXRPC_SECURITY_RXKAD: ENCODE(token->kad->vice_id); ENCODE(token->kad->kvno); - ENCODE_DATA(8, token->kad->session_key); + ENCODE_BYTES(8, token->kad->session_key); ENCODE(token->kad->start); ENCODE(token->kad->expiry); ENCODE(token->kad->primary_flag); - ENCODE_DATA(token->kad->ticket_len, token->kad->ticket); - break; - - case RXRPC_SECURITY_RXK5: - princ = &token->k5->client; - ENCODE(princ->n_name_parts); - for (loop = 0; loop < princ->n_name_parts; loop++) - ENCODE_STR(princ->name_parts[loop]); - ENCODE_STR(princ->realm); - - princ = &token->k5->server; - ENCODE(princ->n_name_parts); - for (loop = 0; loop < princ->n_name_parts; loop++) - ENCODE_STR(princ->name_parts[loop]); - ENCODE_STR(princ->realm); - - ENCODE(token->k5->session.tag); - ENCODE_DATA(token->k5->session.data_len, - token->k5->session.data); - - ENCODE64(token->k5->authtime); - ENCODE64(token->k5->starttime); - ENCODE64(token->k5->endtime); - ENCODE64(token->k5->renew_till); - ENCODE(token->k5->is_skey); - ENCODE(token->k5->flags); - - ENCODE(token->k5->n_addresses); - for (loop = 0; loop < token->k5->n_addresses; loop++) { - ENCODE(token->k5->addresses[loop].tag); - ENCODE_DATA(token->k5->addresses[loop].data_len, - token->k5->addresses[loop].data); - } - - ENCODE_DATA(token->k5->ticket_len, token->k5->ticket); - ENCODE_DATA(token->k5->ticket2_len, token->k5->ticket2); - - ENCODE(token->k5->n_authdata); - for (loop = 0; loop < token->k5->n_authdata; loop++) { - ENCODE(token->k5->authdata[loop].tag); - ENCODE_DATA(token->k5->authdata[loop].data_len, - token->k5->authdata[loop].data); - } + if (token->no_leak_key) + ENCODE(0); + else + ENCODE_DATA(token->kad->ticket_len, token->kad->ticket); break; default: - BUG(); - break; + pr_err("Unsupported key token type (%u)\n", + token->security_index); + return -ENOPKG; } ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==, @@ -1238,8 +692,4 @@ static long rxrpc_read(const struct key *key, ASSERTCMP((char __user *) xdr - buffer, ==, size); _leave(" = %zu", size); return size; - -fault: - _leave(" = -EFAULT"); - return -EFAULT; } diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 3ce6d628cd75..19e929c7c38b 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -77,7 +77,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, } /* - * Process event packets targetted at a local endpoint. + * Process event packets targeted at a local endpoint. */ void rxrpc_process_local_events(struct rxrpc_local *local) { diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index a6c1349e965d..38ea98ff426b 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -16,6 +16,7 @@ #include <linux/hashtable.h> #include <net/sock.h> #include <net/udp.h> +#include <net/udp_tunnel.h> #include <net/af_rxrpc.h> #include "ar-internal.h" @@ -78,16 +79,16 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); if (local) { - atomic_set(&local->usage, 1); + refcount_set(&local->ref, 1); atomic_set(&local->active_users, 1); local->rxnet = rxnet; - INIT_LIST_HEAD(&local->link); + INIT_HLIST_NODE(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); init_rwsem(&local->defrag_sem); skb_queue_head_init(&local->reject_queue); skb_queue_head_init(&local->event_queue); - local->client_conns = RB_ROOT; - spin_lock_init(&local->client_conns_lock); + local->client_bundles = RB_ROOT; + spin_lock_init(&local->client_bundles_lock); spin_lock_init(&local->lock); rwlock_init(&local->services_lock); local->debug_id = atomic_inc_return(&rxrpc_debug_id); @@ -106,105 +107,62 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, */ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) { + struct udp_tunnel_sock_cfg tuncfg = {NULL}; + struct sockaddr_rxrpc *srx = &local->srx; + struct udp_port_cfg udp_conf = {0}; struct sock *usk; - int ret, opt; + int ret; _enter("%p{%d,%d}", - local, local->srx.transport_type, local->srx.transport.family); + local, srx->transport_type, srx->transport.family); - /* create a socket to represent the local endpoint */ - ret = sock_create_kern(net, local->srx.transport.family, - local->srx.transport_type, 0, &local->socket); + udp_conf.family = srx->transport.family; + udp_conf.use_udp_checksums = true; + if (udp_conf.family == AF_INET) { + udp_conf.local_ip = srx->transport.sin.sin_addr; + udp_conf.local_udp_port = srx->transport.sin.sin_port; +#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6) + } else { + udp_conf.local_ip6 = srx->transport.sin6.sin6_addr; + udp_conf.local_udp_port = srx->transport.sin6.sin6_port; + udp_conf.use_udp6_tx_checksums = true; + udp_conf.use_udp6_rx_checksums = true; +#endif + } + ret = udp_sock_create(net, &udp_conf, &local->socket); if (ret < 0) { _leave(" = %d [socket]", ret); return ret; } + tuncfg.encap_type = UDP_ENCAP_RXRPC; + tuncfg.encap_rcv = rxrpc_input_packet; + tuncfg.encap_err_rcv = rxrpc_encap_err_rcv; + tuncfg.sk_user_data = local; + setup_udp_tunnel_sock(net, local->socket, &tuncfg); + /* set the socket up */ usk = local->socket->sk; - inet_sk(usk)->mc_loop = 0; - - /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ - inet_inc_convert_csum(usk); - - rcu_assign_sk_user_data(usk, local); - - udp_sk(usk)->encap_type = UDP_ENCAP_RXRPC; - udp_sk(usk)->encap_rcv = rxrpc_input_packet; - udp_sk(usk)->encap_destroy = NULL; - udp_sk(usk)->gro_receive = NULL; - udp_sk(usk)->gro_complete = NULL; - - udp_encap_enable(); -#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6) - if (local->srx.transport.family == AF_INET6) - udpv6_encap_enable(); -#endif usk->sk_error_report = rxrpc_error_report; - /* if a local address was supplied then bind it */ - if (local->srx.transport_len > sizeof(sa_family_t)) { - _debug("bind"); - ret = kernel_bind(local->socket, - (struct sockaddr *)&local->srx.transport, - local->srx.transport_len); - if (ret < 0) { - _debug("bind failed %d", ret); - goto error; - } - } - - switch (local->srx.transport.family) { + switch (srx->transport.family) { case AF_INET6: /* we want to receive ICMPv6 errors */ - opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } - - /* we want to set the don't fragment bit */ - opt = IPV6_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + ip6_sock_set_recverr(usk); /* Fall through and set IPv4 options too otherwise we don't get * errors from IPv4 packets sent through the IPv6 socket. */ - /* Fall through */ + fallthrough; case AF_INET: /* we want to receive ICMP errors */ - opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + ip_sock_set_recverr(usk); /* we want to set the don't fragment bit */ - opt = IP_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + ip_sock_set_mtu_discover(usk, IP_PMTUDISC_DO); /* We want receive timestamps. */ - opt = 1; - ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS_OLD, - (char *)&opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + sock_enable_timestamps(usk); break; default: @@ -213,15 +171,6 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) _leave(" = 0"); return 0; - -error: - kernel_sock_shutdown(local->socket, SHUT_RDWR); - local->socket->sk->sk_user_data = NULL; - sock_release(local->socket); - local->socket = NULL; - - _leave(" = %d", ret); - return ret; } /* @@ -232,7 +181,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, { struct rxrpc_local *local; struct rxrpc_net *rxnet = rxrpc_net(net); - struct list_head *cursor; + struct hlist_node *cursor; const char *age; long diff; int ret; @@ -242,16 +191,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, mutex_lock(&rxnet->local_mutex); - for (cursor = rxnet->local_endpoints.next; - cursor != &rxnet->local_endpoints; - cursor = cursor->next) { - local = list_entry(cursor, struct rxrpc_local, link); + hlist_for_each(cursor, &rxnet->local_endpoints) { + local = hlist_entry(cursor, struct rxrpc_local, link); diff = rxrpc_local_cmp_key(local, srx); - if (diff < 0) + if (diff != 0) continue; - if (diff > 0) - break; /* Services aren't allowed to share transport sockets, so * reject that here. It is possible that the object is dying - @@ -263,9 +208,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, goto addr_in_use; } - /* Found a match. We replace a dying object. Attempting to - * bind the transport socket may still fail if we're attempting - * to use a local address that the dying object is still using. + /* Found a match. We want to replace a dying object. + * Attempting to bind the transport socket may still fail if + * we're attempting to use a local address that the dying + * object is still using. */ if (!rxrpc_use_local(local)) break; @@ -282,10 +228,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, if (ret < 0) goto sock_error; - if (cursor != &rxnet->local_endpoints) - list_replace_init(cursor, &local->link); - else - list_add_tail(&local->link, cursor); + if (cursor) { + hlist_replace_rcu(cursor, &local->link); + cursor->pprev = NULL; + } else { + hlist_add_head_rcu(&local->link, &rxnet->local_endpoints); + } age = "new"; found: @@ -318,10 +266,10 @@ addr_in_use: struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); - int n; + int r; - n = atomic_inc_return(&local->usage); - trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here); + __refcount_inc(&local->ref, &r); + trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here); return local; } @@ -331,12 +279,12 @@ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); + int r; if (local) { - int n = atomic_fetch_add_unless(&local->usage, 1, 0); - if (n > 0) + if (__refcount_inc_not_zero(&local->ref, &r)) trace_rxrpc_local(local->debug_id, rxrpc_local_got, - n + 1, here); + r + 1, here); else local = NULL; } @@ -350,10 +298,10 @@ void rxrpc_queue_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); unsigned int debug_id = local->debug_id; - int n = atomic_read(&local->usage); + int r = refcount_read(&local->ref); if (rxrpc_queue_work(&local->processor)) - trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here); + trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here); else rxrpc_put_local(local); } @@ -365,15 +313,16 @@ void rxrpc_put_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); unsigned int debug_id; - int n; + bool dead; + int r; if (local) { debug_id = local->debug_id; - n = atomic_dec_return(&local->usage); - trace_rxrpc_local(debug_id, rxrpc_local_put, n, here); + dead = __refcount_dec_and_test(&local->ref, &r); + trace_rxrpc_local(debug_id, rxrpc_local_put, r, here); - if (n == 0) + if (dead) call_rcu(&local->rcu, rxrpc_local_rcu); } } @@ -426,7 +375,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) local->dead = true; mutex_lock(&rxnet->local_mutex); - list_del_init(&local->link); + hlist_del_init_rcu(&local->link); mutex_unlock(&rxnet->local_mutex); rxrpc_clean_up_local_conns(local); @@ -457,8 +406,11 @@ static void rxrpc_local_processor(struct work_struct *work) container_of(work, struct rxrpc_local, processor); bool again; + if (local->dead) + return; + trace_rxrpc_local(local->debug_id, rxrpc_local_processing, - atomic_read(&local->usage), NULL); + refcount_read(&local->ref), NULL); do { again = false; @@ -510,11 +462,11 @@ void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet) flush_workqueue(rxrpc_workqueue); - if (!list_empty(&rxnet->local_endpoints)) { + if (!hlist_empty(&rxnet->local_endpoints)) { mutex_lock(&rxnet->local_mutex); - list_for_each_entry(local, &rxnet->local_endpoints, link) { + hlist_for_each_entry(local, &rxnet->local_endpoints, link) { pr_err("AF_RXRPC: Leaked local %p {%d}\n", - local, atomic_read(&local->usage)); + local, refcount_read(&local->ref)); } mutex_unlock(&rxnet->local_mutex); BUG(); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 214405f75346..d4144fd86f84 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -63,11 +63,6 @@ unsigned int rxrpc_rx_mtu = 5692; */ unsigned int rxrpc_rx_jumbo_max = 4; -/* - * Time till packet resend (in milliseconds). - */ -unsigned long rxrpc_resend_timeout = 4 * HZ; - const s8 rxrpc_ack_priority[] = { [0] = 0, [RXRPC_ACK_DELAY] = 1, diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index b312aab80fed..bb4c25d6df64 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -50,7 +50,7 @@ static __net_init int rxrpc_init_net(struct net *net) rxnet->epoch |= RXRPC_RANDOM_EPOCH; INIT_LIST_HEAD(&rxnet->calls); - rwlock_init(&rxnet->call_lock); + spin_lock_init(&rxnet->call_lock); atomic_set(&rxnet->nr_calls, 1); atomic_set(&rxnet->nr_conns, 1); @@ -62,20 +62,17 @@ static __net_init int rxrpc_init_net(struct net *net) timer_setup(&rxnet->service_conn_reap_timer, rxrpc_service_conn_reap_timeout, 0); - rxnet->nr_client_conns = 0; - rxnet->nr_active_client_conns = 0; + atomic_set(&rxnet->nr_client_conns, 0); rxnet->kill_all_client_conns = false; spin_lock_init(&rxnet->client_conn_cache_lock); spin_lock_init(&rxnet->client_conn_discard_lock); - INIT_LIST_HEAD(&rxnet->waiting_client_conns); - INIT_LIST_HEAD(&rxnet->active_client_conns); INIT_LIST_HEAD(&rxnet->idle_client_conns); INIT_WORK(&rxnet->client_conn_reaper, rxrpc_discard_expired_client_conns); timer_setup(&rxnet->client_conn_reap_timer, rxrpc_client_conn_reap_timeout, 0); - INIT_LIST_HEAD(&rxnet->local_endpoints); + INIT_HLIST_HEAD(&rxnet->local_endpoints); mutex_init(&rxnet->local_mutex); hash_init(rxnet->peer_hash); @@ -101,6 +98,9 @@ static __net_init int rxrpc_init_net(struct net *net) proc_create_net("peers", 0444, rxnet->proc_net, &rxrpc_peer_seq_ops, sizeof(struct seq_net_private)); + proc_create_net("locals", 0444, rxnet->proc_net, + &rxrpc_local_seq_ops, + sizeof(struct seq_net_private)); return 0; err_proc: @@ -118,6 +118,8 @@ static __net_exit void rxrpc_exit_net(struct net *net) rxnet->live = false; del_timer_sync(&rxnet->peer_keepalive_timer); cancel_work_sync(&rxnet->peer_keepalive_work); + /* Remove the timer again as the worker may have restarted it. */ + del_timer_sync(&rxnet->peer_keepalive_timer); rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_peers(rxnet); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index bad3d2420344..9683617db704 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -74,11 +74,18 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, u8 reason) { rxrpc_serial_t serial; + unsigned int tmp; rxrpc_seq_t hard_ack, top, seq; int ix; u32 mtu, jmax; u8 *ackp = pkt->acks; + tmp = atomic_xchg(&call->ackr_nr_unacked, 0); + tmp |= atomic_xchg(&call->ackr_nr_consumed, 0); + if (!tmp && (reason == RXRPC_ACK_DELAY || + reason == RXRPC_ACK_IDLE)) + return 0; + /* Barrier against rxrpc_input_data(). */ serial = call->ackr_serial; hard_ack = READ_ONCE(call->rx_hard_ack); @@ -89,7 +96,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, pkt->ack.bufferSpace = htons(8); pkt->ack.maxSkew = htons(0); pkt->ack.firstPacket = htonl(hard_ack + 1); - pkt->ack.previousPacket = htonl(call->ackr_prev_seq); + pkt->ack.previousPacket = htonl(call->ackr_highest_seq); pkt->ack.serial = htonl(serial); pkt->ack.reason = reason; pkt->ack.nAcks = top - hard_ack; @@ -124,6 +131,49 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, } /* + * Record the beginning of an RTT probe. + */ +static int rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial, + enum rxrpc_rtt_tx_trace why) +{ + unsigned long avail = call->rtt_avail; + int rtt_slot = 9; + + if (!(avail & RXRPC_CALL_RTT_AVAIL_MASK)) + goto no_slot; + + rtt_slot = __ffs(avail & RXRPC_CALL_RTT_AVAIL_MASK); + if (!test_and_clear_bit(rtt_slot, &call->rtt_avail)) + goto no_slot; + + call->rtt_serial[rtt_slot] = serial; + call->rtt_sent_at[rtt_slot] = ktime_get_real(); + smp_wmb(); /* Write data before avail bit */ + set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); + + trace_rxrpc_rtt_tx(call, why, rtt_slot, serial); + return rtt_slot; + +no_slot: + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial); + return -1; +} + +/* + * Cancel an RTT probe. + */ +static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call, + rxrpc_serial_t serial, int rtt_slot) +{ + if (rtt_slot != -1) { + clear_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); + smp_wmb(); /* Clear pending bit before setting slot */ + set_bit(rtt_slot, &call->rtt_avail); + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_cancel, rtt_slot, serial); + } +} + +/* * Send an ACK call packet. */ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, @@ -136,7 +186,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; size_t len, n; - int ret; + int ret, rtt_slot = -1; u8 reason; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) @@ -180,6 +230,10 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason); spin_unlock_bh(&call->lock); + if (n == 0) { + kfree(pkt); + return 0; + } iov[0].iov_base = pkt; iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n; @@ -196,18 +250,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, if (_serial) *_serial = serial; - if (ping) { - call->ping_serial = serial; - smp_wmb(); - /* We need to stick a time in before we send the packet in case - * the reply gets back before kernel_sendmsg() completes - but - * asking UDP to send the packet can take a relatively long - * time. - */ - call->ping_time = ktime_get_real(); - set_bit(RXRPC_CALL_PINGING, &call->flags); - trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); - } + if (ping) + rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping); ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); conn->params.peer->last_tx_at = ktime_get_seconds(); @@ -221,19 +265,11 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, if (call->state < RXRPC_CALL_COMPLETE) { if (ret < 0) { - if (ping) - clear_bit(RXRPC_CALL_PINGING, &call->flags); + rxrpc_cancel_rtt_probe(call, serial, rtt_slot); rxrpc_propose_ACK(call, pkt->ack.reason, ntohl(pkt->ack.serial), false, true, rxrpc_propose_ack_retry_tx); - } else { - spin_lock_bh(&call->lock); - if (after(hard_ack, call->ackr_consumed)) - call->ackr_consumed = hard_ack; - if (after(top, call->ackr_seen)) - call->ackr_seen = top; - spin_unlock_bh(&call->lock); } rxrpc_set_keepalive(call); @@ -321,10 +357,16 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, struct kvec iov[2]; rxrpc_serial_t serial; size_t len; - int ret, opt; + int ret, rtt_slot = -1; _enter(",{%d}", skb->len); + if (hlist_unhashed(&call->error_link)) { + spin_lock_bh(&call->peer->lock); + hlist_add_head_rcu(&call->error_link, &call->peer->error_targets); + spin_unlock_bh(&call->peer->lock); + } + /* Each transmission of a Tx packet needs a new serial number */ serial = atomic_inc_return(&conn->serial); @@ -369,7 +411,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || retrans || call->cong_mode == RXRPC_CALL_SLOW_START || - (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + (call->peer->rtt_count < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real()))) whdr.flags |= RXRPC_REQUEST_ACK; @@ -397,6 +439,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, sp->hdr.serial = serial; smp_wmb(); /* Set serial before timestamp */ skb->tstamp = ktime_get_real(); + if (whdr.flags & RXRPC_REQUEST_ACK) + rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data); /* send the packet by UDP * - returns -EMSGSIZE if UDP would have to fragment the packet @@ -408,12 +452,15 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, conn->params.peer->last_tx_at = ktime_get_seconds(); up_read(&conn->params.local->defrag_sem); - if (ret < 0) + if (ret < 0) { + rxrpc_cancel_rtt_probe(call, serial, rtt_slot); trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_data_nofrag); - else + } else { trace_rxrpc_tx_packet(call->debug_id, &whdr, rxrpc_tx_point_call_data_nofrag); + } + rxrpc_tx_backoff(call, ret); if (ret == -EMSGSIZE) goto send_fragmentable; @@ -422,14 +469,10 @@ done: if (ret >= 0) { if (whdr.flags & RXRPC_REQUEST_ACK) { call->peer->rtt_last_req = skb->tstamp; - trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); - if (call->peer->rtt_usage > 1) { + if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; - ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt); - if (ack_lost_at < 1) - ack_lost_at = 1; - + ack_lost_at = rxrpc_get_rto_backoff(call->peer, false); ack_lost_at += nowj; WRITE_ONCE(call->ack_lost_at, ack_lost_at); rxrpc_reduce_call_timer(call, ack_lost_at, nowj, @@ -472,54 +515,34 @@ send_fragmentable: sp->hdr.serial = serial; smp_wmb(); /* Set serial before timestamp */ skb->tstamp = ktime_get_real(); + if (whdr.flags & RXRPC_REQUEST_ACK) + rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data); switch (conn->params.local->srx.transport.family) { - case AF_INET: - opt = IP_PMTUDISC_DONT; - ret = kernel_setsockopt(conn->params.local->socket, - SOL_IP, IP_MTU_DISCOVER, - (char *)&opt, sizeof(opt)); - if (ret == 0) { - ret = kernel_sendmsg(conn->params.local->socket, &msg, - iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_seconds(); - - opt = IP_PMTUDISC_DO; - kernel_setsockopt(conn->params.local->socket, SOL_IP, - IP_MTU_DISCOVER, - (char *)&opt, sizeof(opt)); - } - break; - -#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: - opt = IPV6_PMTUDISC_DONT; - ret = kernel_setsockopt(conn->params.local->socket, - SOL_IPV6, IPV6_MTU_DISCOVER, - (char *)&opt, sizeof(opt)); - if (ret == 0) { - ret = kernel_sendmsg(conn->params.local->socket, &msg, - iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_seconds(); - - opt = IPV6_PMTUDISC_DO; - kernel_setsockopt(conn->params.local->socket, - SOL_IPV6, IPV6_MTU_DISCOVER, - (char *)&opt, sizeof(opt)); - } + case AF_INET: + ip_sock_set_mtu_discover(conn->params.local->socket->sk, + IP_PMTUDISC_DONT); + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 2, len); + conn->params.peer->last_tx_at = ktime_get_seconds(); + + ip_sock_set_mtu_discover(conn->params.local->socket->sk, + IP_PMTUDISC_DO); break; -#endif default: BUG(); } - if (ret < 0) + if (ret < 0) { + rxrpc_cancel_rtt_probe(call, serial, rtt_slot); trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_data_frag); - else + } else { trace_rxrpc_tx_packet(call->debug_id, &whdr, rxrpc_tx_point_call_data_frag); + } rxrpc_tx_backoff(call, ret); up_write(&conn->params.local->defrag_sem); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 923b263c401b..32561e9567fe 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -16,22 +16,105 @@ #include <net/sock.h> #include <net/af_rxrpc.h> #include <net/ip.h> +#include <net/icmp.h> #include "ar-internal.h" +static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int); static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); static void rxrpc_distribute_error(struct rxrpc_peer *, int, enum rxrpc_call_completion); /* - * Find the peer associated with an ICMP packet. + * Find the peer associated with an ICMPv4 packet. */ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, - const struct sk_buff *skb, + struct sk_buff *skb, + unsigned int udp_offset, + unsigned int *info, struct sockaddr_rxrpc *srx) { - struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + struct iphdr *ip, *ip0 = ip_hdr(skb); + struct icmphdr *icmp = icmp_hdr(skb); + struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset); + + _enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code); + + switch (icmp->type) { + case ICMP_DEST_UNREACH: + *info = ntohs(icmp->un.frag.mtu); + fallthrough; + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + ip = (struct iphdr *)((void *)icmp + 8); + break; + default: + return NULL; + } - _enter(""); + memset(srx, 0, sizeof(*srx)); + srx->transport_type = local->srx.transport_type; + srx->transport_len = local->srx.transport_len; + srx->transport.family = local->srx.transport.family; + + /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice + * versa? + */ + switch (srx->transport.family) { + case AF_INET: + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = udp->dest; + memcpy(&srx->transport.sin.sin_addr, &ip->daddr, + sizeof(struct in_addr)); + break; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = udp->dest; + memcpy(&srx->transport.sin.sin_addr, &ip->daddr, + sizeof(struct in_addr)); + break; +#endif + + default: + WARN_ON_ONCE(1); + return NULL; + } + + _net("ICMP {%pISp}", &srx->transport); + return rxrpc_lookup_peer_rcu(local, srx); +} + +#ifdef CONFIG_AF_RXRPC_IPV6 +/* + * Find the peer associated with an ICMPv6 packet. + */ +static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local, + struct sk_buff *skb, + unsigned int udp_offset, + unsigned int *info, + struct sockaddr_rxrpc *srx) +{ + struct icmp6hdr *icmp = icmp6_hdr(skb); + struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb); + struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset); + + _enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code); + + switch (icmp->icmp6_type) { + case ICMPV6_DEST_UNREACH: + *info = ntohl(icmp->icmp6_mtu); + fallthrough; + case ICMPV6_PKT_TOOBIG: + case ICMPV6_TIME_EXCEED: + case ICMPV6_PARAMPROB: + ip = (struct ipv6hdr *)((void *)icmp + 8); + break; + default: + return NULL; + } memset(srx, 0, sizeof(*srx)); srx->transport_type = local->srx.transport_type; @@ -43,6 +126,165 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, */ switch (srx->transport.family) { case AF_INET: + _net("Rx ICMP6 on v4 sock"); + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = udp->dest; + memcpy(&srx->transport.sin.sin_addr, + &ip->daddr.s6_addr32[3], sizeof(struct in_addr)); + break; + case AF_INET6: + _net("Rx ICMP6"); + srx->transport.sin.sin_port = udp->dest; + memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr, + sizeof(struct in6_addr)); + break; + default: + WARN_ON_ONCE(1); + return NULL; + } + + _net("ICMP {%pISp}", &srx->transport); + return rxrpc_lookup_peer_rcu(local, srx); +} +#endif /* CONFIG_AF_RXRPC_IPV6 */ + +/* + * Handle an error received on the local endpoint as a tunnel. + */ +void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, + unsigned int udp_offset) +{ + struct sock_extended_err ee; + struct sockaddr_rxrpc srx; + struct rxrpc_local *local; + struct rxrpc_peer *peer; + unsigned int info = 0; + int err; + u8 version = ip_hdr(skb)->version; + u8 type = icmp_hdr(skb)->type; + u8 code = icmp_hdr(skb)->code; + + rcu_read_lock(); + local = rcu_dereference_sk_user_data(sk); + if (unlikely(!local)) { + rcu_read_unlock(); + return; + } + + rxrpc_new_skb(skb, rxrpc_skb_received); + + switch (ip_hdr(skb)->version) { + case IPVERSION: + peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset, + &info, &srx); + break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case 6: + peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset, + &info, &srx); + break; +#endif + default: + rcu_read_unlock(); + return; + } + + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + rcu_read_unlock(); + return; + } + + memset(&ee, 0, sizeof(ee)); + + switch (version) { + case IPVERSION: + switch (type) { + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_FRAG_NEEDED: + rxrpc_adjust_mtu(peer, info); + rcu_read_unlock(); + rxrpc_put_peer(peer); + return; + default: + break; + } + + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + /* Might want to do something different with + * non-fatal errors + */ + //harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + default: + err = EPROTO; + break; + } + + ee.ee_origin = SO_EE_ORIGIN_ICMP; + ee.ee_type = type; + ee.ee_code = code; + ee.ee_errno = err; + break; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case 6: + switch (type) { + case ICMPV6_PKT_TOOBIG: + rxrpc_adjust_mtu(peer, info); + rcu_read_unlock(); + rxrpc_put_peer(peer); + return; + } + + icmpv6_err_convert(type, code, &err); + + if (err == EACCES) + err = EHOSTUNREACH; + + ee.ee_origin = SO_EE_ORIGIN_ICMP6; + ee.ee_type = type; + ee.ee_code = code; + ee.ee_errno = err; + break; +#endif + } + + trace_rxrpc_rx_icmp(peer, &ee, &srx); + + rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR); + rcu_read_unlock(); + rxrpc_put_peer(peer); +} + +/* + * Find the peer associated with a local error. + */ +static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, + const struct sk_buff *skb, + struct sockaddr_rxrpc *srx) +{ + struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + + _enter(""); + + memset(srx, 0, sizeof(*srx)); + srx->transport_type = local->srx.transport_type; + srx->transport_len = local->srx.transport_len; + srx->transport.family = local->srx.transport.family; + + switch (srx->transport.family) { + case AF_INET: srx->transport_len = sizeof(srx->transport.sin); srx->transport.family = AF_INET; srx->transport.sin.sin_port = serr->port; @@ -104,10 +346,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, /* * Handle an MTU/fragmentation problem. */ -static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr) +static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) { - u32 mtu = serr->ee.ee_info; - _net("Rx ICMP Fragmentation Needed (%d)", mtu); /* wind down the local interface MTU */ @@ -148,7 +388,7 @@ void rxrpc_error_report(struct sock *sk) struct sock_exterr_skb *serr; struct sockaddr_rxrpc srx; struct rxrpc_local *local; - struct rxrpc_peer *peer; + struct rxrpc_peer *peer = NULL; struct sk_buff *skb; rcu_read_lock(); @@ -172,41 +412,20 @@ void rxrpc_error_report(struct sock *sk) } rxrpc_new_skb(skb, rxrpc_skb_received); serr = SKB_EXT_ERR(skb); - if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { - _leave("UDP empty message"); - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); - return; - } - peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx); - if (peer && !rxrpc_get_peer_maybe(peer)) - peer = NULL; - if (!peer) { - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); - _leave(" [no peer]"); - return; - } - - trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); - - if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP && - serr->ee.ee_type == ICMP_DEST_UNREACH && - serr->ee.ee_code == ICMP_FRAG_NEEDED)) { - rxrpc_adjust_mtu(peer, serr); - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); - rxrpc_put_peer(peer); - _leave(" [MTU update]"); - return; + if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) { + peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx); + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (peer) { + trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); + rxrpc_store_error(peer, serr); + } } - rxrpc_store_error(peer, serr); rcu_read_unlock(); rxrpc_free_skb(skb, rxrpc_skb_freed); rxrpc_put_peer(peer); - _leave(""); } @@ -271,6 +490,9 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, break; case SO_EE_ORIGIN_ICMP6: + if (err == EACCES) + err = EHOSTUNREACH; + fallthrough; default: _proto("Rx Received error report { orig=%u }", ee->ee_origin); break; @@ -289,59 +511,11 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) { rxrpc_see_call(call); - if (call->state < RXRPC_CALL_COMPLETE && - rxrpc_set_call_completion(call, compl, 0, -error)) - rxrpc_notify_socket(call); + rxrpc_set_call_completion(call, compl, 0, -error); } } /* - * Add RTT information to cache. This is called in softirq mode and has - * exclusive access to the peer RTT data. - */ -void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, - rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, - ktime_t send_time, ktime_t resp_time) -{ - struct rxrpc_peer *peer = call->peer; - s64 rtt; - u64 sum = peer->rtt_sum, avg; - u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage; - - rtt = ktime_to_ns(ktime_sub(resp_time, send_time)); - if (rtt < 0) - return; - - spin_lock(&peer->rtt_input_lock); - - /* Replace the oldest datum in the RTT buffer */ - sum -= peer->rtt_cache[cursor]; - sum += rtt; - peer->rtt_cache[cursor] = rtt; - peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1); - peer->rtt_sum = sum; - if (usage < RXRPC_RTT_CACHE_SIZE) { - usage++; - peer->rtt_usage = usage; - } - - spin_unlock(&peer->rtt_input_lock); - - /* Now recalculate the average */ - if (usage == RXRPC_RTT_CACHE_SIZE) { - avg = sum / RXRPC_RTT_CACHE_SIZE; - } else { - avg = sum; - do_div(avg, usage); - } - - /* Don't need to update this under lock */ - peer->rtt = avg; - trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, - usage, avg); -} - -/* * Perform keep-alive pings. */ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 452163eadb98..26d2ae9baaf2 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -121,7 +121,7 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu( hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) { if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 && - atomic_read(&peer->usage) > 0) + refcount_read(&peer->ref) > 0) return peer; } @@ -140,7 +140,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); if (peer) { _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); - _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); + _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); } return peer; } @@ -216,7 +216,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) peer = kzalloc(sizeof(struct rxrpc_peer), gfp); if (peer) { - atomic_set(&peer->usage, 1); + refcount_set(&peer->ref, 1); peer->local = rxrpc_get_local(local); INIT_HLIST_HEAD(&peer->error_targets); peer->service_conns = RB_ROOT; @@ -225,6 +225,8 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) spin_lock_init(&peer->rtt_input_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); + rxrpc_peer_init_rtt(peer); + if (RXRPC_TX_SMSS > 2190) peer->cong_cwnd = 2; else if (RXRPC_TX_SMSS > 1095) @@ -297,6 +299,12 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, return peer; } +static void rxrpc_free_peer(struct rxrpc_peer *peer) +{ + rxrpc_put_local(peer->local); + kfree_rcu(peer, rcu); +} + /* * Set up a new incoming peer. There shouldn't be any other matching peers * since we've already done a search in the list from the non-reentrant context @@ -363,14 +371,14 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, spin_unlock_bh(&rxnet->peer_hash_lock); if (peer) - kfree(candidate); + rxrpc_free_peer(candidate); else peer = candidate; } _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); - _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); + _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); return peer; } @@ -380,10 +388,10 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); - int n; + int r; - n = atomic_inc_return(&peer->usage); - trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here); + __refcount_inc(&peer->ref, &r); + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); return peer; } @@ -393,11 +401,11 @@ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); + int r; if (peer) { - int n = atomic_fetch_add_unless(&peer->usage, 1, 0); - if (n > 0) - trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here); + if (__refcount_inc_not_zero(&peer->ref, &r)) + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); else peer = NULL; } @@ -418,8 +426,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) list_del_init(&peer->keepalive_link); spin_unlock_bh(&rxnet->peer_hash_lock); - rxrpc_put_local(peer->local); - kfree_rcu(peer, rcu); + rxrpc_free_peer(peer); } /* @@ -429,13 +436,14 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); unsigned int debug_id; - int n; + bool dead; + int r; if (peer) { debug_id = peer->debug_id; - n = atomic_dec_return(&peer->usage); - trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here); - if (n == 0) + dead = __refcount_dec_and_test(&peer->ref, &r); + trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); + if (dead) __rxrpc_put_peer(peer); } } @@ -448,15 +456,15 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); unsigned int debug_id = peer->debug_id; - int n; + bool dead; + int r; - n = atomic_dec_return(&peer->usage); - trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here); - if (n == 0) { + dead = __refcount_dec_and_test(&peer->ref, &r); + trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); + if (dead) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); - rxrpc_put_local(peer->local); - kfree_rcu(peer, rcu); + rxrpc_free_peer(peer); } } @@ -475,7 +483,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) { pr_err("Leaked peer %u {%u} %pISp\n", peer->debug_id, - atomic_read(&peer->usage), + refcount_read(&peer->ref), &peer->srx.transport); } } @@ -497,14 +505,24 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, EXPORT_SYMBOL(rxrpc_kernel_get_peer); /** - * rxrpc_kernel_get_rtt - Get a call's peer RTT + * rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT * @sock: The socket on which the call is in progress. * @call: The call to query + * @_srtt: Where to store the SRTT value. * - * Get the call's peer RTT. + * Get the call's peer smoothed RTT in uS. */ -u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call) +bool rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call, + u32 *_srtt) { - return call->peer->rtt; + struct rxrpc_peer *peer = call->peer; + + if (peer->rtt_count == 0) { + *_srtt = 1000000; /* 1S */ + return false; + } + + *_srtt = call->peer->srtt_us >> 3; + return true; } -EXPORT_SYMBOL(rxrpc_kernel_get_rtt); +EXPORT_SYMBOL(rxrpc_kernel_get_srtt); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index b9d053e42821..245418943e01 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -26,29 +26,23 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { */ static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos) __acquires(rcu) - __acquires(rxnet->call_lock) { struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); rcu_read_lock(); - read_lock(&rxnet->call_lock); - return seq_list_start_head(&rxnet->calls, *_pos); + return seq_list_start_head_rcu(&rxnet->calls, *_pos); } static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); - return seq_list_next(v, &rxnet->calls, pos); + return seq_list_next_rcu(v, &rxnet->calls, pos); } static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) - __releases(rxnet->call_lock) __releases(rcu) { - struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); - - read_unlock(&rxnet->call_lock); rcu_read_unlock(); } @@ -68,7 +62,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) "Proto Local " " Remote " " SvID ConnID CallID End Use State Abort " - " UserID TxSeq TW RxSeq RW RxSerial RxTimo\n"); + " DebugId TxSeq TW RxSeq RW RxSerial RxTimo\n"); return 0; } @@ -100,17 +94,17 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) rx_hard_ack = READ_ONCE(call->rx_hard_ack); seq_printf(seq, "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u" - " %-8.8s %08x %lx %08x %02x %08x %02x %08x %06lx\n", + " %-8.8s %08x %08x %08x %02x %08x %02x %08x %06lx\n", lbuff, rbuff, call->service_id, call->cid, call->call_id, rxrpc_is_service_call(call) ? "Svc" : "Clt", - atomic_read(&call->usage), + refcount_read(&call->ref), rxrpc_call_states[call->state], call->abort_code, - call->user_call_ID, + call->debug_id, tx_hard_ack, READ_ONCE(call->tx_top) - tx_hard_ack, rx_hard_ack, READ_ONCE(call->rx_top) - rx_hard_ack, call->rx_serial, @@ -165,7 +159,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) "Proto Local " " Remote " " SvID ConnID End Use State Key " - " Serial ISerial\n" + " Serial ISerial CallId0 CallId1 CallId2 CallId3\n" ); return 0; } @@ -189,7 +183,7 @@ print: conn->service_id, conn->proto.cid, rxrpc_conn_is_service(conn) ? "Svc" : "Clt", - atomic_read(&conn->usage), + refcount_read(&conn->ref), rxrpc_conn_states[conn->state], key_serial(conn->params.key), atomic_read(&conn->serial), @@ -222,7 +216,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "Proto Local " " Remote " - " Use CW MTU LastUse RTT Rc\n" + " Use CW MTU LastUse RTT RTO\n" ); return 0; } @@ -236,15 +230,15 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) now = ktime_get_seconds(); seq_printf(seq, "UDP %-47.47s %-47.47s %3u" - " %3u %5u %6llus %12llu %2u\n", + " %3u %5u %6llus %8u %8u\n", lbuff, rbuff, - atomic_read(&peer->usage), + refcount_read(&peer->ref), peer->cong_cwnd, peer->mtu, now - peer->last_tx_at, - peer->rtt, - peer->rtt_cursor); + peer->srtt_us >> 3, + jiffies_to_usecs(peer->rto_j)); return 0; } @@ -334,3 +328,72 @@ const struct seq_operations rxrpc_peer_seq_ops = { .stop = rxrpc_peer_seq_stop, .show = rxrpc_peer_seq_show, }; + +/* + * Generate a list of extant virtual local endpoints in /proc/net/rxrpc/locals + */ +static int rxrpc_local_seq_show(struct seq_file *seq, void *v) +{ + struct rxrpc_local *local; + char lbuff[50]; + + if (v == SEQ_START_TOKEN) { + seq_puts(seq, + "Proto Local " + " Use Act\n"); + return 0; + } + + local = hlist_entry(v, struct rxrpc_local, link); + + sprintf(lbuff, "%pISpc", &local->srx.transport); + + seq_printf(seq, + "UDP %-47.47s %3u %3u\n", + lbuff, + refcount_read(&local->ref), + atomic_read(&local->active_users)); + + return 0; +} + +static void *rxrpc_local_seq_start(struct seq_file *seq, loff_t *_pos) + __acquires(rcu) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + unsigned int n; + + rcu_read_lock(); + + if (*_pos >= UINT_MAX) + return NULL; + + n = *_pos; + if (n == 0) + return SEQ_START_TOKEN; + + return seq_hlist_start_rcu(&rxnet->local_endpoints, n - 1); +} + +static void *rxrpc_local_seq_next(struct seq_file *seq, void *v, loff_t *_pos) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + if (*_pos >= UINT_MAX) + return NULL; + + return seq_hlist_next_rcu(v, &rxnet->local_endpoints, _pos); +} + +static void rxrpc_local_seq_stop(struct seq_file *seq, void *v) + __releases(rcu) +{ + rcu_read_unlock(); +} + +const struct seq_operations rxrpc_local_seq_ops = { + .start = rxrpc_local_seq_start, + .next = rxrpc_local_seq_next, + .stop = rxrpc_local_seq_stop, + .show = rxrpc_local_seq_show, +}; diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h index 49bb972539aa..d2cf8e1d218f 100644 --- a/net/rxrpc/protocol.h +++ b/net/rxrpc/protocol.h @@ -57,7 +57,7 @@ struct rxrpc_wire_header { uint8_t userStatus; /* app-layer defined status */ #define RXRPC_USERSTATUS_SERVICE_UPGRADE 0x01 /* AuriStor service upgrade request */ - + uint8_t securityIndex; /* security protocol ID */ union { __be16 _rsvd; /* reserved */ diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8578c39ec839..7e39c262fd79 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -59,6 +59,85 @@ void rxrpc_notify_socket(struct rxrpc_call *call) } /* + * Transition a call to the complete state. + */ +bool __rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + if (call->state < RXRPC_CALL_COMPLETE) { + call->abort_code = abort_code; + call->error = error; + call->completion = compl; + call->state = RXRPC_CALL_COMPLETE; + trace_rxrpc_call_complete(call); + wake_up(&call->waitq); + rxrpc_notify_socket(call); + return true; + } + return false; +} + +bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + bool ret = false; + + if (call->state < RXRPC_CALL_COMPLETE) { + write_lock_bh(&call->state_lock); + ret = __rxrpc_set_call_completion(call, compl, abort_code, error); + write_unlock_bh(&call->state_lock); + } + return ret; +} + +/* + * Record that a call successfully completed. + */ +bool __rxrpc_call_completed(struct rxrpc_call *call) +{ + return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); +} + +bool rxrpc_call_completed(struct rxrpc_call *call) +{ + bool ret = false; + + if (call->state < RXRPC_CALL_COMPLETE) { + write_lock_bh(&call->state_lock); + ret = __rxrpc_call_completed(call); + write_unlock_bh(&call->state_lock); + } + return ret; +} + +/* + * Record that a call is locally aborted. + */ +bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) +{ + trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, + abort_code, error); + return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error); +} + +bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) +{ + bool ret; + + write_lock_bh(&call->state_lock); + ret = __rxrpc_abort_call(why, call, seq, abort_code, error); + write_unlock_bh(&call->state_lock); + return ret; +} + +/* * Pass a call terminating message to userspace. */ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) @@ -100,37 +179,6 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) } /* - * Pass back notification of a new call. The call is added to the - * to-be-accepted list. This means that the next call to be accepted might not - * be the last call seen awaiting acceptance, but unless we leave this on the - * front of the queue and block all other messages until someone gives us a - * user_ID for it, there's not a lot we can do. - */ -static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct msghdr *msg, int flags) -{ - int tmp = 0, ret; - - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &tmp); - - if (ret == 0 && !(flags & MSG_PEEK)) { - _debug("to be accepted"); - write_lock_bh(&rx->recvmsg_lock); - list_del_init(&call->recvmsg_link); - write_unlock_bh(&rx->recvmsg_lock); - - rxrpc_get_call(call, rxrpc_call_got); - write_lock(&rx->call_lock); - list_add_tail(&call->accept_link, &rx->to_be_accepted); - write_unlock(&rx->call_lock); - } - - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_to_be_accepted, 1, 0, 0, ret); - return ret; -} - -/* * End the packet reception phase. */ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) @@ -212,11 +260,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) rxrpc_end_rx_phase(call, serial); } else { /* Check to see if there's an ACK that needs sending. */ - if (after_eq(hard_ack, call->ackr_consumed + 2) || - after_eq(top, call->ackr_seen + 2) || - (hard_ack == top && after(hard_ack, call->ackr_consumed))) - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, - true, true, + if (atomic_inc_return(&call->ackr_nr_consumed) > 2) + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial, + true, false, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) rxrpc_send_ack_packet(call, false, NULL); @@ -464,7 +510,7 @@ try_again: list_empty(&rx->recvmsg_q) && rx->sk.sk_state != RXRPC_SERVER_LISTENING) { release_sock(&rx->sk); - return -ENODATA; + return -EAGAIN; } if (list_empty(&rx->recvmsg_q)) { @@ -541,7 +587,7 @@ try_again: goto error_unlock_call; } - if (msg->msg_name) { + if (msg->msg_name && call->peer) { struct sockaddr_rxrpc *srx = msg->msg_name; size_t len = sizeof(call->peer->srx); @@ -551,9 +597,6 @@ try_again: } switch (READ_ONCE(call->state)) { - case RXRPC_CALL_SERVER_ACCEPTING: - ret = rxrpc_recvmsg_new_call(rx, call, msg, flags); - break; case RXRPC_CALL_CLIENT_RECV_REPLY: case RXRPC_CALL_SERVER_RECV_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: @@ -624,6 +667,7 @@ wait_error: * @sock: The socket that the call exists on * @call: The call to send data through * @iter: The buffer to receive into + * @_len: The amount of data we want to receive (decreased on return) * @want_more: True if more data is expected to be read * @_abort: Where the abort code is stored if -ECONNABORTED is returned * @_service: Where to store the actual service ID (may be upgraded) @@ -639,7 +683,7 @@ wait_error: * *_abort should also be initialised to 0. */ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, - struct iov_iter *iter, + struct iov_iter *iter, size_t *_len, bool want_more, u32 *_abort, u16 *_service) { size_t offset = 0; @@ -647,9 +691,9 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, _enter("{%d,%s},%zu,%d", call->debug_id, rxrpc_call_states[call->state], - iov_iter_count(iter), want_more); + *_len, want_more); - ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING); + ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_SECURING); mutex_lock(&call->user_mutex); @@ -658,8 +702,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, case RXRPC_CALL_SERVER_RECV_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: ret = rxrpc_recvmsg_data(sock, call, NULL, iter, - iov_iter_count(iter), 0, - &offset); + *_len, 0, &offset); + *_len -= offset; if (ret < 0) goto out; @@ -697,7 +741,7 @@ out: case RXRPC_ACK_DELAY: if (ret != -EAGAIN) break; - /* Fall through */ + fallthrough; default: rxrpc_send_ack_packet(call, false, NULL); } @@ -727,46 +771,3 @@ call_complete: goto out; } EXPORT_SYMBOL(rxrpc_kernel_recv_data); - -/** - * rxrpc_kernel_get_reply_time - Get timestamp on first reply packet - * @sock: The socket that the call exists on - * @call: The call to query - * @_ts: Where to put the timestamp - * - * Retrieve the timestamp from the first DATA packet of the reply if it is - * in the ring. Returns true if successful, false if not. - */ -bool rxrpc_kernel_get_reply_time(struct socket *sock, struct rxrpc_call *call, - ktime_t *_ts) -{ - struct sk_buff *skb; - rxrpc_seq_t hard_ack, top, seq; - bool success = false; - - mutex_lock(&call->user_mutex); - - if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_RECV_REPLY) - goto out; - - hard_ack = call->rx_hard_ack; - if (hard_ack != 0) - goto out; - - seq = hard_ack + 1; - top = smp_load_acquire(&call->rx_top); - if (after(seq, top)) - goto out; - - skb = call->rxtx_buffer[seq & RXRPC_RXTX_BUFF_MASK]; - if (!skb) - goto out; - - *_ts = skb_get_ktime(skb); - success = true; - -out: - mutex_unlock(&call->user_mutex); - return success; -} -EXPORT_SYMBOL(rxrpc_kernel_get_reply_time); diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c new file mode 100644 index 000000000000..be61d6f5be8d --- /dev/null +++ b/net/rxrpc/rtt.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* RTT/RTO calculation. + * + * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com) + * + * https://tools.ietf.org/html/rfc6298 + * https://tools.ietf.org/html/rfc1122#section-4.2.3.1 + * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf + */ + +#include <linux/net.h> +#include "ar-internal.h" + +#define RXRPC_RTO_MAX ((unsigned)(120 * HZ)) +#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ +#define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ + +static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) +{ + return 200; +} + +static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) +{ + return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); +} + +static u32 rxrpc_bound_rto(u32 rto) +{ + return min(rto, RXRPC_RTO_MAX); +} + +/* + * Called to compute a smoothed rtt estimate. The data fed to this + * routine either comes from timestamps, or from segments that were + * known _not_ to have been retransmitted [see Karn/Partridge + * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 + * piece by Van Jacobson. + * NOTE: the next three routines used to be one big routine. + * To save cycles in the RFC 1323 implementation it was better to break + * it up into three procedures. -- erics + */ +static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) +{ + long m = sample_rtt_us; /* RTT */ + u32 srtt = peer->srtt_us; + + /* The following amusing code comes from Jacobson's + * article in SIGCOMM '88. Note that rtt and mdev + * are scaled versions of rtt and mean deviation. + * This is designed to be as fast as possible + * m stands for "measurement". + * + * On a 1990 paper the rto value is changed to: + * RTO = rtt + 4 * mdev + * + * Funny. This algorithm seems to be very broken. + * These formulae increase RTO, when it should be decreased, increase + * too slowly, when it should be increased quickly, decrease too quickly + * etc. I guess in BSD RTO takes ONE value, so that it is absolutely + * does not matter how to _calculate_ it. Seems, it was trap + * that VJ failed to avoid. 8) + */ + if (srtt != 0) { + m -= (srtt >> 3); /* m is now error in rtt est */ + srtt += m; /* rtt = 7/8 rtt + 1/8 new */ + if (m < 0) { + m = -m; /* m is now abs(error) */ + m -= (peer->mdev_us >> 2); /* similar update on mdev */ + /* This is similar to one of Eifel findings. + * Eifel blocks mdev updates when rtt decreases. + * This solution is a bit different: we use finer gain + * for mdev in this case (alpha*beta). + * Like Eifel it also prevents growth of rto, + * but also it limits too fast rto decreases, + * happening in pure Eifel. + */ + if (m > 0) + m >>= 3; + } else { + m -= (peer->mdev_us >> 2); /* similar update on mdev */ + } + + peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ + if (peer->mdev_us > peer->mdev_max_us) { + peer->mdev_max_us = peer->mdev_us; + if (peer->mdev_max_us > peer->rttvar_us) + peer->rttvar_us = peer->mdev_max_us; + } + } else { + /* no previous measure. */ + srtt = m << 3; /* take the measured time to be rtt */ + peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ + peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer)); + peer->mdev_max_us = peer->rttvar_us; + } + + peer->srtt_us = max(1U, srtt); +} + +/* + * Calculate rto without backoff. This is the second half of Van Jacobson's + * routine referred to above. + */ +static void rxrpc_set_rto(struct rxrpc_peer *peer) +{ + u32 rto; + + /* 1. If rtt variance happened to be less 50msec, it is hallucination. + * It cannot be less due to utterly erratic ACK generation made + * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ + * to do with delayed acks, because at cwnd>2 true delack timeout + * is invisible. Actually, Linux-2.4 also generates erratic + * ACKs in some circumstances. + */ + rto = __rxrpc_set_rto(peer); + + /* 2. Fixups made earlier cannot be right. + * If we do not estimate RTO correctly without them, + * all the algo is pure shit and should be replaced + * with correct one. It is exactly, which we pretend to do. + */ + + /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo + * guarantees that rto is higher. + */ + peer->rto_j = rxrpc_bound_rto(rto); +} + +static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us) +{ + if (rtt_us < 0) + return; + + //rxrpc_update_rtt_min(peer, rtt_us); + rxrpc_rtt_estimator(peer, rtt_us); + rxrpc_set_rto(peer); + + /* RFC6298: only reset backoff on valid RTT measurement. */ + peer->backoff = 0; +} + +/* + * Add RTT information to cache. This is called in softirq mode and has + * exclusive access to the peer RTT data. + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + int rtt_slot, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + ktime_t send_time, ktime_t resp_time) +{ + struct rxrpc_peer *peer = call->peer; + s64 rtt_us; + + rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); + if (rtt_us < 0) + return; + + spin_lock(&peer->rtt_input_lock); + rxrpc_ack_update_rtt(peer, rtt_us); + if (peer->rtt_count < 3) + peer->rtt_count++; + spin_unlock(&peer->rtt_input_lock); + + trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial, + peer->srtt_us >> 3, peer->rto_j); +} + +/* + * Get the retransmission timeout to set in jiffies, backing it off each time + * we retransmit. + */ +unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) +{ + u64 timo_j; + u8 backoff = READ_ONCE(peer->backoff); + + timo_j = peer->rto_j; + timo_j <<= backoff; + if (retrans && timo_j * 2 <= RXRPC_RTO_MAX) + WRITE_ONCE(peer->backoff, backoff + 1); + + if (timo_j < 1) + timo_j = 1; + + return timo_j; +} + +void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) +{ + peer->rto_j = RXRPC_TIMEOUT_INIT; + peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT); + peer->backoff = 0; + //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); +} diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 098f1f9ec53b..78fa0524156f 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -15,6 +15,7 @@ #include <linux/scatterlist.h> #include <linux/ctype.h> #include <linux/slab.h> +#include <linux/key-type.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <keys/rxrpc-type.h> @@ -27,6 +28,7 @@ #define INST_SZ 40 /* size of principal's instance */ #define REALM_SZ 40 /* size of principal's auth domain */ #define SNAME_SZ 40 /* size of service name */ +#define RXKAD_ALIGN 8 struct rxkad_level1_hdr { __be32 data_size; /* true data size (excluding padding) */ @@ -37,6 +39,9 @@ struct rxkad_level2_hdr { __be32 checksum; /* decrypted data checksum */ }; +static int rxkad_prime_packet_security(struct rxrpc_connection *conn, + struct crypto_sync_skcipher *ci); + /* * this holds a pinned cipher so that keventd doesn't get called by the cipher * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE @@ -47,17 +52,59 @@ static struct skcipher_request *rxkad_ci_req; static DEFINE_MUTEX(rxkad_ci_mutex); /* + * Parse the information from a server key + * + * The data should be the 8-byte secret key. + */ +static int rxkad_preparse_server_key(struct key_preparsed_payload *prep) +{ + struct crypto_skcipher *ci; + + if (prep->datalen != 8) + return -EINVAL; + + memcpy(&prep->payload.data[2], prep->data, 8); + + ci = crypto_alloc_skcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(ci)) { + _leave(" = %ld", PTR_ERR(ci)); + return PTR_ERR(ci); + } + + if (crypto_skcipher_setkey(ci, prep->data, 8) < 0) + BUG(); + + prep->payload.data[0] = ci; + _leave(" = 0"); + return 0; +} + +static void rxkad_free_preparse_server_key(struct key_preparsed_payload *prep) +{ + + if (prep->payload.data[0]) + crypto_free_skcipher(prep->payload.data[0]); +} + +static void rxkad_destroy_server_key(struct key *key) +{ + if (key->payload.data[0]) { + crypto_free_skcipher(key->payload.data[0]); + key->payload.data[0] = NULL; + } +} + +/* * initialise connection security */ -static int rxkad_init_connection_security(struct rxrpc_connection *conn) +static int rxkad_init_connection_security(struct rxrpc_connection *conn, + struct rxrpc_key_token *token) { struct crypto_sync_skcipher *ci; - struct rxrpc_key_token *token; int ret; _enter("{%d},{%x}", conn->debug_id, key_serial(conn->params.key)); - token = conn->params.key->payload.data[0]; conn->security_ix = token->security_index; ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); @@ -73,32 +120,68 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn) switch (conn->params.security_level) { case RXRPC_SECURITY_PLAIN: - break; case RXRPC_SECURITY_AUTH: - conn->size_align = 8; - conn->security_size = sizeof(struct rxkad_level1_hdr); - break; case RXRPC_SECURITY_ENCRYPT: - conn->size_align = 8; - conn->security_size = sizeof(struct rxkad_level2_hdr); break; default: ret = -EKEYREJECTED; goto error; } - conn->cipher = ci; - ret = 0; + ret = rxkad_prime_packet_security(conn, ci); + if (ret < 0) + goto error_ci; + + conn->rxkad.cipher = ci; + return 0; + +error_ci: + crypto_free_sync_skcipher(ci); error: _leave(" = %d", ret); return ret; } /* + * Work out how much data we can put in a packet. + */ +static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain, + size_t *_buf_size, size_t *_data_size, size_t *_offset) +{ + size_t shdr, buf_size, chunk; + + switch (call->conn->params.security_level) { + default: + buf_size = chunk = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); + shdr = 0; + goto out; + case RXRPC_SECURITY_AUTH: + shdr = sizeof(struct rxkad_level1_hdr); + break; + case RXRPC_SECURITY_ENCRYPT: + shdr = sizeof(struct rxkad_level2_hdr); + break; + } + + buf_size = round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN); + + chunk = buf_size - shdr; + if (remain < chunk) + buf_size = round_up(shdr + remain, RXKAD_ALIGN); + +out: + *_buf_size = buf_size; + *_data_size = chunk; + *_offset = shdr; + return 0; +} + +/* * prime the encryption state with the invariant parts of a connection's * description */ -static int rxkad_prime_packet_security(struct rxrpc_connection *conn) +static int rxkad_prime_packet_security(struct rxrpc_connection *conn, + struct crypto_sync_skcipher *ci) { struct skcipher_request *req; struct rxrpc_key_token *token; @@ -116,7 +199,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) if (!tmpbuf) return -ENOMEM; - req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + req = skcipher_request_alloc(&ci->base, GFP_NOFS); if (!req) { kfree(tmpbuf); return -ENOMEM; @@ -131,13 +214,13 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) tmpbuf[3] = htonl(conn->security_ix); sg_init_one(&sg, tmpbuf, tmpsize); - skcipher_request_set_sync_tfm(req, conn->cipher); + skcipher_request_set_sync_tfm(req, ci); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x); crypto_skcipher_encrypt(req); skcipher_request_free(req); - memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv)); + memcpy(&conn->rxkad.csum_iv, tmpbuf + 2, sizeof(conn->rxkad.csum_iv)); kfree(tmpbuf); _leave(" = 0"); return 0; @@ -149,7 +232,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) */ static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call) { - struct crypto_skcipher *tfm = &call->conn->cipher->base; + struct crypto_skcipher *tfm = &call->conn->rxkad.cipher->base; struct skcipher_request *cipher_req = call->cipher_req; if (!cipher_req) { @@ -176,15 +259,14 @@ static void rxkad_free_call_crypto(struct rxrpc_call *call) * partially encrypt a packet (level 1 security) */ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 data_size, - void *sechdr, + struct sk_buff *skb, u32 data_size, struct skcipher_request *req) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxkad_level1_hdr hdr; struct rxrpc_crypt iv; struct scatterlist sg; + size_t pad; u16 check; _enter(""); @@ -193,13 +275,19 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, data_size |= (u32)check << 16; hdr.data_size = htonl(data_size); - memcpy(sechdr, &hdr, sizeof(hdr)); + memcpy(skb->head, &hdr, sizeof(hdr)); + + pad = sizeof(struct rxkad_level1_hdr) + data_size; + pad = RXKAD_ALIGN - pad; + pad &= RXKAD_ALIGN - 1; + if (pad) + skb_put_zero(skb, pad); /* start the encryption afresh */ memset(&iv, 0, sizeof(iv)); - sg_init_one(&sg, sechdr, 8); - skcipher_request_set_sync_tfm(req, call->conn->cipher); + sg_init_one(&sg, skb->head, 8); + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); crypto_skcipher_encrypt(req); @@ -215,7 +303,6 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct sk_buff *skb, u32 data_size, - void *sechdr, struct skcipher_request *req) { const struct rxrpc_key_token *token; @@ -224,6 +311,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct rxrpc_crypt iv; struct scatterlist sg[16]; unsigned int len; + size_t pad; u16 check; int err; @@ -235,14 +323,20 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, rxkhdr.data_size = htonl(data_size | (u32)check << 16); rxkhdr.checksum = 0; - memcpy(sechdr, &rxkhdr, sizeof(rxkhdr)); + memcpy(skb->head, &rxkhdr, sizeof(rxkhdr)); + + pad = sizeof(struct rxkad_level2_hdr) + data_size; + pad = RXKAD_ALIGN - pad; + pad &= RXKAD_ALIGN - 1; + if (pad) + skb_put_zero(skb, pad); /* encrypt from the session key */ token = call->conn->params.key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); - sg_init_one(&sg[0], sechdr, sizeof(rxkhdr)); - skcipher_request_set_sync_tfm(req, call->conn->cipher); + sg_init_one(&sg[0], skb->head, sizeof(rxkhdr)); + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg[0], &sg[0], sizeof(rxkhdr), iv.x); crypto_skcipher_encrypt(req); @@ -252,11 +346,10 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, if (skb_shinfo(skb)->nr_frags > 16) goto out; - len = data_size + call->conn->size_align - 1; - len &= ~(call->conn->size_align - 1); + len = round_up(data_size, RXKAD_ALIGN); sg_init_table(sg, ARRAY_SIZE(sg)); - err = skb_to_sgvec(skb, sg, 0, len); + err = skb_to_sgvec(skb, sg, 8, len); if (unlikely(err < 0)) goto out; skcipher_request_set_crypt(req, sg, sg, len, iv.x); @@ -275,8 +368,7 @@ out: */ static int rxkad_secure_packet(struct rxrpc_call *call, struct sk_buff *skb, - size_t data_size, - void *sechdr) + size_t data_size) { struct rxrpc_skb_priv *sp; struct skcipher_request *req; @@ -291,7 +383,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, call->debug_id, key_serial(call->conn->params.key), sp->hdr.seq, data_size); - if (!call->conn->cipher) + if (!call->conn->rxkad.cipher) return 0; ret = key_validate(call->conn->params.key); @@ -303,7 +395,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, return -ENOMEM; /* continue encrypting from where we left off */ - memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); + memcpy(&iv, call->conn->rxkad.csum_iv.x, sizeof(iv)); /* calculate the security checksum */ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); @@ -312,7 +404,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, call->crypto_buf[1] = htonl(x); sg_init_one(&sg, call->crypto_buf, 8); - skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); crypto_skcipher_encrypt(req); @@ -329,19 +421,17 @@ static int rxkad_secure_packet(struct rxrpc_call *call, ret = 0; break; case RXRPC_SECURITY_AUTH: - ret = rxkad_secure_packet_auth(call, skb, data_size, sechdr, - req); + ret = rxkad_secure_packet_auth(call, skb, data_size, req); break; case RXRPC_SECURITY_ENCRYPT: - ret = rxkad_secure_packet_encrypt(call, skb, data_size, - sechdr, req); + ret = rxkad_secure_packet_encrypt(call, skb, data_size, req); break; default: ret = -EPERM; break; } - _leave(" = %d [set %hx]", ret, y); + _leave(" = %d [set %x]", ret, y); return ret; } @@ -380,7 +470,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); - skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, 8, iv.x); crypto_skcipher_decrypt(req); @@ -392,7 +482,6 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, RXKADDATALEN); goto protocol_error; } - offset += sizeof(sechdr); len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); @@ -451,7 +540,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, * directly into the target buffer. */ sg = _sg; - nsg = skb_shinfo(skb)->nr_frags; + nsg = skb_shinfo(skb)->nr_frags + 1; if (nsg <= 4) { nsg = 4; } else { @@ -472,7 +561,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, token = call->conn->params.key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); - skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, len, iv.x); crypto_skcipher_decrypt(req); @@ -486,7 +575,6 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, RXKADDATALEN); goto protocol_error; } - offset += sizeof(sechdr); len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); @@ -538,7 +626,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, _enter("{%d{%x}},{#%u}", call->debug_id, key_serial(call->conn->params.key), seq); - if (!call->conn->cipher) + if (!call->conn->rxkad.cipher) return 0; req = rxkad_get_call_crypto(call); @@ -546,7 +634,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, return -ENOMEM; /* continue encrypting from where we left off */ - memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); + memcpy(&iv, call->conn->rxkad.csum_iv.x, sizeof(iv)); /* validate the security checksum */ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); @@ -555,7 +643,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, call->crypto_buf[1] = htonl(x); sg_init_one(&sg, call->crypto_buf, 8); - skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); crypto_skcipher_encrypt(req); @@ -648,16 +736,12 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) u32 serial; int ret; - _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); + _enter("{%d}", conn->debug_id); - ret = key_validate(conn->server_key); - if (ret < 0) - return ret; - - get_random_bytes(&conn->security_nonce, sizeof(conn->security_nonce)); + get_random_bytes(&conn->rxkad.nonce, sizeof(conn->rxkad.nonce)); challenge.version = htonl(2); - challenge.nonce = htonl(conn->security_nonce); + challenge.nonce = htonl(conn->rxkad.nonce); challenge.min_level = htonl(0); challenge.__padding = 0; @@ -785,7 +869,7 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn, struct rxrpc_crypt iv; struct scatterlist sg[1]; - req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + req = skcipher_request_alloc(&conn->rxkad.cipher->base, GFP_NOFS); if (!req) return -ENOMEM; @@ -794,7 +878,7 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn, sg_init_table(sg, 1); sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted)); - skcipher_request_set_sync_tfm(req, conn->cipher); + skcipher_request_set_sync_tfm(req, conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x); crypto_skcipher_encrypt(req); @@ -892,6 +976,7 @@ other_error: * decrypt the kerberos IV ticket in the response */ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, + struct key *server_key, struct sk_buff *skb, void *ticket, size_t ticket_len, struct rxrpc_crypt *_session_key, @@ -911,30 +996,17 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, u32 abort_code; u8 *p, *q, *name, *end; - _enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key)); + _enter("{%d},{%x}", conn->debug_id, key_serial(server_key)); *_expiry = 0; - ret = key_validate(conn->server_key); - if (ret < 0) { - switch (ret) { - case -EKEYEXPIRED: - abort_code = RXKADEXPIRED; - goto other_error; - default: - abort_code = RXKADNOAUTH; - goto other_error; - } - } - - ASSERT(conn->server_key->payload.data[0] != NULL); + ASSERT(server_key->payload.data[0] != NULL); ASSERTCMP((unsigned long) ticket & 7UL, ==, 0); - memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv)); + memcpy(&iv, &server_key->payload.data[2], sizeof(iv)); ret = -ENOMEM; - req = skcipher_request_alloc(conn->server_key->payload.data[0], - GFP_NOFS); + req = skcipher_request_alloc(server_key->payload.data[0], GFP_NOFS); if (!req) goto temporary_error; @@ -1090,6 +1162,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, struct rxkad_response *response; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; + struct key *server_key; const char *eproto; time64_t expiry; void *ticket; @@ -1097,7 +1170,27 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, __be32 csum; int ret, i; - _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); + _enter("{%d}", conn->debug_id); + + server_key = rxrpc_look_up_server_security(conn, skb, 0, 0); + if (IS_ERR(server_key)) { + switch (PTR_ERR(server_key)) { + case -ENOKEY: + abort_code = RXKADUNKNOWNKEY; + break; + case -EKEYEXPIRED: + abort_code = RXKADEXPIRED; + break; + default: + abort_code = RXKADNOAUTH; + break; + } + trace_rxrpc_abort(0, "SVK", + sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + abort_code, PTR_ERR(server_key)); + *_abort_code = abort_code; + return -EPROTO; + } ret = -ENOMEM; response = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); @@ -1109,8 +1202,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), response, sizeof(*response)) < 0) goto protocol_error; - if (!pskb_pull(skb, sizeof(*response))) - BUG(); version = ntohl(response->version); ticket_len = ntohl(response->ticket_len); @@ -1137,18 +1228,18 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, ret = -ENOMEM; ticket = kmalloc(ticket_len, GFP_NOFS); if (!ticket) - goto temporary_error; + goto temporary_error_free_resp; eproto = tracepoint_string("rxkad_tkt_short"); abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), ticket, ticket_len) < 0) goto protocol_error_free; - ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key, - &expiry, _abort_code); + ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len, + &session_key, &expiry, _abort_code); if (ret < 0) - goto temporary_error_free_resp; + goto temporary_error_free_ticket; /* use the session key from inside the ticket to decrypt the * response */ @@ -1169,7 +1260,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, if (response->encrypted.checksum != csum) goto protocol_error_free; - spin_lock(&conn->channel_lock); + spin_lock(&conn->bundle->channel_lock); for (i = 0; i < RXRPC_MAXCALLS; i++) { struct rxrpc_call *call; u32 call_id = ntohl(response->encrypted.call_id[i]); @@ -1186,17 +1277,17 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, if (call_id > conn->channels[i].call_counter) { call = rcu_dereference_protected( conn->channels[i].call, - lockdep_is_held(&conn->channel_lock)); + lockdep_is_held(&conn->bundle->channel_lock)); if (call && call->state < RXRPC_CALL_COMPLETE) goto protocol_error_unlock; conn->channels[i].call_counter = call_id; } } - spin_unlock(&conn->channel_lock); + spin_unlock(&conn->bundle->channel_lock); eproto = tracepoint_string("rxkad_rsp_seq"); abort_code = RXKADOUTOFSEQUENCE; - if (ntohl(response->encrypted.inc_nonce) != conn->security_nonce + 1) + if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) goto protocol_error_free; eproto = tracepoint_string("rxkad_rsp_level"); @@ -1219,12 +1310,13 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, return 0; protocol_error_unlock: - spin_unlock(&conn->channel_lock); + spin_unlock(&conn->bundle->channel_lock); protocol_error_free: kfree(ticket); protocol_error: kfree(response); trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); + key_put(server_key); *_abort_code = abort_code; return -EPROTO; @@ -1237,6 +1329,7 @@ temporary_error: * ENOMEM. We just want to send the challenge again. Note that we * also come out this way if the ticket decryption fails. */ + key_put(server_key); return ret; } @@ -1247,8 +1340,8 @@ static void rxkad_clear(struct rxrpc_connection *conn) { _enter(""); - if (conn->cipher) - crypto_free_sync_skcipher(conn->cipher); + if (conn->rxkad.cipher) + crypto_free_sync_skcipher(conn->rxkad.cipher); } /* @@ -1296,8 +1389,11 @@ const struct rxrpc_security rxkad = { .no_key_abort = RXKADUNKNOWNKEY, .init = rxkad_init, .exit = rxkad_exit, + .preparse_server_key = rxkad_preparse_server_key, + .free_preparse_server_key = rxkad_free_preparse_server_key, + .destroy_server_key = rxkad_destroy_server_key, .init_connection_security = rxkad_init_connection_security, - .prime_packet_security = rxkad_prime_packet_security, + .how_much_data = rxkad_how_much_data, .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, .free_call_crypto = rxkad_free_call_crypto, diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 9b1fb9ed0717..50cb5f1ee0c0 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -55,7 +55,7 @@ void rxrpc_exit_security(void) /* * look up an rxrpc security module */ -static const struct rxrpc_security *rxrpc_security_lookup(u8 security_index) +const struct rxrpc_security *rxrpc_security_lookup(u8 security_index) { if (security_index >= ARRAY_SIZE(rxrpc_security_types)) return NULL; @@ -81,16 +81,17 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) if (ret < 0) return ret; - token = key->payload.data[0]; - if (!token) - return -EKEYREJECTED; + for (token = key->payload.data[0]; token; token = token->next) { + sec = rxrpc_security_lookup(token->security_index); + if (sec) + goto found; + } + return -EKEYREJECTED; - sec = rxrpc_security_lookup(token->security_index); - if (!sec) - return -EKEYREJECTED; +found: conn->security = sec; - ret = conn->security->init_connection_security(conn); + ret = conn->security->init_connection_security(conn, token); if (ret < 0) { conn->security = &rxrpc_no_security; return ret; @@ -101,22 +102,16 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) } /* - * Find the security key for a server connection. + * Set the ops a server connection. */ -bool rxrpc_look_up_server_security(struct rxrpc_local *local, struct rxrpc_sock *rx, - const struct rxrpc_security **_sec, - struct key **_key, - struct sk_buff *skb) +const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx, + struct sk_buff *skb) { const struct rxrpc_security *sec; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - key_ref_t kref = NULL; - char kdesc[5 + 1 + 3 + 1]; _enter(""); - sprintf(kdesc, "%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex); - sec = rxrpc_security_lookup(sp->hdr.securityIndex); if (!sec) { trace_rxrpc_abort(0, "SVS", @@ -124,35 +119,72 @@ bool rxrpc_look_up_server_security(struct rxrpc_local *local, struct rxrpc_sock RX_INVALID_OPERATION, EKEYREJECTED); skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; skb->priority = RX_INVALID_OPERATION; - return false; + return NULL; } - if (sp->hdr.securityIndex == RXRPC_SECURITY_NONE) - goto out; - - if (!rx->securities) { + if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE && + !rx->securities) { trace_rxrpc_abort(0, "SVR", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EKEYREJECTED); skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = RX_INVALID_OPERATION; - return false; + skb->priority = sec->no_key_abort; + return NULL; } + return sec; +} + +/* + * Find the security key for a server connection. + */ +struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn, + struct sk_buff *skb, + u32 kvno, u32 enctype) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_sock *rx; + struct key *key = ERR_PTR(-EKEYREJECTED); + key_ref_t kref = NULL; + char kdesc[5 + 1 + 3 + 1 + 12 + 1 + 12 + 1]; + int ret; + + _enter(""); + + if (enctype) + sprintf(kdesc, "%u:%u:%u:%u", + sp->hdr.serviceId, sp->hdr.securityIndex, kvno, enctype); + else if (kvno) + sprintf(kdesc, "%u:%u:%u", + sp->hdr.serviceId, sp->hdr.securityIndex, kvno); + else + sprintf(kdesc, "%u:%u", + sp->hdr.serviceId, sp->hdr.securityIndex); + + rcu_read_lock(); + + rx = rcu_dereference(conn->params.local->service); + if (!rx) + goto out; + /* look through the service's keyring */ kref = keyring_search(make_key_ref(rx->securities, 1UL), &key_type_rxrpc_s, kdesc, true); if (IS_ERR(kref)) { - trace_rxrpc_abort(0, "SVK", - sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - sec->no_key_abort, EKEYREJECTED); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = sec->no_key_abort; - return false; + key = ERR_CAST(kref); + goto out; + } + + key = key_ref_to_ptr(kref); + + ret = key_validate(key); + if (ret < 0) { + key_put(key); + key = ERR_PTR(ret); + goto out; } out: - *_sec = sec; - *_key = key_ref_to_ptr(kref); - return true; + rcu_read_unlock(); + return key; } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 813fd6888142..3c3a626459de 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -18,6 +18,21 @@ #include "ar-internal.h" /* + * Return true if there's sufficient Tx queue space. + */ +static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win) +{ + unsigned int win_size = + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra); + rxrpc_seq_t tx_win = READ_ONCE(call->tx_hard_ack); + + if (_tx_win) + *_tx_win = tx_win; + return call->tx_top - tx_win < win_size; +} + +/* * Wait for space to appear in the Tx queue or a signal to occur. */ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, @@ -26,9 +41,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, { for (;;) { set_current_state(TASK_INTERRUPTIBLE); - if (call->tx_top - call->tx_hard_ack < - min_t(unsigned int, call->tx_winsize, - call->cong_cwnd + call->cong_extra)) + if (rxrpc_check_tx_space(call, NULL)) return 0; if (call->state >= RXRPC_CALL_COMPLETE) @@ -38,10 +51,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, return sock_intr_errno(*timeo); trace_rxrpc_transmit(call, rxrpc_transmit_wait); - mutex_unlock(&call->user_mutex); *timeo = schedule_timeout(*timeo); - if (mutex_lock_interruptible(&call->user_mutex) < 0) - return sock_intr_errno(*timeo); } } @@ -49,40 +59,36 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, * Wait for space to appear in the Tx queue uninterruptibly, but with * a timeout of 2*RTT if no progress was made and a signal occurred. */ -static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, +static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, struct rxrpc_call *call) { rxrpc_seq_t tx_start, tx_win; - signed long rtt2, timeout; - u64 rtt; + signed long rtt, timeout; - rtt = READ_ONCE(call->peer->rtt); - rtt2 = nsecs_to_jiffies64(rtt) * 2; - if (rtt2 < 1) - rtt2 = 1; + rtt = READ_ONCE(call->peer->srtt_us) >> 3; + rtt = usecs_to_jiffies(rtt) * 2; + if (rtt < 2) + rtt = 2; - timeout = rtt2; + timeout = rtt; tx_start = READ_ONCE(call->tx_hard_ack); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); tx_win = READ_ONCE(call->tx_hard_ack); - if (call->tx_top - tx_win < - min_t(unsigned int, call->tx_winsize, - call->cong_cwnd + call->cong_extra)) + if (rxrpc_check_tx_space(call, &tx_win)) return 0; if (call->state >= RXRPC_CALL_COMPLETE) return call->error; - if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) && - timeout == 0 && + if (timeout == 0 && tx_win == tx_start && signal_pending(current)) return -EINTR; if (tx_win != tx_start) { - timeout = rtt2; + timeout = rtt; tx_start = tx_win; } @@ -92,6 +98,26 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, } /* + * Wait for space to appear in the Tx queue uninterruptibly. + */ +static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, + struct rxrpc_call *call, + long *timeo) +{ + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (rxrpc_check_tx_space(call, NULL)) + return 0; + + if (call->state >= RXRPC_CALL_COMPLETE) + return call->error; + + trace_rxrpc_transmit(call, rxrpc_transmit_wait); + *timeo = schedule_timeout(*timeo); + } +} + +/* * wait for space to appear in the transmit/ACK window * - caller holds the socket locked */ @@ -108,10 +134,19 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, add_wait_queue(&call->waitq, &myself); - if (waitall) - ret = rxrpc_wait_for_tx_window_nonintr(rx, call); - else - ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo); + switch (call->interruptibility) { + case RXRPC_INTERRUPTIBLE: + if (waitall) + ret = rxrpc_wait_for_tx_window_waitall(rx, call); + else + ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo); + break; + case RXRPC_PREINTERRUPTIBLE: + case RXRPC_UNINTERRUPTIBLE: + default: + ret = rxrpc_wait_for_tx_window_nonintr(rx, call, timeo); + break; + } remove_wait_queue(&call->waitq, &myself); set_current_state(TASK_RUNNING); @@ -203,7 +238,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); if (!last) break; - /* Fall through */ + fallthrough; case RXRPC_CALL_SERVER_SEND_REPLY: call->state = RXRPC_CALL_SERVER_AWAIT_ACK; rxrpc_notify_end_tx(rx, call, notify_end_tx); @@ -223,25 +258,16 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, case -ENETUNREACH: case -EHOSTUNREACH: case -ECONNREFUSED: - rxrpc_set_call_completion(call, - RXRPC_CALL_LOCAL_ERROR, + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); - rxrpc_notify_socket(call); goto out; } _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); } else { - unsigned long now = jiffies, resend_at; - - if (call->peer->rtt_usage > 1) - resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2); - else - resend_at = rxrpc_resend_timeout; - if (resend_at < 1) - resend_at = 1; + unsigned long now = jiffies; + unsigned long resend_at = now + call->peer->rto_j; - resend_at += now; WRITE_ONCE(call->resend_at, resend_at); rxrpc_reduce_call_timer(call, resend_at, now, rxrpc_timer_set_for_send); @@ -261,78 +287,76 @@ out: static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, struct msghdr *msg, size_t len, - rxrpc_notify_end_tx_t notify_end_tx) + rxrpc_notify_end_tx_t notify_end_tx, + bool *_dropped_lock) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; struct sock *sk = &rx->sk; + enum rxrpc_call_state state; long timeo; - bool more; - int ret, copied; + bool more = msg->msg_flags & MSG_MORE; + int ret, copied = 0; timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); /* this should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) - return -EPIPE; - - more = msg->msg_flags & MSG_MORE; - +reload: + ret = -EPIPE; + if (sk->sk_shutdown & SEND_SHUTDOWN) + goto maybe_error; + state = READ_ONCE(call->state); + ret = -ESHUTDOWN; + if (state >= RXRPC_CALL_COMPLETE) + goto maybe_error; + ret = -EPROTO; + if (state != RXRPC_CALL_CLIENT_SEND_REQUEST && + state != RXRPC_CALL_SERVER_ACK_REQUEST && + state != RXRPC_CALL_SERVER_SEND_REPLY) + goto maybe_error; + + ret = -EMSGSIZE; if (call->tx_total_len != -1) { - if (len > call->tx_total_len) - return -EMSGSIZE; - if (!more && len != call->tx_total_len) - return -EMSGSIZE; + if (len - copied > call->tx_total_len) + goto maybe_error; + if (!more && len - copied != call->tx_total_len) + goto maybe_error; } skb = call->tx_pending; call->tx_pending = NULL; rxrpc_see_skb(skb, rxrpc_skb_seen); - copied = 0; do { /* Check to see if there's a ping ACK to reply to. */ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) rxrpc_send_ack_packet(call, false, NULL); if (!skb) { - size_t size, chunk, max, space; + size_t remain, bufsize, chunk, offset; _debug("alloc"); - if (call->tx_top - call->tx_hard_ack >= - min_t(unsigned int, call->tx_winsize, - call->cong_cwnd + call->cong_extra)) { - ret = -EAGAIN; - if (msg->msg_flags & MSG_DONTWAIT) - goto maybe_error; - ret = rxrpc_wait_for_tx_window(rx, call, - &timeo, - msg->msg_flags & MSG_WAITALL); - if (ret < 0) - goto maybe_error; - } - - max = RXRPC_JUMBO_DATALEN; - max -= call->conn->security_size; - max &= ~(call->conn->size_align - 1UL); - - chunk = max; - if (chunk > msg_data_left(msg) && !more) - chunk = msg_data_left(msg); + if (!rxrpc_check_tx_space(call, NULL)) + goto wait_for_space; - space = chunk + call->conn->size_align; - space &= ~(call->conn->size_align - 1UL); - - size = space + call->conn->security_size; + /* Work out the maximum size of a packet. Assume that + * the security header is going to be in the padded + * region (enc blocksize), but the trailer is not. + */ + remain = more ? INT_MAX : msg_data_left(msg); + ret = call->conn->security->how_much_data(call, remain, + &bufsize, &chunk, &offset); + if (ret < 0) + goto maybe_error; - _debug("SIZE: %zu/%zu/%zu", chunk, space, size); + _debug("SIZE: %zu/%zu @%zu", chunk, bufsize, offset); /* create a buffer that we can retain until it's ACK'd */ skb = sock_alloc_send_skb( - sk, size, msg->msg_flags & MSG_DONTWAIT, &ret); + sk, bufsize, msg->msg_flags & MSG_DONTWAIT, &ret); if (!skb) goto maybe_error; @@ -344,9 +368,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, ASSERTCMP(skb->mark, ==, 0); - _debug("HS: %u", call->conn->security_size); - skb_reserve(skb, call->conn->security_size); - skb->len += call->conn->security_size; + __skb_put(skb, offset); sp->remain = chunk; if (sp->remain > skb_tailroom(skb)) @@ -395,17 +417,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, (msg_data_left(msg) == 0 && !more)) { struct rxrpc_connection *conn = call->conn; uint32_t seq; - size_t pad; - - /* pad out if we're using security */ - if (conn->security_ix) { - pad = conn->security_size + skb->mark; - pad = conn->size_align - pad; - pad &= conn->size_align - 1; - _debug("pad %zu", pad); - if (pad) - skb_put_zero(skb, pad); - } seq = call->tx_top + 1; @@ -419,8 +430,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, call->tx_winsize) sp->hdr.flags |= RXRPC_MORE_PACKETS; - ret = call->security->secure_packet( - call, skb, skb->mark, skb->head); + ret = call->security->secure_packet(call, skb, skb->mark); if (ret < 0) goto out; @@ -434,6 +444,12 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, success: ret = copied; + if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) { + read_lock_bh(&call->state_lock); + if (call->error < 0) + ret = call->error; + read_unlock_bh(&call->state_lock); + } out: call->tx_pending = skb; _leave(" = %d", ret); @@ -452,6 +468,27 @@ maybe_error: efault: ret = -EFAULT; goto out; + +wait_for_space: + ret = -EAGAIN; + if (msg->msg_flags & MSG_DONTWAIT) + goto maybe_error; + mutex_unlock(&call->user_mutex); + *_dropped_lock = true; + ret = rxrpc_wait_for_tx_window(rx, call, &timeo, + msg->msg_flags & MSG_WAITALL); + if (ret < 0) + goto maybe_error; + if (call->interruptibility == RXRPC_INTERRUPTIBLE) { + if (mutex_lock_interruptible(&call->user_mutex) < 0) { + ret = sock_intr_errno(timeo); + goto maybe_error; + } + } else { + mutex_lock(&call->user_mutex); + } + *_dropped_lock = false; + goto reload; } /* @@ -503,10 +540,10 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) return -EINVAL; break; - case RXRPC_ACCEPT: + case RXRPC_CHARGE_ACCEPT: if (p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; - p->command = RXRPC_CMD_ACCEPT; + p->command = RXRPC_CMD_CHARGE_ACCEPT; if (len != 0) return -EINVAL; break; @@ -613,13 +650,14 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) enum rxrpc_call_state state; struct rxrpc_call *call; unsigned long now, j; + bool dropped_lock = false; int ret; struct rxrpc_send_params p = { .call.tx_total_len = -1, .call.user_call_ID = 0, .call.nr_timeouts = 0, - .call.intr = true, + .call.interruptibility = RXRPC_INTERRUPTIBLE, .abort_code = 0, .command = RXRPC_CMD_SEND_DATA, .exclusive = false, @@ -632,16 +670,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (ret < 0) goto error_release_sock; - if (p.command == RXRPC_CMD_ACCEPT) { + if (p.command == RXRPC_CMD_CHARGE_ACCEPT) { ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) goto error_release_sock; - call = rxrpc_accept_call(rx, p.call.user_call_ID, NULL); - /* The socket is now unlocked. */ - if (IS_ERR(call)) - return PTR_ERR(call); - ret = 0; - goto out_put_unlock; + ret = rxrpc_user_charge_accept(rx, p.call.user_call_ID); + goto error_release_sock; } call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID); @@ -654,13 +688,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (IS_ERR(call)) return PTR_ERR(call); /* ... and we have the call lock. */ + ret = 0; + if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) + goto out_put_unlock; } else { switch (READ_ONCE(call->state)) { case RXRPC_CALL_UNINITIALISED: case RXRPC_CALL_CLIENT_AWAIT_CONN: case RXRPC_CALL_SERVER_PREALLOC: case RXRPC_CALL_SERVER_SECURING: - case RXRPC_CALL_SERVER_ACCEPTING: rxrpc_put_call(call, rxrpc_call_put); ret = -EBUSY; goto error_release_sock; @@ -691,13 +727,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (p.call.timeouts.normal > 0 && j == 0) j = 1; WRITE_ONCE(call->next_rx_timo, j); - /* Fall through */ + fallthrough; case 2: j = msecs_to_jiffies(p.call.timeouts.idle); if (p.call.timeouts.idle > 0 && j == 0) j = 1; WRITE_ONCE(call->next_req_timo, j); - /* Fall through */ + fallthrough; case 1: if (p.call.timeouts.hard > 0) { j = msecs_to_jiffies(p.call.timeouts.hard); @@ -723,21 +759,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_send_abort_packet(call); } else if (p.command != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; - } else if (rxrpc_is_client_call(call) && - state != RXRPC_CALL_CLIENT_SEND_REQUEST) { - /* request phase complete for this client call */ - ret = -EPROTO; - } else if (rxrpc_is_service_call(call) && - state != RXRPC_CALL_SERVER_ACK_REQUEST && - state != RXRPC_CALL_SERVER_SEND_REPLY) { - /* Reply phase not begun or not complete for service call. */ - ret = -EPROTO; } else { - ret = rxrpc_send_data(rx, call, msg, len, NULL); + ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock); } out_put_unlock: - mutex_unlock(&call->user_mutex); + if (!dropped_lock) + mutex_unlock(&call->user_mutex); error_put: rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); @@ -765,6 +793,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, struct msghdr *msg, size_t len, rxrpc_notify_end_tx_t notify_end_tx) { + bool dropped_lock = false; int ret; _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); @@ -782,7 +811,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, case RXRPC_CALL_SERVER_ACK_REQUEST: case RXRPC_CALL_SERVER_SEND_REPLY: ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, - notify_end_tx); + notify_end_tx, &dropped_lock); break; case RXRPC_CALL_COMPLETE: read_lock_bh(&call->state_lock); @@ -796,7 +825,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, break; } - mutex_unlock(&call->user_mutex); + if (!dropped_lock) + mutex_unlock(&call->user_mutex); _leave(" = %d", ret); return ret; } diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c new file mode 100644 index 000000000000..ee269e0e6ee8 --- /dev/null +++ b/net/rxrpc/server_key.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC key management + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * RxRPC keys should have a description of describing their purpose: + * "afs@CAMBRIDGE.REDHAT.COM> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/skcipher.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/key-type.h> +#include <linux/ctype.h> +#include <linux/slab.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include <keys/rxrpc-type.h> +#include <keys/user-type.h> +#include "ar-internal.h" + +static int rxrpc_vet_description_s(const char *); +static int rxrpc_preparse_s(struct key_preparsed_payload *); +static void rxrpc_free_preparse_s(struct key_preparsed_payload *); +static void rxrpc_destroy_s(struct key *); +static void rxrpc_describe_s(const struct key *, struct seq_file *); + +/* + * rxrpc server keys take "<serviceId>:<securityIndex>[:<sec-specific>]" as the + * description and the key material as the payload. + */ +struct key_type key_type_rxrpc_s = { + .name = "rxrpc_s", + .flags = KEY_TYPE_NET_DOMAIN, + .vet_description = rxrpc_vet_description_s, + .preparse = rxrpc_preparse_s, + .free_preparse = rxrpc_free_preparse_s, + .instantiate = generic_key_instantiate, + .destroy = rxrpc_destroy_s, + .describe = rxrpc_describe_s, +}; + +/* + * Vet the description for an RxRPC server key. + */ +static int rxrpc_vet_description_s(const char *desc) +{ + unsigned long service, sec_class; + char *p; + + service = simple_strtoul(desc, &p, 10); + if (*p != ':' || service > 65535) + return -EINVAL; + sec_class = simple_strtoul(p + 1, &p, 10); + if ((*p && *p != ':') || sec_class < 1 || sec_class > 255) + return -EINVAL; + return 0; +} + +/* + * Preparse a server secret key. + */ +static int rxrpc_preparse_s(struct key_preparsed_payload *prep) +{ + const struct rxrpc_security *sec; + unsigned int service, sec_class; + int n; + + _enter("%zu", prep->datalen); + + if (!prep->orig_description) + return -EINVAL; + + if (sscanf(prep->orig_description, "%u:%u%n", &service, &sec_class, &n) != 2) + return -EINVAL; + + sec = rxrpc_security_lookup(sec_class); + if (!sec) + return -ENOPKG; + + prep->payload.data[1] = (struct rxrpc_security *)sec; + + if (!sec->preparse_server_key) + return -EINVAL; + + return sec->preparse_server_key(prep); +} + +static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) +{ + const struct rxrpc_security *sec = prep->payload.data[1]; + + if (sec && sec->free_preparse_server_key) + sec->free_preparse_server_key(prep); +} + +static void rxrpc_destroy_s(struct key *key) +{ + const struct rxrpc_security *sec = key->payload.data[1]; + + if (sec && sec->destroy_server_key) + sec->destroy_server_key(key); +} + +static void rxrpc_describe_s(const struct key *key, struct seq_file *m) +{ + const struct rxrpc_security *sec = key->payload.data[1]; + + seq_puts(m, key->description); + if (sec && sec->describe_server_key) + sec->describe_server_key(key, m); +} + +/* + * grab the security keyring for a server socket + */ +int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen) +{ + struct key *key; + char *description; + + _enter(""); + + if (optlen <= 0 || optlen > PAGE_SIZE - 1) + return -EINVAL; + + description = memdup_sockptr_nul(optval, optlen); + if (IS_ERR(description)) + return PTR_ERR(description); + + key = request_key(&key_type_keyring, description, NULL); + if (IS_ERR(key)) { + kfree(description); + _leave(" = %ld", PTR_ERR(key)); + return PTR_ERR(key); + } + + rx->securities = key; + kfree(description); + _leave(" = 0 [key %x]", key->serial); + return 0; +} diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 0348d2bf6f7d..580a5acffee7 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -71,7 +71,6 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) const void *here = __builtin_return_address(0); if (skb) { int n; - CHECK_SLAB_OKAY(&skb->users); n = atomic_dec_return(select_skb_count(skb)); trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, rxrpc_skb(skb)->rx_flags, here); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 2bbb38161851..555e0910786b 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -12,7 +12,7 @@ static struct ctl_table_header *rxrpc_sysctl_reg_table; static const unsigned int four = 4; -static const unsigned int thirtytwo = 32; +static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1; static const unsigned int n_65535 = 65535; static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; static const unsigned long one_jiffy = 1; @@ -21,7 +21,7 @@ static const unsigned long max_jiffies = MAX_JIFFY_OFFSET; /* * RxRPC operating parameters. * - * See Documentation/networking/rxrpc.txt and the variable definitions for more + * See Documentation/networking/rxrpc.rst and the variable definitions for more * information on the individual parameters. */ static struct ctl_table rxrpc_sysctl_table[] = { @@ -71,33 +71,16 @@ static struct ctl_table rxrpc_sysctl_table[] = { .extra1 = (void *)&one_jiffy, .extra2 = (void *)&max_jiffies, }, - { - .procname = "resend_timeout", - .data = &rxrpc_resend_timeout, - .maxlen = sizeof(unsigned long), - .mode = 0644, - .proc_handler = proc_doulongvec_ms_jiffies_minmax, - .extra1 = (void *)&one_jiffy, - .extra2 = (void *)&max_jiffies, - }, /* Non-time values */ { - .procname = "max_client_conns", - .data = &rxrpc_max_client_connections, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = (void *)&rxrpc_reap_client_connections, - }, - { .procname = "reap_client_conns", .data = &rxrpc_reap_client_connections, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void *)SYSCTL_ONE, - .extra2 = (void *)&rxrpc_max_client_connections, + .extra2 = (void *)&n_65535, }, { .procname = "max_backlog", @@ -106,7 +89,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void *)&four, - .extra2 = (void *)&thirtytwo, + .extra2 = (void *)&max_backlog, }, { .procname = "rx_window_size", |