Diffstat (limited to 'include/net/tcp.h')
-rw-r--r--   include/net/tcp.h   356
1 file changed, 199 insertions(+), 157 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6ae35199d3b3..5078ad868fee 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -26,6 +26,7 @@
 #include <linux/kref.h>
 #include <linux/ktime.h>
 #include <linux/indirect_call_wrapper.h>
+#include <linux/bits.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -41,6 +42,7 @@
 #include <net/inet_ecn.h>
 #include <net/dst.h>
 #include <net/mptcp.h>
+#include <net/xfrm.h>
 
 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
@@ -52,6 +54,8 @@ extern struct inet_hashinfo tcp_hashinfo;
 DECLARE_PER_CPU(unsigned int, tcp_orphan_count);
 int tcp_orphan_count_sum(void);
 
+DECLARE_PER_CPU(u32, tcp_tw_isn);
+
 void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 #define MAX_TCP_HEADER  L1_CACHE_ALIGN(128 + MAX_HEADER)
@@ -141,8 +145,9 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
 #define TCP_DELACK_MIN  4U
 #define TCP_ATO_MIN     4U
 #endif
-#define TCP_RTO_MAX     ((unsigned)(120*HZ))
-#define TCP_RTO_MIN     ((unsigned)(HZ/5))
+#define TCP_RTO_MAX_SEC 120
+#define TCP_RTO_MAX     ((unsigned)(TCP_RTO_MAX_SEC * HZ))
+#define TCP_RTO_MIN     ((unsigned)(HZ / 5))
 #define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */
 
 #define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */
@@ -294,14 +299,6 @@ static inline bool between(__u32 seq1, __u32 seq2, __u32 seq3)
 	return seq3 - seq2 >= seq1 - seq2;
 }
 
-static inline bool tcp_out_of_memory(struct sock *sk)
-{
-	if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-	    sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
-		return true;
-	return false;
-}
-
 static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 {
 	sk_wmem_queued_add(sk, -skb->truesize);
@@ -314,7 +311,7 @@ static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 
 void sk_forced_mem_schedule(struct sock *sk, int size);
 
-bool tcp_check_oom(struct sock *sk, int shift);
+bool tcp_check_oom(const struct sock *sk, int shift);
 
 
 extern struct proto tcp_prot;
@@ -353,7 +350,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
 void tcp_rcv_space_adjust(struct sock *sk);
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
 void tcp_twsk_destructor(struct sock *sk);
-void tcp_twsk_purge(struct list_head *net_exit_list, int family);
+void tcp_twsk_purge(struct list_head *net_exit_list);
 ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
 			struct pipe_inode_info *pipe, size_t len,
 			unsigned int flags);
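[Note] The @@ -52 hunk introduces a per-CPU tcp_tw_isn slot; the tcp_tw_isn field it replaces is dropped from struct tcp_skb_cb later in this diff. The pattern is a same-CPU hand-off inside softirq processing: the TIME-WAIT code parks the chosen ISN, and the request path picks it up before the softirq moves on. A minimal, hedged sketch of that pattern (the demo_* names are hypothetical, not from the patch):

	#include <linux/percpu.h>

	DEFINE_PER_CPU(u32, demo_tw_isn);

	/* Producer: runs in softirq, parks the chosen ISN on this CPU. */
	static void demo_tw_store_isn(u32 isn)
	{
		__this_cpu_write(demo_tw_isn, isn);
	}

	/* Consumer: runs later in the same softirq, on the same CPU, so
	 * another flow cannot clobber the value in between.
	 */
	static u32 demo_tw_load_isn(void)
	{
		return __this_cpu_read(demo_tw_isn);
	}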
@@ -377,25 +374,64 @@ static inline void tcp_dec_quickack_mode(struct sock *sk)
 	}
 }
 
-#define TCP_ECN_OK		1
-#define TCP_ECN_QUEUE_CWR	2
-#define TCP_ECN_DEMAND_CWR	4
-#define TCP_ECN_SEEN		8
+#define TCP_ECN_MODE_RFC3168	BIT(0)
+#define TCP_ECN_QUEUE_CWR	BIT(1)
+#define TCP_ECN_DEMAND_CWR	BIT(2)
+#define TCP_ECN_SEEN		BIT(3)
+#define TCP_ECN_MODE_ACCECN	BIT(4)
+
+#define TCP_ECN_DISABLED	0
+#define TCP_ECN_MODE_PENDING	(TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN)
+#define TCP_ECN_MODE_ANY	(TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN)
+
+static inline bool tcp_ecn_mode_any(const struct tcp_sock *tp)
+{
+	return tp->ecn_flags & TCP_ECN_MODE_ANY;
+}
+
+static inline bool tcp_ecn_mode_rfc3168(const struct tcp_sock *tp)
+{
+	return (tp->ecn_flags & TCP_ECN_MODE_ANY) == TCP_ECN_MODE_RFC3168;
+}
+
+static inline bool tcp_ecn_mode_accecn(const struct tcp_sock *tp)
+{
+	return (tp->ecn_flags & TCP_ECN_MODE_ANY) == TCP_ECN_MODE_ACCECN;
+}
+
+static inline bool tcp_ecn_disabled(const struct tcp_sock *tp)
+{
+	return !tcp_ecn_mode_any(tp);
+}
+
+static inline bool tcp_ecn_mode_pending(const struct tcp_sock *tp)
+{
+	return (tp->ecn_flags & TCP_ECN_MODE_PENDING) == TCP_ECN_MODE_PENDING;
+}
+
+static inline void tcp_ecn_mode_set(struct tcp_sock *tp, u8 mode)
+{
+	tp->ecn_flags &= ~TCP_ECN_MODE_ANY;
+	tp->ecn_flags |= mode;
+}
 
 enum tcp_tw_status {
 	TCP_TW_SUCCESS = 0,
 	TCP_TW_RST = 1,
 	TCP_TW_ACK = 2,
-	TCP_TW_SYN = 3
+	TCP_TW_SYN = 3,
+	TCP_TW_ACK_OOW = 4
 };
 
 enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
 					      struct sk_buff *skb,
-					      const struct tcphdr *th);
+					      const struct tcphdr *th,
+					      u32 *tw_isn,
+					      enum skb_drop_reason *drop_reason);
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			   struct request_sock *req, bool fastopen,
-			   bool *lost_race);
+			   bool *lost_race, enum skb_drop_reason *drop_reason);
 enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
 				       struct sk_buff *skb);
 void tcp_enter_loss(struct sock *sk);
@@ -420,6 +456,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
 		      sockptr_t optval, unsigned int optlen);
 int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
 		   unsigned int optlen);
+void tcp_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
 void tcp_set_keepalive(struct sock *sk, int val);
 void tcp_syn_ack_timeout(const struct request_sock *req);
 int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
@@ -667,10 +704,11 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 void tcp_send_probe0(struct sock *);
 int tcp_write_wakeup(struct sock *, int mib);
 void tcp_send_fin(struct sock *sk);
-void tcp_send_active_reset(struct sock *sk, gfp_t priority);
+void tcp_send_active_reset(struct sock *sk, gfp_t priority,
+			   enum sk_rst_reason reason);
 int tcp_send_synack(struct sock *);
 void tcp_push_one(struct sock *, unsigned int mss_now);
-void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt, u16 flags);
 void tcp_send_ack(struct sock *sk);
 void tcp_send_delayed_ack(struct sock *sk);
 void tcp_send_loss_probe(struct sock *sk);
@@ -681,11 +719,25 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 /* tcp_input.c */
 void tcp_rearm_rto(struct sock *sk);
 void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
+void tcp_done_with_error(struct sock *sk, int err);
 void tcp_reset(struct sock *sk, struct sk_buff *skb);
 void tcp_fin(struct sock *sk);
 void tcp_check_space(struct sock *sk);
 void tcp_sack_compress_send_ack(struct sock *sk);
 
+static inline void tcp_cleanup_skb(struct sk_buff *skb)
+{
+	skb_dst_drop(skb);
+	secpath_reset(skb);
+}
+
+static inline void tcp_add_receive_queue(struct sock *sk, struct sk_buff *skb)
+{
+	DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
+	DEBUG_NET_WARN_ON_ONCE(secpath_exists(skb));
+	__skb_queue_tail(&sk->sk_receive_queue, skb);
+}
+
 /* tcp_timer.c */
 void tcp_init_xmit_timers(struct sock *);
 static inline void tcp_clear_xmit_timers(struct sock *sk)
@@ -732,6 +784,9 @@ void tcp_get_info(struct sock *, struct tcp_info *);
 /* Read 'sendfile()'-style from a TCP socket */
 int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		  sk_read_actor_t recv_actor);
+int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
+			sk_read_actor_t recv_actor, bool noack,
+			u32 *copied_seq);
 int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
 struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off);
 void tcp_read_done(struct sock *sk, size_t len);
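[Note] The ECN helpers in the @@ -377 hunk encode the negotiation state in two bits: disabled is 0, RFC 3168 and AccECN are one bit each, and "pending" is both bits set until the handshake resolves which mode the peer supports. A hedged sketch of that resolution step, using only helpers defined in the hunk (tcp_ecn_demo_resolve() is hypothetical, not part of the patch):

	/* Resolve a pending ECN mode once the handshake tells us whether
	 * the peer negotiated AccECN or classic RFC 3168 ECN.
	 */
	static void tcp_ecn_demo_resolve(struct tcp_sock *tp, bool peer_accecn)
	{
		if (!tcp_ecn_mode_pending(tp))
			return;		/* already resolved, or disabled */

		/* tcp_ecn_mode_set() clears both mode bits, then sets one */
		tcp_ecn_mode_set(tp, peer_accecn ? TCP_ECN_MODE_ACCECN
						 : TCP_ECN_MODE_RFC3168);
	}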
@@ -742,10 +797,14 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu);
 int tcp_mss_to_mtu(struct sock *sk, int mss);
 void tcp_mtup_init(struct sock *sk);
 
-static inline void tcp_bound_rto(const struct sock *sk)
+static inline unsigned int tcp_rto_max(const struct sock *sk)
 {
-	if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
-		inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
+	return READ_ONCE(inet_csk(sk)->icsk_rto_max);
+}
+
+static inline void tcp_bound_rto(struct sock *sk)
+{
+	inet_csk(sk)->icsk_rto = min(inet_csk(sk)->icsk_rto, tcp_rto_max(sk));
 }
 
 static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
@@ -786,7 +845,7 @@ u32 tcp_delack_max(const struct sock *sk);
 static inline u32 tcp_rto_min(const struct sock *sk)
 {
	const struct dst_entry *dst = __sk_dst_get(sk);
-	u32 rto_min = inet_csk(sk)->icsk_rto_min;
+	u32 rto_min = READ_ONCE(inet_csk(sk)->icsk_rto_min);
 
	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
		rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
@@ -914,17 +973,37 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq)
 
 #define tcp_flag_byte(th) (((u_int8_t *)th)[13])
 
-#define TCPHDR_FIN 0x01
-#define TCPHDR_SYN 0x02
-#define TCPHDR_RST 0x04
-#define TCPHDR_PSH 0x08
-#define TCPHDR_ACK 0x10
-#define TCPHDR_URG 0x20
-#define TCPHDR_ECE 0x40
-#define TCPHDR_CWR 0x80
-
+#define TCPHDR_FIN	BIT(0)
+#define TCPHDR_SYN	BIT(1)
+#define TCPHDR_RST	BIT(2)
+#define TCPHDR_PSH	BIT(3)
+#define TCPHDR_ACK	BIT(4)
+#define TCPHDR_URG	BIT(5)
+#define TCPHDR_ECE	BIT(6)
+#define TCPHDR_CWR	BIT(7)
+#define TCPHDR_AE	BIT(8)
+#define TCPHDR_FLAGS_MASK (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \
+			   TCPHDR_PSH | TCPHDR_ACK | TCPHDR_URG | \
+			   TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)
+#define tcp_flags_ntohs(th) (ntohs(*(__be16 *)&tcp_flag_word(th)) & \
+			     TCPHDR_FLAGS_MASK)
+
+#define TCPHDR_ACE (TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)
 #define TCPHDR_SYN_ECN	(TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR)
 
+/* State flags for sacked in struct tcp_skb_cb */
+enum tcp_skb_cb_sacked_flags {
+	TCPCB_SACKED_ACKED	= (1 << 0),	/* SKB ACK'd by a SACK block	*/
+	TCPCB_SACKED_RETRANS	= (1 << 1),	/* SKB retransmitted		*/
+	TCPCB_LOST		= (1 << 2),	/* SKB is lost			*/
+	TCPCB_TAGBITS		= (TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS |
+				   TCPCB_LOST), /* All tag bits			*/
+	TCPCB_REPAIRED		= (1 << 4),	/* SKB repaired (no skb_mstamp_ns)	*/
+	TCPCB_EVER_RETRANS	= (1 << 7),	/* Ever retransmitted frame	*/
+	TCPCB_RETRANS		= (TCPCB_SACKED_RETRANS | TCPCB_EVER_RETRANS |
+				   TCPCB_REPAIRED),
+};
+
 /* This is what the send packet queuing engine uses to pass
  * TCP per-packet control information to the transmission code.
  * We also store the host-order sequence numbers in here too.
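[Note] Widening the flag space past 8 bits (TCPHDR_AE is BIT(8), the low bit of header byte 12) is what lets AccECN treat AE|CWR|ECE (TCPHDR_ACE) as one 3-bit counter field. A hedged sketch of extracting that counter from host-order flags, assuming AE is the most significant bit as in the AccECN draft (demo_ace_field() is illustrative only):

	static u8 demo_ace_field(u16 flags)
	{
		/* ACE value = AE*4 + CWR*2 + ECE*1 */
		return ((flags & TCPHDR_AE)  ? 4 : 0) |
		       ((flags & TCPHDR_CWR) ? 2 : 0) |
		       ((flags & TCPHDR_ECE) ? 1 : 0);
	}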
@@ -935,35 +1014,25 @@ struct tcp_skb_cb {
	__u32		seq;		/* Starting sequence number	*/
	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
	union {
-		/* Note : tcp_tw_isn is used in input path only
-		 *	  (isn chosen by tcp_timewait_state_process())
-		 *
+		/* Note :
		 * tcp_gso_segs/size are used in write queue only,
		 * cf tcp_skb_pcount()/tcp_skb_mss()
		 */
-		__u32		tcp_tw_isn;
		struct {
			u16	tcp_gso_segs;
			u16	tcp_gso_size;
		};
	};
-	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/
+	__u16		tcp_flags;	/* TCP header flags (tcp[12-13])*/
 
	__u8		sacked;		/* State flags for SACK.	*/
-#define TCPCB_SACKED_ACKED	0x01	/* SKB ACK'd by a SACK block	*/
-#define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
-#define TCPCB_LOST		0x04	/* SKB is lost			*/
-#define TCPCB_TAGBITS		0x07	/* All tag bits			*/
-#define TCPCB_REPAIRED		0x10	/* SKB repaired (no skb_mstamp_ns)	*/
-#define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
-#define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
-				TCPCB_REPAIRED)
-
	__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
+#define TSTAMP_ACK_SK	0x1
+#define TSTAMP_ACK_BPF	0x2
-	__u8		txstamp_ack:1,	/* Record TX timestamp for ack? */
+	__u8		txstamp_ack:2,	/* Record TX timestamp for ack? */
			eor:1,		/* Is skb MSG_EOR marked? */
			has_rxtstamp:1,	/* SKB has a RX timestamp	*/
-			unused:5;
+			unused:4;
	__u32		ack_seq;	/* Sequence number ACK'd	*/
	union {
		struct {
@@ -1068,9 +1137,18 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
 static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
					const struct sk_buff *from)
 {
+	/* skb_cmp_decrypted() not needed, use tcp_write_collapse_fence() */
	return likely(tcp_skb_can_collapse_to(to) &&
		      mptcp_skb_can_collapse(to, from) &&
-		      skb_pure_zcopy_same(to, from));
+		      skb_pure_zcopy_same(to, from) &&
+		      skb_frags_readable(to) == skb_frags_readable(from));
+}
+
+static inline bool tcp_skb_can_collapse_rx(const struct sk_buff *to,
+					   const struct sk_buff *from)
+{
+	return likely(mptcp_skb_can_collapse(to, from) &&
+		      !skb_cmp_decrypted(to, from));
 }
 
 /* Events passed to congestion control interface */
@@ -1100,9 +1178,9 @@ enum tcp_ca_ack_event_flags {
 #define TCP_CA_UNSPEC	0
 
 /* Algorithm can be set on socket without CAP_NET_ADMIN privileges */
-#define TCP_CONG_NON_RESTRICTED 0x1
+#define TCP_CONG_NON_RESTRICTED	BIT(0)
 /* Requires ECN/ECT set on all packets */
-#define TCP_CONG_NEEDS_ECN	0x2
+#define TCP_CONG_NEEDS_ECN	BIT(1)
 #define TCP_CONG_MASK	(TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
 
 union tcp_cc_info;
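[Note] The new tcp_skb_can_collapse_rx() exists because receive-side coalescing must not merge a device-decrypted skb (TLS offload) with an undecrypted one. A hedged sketch of a caller honoring that rule (demo_try_coalesce() is illustrative; the in-tree users are the receive/ofo queue paths):

	static bool demo_try_coalesce(struct sock *sk, struct sk_buff *tail,
				      struct sk_buff *skb, bool *fragstolen)
	{
		int delta;

		/* refuse to merge across a decryption-status boundary */
		if (!tcp_skb_can_collapse_rx(tail, skb))
			return false;

		return skb_try_coalesce(tail, skb, fragstolen, &delta);
	}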
@@ -1167,7 +1245,7 @@ struct tcp_congestion_ops {
	/* call when packets are delivered to update cwnd and pacing rate,
	 * after all the ca_state processing. (optional)
	 */
-	void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
+	void (*cong_control)(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs);
 
	/* new value of cwnd after loss (required) */
@@ -1218,7 +1296,7 @@ extern struct tcp_congestion_ops tcp_reno;
 
 struct tcp_congestion_ops *tcp_ca_find(const char *name);
 struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
-u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
+u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca);
 #ifdef CONFIG_INET
 char *tcp_ca_get_name_by_key(u32 key, char *buffer);
 #else
@@ -1416,10 +1494,12 @@ static inline unsigned long tcp_pacing_delay(const struct sock *sk)
 static inline void tcp_reset_xmit_timer(struct sock *sk,
					const int what,
					unsigned long when,
-					const unsigned long max_when)
+					bool pace_delay)
 {
-	inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
-				  max_when);
+	if (pace_delay)
+		when += tcp_pacing_delay(sk);
+	inet_csk_reset_xmit_timer(sk, what, when,
+				  tcp_rto_max(sk));
 }
 
 /* Something is really bad, we could not queue an additional packet,
@@ -1448,7 +1528,7 @@ static inline void tcp_check_probe_timer(struct sock *sk)
 {
	if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
		tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-				     tcp_probe0_base(sk), TCP_RTO_MAX);
+				     tcp_probe0_base(sk), true);
 }
 
 static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
@@ -1539,11 +1619,10 @@ static inline int tcp_space_from_win(const struct sock *sk, int win)
	return __tcp_space_from_win(tcp_sk(sk)->scaling_ratio, win);
 }
 
-/* Assume a conservative default of 1200 bytes of payload per 4K page.
+/* Assume a 50% default for skb->len/skb->truesize ratio.
 * This may be adjusted later in tcp_measure_rcv_mss().
 */
-#define TCP_DEFAULT_SCALING_RATIO ((1200 << TCP_RMEM_TO_WIN_SCALE) / \
-				   SKB_TRUESIZE(4096))
+#define TCP_DEFAULT_SCALING_RATIO (1 << (TCP_RMEM_TO_WIN_SCALE - 1))
 
 static inline void tcp_scaling_ratio_init(struct sock *sk)
 {
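[Note] The new TCP_DEFAULT_SCALING_RATIO is 1 << (TCP_RMEM_TO_WIN_SCALE - 1); with TCP_RMEM_TO_WIN_SCALE being 8 elsewhere in this header, that is 128/256, i.e. the 50% skb->len/skb->truesize assumption the new comment describes. A sketch of the arithmetic it feeds, mirroring __tcp_win_from_space() (values illustrative):

	static int demo_win_from_space(u8 scaling_ratio, int space)
	{
		/* ratio 128/256: half of the receive buffer is assumed to
		 * be payload rather than skb overhead, until measured.
		 */
		return (s64)space * scaling_ratio >> TCP_RMEM_TO_WIN_SCALE;
	}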
@@ -1811,7 +1890,7 @@ int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp,
 * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
 * @c: returned tcp_sigpool for usage (uninitialized on failure)
 *
- * Returns 0 on success, error otherwise.
+ * Returns: 0 on success, error otherwise.
 */
 int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c);
 /**
@@ -1858,12 +1937,6 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk,
	return __tcp_md5_do_lookup(sk, 0, addr, family, true);
 }
 
-enum skb_drop_reason
-tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
-		     const void *saddr, const void *daddr,
-		     int family, int l3index, const __u8 *hash_location);
-
-
 #define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_key)
 #else
 static inline struct tcp_md5sig_key *
@@ -1880,13 +1953,6 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk,
	return NULL;
 }
 
-static inline enum skb_drop_reason
-tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
-		     const void *saddr, const void *daddr,
-		     int family, int l3index, const __u8 *hash_location)
-{
-	return SKB_NOT_DROPPED_YET;
-}
 #define tcp_twsk_md5_key(twsk)	NULL
 #endif
@@ -2098,6 +2164,14 @@ static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct soc
	tcp_wmem_free_skb(sk, skb);
 }
 
+static inline void tcp_write_collapse_fence(struct sock *sk)
+{
+	struct sk_buff *skb = tcp_write_queue_tail(sk);
+
+	if (skb)
+		TCP_SKB_CB(skb)->eor = 1;
+}
+
 static inline void tcp_push_pending_frames(struct sock *sk)
 {
	if (tcp_send_head(sk)) {
@@ -2195,7 +2269,10 @@ void tcp_v4_destroy_sock(struct sock *sk);
 
 struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
				netdev_features_t features);
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb);
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+				struct tcphdr *th);
 INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
 INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
 INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
@@ -2284,7 +2361,8 @@ struct tcp_request_sock_ops {
	struct dst_entry *(*route_req)(const struct sock *sk,
				       struct sk_buff *skb,
				       struct flowi *fl,
-				       struct request_sock *req);
+				       struct request_sock *req,
+				       u32 tw_isn);
	u32 (*init_seq)(const struct sk_buff *skb);
	u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
@@ -2369,21 +2447,15 @@ static inline void tcp_get_current_key(const struct sock *sk,
 
 static inline bool tcp_key_is_md5(const struct tcp_key *key)
 {
-#ifdef CONFIG_TCP_MD5SIG
-	if (static_branch_unlikely(&tcp_md5_needed.key) &&
-	    key->type == TCP_KEY_MD5)
-		return true;
-#endif
+	if (static_branch_tcp_md5())
+		return key->type == TCP_KEY_MD5;
	return false;
 }
 
 static inline bool tcp_key_is_ao(const struct tcp_key *key)
 {
-#ifdef CONFIG_TCP_AO
-	if (static_branch_unlikely(&tcp_ao_needed.key) &&
-	    key->type == TCP_KEY_AO)
-		return true;
-#endif
+	if (static_branch_tcp_ao())
+		return key->type == TCP_KEY_AO;
	return false;
 }
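[Note] tcp_key_is_md5()/tcp_key_is_ao() drop their open-coded #ifdef blocks in favor of static_branch_tcp_md5()/static_branch_tcp_ao(). A plausible shape for such a wrapper is sketched below; this is an assumption about the helper, whose real definition lives elsewhere in the tree and may differ:

	#ifdef CONFIG_TCP_MD5SIG
	/* Patched jump label: false almost everywhere, flipped on once
	 * any socket installs an MD5 key.
	 */
	#define static_branch_tcp_md5() \
		static_branch_unlikely(&tcp_md5_needed.key)
	#else
	#define static_branch_tcp_md5() false
	#endif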
@@ -2431,14 +2503,35 @@ void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb,
 void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb);
 void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb);
 
+static inline void tcp_warn_once(const struct sock *sk, bool cond, const char *str)
+{
+	WARN_ONCE(cond,
+		  "%scwn:%u out:%u sacked:%u lost:%u retrans:%u tlp_high_seq:%u sk_state:%u ca_state:%u advmss:%u mss_cache:%u pmtu:%u\n",
+		  str,
+		  tcp_snd_cwnd(tcp_sk(sk)),
+		  tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out,
+		  tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out,
+		  tcp_sk(sk)->tlp_high_seq, sk->sk_state,
+		  inet_csk(sk)->icsk_ca_state,
+		  tcp_sk(sk)->advmss, tcp_sk(sk)->mss_cache,
+		  inet_csk(sk)->icsk_pmtu_cookie);
+}
+
 /* At how many usecs into the future should the RTO fire? */
 static inline s64 tcp_rto_delta_us(const struct sock *sk)
 {
	const struct sk_buff *skb = tcp_rtx_queue_head(sk);
	u32 rto = inet_csk(sk)->icsk_rto;
 
-	u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto);
-
-	return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
+	if (likely(skb)) {
+		u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto);
+
+		return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
+	} else {
+		tcp_warn_once(sk, 1, "rtx queue empty: ");
+		return jiffies_to_usecs(rto);
+	}
+
 }
 
 /*
@@ -2551,8 +2644,8 @@ struct tcp_ulp_ops {
	/* cleanup ulp */
	void (*release)(struct sock *sk);
	/* diagnostic */
-	int (*get_info)(struct sock *sk, struct sk_buff *skb);
-	size_t (*get_info_size)(const struct sock *sk);
+	int (*get_info)(struct sock *sk, struct sk_buff *skb, bool net_admin);
+	size_t (*get_info_size)(const struct sock *sk, bool net_admin);
	/* clone ulp */
	void (*clone)(const struct request_sock *req, struct sock *newsk,
		      const gfp_t priority);
@@ -2579,6 +2672,11 @@ struct sk_psock;
 #ifdef CONFIG_BPF_SYSCALL
 int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
 void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#ifdef CONFIG_BPF_STREAM_PARSER
+struct strparser;
+int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc,
+			   sk_read_actor_t recv_actor);
+#endif /* CONFIG_BPF_STREAM_PARSER */
 #endif /* CONFIG_BPF_SYSCALL */
 
 #ifdef CONFIG_INET
@@ -2629,6 +2727,7 @@ static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
	if (sk_fullsock(sk)) {
		sock_ops.is_fullsock = 1;
+		sock_ops.is_locked_tcp_sock = 1;
		sock_owned_by_me(sk);
	}
@@ -2706,10 +2805,10 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
 }
 
-static inline void tcp_bpf_rtt(struct sock *sk)
+static inline void tcp_bpf_rtt(struct sock *sk, long mrtt, u32 srtt)
 {
	if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG))
-		tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
+		tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_RTT_CB, mrtt, srtt);
 }
 
 #if IS_ENABLED(CONFIG_SMC)
@@ -2717,9 +2816,9 @@ extern struct static_key_false tcp_have_smc;
 #endif
 
 #if IS_ENABLED(CONFIG_TLS_DEVICE)
-void clean_acked_data_enable(struct inet_connection_sock *icsk,
+void clean_acked_data_enable(struct tcp_sock *tp,
			     void (*cad)(struct sock *sk, u32 ack_seq));
-void clean_acked_data_disable(struct inet_connection_sock *icsk);
+void clean_acked_data_disable(struct tcp_sock *tp);
 void clean_acked_data_flush(void);
 #endif
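[Note] tcp_bpf_rtt() now forwards the measured and smoothed RTT to the BPF_SOCK_OPS_RTT_CB program instead of invoking it with no arguments. A hedged sketch of a caller (demo_rtt_update() is illustrative; srtt_us is stored left-shifted by 3, and whether the in-tree caller passes it raw or unshifted is its own detail):

	static void demo_rtt_update(struct sock *sk, long mrtt_us)
	{
		const struct tcp_sock *tp = tcp_sk(sk);

		/* unshift the 1/8-unit EWMA for readability */
		tcp_bpf_rtt(sk, mrtt_us, tp->srtt_us >> 3);
	}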
options set", - family, skb, ""); - return SKB_DROP_REASON_TCP_AUTH_HDR; - } - - if (req) { - if (tcp_rsk_used_ao(req) != !!aoh) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); - tcp_hash_fail("TCP connection can't start/end using TCP-AO", - family, skb, "%s", - !aoh ? "missing AO" : "AO signed"); - return SKB_DROP_REASON_TCP_AOFAILURE; - } - } - - /* sdif set, means packet ingressed via a device - * in an L3 domain and dif is set to the l3mdev - */ - l3index = sdif ? dif : 0; - - /* Fast path: unsigned segments */ - if (likely(!md5_location && !aoh)) { - /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid - * for the remote peer. On TCP-AO established connection - * the last key is impossible to remove, so there's - * always at least one current_key. - */ - if (tcp_ao_required(sk, saddr, family, l3index, true)) { - tcp_hash_fail("AO hash is required, but not found", - family, skb, "L3 index %d", l3index); - return SKB_DROP_REASON_TCP_AONOTFOUND; - } - if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - tcp_hash_fail("MD5 Hash not found", - family, skb, "L3 index %d", l3index); - return SKB_DROP_REASON_TCP_MD5NOTFOUND; - } - return SKB_NOT_DROPPED_YET; - } - - if (aoh) - return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh); - - return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, - l3index, md5_location); -} +enum skb_drop_reason tcp_inbound_hash(struct sock *sk, + const struct request_sock *req, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif); #endif /* _TCP_H */ |