From 5000b28b0b1a34144b39376318cafb8c2a0f79fd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 10 Dec 2019 02:41:48 +0000 Subject: tcp: Cleanup duplicate initialization of sk->sk_state. When a TCP socket is created, sk->sk_state is initialized twice as TCP_CLOSE in sock_init_data() and tcp_init_sock(). The tcp_init_sock() is always called after the sock_init_data(), so it is not necessary to update sk->sk_state in the tcp_init_sock(). Before v2.1.8, the code of the two functions was in the inet_create(). In the patch of v2.1.8, the tcp_v4/v6_init_sock() were added and the code of initialization of sk->state was duplicated. Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8a39ee794891..09e2cae92956 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -443,8 +443,6 @@ void tcp_init_sock(struct sock *sk) tp->tsoffset = 0; tp->rack.reo_wnd_steps = 1; - sk->sk_state = TCP_CLOSE; - sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); -- cgit v1.2.3-59-g8ed1b From 0e627190563e8be6fc9c2cc2a8a3c14bd961754c Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Sun, 15 Dec 2019 11:54:51 -0800 Subject: tcp: Set rcv zerocopy hint correctly if skb last frag is < PAGE_SIZE At present, if the last frag of paged data in a skb has < PAGE_SIZE data, we compute the recv_skip_hint as being equal to the size of that frag and the entire next skb. Instead, just return the runt frag size as the hint. recv_skip_hint is used by the application to skip over bytes that can not be mmaped, so returning a too big chunk is pessimistic and forces more bytes to be copied. Signed-off-by: Arjun Roy Acked-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 09e2cae92956..93fe77c5b02d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1778,6 +1778,8 @@ static int tcp_zerocopy_receive(struct sock *sk, while (length + PAGE_SIZE <= zc->length) { if (zc->recv_skip_hint < PAGE_SIZE) { if (skb) { + if (zc->recv_skip_hint > 0) + break; skb = skb->next; offset = seq - TCP_SKB_CB(skb)->seq; } else { -- cgit v1.2.3-59-g8ed1b From 35b2c32116091ef87a15c604cac363da8322a288 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 9 Jan 2020 07:59:21 -0800 Subject: tcp: Export TCP functions and ops struct MPTCP will make use of tcp_send_mss() and tcp_push() when sending data to specific TCP subflows. tcp_request_sock_ipvX_ops and ipvX_specific will be referenced during TCP subflow creation. Co-developed-by: Peter Krystad Signed-off-by: Peter Krystad Reviewed-by: Eric Dumazet Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/tcp.h | 8 ++++++++ net/ipv4/tcp.c | 6 +++--- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 6 +++--- 4 files changed, 15 insertions(+), 7 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 13bc83fab454..5e4133d09b9d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -330,6 +330,9 @@ int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags); ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags); +int tcp_send_mss(struct sock *sk, int *size_goal, int flags); +void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, + int size_goal); void tcp_release_cb(struct sock *sk); void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); @@ -2011,6 +2014,11 @@ struct tcp_request_sock_ops { enum tcp_synack_type synack_type); }; +extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops; +#if IS_ENABLED(CONFIG_IPV6) +extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops; +#endif + #ifdef CONFIG_SYN_COOKIES static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, const struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f09fbc85b108..6711a97de3ce 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -690,8 +690,8 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } -static void tcp_push(struct sock *sk, int flags, int mss_now, - int nonagle, int size_goal) +void tcp_push(struct sock *sk, int flags, int mss_now, + int nonagle, int size_goal) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -925,7 +925,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, return max(size_goal, mss_now); } -static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) +int tcp_send_mss(struct sock *sk, int *size_goal, int flags) { int mss_now; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4adac9c75343..fedb537839ec 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1426,7 +1426,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { +const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .mss_clamp = TCP_MSS_DEFAULT, #ifdef CONFIG_TCP_MD5SIG .req_md5_lookup = tcp_v4_md5_lookup, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 95e4e1e95db2..5b5260103b65 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -75,7 +75,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static const struct inet_connection_sock_af_ops ipv6_mapped; -static const struct inet_connection_sock_af_ops ipv6_specific; +const struct inet_connection_sock_af_ops ipv6_specific; #ifdef CONFIG_TCP_MD5SIG static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; @@ -819,7 +819,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { +const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG @@ -1794,7 +1794,7 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_destructor = tcp_twsk_destructor, }; -static const struct inet_connection_sock_af_ops ipv6_specific = { +const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, -- cgit v1.2.3-59-g8ed1b From f870fa0b5768842cb4690c1c11f19f28b731ae6d Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 21 Jan 2020 16:56:15 -0800 Subject: mptcp: Add MPTCP socket stubs Implements the infrastructure for MPTCP sockets. MPTCP sockets open one in-kernel TCP socket per subflow. These subflow sockets are only managed by the MPTCP socket that owns them and are not visible from userspace. This commit allows a userspace program to open an MPTCP socket with: sock = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); The resulting socket is simply a wrapper around a single regular TCP socket, without any of the MPTCP protocol implemented over the wire. Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Co-developed-by: Peter Krystad Signed-off-by: Peter Krystad Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- MAINTAINERS | 1 + include/net/mptcp.h | 16 ++++++ net/Kconfig | 1 + net/Makefile | 1 + net/ipv4/tcp.c | 2 + net/ipv6/tcp_ipv6.c | 7 +++ net/mptcp/Kconfig | 16 ++++++ net/mptcp/Makefile | 4 ++ net/mptcp/protocol.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/mptcp/protocol.h | 22 ++++++++ 10 files changed, 212 insertions(+) create mode 100644 net/mptcp/Kconfig create mode 100644 net/mptcp/Makefile create mode 100644 net/mptcp/protocol.c create mode 100644 net/mptcp/protocol.h (limited to 'net/ipv4/tcp.c') diff --git a/MAINTAINERS b/MAINTAINERS index 702382b89c37..4be9c1b50183 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11583,6 +11583,7 @@ W: https://github.com/multipath-tcp/mptcp_net-next/wiki B: https://github.com/multipath-tcp/mptcp_net-next/issues S: Maintained F: include/net/mptcp.h +F: net/mptcp/ NETWORKING [TCP] M: Eric Dumazet diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 0573ae75c3db..98ba22379117 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -28,6 +28,8 @@ struct mptcp_ext { #ifdef CONFIG_MPTCP +void mptcp_init(void); + /* move the skb extension owership, with the assumption that 'to' is * newly allocated */ @@ -70,6 +72,10 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, #else +static inline void mptcp_init(void) +{ +} + static inline void mptcp_skb_ext_move(struct sk_buff *to, const struct sk_buff *from) { @@ -82,4 +88,14 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, } #endif /* CONFIG_MPTCP */ + +#if IS_ENABLED(CONFIG_MPTCP_IPV6) +int mptcpv6_init(void); +#elif IS_ENABLED(CONFIG_IPV6) +static inline int mptcpv6_init(void) +{ + return 0; +} +#endif + #endif /* __NET_MPTCP_H */ diff --git a/net/Kconfig b/net/Kconfig index 54916b7adb9b..b0937a700f01 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -91,6 +91,7 @@ if INET source "net/ipv4/Kconfig" source "net/ipv6/Kconfig" source "net/netlabel/Kconfig" +source "net/mptcp/Kconfig" endif # if INET diff --git a/net/Makefile b/net/Makefile index 848303d98d3d..07ea48160874 100644 --- a/net/Makefile +++ b/net/Makefile @@ -87,3 +87,4 @@ endif obj-$(CONFIG_QRTR) += qrtr/ obj-$(CONFIG_NET_NCSI) += ncsi/ obj-$(CONFIG_XDP_SOCKETS) += xdp/ +obj-$(CONFIG_MPTCP) += mptcp/ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6711a97de3ce..7dfb78caedaf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -271,6 +271,7 @@ #include #include #include +#include #include #include #include @@ -4021,4 +4022,5 @@ void __init tcp_init(void) tcp_metrics_init(); BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0); tcp_tasklet_init(); + mptcp_init(); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5b5260103b65..60068ffde1d9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2163,9 +2163,16 @@ int __init tcpv6_init(void) ret = register_pernet_subsys(&tcpv6_net_ops); if (ret) goto out_tcpv6_protosw; + + ret = mptcpv6_init(); + if (ret) + goto out_tcpv6_pernet_subsys; + out: return ret; +out_tcpv6_pernet_subsys: + unregister_pernet_subsys(&tcpv6_net_ops); out_tcpv6_protosw: inet6_unregister_protosw(&tcpv6_protosw); out_tcpv6_protocol: diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig new file mode 100644 index 000000000000..c1a99f07a4cd --- /dev/null +++ b/net/mptcp/Kconfig @@ -0,0 +1,16 @@ + +config MPTCP + bool "MPTCP: Multipath TCP" + depends on INET + select SKB_EXTENSIONS + help + Multipath TCP (MPTCP) connections send and receive data over multiple + subflows in order to utilize multiple network paths. Each subflow + uses the TCP protocol, and TCP options carry header information for + MPTCP. + +config MPTCP_IPV6 + bool "MPTCP: IPv6 support for Multipath TCP" + depends on MPTCP + select IPV6 + default y diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile new file mode 100644 index 000000000000..659129d1fcbf --- /dev/null +++ b/net/mptcp/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_MPTCP) += mptcp.o + +mptcp-y := protocol.o diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c new file mode 100644 index 000000000000..5e24e7cf7d70 --- /dev/null +++ b/net/mptcp/protocol.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Multipath TCP + * + * Copyright (c) 2017 - 2019, Intel Corporation. + */ + +#define pr_fmt(fmt) "MPTCP: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "protocol.h" + +static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + struct socket *subflow = msk->subflow; + + if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) + return -EOPNOTSUPP; + + return sock_sendmsg(subflow, msg); +} + +static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int nonblock, int flags, int *addr_len) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + struct socket *subflow = msk->subflow; + + if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT)) + return -EOPNOTSUPP; + + return sock_recvmsg(subflow, msg, flags); +} + +static int mptcp_init_sock(struct sock *sk) +{ + return 0; +} + +static void mptcp_close(struct sock *sk, long timeout) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + inet_sk_state_store(sk, TCP_CLOSE); + + if (msk->subflow) { + pr_debug("subflow=%p", msk->subflow->sk); + sock_release(msk->subflow); + } + + sock_orphan(sk); + sock_put(sk); +} + +static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + int err; + + saddr->sa_family = AF_INET; + + pr_debug("msk=%p, subflow=%p", msk, msk->subflow->sk); + + err = kernel_connect(msk->subflow, saddr, len, 0); + + sk->sk_state = TCP_ESTABLISHED; + + return err; +} + +static struct proto mptcp_prot = { + .name = "MPTCP", + .owner = THIS_MODULE, + .init = mptcp_init_sock, + .close = mptcp_close, + .accept = inet_csk_accept, + .connect = mptcp_connect, + .shutdown = tcp_shutdown, + .sendmsg = mptcp_sendmsg, + .recvmsg = mptcp_recvmsg, + .hash = inet_hash, + .unhash = inet_unhash, + .get_port = inet_csk_get_port, + .obj_size = sizeof(struct mptcp_sock), + .no_autobind = true, +}; + +static struct inet_protosw mptcp_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + .prot = &mptcp_prot, + .ops = &inet_stream_ops, +}; + +void __init mptcp_init(void) +{ + if (proto_register(&mptcp_prot, 1) != 0) + panic("Failed to register MPTCP proto.\n"); + + inet_register_protosw(&mptcp_protosw); +} + +#if IS_ENABLED(CONFIG_MPTCP_IPV6) +static struct proto mptcp_v6_prot; + +static struct inet_protosw mptcp_v6_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + .prot = &mptcp_v6_prot, + .ops = &inet6_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; + +int mptcpv6_init(void) +{ + int err; + + mptcp_v6_prot = mptcp_prot; + strcpy(mptcp_v6_prot.name, "MPTCPv6"); + mptcp_v6_prot.slab = NULL; + mptcp_v6_prot.obj_size = sizeof(struct mptcp_sock) + + sizeof(struct ipv6_pinfo); + + err = proto_register(&mptcp_v6_prot, 1); + if (err) + return err; + + err = inet6_register_protosw(&mptcp_v6_protosw); + if (err) + proto_unregister(&mptcp_v6_prot); + + return err; +} +#endif diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h new file mode 100644 index 000000000000..ee04a01bffd3 --- /dev/null +++ b/net/mptcp/protocol.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Multipath TCP + * + * Copyright (c) 2017 - 2019, Intel Corporation. + */ + +#ifndef __MPTCP_PROTOCOL_H +#define __MPTCP_PROTOCOL_H + +/* MPTCP connection sock */ +struct mptcp_sock { + /* inet_connection_sock must be the first member */ + struct inet_connection_sock sk; + struct socket *subflow; /* outgoing connect/listener/!mp_capable */ +}; + +static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) +{ + return (struct mptcp_sock *)sk; +} + +#endif /* __MPTCP_PROTOCOL_H */ -- cgit v1.2.3-59-g8ed1b From 32efcc06d2a15fa87585614d12d6c2308cc2d3f3 Mon Sep 17 00:00:00 2001 From: Abdul Kabbani Date: Fri, 24 Jan 2020 16:34:02 -0500 Subject: tcp: export count for rehash attempts Using IPv6 flow-label to swiftly route around avoid congested or disconnected network path can greatly improve TCP reliability. This patch adds SNMP counters and a OPT_STATS counter to track both host-level and connection-level statistics. Network administrators can use these counters to evaluate the impact of this new ability better. Export count for rehash attempts to 1) two SNMP counters: TcpTimeoutRehash (rehash due to timeouts), and TcpDuplicateDataRehash (rehash due to receiving duplicate packets) 2) Timestamping API SOF_TIMESTAMPING_OPT_STATS. Signed-off-by: Abdul Kabbani Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Kevin(Yudong) Yang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ include/uapi/linux/snmp.h | 2 ++ include/uapi/linux/tcp.h | 1 + net/ipv4/proc.c | 2 ++ net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_input.c | 4 +++- net/ipv4/tcp_timer.c | 6 ++++++ 7 files changed, 18 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp.c') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4e2124607d32..1cf73e6f85ca 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -386,6 +386,8 @@ struct tcp_sock { #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0 #endif + u16 timeout_rehash; /* Timeout-triggered rehash attempts */ + u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */ /* Receiver side RTT estimation */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 7eee233e78d2..7d91f4debc48 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -285,6 +285,8 @@ enum LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */ LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */ LINUX_MIB_TCPFASTOPENPASSIVEALTKEY, /* TCPFastOpenPassiveAltKey */ + LINUX_MIB_TCPTIMEOUTREHASH, /* TCPTimeoutRehash */ + LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index d87184e673ca..fd9eb8f6bcae 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -311,6 +311,7 @@ enum { TCP_NLA_DSACK_DUPS, /* DSACK blocks received */ TCP_NLA_REORD_SEEN, /* reordering events seen */ TCP_NLA_SRTT, /* smoothed RTT in usecs */ + TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index cc90243ccf76..2580303249e2 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -289,6 +289,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP), SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG), SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY), + SNMP_MIB_ITEM("TcpTimeoutRehash", LINUX_MIB_TCPTIMEOUTREHASH), + SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2857c853e2fd..484485ae74c2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3337,6 +3337,7 @@ static size_t tcp_opt_stats_get_size(void) nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */ nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */ nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */ + nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */ 0; } @@ -3391,6 +3392,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups); nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); + nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); return stats; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4915de65d278..e8b840a4767e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4271,8 +4271,10 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) * The receiver remembers and reflects via DSACKs. Leverage the * DSACK state and change the txhash to re-route speculatively. */ - if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) + if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) { sk_rethink_txhash(sk); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH); + } } static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 1097b438befe..c3f26dcd6704 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -223,6 +223,9 @@ static int tcp_write_timeout(struct sock *sk) dst_negative_advice(sk); } else { sk_rethink_txhash(sk); + tp->timeout_rehash++; + __NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPTIMEOUTREHASH); } retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; expired = icsk->icsk_retransmits >= retry_until; @@ -234,6 +237,9 @@ static int tcp_write_timeout(struct sock *sk) dst_negative_advice(sk); } else { sk_rethink_txhash(sk); + tp->timeout_rehash++; + __NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPTIMEOUTREHASH); } retry_until = net->ipv4.sysctl_tcp_retries2; -- cgit v1.2.3-59-g8ed1b