aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2020-11-20 18:10:47 -0800
committerJakub Kicinski <kuba@kernel.org>2020-11-20 18:10:47 -0800
commite10823c71920c6fd54ab85677f001d7978bcb3ae (patch)
tree6af6fa8f6e4c2259411d963bd709f35014284342
parentdpaa2-eth: select XGMAC_MDIO for MDIO bus support (diff)
parenttcp: Set INET_ECN_xmit configuration in tcp_reinit_congestion_control (diff)
downloadlinux-dev-e10823c71920c6fd54ab85677f001d7978bcb3ae.tar.xz
linux-dev-e10823c71920c6fd54ab85677f001d7978bcb3ae.zip
Merge branch 'tcp-address-issues-with-ect0-not-being-set-in-dctcp-packets'
Alexander Duyck says: ==================== tcp: Address issues with ECT0 not being set in DCTCP packets This patch set is meant to address issues seen with SYN/ACK packets not containing the ECT0 bit when DCTCP is configured as the congestion control algorithm for a TCP socket. A simple test using "tcpdump" and "test_progs -t bpf_tcp_ca" makes the issue obvious. Looking at the packets will result in the SYN/ACK packet with an ECT0 bit that does not match the other packets for the flow when the congestion control agorithm is switch from the default. So for example going from non-DCTCP to a DCTCP congestion control algorithm we will see the SYN/ACK IPV6 header will not have ECT0 set while the other packets in the flow will. Likewise if we switch from a default of DCTCP to cubic we will see the ECT0 bit set in the SYN/ACK while the other packets in the flow will not. ==================== Link: https://lore.kernel.org/r/160582070138.66684.11785214534154816097.stgit@localhost.localdomain Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--net/ipv4/tcp_cong.c5
-rw-r--r--net/ipv4/tcp_ipv4.c5
-rw-r--r--net/ipv6/tcp_ipv6.c6
3 files changed, 11 insertions, 5 deletions
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index db47ac24d057..563d016e7478 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -198,6 +198,11 @@ static void tcp_reinit_congestion_control(struct sock *sk,
icsk->icsk_ca_setsockopt = 1;
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+ if (ca->flags & TCP_CONG_NEEDS_ECN)
+ INET_ECN_xmit(sk);
+ else
+ INET_ECN_dontxmit(sk);
+
if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
tcp_init_congestion_control(sk);
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7352c097ae48..25d35a56ceba 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -981,7 +981,8 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
- tcp_rsk(req)->syn_tos : inet_sk(sk)->tos;
+ tcp_rsk(req)->syn_tos & ~INET_ECN_MASK :
+ inet_sk(sk)->tos;
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
@@ -990,7 +991,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
rcu_dereference(ireq->ireq_opt),
- tos & ~INET_ECN_MASK);
+ tos);
rcu_read_unlock();
err = net_xmit_eval(err);
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8db59f4e5f13..3d49e8d0afee 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -530,12 +530,12 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
rcu_read_lock();
opt = ireq->ipv6_opt;
tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
- tcp_rsk(req)->syn_tos : np->tclass;
+ tcp_rsk(req)->syn_tos & ~INET_ECN_MASK :
+ np->tclass;
if (!opt)
opt = rcu_dereference(np->opt);
err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt,
- tclass & ~INET_ECN_MASK,
- sk->sk_priority);
+ tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}