aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorYuchung Cheng <ycheng@google.com>2018-08-29 14:53:56 -0700
committerDavid S. Miller <davem@davemloft.net>2018-08-31 23:03:00 -0700
commit7788174e8726c309d8bfd8aeca743cefd6943616 (patch)
treec22fbe4b578d94cf9e813fa8831a3bfb798717e8 /net
parentnet_sched: add missing tcf_lock for act_connmark (diff)
downloadlinux-dev-7788174e8726c309d8bfd8aeca743cefd6943616.tar.xz
linux-dev-7788174e8726c309d8bfd8aeca743cefd6943616.zip
tcp: change IPv6 flow-label upon receiving spurious retransmission
Currently a Linux IPv6 TCP sender will change the flow label upon timeouts to potentially steer away from a data path that has gone bad. However this does not help if the problem is on the ACK path and the data path is healthy. In this case the receiver is likely to receive repeated spurious retransmission because the sender couldn't get the ACKs in time and has recurring timeouts. This patch adds another feature to mitigate this problem. It leverages the DSACK states in the receiver to change the flow label of the ACKs to speculatively re-route the ACK packets. In order to allow triggering on the second consecutive spurious RTO, the receiver changes the flow label upon sending a second consecutive DSACK for a sequence number below RCV.NXT. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_input.c13
2 files changed, 15 insertions, 0 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b8af2fec5ad5..8c4235c098fd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2595,6 +2595,8 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->compressed_ack = 0;
tp->bytes_sent = 0;
tp->bytes_retrans = 0;
+ tp->duplicate_sack[0].start_seq = 0;
+ tp->duplicate_sack[0].end_seq = 0;
tp->dsack_dups = 0;
tp->reord_seen = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4c2dd9f863f7..62508a2f9b21 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4199,6 +4199,17 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
}
+static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
+{
+ /* When the ACK path fails or drops most ACKs, the sender would
+ * timeout and spuriously retransmit the same segment repeatedly.
+ * The receiver remembers and reflects via DSACKs. Leverage the
+ * DSACK state and change the txhash to re-route speculatively.
+ */
+ if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq)
+ sk_rethink_txhash(sk);
+}
+
static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -4211,6 +4222,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+ tcp_rcv_spurious_retrans(sk, skb);
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
end_seq = tp->rcv_nxt;
tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
@@ -4755,6 +4767,7 @@ queue_and_out:
}
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+ tcp_rcv_spurious_retrans(sk, skb);
/* A retransmit, 2nd most common case. Force an immediate ack. */
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);