From bec41a11dd3dc8c54f766b4f494140ca92ba7c10 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jan 2017 22:11:39 -0800 Subject: tcp: remove early retransmit This patch removes the support of RFC5827 early retransmit (i.e., fast recovery on small inflight with <3 dupacks) because it is subsumed by the new RACK loss detection. More specifically when RACK receives DUPACKs, it'll arm a reordering timer to start fast recovery after a quarter of (min)RTT, hence it covers the early retransmit except RACK does not limit itself to specific inflight or dupack numbers. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 1 - net/ipv4/tcp.c | 3 --- net/ipv4/tcp_input.c | 60 ++---------------------------------------------- net/ipv4/tcp_ipv4.c | 1 - net/ipv4/tcp_metrics.c | 1 - net/ipv4/tcp_minisocks.c | 1 - net/ipv4/tcp_output.c | 11 ++++----- net/ipv4/tcp_timer.c | 3 --- 8 files changed, 6 insertions(+), 75 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index d216e40623d3..3828b3a805cd 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -215,7 +215,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, } if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c8d46c140b4a..d9023e8ed53e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -406,7 +406,6 @@ void tcp_init_sock(struct sock *sk) tp->mss_cache = TCP_MSS_DEFAULT; tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; - tcp_enable_early_retrans(tp); tcp_assign_congestion_control(sk); tp->tsoffset = 0; @@ -2477,8 +2476,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = -EINVAL; else { tp->thin_dupack = val; - if (tp->thin_dupack) - tcp_disable_early_retrans(tp); } break; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a041a92348ee..79c819077a59 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -904,8 +904,6 @@ static void tcp_update_reordering(struct sock *sk, const int metric, tcp_disable_fack(tp); } - if (metric > 0) - tcp_disable_early_retrans(tp); tp->rack.reord = 1; } @@ -2054,30 +2052,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; } -static bool tcp_pause_early_retransmit(struct sock *sk, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - unsigned long delay; - - /* Delay early retransmit and entering fast recovery for - * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples - * available, or RTO is scheduled to fire first. - */ - if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || - (flag & FLAG_ECE) || !tp->srtt_us) - return false; - - delay = max(usecs_to_jiffies(tp->srtt_us >> 5), - msecs_to_jiffies(2)); - - if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) - return false; - - inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay, - TCP_RTO_MAX); - return true; -} - /* Linux NewReno/SACK/FACK/ECN state machine. * -------------------------------------- * @@ -2221,16 +2195,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) tcp_is_sack(tp) && !tcp_send_head(sk)) return true; - /* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious - * retransmissions due to small network reorderings, we implement - * Mitigation A.3 in the RFC and delay the retransmission for a short - * interval if appropriate. - */ - if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && - (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) && - !tcp_may_send_now(sk)) - return !tcp_pause_early_retransmit(sk, flag); - return false; } @@ -3050,8 +3014,7 @@ void tcp_rearm_rto(struct sock *sk) } else { u32 rto = inet_csk(sk)->icsk_rto; /* Offset the time elapsed after installing regular RTO */ - if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || - icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || + if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); const u32 rto_time_stamp = @@ -3068,24 +3031,6 @@ void tcp_rearm_rto(struct sock *sk) } } -/* This function is called when the delayed ER timer fires. TCP enters - * fast recovery and performs fast-retransmit. - */ -void tcp_resume_early_retransmit(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tcp_rearm_rto(sk); - - /* Stop if ER is disabled after the delayed ER timer is scheduled */ - if (!tp->do_early_retrans) - return; - - tcp_enter_recovery(sk, false); - tcp_update_scoreboard(sk, 1); - tcp_xmit_retransmit_queue(sk); -} - /* If we get here, the whole TSO packet has not been acked. */ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) { @@ -3651,8 +3596,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) skb_mstamp_get(&sack_state.ack_time); - if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) + if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); if (after(ack, prior_snd_una)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ebf3e0c4967a..63214136cf1c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2229,7 +2229,6 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) int state; if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index ba8f02d0f283..b9ed0d50aead 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -522,7 +522,6 @@ void tcp_init_metrics(struct sock *sk) val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val && tp->reordering != val) { tcp_disable_fack(tp); - tcp_disable_early_retrans(tp); tp->reordering = val; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 06fde26a82b7..bdb443471c39 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -468,7 +468,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->sacked_out = 0; newtp->fackets_out = 0; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - tcp_enable_early_retrans(newtp); newtp->tlp_high_seq = 0; newtp->lsndtime = treq->snt_synack.stamp_jiffies; newsk->sk_txhash = treq->txhash; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6327e4d368a4..9a1a1494b9dd 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -76,10 +76,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; tp->packets_out += tcp_skb_pcount(skb); - if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); - } NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT, tcp_skb_pcount(skb)); @@ -2289,8 +2287,6 @@ bool tcp_schedule_loss_probe(struct sock *sk) u32 timeout, tlp_time_stamp, rto_time_stamp; u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); - if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) - return false; /* No consecutive loss probes. */ if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { tcp_rearm_rto(sk); @@ -2309,8 +2305,9 @@ bool tcp_schedule_loss_probe(struct sock *sk) /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ - if (sysctl_tcp_early_retrans < 3 || !tp->packets_out || - !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) + if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) || + !tp->packets_out || !tcp_is_sack(tp) || + icsk->icsk_ca_state != TCP_CA_Open) return false; if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 953c02a8566e..40d893556e67 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -566,9 +566,6 @@ void tcp_write_timer_handler(struct sock *sk) case ICSK_TIME_REO_TIMEOUT: tcp_rack_reo_timeout(sk); break; - case ICSK_TIME_EARLY_RETRANS: - tcp_resume_early_retransmit(sk); - break; case ICSK_TIME_LOSS_PROBE: tcp_send_loss_probe(sk); break; -- cgit v1.2.3-59-g8ed1b