aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2019-12-23 12:27:53 -0800
committer: David S. Miller <davem@davemloft.net> 2019-12-27 16:29:14 -0800
commit: 42f3a8aaae66d31d87850fb4b02979a0fc5dc541 (patch)
tree: cc2f39bd39273825a510e8267ac269915b0a16df /net/ipv4
parent: tcp_cubic: switch bictcp_clock() to usec resolution (diff)
download: linux-dev-42f3a8aaae66d31d87850fb4b02979a0fc5dc541.tar.xz
download: linux-dev-42f3a8aaae66d31d87850fb4b02979a0fc5dc541.zip
tcp_cubic: tweak Hystart detection for short RTT flows
After switching ca->delay_min to usec resolution, we exit slow start
prematurely for very low RTT flows, setting snd_ssthresh to 20.

The reason is that delay_min is fed with RTT of small packet trains.
Then as cwnd is increased, TCP sends bigger TSO packets.

LRO/GRO aggregation and/or interrupt mitigation strategies on receiver
tend to inflate RTT samples.

Fix this by adding to delay_min the expected delay of two TSO packets,
given current pacing rate.

Tested:

Sender uses pfifo_fast qdisc

Before :
$ nstat -n;for f in {1..10}; do ./super_netperf 1 -H lpaa24 -l -4000000; done;nstat|egrep "Hystart"
  11348
  11707
  11562
  11428
  11773
  11534
   9878
  11693
  10597
  10968
TcpExtTCPHystartTrainDetect     10                 0.0
TcpExtTCPHystartTrainCwnd       200                0.0

After :
$ nstat -n;for f in {1..10}; do ./super_netperf 1 -H lpaa24 -l -4000000; done;nstat|egrep "Hystart"
  14877
  14517
  15797
  18466
  17376
  14833
  17558
  17933
  16039
  18059
TcpExtTCPHystartTrainDetect     10                 0.0
TcpExtTCPHystartTrainCwnd       1670               0.0

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/tcp_cubic.c 23
1 files changed, 21 insertions, 2 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 068775b91fb5..0e5428ed04fe 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -436,8 +436,27 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
delay = 1;
/* first time call or link delay decreases */
- if (ca->delay_min == 0 || ca->delay_min > delay)
- ca->delay_min = delay;
+ if (ca->delay_min == 0 || ca->delay_min > delay) {
+ unsigned long rate = READ_ONCE(sk->sk_pacing_rate);
+
+ /* Account for TSO/GRO delays.
+ * Otherwise short RTT flows could get too small ssthresh,
+ * since during slow start we begin with small TSO packets
+ * and could lower ca->delay_min too much.
+ * Ideally even with a very small RTT we would like to have
+ * at least one TSO packet being sent and received by GRO,
+ * and another one in qdisc layer.
+ * We apply another 100% factor because @rate is doubled at
+ * this point.
+ * We cap the cushion to 1ms.
+ */
+ if (rate)
+ delay += min_t(u64, USEC_PER_MSEC,
+ div64_ul((u64)GSO_MAX_SIZE *
+ 4 * USEC_PER_SEC, rate));
+ if (ca->delay_min == 0 || ca->delay_min > delay)
+ ca->delay_min = delay;
+ }
/* hystart triggers when cwnd is larger than some threshold */
if (!ca->found && hystart && tcp_in_slow_start(tp) &&