aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_metrics.c
diff options
context:
space:
mode:
authorKevin(Yudong) Yang <yyd@google.com>2019-12-09 14:19:59 -0500
committerDavid S. Miller <davem@davemloft.net>2019-12-09 20:17:48 -0800
commit65e6d90168f3593df0ae598502bcbf20d78ff0fb (patch)
treef8a7670488d31167eb0fdce6033859caee7de056 /net/ipv4/tcp_metrics.c
parentsctp: get netns from asoc and ep base (diff)
downloadlinux-dev-65e6d90168f3593df0ae598502bcbf20d78ff0fb.tar.xz
linux-dev-65e6d90168f3593df0ae598502bcbf20d78ff0fb.zip
net-tcp: Disable TCP ssthresh metrics cache by default
This patch introduces a sysctl knob "net.ipv4.tcp_no_ssthresh_metrics_save" that disables TCP ssthresh metrics cache by default. Other parts of TCP metrics cache, e.g. rtt, cwnd, remain unchanged. As modern networks becoming more and more dynamic, TCP metrics cache today often causes more harm than benefits. For example, the same IP address is often shared by different subscribers behind NAT in residential networks. Even if the IP address is not shared by different users, caching the slow-start threshold of a previous short flow using loss-based congestion control (e.g. cubic) often causes the future longer flows of the same network path to exit slow-start prematurely with abysmal throughput. Caching ssthresh is very risky and can lead to terrible performance. Therefore it makes sense to make disabling ssthresh caching by default and opt-in for specific networks by the administrators. This practice also has worked well for several years of deployment with CUBIC congestion control at Google. Acked-by: Eric Dumazet <edumazet@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Acked-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Kevin(Yudong) Yang <yyd@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_metrics.c')
-rw-r--r--net/ipv4/tcp_metrics.c13
1 files changed, 9 insertions, 4 deletions
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c4848e7a0aad..279db8822439 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -385,7 +385,8 @@ void tcp_update_metrics(struct sock *sk)
if (tcp_in_initial_slowstart(tp)) {
/* Slow start still did not finish. */
- if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+ if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && (tp->snd_cwnd >> 1) > val)
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
@@ -400,7 +401,8 @@ void tcp_update_metrics(struct sock *sk)
} else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
- if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
+ if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
@@ -416,7 +418,8 @@ void tcp_update_metrics(struct sock *sk)
tcp_metric_set(tm, TCP_METRIC_CWND,
(val + tp->snd_ssthresh) >> 1);
}
- if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+ if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && tp->snd_ssthresh > val)
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
@@ -441,6 +444,7 @@ void tcp_init_metrics(struct sock *sk)
{
struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct tcp_metrics_block *tm;
u32 val, crtt = 0; /* cached RTT scaled by 8 */
@@ -458,7 +462,8 @@ void tcp_init_metrics(struct sock *sk)
if (tcp_metric_locked(tm, TCP_METRIC_CWND))
tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
- val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+ val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
+ 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val) {
tp->snd_ssthresh = val;
if (tp->snd_ssthresh > tp->snd_cwnd_clamp)