path: root/net/ipv4
diff options
authorEric Dumazet <edumazet@google.com>2018-10-15 09:37:52 -0700
committerDavid S. Miller <davem@davemloft.net>2018-10-15 22:56:41 -0700
commit5f6188a8003d080e3753b8f14f4a5a2325ae1ff6 (patch)
tree17f35c12eeef92a69902b197a73c1123eaac9e63 /net/ipv4
parentnet: bridge: fix a possible memory leak in __vlan_add (diff)
tcp: do not change tcp_wstamp_ns in tcp_mstamp_refresh
In EDT design, I made the mistake of using tcp_wstamp_ns to store the last tcp_clock_ns() sample and to store the pacing virtual timer. This causes major regressions at high speed flows. Introduce tcp_clock_cache to store last tcp_clock_ns(). This is needed because some arches have slow high-resolution kernel time service. tcp_wstamp_ns is only updated when a packet is sent. Note that we can remove tcp_mstamp in the future since tcp_mstamp is essentially tcp_clock_cache/1000, so the apparent socket size increase is temporary. Fixes: 9799ccb0e984 ("tcp: add tcp_wstamp_ns socket field") Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
2 files changed, 7 insertions, 4 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 059b67af28b1..f14df66a0c85 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -52,9 +52,8 @@ void tcp_mstamp_refresh(struct tcp_sock *tp)
u64 val = tcp_clock_ns();
- /* departure time for next data packet */
- if (val > tp->tcp_wstamp_ns)
- tp->tcp_wstamp_ns = val;
+ if (val > tp->tcp_clock_cache)
+ tp->tcp_clock_cache = val;
val = div_u64(val, NSEC_PER_USEC);
if (val > tp->tcp_mstamp)
@@ -1050,6 +1049,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
if (unlikely(!skb))
return -ENOBUFS;
+ /* TODO: might take care of jitter here */
+ tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
inet = inet_sk(sk);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 61023d50cd60..676020663ce8 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -360,7 +360,7 @@ static void tcp_probe_timer(struct sock *sk)
start_ts = tcp_skb_timestamp(skb);
if (!start_ts)
- skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
+ skb->skb_mstamp_ns = tp->tcp_clock_cache;
else if (icsk->icsk_user_timeout &&
(s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
goto abort;