From 5f6188a8003d080e3753b8f14f4a5a2325ae1ff6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Mon, 15 Oct 2018 09:37:52 -0700
Subject: tcp: do not change tcp_wstamp_ns in tcp_mstamp_refresh

In EDT design, I made the mistake of using tcp_wstamp_ns
to store the last tcp_clock_ns() sample and to store the
pacing virtual timer.

This causes major regressions at high speed flows.

Introduce tcp_clock_cache to store last tcp_clock_ns().
This is needed because some arches have slow high-resolution
kernel time service.

tcp_wstamp_ns is only updated when a packet is sent.

Note that we can remove tcp_mstamp in the future since
tcp_mstamp is essentially tcp_clock_cache/1000, so the
apparent socket size increase is temporary.

Fixes: 9799ccb0e984 ("tcp: add tcp_wstamp_ns socket field")
Signed-off-by: Eric Dumazet
Acked-by: Soheil Hassas Yeganeh
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_output.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net/ipv4/tcp_output.c')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 059b67af28b1..f14df66a0c85 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -52,9 +52,8 @@ void tcp_mstamp_refresh(struct tcp_sock *tp)
 {
 	u64 val = tcp_clock_ns();
 
-	/* departure time for next data packet */
-	if (val > tp->tcp_wstamp_ns)
-		tp->tcp_wstamp_ns = val;
+	if (val > tp->tcp_clock_cache)
+		tp->tcp_clock_cache = val;
 
 	val = div_u64(val, NSEC_PER_USEC);
 	if (val > tp->tcp_mstamp)
@@ -1050,6 +1049,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 		if (unlikely(!skb))
 			return -ENOBUFS;
 	}
+
+	/* TODO: might take care of jitter here */
+	tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
+
 	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 
 	inet = inet_sk(sk);
-- 
cgit v1.2.3-59-g8ed1b