aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2017-05-16 14:00:14 -0700
committerDavid S. Miller <davem@davemloft.net>2017-05-17 16:06:01 -0400
commit9a568de4818dea9a05af141046bd3e589245ab83 (patch)
tree6f1502edf55ecb7205660d62bd683ebcf912cfea /include
parenttcp: replace misc tcp_time_stamp to tcp_jiffies32 (diff)
downloadlinux-dev-9a568de4818dea9a05af141046bd3e589245ab83.tar.xz
linux-dev-9a568de4818dea9a05af141046bd3e589245ab83.zip
tcp: switch TCP TS option (RFC 7323) to 1ms clock
TCP Timestamps option is defined in RFC 7323 Traditionally on linux, it has been tied to the internal 'jiffies' variable, because it had been a cheap and good enough generator. For TCP flows on the Internet, 1 ms resolution would be much better than 4ms or 10ms (HZ=250 or HZ=100 respectively) For TCP flows in the DC, Google has used usec resolution for more than two years with great success [1] Receive size autotuning (DRS) is indeed more precise and converges faster to optimal window size. This patch converts tp->tcp_mstamp to a plain u64 value storing a 1 usec TCP clock. This choice will allow us to upstream the 1 usec TS option as discussed in IETF 97. [1] https://www.ietf.org/proceedings/97/slides/slides-97-tcpm-tcp-options-for-low-latency-00.pdf Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to '')
-rw-r--r--include/linux/skbuff.h62
-rw-r--r--include/linux/tcp.h22
-rw-r--r--include/net/tcp.h59
3 files changed, 62 insertions, 81 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bfc7892f6c33..7c0cb2ce8b01 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -506,66 +506,6 @@ typedef unsigned int sk_buff_data_t;
typedef unsigned char *sk_buff_data_t;
#endif
-/**
- * struct skb_mstamp - multi resolution time stamps
- * @stamp_us: timestamp in us resolution
- * @stamp_jiffies: timestamp in jiffies
- */
-struct skb_mstamp {
- union {
- u64 v64;
- struct {
- u32 stamp_us;
- u32 stamp_jiffies;
- };
- };
-};
-
-/**
- * skb_mstamp_get - get current timestamp
- * @cl: place to store timestamps
- */
-static inline void skb_mstamp_get(struct skb_mstamp *cl)
-{
- u64 val = local_clock();
-
- do_div(val, NSEC_PER_USEC);
- cl->stamp_us = (u32)val;
- cl->stamp_jiffies = (u32)jiffies;
-}
-
-/**
- * skb_mstamp_delta - compute the difference in usec between two skb_mstamp
- * @t1: pointer to newest sample
- * @t0: pointer to oldest sample
- */
-static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
- const struct skb_mstamp *t0)
-{
- s32 delta_us = t1->stamp_us - t0->stamp_us;
- u32 delta_jiffies = t1->stamp_jiffies - t0->stamp_jiffies;
-
- /* If delta_us is negative, this might be because interval is too big,
- * or local_clock() drift is too big : fallback using jiffies.
- */
- if (delta_us <= 0 ||
- delta_jiffies >= (INT_MAX / (USEC_PER_SEC / HZ)))
-
- delta_us = jiffies_to_usecs(delta_jiffies);
-
- return delta_us;
-}
-
-static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
- const struct skb_mstamp *t0)
-{
- s32 diff = t1->stamp_jiffies - t0->stamp_jiffies;
-
- if (!diff)
- diff = t1->stamp_us - t0->stamp_us;
- return diff > 0;
-}
-
/**
* struct sk_buff - socket buffer
* @next: Next buffer in list
@@ -646,7 +586,7 @@ struct sk_buff {
union {
ktime_t tstamp;
- struct skb_mstamp skb_mstamp;
+ u64 skb_mstamp;
};
};
struct rb_node rbnode; /* used in netem & tcp stack */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 22854f028434..542ca1ae02c4 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -123,7 +123,7 @@ struct tcp_request_sock_ops;
struct tcp_request_sock {
struct inet_request_sock req;
const struct tcp_request_sock_ops *af_specific;
- struct skb_mstamp snt_synack; /* first SYNACK sent time */
+ u64 snt_synack; /* first SYNACK sent time */
bool tfo_listener;
u32 txhash;
u32 rcv_isn;
@@ -211,7 +211,7 @@ struct tcp_sock {
/* Information of the most recently (s)acked skb */
struct tcp_rack {
- struct skb_mstamp mstamp; /* (Re)sent time of the skb */
+ u64 mstamp; /* (Re)sent time of the skb */
u32 rtt_us; /* Associated RTT */
u32 end_seq; /* Ending TCP sequence of the skb */
u8 advanced; /* mstamp advanced since last lost marking */
@@ -240,7 +240,7 @@ struct tcp_sock {
u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */
/* RTT measurement */
- struct skb_mstamp tcp_mstamp; /* most recent packet received/sent */
+ u64 tcp_mstamp; /* most recent packet received/sent */
u32 srtt_us; /* smoothed round trip time << 3 in usecs */
u32 mdev_us; /* medium deviation */
u32 mdev_max_us; /* maximal mdev for the last rtt period */
@@ -280,8 +280,8 @@ struct tcp_sock {
u32 delivered; /* Total data packets delivered incl. rexmits */
u32 lost; /* Total data packets lost incl. rexmits */
u32 app_limited; /* limited until "delivered" reaches this val */
- struct skb_mstamp first_tx_mstamp; /* start of window send phase */
- struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */
+ u64 first_tx_mstamp; /* start of window send phase */
+ u64 delivered_mstamp; /* time we reached "delivered" */
u32 rate_delivered; /* saved rate sample: packets delivered */
u32 rate_interval_us; /* saved rate sample: time elapsed */
@@ -335,16 +335,16 @@ struct tcp_sock {
/* Receiver side RTT estimation */
struct {
- u32 rtt_us;
- u32 seq;
- struct skb_mstamp time;
+ u32 rtt_us;
+ u32 seq;
+ u64 time;
} rcv_rtt_est;
/* Receiver queue space */
struct {
- int space;
- u32 seq;
- struct skb_mstamp time;
+ int space;
+ u32 seq;
+ u64 time;
} rcvq_space;
/* TCP-specific MTU probe information. */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5b2932b8363f..82462db97183 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -519,7 +519,7 @@ static inline u32 tcp_cookie_time(void)
u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
u16 *mssp);
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
-__u32 cookie_init_timestamp(struct request_sock *req);
+u64 cookie_init_timestamp(struct request_sock *req);
bool cookie_timestamp_decode(struct tcp_options_received *opt);
bool cookie_ecn_ok(const struct tcp_options_received *opt,
const struct net *net, const struct dst_entry *dst);
@@ -706,14 +706,55 @@ void tcp_send_window_probe(struct sock *sk);
*/
#define tcp_jiffies32 ((u32)jiffies)
-/* Generator for TCP TS option (RFC 7323)
- * Currently tied to 'jiffies' but will soon be driven by 1 ms clock.
+/*
+ * Deliver a 32bit value for TCP timestamp option (RFC 7323)
+ * It is no longer tied to jiffies, but to 1 ms clock.
+ * Note: double check if you want to use tcp_jiffies32 instead of this.
+ */
+#define TCP_TS_HZ 1000
+
+static inline u64 tcp_clock_ns(void)
+{
+ return local_clock();
+}
+
+static inline u64 tcp_clock_us(void)
+{
+ return div_u64(tcp_clock_ns(), NSEC_PER_USEC);
+}
+
+/* This should only be used in contexts where tp->tcp_mstamp is up to date */
+static inline u32 tcp_time_stamp(const struct tcp_sock *tp)
+{
+ return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
+}
+
+/* Could use tcp_clock_us() / 1000, but this version uses a single divide */
+static inline u32 tcp_time_stamp_raw(void)
+{
+ return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
+}
+
+
+/* Refresh 1us clock of a TCP socket,
+ * ensuring monotically increasing values.
*/
-#define tcp_time_stamp ((__u32)(jiffies))
+static inline void tcp_mstamp_refresh(struct tcp_sock *tp)
+{
+ u64 val = tcp_clock_us();
+
+ if (val > tp->tcp_mstamp)
+ tp->tcp_mstamp = val;
+}
+
+static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
+{
+ return max_t(s64, t1 - t0, 0);
+}
static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
{
- return skb->skb_mstamp.stamp_jiffies;
+ return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
}
@@ -778,9 +819,9 @@ struct tcp_skb_cb {
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
- struct skb_mstamp first_tx_mstamp;
+ u64 first_tx_mstamp;
/* when we reached the "delivered" count */
- struct skb_mstamp delivered_mstamp;
+ u64 delivered_mstamp;
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
@@ -896,7 +937,7 @@ struct ack_sample {
* A sample is invalid if "delivered" or "interval_us" is negative.
*/
struct rate_sample {
- struct skb_mstamp prior_mstamp; /* starting timestamp for interval */
+ u64 prior_mstamp; /* starting timestamp for interval */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
s32 delivered; /* number of packets delivered over interval */
long interval_us; /* time for tp->delivered to incr "delivered" */
@@ -1862,7 +1903,7 @@ void tcp_init(void);
/* tcp_recovery.c */
extern void tcp_rack_mark_lost(struct sock *sk);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
- const struct skb_mstamp *xmit_time);
+ u64 xmit_time);
extern void tcp_rack_reo_timeout(struct sock *sk);
/*