path: root/include
diff options
authorEric Dumazet <edumazet@google.com>2019-06-12 11:57:25 -0700
committerDavid S. Miller <davem@davemloft.net>2019-06-12 13:05:43 -0700
commita842fe1425cb20f457abd3f8ef98b468f83ca98b (patch)
treef578dc1b5ae2095c070ad8a15ed4d03d15a8a4c2 /include
parentMerge branch 'ena-dynamic-queue-sizes' (diff)
tcp: add optional per socket transmit delay
Adding delays to TCP flows is crucial for studying behavior of TCP stacks, including congestion control modules. Linux offers netem module, but it has unpractical constraints : - Need root access to change qdisc - Hard to setup on egress if combined with non trivial qdisc like FQ - Single delay for all flows. EDT (Earliest Departure Time) adoption in TCP stack allows us to enable a per socket delay at a very small cost. Networking tools can now establish thousands of flows, each of them with a different delay, simulating real world conditions. This requires FQ packet scheduler or a EDT-enabled NIC. This patchs adds TCP_TX_DELAY socket option, to set a delay in usec units. unsigned int tx_delay = 10000; /* 10 msec */ setsockopt(fd, SOL_TCP, TCP_TX_DELAY, &tx_delay, sizeof(tx_delay)); Note that FQ packet scheduler limits might need some tweaking : man tc-fq PARAMETERS limit Hard limit on the real queue size. When this limit is reached, new packets are dropped. If the value is lowered, packets are dropped so that the new limit is met. Default is 10000 packets. flow_limit Hard limit on the maximum number of packets queued per flow. Default value is 100. Use of TCP_TX_DELAY option will increase number of skbs in FQ qdisc, so packets would be dropped if any of the previous limit is hit. Use of a jump label makes this support runtime-free, for hosts never using the option. Also note that TSQ (TCP Small Queues) limits are slightly changed with this patch : we need to account that skbs artificially delayed wont stop us providind more skbs to feed the pipe (netem uses skb_orphan_partial() for this purpose, but FQ can not use this trick) Because of that, using big delays might very well trigger old bugs in TSO auto defer logic and/or sndbuf limited detection. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to '')
3 files changed, 24 insertions, 0 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 711361af9ce0..c23019a3b264 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -245,6 +245,7 @@ struct tcp_sock {
syn_smc:1; /* SYN includes SMC */
u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */
+ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */
u64 tcp_wstamp_ns; /* departure time for next sent data packet */
u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
@@ -436,6 +437,7 @@ struct tcp_timewait_sock {
u32 tw_last_oow_ack_time;
int tw_ts_recent_stamp;
+ u32 tw_tx_delay;
struct tcp_md5sig_key *tw_md5_key;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 204328b88412..49a178b8d5b2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2232,4 +2232,23 @@ void clean_acked_data_disable(struct inet_connection_sock *icsk);
void clean_acked_data_flush(void);
+static inline void tcp_add_tx_delay(struct sk_buff *skb,
+ const struct tcp_sock *tp)
+ if (static_branch_unlikely(&tcp_tx_delay_enabled))
+ skb->skb_mstamp_ns += (u64)tp->tcp_tx_delay * NSEC_PER_USEC;
+static inline void tcp_set_tx_time(struct sk_buff *skb,
+ const struct sock *sk)
+ if (static_branch_unlikely(&tcp_tx_delay_enabled)) {
+ u32 delay = (sk->sk_state == TCP_TIME_WAIT) ?
+ tcp_twsk(sk)->tw_tx_delay : tcp_sk(sk)->tcp_tx_delay;
+ skb->skb_mstamp_ns = tcp_clock_ns() + (u64)delay * NSEC_PER_USEC;
+ }
#endif /* _TCP_H */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index b521464ea962..b3564f85a762 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -127,6 +127,9 @@ enum {
+#define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */
#define TCP_REPAIR_ON 1
#define TCP_REPAIR_OFF 0
#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */