about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
author: Dmitry Torokhov <dmitry.torokhov@gmail.com> 2019-09-16 09:56:27 -0700
committer: Dmitry Torokhov <dmitry.torokhov@gmail.com> 2019-09-16 09:56:27 -0700
commit: 0898782247ae533d1f4e47a06bc5d4870931b284 (patch)
tree: 21f75cc590542a870f42350b9410fc0588f02b79 /net/ipv4/tcp_output.c
parent: Input: elan_i2c - remove Lenovo Legion Y7000 PnpID (diff)
parent: Input: sidewinder - make array seq static const, makes object smaller (diff)
download: wireguard-linux-0898782247ae533d1f4e47a06bc5d4870931b284.tar.xz
download: wireguard-linux-0898782247ae533d1f4e47a06bc5d4870931b284.zip
Merge branch 'next' into for-linus
Prepare input updates for 5.4 merge window.
Diffstat (limited to '')
-rw-r--r-- net/ipv4/tcp_output.c 36
1 files changed, 31 insertions, 5 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0ebc33d1c9e5..6e4afc48d7bb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1153,6 +1153,8 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
sizeof(struct inet6_skb_parm)));
+ tcp_add_tx_delay(skb, tp);
+
err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
if (unlikely(err > 0)) {
@@ -1286,6 +1288,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
int nsize, old_factor;
+ long limit;
int nlen;
u8 flags;
@@ -1296,8 +1299,16 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
if (nsize < 0)
nsize = 0;
- if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf &&
- tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) {
+ /* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
+ * We need some allowance to not penalize applications setting small
+ * SO_SNDBUF values.
+ * Also allow first and last skb in retransmit queue to be split.
+ */
+ limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+ if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
+ tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
+ skb != tcp_rtx_queue_head(sk) &&
+ skb != tcp_rtx_queue_tail(sk))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
return -ENOMEM;
}
@@ -2239,6 +2250,18 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
limit <<= factor;
+ if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
+ tcp_sk(sk)->tcp_tx_delay) {
+ u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay;
+
+ /* TSQ is based on skb truesize sum (sk_wmem_alloc), so we
+ * approximate our needs assuming an ~100% skb->truesize overhead.
+ * USEC_PER_SEC is approximated by 2^20.
+ * do_div(extra_bytes, USEC_PER_SEC/2) is replaced by a right shift.
+ */
+ extra_bytes >>= (20 - 1);
+ limit += extra_bytes;
+ }
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
/* Always send skb if rtx queue is empty.
* No need to wait for TX completion to call us back,
@@ -3217,6 +3240,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
int tcp_header_size;
struct tcphdr *th;
int mss;
+ u64 now;
skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
if (unlikely(!skb)) {
@@ -3248,13 +3272,14 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
memset(&opts, 0, sizeof(opts));
+ now = tcp_clock_ns();
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
skb->skb_mstamp_ns = cookie_init_timestamp(req);
else
#endif
{
- skb->skb_mstamp_ns = tcp_clock_ns();
+ skb->skb_mstamp_ns = now;
if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
}
@@ -3297,8 +3322,9 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
rcu_read_unlock();
#endif
- /* Do not fool tcpdump (if any), clean our debris */
- skb->tstamp = 0;
+ skb->skb_mstamp_ns = now;
+ tcp_add_tx_delay(skb, tp);
+
return skb;
}
EXPORT_SYMBOL(tcp_make_synack);