Diffstat (limited to 'net/ipv4/tcp_input.c')
 net/ipv4/tcp_input.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 59 insertions(+), 16 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e6ca5a1f3b59..3f7bd7ae7d7a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -100,6 +100,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
+#define FLAG_DSACK_TLP 0x20000 /* DSACK for tail loss probe */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -454,11 +455,12 @@ static void tcp_sndbuf_expand(struct sock *sk)
*/
/* Slow part of check#2. */
-static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
+static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
+ unsigned int skbtruesize)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
- int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) {
@@ -471,7 +473,27 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
return 0;
}
-static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
+/* Even if skb appears to have a bad len/truesize ratio, TCP coalescing
+ * can play nice with us, as sk_buff and skb->head might be either
+ * freed or shared with up to MAX_SKB_FRAGS segments.
+ * Only give a boost to drivers using page frag(s) to hold the frame(s),
+ * and if no payload was pulled in skb->head before reaching us.
+ */
+static u32 truesize_adjust(bool adjust, const struct sk_buff *skb)
+{
+ u32 truesize = skb->truesize;
+
+ if (adjust && !skb_headlen(skb)) {
+ truesize -= SKB_TRUESIZE(skb_end_offset(skb));
+ /* paranoid check, some drivers might be buggy */
+ if (unlikely((int)truesize < (int)skb->len))
+ truesize = skb->truesize;
+ }
+ return truesize;
+}
+
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
+ bool adjust)
{
struct tcp_sock *tp = tcp_sk(sk);
int room;
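
For illustration, here is a minimal userspace sketch of the truesize_adjust() logic above. The fake_skb struct and the 512-byte overhead constant are simplified stand-ins (the kernel's SKB_TRUESIZE() adds the aligned sizes of struct sk_buff and struct skb_shared_info), not the real definitions:

#include <stdio.h>

/* Stand-ins for kernel types/macros; values are illustrative only. */
struct fake_skb {
        unsigned int truesize;   /* memory charged to the receive queue */
        unsigned int len;        /* payload bytes */
        unsigned int headlen;    /* payload bytes in the linear area (skb_headlen()) */
        unsigned int end_offset; /* size of the linear buffer (skb_end_offset()) */
};

#define FAKE_OVERHEAD 512 /* assumed sk_buff + skb_shared_info overhead */
#define FAKE_SKB_TRUESIZE(x) ((x) + FAKE_OVERHEAD)

static unsigned int truesize_adjust(int adjust, const struct fake_skb *skb)
{
        unsigned int truesize = skb->truesize;

        /* Boost only frag-based skbs with an empty linear area: after
         * coalescing, sk_buff and skb->head may be freed or shared, so
         * their size need not be charged against the receive window. */
        if (adjust && !skb->headlen) {
                truesize -= FAKE_SKB_TRUESIZE(skb->end_offset);
                /* paranoid check, some drivers might be buggy */
                if ((int)truesize < (int)skb->len)
                        truesize = skb->truesize;
        }
        return truesize;
}

int main(void)
{
        /* e.g. a 1448-byte segment held in one page frag, tiny linear buffer */
        struct fake_skb skb = { .truesize = 2752, .len = 1448,
                                .headlen = 0, .end_offset = 192 };

        printf("raw=%u adjusted=%u\n", skb.truesize, truesize_adjust(1, &skb));
        return 0;
}

With these example numbers the segment is charged 2048 bytes instead of 2752, which is what lets check #2 below take the fast growth path.
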
@@ -480,15 +502,16 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
/* Check #1 */
if (room > 0 && !tcp_under_memory_pressure(sk)) {
+ unsigned int truesize = truesize_adjust(adjust, skb);
int incr;
/* Check #2. Increase window, if skb with such overhead
* will fit to rcvbuf in future.
*/
- if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
+ if (tcp_win_from_space(sk, truesize) <= skb->len)
incr = 2 * tp->advmss;
else
- incr = __tcp_grow_window(sk, skb);
+ incr = __tcp_grow_window(sk, skb, truesize);
if (incr) {
incr = max_t(int, incr, 2 * skb->len);
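
The tcp_win_from_space(sk, truesize) <= skb->len test asks whether the payload dominates the memory charged for it. A sketch of the arithmetic, assuming tcp_adv_win_scale == 1 (the stock default; the real helper reads the per-netns sysctl):

#include <stdio.h>

/* Sketch of tcp_win_from_space(): advertise the charged memory minus a
 * 1/2^scale reserve for overhead (a negative scale means a plain shift). */
static int win_from_space(int space, int adv_win_scale)
{
        return adv_win_scale <= 0 ? space >> (-adv_win_scale)
                                  : space - (space >> adv_win_scale);
}

int main(void)
{
        int truesize = 2048, len = 1448;
        int win = win_from_space(truesize, 1); /* assumed tcp_adv_win_scale == 1 */

        printf("win=%d len=%d -> %s\n", win, len,
               win <= len ? "good ratio: grow by 2 * advmss"
                          : "poor ratio: slow __tcp_grow_window() path");
        return 0;
}

With the adjusted truesize of 2048 from the previous sketch, win_from_space() yields 1024 <= 1448, so rcv_ssthresh grows by the fast 2 * advmss increment.
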
@@ -782,7 +805,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
tcp_ecn_check_ce(sk, skb);
if (skb->len >= 128)
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, true);
}
/* Called to compute a smoothed rtt estimate. The data fed to this
@@ -969,6 +992,8 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
return 0;
if (seq_len > tp->mss_cache)
dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
+ else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq)
+ state->flag |= FLAG_DSACK_TLP;
tp->dsack_dups += dup_segs;
/* Skip the DSACK if dup segs weren't retransmitted by sender */
@@ -976,7 +1001,14 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
return 0;
tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
- tp->rack.dsack_seen = 1;
+ /* We increase the RACK ordering window in rounds where we receive
+ * DSACKs that may have been due to reordering causing RACK to trigger
+ * a spurious fast recovery. Thus RACK ignores DSACKs that happen
+ * without having seen reordering, or that match TLP probes (TLP
+ * is timer-driven, not triggered by RACK).
+ */
+ if (tp->reord_seen && !(state->flag & FLAG_DSACK_TLP))
+ tp->rack.dsack_seen = 1;
state->flag |= FLAG_DSACKING_ACK;
/* A spurious retransmission is delivered */
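
Putting the two tcp_dsack_seen() hunks together: a DSACK whose end_seq equals tp->tlp_high_seq is attributed to the tail loss probe, and only non-TLP DSACKs seen after reordering feed RACK's reordering window. A self-contained sketch (the flag values are the kernel's; the struct is a stand-in):

#include <stdbool.h>
#include <stdio.h>

#define FLAG_DSACKING_ACK 0x800    /* kernel value */
#define FLAG_DSACK_TLP    0x20000  /* kernel value */

/* Stand-in for the tcp_sock fields used above. */
struct sock_state {
        unsigned int tlp_high_seq; /* snd_nxt when the TLP probe went out, 0 if none */
        unsigned int reord_seen;   /* reordering events observed so far */
        bool rack_dsack_seen;      /* feeds RACK's reo_wnd growth */
};

static int classify_dsack(struct sock_state *st, unsigned int end_seq)
{
        int flag = FLAG_DSACKING_ACK;

        /* A DSACK ending exactly at tlp_high_seq is the receiver telling
         * us it got both the original tail segment and the TLP probe. */
        if (st->tlp_high_seq && st->tlp_high_seq == end_seq)
                flag |= FLAG_DSACK_TLP;

        /* Only non-TLP DSACKs seen after reordering widen RACK's window. */
        if (st->reord_seen && !(flag & FLAG_DSACK_TLP))
                st->rack_dsack_seen = true;

        return flag;
}

int main(void)
{
        struct sock_state st = { .tlp_high_seq = 1000, .reord_seen = 1 };

        printf("tlp dsack:   flag=0x%x rack=%d\n",
               classify_dsack(&st, 1000), st.rack_dsack_seen);
        printf("other dsack: flag=0x%x rack=%d\n",
               classify_dsack(&st, 500), st.rack_dsack_seen);
        return 0;
}
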
@@ -3628,7 +3660,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
if (!tp->tlp_retrans) {
/* TLP of new data has been acknowledged */
tp->tlp_high_seq = 0;
- } else if (flag & FLAG_DSACKING_ACK) {
+ } else if (flag & FLAG_DSACK_TLP) {
/* This DSACK means original and TLP probe arrived; no loss */
tp->tlp_high_seq = 0;
} else if (after(ack, tp->tlp_high_seq)) {
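
Before this hunk, any DSACK-carrying ACK (FLAG_DSACKING_ACK) ended the TLP episode as "no loss", even if the DSACK came from unrelated reordering; now only a DSACK that covers the probe itself (FLAG_DSACK_TLP) does. A condensed sketch of the resulting decision, with stand-in names around the kernel's flag value and after() semantics:

#include <stdio.h>

#define FLAG_DSACK_TLP 0x20000  /* kernel value */

enum tlp_outcome { TLP_PENDING, TLP_NO_LOSS, TLP_LOSS_CONFIRMED };

static int seq_after(unsigned int a, unsigned int b)
{
        return (int)(a - b) > 0;  /* wrap-safe compare, like the kernel's after() */
}

static enum tlp_outcome resolve_tlp(unsigned int tlp_high_seq, int tlp_retrans,
                                    unsigned int ack, int flag)
{
        if (!tlp_high_seq)
                return TLP_PENDING;        /* no probe episode outstanding */
        if (!tlp_retrans)
                return TLP_NO_LOSS;        /* probe carried new data, now acked */
        if (flag & FLAG_DSACK_TLP)
                return TLP_NO_LOSS;        /* original and probe both arrived */
        if (seq_after(ack, tlp_high_seq))
                return TLP_LOSS_CONFIRMED; /* acked past the probe with no DSACK
                                              for it: the original tail was lost */
        return TLP_PENDING;
}

int main(void)
{
        printf("%d %d\n",
               resolve_tlp(1000, 1, 1500, FLAG_DSACK_TLP), /* 1: no loss */
               resolve_tlp(1000, 1, 1500, 0));             /* 2: loss confirmed */
        return 0;
}
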
@@ -4247,6 +4279,9 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
{
trace_tcp_receive_reset(sk);
+ /* mptcp can't tell us to ignore reset pkts,
+ * so just ignore the return value of mptcp_incoming_options().
+ */
if (sk_is_mptcp(sk))
mptcp_incoming_options(sk, skb);
@@ -4766,7 +4801,7 @@ coalesce_done:
* and trigger fast retransmit.
*/
if (tcp_is_sack(tp))
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, true);
kfree_skb_partial(skb, fragstolen);
skb = NULL;
goto add_sack;
@@ -4854,7 +4889,7 @@ end:
* and trigger fast retransmit.
*/
if (tcp_is_sack(tp))
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, false);
skb_condense(skb);
skb_set_owner_r(skb, sk);
}
@@ -4941,8 +4976,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
bool fragstolen;
int eaten;
- if (sk_is_mptcp(sk))
- mptcp_incoming_options(sk, skb);
+ /* If a subflow has been reset, the packet should not continue
+ * to be processed, drop the packet.
+ */
+ if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) {
+ __kfree_skb(skb);
+ return;
+ }
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
__kfree_skb(skb);
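
The new contract: mptcp_incoming_options() returns false when option processing found the subflow reset, and every caller must then drop the skb rather than feed it further into the TCP receive path. A self-contained schematic of that caller pattern (all names below are stand-ins, with a bool modeling the reset case):

#include <stdbool.h>
#include <stdio.h>

struct fake_skb { const char *payload; };

/* Models mptcp_incoming_options(): false tells the caller to drop. */
static bool mptcp_incoming_options_stub(bool subflow_reset)
{
        return !subflow_reset;
}

static void data_queue(struct fake_skb *skb, bool is_mptcp, bool subflow_reset)
{
        if (is_mptcp && !mptcp_incoming_options_stub(subflow_reset)) {
                printf("dropped: %s\n", skb->payload); /* __kfree_skb() in kernel */
                return;
        }
        printf("queued: %s\n", skb->payload);
}

int main(void)
{
        struct fake_skb a = { "seg1" }, b = { "seg2" };

        data_queue(&a, true, false); /* normal: queued */
        data_queue(&b, true, true);  /* subflow reset seen: dropped */
        return 0;
}
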
@@ -5375,7 +5415,7 @@ static void tcp_new_space(struct sock *sk)
tp->snd_cwnd_stamp = tcp_jiffies32;
}
- sk->sk_write_space(sk);
+ INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
}
static void tcp_check_space(struct sock *sk)
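
INDIRECT_CALL_1() comes from <linux/indirect_call_wrapper.h>: with retpolines enabled it compares the pointer against the expected target and emits a direct call on a match, sparing the common sk_stream_write_space case the retpoline penalty; with retpolines off it collapses to a plain indirect call. A userspace sketch of the idea:

#include <stdio.h>

/* Userspace sketch of the INDIRECT_CALL_1() idea: when the pointer equals
 * the expected target, call it directly so no retpoline is needed;
 * otherwise fall back to the indirect call. The kernel wraps the compare
 * in likely(); that hint is dropped here. */
#define INDIRECT_CALL_1(f, f1, ...) \
        ((f) == (f1) ? f1(__VA_ARGS__) : (f)(__VA_ARGS__))

static void stream_write_space(int sk) { printf("fast path, sk=%d\n", sk); }
static void custom_write_space(int sk) { printf("slow path, sk=%d\n", sk); }

int main(void)
{
        void (*write_space)(int) = stream_write_space;

        INDIRECT_CALL_1(write_space, stream_write_space, 42); /* direct call */
        write_space = custom_write_space;
        INDIRECT_CALL_1(write_space, stream_write_space, 42); /* indirect call */
        return 0;
}
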
@@ -5922,8 +5962,8 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
tp->snd_cwnd_stamp = tcp_jiffies32;
- icsk->icsk_ca_initialized = 0;
bpf_skops_established(sk, bpf_op, skb);
+ /* Initialize congestion control unless BPF initialized it already: */
if (!icsk->icsk_ca_initialized)
tcp_init_congestion_control(sk);
tcp_init_buffer_space(sk);
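
The ordering here matters because a BPF sockops program run from bpf_skops_established() can itself pick and initialize a congestion-control module (e.g. via bpf_setsockopt(TCP_CONGESTION)), setting icsk_ca_initialized. A toy model of the init-once guard; everything below is a stand-in, and the BPF detail is an assumption about the callback's capabilities rather than something shown in this hunk:

#include <stdbool.h>
#include <stdio.h>

struct conn { bool ca_initialized; const char *ca_name; };

static void init_congestion_control(struct conn *c, const char *name)
{
        c->ca_name = name;
        c->ca_initialized = true;
}

/* Models the BPF established callback, which may choose a CC itself. */
static void bpf_established_cb(struct conn *c, bool bpf_picks_ca)
{
        if (bpf_picks_ca)
                init_congestion_control(c, "bbr");
}

static void init_transfer(struct conn *c, bool bpf_picks_ca)
{
        bpf_established_cb(c, bpf_picks_ca);
        if (!c->ca_initialized)                      /* unless BPF did it already */
                init_congestion_control(c, "cubic"); /* the configured default */
}

int main(void)
{
        struct conn a = { 0 }, b = { 0 };

        init_transfer(&a, false);
        init_transfer(&b, true);
        printf("a=%s b=%s\n", a.ca_name, b.ca_name); /* a=cubic b=bbr */
        return 0;
}
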
@@ -6523,8 +6563,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_CLOSING:
case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- if (sk_is_mptcp(sk))
- mptcp_incoming_options(sk, skb);
+ /* If a subflow has been reset, the packet should not
+ * continue to be processed, drop the packet.
+ */
+ if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb))
+ goto discard;
break;
}
fallthrough;