aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
author: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> 2008-03-11 17:55:27 -0700
committer: David S. Miller <davem@davemloft.net> 2008-03-11 17:55:27 -0700
commit: 5ea3a7480606cef06321cd85bc5113c72d2c7c68 (patch)
tree: 202d758d85e60b2a38cd7dd57b41a47d97ebbce8 /net/ipv4/tcp_output.c
parent: rt2x00: Add new D-Link USB ID (diff)
download: linux-dev-5ea3a7480606cef06321cd85bc5113c72d2c7c68.tar.xz
linux-dev-5ea3a7480606cef06321cd85bc5113c72d2c7c68.zip
[TCP]: Prevent sending past receiver window with TSO (at last skb)
With TSO it was possible to send past the receiver window when the skb to be sent was the last one in the write queue and the receiver window was the limiting factor. The loophole was in tcp_mss_split_point(), which lacked a receiver window check for tcp_write_queue_tail() when cwnd was also smaller than the full skb.

Noticed by Thomas Gleixner <tglx@linutronix.de> in the form of "Treason uncloaked! Peer ... shrinks window .... Repaired." messages (the peer didn't actually shrink its window as the message suggests; we had just sent something past it without permission to do so).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Tested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- net/ipv4/tcp_output.c 12
1 files changed, 10 insertions, 2 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ed750f9ceb07..01578f544ad6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1035,6 +1035,13 @@ static void tcp_cwnd_validate(struct sock *sk)
* introducing MSS oddities to segment boundaries. In rare cases where
* mss_now != mss_cache, we will request caller to create a small skb
* per input skb which could be mostly avoided here (if desired).
+ *
+ * We explicitly want to create a request for splitting write queue tail
+ * to a small skb for Nagle purposes while avoiding unnecessary modulos,
+ * thus all the complexity (cwnd_len is always MSS multiple which we
+ * return whenever allowed by the other factors). Basically we need the
+ * modulo only when the receiver window alone is the limiting factor or
+ * when we would be allowed to send the split-due-to-Nagle skb fully.
*/
static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
unsigned int mss_now, unsigned int cwnd)
@@ -1048,10 +1055,11 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
return cwnd_len;
- if (skb == tcp_write_queue_tail(sk) && cwnd_len <= skb->len)
+ needed = min(skb->len, window);
+
+ if (skb == tcp_write_queue_tail(sk) && cwnd_len <= needed)
return cwnd_len;
- needed = min(skb->len, window);
return needed - needed % mss_now;
}