aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2018-04-16 10:33:35 -0700
committerDavid S. Miller <davem@davemloft.net>2018-04-16 18:26:37 -0400
commitd1361840f8c519eaee9a78ffe09e4f0a1b586846 (patch)
tree86a904ade99a93544e0817cda7dc842b12f9b833 /net/ipv4
parenttc-testing: add sample action tests (diff)
downloadlinux-dev-d1361840f8c519eaee9a78ffe09e4f0a1b586846.tar.xz
linux-dev-d1361840f8c519eaee9a78ffe09e4f0a1b586846.zip
tcp: fix SO_RCVLOWAT and RCVBUF autotuning
Applications might use SO_RCVLOWAT on TCP socket hoping to receive one [E]POLLIN event only when a given amount of bytes are ready in socket receive queue. Problem is that receive autotuning is not aware of this constraint, meaning sk_rcvbuf might be too small to allow all bytes to be stored. Add a new (struct proto_ops)->set_rcvlowat method so that a protocol can override the default setsockopt(SO_RCVLOWAT) behavior. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/tcp.c21
2 files changed, 22 insertions, 0 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eaed0367e669..f5c562aaef35 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1006,6 +1006,7 @@ const struct proto_ops inet_stream_ops = {
.compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
+ .set_rcvlowat = tcp_set_rcvlowat,
};
EXPORT_SYMBOL(inet_stream_ops);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bccc4c270087..0abd8d1d3d1d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1701,6 +1701,27 @@ int tcp_peek_len(struct socket *sock)
}
EXPORT_SYMBOL(tcp_peek_len);
+/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
+int tcp_set_rcvlowat(struct sock *sk, int val)
+{
+ sk->sk_rcvlowat = val ? : 1;
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+ return 0;
+
+ /* val comes from user space and might be close to INT_MAX */
+ val <<= 1;
+ if (val < 0)
+ val = INT_MAX;
+
+ val = min(val, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ if (val > sk->sk_rcvbuf) {
+ sk->sk_rcvbuf = val;
+ tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(tcp_set_rcvlowat);
+
static void tcp_update_recv_tstamps(struct sk_buff *skb,
struct scm_timestamping *tss)
{