Merge branch 'tcp-lockless-listener'

Eric Dumazet says:

====================
tcp/dccp: lockless listener

TCP listener refactoring: this is becoming interesting!

This patch series takes the steps to use the normal TCP/DCCP ehash table to store SYN_RECV requests, instead of the private per-listener hash table we had until now.

SYNACK skbs are now attached to their syn_recv request socket, so that we no longer heavily modify the listener's sk_wmem_alloc.

The listener lock is no longer held in the fast path, including in SYNCOOKIE mode.

During my tests, my server was able to process 3,500,000 SYN packets per second on one listener and still had available CPU cycles.

That is about 2 to 3 orders of magnitude more than what we had with older kernels.

This effort started two years ago and I am pleased to have reached expectations.

We'll probably extend SO_REUSEPORT to add proper CPU/NUMA affinities, so that heavy-duty TCP servers can get proper siloing thanks to multi-queue NICs.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2015-10-03 04:32:52 -0700
committer: David S. Miller <davem@davemloft.net> 2015-10-03 04:32:52 -0700
commit: c3fc7ac9a0b978ee8538058743d21feef25f7b33 (patch)
tree: 0caf05649d27830ba0f9548704abbb1ec4b5bb91 /net/ipv4/tcp_input.c
parent: Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (diff)
parent: tcp: do not lock listener to process SYN packets (diff)
download: linux-dev-c3fc7ac9a0b978ee8538058743d21feef25f7b33.tar.xz
linux-dev-c3fc7ac9a0b978ee8538058743d21feef25f7b33.zip
1 files changed, 15 insertions, 15 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e58cbcd2f07e..27108757c310 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6068,9 +6068,9 @@ static bool tcp_syn_flood_action(const struct sock *sk,
 				 const struct sk_buff *skb,
 				 const char *proto)
 {
+	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
 	const char *msg = "Dropping request";
 	bool want_cookie = false;
-	struct listen_sock *lopt;
 
 #ifdef CONFIG_SYN_COOKIES
 	if (sysctl_tcp_syncookies) {
@@ -6081,10 +6081,9 @@ static bool tcp_syn_flood_action(const struct sock *sk,
 #endif
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
 
-	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
-	if (!lopt->synflood_warned &&
+	if (!queue->synflood_warned &&
 	    sysctl_tcp_syncookies != 2 &&
-	    xchg(&lopt->synflood_warned, 1) == 0)
+	    xchg(&queue->synflood_warned, 1) == 0)
 		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
 			proto, ntohs(tcp_hdr(skb)->dest), msg);
 
@@ -6121,8 +6120,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	struct request_sock *req;
 	bool want_cookie = false;
 	struct flowi fl;
-	int err;
-
 
 	/* TW buckets are converted to open requests without
 	 * limitations, they conserve resources and peer is
@@ -6231,21 +6228,24 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->txhash = net_tx_rndhash();
 	tcp_openreq_init_rwin(req, sk, dst);
-	if (!want_cookie)
+	if (!want_cookie) {
 		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
-	err = af_ops->send_synack(fastopen_sk ?: sk, dst, &fl, req,
-				  skb_get_queue_mapping(skb), &foc);
+		tcp_reqsk_record_syn(sk, req, skb);
+	}
 	if (fastopen_sk) {
+		af_ops->send_synack(fastopen_sk, dst, &fl, req,
+				    skb_get_queue_mapping(skb), &foc, false);
 		sock_put(fastopen_sk);
 	} else {
-		if (err || want_cookie)
-			goto drop_and_free;
-
 		tcp_rsk(req)->tfo_listener = false;
-		af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+		if (!want_cookie)
+			inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+		af_ops->send_synack(sk, dst, &fl, req,
+				    skb_get_queue_mapping(skb), &foc, !want_cookie);
+		if (want_cookie)
+			goto drop_and_free;
 	}
-	tcp_reqsk_record_syn(sk, req, skb);
-
+	reqsk_put(req);
 	return 0;
 
 drop_and_release:
author	David S. Miller <davem@davemloft.net>	2015-10-03 04:32:52 -0700
committer	David S. Miller <davem@davemloft.net>	2015-10-03 04:32:52 -0700
commit	c3fc7ac9a0b978ee8538058743d21feef25f7b33 (patch)
tree	0caf05649d27830ba0f9548704abbb1ec4b5bb91 /net/ipv4/tcp_input.c
parent	Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (diff)
parent	tcp: do not lock listener to process SYN packets (diff)
download	linux-dev-c3fc7ac9a0b978ee8538058743d21feef25f7b33.tar.xz linux-dev-c3fc7ac9a0b978ee8538058743d21feef25f7b33.zip