From 8dbd76e79a16b45b2ccb01d2f2e08dbf64e71e40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Dec 2019 18:20:41 -0800 Subject: tcp/dccp: fix possible race __inet_lookup_established() Michal Kubecek and Firo Yang did a very nice analysis of crashes happening in __inet_lookup_established(). Since a TCP socket can go from TCP_ESTABLISH to TCP_LISTEN (via a close()/socket()/listen() cycle) without a RCU grace period, I should not have changed listeners linkage in their hash table. They must use the nulls protocol (Documentation/RCU/rculist_nulls.txt), so that a lookup can detect a socket in a hash list was moved in another one. Since we added code in commit d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix"), we have to add hlist_nulls_add_tail_rcu() helper. Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") Signed-off-by: Eric Dumazet Reported-by: Michal Kubecek Reported-by: Firo Yang Reviewed-by: Michal Kubecek Link: https://lore.kernel.org/netdev/20191120083919.GH27852@unicorn.suse.cz/ Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv4/inet_hashtables.c') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 83fb00153018..2bbaaf0c7176 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -516,10 +516,11 @@ static int inet_reuseport_add_sock(struct sock *sk, struct inet_listen_hashbucket *ilb) { struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; + const struct hlist_nulls_node *node; struct sock *sk2; kuid_t uid = sock_i_uid(sk); - sk_for_each_rcu(sk2, &ilb->head) { + sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) { if (sk2 != sk && sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && @@ -555,9 +556,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) } if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && sk->sk_family == AF_INET6) - hlist_add_tail_rcu(&sk->sk_node, &ilb->head); + __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head); else - hlist_add_head_rcu(&sk->sk_node, &ilb->head); + __sk_nulls_add_node_rcu(sk, &ilb->nulls_head); inet_hash2(hashinfo, sk); ilb->count++; sock_set_flag(sk, SOCK_RCU_FREE); @@ -606,11 +607,9 @@ void inet_unhash(struct sock *sk) reuseport_detach_sock(sk); if (ilb) { inet_unhash2(hashinfo, sk); - __sk_del_node_init(sk); - ilb->count--; - } else { - __sk_nulls_del_node_init_rcu(sk); + ilb->count--; } + __sk_nulls_del_node_init_rcu(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); unlock: spin_unlock_bh(lock); @@ -750,7 +749,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h) for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); - INIT_HLIST_HEAD(&h->listening_hash[i].head); + INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head, + i + LISTENING_NULLS_BASE); h->listening_hash[i].count = 0; } -- cgit v1.2.3-59-g8ed1b