aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/inet_hashtables.c
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2015-10-22 08:20:46 -0700
committerDavid S. Miller <davem@davemloft.net>2015-10-23 05:42:21 -0700
commit5e0724d027f0548511a2165a209572d48fe7a4c8 (patch)
tree709301be9b56652004047b89f4467b3c917814cd /net/ipv4/inet_hashtables.c
parentipv4: implement support for NOPREFIXROUTE ifa flag for ipv4 address (diff)
downloadlinux-dev-5e0724d027f0548511a2165a209572d48fe7a4c8.tar.xz
linux-dev-5e0724d027f0548511a2165a209572d48fe7a4c8.zip
tcp/dccp: fix hashdance race for passive sessions
Multiple cpus can process duplicates of incoming ACK messages matching a SYN_RECV request socket. This is a rare event under normal operations, but definitely can happen. Only one must win the race, otherwise corruption would occur. To fix this without adding new atomic ops, we use logic in inet_ehash_nolisten() to detect the request was present in the same ehash bucket where we try to insert the new child. If request socket was not found, we have to undo the child creation. This actually removes a spin_lock()/spin_unlock() pair in reqsk_queue_unlink() for the fast path. Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/inet_hashtables.c')
-rw-r--r--net/ipv4/inet_hashtables.c39
1 files changed, 25 insertions, 14 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 958728a22001..ccc5980797fc 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -407,13 +407,13 @@ static u32 inet_sk_port_offset(const struct sock *sk)
/* insert a socket into ehash, and eventually remove another one
* (The another one can be a SYN_RECV or TIMEWAIT
*/
-int inet_ehash_insert(struct sock *sk, struct sock *osk)
+bool inet_ehash_insert(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_nulls_head *list;
struct inet_ehash_bucket *head;
spinlock_t *lock;
- int ret = 0;
+ bool ret = true;
WARN_ON_ONCE(!sk_unhashed(sk));
@@ -423,30 +423,41 @@ int inet_ehash_insert(struct sock *sk, struct sock *osk)
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
spin_lock(lock);
- __sk_nulls_add_node_rcu(sk, list);
if (osk) {
- WARN_ON(sk->sk_hash != osk->sk_hash);
- sk_nulls_del_node_init_rcu(osk);
+ WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
+ ret = sk_nulls_del_node_init_rcu(osk);
}
+ if (ret)
+ __sk_nulls_add_node_rcu(sk, list);
spin_unlock(lock);
return ret;
}
-void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
{
- inet_ehash_insert(sk, osk);
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ bool ok = inet_ehash_insert(sk, osk);
+
+ if (ok) {
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ } else {
+ percpu_counter_inc(sk->sk_prot->orphan_count);
+ sk->sk_state = TCP_CLOSE;
+ sock_set_flag(sk, SOCK_DEAD);
+ inet_csk_destroy_sock(sk);
+ }
+ return ok;
}
-EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
+EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
void __inet_hash(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
- if (sk->sk_state != TCP_LISTEN)
- return __inet_hash_nolisten(sk, osk);
-
+ if (sk->sk_state != TCP_LISTEN) {
+ inet_ehash_nolisten(sk, osk);
+ return;
+ }
WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
@@ -567,7 +578,7 @@ ok:
inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
- __inet_hash_nolisten(sk, (struct sock *)tw);
+ inet_ehash_nolisten(sk, (struct sock *)tw);
}
if (tw)
inet_twsk_bind_unhash(tw, hinfo);
@@ -584,7 +595,7 @@ ok:
tb = inet_csk(sk)->icsk_bind_hash;
spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- __inet_hash_nolisten(sk, NULL);
+ inet_ehash_nolisten(sk, NULL);
spin_unlock_bh(&head->lock);
return 0;
} else {