diff options
author | 2020-03-12 12:08:10 -0700 | |
---|---|---|
committer | 2020-03-12 12:08:10 -0700 | |
commit | 93e616131a3825189570524d4792b5223bf1d68c (patch) | |
tree | 46fadd2182ac940b8f992ca32d32eb017eca43b4 /net | |
parent | Merge branch 'ethtool-consolidate-irq-coalescing-part-4' (diff) | |
parent | selftests: net: Add SO_REUSEADDR test to check if 4-tuples are fully utilized. (diff) | |
download | wireguard-linux-93e616131a3825189570524d4792b5223bf1d68c.tar.xz wireguard-linux-93e616131a3825189570524d4792b5223bf1d68c.zip |
Merge branch 'bind_addr_zero'
Kuniyuki Iwashima says:
====================
Improve bind(addr, 0) behaviour.
Currently we fail to bind sockets to ephemeral ports when all of the ports
are exhausted even if all sockets have SO_REUSEADDR enabled. In this case,
we still have a chance to connect to the different remote hosts.
These patches add net.ipv4.ip_autobind_reuse option and fix the behaviour
to fully utilize all space of the local (addr, port) tuples.
Changes in v5:
- Add more description to documents.
- Fix sysctl option to use proc_dointvec_minmax.
- Remove the Fixes: tag and squash two commits.
Changes in v4:
- Add net.ipv4.ip_autobind_reuse option to not change the current behaviour.
- Modify .gitignore for test.
https://lore.kernel.org/netdev/20200308181615.90135-1-kuniyu@amazon.co.jp/
Changes in v3:
- Change the title and write more specific description of the 3rd patch.
- Add a test in tools/testing/selftests/net/ as the 4th patch.
https://lore.kernel.org/netdev/20200229113554.78338-1-kuniyu@amazon.co.jp/
Changes in v2:
- Change the description of the 2nd patch ('localhost' -> 'address').
- Correct the description and the if statement of the 3rd patch.
https://lore.kernel.org/netdev/20200226074631.67688-1-kuniyu@amazon.co.jp/
v1 with tests:
https://lore.kernel.org/netdev/20200220152020.13056-1-kuniyu@amazon.co.jp/
====================
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 36 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 9 |
2 files changed, 33 insertions, 12 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a4db79b1b643..3b4f81790e3e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -131,7 +131,7 @@ static int inet_csk_bind_conflict(const struct sock *sk, { struct sock *sk2; bool reuse = sk->sk_reuse; - bool reuseport = !!sk->sk_reuseport && reuseport_ok; + bool reuseport = !!sk->sk_reuseport; kuid_t uid = sock_i_uid((struct sock *)sk); /* @@ -146,17 +146,21 @@ static int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if ((!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - (!reuseport || !sk2->sk_reuseport || - rcu_access_pointer(sk->sk_reuseport_cb) || - (sk2->sk_state != TCP_TIME_WAIT && - !uid_eq(uid, sock_i_uid(sk2))))) { - if (inet_rcv_saddr_equal(sk, sk2, true)) - break; - } - if (!relax && reuse && sk2->sk_reuse && + if (reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { + if ((!relax || + (!reuseport_ok && + reuseport && sk2->sk_reuseport && + !rcu_access_pointer(sk->sk_reuseport_cb) && + (sk2->sk_state == TCP_TIME_WAIT || + uid_eq(uid, sock_i_uid(sk2))))) && + inet_rcv_saddr_equal(sk, sk2, true)) + break; + } else if (!reuseport_ok || + !reuseport || !sk2->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, sock_i_uid(sk2)))) { if (inet_rcv_saddr_equal(sk, sk2, true)) break; } @@ -176,12 +180,14 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int * int port = 0; struct inet_bind_hashbucket *head; struct net *net = sock_net(sk); + bool relax = false; int i, low, high, attempt_half; struct inet_bind_bucket *tb; u32 remaining, offset; int l3mdev; l3mdev = inet_sk_bound_l3mdev(sk); +ports_exhausted: attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; other_half_scan: inet_get_local_port_range(net, &low, &high); @@ -219,7 +225,7 @@ other_parity_scan: inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && tb->port == port) { - if (!inet_csk_bind_conflict(sk, tb, false, false)) + if (!inet_csk_bind_conflict(sk, tb, relax, false)) goto success; goto next_port; } @@ -239,6 +245,12 @@ next_port: attempt_half = 2; goto other_half_scan; } + + if (net->ipv4.sysctl_ip_autobind_reuse && !relax) { + /* We still have a chance to connect to different destinations */ + relax = true; + goto ports_exhausted; + } return NULL; success: *port_ret = port; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d9531b4b33f2..81b267e990a1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -764,6 +764,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { + .procname = "ip_autobind_reuse", + .data = &init_net.ipv4.sysctl_ip_autobind_reuse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { .procname = "fwmark_reflect", .data = &init_net.ipv4.sysctl_fwmark_reflect, .maxlen = sizeof(int), |