aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-03-12 12:08:10 -0700
committerDavid S. Miller <davem@davemloft.net>2020-03-12 12:08:10 -0700
commit93e616131a3825189570524d4792b5223bf1d68c (patch)
tree46fadd2182ac940b8f992ca32d32eb017eca43b4 /net
parentMerge branch 'ethtool-consolidate-irq-coalescing-part-4' (diff)
parentselftests: net: Add SO_REUSEADDR test to check if 4-tuples are fully utilized. (diff)
downloadwireguard-linux-93e616131a3825189570524d4792b5223bf1d68c.tar.xz
wireguard-linux-93e616131a3825189570524d4792b5223bf1d68c.zip
Merge branch 'bind_addr_zero'
Kuniyuki Iwashima says: ==================== Improve bind(addr, 0) behaviour. Currently we fail to bind sockets to ephemeral ports when all of the ports are exhausted even if all sockets have SO_REUSEADDR enabled. In this case, we still have a chance to connect to the different remote hosts. These patches add net.ipv4.ip_autobind_reuse option and fix the behaviour to fully utilize all space of the local (addr, port) tuples. Changes in v5: - Add more description to documents. - Fix sysctl option to use proc_dointvec_minmax. - Remove the Fixes: tag and squash two commits. Changes in v4: - Add net.ipv4.ip_autobind_reuse option to not change the current behaviour. - Modify .gitignore for test. https://lore.kernel.org/netdev/20200308181615.90135-1-kuniyu@amazon.co.jp/ Changes in v3: - Change the title and write more specific description of the 3rd patch. - Add a test in tools/testing/selftests/net/ as the 4th patch. https://lore.kernel.org/netdev/20200229113554.78338-1-kuniyu@amazon.co.jp/ Changes in v2: - Change the description of the 2nd patch ('localhost' -> 'address'). - Correct the description and the if statement of the 3rd patch. https://lore.kernel.org/netdev/20200226074631.67688-1-kuniyu@amazon.co.jp/ v1 with tests: https://lore.kernel.org/netdev/20200220152020.13056-1-kuniyu@amazon.co.jp/ ==================== Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/inet_connection_sock.c36
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
2 files changed, 33 insertions, 12 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a4db79b1b643..3b4f81790e3e 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -131,7 +131,7 @@ static int inet_csk_bind_conflict(const struct sock *sk,
{
struct sock *sk2;
bool reuse = sk->sk_reuse;
- bool reuseport = !!sk->sk_reuseport && reuseport_ok;
+ bool reuseport = !!sk->sk_reuseport;
kuid_t uid = sock_i_uid((struct sock *)sk);
/*
@@ -146,17 +146,21 @@ static int inet_csk_bind_conflict(const struct sock *sk,
(!sk->sk_bound_dev_if ||
!sk2->sk_bound_dev_if ||
sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
- if ((!reuse || !sk2->sk_reuse ||
- sk2->sk_state == TCP_LISTEN) &&
- (!reuseport || !sk2->sk_reuseport ||
- rcu_access_pointer(sk->sk_reuseport_cb) ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(uid, sock_i_uid(sk2))))) {
- if (inet_rcv_saddr_equal(sk, sk2, true))
- break;
- }
- if (!relax && reuse && sk2->sk_reuse &&
+ if (reuse && sk2->sk_reuse &&
sk2->sk_state != TCP_LISTEN) {
+ if ((!relax ||
+ (!reuseport_ok &&
+ reuseport && sk2->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ (sk2->sk_state == TCP_TIME_WAIT ||
+ uid_eq(uid, sock_i_uid(sk2))))) &&
+ inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ } else if (!reuseport_ok ||
+ !reuseport || !sk2->sk_reuseport ||
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(uid, sock_i_uid(sk2)))) {
if (inet_rcv_saddr_equal(sk, sk2, true))
break;
}
@@ -176,12 +180,14 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *
int port = 0;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
+ bool relax = false;
int i, low, high, attempt_half;
struct inet_bind_bucket *tb;
u32 remaining, offset;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
+ports_exhausted:
attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
inet_get_local_port_range(net, &low, &high);
@@ -219,7 +225,7 @@ other_parity_scan:
inet_bind_bucket_for_each(tb, &head->chain)
if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
tb->port == port) {
- if (!inet_csk_bind_conflict(sk, tb, false, false))
+ if (!inet_csk_bind_conflict(sk, tb, relax, false))
goto success;
goto next_port;
}
@@ -239,6 +245,12 @@ next_port:
attempt_half = 2;
goto other_half_scan;
}
+
+ if (net->ipv4.sysctl_ip_autobind_reuse && !relax) {
+ /* We still have a chance to connect to different destinations */
+ relax = true;
+ goto ports_exhausted;
+ }
return NULL;
success:
*port_ret = port;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d9531b4b33f2..81b267e990a1 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -764,6 +764,15 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "ip_autobind_reuse",
+ .data = &init_net.ipv4.sysctl_ip_autobind_reuse,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
.procname = "fwmark_reflect",
.data = &init_net.ipv4.sysctl_fwmark_reflect,
.maxlen = sizeof(int),