author    Jakub Kicinski <kuba@kernel.org>  2022-05-04 19:22:34 -0700
committer Jakub Kicinski <kuba@kernel.org>  2022-05-04 19:22:35 -0700
commit    ef562489818723ea0a66c57bfdfbf151ad568c42 (patch)
tree      57699c177cf0115d66ca9dc3eec9666d9f0a415d
parent    Merge branch 'wireguard-patches-for-5-18-rc6' (diff)
parent    tcp: drop the hash_32() part from the index calculation (diff)
Merge branch 'insufficient-tcp-source-port-randomness'
Willy Tarreau says:

====================
insufficient TCP source port randomness

In a not-yet published paper, Moshe Kol, Amit Klein, and Yossi Gilad report
being able to accurately identify a client by forcing it to emit only 40
times more connections than the number of entries in the table_perturb[]
table, which is indexed by hashing the connection tuple. The current 2^8
setting allows them to perform that attack with only 10k connections, which
is not hard to achieve in a few seconds.

Eric, Amit and I have been working on this for a few weeks now, imagining,
testing and eliminating a number of approaches that Amit and his team were
still able to break, or that were found to be too risky or too expensive.
We ended up with the simple improvements in this series, which resist the
attack, do not degrade performance, and preserve a reliable port selection
algorithm that avoids connection failures, including the odd/even port
selection preference that allows bind() to always find a port quickly even
under strong connect() stress.

The approach relies on several factors:

  - resalting the hash secret used to choose the table_perturb[] entry
    every 10 seconds, to defeat slow attacks and force the attacker to
    forget everything that was learned after this delay. This alone
    eliminates most of the problem, because if a client stays silent for
    more than 10 seconds there is no link between the previous and the
    next patterns, while 10s is not frequent enough to cause repetition
    of the same port often enough to induce a connection failure;

  - adding small random increments to the source port. Previously, a
    random 0 or 1 was added every 16 ports. Now a random 0 to 7 is added
    after each port. This means that with the default 32768-60999 range,
    a worst-case rollover happens after 1764 connections, and on average
    after 3137. This does not stop statistical attacks, but it requires
    significantly more iterations of the same attack to confirm a guess;

  - increasing the table_perturb[] size from 2^8 to 2^16 entries, which
    Amit says will require 2.6 million connections to attack with the
    changes above, making it pointless to obtain a fingerprint that will
    only last 10 seconds. Due to its size, the table was made dynamically
    allocated;

  - a few minor improvements to the bits used from the hash, to eliminate
    some unfortunate correlations that might otherwise have been exploited
    to design future attack models.

These changes were tested under the most extreme conditions, up to
1.1 million connections per second to one or a few targets, showing no
performance regression, and only 2 connection failures within 13 billion,
which is less than 2^-32 and perfectly within usual values.

The series is split into small reviewable changes and was already reviewed
by Amit and Eric.
====================

Link: https://lore.kernel.org/r/20220502084614.24123-1-w@1wt.eu
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
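[Editor's note] To make the cover letter's arithmetic concrete, below is a
minimal user-space sketch of the selection scheme it describes, not the
kernel implementation. Everything here is illustrative: pick_port(),
tuple_hash() and is_port_free() are hypothetical names, the hash is a toy
mix standing in for siphash, and the free-port check is stubbed out.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define PERTURB_SHIFT  16                   /* 2^16 entries, as in this series */
#define PERTURB_SIZE   (1u << PERTURB_SHIFT)
#define RESALT_PERIOD  10                   /* seconds, like EPHEMERAL_PORT_SHUFFLE_PERIOD */

static uint32_t perturb[PERTURB_SIZE];

/* Toy keyed hash over (tuple, timeseed); the kernel uses siphash. Folding
 * in a 10s-granularity time seed makes the mapping forgetful: after the
 * delay, the same tuple maps to an unrelated table entry and offset. */
static uint64_t tuple_hash(uint32_t saddr, uint32_t daddr, uint16_t dport)
{
	uint64_t seed = (uint64_t)time(NULL) / RESALT_PERIOD;
	uint64_t h = (saddr ^ ((uint64_t)daddr << 32)) + dport;

	h = (h ^ seed) * 0x9e3779b97f4a7c15ULL;
	return h ^ (h >> 31);
}

/* stand-in for the kernel's check_established() logic */
static int is_port_free(uint16_t port) { (void)port; return 1; }

static uint16_t pick_port(uint32_t saddr, uint32_t daddr, uint16_t dport)
{
	const uint32_t low = 32768, high = 60999;
	uint32_t remaining = (high - low + 1) & ~1u; /* even, to keep the parity walk sound */
	uint64_t h = tuple_hash(saddr, daddr, dport);
	uint32_t index = h & (PERTURB_SIZE - 1);     /* low hash bits pick the entry */
	uint32_t offset = (perturb[index] + (uint32_t)(h >> 32)) % remaining; /* high bits seed the offset */
	uint32_t i, r;

	/* walk same-parity ports until a free one is found */
	for (i = 0; i < remaining; i += 2)
		if (is_port_free((uint16_t)(low + (offset + i) % remaining)))
			break;

	/* random 0..7, doubled to preserve parity, kept only when it exceeds
	 * the contention-driven step, so noise is maximal under low load */
	r = (uint32_t)(rand() & 7) * 2;
	if (i < r)
		i = r;
	perturb[index] += i + 2;

	/* worst-case advance per connection is 14 + 2 = 16, average 7 + 2 = 9:
	 * the 28232-port range rolls over after 28232/16 = 1764 connections at
	 * worst and about 28232/9 ~= 3137 on average, as stated above */
	return (uint16_t)(low + (offset + i) % remaining);
}

int main(void)
{
	srand((unsigned)time(NULL));
	for (int n = 0; n < 5; n++)
		printf("port %u\n", pick_port(0x0a000001, 0x0a000002, 443));
	return 0;
}

With the stubbed check, every first candidate is free and the advance per
call is driven purely by the random term; under real contention i grows and
the max() progressively suppresses the noise, which is exactly the trade-off
the comment on the ok: path in the diff below describes.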
-rw-r--r--  include/net/inet_hashtables.h |  2
-rw-r--r--  include/net/secure_seq.h      |  4
-rw-r--r--  net/core/secure_seq.c         | 16
-rw-r--r--  net/ipv4/inet_hashtables.c    | 42
-rw-r--r--  net/ipv6/inet6_hashtables.c   |  4
5 files changed, 43 insertions(+), 25 deletions(-)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index f72ec113ae56..98e1ec1a14f0 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -425,7 +425,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
}
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk, u32 port_offset,
+ struct sock *sk, u64 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16,
struct inet_timewait_sock **));
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index d7d2495f83c2..dac91aa38c5a 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -4,8 +4,8 @@
#include <linux/types.h>
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport);
u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 9b8443774449..5f85e01d4093 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -22,6 +22,8 @@
static siphash_aligned_key_t net_secret;
static siphash_aligned_key_t ts_secret;
+#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
+
static __always_inline void net_secret_init(void)
{
net_get_random_once(&net_secret, sizeof(net_secret));
@@ -94,17 +96,19 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
}
EXPORT_SYMBOL(secure_tcpv6_seq);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
{
const struct {
struct in6_addr saddr;
struct in6_addr daddr;
+ unsigned int timeseed;
__be16 dport;
} __aligned(SIPHASH_ALIGNMENT) combined = {
.saddr = *(struct in6_addr *)saddr,
.daddr = *(struct in6_addr *)daddr,
- .dport = dport
+ .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ .dport = dport,
};
net_secret_init();
return siphash(&combined, offsetofend(typeof(combined), dport),
@@ -142,11 +146,13 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
}
EXPORT_SYMBOL_GPL(secure_tcp_seq);
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
net_secret_init();
- return siphash_3u32((__force u32)saddr, (__force u32)daddr,
- (__force u16)dport, &net_secret);
+ return siphash_4u32((__force u32)saddr, (__force u32)daddr,
+ (__force u16)dport,
+ jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ &net_secret);
}
EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
#endif
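
[Editor's note] For illustration only: a toy stand-in for the siphash calls
above, showing the effect of the new timeseed input. The mixing function
below is hypothetical and not cryptographic; the point is only that the same
tuple hashes to unrelated values in consecutive 10-second windows.

#include <stdint.h>
#include <stdio.h>

/* toy stand-in for siphash over (saddr, daddr, dport, timeseed) */
static uint64_t toy_hash(uint32_t saddr, uint32_t daddr,
			 uint16_t dport, uint64_t timeseed)
{
	uint64_t h = saddr ^ ((uint64_t)daddr << 32) ^ dport
		     ^ (timeseed * 0x9e3779b97f4a7c15ULL);

	h ^= h >> 33;
	h *= 0xff51afd7ed558ccdULL;
	h ^= h >> 33;
	return h;
}

int main(void)
{
	/* same tuple, two consecutive 10-second windows: every bit changes */
	printf("%016llx\n", (unsigned long long)toy_hash(1, 2, 443, 100));
	printf("%016llx\n", (unsigned long long)toy_hash(1, 2, 443, 101));
	return 0;
}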
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 17440840a791..a5d57fa679ca 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -504,7 +504,7 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static u32 inet_sk_port_offset(const struct sock *sk)
+static u64 inet_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -726,15 +726,17 @@ EXPORT_SYMBOL_GPL(inet_unhash);
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
* property might be used by clever attacker.
- * RFC claims using TABLE_LENGTH=10 buckets gives an improvement,
- * we use 256 instead to really give more isolation and
- * privacy, this only consumes 1 KB of kernel memory.
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though
+ * attacks were since demonstrated, thus we use 65536 instead to really
+ * give more isolation and privacy, at the expense of 256kB of kernel
+ * memory.
*/
-#define INET_TABLE_PERTURB_SHIFT 8
-static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT];
+#define INET_TABLE_PERTURB_SHIFT 16
+#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT)
+static u32 *table_perturb;
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk, u32 port_offset,
+ struct sock *sk, u64 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16, struct inet_timewait_sock **))
{
@@ -774,10 +776,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
if (likely(remaining > 1))
remaining &= ~1U;
- net_get_random_once(table_perturb, sizeof(table_perturb));
- index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
+ net_get_random_once(table_perturb,
+ INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
+ index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);
+
+ offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
+ offset %= remaining;
- offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining;
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
@@ -831,11 +836,12 @@ next_port:
return -EADDRNOTAVAIL;
ok:
- /* If our first attempt found a candidate, skip next candidate
- * in 1/16 of cases to add some noise.
+ /* Here we want to add a little bit of randomness to the next source
+ * port that will be chosen. We use a max() with a random here so that
+ * on low contention the randomness is maximal and on high contention
+ * it may be inexistent.
*/
- if (!i && !(prandom_u32() % 16))
- i = 2;
+ i = max_t(int, i, (prandom_u32() & 7) * 2);
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
@@ -859,7 +865,7 @@ ok:
int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- u32 port_offset = 0;
+ u64 port_offset = 0;
if (!inet_sk(sk)->inet_num)
port_offset = inet_sk_port_offset(sk);
@@ -909,6 +915,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
low_limit,
high_limit);
init_hashinfo_lhash2(h);
+
+ /* this one is used for source ports of outgoing connections */
+ table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE,
+ sizeof(*table_perturb), GFP_KERNEL);
+ if (!table_perturb)
+ panic("TCP: failed to alloc table_perturb");
}
int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
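
[Editor's note] As a side note on the allocation above, a hypothetical
user-space equivalent of the sizing arithmetic. calloc() stands in loosely
for kmalloc_array(); the kernel variant does not zero the table, its entries
are later seeded by net_get_random_once().

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t entries = (size_t)1 << 16;   /* INET_TABLE_PERTURB_SIZE */
	uint32_t *table = calloc(entries, sizeof(*table));

	if (!table)
		return 1;
	printf("%zu entries * %zu bytes = %zu KB\n",
	       entries, sizeof(*table), entries * sizeof(*table) / 1024);
	/* prints: 65536 entries * 4 bytes = 256 KB, matching the comment
	 * added in the hunk above */
	free(table);
	return 0;
}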
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 4740afecf7c6..32ccac10bd62 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -308,7 +308,7 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static u32 inet6_sk_port_offset(const struct sock *sk)
+static u64 inet6_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -320,7 +320,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk)
int inet6_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- u32 port_offset = 0;
+ u64 port_offset = 0;
if (!inet_sk(sk)->inet_num)
port_offset = inet6_sk_port_offset(sk);