From cc2b6d5363a34b8827e3da59f1784d0bb340f7b1 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 24 Nov 2017 17:56:41 +0100
Subject: allowedips: simplify

---
 src/allowedips.c | 47 ++++++++++++-----------------------------------
 1 file changed, 12 insertions(+), 35 deletions(-)

diff --git a/src/allowedips.c b/src/allowedips.c
index 26d3d32..8ad3291 100644
--- a/src/allowedips.c
+++ b/src/allowedips.c
@@ -4,6 +4,8 @@
 #include "peer.h"
 
 struct allowedips_node {
+	struct wireguard_peer *peer;
+	struct rcu_head rcu;
 	struct allowedips_node __rcu *bit[2];
 	/* While it may seem scandalous that we waste space for v4,
 	 * we're alloc'ing to the nearest power of 2 anyway, so this
@@ -14,19 +16,10 @@ struct allowedips_node {
 		__be32 v4;
 		u8 bits[16];
 	};
-	union {
-		__be64 mask_v6;
-		__be32 mask_v4;
-	};
 	u8 cidr, bit_at_a, bit_at_b;
-	/* Putting these members here puts a big hole in the struct,
-	 * but it also keeps it above the cache line, which is important.
-	 */
-	struct wireguard_peer *peer;
-	struct rcu_head rcu;
 };
 
-static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src, u8 cidr, u8 bits)
+static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src, u8 cidr)
 {
 	node->cidr = cidr;
 	node->bit_at_a = cidr / 8;
@@ -34,10 +27,6 @@ static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src, u8
 	if (cidr) {
 		memcpy(node->bits, src, (cidr + 7) / 8);
 		node->bits[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
-		if (bits == 32)
-			node->mask_v4 = cpu_to_be32(~0U << (32 - cidr));
-		else if (bits == 128)
-			node->mask_v6 = cpu_to_be64(~0ULL << (64 - (cidr & 63)));
 	}
 }
 
@@ -143,24 +132,12 @@ static __always_inline u8 common_bits(const struct allowedips_node *node, const
 		return 128 - fls128(be64_to_cpu(*(const __be64 *)&node->bits[0] ^ *(const __be64 *)&key[0]), be64_to_cpu(*(const __be64 *)&node->bits[8] ^ *(const __be64 *)&key[8]));
 	return 0;
 }
-static __always_inline bool prefix_matches(struct allowedips_node *node, const u8 *key, u8 bits)
-{
-	if (!node->cidr)
-		return true;
-	if (bits == 32)
-		return !((node->v4 ^ ((__be32 *)key)[0]) & node->mask_v4);
-	else if (bits == 128) {
-		if (node->cidr >= 64) {
-			if (node->v6[0] ^ ((__be64 *)key)[0])
-				return false;
-			if (node->cidr == 64)
-				return true;
-			return !((node->v6[1] ^ ((__be64 *)key)[1]) & node->mask_v6);
-		}
-		return !((node->v6[0] ^ ((__be64 *)key)[0]) & node->mask_v6);
-	} else
-		return false;
-}
+
+/* This could be much faster if it actually just compared the common bits properly,
+ * by precomputing a mask bswap(~0 << (32 - cidr)), and the rest, but it turns out that
+ * common_bits is already super fast on modern processors, even taking into account
+ * the unfortunate bswap. So, we just inline it like this instead. */
+#define prefix_matches(node, key, bits) (common_bits(node, key, bits) >= node->cidr)
 
 static __always_inline struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits, const u8 *key)
 {
@@ -219,7 +196,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, u8 c
 		if (!node)
 			return -ENOMEM;
 		node->peer = peer;
-		copy_and_assign_cidr(node, key, cidr, bits);
+		copy_and_assign_cidr(node, key, cidr);
 		rcu_assign_pointer(*trie, node);
 		return 0;
 	}
@@ -232,7 +209,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, u8 c
 	if (!newnode)
 		return -ENOMEM;
 	newnode->peer = peer;
-	copy_and_assign_cidr(newnode, key, cidr, bits);
+	copy_and_assign_cidr(newnode, key, cidr);
 
 	if (!node)
 		down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
@@ -258,7 +235,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, u8 c
 			kfree(newnode);
 			return -ENOMEM;
 		}
-		copy_and_assign_cidr(node, newnode->bits, cidr, bits);
+		copy_and_assign_cidr(node, newnode->bits, cidr);
 
 		rcu_assign_pointer(choose_node(node, down->bits), down);
 		rcu_assign_pointer(choose_node(node, newnode->bits), newnode);
-- 
cgit v1.2.3-59-g8ed1b
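
A note on why the new #define can stand in for the removed prefix_matches(): for a
node storing a prefix of length cidr, "the key agrees with the prefix in at least
its first cidr bits" is exactly the same predicate as "the key equals the prefix
under a cidr-bit netmask", which is what the deleted mask_v4/mask_v6 comparisons
computed. The following is a standalone userspace sketch of that equivalence for
the IPv4 case only; it is not kernel code, and the helper names (count_common_bits,
masked_match, common_bits_match) are invented for illustration and do not appear
in the tree.

	#include <stdint.h>
	#include <stdio.h>
	#include <arpa/inet.h>

	/* Number of leading bits shared by two big-endian IPv4 addresses.
	 * __builtin_clz is a GCC/Clang builtin; clz(0) is undefined, hence
	 * the guard for identical addresses.
	 */
	static uint8_t count_common_bits(uint32_t a_be, uint32_t b_be)
	{
		uint32_t diff = ntohl(a_be ^ b_be);

		return diff ? __builtin_clz(diff) : 32;
	}

	/* Old formulation: compare the addresses under a precomputed
	 * netmask, analogous to the removed mask_v4 logic. The cidr == 0
	 * guard avoids an undefined 32-bit shift.
	 */
	static int masked_match(uint32_t node_be, uint32_t key_be, uint8_t cidr)
	{
		uint32_t mask_be = htonl(cidr ? ~0U << (32 - cidr) : 0);

		return !((node_be ^ key_be) & mask_be);
	}

	/* New formulation: a match means at least cidr leading bits agree,
	 * mirroring the prefix_matches #define in the patch.
	 */
	static int common_bits_match(uint32_t node_be, uint32_t key_be, uint8_t cidr)
	{
		return count_common_bits(node_be, key_be) >= cidr;
	}

	int main(void)
	{
		uint32_t net = htonl(0xc0a80100);	/* 192.168.1.0/24 */
		uint32_t in  = htonl(0xc0a80142);	/* 192.168.1.66, inside */
		uint32_t out = htonl(0xc0a80242);	/* 192.168.2.66, outside */

		printf("%d %d\n", masked_match(net, in, 24), common_bits_match(net, in, 24));
		printf("%d %d\n", masked_match(net, out, 24), common_bits_match(net, out, 24));
		return 0;
	}

Built with gcc or clang, the two formulations agree on both lines ("1 1" for the
in-prefix address, "0 0" for the out-of-prefix one). Since the xor-plus-clz form
is already cheap on modern processors, the precomputed masks bought nothing, which
is why the patch can drop the mask union from struct allowedips_node entirely.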