aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-12-18 08:49:48 -0800
committerDavid S. Miller <davem@davemloft.net>2018-12-18 08:49:48 -0800
commit77c7a7b3e7da2c97c182c7e57a16d16c6b555c24 (patch)
tree6bde171b004d5cee63dd161dc9a66e16a48f7eae /net
parentnfp: flower: fix cb_ident duplicate in indirect block register (diff)
parentxfrm: clean an indentation issue, remove a space (diff)
downloadlinux-dev-77c7a7b3e7da2c97c182c7e57a16d16c6b555c24.tar.xz
linux-dev-77c7a7b3e7da2c97c182c7e57a16d16c6b555c24.zip
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next
Steffen Klassert says: ==================== pull request (net-next): ipsec-next 2018-12-18 1) Add xfrm policy selftest scripts. From Florian Westphal. 2) Split inexact policies into four different search list classes and use the rbtree infrastructure to store/lookup the policies. This is to improve the policy lookup performance after the flowcache removal. Patches from Florian Westphal. 3) Various coding style fixes, from Colin Ian King. 4) Fix policy lookup logic after adding the inexact policy search tree infrastructure. From Florian Westphal. 5) Remove a useless remove BUG_ON from xfrm6_dst_ifdown. From Li RongQing. 6) Use the correct policy direction for lookups on hash rebuilding. From Florian Westphal. Please pull or let me know if there are problems. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv6/xfrm6_policy.c1
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/xfrm/xfrm_policy.c1248
3 files changed, 1118 insertions, 133 deletions
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d35bcf92969c..769f8f78d3b8 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -262,7 +262,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (xdst->u.rt6.rt6i_idev->dev == dev) {
struct inet6_dev *loopback_idev =
in6_dev_get(dev_net(dev)->loopback_dev);
- BUG_ON(!loopback_idev);
do {
in6_dev_put(xdst->u.rt6.rt6i_idev);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9d61266526e7..655c787f9d54 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2020,7 +2020,7 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp)
{
- struct xfrm_sec_ctx *xfrm_ctx = xp->security;
+ struct xfrm_sec_ctx *xfrm_ctx = xp->security;
if (xfrm_ctx) {
int len = sizeof(struct sadb_x_sec_ctx);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 119a427d9b2b..be04091eb7db 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -26,6 +26,7 @@
#include <linux/cache.h>
#include <linux/cpu.h>
#include <linux/audit.h>
+#include <linux/rhashtable.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/xfrm.h>
@@ -45,6 +46,99 @@ struct xfrm_flo {
u8 flags;
};
+/* prefixes smaller than this are stored in lists, not trees. */
+#define INEXACT_PREFIXLEN_IPV4 16
+#define INEXACT_PREFIXLEN_IPV6 48
+
+struct xfrm_pol_inexact_node {
+ struct rb_node node;
+ union {
+ xfrm_address_t addr;
+ struct rcu_head rcu;
+ };
+ u8 prefixlen;
+
+ struct rb_root root;
+
+ /* the policies matching this node, can be empty list */
+ struct hlist_head hhead;
+};
+
+/* xfrm inexact policy search tree:
+ * xfrm_pol_inexact_bin = hash(dir,type,family,if_id);
+ * |
+ * +---- root_d: sorted by daddr:prefix
+ * | |
+ * | xfrm_pol_inexact_node
+ * | |
+ * | +- root: sorted by saddr/prefix
+ * | | |
+ * | | xfrm_pol_inexact_node
+ * | | |
+ * | | + root: unused
+ * | | |
+ * | | + hhead: saddr:daddr policies
+ * | |
+ * | +- coarse policies and all any:daddr policies
+ * |
+ * +---- root_s: sorted by saddr:prefix
+ * | |
+ * | xfrm_pol_inexact_node
+ * | |
+ * | + root: unused
+ * | |
+ * | + hhead: saddr:any policies
+ * |
+ * +---- coarse policies and all any:any policies
+ *
+ * Lookups return four candidate lists:
+ * 1. any:any list from top-level xfrm_pol_inexact_bin
+ * 2. any:daddr list from daddr tree
+ * 3. saddr:daddr list from 2nd level daddr tree
+ * 4. saddr:any list from saddr tree
+ *
+ * This result set then needs to be searched for the policy with
+ * the lowest priority. If two results have same prio, youngest one wins.
+ */
+
+struct xfrm_pol_inexact_key {
+ possible_net_t net;
+ u32 if_id;
+ u16 family;
+ u8 dir, type;
+};
+
+struct xfrm_pol_inexact_bin {
+ struct xfrm_pol_inexact_key k;
+ struct rhash_head head;
+ /* list containing '*:*' policies */
+ struct hlist_head hhead;
+
+ seqcount_t count;
+ /* tree sorted by daddr/prefix */
+ struct rb_root root_d;
+
+ /* tree sorted by saddr/prefix */
+ struct rb_root root_s;
+
+ /* slow path below */
+ struct list_head inexact_bins;
+ struct rcu_head rcu;
+};
+
+enum xfrm_pol_inexact_candidate_type {
+ XFRM_POL_CAND_BOTH,
+ XFRM_POL_CAND_SADDR,
+ XFRM_POL_CAND_DADDR,
+ XFRM_POL_CAND_ANY,
+
+ XFRM_POL_CAND_MAX,
+};
+
+struct xfrm_pol_inexact_candidates {
+ struct hlist_head *res[XFRM_POL_CAND_MAX];
+};
+
static DEFINE_SPINLOCK(xfrm_if_cb_lock);
static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
@@ -55,6 +149,9 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
static struct kmem_cache *xfrm_dst_cache __ro_after_init;
static __read_mostly seqcount_t xfrm_policy_hash_generation;
+static struct rhashtable xfrm_policy_inexact_table;
+static const struct rhashtable_params xfrm_pol_inexact_params;
+
static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
@@ -64,6 +161,25 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
int dir);
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family, u8 dir,
+ u32 if_id);
+
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_lookup_rcu(struct net *net,
+ u8 type, u16 family, u8 dir, u32 if_id);
+static struct xfrm_policy *
+xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy,
+ bool excl);
+static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
+ struct xfrm_policy *policy);
+
+static bool
+xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
+ struct xfrm_pol_inexact_bin *b,
+ const xfrm_address_t *saddr,
+ const xfrm_address_t *daddr);
+
static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy)
{
return refcount_inc_not_zero(&policy->refcnt);
@@ -269,6 +385,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
+ INIT_HLIST_NODE(&policy->bydst_inexact_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
@@ -365,7 +482,7 @@ static struct hlist_head *policy_hash_bysel(struct net *net,
hash = __sel_hash(sel, family, hmask, dbits, sbits);
if (hash == hmask + 1)
- return &net->xfrm.policy_inexact[dir];
+ return NULL;
return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
@@ -563,6 +680,533 @@ static void xfrm_hash_resize(struct work_struct *work)
mutex_unlock(&hash_resize_mutex);
}
+static void xfrm_hash_reset_inexact_table(struct net *net)
+{
+ struct xfrm_pol_inexact_bin *b;
+
+ lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+ list_for_each_entry(b, &net->xfrm.inexact_bins, inexact_bins)
+ INIT_HLIST_HEAD(&b->hhead);
+}
+
+/* Make sure *pol can be inserted into fastbin.
+ * Useful to check that later insert requests will be sucessful
+ * (provided xfrm_policy_lock is held throughout).
+ */
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_alloc_bin(const struct xfrm_policy *pol, u8 dir)
+{
+ struct xfrm_pol_inexact_bin *bin, *prev;
+ struct xfrm_pol_inexact_key k = {
+ .family = pol->family,
+ .type = pol->type,
+ .dir = dir,
+ .if_id = pol->if_id,
+ };
+ struct net *net = xp_net(pol);
+
+ lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+ write_pnet(&k.net, net);
+ bin = rhashtable_lookup_fast(&xfrm_policy_inexact_table, &k,
+ xfrm_pol_inexact_params);
+ if (bin)
+ return bin;
+
+ bin = kzalloc(sizeof(*bin), GFP_ATOMIC);
+ if (!bin)
+ return NULL;
+
+ bin->k = k;
+ INIT_HLIST_HEAD(&bin->hhead);
+ bin->root_d = RB_ROOT;
+ bin->root_s = RB_ROOT;
+ seqcount_init(&bin->count);
+
+ prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table,
+ &bin->k, &bin->head,
+ xfrm_pol_inexact_params);
+ if (!prev) {
+ list_add(&bin->inexact_bins, &net->xfrm.inexact_bins);
+ return bin;
+ }
+
+ kfree(bin);
+
+ return IS_ERR(prev) ? NULL : prev;
+}
+
+static bool xfrm_pol_inexact_addr_use_any_list(const xfrm_address_t *addr,
+ int family, u8 prefixlen)
+{
+ if (xfrm_addr_any(addr, family))
+ return true;
+
+ if (family == AF_INET6 && prefixlen < INEXACT_PREFIXLEN_IPV6)
+ return true;
+
+ if (family == AF_INET && prefixlen < INEXACT_PREFIXLEN_IPV4)
+ return true;
+
+ return false;
+}
+
+static bool
+xfrm_policy_inexact_insert_use_any_list(const struct xfrm_policy *policy)
+{
+ const xfrm_address_t *addr;
+ bool saddr_any, daddr_any;
+ u8 prefixlen;
+
+ addr = &policy->selector.saddr;
+ prefixlen = policy->selector.prefixlen_s;
+
+ saddr_any = xfrm_pol_inexact_addr_use_any_list(addr,
+ policy->family,
+ prefixlen);
+ addr = &policy->selector.daddr;
+ prefixlen = policy->selector.prefixlen_d;
+ daddr_any = xfrm_pol_inexact_addr_use_any_list(addr,
+ policy->family,
+ prefixlen);
+ return saddr_any && daddr_any;
+}
+
+static void xfrm_pol_inexact_node_init(struct xfrm_pol_inexact_node *node,
+ const xfrm_address_t *addr, u8 prefixlen)
+{
+ node->addr = *addr;
+ node->prefixlen = prefixlen;
+}
+
+static struct xfrm_pol_inexact_node *
+xfrm_pol_inexact_node_alloc(const xfrm_address_t *addr, u8 prefixlen)
+{
+ struct xfrm_pol_inexact_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_ATOMIC);
+ if (node)
+ xfrm_pol_inexact_node_init(node, addr, prefixlen);
+
+ return node;
+}
+
+static int xfrm_policy_addr_delta(const xfrm_address_t *a,
+ const xfrm_address_t *b,
+ u8 prefixlen, u16 family)
+{
+ unsigned int pdw, pbi;
+ int delta = 0;
+
+ switch (family) {
+ case AF_INET:
+ if (sizeof(long) == 4 && prefixlen == 0)
+ return ntohl(a->a4) - ntohl(b->a4);
+ return (ntohl(a->a4) & ((~0UL << (32 - prefixlen)))) -
+ (ntohl(b->a4) & ((~0UL << (32 - prefixlen))));
+ case AF_INET6:
+ pdw = prefixlen >> 5;
+ pbi = prefixlen & 0x1f;
+
+ if (pdw) {
+ delta = memcmp(a->a6, b->a6, pdw << 2);
+ if (delta)
+ return delta;
+ }
+ if (pbi) {
+ u32 mask = ~0u << (32 - pbi);
+
+ delta = (ntohl(a->a6[pdw]) & mask) -
+ (ntohl(b->a6[pdw]) & mask);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return delta;
+}
+
+static void xfrm_policy_inexact_list_reinsert(struct net *net,
+ struct xfrm_pol_inexact_node *n,
+ u16 family)
+{
+ unsigned int matched_s, matched_d;
+ struct hlist_node *newpos = NULL;
+ struct xfrm_policy *policy, *p;
+
+ matched_s = 0;
+ matched_d = 0;
+
+ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+ bool matches_s, matches_d;
+
+ if (!policy->bydst_reinsert)
+ continue;
+
+ WARN_ON_ONCE(policy->family != family);
+
+ policy->bydst_reinsert = false;
+ hlist_for_each_entry(p, &n->hhead, bydst) {
+ if (policy->priority >= p->priority)
+ newpos = &p->bydst;
+ else
+ break;
+ }
+
+ if (newpos)
+ hlist_add_behind(&policy->bydst, newpos);
+ else
+ hlist_add_head(&policy->bydst, &n->hhead);
+
+ /* paranoia checks follow.
+ * Check that the reinserted policy matches at least
+ * saddr or daddr for current node prefix.
+ *
+ * Matching both is fine, matching saddr in one policy
+ * (but not daddr) and then matching only daddr in another
+ * is a bug.
+ */
+ matches_s = xfrm_policy_addr_delta(&policy->selector.saddr,
+ &n->addr,
+ n->prefixlen,
+ family) == 0;
+ matches_d = xfrm_policy_addr_delta(&policy->selector.daddr,
+ &n->addr,
+ n->prefixlen,
+ family) == 0;
+ if (matches_s && matches_d)
+ continue;
+
+ WARN_ON_ONCE(!matches_s && !matches_d);
+ if (matches_s)
+ matched_s++;
+ if (matches_d)
+ matched_d++;
+ WARN_ON_ONCE(matched_s && matched_d);
+ }
+}
+
+static void xfrm_policy_inexact_node_reinsert(struct net *net,
+ struct xfrm_pol_inexact_node *n,
+ struct rb_root *new,
+ u16 family)
+{
+ struct rb_node **p, *parent = NULL;
+ struct xfrm_pol_inexact_node *node;
+
+ /* we should not have another subtree here */
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root));
+
+ p = &new->rb_node;
+ while (*p) {
+ u8 prefixlen;
+ int delta;
+
+ parent = *p;
+ node = rb_entry(*p, struct xfrm_pol_inexact_node, node);
+
+ prefixlen = min(node->prefixlen, n->prefixlen);
+
+ delta = xfrm_policy_addr_delta(&n->addr, &node->addr,
+ prefixlen, family);
+ if (delta < 0) {
+ p = &parent->rb_left;
+ } else if (delta > 0) {
+ p = &parent->rb_right;
+ } else {
+ struct xfrm_policy *tmp;
+
+ hlist_for_each_entry(tmp, &node->hhead, bydst)
+ tmp->bydst_reinsert = true;
+ hlist_for_each_entry(tmp, &n->hhead, bydst)
+ tmp->bydst_reinsert = true;
+
+ INIT_HLIST_HEAD(&node->hhead);
+ xfrm_policy_inexact_list_reinsert(net, node, family);
+
+ if (node->prefixlen == n->prefixlen) {
+ kfree_rcu(n, rcu);
+ return;
+ }
+
+ rb_erase(*p, new);
+ kfree_rcu(n, rcu);
+ n = node;
+ n->prefixlen = prefixlen;
+ *p = new->rb_node;
+ parent = NULL;
+ }
+ }
+
+ rb_link_node_rcu(&n->node, parent, p);
+ rb_insert_color(&n->node, new);
+}
+
+/* merge nodes v and n */
+static void xfrm_policy_inexact_node_merge(struct net *net,
+ struct xfrm_pol_inexact_node *v,
+ struct xfrm_pol_inexact_node *n,
+ u16 family)
+{
+ struct xfrm_pol_inexact_node *node;
+ struct xfrm_policy *tmp;
+ struct rb_node *rnode;
+
+ /* To-be-merged node v has a subtree.
+ *
+ * Dismantle it and insert its nodes to n->root.
+ */
+ while ((rnode = rb_first(&v->root)) != NULL) {
+ node = rb_entry(rnode, struct xfrm_pol_inexact_node, node);
+ rb_erase(&node->node, &v->root);
+ xfrm_policy_inexact_node_reinsert(net, node, &n->root,
+ family);
+ }
+
+ hlist_for_each_entry(tmp, &v->hhead, bydst)
+ tmp->bydst_reinsert = true;
+ hlist_for_each_entry(tmp, &n->hhead, bydst)
+ tmp->bydst_reinsert = true;
+
+ INIT_HLIST_HEAD(&n->hhead);
+ xfrm_policy_inexact_list_reinsert(net, n, family);
+}
+
+static struct xfrm_pol_inexact_node *
+xfrm_policy_inexact_insert_node(struct net *net,
+ struct rb_root *root,
+ xfrm_address_t *addr,
+ u16 family, u8 prefixlen, u8 dir)
+{
+ struct xfrm_pol_inexact_node *cached = NULL;
+ struct rb_node **p, *parent = NULL;
+ struct xfrm_pol_inexact_node *node;
+
+ p = &root->rb_node;
+ while (*p) {
+ int delta;
+
+ parent = *p;
+ node = rb_entry(*p, struct xfrm_pol_inexact_node, node);
+
+ delta = xfrm_policy_addr_delta(addr, &node->addr,
+ node->prefixlen,
+ family);
+ if (delta == 0 && prefixlen >= node->prefixlen) {
+ WARN_ON_ONCE(cached); /* ipsec policies got lost */
+ return node;
+ }
+
+ if (delta < 0)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+
+ if (prefixlen < node->prefixlen) {
+ delta = xfrm_policy_addr_delta(addr, &node->addr,
+ prefixlen,
+ family);
+ if (delta)
+ continue;
+
+ /* This node is a subnet of the new prefix. It needs
+ * to be removed and re-inserted with the smaller
+ * prefix and all nodes that are now also covered
+ * by the reduced prefixlen.
+ */
+ rb_erase(&node->node, root);
+
+ if (!cached) {
+ xfrm_pol_inexact_node_init(node, addr,
+ prefixlen);
+ cached = node;
+ } else {
+ /* This node also falls within the new
+ * prefixlen. Merge the to-be-reinserted
+ * node and this one.
+ */
+ xfrm_policy_inexact_node_merge(net, node,
+ cached, family);
+ kfree_rcu(node, rcu);
+ }
+
+ /* restart */
+ p = &root->rb_node;
+ parent = NULL;
+ }
+ }
+
+ node = cached;
+ if (!node) {
+ node = xfrm_pol_inexact_node_alloc(addr, prefixlen);
+ if (!node)
+ return NULL;
+ }
+
+ rb_link_node_rcu(&node->node, parent, p);
+ rb_insert_color(&node->node, root);
+
+ return node;
+}
+
+static void xfrm_policy_inexact_gc_tree(struct rb_root *r, bool rm)
+{
+ struct xfrm_pol_inexact_node *node;
+ struct rb_node *rn = rb_first(r);
+
+ while (rn) {
+ node = rb_entry(rn, struct xfrm_pol_inexact_node, node);
+
+ xfrm_policy_inexact_gc_tree(&node->root, rm);
+ rn = rb_next(rn);
+
+ if (!hlist_empty(&node->hhead) || !RB_EMPTY_ROOT(&node->root)) {
+ WARN_ON_ONCE(rm);
+ continue;
+ }
+
+ rb_erase(&node->node, r);
+ kfree_rcu(node, rcu);
+ }
+}
+
+static void __xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b, bool net_exit)
+{
+ write_seqcount_begin(&b->count);
+ xfrm_policy_inexact_gc_tree(&b->root_d, net_exit);
+ xfrm_policy_inexact_gc_tree(&b->root_s, net_exit);
+ write_seqcount_end(&b->count);
+
+ if (!RB_EMPTY_ROOT(&b->root_d) || !RB_EMPTY_ROOT(&b->root_s) ||
+ !hlist_empty(&b->hhead)) {
+ WARN_ON_ONCE(net_exit);
+ return;
+ }
+
+ if (rhashtable_remove_fast(&xfrm_policy_inexact_table, &b->head,
+ xfrm_pol_inexact_params) == 0) {
+ list_del(&b->inexact_bins);
+ kfree_rcu(b, rcu);
+ }
+}
+
+static void xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b)
+{
+ struct net *net = read_pnet(&b->k.net);
+
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ __xfrm_policy_inexact_prune_bin(b, false);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+}
+
+static void __xfrm_policy_inexact_flush(struct net *net)
+{
+ struct xfrm_pol_inexact_bin *bin, *t;
+
+ lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+ list_for_each_entry_safe(bin, t, &net->xfrm.inexact_bins, inexact_bins)
+ __xfrm_policy_inexact_prune_bin(bin, false);
+}
+
+static struct hlist_head *
+xfrm_policy_inexact_alloc_chain(struct xfrm_pol_inexact_bin *bin,
+ struct xfrm_policy *policy, u8 dir)
+{
+ struct xfrm_pol_inexact_node *n;
+ struct net *net;
+
+ net = xp_net(policy);
+ lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+ if (xfrm_policy_inexact_insert_use_any_list(policy))
+ return &bin->hhead;
+
+ if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.daddr,
+ policy->family,
+ policy->selector.prefixlen_d)) {
+ write_seqcount_begin(&bin->count);
+ n = xfrm_policy_inexact_insert_node(net,
+ &bin->root_s,
+ &policy->selector.saddr,
+ policy->family,
+ policy->selector.prefixlen_s,
+ dir);
+ write_seqcount_end(&bin->count);
+ if (!n)
+ return NULL;
+
+ return &n->hhead;
+ }
+
+ /* daddr is fixed */
+ write_seqcount_begin(&bin->count);
+ n = xfrm_policy_inexact_insert_node(net,
+ &bin->root_d,
+ &policy->selector.daddr,
+ policy->family,
+ policy->selector.prefixlen_d, dir);
+ write_seqcount_end(&bin->count);
+ if (!n)
+ return NULL;
+
+ /* saddr is wildcard */
+ if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.saddr,
+ policy->family,
+ policy->selector.prefixlen_s))
+ return &n->hhead;
+
+ write_seqcount_begin(&bin->count);
+ n = xfrm_policy_inexact_insert_node(net,
+ &n->root,
+ &policy->selector.saddr,
+ policy->family,
+ policy->selector.prefixlen_s, dir);
+ write_seqcount_end(&bin->count);
+ if (!n)
+ return NULL;
+
+ return &n->hhead;
+}
+
+static struct xfrm_policy *
+xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl)
+{
+ struct xfrm_pol_inexact_bin *bin;
+ struct xfrm_policy *delpol;
+ struct hlist_head *chain;
+ struct net *net;
+
+ bin = xfrm_policy_inexact_alloc_bin(policy, dir);
+ if (!bin)
+ return ERR_PTR(-ENOMEM);
+
+ net = xp_net(policy);
+ lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+ chain = xfrm_policy_inexact_alloc_chain(bin, policy, dir);
+ if (!chain) {
+ __xfrm_policy_inexact_prune_bin(bin, false);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ delpol = xfrm_policy_insert_list(chain, policy, excl);
+ if (delpol && excl) {
+ __xfrm_policy_inexact_prune_bin(bin, false);
+ return ERR_PTR(-EEXIST);
+ }
+
+ chain = &net->xfrm.policy_inexact[dir];
+ xfrm_policy_insert_inexact_list(chain, policy);
+
+ if (delpol)
+ __xfrm_policy_inexact_prune_bin(bin, false);
+
+ return delpol;
+}
+
static void xfrm_hash_rebuild(struct work_struct *work)
{
struct net *net = container_of(work, struct net,
@@ -592,7 +1236,50 @@ static void xfrm_hash_rebuild(struct work_struct *work)
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ /* make sure that we can insert the indirect policies again before
+ * we start with destructive action.
+ */
+ list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) {
+ struct xfrm_pol_inexact_bin *bin;
+ u8 dbits, sbits;
+
+ dir = xfrm_policy_id2dir(policy->index);
+ if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
+ continue;
+
+ if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
+ if (policy->family == AF_INET) {
+ dbits = rbits4;
+ sbits = lbits4;
+ } else {
+ dbits = rbits6;
+ sbits = lbits6;
+ }
+ } else {
+ if (policy->family == AF_INET) {
+ dbits = lbits4;
+ sbits = rbits4;
+ } else {
+ dbits = lbits6;
+ sbits = rbits6;
+ }
+ }
+
+ if (policy->selector.prefixlen_d < dbits ||
+ policy->selector.prefixlen_s < sbits)
+ continue;
+
+ bin = xfrm_policy_inexact_alloc_bin(policy, dir);
+ if (!bin)
+ goto out_unlock;
+
+ if (!xfrm_policy_inexact_alloc_chain(bin, policy, dir))
+ goto out_unlock;
+ }
+
/* reset the bydst and inexact table in all directions */
+ xfrm_hash_reset_inexact_table(net);
+
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
hmask = net->xfrm.policy_bydst[dir].hmask;
@@ -616,15 +1303,23 @@ static void xfrm_hash_rebuild(struct work_struct *work)
/* re-insert all policies by order of creation */
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
- if (policy->walk.dead ||
- xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
+ if (policy->walk.dead)
+ continue;
+ dir = xfrm_policy_id2dir(policy->index);
+ if (dir >= XFRM_POLICY_MAX) {
/* skip socket policies */
continue;
}
newpos = NULL;
chain = policy_hash_bysel(net, &policy->selector,
- policy->family,
- xfrm_policy_id2dir(policy->index));
+ policy->family, dir);
+ if (!chain) {
+ void *p = xfrm_policy_inexact_insert(policy, dir, 0);
+
+ WARN_ONCE(IS_ERR(p), "reinsert: %ld\n", PTR_ERR(p));
+ continue;
+ }
+
hlist_for_each_entry(pol, chain, bydst) {
if (policy->priority >= pol->priority)
newpos = &pol->bydst;
@@ -637,6 +1332,8 @@ static void xfrm_hash_rebuild(struct work_struct *work)
hlist_add_head_rcu(&policy->bydst, chain);
}
+out_unlock:
+ __xfrm_policy_inexact_flush(net);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
mutex_unlock(&hash_resize_mutex);
@@ -740,18 +1437,97 @@ static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
return false;
}
-int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed)
{
- struct net *net = xp_net(policy);
- struct xfrm_policy *pol;
- struct xfrm_policy *delpol;
- struct hlist_head *chain;
- struct hlist_node *newpos;
+ const struct xfrm_pol_inexact_key *k = data;
+ u32 a = k->type << 24 | k->dir << 16 | k->family;
+
+ return jhash_3words(a, k->if_id, net_hash_mix(read_pnet(&k->net)),
+ seed);
+}
+
+static u32 xfrm_pol_bin_obj(const void *data, u32 len, u32 seed)
+{
+ const struct xfrm_pol_inexact_bin *b = data;
+
+ return xfrm_pol_bin_key(&b->k, 0, seed);
+}
+
+static int xfrm_pol_bin_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct xfrm_pol_inexact_key *key = arg->key;
+ const struct xfrm_pol_inexact_bin *b = ptr;
+ int ret;
+
+ if (!net_eq(read_pnet(&b->k.net), read_pnet(&key->net)))
+ return -1;
+
+ ret = b->k.dir ^ key->dir;
+ if (ret)
+ return ret;
+
+ ret = b->k.type ^ key->type;
+ if (ret)
+ return ret;
+
+ ret = b->k.family ^ key->family;
+ if (ret)
+ return ret;
+
+ return b->k.if_id ^ key->if_id;
+}
+
+static const struct rhashtable_params xfrm_pol_inexact_params = {
+ .head_offset = offsetof(struct xfrm_pol_inexact_bin, head),
+ .hashfn = xfrm_pol_bin_key,
+ .obj_hashfn = xfrm_pol_bin_obj,
+ .obj_cmpfn = xfrm_pol_bin_cmp,
+ .automatic_shrinking = true,
+};
+
+static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
+ struct xfrm_policy *policy)
+{
+ struct xfrm_policy *pol, *delpol = NULL;
+ struct hlist_node *newpos = NULL;
+ int i = 0;
+
+ hlist_for_each_entry(pol, chain, bydst_inexact_list) {
+ if (pol->type == policy->type &&
+ pol->if_id == policy->if_id &&
+ !selector_cmp(&pol->selector, &policy->selector) &&
+ xfrm_policy_mark_match(policy, pol) &&
+ xfrm_sec_ctx_match(pol->security, policy->security) &&
+ !WARN_ON(delpol)) {
+ delpol = pol;
+ if (policy->priority > pol->priority)
+ continue;
+ } else if (policy->priority >= pol->priority) {
+ newpos = &pol->bydst_inexact_list;
+ continue;
+ }
+ if (delpol)
+ break;
+ }
+
+ if (newpos)
+ hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
+ else
+ hlist_add_head_rcu(&policy->bydst_inexact_list, chain);
+
+ hlist_for_each_entry(pol, chain, bydst_inexact_list) {
+ pol->pos = i;
+ i++;
+ }
+}
+
+static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
+ struct xfrm_policy *policy,
+ bool excl)
+{
+ struct xfrm_policy *pol, *newpos = NULL, *delpol = NULL;
- spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
- delpol = NULL;
- newpos = NULL;
hlist_for_each_entry(pol, chain, bydst) {
if (pol->type == policy->type &&
pol->if_id == policy->if_id &&
@@ -759,24 +1535,45 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
xfrm_policy_mark_match(policy, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
- if (excl) {
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
- return -EEXIST;
- }
+ if (excl)
+ return ERR_PTR(-EEXIST);
delpol = pol;
if (policy->priority > pol->priority)
continue;
} else if (policy->priority >= pol->priority) {
- newpos = &pol->bydst;
+ newpos = pol;
continue;
}
if (delpol)
break;
}
+
if (newpos)
- hlist_add_behind_rcu(&policy->bydst, newpos);
+ hlist_add_behind_rcu(&policy->bydst, &newpos->bydst);
else
hlist_add_head_rcu(&policy->bydst, chain);
+
+ return delpol;
+}
+
+int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+{
+ struct net *net = xp_net(policy);
+ struct xfrm_policy *delpol;
+ struct hlist_head *chain;
+
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
+ if (chain)
+ delpol = xfrm_policy_insert_list(chain, policy, excl);
+ else
+ delpol = xfrm_policy_inexact_insert(policy, dir, excl);
+
+ if (IS_ERR(delpol)) {
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ return PTR_ERR(delpol);
+ }
+
__xfrm_policy_link(policy, dir);
/* After previous checking, family can either be AF_INET or AF_INET6 */
@@ -806,43 +1603,96 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
}
EXPORT_SYMBOL(xfrm_policy_insert);
+static struct xfrm_policy *
+__xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
+ u8 type, int dir,
+ struct xfrm_selector *sel,
+ struct xfrm_sec_ctx *ctx)
+{
+ struct xfrm_policy *pol;
+
+ if (!chain)
+ return NULL;
+
+ hlist_for_each_entry(pol, chain, bydst) {
+ if (pol->type == type &&
+ pol->if_id == if_id &&
+ (mark & pol->mark.m) == pol->mark.v &&
+ !selector_cmp(sel, &pol->selector) &&
+ xfrm_sec_ctx_match(ctx, pol->security))
+ return pol;
+ }
+
+ return NULL;
+}
+
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
u8 type, int dir,
struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx, int delete,
int *err)
{
- struct xfrm_policy *pol, *ret;
+ struct xfrm_pol_inexact_bin *bin = NULL;
+ struct xfrm_policy *pol, *ret = NULL;
struct hlist_head *chain;
*err = 0;
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, sel, sel->family, dir);
- ret = NULL;
- hlist_for_each_entry(pol, chain, bydst) {
- if (pol->type == type &&
- pol->if_id == if_id &&
- (mark & pol->mark.m) == pol->mark.v &&
- !selector_cmp(sel, &pol->selector) &&
- xfrm_sec_ctx_match(ctx, pol->security)) {
- xfrm_pol_hold(pol);
- if (delete) {
- *err = security_xfrm_policy_delete(
- pol->security);
- if (*err) {
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
- return pol;
- }
- __xfrm_policy_unlink(pol, dir);
+ if (!chain) {
+ struct xfrm_pol_inexact_candidates cand;
+ int i;
+
+ bin = xfrm_policy_inexact_lookup(net, type,
+ sel->family, dir, if_id);
+ if (!bin) {
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ return NULL;
+ }
+
+ if (!xfrm_policy_find_inexact_candidates(&cand, bin,
+ &sel->saddr,
+ &sel->daddr)) {
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ return NULL;
+ }
+
+ pol = NULL;
+ for (i = 0; i < ARRAY_SIZE(cand.res); i++) {
+ struct xfrm_policy *tmp;
+
+ tmp = __xfrm_policy_bysel_ctx(cand.res[i], mark,
+ if_id, type, dir,
+ sel, ctx);
+ if (!tmp)
+ continue;
+
+ if (!pol || tmp->pos < pol->pos)
+ pol = tmp;
+ }
+ } else {
+ pol = __xfrm_policy_bysel_ctx(chain, mark, if_id, type, dir,
+ sel, ctx);
+ }
+
+ if (pol) {
+ xfrm_pol_hold(pol);
+ if (delete) {
+ *err = security_xfrm_policy_delete(pol->security);
+ if (*err) {
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ return pol;
}
- ret = pol;
- break;
+ __xfrm_policy_unlink(pol, dir);
}
+ ret = pol;
}
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (ret && delete)
xfrm_policy_kill(ret);
+ if (bin && delete)
+ xfrm_policy_inexact_prune_bin(bin);
return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
@@ -892,36 +1742,19 @@ EXPORT_SYMBOL(xfrm_policy_byid);
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
- int dir, err = 0;
+ struct xfrm_policy *pol;
+ int err = 0;
- for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
- struct xfrm_policy *pol;
- int i;
+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead ||
+ xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX ||
+ pol->type != type)
+ continue;
- hlist_for_each_entry(pol,
- &net->xfrm.policy_inexact[dir], bydst) {
- if (pol->type != type)
- continue;
- err = security_xfrm_policy_delete(pol->security);
- if (err) {
- xfrm_audit_policy_delete(pol, 0, task_valid);
- return err;
- }
- }
- for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
- hlist_for_each_entry(pol,
- net->xfrm.policy_bydst[dir].table + i,
- bydst) {
- if (pol->type != type)
- continue;
- err = security_xfrm_policy_delete(
- pol->security);
- if (err) {
- xfrm_audit_policy_delete(pol, 0,
- task_valid);
- return err;
- }
- }
+ err = security_xfrm_policy_delete(pol->security);
+ if (err) {
+ xfrm_audit_policy_delete(pol, 0, task_valid);
+ return err;
}
}
return err;
@@ -937,6 +1770,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
int dir, err = 0, cnt = 0;
+ struct xfrm_policy *pol;
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -944,48 +1778,25 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
if (err)
goto out;
- for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
- struct xfrm_policy *pol;
- int i;
-
- again1:
- hlist_for_each_entry(pol,
- &net->xfrm.policy_inexact[dir], bydst) {
- if (pol->type != type)
- continue;
- __xfrm_policy_unlink(pol, dir);
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
- cnt++;
-
- xfrm_audit_policy_delete(pol, 1, task_valid);
-
- xfrm_policy_kill(pol);
-
- spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- goto again1;
- }
-
- for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
- again2:
- hlist_for_each_entry(pol,
- net->xfrm.policy_bydst[dir].table + i,
- bydst) {
- if (pol->type != type)
- continue;
- __xfrm_policy_unlink(pol, dir);
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
- cnt++;
-
- xfrm_audit_policy_delete(pol, 1, task_valid);
- xfrm_policy_kill(pol);
-
- spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- goto again2;
- }
- }
+again:
+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ dir = xfrm_policy_id2dir(pol->index);
+ if (pol->walk.dead ||
+ dir >= XFRM_POLICY_MAX ||
+ pol->type != type)
+ continue;
+ __xfrm_policy_unlink(pol, dir);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ cnt++;
+ xfrm_audit_policy_delete(pol, 1, task_valid);
+ xfrm_policy_kill(pol);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ goto again;
}
- if (!cnt)
+ if (cnt)
+ __xfrm_policy_inexact_flush(net);
+ else
err = -ESRCH;
out:
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
@@ -1084,21 +1895,189 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
if (match)
ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
dir);
-
return ret;
}
+static struct xfrm_pol_inexact_node *
+xfrm_policy_lookup_inexact_addr(const struct rb_root *r,
+ seqcount_t *count,
+ const xfrm_address_t *addr, u16 family)
+{
+ const struct rb_node *parent;
+ int seq;
+
+again:
+ seq = read_seqcount_begin(count);
+
+ parent = rcu_dereference_raw(r->rb_node);
+ while (parent) {
+ struct xfrm_pol_inexact_node *node;
+ int delta;
+
+ node = rb_entry(parent, struct xfrm_pol_inexact_node, node);
+
+ delta = xfrm_policy_addr_delta(addr, &node->addr,
+ node->prefixlen, family);
+ if (delta < 0) {
+ parent = rcu_dereference_raw(parent->rb_left);
+ continue;
+ } else if (delta > 0) {
+ parent = rcu_dereference_raw(parent->rb_right);
+ continue;
+ }
+
+ return node;
+ }
+
+ if (read_seqcount_retry(count, seq))
+ goto again;
+
+ return NULL;
+}
+
+static bool
+xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
+ struct xfrm_pol_inexact_bin *b,
+ const xfrm_address_t *saddr,
+ const xfrm_address_t *daddr)
+{
+ struct xfrm_pol_inexact_node *n;
+ u16 family;
+
+ if (!b)
+ return false;
+
+ family = b->k.family;
+ memset(cand, 0, sizeof(*cand));
+ cand->res[XFRM_POL_CAND_ANY] = &b->hhead;
+
+ n = xfrm_policy_lookup_inexact_addr(&b->root_d, &b->count, daddr,
+ family);
+ if (n) {
+ cand->res[XFRM_POL_CAND_DADDR] = &n->hhead;
+ n = xfrm_policy_lookup_inexact_addr(&n->root, &b->count, saddr,
+ family);
+ if (n)
+ cand->res[XFRM_POL_CAND_BOTH] = &n->hhead;
+ }
+
+ n = xfrm_policy_lookup_inexact_addr(&b->root_s, &b->count, saddr,
+ family);
+ if (n)
+ cand->res[XFRM_POL_CAND_SADDR] = &n->hhead;
+
+ return true;
+}
+
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_lookup_rcu(struct net *net, u8 type, u16 family,
+ u8 dir, u32 if_id)
+{
+ struct xfrm_pol_inexact_key k = {
+ .family = family,
+ .type = type,
+ .dir = dir,
+ .if_id = if_id,
+ };
+
+ write_pnet(&k.net, net);
+
+ return rhashtable_lookup(&xfrm_policy_inexact_table, &k,
+ xfrm_pol_inexact_params);
+}
+
+static struct xfrm_pol_inexact_bin *
+xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family,
+ u8 dir, u32 if_id)
+{
+ struct xfrm_pol_inexact_bin *bin;
+
+ lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
+
+ rcu_read_lock();
+ bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
+ rcu_read_unlock();
+
+ return bin;
+}
+
+static struct xfrm_policy *
+__xfrm_policy_eval_candidates(struct hlist_head *chain,
+ struct xfrm_policy *prefer,
+ const struct flowi *fl,
+ u8 type, u16 family, int dir, u32 if_id)
+{
+ u32 priority = prefer ? prefer->priority : ~0u;
+ struct xfrm_policy *pol;
+
+ if (!chain)
+ return NULL;
+
+ hlist_for_each_entry_rcu(pol, chain, bydst) {
+ int err;
+
+ if (pol->priority > priority)
+ break;
+
+ err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
+ if (err) {
+ if (err != -ESRCH)
+ return ERR_PTR(err);
+
+ continue;
+ }
+
+ if (prefer) {
+ /* matches. Is it older than *prefer? */
+ if (pol->priority == priority &&
+ prefer->pos < pol->pos)
+ return prefer;
+ }
+
+ return pol;
+ }
+
+ return NULL;
+}
+
+static struct xfrm_policy *
+xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand,
+ struct xfrm_policy *prefer,
+ const struct flowi *fl,
+ u8 type, u16 family, int dir, u32 if_id)
+{
+ struct xfrm_policy *tmp;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cand->res); i++) {
+ tmp = __xfrm_policy_eval_candidates(cand->res[i],
+ prefer,
+ fl, type, family, dir,
+ if_id);
+ if (!tmp)
+ continue;
+
+ if (IS_ERR(tmp))
+ return tmp;
+ prefer = tmp;
+ }
+
+ return prefer;
+}
+
static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
const struct flowi *fl,
u16 family, u8 dir,
u32 if_id)
{
- int err;
- struct xfrm_policy *pol, *ret;
+ struct xfrm_pol_inexact_candidates cand;
const xfrm_address_t *daddr, *saddr;
+ struct xfrm_pol_inexact_bin *bin;
+ struct xfrm_policy *pol, *ret;
struct hlist_head *chain;
unsigned int sequence;
u32 priority;
+ int err;
daddr = xfrm_flowi_daddr(fl, family);
saddr = xfrm_flowi_saddr(fl, family);
@@ -1129,25 +2108,20 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
break;
}
}
- chain = &net->xfrm.policy_inexact[dir];
- hlist_for_each_entry_rcu(pol, chain, bydst) {
- if ((pol->priority >= priority) && ret)
- break;
+ bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
+ if (!bin || !xfrm_policy_find_inexact_candidates(&cand, bin, saddr,
+ daddr))
+ goto skip_inexact;
- err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
- if (err) {
- if (err == -ESRCH)
- continue;
- else {
- ret = ERR_PTR(err);
- goto fail;
- }
- } else {
- ret = pol;
- break;
- }
+ pol = xfrm_policy_eval_candidates(&cand, ret, fl, type,
+ family, dir, if_id);
+ if (pol) {
+ ret = pol;
+ if (IS_ERR(pol))
+ goto fail;
}
+skip_inexact:
if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
goto retry;
@@ -1239,6 +2213,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
/* Socket policies are not hashed. */
if (!hlist_unhashed(&pol->bydst)) {
hlist_del_rcu(&pol->bydst);
+ hlist_del_init(&pol->bydst_inexact_list);
hlist_del(&pol->byidx);
}
@@ -1811,7 +2786,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
pq->timeout = pq->timeout << 1;
if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
xfrm_pol_hold(pol);
- goto out;
+ goto out;
}
dst_release(dst);
@@ -2816,13 +3791,17 @@ static void xfrm_statistics_fini(struct net *net)
static int __net_init xfrm_policy_init(struct net *net)
{
unsigned int hmask, sz;
- int dir;
+ int dir, err;
- if (net_eq(net, &init_net))
+ if (net_eq(net, &init_net)) {
xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
sizeof(struct xfrm_dst),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
+ err = rhashtable_init(&xfrm_policy_inexact_table,
+ &xfrm_pol_inexact_params);
+ BUG_ON(err);
+ }
hmask = 8 - 1;
sz = (hmask+1) * sizeof(struct hlist_head);
@@ -2857,6 +3836,7 @@ static int __net_init xfrm_policy_init(struct net *net)
seqlock_init(&net->xfrm.policy_hthresh.lock);
INIT_LIST_HEAD(&net->xfrm.policy_all);
+ INIT_LIST_HEAD(&net->xfrm.inexact_bins);
INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
return 0;
@@ -2875,6 +3855,7 @@ out_byidx:
static void xfrm_policy_fini(struct net *net)
{
+ struct xfrm_pol_inexact_bin *b, *t;
unsigned int sz;
int dir;
@@ -2900,6 +3881,11 @@ static void xfrm_policy_fini(struct net *net)
sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
xfrm_hash_free(net->xfrm.policy_byidx, sz);
+
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ list_for_each_entry_safe(b, t, &net->xfrm.inexact_bins, inexact_bins)
+ __xfrm_policy_inexact_prune_bin(b, true);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
static int __net_init xfrm_net_init(struct net *net)
@@ -3065,7 +4051,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
}
}
chain = &net->xfrm.policy_inexact[dir];
- hlist_for_each_entry(pol, chain, bydst) {
+ hlist_for_each_entry(pol, chain, bydst_inexact_list) {
if ((pol->priority >= priority) && ret)
break;