Diffstat (limited to 'net/netfilter/nft_set_rbtree.c')
-rw-r--r--  net/netfilter/nft_set_rbtree.c  167
1 file changed, 150 insertions(+), 17 deletions(-)
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 5000b938ab1e..7325bee7d144 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -18,7 +18,7 @@
struct nft_rbtree {
struct rb_root root;
rwlock_t lock;
- seqcount_t count;
+ seqcount_rwlock_t count;
struct delayed_work gc_work;
};
@@ -33,6 +33,11 @@ static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
(*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
}
+static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
+{
+ return !nft_rbtree_interval_end(rbe);
+}
+
static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
const struct nft_rbtree_elem *interval)
{
@@ -64,7 +69,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (interval &&
nft_rbtree_equal(set, this, interval) &&
nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(interval))
+ nft_rbtree_interval_start(interval))
continue;
interval = rbe;
} else if (d > 0)
@@ -74,6 +79,10 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
parent = rcu_dereference_raw(parent->rb_left);
continue;
}
+
+ if (nft_set_elem_expired(&rbe->ext))
+ return false;
+
if (nft_rbtree_interval_end(rbe)) {
if (nft_set_is_anonymous(set))
return false;
@@ -89,7 +98,8 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
- !nft_rbtree_interval_end(interval)) {
+ !nft_set_elem_expired(&interval->ext) &&
+ nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
}
@@ -97,8 +107,9 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
return false;
}
-static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+INDIRECT_CALLABLE_SCOPE
+bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
{
struct nft_rbtree *priv = nft_set_priv(set);
unsigned int seq = read_seqcount_begin(&priv->count);
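Note: the hunk above only converts the declaration (INDIRECT_CALLABLE_SCOPE exposes the function to the indirect-call wrapper macros) and shows the first lines of the body; the remainder of nft_rbtree_lookup is unchanged and therefore not part of this diff. As a reading aid, a sketch of the usual seqcount retry pattern such a lookup follows, assuming __nft_rbtree_lookup takes the sequence number as an extra argument (this diff does not show that part):

	ret = __nft_rbtree_lookup(net, set, key, ext, seq);
	if (ret || !read_seqcount_retry(&priv->count, seq))
		return ret;	/* lockless walk was consistent */

	/* a writer changed the tree meanwhile: retry under the read lock */
	read_lock_bh(&priv->lock);
	seq = read_seqcount_begin(&priv->count);
	ret = __nft_rbtree_lookup(net, set, key, ext, seq);
	read_unlock_bh(&priv->lock);

	return ret;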
@@ -149,6 +160,9 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
continue;
}
+ if (nft_set_elem_expired(&rbe->ext))
+ return false;
+
if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
(*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
(flags & NFT_SET_ELEM_INTERVAL_END)) {
@@ -165,6 +179,7 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
+ !nft_set_elem_expired(&interval->ext) &&
((!nft_rbtree_interval_end(interval) &&
!(flags & NFT_SET_ELEM_INTERVAL_END)) ||
(nft_rbtree_interval_end(interval) &&
@@ -204,12 +219,66 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
{
+ bool overlap = false, dup_end_left = false, dup_end_right = false;
struct nft_rbtree *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
struct nft_rbtree_elem *rbe;
struct rb_node *parent, **p;
int d;
+ /* Detect overlaps as we descend the tree. Set the flag in these cases:
+ *
+ * a1. _ _ __>| ?_ _ __| (insert end before existing end)
+ * a2. _ _ ___| ?_ _ _>| (insert end after existing end)
+ * a3. _ _ ___? >|_ _ __| (insert start before existing end)
+ *
+ * and clear it later on, as we eventually reach the points indicated by
+ * '?' above, in the cases described below. We'll always meet these
+ * later, locally, due to tree ordering, and overlaps for the intervals
+ * that are the closest together are always evaluated last.
+ *
+ * b1. _ _ __>| !_ _ __| (insert end before existing start)
+ * b2. _ _ ___| !_ _ _>| (insert end after existing start)
+ * b3. _ _ ___! >|_ _ __| (insert start after existing end, as a leaf)
+ * '--' no nodes falling in this range
+ * b4. >|_ _ ! (insert start before existing start)
+ *
+ * Case a3. resolves to b3.:
+ * - if the inserted start element is the leftmost, because the '0'
+ * element in the tree serves as end element
+ * - otherwise, if an existing end is found immediately to the left. If
+ * there are existing nodes in between, we need to further descend the
+ * tree before we can conclude the new start isn't causing an overlap
+ *
+ * or to b4., which, preceded by a3., means we already traversed one or
+ * more existing intervals entirely, from the right.
+ *
+ * For a new, rightmost pair of elements, we'll hit cases b3. and b2.,
+ * in that order.
+ *
+ * The flag is also cleared in two special cases:
+ *
+ * b5. |__ _ _!|<_ _ _ (insert start right before existing end)
+ * b6. |__ _ >|!__ _ _ (insert end right after existing start)
+ *
+ * which always happen as last step and imply that no further
+ * overlapping is possible.
+ *
+ * Another special case comes from the fact that start elements matching
+ * an already existing start element are allowed: insertion is not
+ * performed but we return -EEXIST in that case, and the error will be
+ * cleared by the caller if NLM_F_EXCL is not present in the request.
+ * This way, request for insertion of an exact overlap isn't reported as
+ * error to userspace if not desired.
+ *
+ * However, if the existing start matches a pre-existing start, but the
+ * end element doesn't match the corresponding pre-existing end element,
+ * we need to report a partial overlap. This is a local condition that
+ * can be noticed without need for a tracking flag, by checking for a
+ * local duplicated end for a corresponding start, from left and right,
+ * separately.
+ */
+
parent = NULL;
p = &priv->root.rb_node;
while (*p != NULL) {
@@ -218,25 +287,82 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
d = memcmp(nft_set_ext_key(&rbe->ext),
nft_set_ext_key(&new->ext),
set->klen);
- if (d < 0)
+ if (d < 0) {
p = &parent->rb_left;
- else if (d > 0)
+
+ if (nft_rbtree_interval_start(new)) {
+ if (nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext) && !*p)
+ overlap = false;
+ } else {
+ if (dup_end_left && !*p)
+ return -ENOTEMPTY;
+
+ overlap = nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext,
+ genmask) &&
+ !nft_set_elem_expired(&rbe->ext);
+
+ if (overlap) {
+ dup_end_right = true;
+ continue;
+ }
+ }
+ } else if (d > 0) {
p = &parent->rb_right;
- else {
+
+ if (nft_rbtree_interval_end(new)) {
+ if (dup_end_right && !*p)
+ return -ENOTEMPTY;
+
+ overlap = nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext,
+ genmask) &&
+ !nft_set_elem_expired(&rbe->ext);
+
+ if (overlap) {
+ dup_end_left = true;
+ continue;
+ }
+ } else if (nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext)) {
+ overlap = nft_rbtree_interval_end(rbe);
+ }
+ } else {
if (nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(new)) {
+ nft_rbtree_interval_start(new)) {
p = &parent->rb_left;
- } else if (!nft_rbtree_interval_end(rbe) &&
+
+ if (nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext))
+ overlap = false;
+ } else if (nft_rbtree_interval_start(rbe) &&
nft_rbtree_interval_end(new)) {
p = &parent->rb_right;
- } else if (nft_set_elem_active(&rbe->ext, genmask)) {
+
+ if (nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext))
+ overlap = false;
+ } else if (nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext)) {
*ext = &rbe->ext;
return -EEXIST;
} else {
- p = &parent->rb_left;
+ overlap = false;
+ if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
}
}
+
+ dup_end_left = dup_end_right = false;
}
+
+ if (overlap)
+ return -ENOTEMPTY;
+
rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
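Note: the comment block at the top of __nft_rbtree_insert describes the overlap detection in terms of the tree walk. The userspace-visible policy it implements can be summarised with a small standalone C illustration (not kernel code; the interval values and helper name are made up): an exact duplicate reports EEXIST, which the caller clears unless NLM_F_EXCL was requested, any partial overlap reports ENOTEMPTY, and disjoint intervals are inserted.

	#include <errno.h>
	#include <stdio.h>

	struct interval { unsigned int start, end; };	/* half-open [start, end) */

	static int try_insert(const struct interval *elems, int n, struct interval new)
	{
		for (int i = 0; i < n; i++) {
			if (new.start == elems[i].start && new.end == elems[i].end)
				return -EEXIST;		/* exact duplicate */
			if (new.start < elems[i].end && elems[i].start < new.end)
				return -ENOTEMPTY;	/* partial overlap */
		}
		return 0;				/* disjoint: would be inserted */
	}

	int main(void)
	{
		struct interval elems[] = { { 10, 20 }, { 30, 40 } };

		printf("%d\n", try_insert(elems, 2, (struct interval){ 10, 20 })); /* -EEXIST */
		printf("%d\n", try_insert(elems, 2, (struct interval){ 10, 25 })); /* -ENOTEMPTY */
		printf("%d\n", try_insert(elems, 2, (struct interval){ 20, 30 })); /* 0 */
		return 0;
	}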
@@ -317,10 +443,10 @@ static void *nft_rbtree_deactivate(const struct net *net,
parent = parent->rb_right;
else {
if (nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(this)) {
+ nft_rbtree_interval_start(this)) {
parent = parent->rb_left;
continue;
- } else if (!nft_rbtree_interval_end(rbe) &&
+ } else if (nft_rbtree_interval_start(rbe) &&
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
@@ -350,6 +476,8 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (iter->count < iter->skip)
goto cont;
+ if (nft_set_elem_expired(&rbe->ext))
+ goto cont;
if (!nft_set_elem_active(&rbe->ext, iter->genmask))
goto cont;
@@ -418,6 +546,12 @@ static void nft_rbtree_gc(struct work_struct *work)
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
+ rbe = nft_set_catchall_gc(set);
+ if (rbe) {
+ gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+ if (gcb)
+ nft_set_gc_batch_add(gcb, rbe);
+ }
nft_set_gc_batch_complete(gcb);
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
@@ -437,7 +571,7 @@ static int nft_rbtree_init(const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
rwlock_init(&priv->lock);
- seqcount_init(&priv->count);
+ seqcount_rwlock_init(&priv->count, &priv->lock);
priv->root = RB_ROOT;
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
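Note: the switch from seqcount_t to seqcount_rwlock_t (also visible in the struct nft_rbtree hunk at the top) associates the counter with priv->lock, so lockdep can check that the sequence counter is only written with the rwlock held. The write side then pairs the two as in the GC hunk above; a minimal sketch of that pairing (the body is a placeholder, not code from this file):

	write_lock_bh(&priv->lock);
	write_seqcount_begin(&priv->count);
	/* ... modify priv->root ... */
	write_seqcount_end(&priv->count);
	write_unlock_bh(&priv->lock);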
@@ -481,8 +615,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
-struct nft_set_type nft_set_rbtree_type __read_mostly = {
- .owner = THIS_MODULE,
+const struct nft_set_type nft_set_rbtree_type = {
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
.ops = {
.privsize = nft_rbtree_privsize,