diff options
Diffstat (limited to 'net/netfilter/nft_set_rbtree.c')
-rw-r--r-- | net/netfilter/nft_set_rbtree.c | 167 |
1 files changed, 150 insertions, 17 deletions
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 5000b938ab1e..7325bee7d144 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -18,7 +18,7 @@ struct nft_rbtree { struct rb_root root; rwlock_t lock; - seqcount_t count; + seqcount_rwlock_t count; struct delayed_work gc_work; }; @@ -33,6 +33,11 @@ static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe) (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END); } +static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe) +{ + return !nft_rbtree_interval_end(rbe); +} + static bool nft_rbtree_equal(const struct nft_set *set, const void *this, const struct nft_rbtree_elem *interval) { @@ -64,7 +69,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set if (interval && nft_rbtree_equal(set, this, interval) && nft_rbtree_interval_end(rbe) && - !nft_rbtree_interval_end(interval)) + nft_rbtree_interval_start(interval)) continue; interval = rbe; } else if (d > 0) @@ -74,6 +79,10 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set parent = rcu_dereference_raw(parent->rb_left); continue; } + + if (nft_set_elem_expired(&rbe->ext)) + return false; + if (nft_rbtree_interval_end(rbe)) { if (nft_set_is_anonymous(set)) return false; @@ -89,7 +98,8 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && - !nft_rbtree_interval_end(interval)) { + !nft_set_elem_expired(&interval->ext) && + nft_rbtree_interval_start(interval)) { *ext = &interval->ext; return true; } @@ -97,8 +107,9 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set return false; } -static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +INDIRECT_CALLABLE_SCOPE +bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) { struct nft_rbtree *priv = nft_set_priv(set); unsigned int seq = read_seqcount_begin(&priv->count); @@ -149,6 +160,9 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, continue; } + if (nft_set_elem_expired(&rbe->ext)) + return false; + if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) || (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) == (flags & NFT_SET_ELEM_INTERVAL_END)) { @@ -165,6 +179,7 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && + !nft_set_elem_expired(&interval->ext) && ((!nft_rbtree_interval_end(interval) && !(flags & NFT_SET_ELEM_INTERVAL_END)) || (nft_rbtree_interval_end(interval) && @@ -204,12 +219,66 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, struct nft_set_ext **ext) { + bool overlap = false, dup_end_left = false, dup_end_right = false; struct nft_rbtree *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); struct nft_rbtree_elem *rbe; struct rb_node *parent, **p; int d; + /* Detect overlaps as we descend the tree. Set the flag in these cases: + * + * a1. _ _ __>| ?_ _ __| (insert end before existing end) + * a2. _ _ ___| ?_ _ _>| (insert end after existing end) + * a3. _ _ ___? >|_ _ __| (insert start before existing end) + * + * and clear it later on, as we eventually reach the points indicated by + * '?' above, in the cases described below. We'll always meet these + * later, locally, due to tree ordering, and overlaps for the intervals + * that are the closest together are always evaluated last. + * + * b1. _ _ __>| !_ _ __| (insert end before existing start) + * b2. _ _ ___| !_ _ _>| (insert end after existing start) + * b3. _ _ ___! >|_ _ __| (insert start after existing end, as a leaf) + * '--' no nodes falling in this range + * b4. >|_ _ ! (insert start before existing start) + * + * Case a3. resolves to b3.: + * - if the inserted start element is the leftmost, because the '0' + * element in the tree serves as end element + * - otherwise, if an existing end is found immediately to the left. If + * there are existing nodes in between, we need to further descend the + * tree before we can conclude the new start isn't causing an overlap + * + * or to b4., which, preceded by a3., means we already traversed one or + * more existing intervals entirely, from the right. + * + * For a new, rightmost pair of elements, we'll hit cases b3. and b2., + * in that order. + * + * The flag is also cleared in two special cases: + * + * b5. |__ _ _!|<_ _ _ (insert start right before existing end) + * b6. |__ _ >|!__ _ _ (insert end right after existing start) + * + * which always happen as last step and imply that no further + * overlapping is possible. + * + * Another special case comes from the fact that start elements matching + * an already existing start element are allowed: insertion is not + * performed but we return -EEXIST in that case, and the error will be + * cleared by the caller if NLM_F_EXCL is not present in the request. + * This way, request for insertion of an exact overlap isn't reported as + * error to userspace if not desired. + * + * However, if the existing start matches a pre-existing start, but the + * end element doesn't match the corresponding pre-existing end element, + * we need to report a partial overlap. This is a local condition that + * can be noticed without need for a tracking flag, by checking for a + * local duplicated end for a corresponding start, from left and right, + * separately. + */ + parent = NULL; p = &priv->root.rb_node; while (*p != NULL) { @@ -218,25 +287,82 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, d = memcmp(nft_set_ext_key(&rbe->ext), nft_set_ext_key(&new->ext), set->klen); - if (d < 0) + if (d < 0) { p = &parent->rb_left; - else if (d > 0) + + if (nft_rbtree_interval_start(new)) { + if (nft_rbtree_interval_end(rbe) && + nft_set_elem_active(&rbe->ext, genmask) && + !nft_set_elem_expired(&rbe->ext) && !*p) + overlap = false; + } else { + if (dup_end_left && !*p) + return -ENOTEMPTY; + + overlap = nft_rbtree_interval_end(rbe) && + nft_set_elem_active(&rbe->ext, + genmask) && + !nft_set_elem_expired(&rbe->ext); + + if (overlap) { + dup_end_right = true; + continue; + } + } + } else if (d > 0) { p = &parent->rb_right; - else { + + if (nft_rbtree_interval_end(new)) { + if (dup_end_right && !*p) + return -ENOTEMPTY; + + overlap = nft_rbtree_interval_end(rbe) && + nft_set_elem_active(&rbe->ext, + genmask) && + !nft_set_elem_expired(&rbe->ext); + + if (overlap) { + dup_end_left = true; + continue; + } + } else if (nft_set_elem_active(&rbe->ext, genmask) && + !nft_set_elem_expired(&rbe->ext)) { + overlap = nft_rbtree_interval_end(rbe); + } + } else { if (nft_rbtree_interval_end(rbe) && - !nft_rbtree_interval_end(new)) { + nft_rbtree_interval_start(new)) { p = &parent->rb_left; - } else if (!nft_rbtree_interval_end(rbe) && + + if (nft_set_elem_active(&rbe->ext, genmask) && + !nft_set_elem_expired(&rbe->ext)) + overlap = false; + } else if (nft_rbtree_interval_start(rbe) && nft_rbtree_interval_end(new)) { p = &parent->rb_right; - } else if (nft_set_elem_active(&rbe->ext, genmask)) { + + if (nft_set_elem_active(&rbe->ext, genmask) && + !nft_set_elem_expired(&rbe->ext)) + overlap = false; + } else if (nft_set_elem_active(&rbe->ext, genmask) && + !nft_set_elem_expired(&rbe->ext)) { *ext = &rbe->ext; return -EEXIST; } else { - p = &parent->rb_left; + overlap = false; + if (nft_rbtree_interval_end(rbe)) + p = &parent->rb_left; + else + p = &parent->rb_right; } } + + dup_end_left = dup_end_right = false; } + + if (overlap) + return -ENOTEMPTY; + rb_link_node_rcu(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); return 0; @@ -317,10 +443,10 @@ static void *nft_rbtree_deactivate(const struct net *net, parent = parent->rb_right; else { if (nft_rbtree_interval_end(rbe) && - !nft_rbtree_interval_end(this)) { + nft_rbtree_interval_start(this)) { parent = parent->rb_left; continue; - } else if (!nft_rbtree_interval_end(rbe) && + } else if (nft_rbtree_interval_start(rbe) && nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; @@ -350,6 +476,8 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; + if (nft_set_elem_expired(&rbe->ext)) + goto cont; if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; @@ -418,6 +546,12 @@ static void nft_rbtree_gc(struct work_struct *work) write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); + rbe = nft_set_catchall_gc(set); + if (rbe) { + gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); + if (gcb) + nft_set_gc_batch_add(gcb, rbe); + } nft_set_gc_batch_complete(gcb); queue_delayed_work(system_power_efficient_wq, &priv->gc_work, @@ -437,7 +571,7 @@ static int nft_rbtree_init(const struct nft_set *set, struct nft_rbtree *priv = nft_set_priv(set); rwlock_init(&priv->lock); - seqcount_init(&priv->count); + seqcount_rwlock_init(&priv->count, &priv->lock); priv->root = RB_ROOT; INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc); @@ -481,8 +615,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, return true; } -struct nft_set_type nft_set_rbtree_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_rbtree_type = { .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .ops = { .privsize = nft_rbtree_privsize, |