aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/rhashtable.h35
-rw-r--r--lib/rhashtable.c167
-rw-r--r--net/netfilter/nft_hash.c53
-rw-r--r--net/netlink/af_netlink.c130
4 files changed, 302 insertions, 83 deletions
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index e0337844358e..58851275fed9 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -18,6 +18,7 @@
#ifndef _LINUX_RHASHTABLE_H
#define _LINUX_RHASHTABLE_H
+#include <linux/compiler.h>
#include <linux/list_nulls.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
@@ -111,6 +112,7 @@ struct rhashtable_params {
* @p: Configuration parameters
* @run_work: Deferred worker to expand/shrink asynchronously
* @mutex: Mutex to protect current/future table swapping
+ * @walkers: List of active walkers
* @being_destroyed: True if table is set up for destruction
*/
struct rhashtable {
@@ -121,9 +123,36 @@ struct rhashtable {
struct rhashtable_params p;
struct work_struct run_work;
struct mutex mutex;
+ struct list_head walkers;
bool being_destroyed;
};
+/**
+ * struct rhashtable_walker - Hash table walker
+ * @list: List entry on the table's list of active walkers
+ * @resize: Resize event occurred. Set by the deferred worker before it
+ *	considers expanding or shrinking the table; cleared again when the
+ *	walk reports -EAGAIN to its caller
+ */
+struct rhashtable_walker {
+ struct list_head list;
+ bool resize;
+};
+
+/**
+ * struct rhashtable_iter - Hash table iterator, fits into netlink cb
+ * @ht: Table to iterate through
+ * @p: Entry handed out by the last rhashtable_walk_next call, or NULL;
+ *	reset to NULL by rhashtable_walk_stop
+ * @walker: Associated rhashtable walker, allocated by rhashtable_walk_init
+ *	and freed by rhashtable_walk_exit
+ * @slot: Current slot (bucket index) of the walk
+ * @skip: Number of entries to skip in slot, i.e. how many entries of the
+ *	current slot have already been returned
+ */
+struct rhashtable_iter {
+ struct rhashtable *ht;
+ struct rhash_head *p;
+ struct rhashtable_walker *walker;
+ unsigned int slot;
+ unsigned int skip;
+};
+
static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
{
return NULLS_MARKER(ht->p.nulls_base + hash);
@@ -179,6 +208,12 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
bool (*compare)(void *, void *),
void *arg);
+int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
+void rhashtable_walk_exit(struct rhashtable_iter *iter);
+int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
+void *rhashtable_walk_next(struct rhashtable_iter *iter);
+void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
+
void rhashtable_destroy(struct rhashtable *ht);
#define rht_dereference(p, ht) \
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index c41e21096373..057919164e23 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -484,16 +484,24 @@ static void rht_deferred_worker(struct work_struct *work)
{
struct rhashtable *ht;
struct bucket_table *tbl;
+ struct rhashtable_walker *walker;
ht = container_of(work, struct rhashtable, run_work);
mutex_lock(&ht->mutex);
+ if (ht->being_destroyed)
+ goto unlock;
+
tbl = rht_dereference(ht->tbl, ht);
+ list_for_each_entry(walker, &ht->walkers, list)
+ walker->resize = true;
+
if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
rhashtable_expand(ht);
else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size))
rhashtable_shrink(ht);
+unlock:
mutex_unlock(&ht->mutex);
}
@@ -818,6 +826,164 @@ exit:
}
EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert);
+/**
+ * rhashtable_walk_init - Initialise an iterator
+ * @ht:		Table to walk over
+ * @iter:	Hash table Iterator
+ *
+ * This function prepares a hash table walk.
+ *
+ * Note that if you restart a walk after rhashtable_walk_stop you
+ * may see the same object twice.  Also, you may miss objects if
+ * there are removals in between rhashtable_walk_stop and the next
+ * call to rhashtable_walk_start.
+ *
+ * For a completely stable walk you should construct your own data
+ * structure outside the hash table.
+ *
+ * This function may sleep so you must not call it from interrupt
+ * context or with spin locks held.
+ *
+ * You must call rhashtable_walk_exit if this function returns
+ * successfully.
+ *
+ * Returns zero on success, or -ENOMEM if the walker could not be
+ * allocated (in which case nothing was registered with the table).
+ */
+int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
+{
+	iter->ht = ht;
+	iter->p = NULL;
+	iter->slot = 0;
+	iter->skip = 0;
+
+	iter->walker = kmalloc(sizeof(*iter->walker), GFP_KERNEL);
+	if (!iter->walker)
+		return -ENOMEM;
+
+	/*
+	 * kmalloc() returns uninitialised memory.  The resize flag is read
+	 * by rhashtable_walk_start/rhashtable_walk_next, so it must have a
+	 * defined value before the walker becomes visible on the list.
+	 */
+	iter->walker->resize = false;
+
+	mutex_lock(&ht->mutex);
+	list_add(&iter->walker->list, &ht->walkers);
+	mutex_unlock(&ht->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_init);
+
+/**
+ * rhashtable_walk_exit - Free an iterator
+ * @iter:	Hash table Iterator
+ *
+ * Releases what rhashtable_walk_init set up: the walker is removed from
+ * the table's walker list while holding the table mutex, then freed.
+ */
+void rhashtable_walk_exit(struct rhashtable_iter *iter)
+{
+	struct rhashtable_walker *walker = iter->walker;
+	struct rhashtable *ht = iter->ht;
+
+	mutex_lock(&ht->mutex);
+	list_del(&walker->list);
+	mutex_unlock(&ht->mutex);
+
+	kfree(walker);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
+
+/**
+ * rhashtable_walk_start - Start a hash table walk
+ * @iter:	Hash table iterator
+ *
+ * Begin (or resume) a walk.  The RCU read lock is taken unconditionally,
+ * even on the error return, so every call must be paired with
+ * rhashtable_walk_stop.
+ *
+ * Returns zero if successful.
+ *
+ * Returns -EAGAIN if a resize event occurred.  In that case the iterator
+ * has been rewound to the beginning and may be used immediately by
+ * calling rhashtable_walk_next.
+ */
+int rhashtable_walk_start(struct rhashtable_iter *iter)
+{
+	struct rhashtable_walker *walker = iter->walker;
+
+	rcu_read_lock();
+
+	if (!walker->resize)
+		return 0;
+
+	/* A resize was flagged since the last walk: restart from slot 0. */
+	iter->slot = 0;
+	iter->skip = 0;
+	walker->resize = false;
+
+	return -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_start);
+
+/**
+ * rhashtable_walk_next - Return the next object and advance the iterator
+ * @iter: Hash table iterator
+ *
+ * Note that you must call rhashtable_walk_stop when you are finished
+ * with the walk.
+ *
+ * Returns the next object or NULL when the end of the table is reached.
+ *
+ * Returns ERR_PTR(-EAGAIN) if a resize event occurred. Note that the
+ * iterator will rewind back to the beginning and you may continue to use it.
+ */
+void *rhashtable_walk_next(struct rhashtable_iter *iter)
+{
+ const struct bucket_table *tbl;
+ struct rhashtable *ht = iter->ht;
+ struct rhash_head *p = iter->p;
+ void *obj = NULL;
+
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+
+ if (p) { /* continue from the entry returned by the previous call */
+ p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
+ goto next; /* enters the loop body below for the current slot */
+ }
+
+ for (; iter->slot < tbl->size; iter->slot++) {
+ int skip = iter->skip; /* entries of this slot to step over first */
+
+ rht_for_each_rcu(p, tbl, iter->slot) {
+ if (!skip)
+ break;
+ skip--;
+ }
+
+next:
+ if (!rht_is_a_nulls(p)) {
+ iter->skip++; /* p counts as consumed if the walk is resumed later */
+ iter->p = p;
+ obj = rht_obj(ht, p);
+ goto out;
+ }
+
+ iter->skip = 0; /* p is a nulls marker: nothing left in this slot */
+ }
+
+ iter->p = NULL; /* all slots visited; obj is still NULL */
+
+out:
+ if (iter->walker->resize) { /* resize flagged: rewind, report instead of obj */
+ iter->p = NULL;
+ iter->slot = 0;
+ iter->skip = 0;
+ iter->walker->resize = false;
+ return ERR_PTR(-EAGAIN);
+ }
+
+ return obj;
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_next);
+
+/**
+ * rhashtable_walk_stop - Finish a hash table walk
+ * @iter:	Hash table iterator
+ *
+ * Ends the walk section opened by rhashtable_walk_start: the cached
+ * entry pointer is forgotten and the RCU read lock is released.  The
+ * slot/skip position is kept, so a later rhashtable_walk_start can
+ * resume from it.
+ */
+void rhashtable_walk_stop(struct rhashtable_iter *iter)
+{
+	iter->p = NULL;
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
+
static size_t rounded_hashtable_size(struct rhashtable_params *params)
{
return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
@@ -890,6 +1056,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
memset(ht, 0, sizeof(*ht));
mutex_init(&ht->mutex);
memcpy(&ht->p, params, sizeof(*params));
+ INIT_LIST_HEAD(&ht->walkers);
if (params->locks_mul)
ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 75887d7d2c6a..61e6c407476a 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -130,31 +130,52 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_iter *iter)
{
struct rhashtable *priv = nft_set_priv(set);
- const struct bucket_table *tbl;
const struct nft_hash_elem *he;
+ struct rhashtable_iter hti;
struct nft_set_elem elem;
- unsigned int i;
+ int err;
- tbl = rht_dereference_rcu(priv->tbl, priv);
- for (i = 0; i < tbl->size; i++) {
- struct rhash_head *pos;
+ err = rhashtable_walk_init(priv, &hti);
+ iter->err = err;
+ if (err)
+ return;
+
+ err = rhashtable_walk_start(&hti); /* takes the RCU lock even on error */
+ if (err && err != -EAGAIN) {
+ iter->err = err;
+ goto out;
+ }
- rht_for_each_entry_rcu(he, pos, tbl, i, node) {
- if (iter->count < iter->skip)
- goto cont;
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ err = PTR_ERR(he);
+ if (err != -EAGAIN) {
+ iter->err = err;
+ goto out;
+ }
+ /* ERR_PTR(-EAGAIN) is not an element: the walk was rewound, fetch again. */
+ continue;
+ }
+
+ if (iter->count < iter->skip)
+ goto cont;
+
+ memcpy(&elem.key, &he->key, sizeof(elem.key));
+ if (set->flags & NFT_SET_MAP)
+ memcpy(&elem.data, he->data, sizeof(elem.data));
+ elem.flags = 0;
- memcpy(&elem.key, &he->key, sizeof(elem.key));
- if (set->flags & NFT_SET_MAP)
- memcpy(&elem.data, he->data, sizeof(elem.data));
- elem.flags = 0;
+ iter->err = iter->fn(ctx, set, iter, &elem);
+ if (iter->err < 0)
+ goto out;
- iter->err = iter->fn(ctx, set, iter, &elem);
- if (iter->err < 0)
- return;
cont:
- iter->count++;
- }
+ iter->count++;
}
+
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
}
static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index a36777b7cfb6..155854802d44 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2886,99 +2886,97 @@ EXPORT_SYMBOL(nlmsg_notify);
#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
struct seq_net_private p;
+ struct rhashtable_iter hti;
int link;
- int hash_idx;
};
-static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
+static int netlink_walk_start(struct nl_seq_iter *iter)
{
- struct nl_seq_iter *iter = seq->private;
- int i, j;
- struct netlink_sock *nlk;
- struct sock *s;
- loff_t off = 0;
-
- for (i = 0; i < MAX_LINKS; i++) {
- struct rhashtable *ht = &nl_table[i].hash;
- const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-
- for (j = 0; j < tbl->size; j++) {
- struct rhash_head *node;
-
- rht_for_each_entry_rcu(nlk, node, tbl, j, node) {
- s = (struct sock *)nlk;
+ int err;
- if (sock_net(s) != seq_file_net(seq))
- continue;
- if (off == pos) {
- iter->link = i;
- iter->hash_idx = j;
- return s;
- }
- ++off;
- }
- }
+ err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti);
+ if (err) {
+ iter->link = MAX_LINKS;
+ return err;
}
- return NULL;
+
+ err = rhashtable_walk_start(&iter->hti);
+ return err == -EAGAIN ? 0 : err;
}
-static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(RCU)
+static void netlink_walk_stop(struct nl_seq_iter *iter)
{
- rcu_read_lock();
- return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+ rhashtable_walk_stop(&iter->hti);
+ rhashtable_walk_exit(&iter->hti);
}
-static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+static void *__netlink_seq_next(struct seq_file *seq)
{
- struct rhashtable *ht;
- const struct bucket_table *tbl;
- struct rhash_head *node;
+ struct nl_seq_iter *iter = seq->private;
struct netlink_sock *nlk;
- struct nl_seq_iter *iter;
- struct net *net;
- int i, j;
- ++*pos;
+ do {
+ for (;;) {
+ int err;
- if (v == SEQ_START_TOKEN)
- return netlink_seq_socket_idx(seq, 0);
+ nlk = rhashtable_walk_next(&iter->hti);
- net = seq_file_net(seq);
- iter = seq->private;
- nlk = v;
+ if (IS_ERR(nlk)) {
+ if (PTR_ERR(nlk) == -EAGAIN)
+ continue;
- i = iter->link;
- ht = &nl_table[i].hash;
- tbl = rht_dereference_rcu(ht->tbl, ht);
- rht_for_each_entry_rcu_continue(nlk, node, nlk->node.next, tbl, iter->hash_idx, node)
- if (net_eq(sock_net((struct sock *)nlk), net))
- return nlk;
+ return nlk;
+ }
- j = iter->hash_idx + 1;
+ if (nlk)
+ break;
- do {
+ netlink_walk_stop(iter);
+ if (++iter->link >= MAX_LINKS)
+ return NULL;
- for (; j < tbl->size; j++) {
- rht_for_each_entry_rcu(nlk, node, tbl, j, node) {
- if (net_eq(sock_net((struct sock *)nlk), net)) {
- iter->link = i;
- iter->hash_idx = j;
- return nlk;
- }
- }
+ err = netlink_walk_start(iter);
+ if (err)
+ return ERR_PTR(err);
}
+ } while (sock_net(&nlk->sk) != seq_file_net(seq));
- j = 0;
- } while (++i < MAX_LINKS);
+ return nlk;
+}
- return NULL;
+/*
+ * seq_file start callback: open a walk on the first netlink table and
+ * step forward *posp entries.  Position 0 yields SEQ_START_TOKEN; the
+ * stepping ends early if the walk runs out of entries (NULL) or
+ * produces an error pointer, and that value is returned as is.
+ */
+static void *netlink_seq_start(struct seq_file *seq, loff_t *posp)
+{
+	struct nl_seq_iter *iter = seq->private;
+	loff_t remaining = *posp;
+	void *v = SEQ_START_TOKEN;
+	int rc;
+
+	iter->link = 0;
+
+	rc = netlink_walk_start(iter);
+	if (rc)
+		return ERR_PTR(rc);
+
+	while (remaining && v && !IS_ERR(v)) {
+		v = __netlink_seq_next(seq);
+		remaining--;
+	}
+
+	return v;
+}
+
+static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ ++*pos;
+ return __netlink_seq_next(seq);
}
static void netlink_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU)
{
- rcu_read_unlock();
+ struct nl_seq_iter *iter = seq->private;
+
+ if (iter->link >= MAX_LINKS)
+ return;
+
+ netlink_walk_stop(iter);
}