From 4feb7c7a4fbb8f63371be31cda79433c7cf3da86 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 Mar 2019 14:42:40 +1100 Subject: rhashtable: don't hold lock on first table throughout insertion. rhashtable_try_insert() currently holds a lock on the bucket in the first table, while also locking buckets in subsequent tables. This is unnecessary and looks like a hold-over from some earlier version of the implementation. As insert and remove always lock a bucket in each table in turn, and as insert only inserts in the final table, there cannot be any races that are not covered by simply locking a bucket in each table in turn. When an insert call reaches that last table it can be sure that there is no matchinf entry in any other table as it has searched them all, and insertion never happens anywhere but in the last table. The fact that code tests for the existence of future_tbl while holding a lock on the relevant bucket ensures that two threads inserting the same key will make compatible decisions about which is the "last" table. This simplifies the code and allows the ->rehash field to be discarded. We still need a way to ensure that a dead bucket_table is never re-linked by rhashtable_walk_stop(). This can be achieved by calling call_rcu() inside the locked region, and checking with rcu_head_after_call_rcu() in rhashtable_walk_stop() to see if the bucket table is empty and dead. Acked-by: Herbert Xu Reviewed-by: Paul E. McKenney Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 13 ------------ lib/rhashtable.c | 52 ++++++++++++++-------------------------------- 2 files changed, 16 insertions(+), 49 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index ae9c0f71f311..3864193d5e2e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -63,7 +63,6 @@ struct bucket_table { unsigned int size; unsigned int nest; - unsigned int rehash; u32 hash_rnd; unsigned int locks_mask; spinlock_t *locks; @@ -776,12 +775,6 @@ static inline int rhltable_insert( * @obj: pointer to hash head inside object * @params: hash table parameters * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * * This lookup function may only be used for fixed key hash table (key_len * parameter set). It will BUG() if used inappropriately. * @@ -837,12 +830,6 @@ static inline void *rhashtable_lookup_get_insert_fast( * @obj: pointer to hash head inside object * @params: hash table parameters * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * * Lookups may occur in parallel with hashtable mutations and resizing. * * Will trigger an automatic deferred table resizing if residency in the diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 0a105d4af166..776b3a82d3a1 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -197,6 +197,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, return NULL; } + rcu_head_init(&tbl->rcu); INIT_LIST_HEAD(&tbl->walkers); tbl->hash_rnd = get_random_u32(); @@ -280,10 +281,9 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, while (!(err = rhashtable_rehash_one(ht, old_hash))) ; - if (err == -ENOENT) { - old_tbl->rehash++; + if (err == -ENOENT) err = 0; - } + spin_unlock_bh(old_bucket_lock); return err; @@ -330,13 +330,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht) spin_lock(&ht->lock); list_for_each_entry(walker, &old_tbl->walkers, list) walker->tbl = NULL; - spin_unlock(&ht->lock); /* Wait for readers. All new readers will see the new * table, and thus no references to the old table will * remain. + * We do this inside the locked region so that + * rhashtable_walk_stop() can use rcu_head_after_call_rcu() + * to check if it should not re-link the table. */ call_rcu(&old_tbl->rcu, bucket_table_free_rcu); + spin_unlock(&ht->lock); return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } @@ -578,46 +581,22 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, struct bucket_table *new_tbl; struct bucket_table *tbl; unsigned int hash; - spinlock_t *lock; void *data; - tbl = rcu_dereference(ht->tbl); - - /* All insertions must grab the oldest table containing - * the hashed bucket that is yet to be rehashed. - */ - for (;;) { - hash = rht_head_hashfn(ht, tbl, obj, ht->p); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); - - if (tbl->rehash <= hash) - break; - - spin_unlock_bh(lock); - tbl = rht_dereference_rcu(tbl->future_tbl, ht); - } - - data = rhashtable_lookup_one(ht, tbl, hash, key, obj); - new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); - if (PTR_ERR(new_tbl) != -EEXIST) - data = ERR_CAST(new_tbl); + new_tbl = rcu_dereference(ht->tbl); - while (!IS_ERR_OR_NULL(new_tbl)) { + do { tbl = new_tbl; hash = rht_head_hashfn(ht, tbl, obj, ht->p); - spin_lock_nested(rht_bucket_lock(tbl, hash), - SINGLE_DEPTH_NESTING); + spin_lock_bh(rht_bucket_lock(tbl, hash)); data = rhashtable_lookup_one(ht, tbl, hash, key, obj); new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); if (PTR_ERR(new_tbl) != -EEXIST) data = ERR_CAST(new_tbl); - spin_unlock(rht_bucket_lock(tbl, hash)); - } - - spin_unlock_bh(lock); + spin_unlock_bh(rht_bucket_lock(tbl, hash)); + } while (!IS_ERR_OR_NULL(new_tbl)); if (PTR_ERR(data) == -EAGAIN) data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: @@ -939,10 +918,11 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) ht = iter->ht; spin_lock(&ht->lock); - if (tbl->rehash < tbl->size) - list_add(&iter->walker.list, &tbl->walkers); - else + if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu)) + /* This bucket table is being freed, don't re-link it. */ iter->walker.tbl = NULL; + else + list_add(&iter->walker.list, &tbl->walkers); spin_unlock(&ht->lock); out: -- cgit v1.2.3-59-g8ed1b From f7ad68bf98506f48129267438ada1255fc4edfa2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 Mar 2019 14:42:40 +1100 Subject: rhashtable: rename rht_for_each*continue as *from. The pattern set by list.h is that for_each..continue() iterators start at the next entry after the given one, while for_each..from() iterators start at the given entry. The rht_for_each*continue() iterators are documented as though the start at the 'next' entry, but actually start at the given entry, and they are used expecting that behaviour. So fix the documentation and change the names to *from for consistency with list.h Acked-by: Herbert Xu Acked-by: Miguel Ojeda Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- .clang-format | 8 ++++---- include/linux/rhashtable.h | 40 ++++++++++++++++++++-------------------- lib/rhashtable.c | 2 +- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/.clang-format b/.clang-format index f49620f506f1..3a4c8220df2f 100644 --- a/.clang-format +++ b/.clang-format @@ -366,14 +366,14 @@ ForEachMacros: - 'rhl_for_each_entry_rcu' - 'rhl_for_each_rcu' - 'rht_for_each' - - 'rht_for_each_continue' + - 'rht_for_each_from' - 'rht_for_each_entry' - - 'rht_for_each_entry_continue' + - 'rht_for_each_entry_from' - 'rht_for_each_entry_rcu' - - 'rht_for_each_entry_rcu_continue' + - 'rht_for_each_entry_rcu_from' - 'rht_for_each_entry_safe' - 'rht_for_each_rcu' - - 'rht_for_each_rcu_continue' + - 'rht_for_each_rcu_from' - '__rq_for_each_bio' - 'rq_for_each_segment' - 'scsi_for_each_prot_sg' diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 3864193d5e2e..86dfa417848d 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -306,13 +306,13 @@ static inline struct rhash_head __rcu **rht_bucket_insert( } /** - * rht_for_each_continue - continue iterating over hash chain + * rht_for_each_from - iterate over hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index */ -#define rht_for_each_continue(pos, head, tbl, hash) \ +#define rht_for_each_from(pos, head, tbl, hash) \ for (pos = rht_dereference_bucket(head, tbl, hash); \ !rht_is_a_nulls(pos); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) @@ -324,18 +324,18 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ - rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash) + rht_for_each_from(pos, *rht_bucket(tbl, hash), tbl, hash) /** - * rht_for_each_entry_continue - continue iterating over hash chain + * rht_for_each_entry_from - iterate over hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. */ -#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \ +#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \ for (pos = rht_dereference_bucket(head, tbl, hash); \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) @@ -349,7 +349,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \ + rht_for_each_entry_from(tpos, pos, *rht_bucket(tbl, hash), \ tbl, hash, member) /** @@ -374,9 +374,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert( rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** - * rht_for_each_rcu_continue - continue iterating over rcu hash chain + * rht_for_each_rcu_from - iterate over rcu hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @@ -384,7 +384,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_rcu_continue(pos, head, tbl, hash) \ +#define rht_for_each_rcu_from(pos, head, tbl, hash) \ for (({barrier(); }), \ pos = rht_dereference_bucket_rcu(head, tbl, hash); \ !rht_is_a_nulls(pos); \ @@ -401,13 +401,13 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu(pos, tbl, hash) \ - rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash) + rht_for_each_rcu_from(pos, *rht_bucket(tbl, hash), tbl, hash) /** - * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain + * rht_for_each_entry_rcu_from - iterated over rcu hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. @@ -416,7 +416,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \ +#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \ for (({barrier(); }), \ pos = rht_dereference_bucket_rcu(head, tbl, hash); \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ @@ -435,7 +435,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \ + rht_for_each_entry_rcu_from(tpos, pos, *rht_bucket(tbl, hash), \ tbl, hash, member) /** @@ -491,7 +491,7 @@ restart: hash = rht_key_hashfn(ht, tbl, key, params); head = rht_bucket(tbl, hash); do { - rht_for_each_rcu_continue(he, *head, tbl, hash) { + rht_for_each_rcu_from(he, *head, tbl, hash) { if (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) @@ -625,7 +625,7 @@ slow_path: if (!pprev) goto out; - rht_for_each_continue(head, *pprev, tbl, hash) { + rht_for_each_from(head, *pprev, tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; @@ -890,7 +890,7 @@ static inline int __rhashtable_remove_fast_one( spin_lock_bh(lock); pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(he, *pprev, tbl, hash) { + rht_for_each_from(he, *pprev, tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); @@ -1042,7 +1042,7 @@ static inline int __rhashtable_replace_fast( spin_lock_bh(lock); pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(he, *pprev, tbl, hash) { + rht_for_each_from(he, *pprev, tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 776b3a82d3a1..f65e43fb1ff8 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -490,7 +490,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, elasticity = RHT_ELASTICITY; pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(head, *pprev, tbl, hash) { + rht_for_each_from(head, *pprev, tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; -- cgit v1.2.3-59-g8ed1b