path: root/lib/rhashtable.c
author      Linus Torvalds <torvalds@linux-foundation.org>  2019-05-07 22:03:58 -0700
committer   Linus Torvalds <torvalds@linux-foundation.org>  2019-05-07 22:03:58 -0700
commit      80f232121b69cc69a31ccb2b38c1665d770b0710 (patch)
tree        106263eac4ff03b899df695e00dd11e593e74fe2 /lib/rhashtable.c
parent      Merge tag 'devicetree-for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux (diff)
parent      Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (diff)
download    linux-dev-80f232121b69cc69a31ccb2b38c1665d770b0710.tar.xz
download    linux-dev-80f232121b69cc69a31ccb2b38c1665d770b0710.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 "Highlights:

  1) Support AES128-CCM ciphers in kTLS, from Vakul Garg.
  2) Add fib_sync_mem to control the amount of dirty memory we allow to queue up between synchronize RCU calls, from David Ahern.
  3) Make flow classifier more lockless, from Vlad Buslov.
  4) Add PHY downshift support to aquantia driver, from Heiner Kallweit.
  5) Add SKB cache for TCP rx and tx, from Eric Dumazet. This reduces contention on SLAB spinlocks in heavy RPC workloads.
  6) Partial GSO offload support in XFRM, from Boris Pismenny.
  7) Add fast link down support to ethtool, from Heiner Kallweit.
  8) Use siphash for IP ID generator, from Eric Dumazet.
  9) Pull nexthops even further out from ipv4/ipv6 routes and FIB entries, from David Ahern.
 10) Move skb->xmit_more into a per-cpu variable, from Florian Westphal.
 11) Improve eBPF verifier speed and increase maximum program size, from Alexei Starovoitov.
 12) Eliminate per-bucket spinlocks in rhashtable, and instead use bit spinlocks. From Neil Brown.
 13) Allow tunneling with GUE encap in ipvs, from Jacky Hu.
 14) Improve link partner cap detection in generic PHY code, from Heiner Kallweit.
 15) Add layer 2 encap support to bpf_skb_adjust_room(), from Alan Maguire.
 16) Remove SKB list implementation assumptions in SCTP, yours truly.
 17) Various cleanups, optimizations, and simplifications in r8169 driver. From Heiner Kallweit.
 18) Add memory accounting on TX and RX path of SCTP, from Xin Long.
 19) Switch PHY drivers over to use dynamic feature detection, from Heiner Kallweit.
 20) Support flow steering without masking in dpaa2-eth, from Ioana Ciocoi.
 21) Implement ndo_get_devlink_port in netdevsim driver, from Jiri Pirko.
 22) Increase the strict parsing of current and future netlink attributes, also export such policies to userspace. From Johannes Berg.
 23) Allow DSA tag drivers to be modular, from Andrew Lunn.
 24) Remove legacy DSA probing support, also from Andrew Lunn.
 25) Allow ll_temac driver to be used on non-x86 platforms, from Esben Haabendal.
 26) Add a generic tracepoint for TX queue timeouts to ease debugging, from Cong Wang.
 27) More indirect call optimizations, from Paolo Abeni"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1763 commits)
  cxgb4: Fix error path in cxgb4_init_module
  net: phy: improve pause mode reporting in phy_print_status
  dt-bindings: net: Fix a typo in the phy-mode list for ethernet bindings
  net: macb: Change interrupt and napi enable order in open
  net: ll_temac: Improve error message on error IRQ
  net/sched: remove block pointer from common offload structure
  net: ethernet: support of_get_mac_address new ERR_PTR error
  net: usb: smsc: fix warning reported by kbuild test robot
  staging: octeon-ethernet: Fix of_get_mac_address ERR_PTR check
  net: dsa: support of_get_mac_address new ERR_PTR error
  net: dsa: sja1105: Fix status initialization in sja1105_get_ethtool_stats
  vrf: sit mtu should not be updated when vrf netdev is the link
  net: dsa: Fix error cleanup path in dsa_init_module
  l2tp: Fix possible NULL pointer dereference
  taprio: add null check on sched_nest to avoid potential null pointer dereference
  net: mvpp2: cls: fix less than zero check on a u32 variable
  net_sched: sch_fq: handle non connected flows
  net_sched: sch_fq: do not assume EDT packets are ordered
  net: hns3: use devm_kcalloc when allocating desc_cb
  net: hns3: some cleanup for struct hns3_enet_ring
  ...
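Highlight 12 above is the change shown in the lib/rhashtable.c diff below: the separate per-bucket spinlock array (and BUCKET_LOCKS_PER_CPU) goes away, and each bucket is instead locked through bit 0 of its own head pointer. The following is a minimal user-space sketch of that idea, assuming C11 atomics; the names (bucket_t, bucket_lock, bucket_unlock_assign, bucket_head) are illustrative and are not the kernel's rht_lock()/rht_assign_unlock() API, and RCU publication is only approximated by a release store.

/*
 * Sketch only: a bucket head pointer whose low bit doubles as a spinlock,
 * similar in spirit to the rhash_lock_head buckets in the diff below.
 * The kernel uses bit_spin_lock() and RCU; this uses C11 atomics instead.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct node {
	struct node *next;
	int key;
};

/* Bucket head: a pointer value with bit 0 reserved as the lock bit. */
typedef _Atomic uintptr_t bucket_t;

#define BUCKET_LOCK_BIT ((uintptr_t)1)

static void bucket_lock(bucket_t *bkt)
{
	uintptr_t unlocked;

	for (;;) {
		/* Expect the current head with the lock bit clear... */
		unlocked = atomic_load_explicit(bkt, memory_order_relaxed) & ~BUCKET_LOCK_BIT;
		/* ...and try to set the lock bit; fails while someone holds it. */
		if (atomic_compare_exchange_weak_explicit(bkt, &unlocked,
							  unlocked | BUCKET_LOCK_BIT,
							  memory_order_acquire,
							  memory_order_relaxed))
			return;
	}
}

/* Publish a new head and drop the lock in a single release store. */
static void bucket_unlock_assign(bucket_t *bkt, struct node *new_head)
{
	atomic_store_explicit(bkt, (uintptr_t)new_head, memory_order_release);
}

/* Mask off the lock bit to recover the real head pointer. */
static struct node *bucket_head(bucket_t *bkt)
{
	return (struct node *)(atomic_load_explicit(bkt, memory_order_acquire) &
			       ~BUCKET_LOCK_BIT);
}

int main(void)
{
	bucket_t bucket = 0;
	struct node n = { .next = NULL, .key = 42 };

	bucket_lock(&bucket);
	n.next = bucket_head(&bucket);		/* head insert, as in the diff */
	bucket_unlock_assign(&bucket, &n);

	printf("head key = %d\n", bucket_head(&bucket)->key);
	return 0;
}

The combined publish-and-unlock store at the end is the point of the scheme: because the head pointer and the lock share one word, rht_assign_unlock() in the diff can install a new head and release the bucket in one release operation, and no per-bucket spinlock array needs to be allocated or sized.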
Diffstat (limited to 'lib/rhashtable.c')
-rw-r--r--  lib/rhashtable.c | 210
1 file changed, 102 insertions, 108 deletions
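A second pattern worth calling out before the diff: nested_table_alloc() now resolves allocation races without a lock by allocating a candidate table, attempting to install it with cmpxchg(prev, NULL, ntbl), and freeing the candidate if another thread already installed one. Below is a hedged user-space sketch of that install-or-free idiom, again using C11 atomics in place of the kernel's cmpxchg()/rcu_dereference(); get_or_alloc_table() and struct table are made-up names for illustration only.

/*
 * Sketch only: the "allocate, cmpxchg-install, free on race" idiom used by
 * nested_table_alloc() in the diff below, expressed with C11 atomics.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct table {
	int nslots;
};

static struct table *get_or_alloc_table(struct table *_Atomic *slot, int nslots)
{
	struct table *expected = NULL;
	struct table *candidate = calloc(1, sizeof(*candidate));

	if (!candidate)
		return atomic_load(slot);	/* allocation failed; use whatever exists */

	candidate->nslots = nslots;

	/* Install our candidate only if the slot is still empty. */
	if (atomic_compare_exchange_strong(slot, &expected, candidate))
		return candidate;

	/* Raced with another thread: it won, so drop our copy. */
	free(candidate);
	return expected;	/* the failed CAS wrote the winner's pointer here */
}

int main(void)
{
	struct table *_Atomic slot = NULL;
	struct table *t = get_or_alloc_table(&slot, 16);

	printf("installed table with %d slots\n", t ? t->nslots : 0);
	free(t);
	return 0;
}

The losing thread never blocks; it simply discards its allocation and continues with the table the winner published, which is what the kfree(ntbl) / rcu_dereference(*prev) pair in the diff does.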
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 97f59abc3e92..6529fe1b45c1 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -31,11 +31,10 @@
#define HASH_DEFAULT_SIZE 64UL
#define HASH_MIN_SIZE 4U
-#define BUCKET_LOCKS_PER_CPU 32UL
union nested_table {
union nested_table __rcu *table;
- struct rhash_head __rcu *bucket;
+ struct rhash_lock_head __rcu *bucket;
};
static u32 head_hashfn(struct rhashtable *ht,
@@ -56,9 +55,11 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
- spinlock_t *lock = rht_bucket_lock(tbl, hash);
-
- return (debug_locks) ? lockdep_is_held(lock) : 1;
+ if (!debug_locks)
+ return 1;
+ if (unlikely(tbl->nest))
+ return 1;
+ return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
@@ -104,7 +105,6 @@ static void bucket_table_free(const struct bucket_table *tbl)
if (tbl->nest)
nested_bucket_table_free(tbl);
- free_bucket_spinlocks(tbl->locks);
kvfree(tbl);
}
@@ -131,9 +131,11 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht,
INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
}
- rcu_assign_pointer(*prev, ntbl);
-
- return ntbl;
+ if (cmpxchg(prev, NULL, ntbl) == NULL)
+ return ntbl;
+ /* Raced with another thread. */
+ kfree(ntbl);
+ return rcu_dereference(*prev);
}
static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
@@ -169,11 +171,11 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
gfp_t gfp)
{
struct bucket_table *tbl = NULL;
- size_t size, max_locks;
+ size_t size;
int i;
+ static struct lock_class_key __key;
- size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
- tbl = kvzalloc(size, gfp);
+ tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
size = nbuckets;
@@ -185,18 +187,11 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
if (tbl == NULL)
return NULL;
- tbl->size = size;
-
- max_locks = size >> 1;
- if (tbl->nest)
- max_locks = min_t(size_t, max_locks, 1U << tbl->nest);
+ lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0);
- if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks,
- ht->p.locks_mul, gfp) < 0) {
- bucket_table_free(tbl);
- return NULL;
- }
+ tbl->size = size;
+ rcu_head_init(&tbl->rcu);
INIT_LIST_HEAD(&tbl->walkers);
tbl->hash_rnd = get_random_u32();
@@ -220,14 +215,15 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
return new_tbl;
}
-static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
+static int rhashtable_rehash_one(struct rhashtable *ht,
+ struct rhash_lock_head __rcu **bkt,
+ unsigned int old_hash)
{
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
- struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
int err = -EAGAIN;
struct rhash_head *head, *next, *entry;
- spinlock_t *new_bucket_lock;
+ struct rhash_head __rcu **pprev = NULL;
unsigned int new_hash;
if (new_tbl->nest)
@@ -235,7 +231,8 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
err = -ENOENT;
- rht_for_each(entry, old_tbl, old_hash) {
+ rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
+ old_tbl, old_hash) {
err = 0;
next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
@@ -250,18 +247,19 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
new_hash = head_hashfn(ht, new_tbl, entry);
- new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);
+ rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING);
- spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
- head = rht_dereference_bucket(new_tbl->buckets[new_hash],
- new_tbl, new_hash);
+ head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
RCU_INIT_POINTER(entry->next, head);
- rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
- spin_unlock(new_bucket_lock);
+ rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry);
- rcu_assign_pointer(*pprev, next);
+ if (pprev)
+ rcu_assign_pointer(*pprev, next);
+ else
+ /* Need to preserve the bit lock. */
+ rht_assign_locked(bkt, next);
out:
return err;
@@ -271,20 +269,19 @@ static int rhashtable_rehash_chain(struct rhashtable *ht,
unsigned int old_hash)
{
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
- spinlock_t *old_bucket_lock;
+ struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
int err;
- old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
+ if (!bkt)
+ return 0;
+ rht_lock(old_tbl, bkt);
- spin_lock_bh(old_bucket_lock);
- while (!(err = rhashtable_rehash_one(ht, old_hash)))
+ while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
;
- if (err == -ENOENT) {
- old_tbl->rehash++;
+ if (err == -ENOENT)
err = 0;
- }
- spin_unlock_bh(old_bucket_lock);
+ rht_unlock(old_tbl, bkt);
return err;
}
@@ -330,13 +327,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
spin_lock(&ht->lock);
list_for_each_entry(walker, &old_tbl->walkers, list)
walker->tbl = NULL;
- spin_unlock(&ht->lock);
/* Wait for readers. All new readers will see the new
* table, and thus no references to the old table will
* remain.
+ * We do this inside the locked region so that
+ * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
+ * to check if it should not re-link the table.
*/
call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
+ spin_unlock(&ht->lock);
return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}
@@ -478,6 +478,7 @@ fail:
}
static void *rhashtable_lookup_one(struct rhashtable *ht,
+ struct rhash_lock_head __rcu **bkt,
struct bucket_table *tbl, unsigned int hash,
const void *key, struct rhash_head *obj)
{
@@ -485,13 +486,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
.ht = ht,
.key = key,
};
- struct rhash_head __rcu **pprev;
+ struct rhash_head __rcu **pprev = NULL;
struct rhash_head *head;
int elasticity;
elasticity = RHT_ELASTICITY;
- pprev = rht_bucket_var(tbl, hash);
- rht_for_each_continue(head, *pprev, tbl, hash) {
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *list;
struct rhlist_head *plist;
@@ -513,7 +513,11 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
RCU_INIT_POINTER(list->next, plist);
head = rht_dereference_bucket(head->next, tbl, hash);
RCU_INIT_POINTER(list->rhead.next, head);
- rcu_assign_pointer(*pprev, obj);
+ if (pprev)
+ rcu_assign_pointer(*pprev, obj);
+ else
+ /* Need to preserve the bit lock */
+ rht_assign_locked(bkt, obj);
return NULL;
}
@@ -525,12 +529,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
}
static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
+ struct rhash_lock_head __rcu **bkt,
struct bucket_table *tbl,
unsigned int hash,
struct rhash_head *obj,
void *data)
{
- struct rhash_head __rcu **pprev;
struct bucket_table *new_tbl;
struct rhash_head *head;
@@ -553,11 +557,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
if (unlikely(rht_grow_above_100(ht, tbl)))
return ERR_PTR(-EAGAIN);
- pprev = rht_bucket_insert(ht, tbl, hash);
- if (!pprev)
- return ERR_PTR(-ENOMEM);
-
- head = rht_dereference_bucket(*pprev, tbl, hash);
+ head = rht_ptr(bkt, tbl, hash);
RCU_INIT_POINTER(obj->next, head);
if (ht->rhlist) {
@@ -567,7 +567,10 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
RCU_INIT_POINTER(list->next, NULL);
}
- rcu_assign_pointer(*pprev, obj);
+ /* bkt is always the head of the list, so it holds
+ * the lock, which we need to preserve
+ */
+ rht_assign_locked(bkt, obj);
atomic_inc(&ht->nelems);
if (rht_grow_above_75(ht, tbl))
@@ -581,47 +584,35 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
{
struct bucket_table *new_tbl;
struct bucket_table *tbl;
+ struct rhash_lock_head __rcu **bkt;
unsigned int hash;
- spinlock_t *lock;
void *data;
- tbl = rcu_dereference(ht->tbl);
-
- /* All insertions must grab the oldest table containing
- * the hashed bucket that is yet to be rehashed.
- */
- for (;;) {
- hash = rht_head_hashfn(ht, tbl, obj, ht->p);
- lock = rht_bucket_lock(tbl, hash);
- spin_lock_bh(lock);
-
- if (tbl->rehash <= hash)
- break;
-
- spin_unlock_bh(lock);
- tbl = rht_dereference_rcu(tbl->future_tbl, ht);
- }
-
- data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
- new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
- if (PTR_ERR(new_tbl) != -EEXIST)
- data = ERR_CAST(new_tbl);
+ new_tbl = rcu_dereference(ht->tbl);
- while (!IS_ERR_OR_NULL(new_tbl)) {
+ do {
tbl = new_tbl;
hash = rht_head_hashfn(ht, tbl, obj, ht->p);
- spin_lock_nested(rht_bucket_lock(tbl, hash),
- SINGLE_DEPTH_NESTING);
-
- data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
- new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
- if (PTR_ERR(new_tbl) != -EEXIST)
- data = ERR_CAST(new_tbl);
-
- spin_unlock(rht_bucket_lock(tbl, hash));
- }
-
- spin_unlock_bh(lock);
+ if (rcu_access_pointer(tbl->future_tbl))
+ /* Failure is OK */
+ bkt = rht_bucket_var(tbl, hash);
+ else
+ bkt = rht_bucket_insert(ht, tbl, hash);
+ if (bkt == NULL) {
+ new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ data = ERR_PTR(-EAGAIN);
+ } else {
+ rht_lock(tbl, bkt);
+ data = rhashtable_lookup_one(ht, bkt, tbl,
+ hash, key, obj);
+ new_tbl = rhashtable_insert_one(ht, bkt, tbl,
+ hash, obj, data);
+ if (PTR_ERR(new_tbl) != -EEXIST)
+ data = ERR_CAST(new_tbl);
+
+ rht_unlock(tbl, bkt);
+ }
+ } while (!IS_ERR_OR_NULL(new_tbl));
if (PTR_ERR(data) == -EAGAIN)
data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
@@ -943,10 +934,11 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
ht = iter->ht;
spin_lock(&ht->lock);
- if (tbl->rehash < tbl->size)
- list_add(&iter->walker.list, &tbl->walkers);
- else
+ if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
+ /* This bucket table is being freed, don't re-link it. */
iter->walker.tbl = NULL;
+ else
+ list_add(&iter->walker.list, &tbl->walkers);
spin_unlock(&ht->lock);
out:
@@ -1046,11 +1038,6 @@ int rhashtable_init(struct rhashtable *ht,
size = rounded_hashtable_size(&ht->p);
- if (params->locks_mul)
- ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
- else
- ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
-
ht->key_len = ht->p.key_len;
if (!params->hashfn) {
ht->p.hashfn = jhash;
@@ -1152,7 +1139,7 @@ restart:
struct rhash_head *pos, *next;
cond_resched();
- for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
+ for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
next = !rht_is_a_nulls(pos) ?
rht_dereference(pos->next, ht) : NULL;
!rht_is_a_nulls(pos);
@@ -1179,11 +1166,10 @@ void rhashtable_destroy(struct rhashtable *ht)
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);
-struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
- unsigned int hash)
+struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl,
+ unsigned int hash)
{
const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
- static struct rhash_head __rcu *rhnull;
unsigned int index = hash & ((1 << tbl->nest) - 1);
unsigned int size = tbl->size >> tbl->nest;
unsigned int subhash = hash;
@@ -1201,20 +1187,28 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
subhash >>= shift;
}
- if (!ntbl) {
- if (!rhnull)
- INIT_RHT_NULLS_HEAD(rhnull);
- return &rhnull;
- }
+ if (!ntbl)
+ return NULL;
return &ntbl[subhash].bucket;
}
+EXPORT_SYMBOL_GPL(__rht_bucket_nested);
+
+struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+ unsigned int hash)
+{
+ static struct rhash_lock_head __rcu *rhnull;
+
+ if (!rhnull)
+ INIT_RHT_NULLS_HEAD(rhnull);
+ return __rht_bucket_nested(tbl, hash) ?: &rhnull;
+}
EXPORT_SYMBOL_GPL(rht_bucket_nested);
-struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
- struct bucket_table *tbl,
- unsigned int hash)
+struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+ struct bucket_table *tbl,
+ unsigned int hash)
{
const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
unsigned int index = hash & ((1 << tbl->nest) - 1);