aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--include/linux/percpu-rwsem.h83
-rw-r--r--include/linux/rwsem.h6
-rw-r--r--include/linux/wait.h1
-rw-r--r--kernel/cpu.c4
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/locking/lockdep.c331
-rw-r--r--kernel/locking/lockdep_internals.h14
-rw-r--r--kernel/locking/lockdep_proc.c31
-rw-r--r--kernel/locking/percpu-rwsem.c192
-rw-r--r--kernel/locking/rwsem.c7
-rw-r--r--kernel/locking/rwsem.h10
11 files changed, 499 insertions, 181 deletions
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index ad2ca2a89d5b..5e033fe1ff4e 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -3,41 +3,52 @@
#define _LINUX_PERCPU_RWSEM_H
#include <linux/atomic.h>
-#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/rcuwait.h>
+#include <linux/wait.h>
#include <linux/rcu_sync.h>
#include <linux/lockdep.h>
struct percpu_rw_semaphore {
struct rcu_sync rss;
unsigned int __percpu *read_count;
- struct rw_semaphore rw_sem; /* slowpath */
- struct rcuwait writer; /* blocked writer */
- int readers_block;
+ struct rcuwait writer;
+ wait_queue_head_t waiters;
+ atomic_t block;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
};
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname },
+#else
+#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname)
+#endif
+
#define __DEFINE_PERCPU_RWSEM(name, is_static) \
static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \
is_static struct percpu_rw_semaphore name = { \
.rss = __RCU_SYNC_INITIALIZER(name.rss), \
.read_count = &__percpu_rwsem_rc_##name, \
- .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
.writer = __RCUWAIT_INITIALIZER(name.writer), \
+ .waiters = __WAIT_QUEUE_HEAD_INITIALIZER(name.waiters), \
+ .block = ATOMIC_INIT(0), \
+ __PERCPU_RWSEM_DEP_MAP_INIT(name) \
}
+
#define DEFINE_PERCPU_RWSEM(name) \
__DEFINE_PERCPU_RWSEM(name, /* not static */)
#define DEFINE_STATIC_PERCPU_RWSEM(name) \
__DEFINE_PERCPU_RWSEM(name, static)
-extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
-extern void __percpu_up_read(struct percpu_rw_semaphore *);
+extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
{
might_sleep();
- rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
+ rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
preempt_disable();
/*
@@ -48,8 +59,9 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
* and that once the synchronize_rcu() is done, the writer will see
* anything we did within this RCU-sched read-size critical section.
*/
- __this_cpu_inc(*sem->read_count);
- if (unlikely(!rcu_sync_is_idle(&sem->rss)))
+ if (likely(rcu_sync_is_idle(&sem->rss)))
+ __this_cpu_inc(*sem->read_count);
+ else
__percpu_down_read(sem, false); /* Unconditional memory barrier */
/*
* The preempt_enable() prevents the compiler from
@@ -58,16 +70,17 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
preempt_enable();
}
-static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
+static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
- int ret = 1;
+ bool ret = true;
preempt_disable();
/*
* Same as in percpu_down_read().
*/
- __this_cpu_inc(*sem->read_count);
- if (unlikely(!rcu_sync_is_idle(&sem->rss)))
+ if (likely(rcu_sync_is_idle(&sem->rss)))
+ __this_cpu_inc(*sem->read_count);
+ else
ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
preempt_enable();
/*
@@ -76,24 +89,36 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
*/
if (ret)
- rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
+ rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
return ret;
}
static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
{
+ rwsem_release(&sem->dep_map, _RET_IP_);
+
preempt_disable();
/*
* Same as in percpu_down_read().
*/
- if (likely(rcu_sync_is_idle(&sem->rss)))
+ if (likely(rcu_sync_is_idle(&sem->rss))) {
__this_cpu_dec(*sem->read_count);
- else
- __percpu_up_read(sem); /* Unconditional memory barrier */
+ } else {
+ /*
+ * slowpath; reader will only ever wake a single blocked
+ * writer.
+ */
+ smp_mb(); /* B matches C */
+ /*
+ * In other words, if they see our decrement (presumably to
+ * aggregate zero, as that is the only time it matters) they
+ * will also see our critical section.
+ */
+ __this_cpu_dec(*sem->read_count);
+ rcuwait_wake_up(&sem->writer);
+ }
preempt_enable();
-
- rwsem_release(&sem->rw_sem.dep_map, _RET_IP_);
}
extern void percpu_down_write(struct percpu_rw_semaphore *);
@@ -110,29 +135,19 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
__percpu_init_rwsem(sem, #sem, &rwsem_key); \
})
-#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
-
-#define percpu_rwsem_assert_held(sem) \
- lockdep_assert_held(&(sem)->rw_sem)
+#define percpu_rwsem_is_held(sem) lockdep_is_held(sem)
+#define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem)
static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
- lock_release(&sem->rw_sem.dep_map, ip);
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
- if (!read)
- atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
-#endif
+ lock_release(&sem->dep_map, ip);
}
static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
- lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
- if (!read)
- atomic_long_set(&sem->rw_sem.owner, (long)current);
-#endif
+ lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip);
}
#endif
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 00d6054687dd..8a418d9eeb7a 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -53,12 +53,6 @@ struct rw_semaphore {
#endif
};
-/*
- * Setting all bits of the owner field except bit 0 will indicate
- * that the rwsem is writer-owned with an unknown owner.
- */
-#define RWSEM_OWNER_UNKNOWN (-2L)
-
/* In all implementations count != 0 means locked */
static inline int rwsem_is_locked(struct rw_semaphore *sem)
{
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 3283c8d02137..feeb6be5cad6 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -20,6 +20,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int
#define WQ_FLAG_EXCLUSIVE 0x01
#define WQ_FLAG_WOKEN 0x02
#define WQ_FLAG_BOOKMARK 0x04
+#define WQ_FLAG_CUSTOM 0x08
/*
* A single wait-queue entry structure:
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9c706af713fb..221bf6a9e98a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -331,12 +331,12 @@ void lockdep_assert_cpus_held(void)
static void lockdep_acquire_cpus_lock(void)
{
- rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_);
+ rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
}
static void lockdep_release_cpus_lock(void)
{
- rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, _THIS_IP_);
+ rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
}
/*
diff --git a/kernel/exit.c b/kernel/exit.c
index 2833ffb0c211..f64a8f9d412a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -258,6 +258,7 @@ void rcuwait_wake_up(struct rcuwait *w)
wake_up_process(task);
rcu_read_unlock();
}
+EXPORT_SYMBOL_GPL(rcuwait_wake_up);
/*
* Determine if a process group is "orphaned", according to the POSIX
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 32406ef0d6a2..e55c4ee14e64 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -147,6 +147,7 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES);
#define KEYHASH_SIZE (1UL << KEYHASH_BITS)
static struct hlist_head lock_keys_hash[KEYHASH_SIZE];
unsigned long nr_lock_classes;
+unsigned long nr_zapped_classes;
#ifndef CONFIG_DEBUG_LOCKDEP
static
#endif
@@ -1070,13 +1071,15 @@ static inline void check_data_structures(void) { }
#endif /* CONFIG_DEBUG_LOCKDEP */
+static void init_chain_block_buckets(void);
+
/*
* Initialize the lock_classes[] array elements, the free_lock_classes list
* and also the delayed_free structure.
*/
static void init_data_structures_once(void)
{
- static bool ds_initialized, rcu_head_initialized;
+ static bool __read_mostly ds_initialized, rcu_head_initialized;
int i;
if (likely(rcu_head_initialized))
@@ -1100,6 +1103,7 @@ static void init_data_structures_once(void)
INIT_LIST_HEAD(&lock_classes[i].locks_after);
INIT_LIST_HEAD(&lock_classes[i].locks_before);
}
+ init_chain_block_buckets();
}
static inline struct hlist_head *keyhashentry(const struct lock_class_key *key)
@@ -2298,18 +2302,6 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
return 0;
}
-static void inc_chains(void)
-{
- if (current->hardirq_context)
- nr_hardirq_chains++;
- else {
- if (current->softirq_context)
- nr_softirq_chains++;
- else
- nr_process_chains++;
- }
-}
-
#else
static inline int check_irq_usage(struct task_struct *curr,
@@ -2317,13 +2309,27 @@ static inline int check_irq_usage(struct task_struct *curr,
{
return 1;
}
+#endif /* CONFIG_TRACE_IRQFLAGS */
-static inline void inc_chains(void)
+static void inc_chains(int irq_context)
{
- nr_process_chains++;
+ if (irq_context & LOCK_CHAIN_HARDIRQ_CONTEXT)
+ nr_hardirq_chains++;
+ else if (irq_context & LOCK_CHAIN_SOFTIRQ_CONTEXT)
+ nr_softirq_chains++;
+ else
+ nr_process_chains++;
}
-#endif /* CONFIG_TRACE_IRQFLAGS */
+static void dec_chains(int irq_context)
+{
+ if (irq_context & LOCK_CHAIN_HARDIRQ_CONTEXT)
+ nr_hardirq_chains--;
+ else if (irq_context & LOCK_CHAIN_SOFTIRQ_CONTEXT)
+ nr_softirq_chains--;
+ else
+ nr_process_chains--;
+}
static void
print_deadlock_scenario(struct held_lock *nxt, struct held_lock *prv)
@@ -2622,8 +2628,235 @@ out_bug:
struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS];
static DECLARE_BITMAP(lock_chains_in_use, MAX_LOCKDEP_CHAINS);
-int nr_chain_hlocks;
static u16 chain_hlocks[MAX_LOCKDEP_CHAIN_HLOCKS];
+unsigned long nr_zapped_lock_chains;
+unsigned int nr_free_chain_hlocks; /* Free chain_hlocks in buckets */
+unsigned int nr_lost_chain_hlocks; /* Lost chain_hlocks */
+unsigned int nr_large_chain_blocks; /* size > MAX_CHAIN_BUCKETS */
+
+/*
+ * The first 2 chain_hlocks entries in the chain block in the bucket
+ * list contains the following meta data:
+ *
+ * entry[0]:
+ * Bit 15 - always set to 1 (it is not a class index)
+ * Bits 0-14 - upper 15 bits of the next block index
+ * entry[1] - lower 16 bits of next block index
+ *
+ * A next block index of all 1 bits means it is the end of the list.
+ *
+ * On the unsized bucket (bucket-0), the 3rd and 4th entries contain
+ * the chain block size:
+ *
+ * entry[2] - upper 16 bits of the chain block size
+ * entry[3] - lower 16 bits of the chain block size
+ */
+#define MAX_CHAIN_BUCKETS 16
+#define CHAIN_BLK_FLAG (1U << 15)
+#define CHAIN_BLK_LIST_END 0xFFFFU
+
+static int chain_block_buckets[MAX_CHAIN_BUCKETS];
+
+static inline int size_to_bucket(int size)
+{
+ if (size > MAX_CHAIN_BUCKETS)
+ return 0;
+
+ return size - 1;
+}
+
+/*
+ * Iterate all the chain blocks in a bucket.
+ */
+#define for_each_chain_block(bucket, prev, curr) \
+ for ((prev) = -1, (curr) = chain_block_buckets[bucket]; \
+ (curr) >= 0; \
+ (prev) = (curr), (curr) = chain_block_next(curr))
+
+/*
+ * next block or -1
+ */
+static inline int chain_block_next(int offset)
+{
+ int next = chain_hlocks[offset];
+
+ WARN_ON_ONCE(!(next & CHAIN_BLK_FLAG));
+
+ if (next == CHAIN_BLK_LIST_END)
+ return -1;
+
+ next &= ~CHAIN_BLK_FLAG;
+ next <<= 16;
+ next |= chain_hlocks[offset + 1];
+
+ return next;
+}
+
+/*
+ * bucket-0 only
+ */
+static inline int chain_block_size(int offset)
+{
+ return (chain_hlocks[offset + 2] << 16) | chain_hlocks[offset + 3];
+}
+
+static inline void init_chain_block(int offset, int next, int bucket, int size)
+{
+ chain_hlocks[offset] = (next >> 16) | CHAIN_BLK_FLAG;
+ chain_hlocks[offset + 1] = (u16)next;
+
+ if (size && !bucket) {
+ chain_hlocks[offset + 2] = size >> 16;
+ chain_hlocks[offset + 3] = (u16)size;
+ }
+}
+
+static inline void add_chain_block(int offset, int size)
+{
+ int bucket = size_to_bucket(size);
+ int next = chain_block_buckets[bucket];
+ int prev, curr;
+
+ if (unlikely(size < 2)) {
+ /*
+ * We can't store single entries on the freelist. Leak them.
+ *
+ * One possible way out would be to uniquely mark them, other
+ * than with CHAIN_BLK_FLAG, such that we can recover them when
+ * the block before it is re-added.
+ */
+ if (size)
+ nr_lost_chain_hlocks++;
+ return;
+ }
+
+ nr_free_chain_hlocks += size;
+ if (!bucket) {
+ nr_large_chain_blocks++;
+
+ /*
+ * Variable sized, sort large to small.
+ */
+ for_each_chain_block(0, prev, curr) {
+ if (size >= chain_block_size(curr))
+ break;
+ }
+ init_chain_block(offset, curr, 0, size);
+ if (prev < 0)
+ chain_block_buckets[0] = offset;
+ else
+ init_chain_block(prev, offset, 0, 0);
+ return;
+ }
+ /*
+ * Fixed size, add to head.
+ */
+ init_chain_block(offset, next, bucket, size);
+ chain_block_buckets[bucket] = offset;
+}
+
+/*
+ * Only the first block in the list can be deleted.
+ *
+ * For the variable size bucket[0], the first block (the largest one) is
+ * returned, broken up and put back into the pool. So if a chain block of
+ * length > MAX_CHAIN_BUCKETS is ever used and zapped, it will just be
+ * queued up after the primordial chain block and never be used until the
+ * hlock entries in the primordial chain block is almost used up. That
+ * causes fragmentation and reduce allocation efficiency. That can be
+ * monitored by looking at the "large chain blocks" number in lockdep_stats.
+ */
+static inline void del_chain_block(int bucket, int size, int next)
+{
+ nr_free_chain_hlocks -= size;
+ chain_block_buckets[bucket] = next;
+
+ if (!bucket)
+ nr_large_chain_blocks--;
+}
+
+static void init_chain_block_buckets(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_CHAIN_BUCKETS; i++)
+ chain_block_buckets[i] = -1;
+
+ add_chain_block(0, ARRAY_SIZE(chain_hlocks));
+}
+
+/*
+ * Return offset of a chain block of the right size or -1 if not found.
+ *
+ * Fairly simple worst-fit allocator with the addition of a number of size
+ * specific free lists.
+ */
+static int alloc_chain_hlocks(int req)
+{
+ int bucket, curr, size;
+
+ /*
+ * We rely on the MSB to act as an escape bit to denote freelist
+ * pointers. Make sure this bit isn't set in 'normal' class_idx usage.
+ */
+ BUILD_BUG_ON((MAX_LOCKDEP_KEYS-1) & CHAIN_BLK_FLAG);
+
+ init_data_structures_once();
+
+ if (nr_free_chain_hlocks < req)
+ return -1;
+
+ /*
+ * We require a minimum of 2 (u16) entries to encode a freelist
+ * 'pointer'.
+ */
+ req = max(req, 2);
+ bucket = size_to_bucket(req);
+ curr = chain_block_buckets[bucket];
+
+ if (bucket) {
+ if (curr >= 0) {
+ del_chain_block(bucket, req, chain_block_next(curr));
+ return curr;
+ }
+ /* Try bucket 0 */
+ curr = chain_block_buckets[0];
+ }
+
+ /*
+ * The variable sized freelist is sorted by size; the first entry is
+ * the largest. Use it if it fits.
+ */
+ if (curr >= 0) {
+ size = chain_block_size(curr);
+ if (likely(size >= req)) {
+ del_chain_block(0, size, chain_block_next(curr));
+ add_chain_block(curr + req, size - req);
+ return curr;
+ }
+ }
+
+ /*
+ * Last resort, split a block in a larger sized bucket.
+ */
+ for (size = MAX_CHAIN_BUCKETS; size > req; size--) {
+ bucket = size_to_bucket(size);
+ curr = chain_block_buckets[bucket];
+ if (curr < 0)
+ continue;
+
+ del_chain_block(bucket, size, chain_block_next(curr));
+ add_chain_block(curr + req, size - req);
+ return curr;
+ }
+
+ return -1;
+}
+
+static inline void free_chain_hlocks(int base, int size)
+{
+ add_chain_block(base, max(size, 2));
+}
struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i)
{
@@ -2824,15 +3057,8 @@ static inline int add_chain_cache(struct task_struct *curr,
BUILD_BUG_ON((1UL << 6) <= ARRAY_SIZE(curr->held_locks));
BUILD_BUG_ON((1UL << 8*sizeof(chain_hlocks[0])) <= ARRAY_SIZE(lock_classes));
- if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
- chain->base = nr_chain_hlocks;
- for (j = 0; j < chain->depth - 1; j++, i++) {
- int lock_id = curr->held_locks[i].class_idx;
- chain_hlocks[chain->base + j] = lock_id;
- }
- chain_hlocks[chain->base + j] = class - lock_classes;
- nr_chain_hlocks += chain->depth;
- } else {
+ j = alloc_chain_hlocks(chain->depth);
+ if (j < 0) {
if (!debug_locks_off_graph_unlock())
return 0;
@@ -2841,9 +3067,16 @@ static inline int add_chain_cache(struct task_struct *curr,
return 0;
}
+ chain->base = j;
+ for (j = 0; j < chain->depth - 1; j++, i++) {
+ int lock_id = curr->held_locks[i].class_idx;
+
+ chain_hlocks[chain->base + j] = lock_id;
+ }
+ chain_hlocks[chain->base + j] = class - lock_classes;
hlist_add_head_rcu(&chain->entry, hash_head);
debug_atomic_inc(chain_lookup_misses);
- inc_chains();
+ inc_chains(chain->irq_context);
return 1;
}
@@ -2987,6 +3220,8 @@ static inline int validate_chain(struct task_struct *curr,
{
return 1;
}
+
+static void init_chain_block_buckets(void) { }
#endif /* CONFIG_PROVE_LOCKING */
/*
@@ -3596,7 +3831,8 @@ lock_used:
static inline unsigned int task_irq_context(struct task_struct *task)
{
- return 2 * !!task->hardirq_context + !!task->softirq_context;
+ return LOCK_CHAIN_HARDIRQ_CONTEXT * !!task->hardirq_context +
+ LOCK_CHAIN_SOFTIRQ_CONTEXT * !!task->softirq_context;
}
static int separate_irq_context(struct task_struct *curr,
@@ -4768,57 +5004,33 @@ static void remove_class_from_lock_chain(struct pending_free *pf,
struct lock_class *class)
{
#ifdef CONFIG_PROVE_LOCKING
- struct lock_chain *new_chain;
- u64 chain_key;
int i;
for (i = chain->base; i < chain->base + chain->depth; i++) {
if (chain_hlocks[i] != class - lock_classes)
continue;
- /* The code below leaks one chain_hlock[] entry. */
- if (--chain->depth > 0) {
- memmove(&chain_hlocks[i], &chain_hlocks[i + 1],
- (chain->base + chain->depth - i) *
- sizeof(chain_hlocks[0]));
- }
/*
* Each lock class occurs at most once in a lock chain so once
* we found a match we can break out of this loop.
*/
- goto recalc;
+ goto free_lock_chain;
}
/* Since the chain has not been modified, return. */
return;
-recalc:
- chain_key = INITIAL_CHAIN_KEY;
- for (i = chain->base; i < chain->base + chain->depth; i++)
- chain_key = iterate_chain_key(chain_key, chain_hlocks[i]);
- if (chain->depth && chain->chain_key == chain_key)
- return;
+free_lock_chain:
+ free_chain_hlocks(chain->base, chain->depth);
/* Overwrite the chain key for concurrent RCU readers. */
- WRITE_ONCE(chain->chain_key, chain_key);
+ WRITE_ONCE(chain->chain_key, INITIAL_CHAIN_KEY);
+ dec_chains(chain->irq_context);
+
/*
* Note: calling hlist_del_rcu() from inside a
* hlist_for_each_entry_rcu() loop is safe.
*/
hlist_del_rcu(&chain->entry);
__set_bit(chain - lock_chains, pf->lock_chains_being_freed);
- if (chain->depth == 0)
- return;
- /*
- * If the modified lock chain matches an existing lock chain, drop
- * the modified lock chain.
- */
- if (lookup_chain_cache(chain_key))
- return;
- new_chain = alloc_lock_chain();
- if (WARN_ON_ONCE(!new_chain)) {
- debug_locks_off();
- return;
- }
- *new_chain = *chain;
- hlist_add_head_rcu(&new_chain->entry, chainhashentry(chain_key));
+ nr_zapped_lock_chains++;
#endif
}
@@ -4874,6 +5086,7 @@ static void zap_class(struct pending_free *pf, struct lock_class *class)
}
remove_class_from_lock_chains(pf, class);
+ nr_zapped_classes++;
}
static void reinit_class(struct lock_class *class)
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index 18d85aebbb57..baca699b94e9 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -106,6 +106,12 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ =
#define STACK_TRACE_HASH_SIZE 16384
#endif
+/*
+ * Bit definitions for lock_chain.irq_context
+ */
+#define LOCK_CHAIN_SOFTIRQ_CONTEXT (1 << 0)
+#define LOCK_CHAIN_HARDIRQ_CONTEXT (1 << 1)
+
#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
@@ -124,17 +130,21 @@ extern const char *__get_key_name(const struct lockdep_subclass_key *key,
struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i);
extern unsigned long nr_lock_classes;
+extern unsigned long nr_zapped_classes;
+extern unsigned long nr_zapped_lock_chains;
extern unsigned long nr_list_entries;
long lockdep_next_lockchain(long i);
unsigned long lock_chain_count(void);
-extern int nr_chain_hlocks;
extern unsigned long nr_stack_trace_entries;
extern unsigned int nr_hardirq_chains;
extern unsigned int nr_softirq_chains;
extern unsigned int nr_process_chains;
-extern unsigned int max_lockdep_depth;
+extern unsigned int nr_free_chain_hlocks;
+extern unsigned int nr_lost_chain_hlocks;
+extern unsigned int nr_large_chain_blocks;
+extern unsigned int max_lockdep_depth;
extern unsigned int max_bfs_queue_depth;
#ifdef CONFIG_PROVE_LOCKING
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index 231684cfc5ae..5525cd3ba0c8 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -128,15 +128,22 @@ static int lc_show(struct seq_file *m, void *v)
struct lock_chain *chain = v;
struct lock_class *class;
int i;
+ static const char * const irq_strs[] = {
+ [0] = "0",
+ [LOCK_CHAIN_HARDIRQ_CONTEXT] = "hardirq",
+ [LOCK_CHAIN_SOFTIRQ_CONTEXT] = "softirq",
+ [LOCK_CHAIN_SOFTIRQ_CONTEXT|
+ LOCK_CHAIN_HARDIRQ_CONTEXT] = "hardirq|softirq",
+ };
if (v == SEQ_START_TOKEN) {
- if (nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)
+ if (!nr_free_chain_hlocks)
seq_printf(m, "(buggered) ");
seq_printf(m, "all lock chains:\n");
return 0;
}
- seq_printf(m, "irq_context: %d\n", chain->irq_context);
+ seq_printf(m, "irq_context: %s\n", irq_strs[chain->irq_context]);
for (i = 0; i < chain->depth; i++) {
class = lock_chain_get_class(chain, i);
@@ -271,8 +278,12 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
#ifdef CONFIG_PROVE_LOCKING
seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
lock_chain_count(), MAX_LOCKDEP_CHAINS);
- seq_printf(m, " dependency chain hlocks: %11d [max: %lu]\n",
- nr_chain_hlocks, MAX_LOCKDEP_CHAIN_HLOCKS);
+ seq_printf(m, " dependency chain hlocks used: %11lu [max: %lu]\n",
+ MAX_LOCKDEP_CHAIN_HLOCKS -
+ (nr_free_chain_hlocks + nr_lost_chain_hlocks),
+ MAX_LOCKDEP_CHAIN_HLOCKS);
+ seq_printf(m, " dependency chain hlocks lost: %11u\n",
+ nr_lost_chain_hlocks);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -336,6 +347,18 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
seq_printf(m, " debug_locks: %11u\n",
debug_locks);
+ /*
+ * Zappped classes and lockdep data buffers reuse statistics.
+ */
+ seq_puts(m, "\n");
+ seq_printf(m, " zapped classes: %11lu\n",
+ nr_zapped_classes);
+#ifdef CONFIG_PROVE_LOCKING
+ seq_printf(m, " zapped lock chains: %11lu\n",
+ nr_zapped_lock_chains);
+ seq_printf(m, " large chain blocks: %11u\n",
+ nr_large_chain_blocks);
+#endif
return 0;
}
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 364d38a0c444..183a3aaa8509 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -1,27 +1,29 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/atomic.h>
-#include <linux/rwsem.h>
#include <linux/percpu.h>
+#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
+#include <linux/sched/task.h>
#include <linux/errno.h>
-#include "rwsem.h"
-
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
- const char *name, struct lock_class_key *rwsem_key)
+ const char *name, struct lock_class_key *key)
{
sem->read_count = alloc_percpu(int);
if (unlikely(!sem->read_count))
return -ENOMEM;
- /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
rcu_sync_init(&sem->rss);
- __init_rwsem(&sem->rw_sem, name, rwsem_key);
rcuwait_init(&sem->writer);
- sem->readers_block = 0;
+ init_waitqueue_head(&sem->waiters);
+ atomic_set(&sem->block, 0);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+ lockdep_init_map(&sem->dep_map, name, key, 0);
+#endif
return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
@@ -41,73 +43,139 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
-int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
+static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
+ __this_cpu_inc(*sem->read_count);
+
/*
* Due to having preemption disabled the decrement happens on
* the same CPU as the increment, avoiding the
* increment-on-one-CPU-and-decrement-on-another problem.
*
- * If the reader misses the writer's assignment of readers_block, then
- * the writer is guaranteed to see the reader's increment.
+ * If the reader misses the writer's assignment of sem->block, then the
+ * writer is guaranteed to see the reader's increment.
*
* Conversely, any readers that increment their sem->read_count after
- * the writer looks are guaranteed to see the readers_block value,
- * which in turn means that they are guaranteed to immediately
- * decrement their sem->read_count, so that it doesn't matter that the
- * writer missed them.
+ * the writer looks are guaranteed to see the sem->block value, which
+ * in turn means that they are guaranteed to immediately decrement
+ * their sem->read_count, so that it doesn't matter that the writer
+ * missed them.
*/
smp_mb(); /* A matches D */
/*
- * If !readers_block the critical section starts here, matched by the
+ * If !sem->block the critical section starts here, matched by the
* release in percpu_up_write().
*/
- if (likely(!smp_load_acquire(&sem->readers_block)))
+ if (likely(!atomic_read_acquire(&sem->block)))
+ return true;
+
+ __this_cpu_dec(*sem->read_count);
+
+ /* Prod writer to re-evaluate readers_active_check() */
+ rcuwait_wake_up(&sem->writer);
+
+ return false;
+}
+
+static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
+{
+ if (atomic_read(&sem->block))
+ return false;
+
+ return atomic_xchg(&sem->block, 1) == 0;
+}
+
+static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
+{
+ if (reader) {
+ bool ret;
+
+ preempt_disable();
+ ret = __percpu_down_read_trylock(sem);
+ preempt_enable();
+
+ return ret;
+ }
+ return __percpu_down_write_trylock(sem);
+}
+
+/*
+ * The return value of wait_queue_entry::func means:
+ *
+ * <0 - error, wakeup is terminated and the error is returned
+ * 0 - no wakeup, a next waiter is tried
+ * >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
+ *
+ * We use EXCLUSIVE for both readers and writers to preserve FIFO order,
+ * and play games with the return value to allow waking multiple readers.
+ *
+ * Specifically, we wake readers until we've woken a single writer, or until a
+ * trylock fails.
+ */
+static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
+ unsigned int mode, int wake_flags,
+ void *key)
+{
+ struct task_struct *p = get_task_struct(wq_entry->private);
+ bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
+ struct percpu_rw_semaphore *sem = key;
+
+ /* concurrent against percpu_down_write(), can get stolen */
+ if (!__percpu_rwsem_trylock(sem, reader))
return 1;
- /*
- * Per the above comment; we still have preemption disabled and
- * will thus decrement on the same CPU as we incremented.
- */
- __percpu_up_read(sem);
+ list_del_init(&wq_entry->entry);
+ smp_store_release(&wq_entry->private, NULL);
- if (try)
- return 0;
+ wake_up_process(p);
+ put_task_struct(p);
- /*
- * We either call schedule() in the wait, or we'll fall through
- * and reschedule on the preempt_enable() in percpu_down_read().
- */
- preempt_enable_no_resched();
+ return !reader; /* wake (readers until) 1 writer */
+}
+
+static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
+{
+ DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
+ bool wait;
+ spin_lock_irq(&sem->waiters.lock);
/*
- * Avoid lockdep for the down/up_read() we already have them.
+ * Serialize against the wakeup in percpu_up_write(), if we fail
+ * the trylock, the wakeup must see us on the list.
*/
- __down_read(&sem->rw_sem);
- this_cpu_inc(*sem->read_count);
- __up_read(&sem->rw_sem);
+ wait = !__percpu_rwsem_trylock(sem, reader);
+ if (wait) {
+ wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
+ __add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
+ }
+ spin_unlock_irq(&sem->waiters.lock);
- preempt_disable();
- return 1;
+ while (wait) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (!smp_load_acquire(&wq_entry.private))
+ break;
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
}
-EXPORT_SYMBOL_GPL(__percpu_down_read);
-void __percpu_up_read(struct percpu_rw_semaphore *sem)
+bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
{
- smp_mb(); /* B matches C */
- /*
- * In other words, if they see our decrement (presumably to aggregate
- * zero, as that is the only time it matters) they will also see our
- * critical section.
- */
- __this_cpu_dec(*sem->read_count);
+ if (__percpu_down_read_trylock(sem))
+ return true;
- /* Prod writer to recheck readers_active */
- rcuwait_wake_up(&sem->writer);
+ if (try)
+ return false;
+
+ preempt_enable();
+ percpu_rwsem_wait(sem, /* .reader = */ true);
+ preempt_disable();
+
+ return true;
}
-EXPORT_SYMBOL_GPL(__percpu_up_read);
+EXPORT_SYMBOL_GPL(__percpu_down_read);
#define per_cpu_sum(var) \
({ \
@@ -124,6 +192,8 @@ EXPORT_SYMBOL_GPL(__percpu_up_read);
* zero. If this sum is zero, then it is stable due to the fact that if any
* newly arriving readers increment a given counter, they will immediately
* decrement that same counter.
+ *
+ * Assumes sem->block is set.
*/
static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
@@ -142,32 +212,36 @@ static bool readers_active_check(struct percpu_rw_semaphore *sem)
void percpu_down_write(struct percpu_rw_semaphore *sem)
{
+ might_sleep();
+ rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+
/* Notify readers to take the slow path. */
rcu_sync_enter(&sem->rss);
- down_write(&sem->rw_sem);
-
/*
- * Notify new readers to block; up until now, and thus throughout the
- * longish rcu_sync_enter() above, new readers could still come in.
+ * Try set sem->block; this provides writer-writer exclusion.
+ * Having sem->block set makes new readers block.
*/
- WRITE_ONCE(sem->readers_block, 1);
+ if (!__percpu_down_write_trylock(sem))
+ percpu_rwsem_wait(sem, /* .reader = */ false);
- smp_mb(); /* D matches A */
+ /* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
/*
- * If they don't see our writer of readers_block, then we are
- * guaranteed to see their sem->read_count increment, and therefore
- * will wait for them.
+ * If they don't see our store of sem->block, then we are guaranteed to
+ * see their sem->read_count increment, and therefore will wait for
+ * them.
*/
- /* Wait for all now active readers to complete. */
+ /* Wait for all active readers to complete. */
rcuwait_wait_event(&sem->writer, readers_active_check(sem));
}
EXPORT_SYMBOL_GPL(percpu_down_write);
void percpu_up_write(struct percpu_rw_semaphore *sem)
{
+ rwsem_release(&sem->dep_map, _RET_IP_);
+
/*
* Signal the writer is done, no fast path yet.
*
@@ -178,12 +252,12 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
* Therefore we force it through the slow path which guarantees an
* acquire and thereby guarantees the critical section's consistency.
*/
- smp_store_release(&sem->readers_block, 0);
+ atomic_set_release(&sem->block, 0);
/*
- * Release the write lock, this will allow readers back in the game.
+ * Prod any pending reader/writer to make progress.
*/
- up_write(&sem->rw_sem);
+ __wake_up(&sem->waiters, TASK_NORMAL, 1, sem);
/*
* Once this completes (at least one RCU-sched grace period hence) the
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 0d9b6be9ecc8..e6f437bafb23 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -28,7 +28,6 @@
#include <linux/rwsem.h>
#include <linux/atomic.h>
-#include "rwsem.h"
#include "lock_events.h"
/*
@@ -660,8 +659,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
unsigned long flags;
bool ret = true;
- BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE));
-
if (need_resched()) {
lockevent_inc(rwsem_opt_fail);
return false;
@@ -1338,7 +1335,7 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
/*
* lock for reading
*/
-inline void __down_read(struct rw_semaphore *sem)
+static inline void __down_read(struct rw_semaphore *sem)
{
if (!rwsem_read_trylock(sem)) {
rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
@@ -1426,7 +1423,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
/*
* unlock after reading
*/
-inline void __up_read(struct rw_semaphore *sem)
+static inline void __up_read(struct rw_semaphore *sem)
{
long tmp;
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index 2534ce49f648..e69de29bb2d1 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef __INTERNAL_RWSEM_H
-#define __INTERNAL_RWSEM_H
-#include <linux/rwsem.h>
-
-extern void __down_read(struct rw_semaphore *sem);
-extern void __up_read(struct rw_semaphore *sem);
-
-#endif /* __INTERNAL_RWSEM_H */