diff options
-rw-r--r-- | include/linux/percpu-rwsem.h | 83 | ||||
-rw-r--r-- | include/linux/rwsem.h | 6 | ||||
-rw-r--r-- | include/linux/wait.h | 1 | ||||
-rw-r--r-- | kernel/cpu.c | 4 | ||||
-rw-r--r-- | kernel/exit.c | 1 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 331 | ||||
-rw-r--r-- | kernel/locking/lockdep_internals.h | 14 | ||||
-rw-r--r-- | kernel/locking/lockdep_proc.c | 31 | ||||
-rw-r--r-- | kernel/locking/percpu-rwsem.c | 192 | ||||
-rw-r--r-- | kernel/locking/rwsem.c | 7 | ||||
-rw-r--r-- | kernel/locking/rwsem.h | 10 |
11 files changed, 499 insertions, 181 deletions
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index ad2ca2a89d5b..5e033fe1ff4e 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -3,41 +3,52 @@ #define _LINUX_PERCPU_RWSEM_H #include <linux/atomic.h> -#include <linux/rwsem.h> #include <linux/percpu.h> #include <linux/rcuwait.h> +#include <linux/wait.h> #include <linux/rcu_sync.h> #include <linux/lockdep.h> struct percpu_rw_semaphore { struct rcu_sync rss; unsigned int __percpu *read_count; - struct rw_semaphore rw_sem; /* slowpath */ - struct rcuwait writer; /* blocked writer */ - int readers_block; + struct rcuwait writer; + wait_queue_head_t waiters; + atomic_t block; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif }; +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }, +#else +#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) +#endif + #define __DEFINE_PERCPU_RWSEM(name, is_static) \ static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \ is_static struct percpu_rw_semaphore name = { \ .rss = __RCU_SYNC_INITIALIZER(name.rss), \ .read_count = &__percpu_rwsem_rc_##name, \ - .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ .writer = __RCUWAIT_INITIALIZER(name.writer), \ + .waiters = __WAIT_QUEUE_HEAD_INITIALIZER(name.waiters), \ + .block = ATOMIC_INIT(0), \ + __PERCPU_RWSEM_DEP_MAP_INIT(name) \ } + #define DEFINE_PERCPU_RWSEM(name) \ __DEFINE_PERCPU_RWSEM(name, /* not static */) #define DEFINE_STATIC_PERCPU_RWSEM(name) \ __DEFINE_PERCPU_RWSEM(name, static) -extern int __percpu_down_read(struct percpu_rw_semaphore *, int); -extern void __percpu_up_read(struct percpu_rw_semaphore *); +extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool); static inline void percpu_down_read(struct percpu_rw_semaphore *sem) { might_sleep(); - rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_); + rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); preempt_disable(); /* @@ -48,8 +59,9 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) * and that once the synchronize_rcu() is done, the writer will see * anything we did within this RCU-sched read-size critical section. */ - __this_cpu_inc(*sem->read_count); - if (unlikely(!rcu_sync_is_idle(&sem->rss))) + if (likely(rcu_sync_is_idle(&sem->rss))) + __this_cpu_inc(*sem->read_count); + else __percpu_down_read(sem, false); /* Unconditional memory barrier */ /* * The preempt_enable() prevents the compiler from @@ -58,16 +70,17 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) preempt_enable(); } -static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) +static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { - int ret = 1; + bool ret = true; preempt_disable(); /* * Same as in percpu_down_read(). */ - __this_cpu_inc(*sem->read_count); - if (unlikely(!rcu_sync_is_idle(&sem->rss))) + if (likely(rcu_sync_is_idle(&sem->rss))) + __this_cpu_inc(*sem->read_count); + else ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ preempt_enable(); /* @@ -76,24 +89,36 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) */ if (ret) - rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_); + rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); return ret; } static inline void percpu_up_read(struct percpu_rw_semaphore *sem) { + rwsem_release(&sem->dep_map, _RET_IP_); + preempt_disable(); /* * Same as in percpu_down_read(). */ - if (likely(rcu_sync_is_idle(&sem->rss))) + if (likely(rcu_sync_is_idle(&sem->rss))) { __this_cpu_dec(*sem->read_count); - else - __percpu_up_read(sem); /* Unconditional memory barrier */ + } else { + /* + * slowpath; reader will only ever wake a single blocked + * writer. + */ + smp_mb(); /* B matches C */ + /* + * In other words, if they see our decrement (presumably to + * aggregate zero, as that is the only time it matters) they + * will also see our critical section. + */ + __this_cpu_dec(*sem->read_count); + rcuwait_wake_up(&sem->writer); + } preempt_enable(); - - rwsem_release(&sem->rw_sem.dep_map, _RET_IP_); } extern void percpu_down_write(struct percpu_rw_semaphore *); @@ -110,29 +135,19 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); __percpu_init_rwsem(sem, #sem, &rwsem_key); \ }) -#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem) - -#define percpu_rwsem_assert_held(sem) \ - lockdep_assert_held(&(sem)->rw_sem) +#define percpu_rwsem_is_held(sem) lockdep_is_held(sem) +#define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem) static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { - lock_release(&sem->rw_sem.dep_map, ip); -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER - if (!read) - atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN); -#endif + lock_release(&sem->dep_map, ip); } static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { - lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip); -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER - if (!read) - atomic_long_set(&sem->rw_sem.owner, (long)current); -#endif + lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip); } #endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 00d6054687dd..8a418d9eeb7a 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -53,12 +53,6 @@ struct rw_semaphore { #endif }; -/* - * Setting all bits of the owner field except bit 0 will indicate - * that the rwsem is writer-owned with an unknown owner. - */ -#define RWSEM_OWNER_UNKNOWN (-2L) - /* In all implementations count != 0 means locked */ static inline int rwsem_is_locked(struct rw_semaphore *sem) { diff --git a/include/linux/wait.h b/include/linux/wait.h index 3283c8d02137..feeb6be5cad6 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -20,6 +20,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int #define WQ_FLAG_EXCLUSIVE 0x01 #define WQ_FLAG_WOKEN 0x02 #define WQ_FLAG_BOOKMARK 0x04 +#define WQ_FLAG_CUSTOM 0x08 /* * A single wait-queue entry structure: diff --git a/kernel/cpu.c b/kernel/cpu.c index 9c706af713fb..221bf6a9e98a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -331,12 +331,12 @@ void lockdep_assert_cpus_held(void) static void lockdep_acquire_cpus_lock(void) { - rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_); + rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_); } static void lockdep_release_cpus_lock(void) { - rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, _THIS_IP_); + rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_); } /* diff --git a/kernel/exit.c b/kernel/exit.c index 2833ffb0c211..f64a8f9d412a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -258,6 +258,7 @@ void rcuwait_wake_up(struct rcuwait *w) wake_up_process(task); rcu_read_unlock(); } +EXPORT_SYMBOL_GPL(rcuwait_wake_up); /* * Determine if a process group is "orphaned", according to the POSIX diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 32406ef0d6a2..e55c4ee14e64 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -147,6 +147,7 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES); #define KEYHASH_SIZE (1UL << KEYHASH_BITS) static struct hlist_head lock_keys_hash[KEYHASH_SIZE]; unsigned long nr_lock_classes; +unsigned long nr_zapped_classes; #ifndef CONFIG_DEBUG_LOCKDEP static #endif @@ -1070,13 +1071,15 @@ static inline void check_data_structures(void) { } #endif /* CONFIG_DEBUG_LOCKDEP */ +static void init_chain_block_buckets(void); + /* * Initialize the lock_classes[] array elements, the free_lock_classes list * and also the delayed_free structure. */ static void init_data_structures_once(void) { - static bool ds_initialized, rcu_head_initialized; + static bool __read_mostly ds_initialized, rcu_head_initialized; int i; if (likely(rcu_head_initialized)) @@ -1100,6 +1103,7 @@ static void init_data_structures_once(void) INIT_LIST_HEAD(&lock_classes[i].locks_after); INIT_LIST_HEAD(&lock_classes[i].locks_before); } + init_chain_block_buckets(); } static inline struct hlist_head *keyhashentry(const struct lock_class_key *key) @@ -2298,18 +2302,6 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, return 0; } -static void inc_chains(void) -{ - if (current->hardirq_context) - nr_hardirq_chains++; - else { - if (current->softirq_context) - nr_softirq_chains++; - else - nr_process_chains++; - } -} - #else static inline int check_irq_usage(struct task_struct *curr, @@ -2317,13 +2309,27 @@ static inline int check_irq_usage(struct task_struct *curr, { return 1; } +#endif /* CONFIG_TRACE_IRQFLAGS */ -static inline void inc_chains(void) +static void inc_chains(int irq_context) { - nr_process_chains++; + if (irq_context & LOCK_CHAIN_HARDIRQ_CONTEXT) + nr_hardirq_chains++; + else if (irq_context & LOCK_CHAIN_SOFTIRQ_CONTEXT) + nr_softirq_chains++; + else + nr_process_chains++; } -#endif /* CONFIG_TRACE_IRQFLAGS */ +static void dec_chains(int irq_context) +{ + if (irq_context & LOCK_CHAIN_HARDIRQ_CONTEXT) + nr_hardirq_chains--; + else if (irq_context & LOCK_CHAIN_SOFTIRQ_CONTEXT) + nr_softirq_chains--; + else + nr_process_chains--; +} static void print_deadlock_scenario(struct held_lock *nxt, struct held_lock *prv) @@ -2622,8 +2628,235 @@ out_bug: struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; static DECLARE_BITMAP(lock_chains_in_use, MAX_LOCKDEP_CHAINS); -int nr_chain_hlocks; static u16 chain_hlocks[MAX_LOCKDEP_CHAIN_HLOCKS]; +unsigned long nr_zapped_lock_chains; +unsigned int nr_free_chain_hlocks; /* Free chain_hlocks in buckets */ +unsigned int nr_lost_chain_hlocks; /* Lost chain_hlocks */ +unsigned int nr_large_chain_blocks; /* size > MAX_CHAIN_BUCKETS */ + +/* + * The first 2 chain_hlocks entries in the chain block in the bucket + * list contains the following meta data: + * + * entry[0]: + * Bit 15 - always set to 1 (it is not a class index) + * Bits 0-14 - upper 15 bits of the next block index + * entry[1] - lower 16 bits of next block index + * + * A next block index of all 1 bits means it is the end of the list. + * + * On the unsized bucket (bucket-0), the 3rd and 4th entries contain + * the chain block size: + * + * entry[2] - upper 16 bits of the chain block size + * entry[3] - lower 16 bits of the chain block size + */ +#define MAX_CHAIN_BUCKETS 16 +#define CHAIN_BLK_FLAG (1U << 15) +#define CHAIN_BLK_LIST_END 0xFFFFU + +static int chain_block_buckets[MAX_CHAIN_BUCKETS]; + +static inline int size_to_bucket(int size) +{ + if (size > MAX_CHAIN_BUCKETS) + return 0; + + return size - 1; +} + +/* + * Iterate all the chain blocks in a bucket. + */ +#define for_each_chain_block(bucket, prev, curr) \ + for ((prev) = -1, (curr) = chain_block_buckets[bucket]; \ + (curr) >= 0; \ + (prev) = (curr), (curr) = chain_block_next(curr)) + +/* + * next block or -1 + */ +static inline int chain_block_next(int offset) +{ + int next = chain_hlocks[offset]; + + WARN_ON_ONCE(!(next & CHAIN_BLK_FLAG)); + + if (next == CHAIN_BLK_LIST_END) + return -1; + + next &= ~CHAIN_BLK_FLAG; + next <<= 16; + next |= chain_hlocks[offset + 1]; + + return next; +} + +/* + * bucket-0 only + */ +static inline int chain_block_size(int offset) +{ + return (chain_hlocks[offset + 2] << 16) | chain_hlocks[offset + 3]; +} + +static inline void init_chain_block(int offset, int next, int bucket, int size) +{ + chain_hlocks[offset] = (next >> 16) | CHAIN_BLK_FLAG; + chain_hlocks[offset + 1] = (u16)next; + + if (size && !bucket) { + chain_hlocks[offset + 2] = size >> 16; + chain_hlocks[offset + 3] = (u16)size; + } +} + +static inline void add_chain_block(int offset, int size) +{ + int bucket = size_to_bucket(size); + int next = chain_block_buckets[bucket]; + int prev, curr; + + if (unlikely(size < 2)) { + /* + * We can't store single entries on the freelist. Leak them. + * + * One possible way out would be to uniquely mark them, other + * than with CHAIN_BLK_FLAG, such that we can recover them when + * the block before it is re-added. + */ + if (size) + nr_lost_chain_hlocks++; + return; + } + + nr_free_chain_hlocks += size; + if (!bucket) { + nr_large_chain_blocks++; + + /* + * Variable sized, sort large to small. + */ + for_each_chain_block(0, prev, curr) { + if (size >= chain_block_size(curr)) + break; + } + init_chain_block(offset, curr, 0, size); + if (prev < 0) + chain_block_buckets[0] = offset; + else + init_chain_block(prev, offset, 0, 0); + return; + } + /* + * Fixed size, add to head. + */ + init_chain_block(offset, next, bucket, size); + chain_block_buckets[bucket] = offset; +} + +/* + * Only the first block in the list can be deleted. + * + * For the variable size bucket[0], the first block (the largest one) is + * returned, broken up and put back into the pool. So if a chain block of + * length > MAX_CHAIN_BUCKETS is ever used and zapped, it will just be + * queued up after the primordial chain block and never be used until the + * hlock entries in the primordial chain block is almost used up. That + * causes fragmentation and reduce allocation efficiency. That can be + * monitored by looking at the "large chain blocks" number in lockdep_stats. + */ +static inline void del_chain_block(int bucket, int size, int next) +{ + nr_free_chain_hlocks -= size; + chain_block_buckets[bucket] = next; + + if (!bucket) + nr_large_chain_blocks--; +} + +static void init_chain_block_buckets(void) +{ + int i; + + for (i = 0; i < MAX_CHAIN_BUCKETS; i++) + chain_block_buckets[i] = -1; + + add_chain_block(0, ARRAY_SIZE(chain_hlocks)); +} + +/* + * Return offset of a chain block of the right size or -1 if not found. + * + * Fairly simple worst-fit allocator with the addition of a number of size + * specific free lists. + */ +static int alloc_chain_hlocks(int req) +{ + int bucket, curr, size; + + /* + * We rely on the MSB to act as an escape bit to denote freelist + * pointers. Make sure this bit isn't set in 'normal' class_idx usage. + */ + BUILD_BUG_ON((MAX_LOCKDEP_KEYS-1) & CHAIN_BLK_FLAG); + + init_data_structures_once(); + + if (nr_free_chain_hlocks < req) + return -1; + + /* + * We require a minimum of 2 (u16) entries to encode a freelist + * 'pointer'. + */ + req = max(req, 2); + bucket = size_to_bucket(req); + curr = chain_block_buckets[bucket]; + + if (bucket) { + if (curr >= 0) { + del_chain_block(bucket, req, chain_block_next(curr)); + return curr; + } + /* Try bucket 0 */ + curr = chain_block_buckets[0]; + } + + /* + * The variable sized freelist is sorted by size; the first entry is + * the largest. Use it if it fits. + */ + if (curr >= 0) { + size = chain_block_size(curr); + if (likely(size >= req)) { + del_chain_block(0, size, chain_block_next(curr)); + add_chain_block(curr + req, size - req); + return curr; + } + } + + /* + * Last resort, split a block in a larger sized bucket. + */ + for (size = MAX_CHAIN_BUCKETS; size > req; size--) { + bucket = size_to_bucket(size); + curr = chain_block_buckets[bucket]; + if (curr < 0) + continue; + + del_chain_block(bucket, size, chain_block_next(curr)); + add_chain_block(curr + req, size - req); + return curr; + } + + return -1; +} + +static inline void free_chain_hlocks(int base, int size) +{ + add_chain_block(base, max(size, 2)); +} struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i) { @@ -2824,15 +3057,8 @@ static inline int add_chain_cache(struct task_struct *curr, BUILD_BUG_ON((1UL << 6) <= ARRAY_SIZE(curr->held_locks)); BUILD_BUG_ON((1UL << 8*sizeof(chain_hlocks[0])) <= ARRAY_SIZE(lock_classes)); - if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { - chain->base = nr_chain_hlocks; - for (j = 0; j < chain->depth - 1; j++, i++) { - int lock_id = curr->held_locks[i].class_idx; - chain_hlocks[chain->base + j] = lock_id; - } - chain_hlocks[chain->base + j] = class - lock_classes; - nr_chain_hlocks += chain->depth; - } else { + j = alloc_chain_hlocks(chain->depth); + if (j < 0) { if (!debug_locks_off_graph_unlock()) return 0; @@ -2841,9 +3067,16 @@ static inline int add_chain_cache(struct task_struct *curr, return 0; } + chain->base = j; + for (j = 0; j < chain->depth - 1; j++, i++) { + int lock_id = curr->held_locks[i].class_idx; + + chain_hlocks[chain->base + j] = lock_id; + } + chain_hlocks[chain->base + j] = class - lock_classes; hlist_add_head_rcu(&chain->entry, hash_head); debug_atomic_inc(chain_lookup_misses); - inc_chains(); + inc_chains(chain->irq_context); return 1; } @@ -2987,6 +3220,8 @@ static inline int validate_chain(struct task_struct *curr, { return 1; } + +static void init_chain_block_buckets(void) { } #endif /* CONFIG_PROVE_LOCKING */ /* @@ -3596,7 +3831,8 @@ lock_used: static inline unsigned int task_irq_context(struct task_struct *task) { - return 2 * !!task->hardirq_context + !!task->softirq_context; + return LOCK_CHAIN_HARDIRQ_CONTEXT * !!task->hardirq_context + + LOCK_CHAIN_SOFTIRQ_CONTEXT * !!task->softirq_context; } static int separate_irq_context(struct task_struct *curr, @@ -4768,57 +5004,33 @@ static void remove_class_from_lock_chain(struct pending_free *pf, struct lock_class *class) { #ifdef CONFIG_PROVE_LOCKING - struct lock_chain *new_chain; - u64 chain_key; int i; for (i = chain->base; i < chain->base + chain->depth; i++) { if (chain_hlocks[i] != class - lock_classes) continue; - /* The code below leaks one chain_hlock[] entry. */ - if (--chain->depth > 0) { - memmove(&chain_hlocks[i], &chain_hlocks[i + 1], - (chain->base + chain->depth - i) * - sizeof(chain_hlocks[0])); - } /* * Each lock class occurs at most once in a lock chain so once * we found a match we can break out of this loop. */ - goto recalc; + goto free_lock_chain; } /* Since the chain has not been modified, return. */ return; -recalc: - chain_key = INITIAL_CHAIN_KEY; - for (i = chain->base; i < chain->base + chain->depth; i++) - chain_key = iterate_chain_key(chain_key, chain_hlocks[i]); - if (chain->depth && chain->chain_key == chain_key) - return; +free_lock_chain: + free_chain_hlocks(chain->base, chain->depth); /* Overwrite the chain key for concurrent RCU readers. */ - WRITE_ONCE(chain->chain_key, chain_key); + WRITE_ONCE(chain->chain_key, INITIAL_CHAIN_KEY); + dec_chains(chain->irq_context); + /* * Note: calling hlist_del_rcu() from inside a * hlist_for_each_entry_rcu() loop is safe. */ hlist_del_rcu(&chain->entry); __set_bit(chain - lock_chains, pf->lock_chains_being_freed); - if (chain->depth == 0) - return; - /* - * If the modified lock chain matches an existing lock chain, drop - * the modified lock chain. - */ - if (lookup_chain_cache(chain_key)) - return; - new_chain = alloc_lock_chain(); - if (WARN_ON_ONCE(!new_chain)) { - debug_locks_off(); - return; - } - *new_chain = *chain; - hlist_add_head_rcu(&new_chain->entry, chainhashentry(chain_key)); + nr_zapped_lock_chains++; #endif } @@ -4874,6 +5086,7 @@ static void zap_class(struct pending_free *pf, struct lock_class *class) } remove_class_from_lock_chains(pf, class); + nr_zapped_classes++; } static void reinit_class(struct lock_class *class) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 18d85aebbb57..baca699b94e9 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -106,6 +106,12 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = #define STACK_TRACE_HASH_SIZE 16384 #endif +/* + * Bit definitions for lock_chain.irq_context + */ +#define LOCK_CHAIN_SOFTIRQ_CONTEXT (1 << 0) +#define LOCK_CHAIN_HARDIRQ_CONTEXT (1 << 1) + #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) #define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) @@ -124,17 +130,21 @@ extern const char *__get_key_name(const struct lockdep_subclass_key *key, struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i); extern unsigned long nr_lock_classes; +extern unsigned long nr_zapped_classes; +extern unsigned long nr_zapped_lock_chains; extern unsigned long nr_list_entries; long lockdep_next_lockchain(long i); unsigned long lock_chain_count(void); -extern int nr_chain_hlocks; extern unsigned long nr_stack_trace_entries; extern unsigned int nr_hardirq_chains; extern unsigned int nr_softirq_chains; extern unsigned int nr_process_chains; -extern unsigned int max_lockdep_depth; +extern unsigned int nr_free_chain_hlocks; +extern unsigned int nr_lost_chain_hlocks; +extern unsigned int nr_large_chain_blocks; +extern unsigned int max_lockdep_depth; extern unsigned int max_bfs_queue_depth; #ifdef CONFIG_PROVE_LOCKING diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index 231684cfc5ae..5525cd3ba0c8 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -128,15 +128,22 @@ static int lc_show(struct seq_file *m, void *v) struct lock_chain *chain = v; struct lock_class *class; int i; + static const char * const irq_strs[] = { + [0] = "0", + [LOCK_CHAIN_HARDIRQ_CONTEXT] = "hardirq", + [LOCK_CHAIN_SOFTIRQ_CONTEXT] = "softirq", + [LOCK_CHAIN_SOFTIRQ_CONTEXT| + LOCK_CHAIN_HARDIRQ_CONTEXT] = "hardirq|softirq", + }; if (v == SEQ_START_TOKEN) { - if (nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS) + if (!nr_free_chain_hlocks) seq_printf(m, "(buggered) "); seq_printf(m, "all lock chains:\n"); return 0; } - seq_printf(m, "irq_context: %d\n", chain->irq_context); + seq_printf(m, "irq_context: %s\n", irq_strs[chain->irq_context]); for (i = 0; i < chain->depth; i++) { class = lock_chain_get_class(chain, i); @@ -271,8 +278,12 @@ static int lockdep_stats_show(struct seq_file *m, void *v) #ifdef CONFIG_PROVE_LOCKING seq_printf(m, " dependency chains: %11lu [max: %lu]\n", lock_chain_count(), MAX_LOCKDEP_CHAINS); - seq_printf(m, " dependency chain hlocks: %11d [max: %lu]\n", - nr_chain_hlocks, MAX_LOCKDEP_CHAIN_HLOCKS); + seq_printf(m, " dependency chain hlocks used: %11lu [max: %lu]\n", + MAX_LOCKDEP_CHAIN_HLOCKS - + (nr_free_chain_hlocks + nr_lost_chain_hlocks), + MAX_LOCKDEP_CHAIN_HLOCKS); + seq_printf(m, " dependency chain hlocks lost: %11u\n", + nr_lost_chain_hlocks); #endif #ifdef CONFIG_TRACE_IRQFLAGS @@ -336,6 +347,18 @@ static int lockdep_stats_show(struct seq_file *m, void *v) seq_printf(m, " debug_locks: %11u\n", debug_locks); + /* + * Zappped classes and lockdep data buffers reuse statistics. + */ + seq_puts(m, "\n"); + seq_printf(m, " zapped classes: %11lu\n", + nr_zapped_classes); +#ifdef CONFIG_PROVE_LOCKING + seq_printf(m, " zapped lock chains: %11lu\n", + nr_zapped_lock_chains); + seq_printf(m, " large chain blocks: %11u\n", + nr_large_chain_blocks); +#endif return 0; } diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 364d38a0c444..183a3aaa8509 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -1,27 +1,29 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/atomic.h> -#include <linux/rwsem.h> #include <linux/percpu.h> +#include <linux/wait.h> #include <linux/lockdep.h> #include <linux/percpu-rwsem.h> #include <linux/rcupdate.h> #include <linux/sched.h> +#include <linux/sched/task.h> #include <linux/errno.h> -#include "rwsem.h" - int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, - const char *name, struct lock_class_key *rwsem_key) + const char *name, struct lock_class_key *key) { sem->read_count = alloc_percpu(int); if (unlikely(!sem->read_count)) return -ENOMEM; - /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ rcu_sync_init(&sem->rss); - __init_rwsem(&sem->rw_sem, name, rwsem_key); rcuwait_init(&sem->writer); - sem->readers_block = 0; + init_waitqueue_head(&sem->waiters); + atomic_set(&sem->block, 0); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + debug_check_no_locks_freed((void *)sem, sizeof(*sem)); + lockdep_init_map(&sem->dep_map, name, key, 0); +#endif return 0; } EXPORT_SYMBOL_GPL(__percpu_init_rwsem); @@ -41,73 +43,139 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *sem) } EXPORT_SYMBOL_GPL(percpu_free_rwsem); -int __percpu_down_read(struct percpu_rw_semaphore *sem, int try) +static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { + __this_cpu_inc(*sem->read_count); + /* * Due to having preemption disabled the decrement happens on * the same CPU as the increment, avoiding the * increment-on-one-CPU-and-decrement-on-another problem. * - * If the reader misses the writer's assignment of readers_block, then - * the writer is guaranteed to see the reader's increment. + * If the reader misses the writer's assignment of sem->block, then the + * writer is guaranteed to see the reader's increment. * * Conversely, any readers that increment their sem->read_count after - * the writer looks are guaranteed to see the readers_block value, - * which in turn means that they are guaranteed to immediately - * decrement their sem->read_count, so that it doesn't matter that the - * writer missed them. + * the writer looks are guaranteed to see the sem->block value, which + * in turn means that they are guaranteed to immediately decrement + * their sem->read_count, so that it doesn't matter that the writer + * missed them. */ smp_mb(); /* A matches D */ /* - * If !readers_block the critical section starts here, matched by the + * If !sem->block the critical section starts here, matched by the * release in percpu_up_write(). */ - if (likely(!smp_load_acquire(&sem->readers_block))) + if (likely(!atomic_read_acquire(&sem->block))) + return true; + + __this_cpu_dec(*sem->read_count); + + /* Prod writer to re-evaluate readers_active_check() */ + rcuwait_wake_up(&sem->writer); + + return false; +} + +static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem) +{ + if (atomic_read(&sem->block)) + return false; + + return atomic_xchg(&sem->block, 1) == 0; +} + +static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader) +{ + if (reader) { + bool ret; + + preempt_disable(); + ret = __percpu_down_read_trylock(sem); + preempt_enable(); + + return ret; + } + return __percpu_down_write_trylock(sem); +} + +/* + * The return value of wait_queue_entry::func means: + * + * <0 - error, wakeup is terminated and the error is returned + * 0 - no wakeup, a next waiter is tried + * >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive. + * + * We use EXCLUSIVE for both readers and writers to preserve FIFO order, + * and play games with the return value to allow waking multiple readers. + * + * Specifically, we wake readers until we've woken a single writer, or until a + * trylock fails. + */ +static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry, + unsigned int mode, int wake_flags, + void *key) +{ + struct task_struct *p = get_task_struct(wq_entry->private); + bool reader = wq_entry->flags & WQ_FLAG_CUSTOM; + struct percpu_rw_semaphore *sem = key; + + /* concurrent against percpu_down_write(), can get stolen */ + if (!__percpu_rwsem_trylock(sem, reader)) return 1; - /* - * Per the above comment; we still have preemption disabled and - * will thus decrement on the same CPU as we incremented. - */ - __percpu_up_read(sem); + list_del_init(&wq_entry->entry); + smp_store_release(&wq_entry->private, NULL); - if (try) - return 0; + wake_up_process(p); + put_task_struct(p); - /* - * We either call schedule() in the wait, or we'll fall through - * and reschedule on the preempt_enable() in percpu_down_read(). - */ - preempt_enable_no_resched(); + return !reader; /* wake (readers until) 1 writer */ +} + +static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader) +{ + DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function); + bool wait; + spin_lock_irq(&sem->waiters.lock); /* - * Avoid lockdep for the down/up_read() we already have them. + * Serialize against the wakeup in percpu_up_write(), if we fail + * the trylock, the wakeup must see us on the list. */ - __down_read(&sem->rw_sem); - this_cpu_inc(*sem->read_count); - __up_read(&sem->rw_sem); + wait = !__percpu_rwsem_trylock(sem, reader); + if (wait) { + wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM; + __add_wait_queue_entry_tail(&sem->waiters, &wq_entry); + } + spin_unlock_irq(&sem->waiters.lock); - preempt_disable(); - return 1; + while (wait) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!smp_load_acquire(&wq_entry.private)) + break; + schedule(); + } + __set_current_state(TASK_RUNNING); } -EXPORT_SYMBOL_GPL(__percpu_down_read); -void __percpu_up_read(struct percpu_rw_semaphore *sem) +bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try) { - smp_mb(); /* B matches C */ - /* - * In other words, if they see our decrement (presumably to aggregate - * zero, as that is the only time it matters) they will also see our - * critical section. - */ - __this_cpu_dec(*sem->read_count); + if (__percpu_down_read_trylock(sem)) + return true; - /* Prod writer to recheck readers_active */ - rcuwait_wake_up(&sem->writer); + if (try) + return false; + + preempt_enable(); + percpu_rwsem_wait(sem, /* .reader = */ true); + preempt_disable(); + + return true; } -EXPORT_SYMBOL_GPL(__percpu_up_read); +EXPORT_SYMBOL_GPL(__percpu_down_read); #define per_cpu_sum(var) \ ({ \ @@ -124,6 +192,8 @@ EXPORT_SYMBOL_GPL(__percpu_up_read); * zero. If this sum is zero, then it is stable due to the fact that if any * newly arriving readers increment a given counter, they will immediately * decrement that same counter. + * + * Assumes sem->block is set. */ static bool readers_active_check(struct percpu_rw_semaphore *sem) { @@ -142,32 +212,36 @@ static bool readers_active_check(struct percpu_rw_semaphore *sem) void percpu_down_write(struct percpu_rw_semaphore *sem) { + might_sleep(); + rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); + /* Notify readers to take the slow path. */ rcu_sync_enter(&sem->rss); - down_write(&sem->rw_sem); - /* - * Notify new readers to block; up until now, and thus throughout the - * longish rcu_sync_enter() above, new readers could still come in. + * Try set sem->block; this provides writer-writer exclusion. + * Having sem->block set makes new readers block. */ - WRITE_ONCE(sem->readers_block, 1); + if (!__percpu_down_write_trylock(sem)) + percpu_rwsem_wait(sem, /* .reader = */ false); - smp_mb(); /* D matches A */ + /* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */ /* - * If they don't see our writer of readers_block, then we are - * guaranteed to see their sem->read_count increment, and therefore - * will wait for them. + * If they don't see our store of sem->block, then we are guaranteed to + * see their sem->read_count increment, and therefore will wait for + * them. */ - /* Wait for all now active readers to complete. */ + /* Wait for all active readers to complete. */ rcuwait_wait_event(&sem->writer, readers_active_check(sem)); } EXPORT_SYMBOL_GPL(percpu_down_write); void percpu_up_write(struct percpu_rw_semaphore *sem) { + rwsem_release(&sem->dep_map, _RET_IP_); + /* * Signal the writer is done, no fast path yet. * @@ -178,12 +252,12 @@ void percpu_up_write(struct percpu_rw_semaphore *sem) * Therefore we force it through the slow path which guarantees an * acquire and thereby guarantees the critical section's consistency. */ - smp_store_release(&sem->readers_block, 0); + atomic_set_release(&sem->block, 0); /* - * Release the write lock, this will allow readers back in the game. + * Prod any pending reader/writer to make progress. */ - up_write(&sem->rw_sem); + __wake_up(&sem->waiters, TASK_NORMAL, 1, sem); /* * Once this completes (at least one RCU-sched grace period hence) the diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 0d9b6be9ecc8..e6f437bafb23 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -28,7 +28,6 @@ #include <linux/rwsem.h> #include <linux/atomic.h> -#include "rwsem.h" #include "lock_events.h" /* @@ -660,8 +659,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem, unsigned long flags; bool ret = true; - BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE)); - if (need_resched()) { lockevent_inc(rwsem_opt_fail); return false; @@ -1338,7 +1335,7 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) /* * lock for reading */ -inline void __down_read(struct rw_semaphore *sem) +static inline void __down_read(struct rw_semaphore *sem) { if (!rwsem_read_trylock(sem)) { rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE); @@ -1426,7 +1423,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* * unlock after reading */ -inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_semaphore *sem) { long tmp; diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 2534ce49f648..e69de29bb2d1 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef __INTERNAL_RWSEM_H -#define __INTERNAL_RWSEM_H -#include <linux/rwsem.h> - -extern void __down_read(struct rw_semaphore *sem); -extern void __up_read(struct rw_semaphore *sem); - -#endif /* __INTERNAL_RWSEM_H */ |