Diffstat (limited to 'kernel/locking')
-rw-r--r--  kernel/locking/lockdep.c         |   2
-rw-r--r--  kernel/locking/mcs_spinlock.c    |   8
-rw-r--r--  kernel/locking/mcs_spinlock.h    |   4
-rw-r--r--  kernel/locking/mutex.c           |  39
-rw-r--r--  kernel/locking/qrwlock.c         |   9
-rw-r--r--  kernel/locking/rtmutex-debug.c   |   5
-rw-r--r--  kernel/locking/rtmutex-debug.h   |   7
-rw-r--r--  kernel/locking/rtmutex.c         | 562
-rw-r--r--  kernel/locking/rtmutex.h         |   7
-rw-r--r--  kernel/locking/rtmutex_common.h  |  22
-rw-r--r--  kernel/locking/rwsem-xadd.c      |   4

11 files changed, 481 insertions(+), 188 deletions(-)
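
Most of the rtmutex churn below replaces the old int detect_deadlock argument with an explicit enum rtmutex_chainwalk, so that deadlock detection and waiter requeueing can be controlled separately during the priority-inheritance chain walk. The following is a condensed sketch of that interface, assembled from the rtmutex_common.h, rtmutex.h, rtmutex-debug.h and rtmutex.c hunks below; combining both build variants under one #ifdef and forward-declaring struct rt_mutex_waiter are illustrative shortcuts here, not how the tree lays the code out.

#include <stdbool.h>
#include <stddef.h>

struct rt_mutex_waiter;			/* opaque here; real definition lives in rtmutex_common.h */

/*
 * Constants for rt mutex functions which have a selectable deadlock
 * detection (added to rtmutex_common.h by this patch).
 */
enum rtmutex_chainwalk {
	RT_MUTEX_MIN_CHAINWALK,		/* stop the walk once no further PI adjustment is needed */
	RT_MUTEX_FULL_CHAINWALK,	/* walk the full lock chain for deadlock detection */
};

#ifdef CONFIG_DEBUG_RT_MUTEXES
/* Debug build (rtmutex-debug.h): detection is always on while a waiter exists. */
static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
						  enum rtmutex_chainwalk walk)
{
	return waiter != NULL;
}
#else
/* Production build (rtmutex.h): only an explicitly requested full walk detects. */
static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
						  enum rtmutex_chainwalk walk)
{
	return walk == RT_MUTEX_FULL_CHAINWALK;
}
#endif

/* Wrapper used by rtmutex.c so the policy sits next to its documentation. */
static inline bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
						 enum rtmutex_chainwalk chwalk)
{
	return debug_rt_mutex_detect_deadlock(waiter, chwalk);
}

The futex paths (rt_mutex_timed_futex_lock(), rt_mutex_start_proxy_lock()) request RT_MUTEX_FULL_CHAINWALK, while the ordinary lock, timed-lock and deboost paths pass RT_MUTEX_MIN_CHAINWALK.
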
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index d24e4339b46d..88d0d4420ad2 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -384,7 +384,9 @@ static void print_lockdep_off(const char *bug_msg)  {  	printk(KERN_DEBUG "%s\n", bug_msg);  	printk(KERN_DEBUG "turning off the locking correctness validator.\n"); +#ifdef CONFIG_LOCK_STAT  	printk(KERN_DEBUG "Please attach the output of /proc/lock_stat to the bug report\n"); +#endif  }  static int save_trace(struct stack_trace *trace) diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index be9ee1559fca..9887a905a762 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -1,6 +1,4 @@ -  #include <linux/percpu.h> -#include <linux/mutex.h>  #include <linux/sched.h>  #include "mcs_spinlock.h" @@ -79,7 +77,7 @@ osq_wait_next(struct optimistic_spin_queue *lock,  				break;  		} -		arch_mutex_cpu_relax(); +		cpu_relax_lowlatency();  	}  	return next; @@ -120,7 +118,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)  		if (need_resched())  			goto unqueue; -		arch_mutex_cpu_relax(); +		cpu_relax_lowlatency();  	}  	return true; @@ -146,7 +144,7 @@ unqueue:  		if (smp_load_acquire(&node->locked))  			return true; -		arch_mutex_cpu_relax(); +		cpu_relax_lowlatency();  		/*  		 * Or we race against a concurrent unqueue()'s step-B, in which diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index 74356dc0ce29..23e89c5930e9 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -27,7 +27,7 @@ struct mcs_spinlock {  #define arch_mcs_spin_lock_contended(l)					\  do {									\  	while (!(smp_load_acquire(l)))					\ -		arch_mutex_cpu_relax();					\ +		cpu_relax_lowlatency();					\  } while (0)  #endif @@ -104,7 +104,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)  			return;  		/* Wait until the next pointer is set */  		while (!(next = ACCESS_ONCE(node->next))) -			arch_mutex_cpu_relax(); +			cpu_relax_lowlatency();  	}  	/* Pass lock to next waiter. */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index acca2c1a3c5e..ae712b25e492 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -46,12 +46,6 @@  # include <asm/mutex.h>  #endif -/* - * A negative mutex count indicates that waiters are sleeping waiting for the - * mutex. - */ -#define	MUTEX_SHOW_NO_WAITER(mutex)	(atomic_read(&(mutex)->count) >= 0) -  void  __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)  { @@ -152,7 +146,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)  		if (need_resched())  			break; -		arch_mutex_cpu_relax(); +		cpu_relax_lowlatency();  	}  	rcu_read_unlock(); @@ -388,12 +382,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,  	/*  	 * Optimistic spinning.  	 * -	 * We try to spin for acquisition when we find that there are no -	 * pending waiters and the lock owner is currently running on a -	 * (different) CPU. -	 * -	 * The rationale is that if the lock owner is running, it is likely to -	 * release the lock soon. +	 * We try to spin for acquisition when we find that the lock owner +	 * is currently running on a (different) CPU and while we don't +	 * need to reschedule. The rationale is that if the lock owner is +	 * running, it is likely to release the lock soon.  	 
*  	 * Since this needs the lock owner, and this mutex implementation  	 * doesn't track the owner atomically in the lock field, we need to @@ -440,7 +432,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,  		if (owner && !mutex_spin_on_owner(lock, owner))  			break; -		if ((atomic_read(&lock->count) == 1) && +		/* Try to acquire the mutex if it is unlocked. */ +		if (!mutex_is_locked(lock) &&  		    (atomic_cmpxchg(&lock->count, 1, 0) == 1)) {  			lock_acquired(&lock->dep_map, ip);  			if (use_ww_ctx) { @@ -471,7 +464,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,  		 * memory barriers as we'll eventually observe the right  		 * values at the cost of a few extra spins.  		 */ -		arch_mutex_cpu_relax(); +		cpu_relax_lowlatency();  	}  	osq_unlock(&lock->osq);  slowpath: @@ -485,8 +478,11 @@ slowpath:  #endif  	spin_lock_mutex(&lock->wait_lock, flags); -	/* once more, can we acquire the lock? */ -	if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, 0) == 1)) +	/* +	 * Once more, try to acquire the lock. Only try-lock the mutex if +	 * it is unlocked to reduce unnecessary xchg() operations. +	 */ +	if (!mutex_is_locked(lock) && (atomic_xchg(&lock->count, 0) == 1))  		goto skip_wait;  	debug_mutex_lock_common(lock, &waiter); @@ -506,9 +502,10 @@ slowpath:  		 * it's unlocked. Later on, if we sleep, this is the  		 * operation that gives us the lock. We xchg it to -1, so  		 * that when we release the lock, we properly wake up the -		 * other waiters: +		 * other waiters. We only attempt the xchg if the count is +		 * non-negative in order to avoid unnecessary xchg operations:  		 */ -		if (MUTEX_SHOW_NO_WAITER(lock) && +		if (atomic_read(&lock->count) >= 0 &&  		    (atomic_xchg(&lock->count, -1) == 1))  			break; @@ -823,6 +820,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)  	unsigned long flags;  	int prev; +	/* No need to trylock if the mutex is locked. */ +	if (mutex_is_locked(lock)) +		return 0; +  	spin_lock_mutex(&lock->wait_lock, flags);  	prev = atomic_xchg(&lock->count, -1); diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index fb5b8ac411a5..f956ede7f90d 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -20,7 +20,6 @@  #include <linux/cpumask.h>  #include <linux/percpu.h>  #include <linux/hardirq.h> -#include <linux/mutex.h>  #include <asm/qrwlock.h>  /** @@ -35,7 +34,7 @@ static __always_inline void  rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)  {  	while ((cnts & _QW_WMASK) == _QW_LOCKED) { -		arch_mutex_cpu_relax(); +		cpu_relax_lowlatency();  		cnts = smp_load_acquire((u32 *)&lock->cnts);  	}  } @@ -75,7 +74,7 @@ void queue_read_lock_slowpath(struct qrwlock *lock)  	 * to make sure that the write lock isn't taken.  	 
*/  	while (atomic_read(&lock->cnts) & _QW_WMASK) -		arch_mutex_cpu_relax(); +		cpu_relax_lowlatency();  	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;  	rspin_until_writer_unlock(lock, cnts); @@ -114,7 +113,7 @@ void queue_write_lock_slowpath(struct qrwlock *lock)  				    cnts | _QW_WAITING) == cnts))  			break; -		arch_mutex_cpu_relax(); +		cpu_relax_lowlatency();  	}  	/* When no more readers, set the locked flag */ @@ -125,7 +124,7 @@ void queue_write_lock_slowpath(struct qrwlock *lock)  				    _QW_LOCKED) == _QW_WAITING))  			break; -		arch_mutex_cpu_relax(); +		cpu_relax_lowlatency();  	}  unlock:  	arch_spin_unlock(&lock->lock); diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index 49b2ed3dced8..62b6cee8ea7f 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -66,12 +66,13 @@ void rt_mutex_debug_task_free(struct task_struct *task)   * the deadlock. We print when we return. act_waiter can be NULL in   * case of a remove waiter operation.   */ -void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter, +void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk, +			     struct rt_mutex_waiter *act_waiter,  			     struct rt_mutex *lock)  {  	struct task_struct *task; -	if (!debug_locks || detect || !act_waiter) +	if (!debug_locks || chwalk == RT_MUTEX_FULL_CHAINWALK || !act_waiter)  		return;  	task = rt_mutex_owner(act_waiter->lock); diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h index ab29b6a22669..d0519c3432b6 100644 --- a/kernel/locking/rtmutex-debug.h +++ b/kernel/locking/rtmutex-debug.h @@ -20,14 +20,15 @@ extern void debug_rt_mutex_unlock(struct rt_mutex *lock);  extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,  				      struct task_struct *powner);  extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); -extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, +extern void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk, +				    struct rt_mutex_waiter *waiter,  				    struct rt_mutex *lock);  extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter);  # define debug_rt_mutex_reset_waiter(w)			\  	do { (w)->deadlock_lock = NULL; } while (0) -static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, -						 int detect) +static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, +						  enum rtmutex_chainwalk walk)  {  	return (waiter != NULL);  } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index fc605941b9b8..a0ea2a141b3b 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -308,6 +308,32 @@ static void rt_mutex_adjust_prio(struct task_struct *task)  }  /* + * Deadlock detection is conditional: + * + * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted + * if the detect argument is == RT_MUTEX_FULL_CHAINWALK. + * + * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always + * conducted independent of the detect argument. + * + * If the waiter argument is NULL this indicates the deboost path and + * deadlock detection is disabled independent of the detect argument + * and the config settings. 
+ */ +static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, +					  enum rtmutex_chainwalk chwalk) +{ +	/* +	 * This is just a wrapper function for the following call, +	 * because debug_rt_mutex_detect_deadlock() smells like a magic +	 * debug feature and I wanted to keep the cond function in the +	 * main source file along with the comments instead of having +	 * two of the same in the headers. +	 */ +	return debug_rt_mutex_detect_deadlock(waiter, chwalk); +} + +/*   * Max number of times we'll walk the boosting chain:   */  int max_lock_depth = 1024; @@ -337,21 +363,65 @@ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)   * @top_task:	the current top waiter   *   * Returns 0 or -EDEADLK. + * + * Chain walk basics and protection scope + * + * [R] refcount on task + * [P] task->pi_lock held + * [L] rtmutex->wait_lock held + * + * Step	Description				Protected by + *	function arguments: + *	@task					[R] + *	@orig_lock if != NULL			@top_task is blocked on it + *	@next_lock				Unprotected. Cannot be + *						dereferenced. Only used for + *						comparison. + *	@orig_waiter if != NULL			@top_task is blocked on it + *	@top_task				current, or in case of proxy + *						locking protected by calling + *						code + *	again: + *	  loop_sanity_check(); + *	retry: + * [1]	  lock(task->pi_lock);			[R] acquire [P] + * [2]	  waiter = task->pi_blocked_on;		[P] + * [3]	  check_exit_conditions_1();		[P] + * [4]	  lock = waiter->lock;			[P] + * [5]	  if (!try_lock(lock->wait_lock)) {	[P] try to acquire [L] + *	    unlock(task->pi_lock);		release [P] + *	    goto retry; + *	  } + * [6]	  check_exit_conditions_2();		[P] + [L] + * [7]	  requeue_lock_waiter(lock, waiter);	[P] + [L] + * [8]	  unlock(task->pi_lock);		release [P] + *	  put_task_struct(task);		release [R] + * [9]	  check_exit_conditions_3();		[L] + * [10]	  task = owner(lock);			[L] + *	  get_task_struct(task);		[L] acquire [R] + *	  lock(task->pi_lock);			[L] acquire [P] + * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P] + [L] + * [12]	  check_exit_conditions_4();		[P] + [L] + * [13]	  unlock(task->pi_lock);		release [P] + *	  unlock(lock->wait_lock);		release [L] + *	  goto again;   */  static int rt_mutex_adjust_prio_chain(struct task_struct *task, -				      int deadlock_detect, +				      enum rtmutex_chainwalk chwalk,  				      struct rt_mutex *orig_lock,  				      struct rt_mutex *next_lock,  				      struct rt_mutex_waiter *orig_waiter,  				      struct task_struct *top_task)  { -	struct rt_mutex *lock;  	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; -	int detect_deadlock, ret = 0, depth = 0; +	struct rt_mutex_waiter *prerequeue_top_waiter; +	int ret = 0, depth = 0; +	struct rt_mutex *lock; +	bool detect_deadlock;  	unsigned long flags; +	bool requeue = true; -	detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter, -							 deadlock_detect); +	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);  	/*  	 * The (de)boosting is a step by step approach with a lot of @@ -360,6 +430,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,  	 * carefully whether things change under us.  	 */   again: +	/* +	 * We limit the lock chain length for each invocation. +	 */  	if (++depth > max_lock_depth) {  		static int prev_max; @@ -377,13 +450,28 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,  		return -EDEADLK;  	} + +	/* +	 * We are fully preemptible here and only hold the refcount on +	 * @task. 
So everything can have changed under us since the +	 * caller or our own code below (goto retry/again) dropped all +	 * locks. +	 */   retry:  	/* -	 * Task can not go away as we did a get_task() before ! +	 * [1] Task cannot go away as we did a get_task() before !  	 */  	raw_spin_lock_irqsave(&task->pi_lock, flags); +	/* +	 * [2] Get the waiter on which @task is blocked on. +	 */  	waiter = task->pi_blocked_on; + +	/* +	 * [3] check_exit_conditions_1() protected by task->pi_lock. +	 */ +  	/*  	 * Check whether the end of the boosting chain has been  	 * reached or the state of the chain has changed while we @@ -421,20 +509,41 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,  			goto out_unlock_pi;  		/*  		 * If deadlock detection is off, we stop here if we -		 * are not the top pi waiter of the task. +		 * are not the top pi waiter of the task. If deadlock +		 * detection is enabled we continue, but stop the +		 * requeueing in the chain walk.  		 */ -		if (!detect_deadlock && top_waiter != task_top_pi_waiter(task)) -			goto out_unlock_pi; +		if (top_waiter != task_top_pi_waiter(task)) { +			if (!detect_deadlock) +				goto out_unlock_pi; +			else +				requeue = false; +		}  	}  	/* -	 * When deadlock detection is off then we check, if further -	 * priority adjustment is necessary. +	 * If the waiter priority is the same as the task priority +	 * then there is no further priority adjustment necessary.  If +	 * deadlock detection is off, we stop the chain walk. If its +	 * enabled we continue, but stop the requeueing in the chain +	 * walk.  	 */ -	if (!detect_deadlock && waiter->prio == task->prio) -		goto out_unlock_pi; +	if (waiter->prio == task->prio) { +		if (!detect_deadlock) +			goto out_unlock_pi; +		else +			requeue = false; +	} +	/* +	 * [4] Get the next lock +	 */  	lock = waiter->lock; +	/* +	 * [5] We need to trylock here as we are holding task->pi_lock, +	 * which is the reverse lock order versus the other rtmutex +	 * operations. +	 */  	if (!raw_spin_trylock(&lock->wait_lock)) {  		raw_spin_unlock_irqrestore(&task->pi_lock, flags);  		cpu_relax(); @@ -442,79 +551,180 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,  	}  	/* +	 * [6] check_exit_conditions_2() protected by task->pi_lock and +	 * lock->wait_lock. +	 *  	 * Deadlock detection. If the lock is the same as the original  	 * lock which caused us to walk the lock chain or if the  	 * current lock is owned by the task which initiated the chain  	 * walk, we detected a deadlock.  	 */  	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { -		debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); +		debug_rt_mutex_deadlock(chwalk, orig_waiter, lock);  		raw_spin_unlock(&lock->wait_lock);  		ret = -EDEADLK;  		goto out_unlock_pi;  	} -	top_waiter = rt_mutex_top_waiter(lock); +	/* +	 * If we just follow the lock chain for deadlock detection, no +	 * need to do all the requeue operations. To avoid a truckload +	 * of conditionals around the various places below, just do the +	 * minimum chain walk checks. +	 */ +	if (!requeue) { +		/* +		 * No requeue[7] here. Just release @task [8] +		 */ +		raw_spin_unlock_irqrestore(&task->pi_lock, flags); +		put_task_struct(task); + +		/* +		 * [9] check_exit_conditions_3 protected by lock->wait_lock. +		 * If there is no owner of the lock, end of chain. +		 */ +		if (!rt_mutex_owner(lock)) { +			raw_spin_unlock(&lock->wait_lock); +			return 0; +		} + +		/* [10] Grab the next task, i.e. 
owner of @lock */ +		task = rt_mutex_owner(lock); +		get_task_struct(task); +		raw_spin_lock_irqsave(&task->pi_lock, flags); + +		/* +		 * No requeue [11] here. We just do deadlock detection. +		 * +		 * [12] Store whether owner is blocked +		 * itself. Decision is made after dropping the locks +		 */ +		next_lock = task_blocked_on_lock(task); +		/* +		 * Get the top waiter for the next iteration +		 */ +		top_waiter = rt_mutex_top_waiter(lock); + +		/* [13] Drop locks */ +		raw_spin_unlock_irqrestore(&task->pi_lock, flags); +		raw_spin_unlock(&lock->wait_lock); + +		/* If owner is not blocked, end of chain. */ +		if (!next_lock) +			goto out_put_task; +		goto again; +	} -	/* Requeue the waiter */ +	/* +	 * Store the current top waiter before doing the requeue +	 * operation on @lock. We need it for the boost/deboost +	 * decision below. +	 */ +	prerequeue_top_waiter = rt_mutex_top_waiter(lock); + +	/* [7] Requeue the waiter in the lock waiter list. */  	rt_mutex_dequeue(lock, waiter);  	waiter->prio = task->prio;  	rt_mutex_enqueue(lock, waiter); -	/* Release the task */ +	/* [8] Release the task */  	raw_spin_unlock_irqrestore(&task->pi_lock, flags); +	put_task_struct(task); + +	/* +	 * [9] check_exit_conditions_3 protected by lock->wait_lock. +	 * +	 * We must abort the chain walk if there is no lock owner even +	 * in the dead lock detection case, as we have nothing to +	 * follow here. This is the end of the chain we are walking. +	 */  	if (!rt_mutex_owner(lock)) {  		/* -		 * If the requeue above changed the top waiter, then we need -		 * to wake the new top waiter up to try to get the lock. +		 * If the requeue [7] above changed the top waiter, +		 * then we need to wake the new top waiter up to try +		 * to get the lock.  		 */ - -		if (top_waiter != rt_mutex_top_waiter(lock)) +		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))  			wake_up_process(rt_mutex_top_waiter(lock)->task);  		raw_spin_unlock(&lock->wait_lock); -		goto out_put_task; +		return 0;  	} -	put_task_struct(task); -	/* Grab the next task */ +	/* [10] Grab the next task, i.e. the owner of @lock */  	task = rt_mutex_owner(lock);  	get_task_struct(task);  	raw_spin_lock_irqsave(&task->pi_lock, flags); +	/* [11] requeue the pi waiters if necessary */  	if (waiter == rt_mutex_top_waiter(lock)) { -		/* Boost the owner */ -		rt_mutex_dequeue_pi(task, top_waiter); +		/* +		 * The waiter became the new top (highest priority) +		 * waiter on the lock. Replace the previous top waiter +		 * in the owner tasks pi waiters list with this waiter +		 * and adjust the priority of the owner. +		 */ +		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);  		rt_mutex_enqueue_pi(task, waiter);  		__rt_mutex_adjust_prio(task); -	} else if (top_waiter == waiter) { -		/* Deboost the owner */ +	} else if (prerequeue_top_waiter == waiter) { +		/* +		 * The waiter was the top waiter on the lock, but is +		 * no longer the top prority waiter. Replace waiter in +		 * the owner tasks pi waiters list with the new top +		 * (highest priority) waiter and adjust the priority +		 * of the owner. +		 * The new top waiter is stored in @waiter so that +		 * @waiter == @top_waiter evaluates to true below and +		 * we continue to deboost the rest of the chain. +		 */  		rt_mutex_dequeue_pi(task, waiter);  		waiter = rt_mutex_top_waiter(lock);  		rt_mutex_enqueue_pi(task, waiter);  		__rt_mutex_adjust_prio(task); +	} else { +		/* +		 * Nothing changed. No need to do any priority +		 * adjustment. 
+		 */  	}  	/* +	 * [12] check_exit_conditions_4() protected by task->pi_lock +	 * and lock->wait_lock. The actual decisions are made after we +	 * dropped the locks. +	 *  	 * Check whether the task which owns the current lock is pi  	 * blocked itself. If yes we store a pointer to the lock for  	 * the lock chain change detection above. After we dropped  	 * task->pi_lock next_lock cannot be dereferenced anymore.  	 */  	next_lock = task_blocked_on_lock(task); +	/* +	 * Store the top waiter of @lock for the end of chain walk +	 * decision below. +	 */ +	top_waiter = rt_mutex_top_waiter(lock); +	/* [13] Drop the locks */  	raw_spin_unlock_irqrestore(&task->pi_lock, flags); - -	top_waiter = rt_mutex_top_waiter(lock);  	raw_spin_unlock(&lock->wait_lock);  	/* +	 * Make the actual exit decisions [12], based on the stored +	 * values. +	 *  	 * We reached the end of the lock chain. Stop right here. No  	 * point to go back just to figure that out.  	 */  	if (!next_lock)  		goto out_put_task; +	/* +	 * If the current waiter is not the top waiter on the lock, +	 * then we can stop the chain walk here if we are not in full +	 * deadlock detection mode. +	 */  	if (!detect_deadlock && waiter != top_waiter)  		goto out_put_task; @@ -533,76 +743,119 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,   *   * Must be called with lock->wait_lock held.   * - * @lock:   the lock to be acquired. - * @task:   the task which wants to acquire the lock - * @waiter: the waiter that is queued to the lock's wait list. (could be NULL) + * @lock:   The lock to be acquired. + * @task:   The task which wants to acquire the lock + * @waiter: The waiter that is queued to the lock's wait list if the + *	    callsite called task_blocked_on_lock(), otherwise NULL   */  static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, -		struct rt_mutex_waiter *waiter) +				struct rt_mutex_waiter *waiter)  { +	unsigned long flags; +  	/* -	 * We have to be careful here if the atomic speedups are -	 * enabled, such that, when -	 *  - no other waiter is on the lock -	 *  - the lock has been released since we did the cmpxchg -	 * the lock can be released or taken while we are doing the -	 * checks and marking the lock with RT_MUTEX_HAS_WAITERS. +	 * Before testing whether we can acquire @lock, we set the +	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all +	 * other tasks which try to modify @lock into the slow path +	 * and they serialize on @lock->wait_lock. +	 * +	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state +	 * as explained at the top of this file if and only if:  	 * -	 * The atomic acquire/release aware variant of -	 * mark_rt_mutex_waiters uses a cmpxchg loop. After setting -	 * the WAITERS bit, the atomic release / acquire can not -	 * happen anymore and lock->wait_lock protects us from the -	 * non-atomic case. +	 * - There is a lock owner. The caller must fixup the +	 *   transient state if it does a trylock or leaves the lock +	 *   function due to a signal or timeout.  	 * -	 * Note, that this might set lock->owner = -	 * RT_MUTEX_HAS_WAITERS in the case the lock is not contended -	 * any more. This is fixed up when we take the ownership. -	 * This is the transitional state explained at the top of this file. +	 * - @task acquires the lock and there are no other +	 *   waiters. This is undone in rt_mutex_set_owner(@task) at +	 *   the end of this function.  	 */  	mark_rt_mutex_waiters(lock); +	/* +	 * If @lock has an owner, give up. 
+	 */  	if (rt_mutex_owner(lock))  		return 0;  	/* -	 * It will get the lock because of one of these conditions: -	 * 1) there is no waiter -	 * 2) higher priority than waiters -	 * 3) it is top waiter +	 * If @waiter != NULL, @task has already enqueued the waiter +	 * into @lock waiter list. If @waiter == NULL then this is a +	 * trylock attempt.  	 */ -	if (rt_mutex_has_waiters(lock)) { -		if (task->prio >= rt_mutex_top_waiter(lock)->prio) { -			if (!waiter || waiter != rt_mutex_top_waiter(lock)) -				return 0; -		} -	} - -	if (waiter || rt_mutex_has_waiters(lock)) { -		unsigned long flags; -		struct rt_mutex_waiter *top; - -		raw_spin_lock_irqsave(&task->pi_lock, flags); +	if (waiter) { +		/* +		 * If waiter is not the highest priority waiter of +		 * @lock, give up. +		 */ +		if (waiter != rt_mutex_top_waiter(lock)) +			return 0; -		/* remove the queued waiter. */ -		if (waiter) { -			rt_mutex_dequeue(lock, waiter); -			task->pi_blocked_on = NULL; -		} +		/* +		 * We can acquire the lock. Remove the waiter from the +		 * lock waiters list. +		 */ +		rt_mutex_dequeue(lock, waiter); +	} else {  		/* -		 * We have to enqueue the top waiter(if it exists) into -		 * task->pi_waiters list. +		 * If the lock has waiters already we check whether @task is +		 * eligible to take over the lock. +		 * +		 * If there are no other waiters, @task can acquire +		 * the lock.  @task->pi_blocked_on is NULL, so it does +		 * not need to be dequeued.  		 */  		if (rt_mutex_has_waiters(lock)) { -			top = rt_mutex_top_waiter(lock); -			rt_mutex_enqueue_pi(task, top); +			/* +			 * If @task->prio is greater than or equal to +			 * the top waiter priority (kernel view), +			 * @task lost. +			 */ +			if (task->prio >= rt_mutex_top_waiter(lock)->prio) +				return 0; + +			/* +			 * The current top waiter stays enqueued. We +			 * don't have to change anything in the lock +			 * waiters order. +			 */ +		} else { +			/* +			 * No waiters. Take the lock without the +			 * pi_lock dance.@task->pi_blocked_on is NULL +			 * and we have no waiters to enqueue in @task +			 * pi waiters list. +			 */ +			goto takeit;  		} -		raw_spin_unlock_irqrestore(&task->pi_lock, flags);  	} +	/* +	 * Clear @task->pi_blocked_on. Requires protection by +	 * @task->pi_lock. Redundant operation for the @waiter == NULL +	 * case, but conditionals are more expensive than a redundant +	 * store. +	 */ +	raw_spin_lock_irqsave(&task->pi_lock, flags); +	task->pi_blocked_on = NULL; +	/* +	 * Finish the lock acquisition. @task is the new owner. If +	 * other waiters exist we have to insert the highest priority +	 * waiter into @task->pi_waiters list. +	 */ +	if (rt_mutex_has_waiters(lock)) +		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock)); +	raw_spin_unlock_irqrestore(&task->pi_lock, flags); + +takeit:  	/* We got the lock. */  	debug_rt_mutex_lock(lock); +	/* +	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there +	 * are still waiters or clears it. 
+	 */  	rt_mutex_set_owner(lock, task);  	rt_mutex_deadlock_account_lock(lock, task); @@ -620,7 +873,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,  static int task_blocks_on_rt_mutex(struct rt_mutex *lock,  				   struct rt_mutex_waiter *waiter,  				   struct task_struct *task, -				   int detect_deadlock) +				   enum rtmutex_chainwalk chwalk)  {  	struct task_struct *owner = rt_mutex_owner(lock);  	struct rt_mutex_waiter *top_waiter = waiter; @@ -666,7 +919,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,  		__rt_mutex_adjust_prio(owner);  		if (owner->pi_blocked_on)  			chain_walk = 1; -	} else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) { +	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {  		chain_walk = 1;  	} @@ -691,7 +944,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,  	raw_spin_unlock(&lock->wait_lock); -	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, +	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,  					 next_lock, waiter, task);  	raw_spin_lock(&lock->wait_lock); @@ -753,9 +1006,9 @@ static void wakeup_next_waiter(struct rt_mutex *lock)  static void remove_waiter(struct rt_mutex *lock,  			  struct rt_mutex_waiter *waiter)  { -	int first = (waiter == rt_mutex_top_waiter(lock)); +	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));  	struct task_struct *owner = rt_mutex_owner(lock); -	struct rt_mutex *next_lock = NULL; +	struct rt_mutex *next_lock;  	unsigned long flags;  	raw_spin_lock_irqsave(&current->pi_lock, flags); @@ -763,29 +1016,31 @@ static void remove_waiter(struct rt_mutex *lock,  	current->pi_blocked_on = NULL;  	raw_spin_unlock_irqrestore(&current->pi_lock, flags); -	if (!owner) +	/* +	 * Only update priority if the waiter was the highest priority +	 * waiter of the lock and there is an owner to update. +	 */ +	if (!owner || !is_top_waiter)  		return; -	if (first) { - -		raw_spin_lock_irqsave(&owner->pi_lock, flags); +	raw_spin_lock_irqsave(&owner->pi_lock, flags); -		rt_mutex_dequeue_pi(owner, waiter); +	rt_mutex_dequeue_pi(owner, waiter); -		if (rt_mutex_has_waiters(lock)) { -			struct rt_mutex_waiter *next; +	if (rt_mutex_has_waiters(lock)) +		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); -			next = rt_mutex_top_waiter(lock); -			rt_mutex_enqueue_pi(owner, next); -		} -		__rt_mutex_adjust_prio(owner); +	__rt_mutex_adjust_prio(owner); -		/* Store the lock on which owner is blocked or NULL */ -		next_lock = task_blocked_on_lock(owner); +	/* Store the lock on which owner is blocked or NULL */ +	next_lock = task_blocked_on_lock(owner); -		raw_spin_unlock_irqrestore(&owner->pi_lock, flags); -	} +	raw_spin_unlock_irqrestore(&owner->pi_lock, flags); +	/* +	 * Don't walk the chain, if the owner task is not blocked +	 * itself. +	 */  	if (!next_lock)  		return; @@ -794,7 +1049,8 @@ static void remove_waiter(struct rt_mutex *lock,  	raw_spin_unlock(&lock->wait_lock); -	rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current); +	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock, +				   next_lock, NULL, current);  	raw_spin_lock(&lock->wait_lock);  } @@ -824,7 +1080,8 @@ void rt_mutex_adjust_pi(struct task_struct *task)  	/* gets dropped in rt_mutex_adjust_prio_chain()!
*/  	get_task_struct(task); -	rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task); +	rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, +				   next_lock, NULL, task);  }  /** @@ -902,7 +1159,7 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock,  static int __sched  rt_mutex_slowlock(struct rt_mutex *lock, int state,  		  struct hrtimer_sleeper *timeout, -		  int detect_deadlock) +		  enum rtmutex_chainwalk chwalk)  {  	struct rt_mutex_waiter waiter;  	int ret = 0; @@ -928,7 +1185,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,  			timeout->task = NULL;  	} -	ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock); +	ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);  	if (likely(!ret))  		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); @@ -937,7 +1194,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,  	if (unlikely(ret)) {  		remove_waiter(lock, &waiter); -		rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter); +		rt_mutex_handle_deadlock(ret, chwalk, &waiter);  	}  	/* @@ -960,22 +1217,31 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,  /*   * Slow path try-lock function:   */ -static inline int -rt_mutex_slowtrylock(struct rt_mutex *lock) +static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)  { -	int ret = 0; +	int ret; + +	/* +	 * If the lock already has an owner we fail to get the lock. +	 * This can be done without taking the @lock->wait_lock as +	 * it is only being read, and this is a trylock anyway. +	 */ +	if (rt_mutex_owner(lock)) +		return 0; +	/* +	 * The mutex has currently no owner. Lock the wait lock and +	 * try to acquire the lock. +	 */  	raw_spin_lock(&lock->wait_lock); -	if (likely(rt_mutex_owner(lock) != current)) { +	ret = try_to_take_rt_mutex(lock, current, NULL); -		ret = try_to_take_rt_mutex(lock, current, NULL); -		/* -		 * try_to_take_rt_mutex() sets the lock waiters -		 * bit unconditionally. Clean this up. -		 */ -		fixup_rt_mutex_waiters(lock); -	} +	/* +	 * try_to_take_rt_mutex() sets the lock waiters bit +	 * unconditionally. Clean this up. 
+	 */ +	fixup_rt_mutex_waiters(lock);  	raw_spin_unlock(&lock->wait_lock); @@ -1053,30 +1319,31 @@ rt_mutex_slowunlock(struct rt_mutex *lock)   */  static inline int  rt_mutex_fastlock(struct rt_mutex *lock, int state, -		  int detect_deadlock,  		  int (*slowfn)(struct rt_mutex *lock, int state,  				struct hrtimer_sleeper *timeout, -				int detect_deadlock)) +				enum rtmutex_chainwalk chwalk))  { -	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { +	if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {  		rt_mutex_deadlock_account_lock(lock, current);  		return 0;  	} else -		return slowfn(lock, state, NULL, detect_deadlock); +		return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);  }  static inline int  rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, -			struct hrtimer_sleeper *timeout, int detect_deadlock, +			struct hrtimer_sleeper *timeout, +			enum rtmutex_chainwalk chwalk,  			int (*slowfn)(struct rt_mutex *lock, int state,  				      struct hrtimer_sleeper *timeout, -				      int detect_deadlock)) +				      enum rtmutex_chainwalk chwalk))  { -	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { +	if (chwalk == RT_MUTEX_MIN_CHAINWALK && +	    likely(rt_mutex_cmpxchg(lock, NULL, current))) {  		rt_mutex_deadlock_account_lock(lock, current);  		return 0;  	} else -		return slowfn(lock, state, timeout, detect_deadlock); +		return slowfn(lock, state, timeout, chwalk);  }  static inline int @@ -1109,54 +1376,61 @@ void __sched rt_mutex_lock(struct rt_mutex *lock)  {  	might_sleep(); -	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock); +	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);  }  EXPORT_SYMBOL_GPL(rt_mutex_lock);  /**   * rt_mutex_lock_interruptible - lock a rt_mutex interruptible   * - * @lock: 		the rt_mutex to be locked - * @detect_deadlock:	deadlock detection on/off + * @lock:		the rt_mutex to be locked   *   * Returns: - *  0 		on success - * -EINTR 	when interrupted by a signal - * -EDEADLK	when the lock would deadlock (when deadlock detection is on) + *  0		on success + * -EINTR	when interrupted by a signal   */ -int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, -						 int detect_deadlock) +int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)  {  	might_sleep(); -	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, -				 detect_deadlock, rt_mutex_slowlock); +	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);  }  EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); +/* + * Futex variant with full deadlock detection. 
+ */ +int rt_mutex_timed_futex_lock(struct rt_mutex *lock, +			      struct hrtimer_sleeper *timeout) +{ +	might_sleep(); + +	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, +				       RT_MUTEX_FULL_CHAINWALK, +				       rt_mutex_slowlock); +} +  /**   * rt_mutex_timed_lock - lock a rt_mutex interruptible   *			the timeout structure is provided   *			by the caller   * - * @lock: 		the rt_mutex to be locked + * @lock:		the rt_mutex to be locked   * @timeout:		timeout structure or NULL (no timeout) - * @detect_deadlock:	deadlock detection on/off   *   * Returns: - *  0 		on success - * -EINTR 	when interrupted by a signal + *  0		on success + * -EINTR	when interrupted by a signal   * -ETIMEDOUT	when the timeout expired - * -EDEADLK	when the lock would deadlock (when deadlock detection is on)   */  int -rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout, -		    int detect_deadlock) +rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)  {  	might_sleep();  	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, -				       detect_deadlock, rt_mutex_slowlock); +				       RT_MUTEX_MIN_CHAINWALK, +				       rt_mutex_slowlock);  }  EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); @@ -1262,7 +1536,6 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,   * @lock:		the rt_mutex to take   * @waiter:		the pre-initialized rt_mutex_waiter   * @task:		the task to prepare - * @detect_deadlock:	perform deadlock detection (1) or not (0)   *   * Returns:   *  0 - task blocked on lock @@ -1273,7 +1546,7 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,   */  int rt_mutex_start_proxy_lock(struct rt_mutex *lock,  			      struct rt_mutex_waiter *waiter, -			      struct task_struct *task, int detect_deadlock) +			      struct task_struct *task)  {  	int ret; @@ -1285,7 +1558,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,  	}  	/* We enforce deadlock detection for futexes */ -	ret = task_blocks_on_rt_mutex(lock, waiter, task, 1); +	ret = task_blocks_on_rt_mutex(lock, waiter, task, +				      RT_MUTEX_FULL_CHAINWALK);  	if (ret && !rt_mutex_owner(lock)) {  		/* @@ -1331,22 +1605,20 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)   * rt_mutex_finish_proxy_lock() - Complete lock acquisition   * @lock:		the rt_mutex we were woken on   * @to:			the timeout, null if none. hrtimer should already have - * 			been started. + *			been started.   * @waiter:		the pre-initialized rt_mutex_waiter - * @detect_deadlock:	perform deadlock detection (1) or not (0)   *   * Complete the lock acquisition started our behalf by another thread.   
*   * Returns:   *  0 - success - * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK + * <0 - error, one of -EINTR, -ETIMEDOUT   *   * Special API call for PI-futex requeue support   */  int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,  			       struct hrtimer_sleeper *to, -			       struct rt_mutex_waiter *waiter, -			       int detect_deadlock) +			       struct rt_mutex_waiter *waiter)  {  	int ret; diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h index f6a1f3c133b1..c4060584c407 100644 --- a/kernel/locking/rtmutex.h +++ b/kernel/locking/rtmutex.h @@ -22,10 +22,15 @@  #define debug_rt_mutex_init(m, n)			do { } while (0)  #define debug_rt_mutex_deadlock(d, a ,l)		do { } while (0)  #define debug_rt_mutex_print_deadlock(w)		do { } while (0) -#define debug_rt_mutex_detect_deadlock(w,d)		(d)  #define debug_rt_mutex_reset_waiter(w)			do { } while (0)  static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)  {  	WARN(1, "rtmutex deadlock detected\n");  } + +static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *w, +						  enum rtmutex_chainwalk walk) +{ +	return walk == RT_MUTEX_FULL_CHAINWALK; +} diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 7431a9c86f35..855212501407 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -102,6 +102,21 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)  }  /* + * Constants for rt mutex functions which have a selectable deadlock + * detection. + * + * RT_MUTEX_MIN_CHAINWALK:	Stops the lock chain walk when there are + *				no further PI adjustments to be made. + * + * RT_MUTEX_FULL_CHAINWALK:	Invoke deadlock detection with a full + *				walk of the lock chain. + */ +enum rtmutex_chainwalk { +	RT_MUTEX_MIN_CHAINWALK, +	RT_MUTEX_FULL_CHAINWALK, +}; + +/*   * PI-futex support (proxy locking functions, etc.):   */  extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); @@ -111,12 +126,11 @@ extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,  				  struct task_struct *proxy_owner);  extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,  				     struct rt_mutex_waiter *waiter, -				     struct task_struct *task, -				     int detect_deadlock); +				     struct task_struct *task);  extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,  				      struct hrtimer_sleeper *to, -				      struct rt_mutex_waiter *waiter, -				      int detect_deadlock); +				      struct rt_mutex_waiter *waiter); +extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);  #ifdef CONFIG_DEBUG_RT_MUTEXES  # include "rtmutex-debug.h" diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index a2391ac135c8..d6203faf2eb1 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -329,7 +329,7 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)  		if (need_resched())  			break; -		arch_mutex_cpu_relax(); +		cpu_relax_lowlatency();  	}  	rcu_read_unlock(); @@ -381,7 +381,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)  		 * memory barriers as we'll eventually observe the right  		 * values at the cost of a few extra spins.  		 */ -		arch_mutex_cpu_relax(); +		cpu_relax_lowlatency();  	}  	osq_unlock(&sem->osq);  done:  | 
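
Several of the mutex.c and rtmutex.c hunks above apply the same micro-optimization: read the lock word first and only issue the atomic xchg()/cmpxchg() when that read says the operation can succeed, because a failed atomic read-modify-write still pulls the cache line in exclusive mode. Below is a minimal user-space analogue of the pattern, written with C11 atomics instead of the kernel's atomic_t API; the function name and layout are illustrative only, not taken from the patch.

#include <stdatomic.h>
#include <stdbool.h>

/* Mutex count convention from mutex.c: 1 unlocked, 0 locked, <0 locked with waiters. */
static bool try_acquire(atomic_int *count)
{
	/* Cheap shared read first; mirrors the !mutex_is_locked() checks above. */
	if (atomic_load_explicit(count, memory_order_relaxed) != 1)
		return false;

	/* Pay for the read-modify-write only when it has a chance to succeed. */
	int expected = 1;
	return atomic_compare_exchange_strong(count, &expected, 0);
}

The rt_mutex_slowtrylock() change is the same idea one level up: rt_mutex_owner() is read before lock->wait_lock is taken at all.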
