aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/locking/rwsem.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/locking/rwsem.c')
-rw-r--r--kernel/locking/rwsem.c176
1 files changed, 112 insertions, 64 deletions
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 69aba4abe104..44873594de03 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -27,6 +27,7 @@
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
+#include <trace/events/lock.h>
#ifndef CONFIG_PREEMPT_RT
#include "lock_events.h"
@@ -132,14 +133,19 @@
* the owner value concurrently without lock. Read from owner, however,
* may not need READ_ONCE() as long as the pointer value is only used
* for comparison and isn't being dereferenced.
+ *
+ * Both rwsem_{set,clear}_owner() functions should be in the same
+ * preempt disable section as the atomic op that changes sem->count.
*/
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
+ lockdep_assert_preemption_disabled();
atomic_long_set(&sem->owner, (long)current);
}
static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
+ lockdep_assert_preemption_disabled();
atomic_long_set(&sem->owner, 0);
}
@@ -250,13 +256,16 @@ static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
{
long tmp = RWSEM_UNLOCKED_VALUE;
+ bool ret = false;
+ preempt_disable();
if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
rwsem_set_owner(sem);
- return true;
+ ret = true;
}
- return false;
+ preempt_enable();
+ return ret;
}
/*
@@ -334,8 +343,6 @@ struct rwsem_waiter {
struct task_struct *task;
enum rwsem_waiter_type type;
unsigned long timeout;
-
- /* Writer only, not initialized in reader */
bool handoff_set;
};
#define rwsem_first_waiter(sem) \
@@ -375,16 +382,19 @@ rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
*
* Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
* this function. Modify with care.
+ *
+ * Return: true if wait_list isn't empty and false otherwise
*/
-static inline void
+static inline bool
rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
{
lockdep_assert_held(&sem->wait_lock);
list_del(&waiter->list);
if (likely(!list_empty(&sem->wait_list)))
- return;
+ return true;
atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
+ return false;
}
/*
@@ -455,10 +465,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
* to give up the lock), request a HANDOFF to
* force the issue.
*/
- if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
- time_after(jiffies, waiter->timeout)) {
- adjustment -= RWSEM_FLAG_HANDOFF;
- lockevent_inc(rwsem_rlock_handoff);
+ if (time_after(jiffies, waiter->timeout)) {
+ if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
+ adjustment -= RWSEM_FLAG_HANDOFF;
+ lockevent_inc(rwsem_rlock_handoff);
+ }
+ waiter->handoff_set = true;
}
atomic_long_add(-adjustment, &sem->count);
@@ -559,6 +571,33 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
}
/*
+ * Remove a waiter and try to wake up other waiters in the wait queue
+ * This function is called from the out_nolock path of both the reader and
+ * writer slowpaths with wait_lock held. It releases the wait_lock and
+ * optionally wake up waiters before it returns.
+ */
+static inline void
+rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
+ struct wake_q_head *wake_q)
+ __releases(&sem->wait_lock)
+{
+ bool first = rwsem_first_waiter(sem) == waiter;
+
+ wake_q_init(wake_q);
+
+ /*
+ * If the wait_list isn't empty and the waiter to be deleted is
+ * the first waiter, we wake up the remaining waiters as they may
+ * be eligible to acquire or spin on the lock.
+ */
+ if (rwsem_del_waiter(sem, waiter) && first)
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ if (!wake_q_empty(wake_q))
+ wake_up_q(wake_q);
+}
+
+/*
* This function must be called with the sem->wait_lock held to prevent
* race conditions between checking the rwsem wait list and setting the
* sem->count accordingly.
@@ -568,7 +607,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
struct rwsem_waiter *waiter)
{
- bool first = rwsem_first_waiter(sem) == waiter;
+ struct rwsem_waiter *first = rwsem_first_waiter(sem);
long count, new;
lockdep_assert_held(&sem->wait_lock);
@@ -578,11 +617,20 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
if (has_handoff) {
- if (!first)
+ /*
+ * Honor handoff bit and yield only when the first
+ * waiter is the one that set it. Otherwisee, we
+ * still try to acquire the rwsem.
+ */
+ if (first->handoff_set && (waiter != first))
return false;
- /* First waiter inherits a previously set handoff bit */
- waiter->handoff_set = true;
+ /*
+ * First waiter can inherit a previously set handoff
+ * bit and spin on rwsem if lock acquisition fails.
+ */
+ if (waiter == first)
+ waiter->handoff_set = true;
}
new = count;
@@ -901,7 +949,7 @@ done:
*/
static inline void clear_nonspinnable(struct rw_semaphore *sem)
{
- if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
+ if (unlikely(rwsem_test_oflags(sem, RWSEM_NONSPINNABLE)))
atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
}
@@ -926,6 +974,31 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
#endif
/*
+ * Prepare to wake up waiter(s) in the wait queue by putting them into the
+ * given wake_q if the rwsem lock owner isn't a writer. If rwsem is likely
+ * reader-owned, wake up read lock waiters in queue front or wake up any
+ * front waiter otherwise.
+
+ * This is being called from both reader and writer slow paths.
+ */
+static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count,
+ struct wake_q_head *wake_q)
+{
+ enum rwsem_wake_type wake_type;
+
+ if (count & RWSEM_WRITER_MASK)
+ return;
+
+ if (count & RWSEM_READER_MASK) {
+ wake_type = RWSEM_WAKE_READERS;
+ } else {
+ wake_type = RWSEM_WAKE_ANY;
+ clear_nonspinnable(sem);
+ }
+ rwsem_mark_wake(sem, wake_type, wake_q);
+}
+
+/*
* Wait for the read lock to be granted
*/
static struct rw_semaphore __sched *
@@ -935,7 +1008,6 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
long rcnt = (count >> RWSEM_READER_SHIFT);
struct rwsem_waiter waiter;
DEFINE_WAKE_Q(wake_q);
- bool wake = false;
/*
* To prevent a constant stream of readers from starving a sleeping
@@ -972,17 +1044,17 @@ queue:
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+ waiter.handoff_set = false;
raw_spin_lock_irq(&sem->wait_lock);
if (list_empty(&sem->wait_list)) {
/*
* In case the wait queue is empty and the lock isn't owned
- * by a writer or has the handoff bit set, this reader can
- * exit the slowpath and return immediately as its
- * RWSEM_READER_BIAS has already been set in the count.
+ * by a writer, this reader can exit the slowpath and return
+ * immediately as its RWSEM_READER_BIAS has already been set
+ * in the count.
*/
- if (!(atomic_long_read(&sem->count) &
- (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
+ if (!(atomic_long_read(&sem->count) & RWSEM_WRITER_MASK)) {
/* Provide lock ACQUIRE */
smp_acquire__after_ctrl_dep();
raw_spin_unlock_irq(&sem->wait_lock);
@@ -997,22 +1069,13 @@ queue:
/* we're now waiting on the lock, but no longer actively locking */
count = atomic_long_add_return(adjustment, &sem->count);
- /*
- * If there are no active locks, wake the front queued process(es).
- *
- * If there are no writers and we are first in the queue,
- * wake our own waiter to join the existing active readers !
- */
- if (!(count & RWSEM_LOCK_MASK)) {
- clear_nonspinnable(sem);
- wake = true;
- }
- if (wake || (!(count & RWSEM_WRITER_MASK) &&
- (adjustment & RWSEM_FLAG_WAITERS)))
- rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
-
+ rwsem_cond_wake_waiter(sem, count, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
- wake_up_q(&wake_q);
+
+ if (!wake_q_empty(&wake_q))
+ wake_up_q(&wake_q);
+
+ trace_contention_begin(sem, LCB_F_READ);
/* wait to be given the lock */
for (;;) {
@@ -1035,23 +1098,23 @@ queue:
__set_current_state(TASK_RUNNING);
lockevent_inc(rwsem_rlock);
+ trace_contention_end(sem, 0);
return sem;
out_nolock:
- rwsem_del_waiter(sem, &waiter);
- raw_spin_unlock_irq(&sem->wait_lock);
+ rwsem_del_wake_waiter(sem, &waiter, &wake_q);
__set_current_state(TASK_RUNNING);
lockevent_inc(rwsem_rlock_fail);
+ trace_contention_end(sem, -EINTR);
return ERR_PTR(-EINTR);
}
/*
* Wait until we successfully acquire the write lock
*/
-static struct rw_semaphore *
+static struct rw_semaphore __sched *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
{
- long count;
struct rwsem_waiter waiter;
DEFINE_WAKE_Q(wake_q);
@@ -1075,23 +1138,8 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
/* we're now waiting on the lock */
if (rwsem_first_waiter(sem) != &waiter) {
- count = atomic_long_read(&sem->count);
-
- /*
- * If there were already threads queued before us and:
- * 1) there are no active locks, wake the front
- * queued process(es) as the handoff bit might be set.
- * 2) there are no active writers and some readers, the lock
- * must be read owned; so we try to wake any read lock
- * waiters that were queued ahead of us.
- */
- if (count & RWSEM_WRITER_MASK)
- goto wait;
-
- rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
- ? RWSEM_WAKE_READERS
- : RWSEM_WAKE_ANY, &wake_q);
-
+ rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count),
+ &wake_q);
if (!wake_q_empty(&wake_q)) {
/*
* We want to minimize wait_lock hold time especially
@@ -1099,16 +1147,16 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
*/
raw_spin_unlock_irq(&sem->wait_lock);
wake_up_q(&wake_q);
- wake_q_init(&wake_q); /* Used again, reinit */
raw_spin_lock_irq(&sem->wait_lock);
}
} else {
atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
}
-wait:
/* wait until we successfully acquire the lock */
set_current_state(state);
+ trace_contention_begin(sem, LCB_F_WRITE);
+
for (;;) {
if (rwsem_try_write_lock(sem, &waiter)) {
/* rwsem_try_write_lock() implies ACQUIRE on success */
@@ -1148,17 +1196,15 @@ trylock_again:
__set_current_state(TASK_RUNNING);
raw_spin_unlock_irq(&sem->wait_lock);
lockevent_inc(rwsem_wlock);
+ trace_contention_end(sem, 0);
return sem;
out_nolock:
__set_current_state(TASK_RUNNING);
raw_spin_lock_irq(&sem->wait_lock);
- rwsem_del_waiter(sem, &waiter);
- if (!list_empty(&sem->wait_list))
- rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
- raw_spin_unlock_irq(&sem->wait_lock);
- wake_up_q(&wake_q);
+ rwsem_del_wake_waiter(sem, &waiter, &wake_q);
lockevent_inc(rwsem_wlock_fail);
+ trace_contention_end(sem, -EINTR);
return ERR_PTR(-EINTR);
}
@@ -1314,8 +1360,10 @@ static inline void __up_write(struct rw_semaphore *sem)
DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
!rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
+ preempt_disable();
rwsem_clear_owner(sem);
tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
+ preempt_enable();
if (unlikely(tmp & RWSEM_FLAG_WAITERS))
rwsem_wake(sem);
}