aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r-- include/linux/spinlock.h 53
-rw-r--r-- kernel/sched/core.c 41
2 files changed, 57 insertions, 37 deletions
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index fd57888d4942..3190997df9ca 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -114,29 +114,48 @@ do { \
#endif /*arch_spin_is_contended*/
/*
- * This barrier must provide two things:
+ * smp_mb__after_spinlock() provides the equivalent of a full memory barrier
+ * between program-order earlier lock acquisitions and program-order later
+ * memory accesses.
*
- * - it must guarantee a STORE before the spin_lock() is ordered against a
- * LOAD after it, see the comments at its two usage sites.
+ * This guarantees that the following two properties hold:
*
- * - it must ensure the critical section is RCsc.
+ * 1) Given the snippet:
*
- * The latter is important for cases where we observe values written by other
- * CPUs in spin-loops, without barriers, while being subject to scheduling.
+ * { X = 0; Y = 0; }
*
- * CPU0                    CPU1                    CPU2
+ * CPU0                            CPU1
*
- *                         for (;;) {
- *                           if (READ_ONCE(X))
- *                             break;
- *                         }
- * X=1
- *                         <sched-out>
- *                                                 <sched-in>
- *                                                 r = X;
+ * WRITE_ONCE(X, 1);               WRITE_ONCE(Y, 1);
+ * spin_lock(S);                   smp_mb();
+ * smp_mb__after_spinlock();       r1 = READ_ONCE(X);
+ * r0 = READ_ONCE(Y);
+ * spin_unlock(S);
*
- * without transitivity it could be that CPU1 observes X!=0 breaks the loop,
- * we get migrated and CPU2 sees X==0.
+ * it is forbidden that CPU0 does not observe CPU1's store to Y (r0 = 0)
+ * and CPU1 does not observe CPU0's store to X (r1 = 0); see the comments
+ * preceding the call to smp_mb__after_spinlock() in __schedule() and in
+ * try_to_wake_up().
+ *
+ * 2) Given the snippet:
+ *
+ * { X = 0; Y = 0; }
+ *
+ * CPU0                CPU1                        CPU2
+ *
+ * spin_lock(S);       spin_lock(S);               r1 = READ_ONCE(Y);
+ * WRITE_ONCE(X, 1);   smp_mb__after_spinlock();   smp_rmb();
+ * spin_unlock(S);     r0 = READ_ONCE(X);          r2 = READ_ONCE(X);
+ *                     WRITE_ONCE(Y, 1);
+ *                     spin_unlock(S);
+ *
+ * it is forbidden that CPU0's critical section executes before CPU1's
+ * critical section (r0 = 1), CPU2 observes CPU1's store to Y (r1 = 1)
+ * and CPU2 does not observe CPU0's store to X (r2 = 0); see the comments
+ * preceding the calls to smp_rmb() in try_to_wake_up() for similar
+ * snippets but "projected" onto two CPUs.
+ *
+ * Property (2) upgrades the lock to an RCsc lock.
*
* Since most load-store architectures implement ACQUIRE with an smp_mb() after
* the LL/SC loop, they need no further barriers. Similarly all our TSO
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe365c9a08e9..0c5ec2abdf93 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1998,21 +1998,20 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* be possible to, falsely, observe p->on_rq == 0 and get stuck
* in smp_cond_load_acquire() below.
*
- * sched_ttwu_pending()                 try_to_wake_up()
- *   [S] p->on_rq = 1;                  [L] P->state
- *       UNLOCK rq->lock  -----.
- *                              \
- *                               +---   RMB
- * schedule()                   /
- *       LOCK rq->lock    -----'
- *       UNLOCK rq->lock
+ * sched_ttwu_pending()                    try_to_wake_up()
+ *   STORE p->on_rq = 1                      LOAD p->state
+ *   UNLOCK rq->lock
+ *
+ * __schedule() (switch to task 'p')
+ *   LOCK rq->lock                           smp_rmb();
+ *   smp_mb__after_spinlock();
+ *   UNLOCK rq->lock
*
* [task p]
- *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
+ *   STORE p->state = UNINTERRUPTIBLE        LOAD p->on_rq
*
- * Pairs with the UNLOCK+LOCK on rq->lock from the
- * last wakeup of our task and the schedule that got our task
- * current.
+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+ * __schedule(). See the comment for smp_mb__after_spinlock().
*/
smp_rmb();
if (p->on_rq && ttwu_remote(p, wake_flags))
@@ -2026,15 +2025,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* One must be running (->on_cpu == 1) in order to remove oneself
* from the runqueue.
*
- *   [S] ->on_cpu = 1;     [L] ->on_rq
- *       UNLOCK rq->lock
- *                         RMB
- *       LOCK rq->lock
- *   [S] ->on_rq = 0;      [L] ->on_cpu
+ * __schedule() (switch to task 'p')       try_to_wake_up()
+ *   STORE p->on_cpu = 1                     LOAD p->on_rq
+ *   UNLOCK rq->lock
+ *
+ * __schedule() (put 'p' to sleep)
+ *   LOCK rq->lock                           smp_rmb();
+ *   smp_mb__after_spinlock();
+ *   STORE p->on_rq = 0                      LOAD p->on_cpu
*
- * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock
- * from the consecutive calls to schedule(); the first switching to our
- * task, the second putting it to sleep.
+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+ * __schedule(). See the comment for smp_mb__after_spinlock().
*/
smp_rmb();