From 0172d9e322035bf7bb66a7dfdd795c38d71dbba9 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 15 Nov 2018 11:19:44 -0500 Subject: tools/memory-model: Rename some RCU relations In preparation for adding support for SRCU, rename "crit" to "rcu-rscs", rename "rscs" to "rcu-rscsi", and remove the restriction to only the outermost level of nesting. The name change is needed for disambiguating RCU read-side critical sections from SRCU read-side critical sections. Adding the "i" at the end of "rcu-rscsi" emphasizes that the relation is inverted; it links rcu_read_unlock() events to their corresponding preceding rcu_read_lock() events. The restriction to outermost nesting levels was never essential; it was included mostly to show that it could be done. Rather than add equivalent unnecessary code for SRCU lock nesting, it seemed better to remove the existing code. Signed-off-by: Alan Stern Signed-off-by: Paul E. McKenney Tested-by: Andrea Parri --- tools/memory-model/linux-kernel.bell | 9 +++------ tools/memory-model/linux-kernel.cat | 10 +++++----- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 796513362c05..353c8d68e030 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell @@ -34,7 +34,7 @@ enum Barriers = 'wmb (*smp_wmb*) || instructions F[Barriers] (* Compute matching pairs of nested Rcu-lock and Rcu-unlock *) -let matched = let rec +let rcu-rscs = let rec unmatched-locks = Rcu-lock \ domain(matched) and unmatched-unlocks = Rcu-unlock \ range(matched) and unmatched = unmatched-locks | unmatched-unlocks @@ -46,8 +46,5 @@ let matched = let rec in matched (* Validate nesting *) -flag ~empty Rcu-lock \ domain(matched) as unbalanced-rcu-locking -flag ~empty Rcu-unlock \ range(matched) as unbalanced-rcu-locking - -(* Outermost level of nesting only *) -let crit = matched \ (po^-1 ; matched ; po^-1) +flag ~empty Rcu-lock \ domain(rcu-rscs) as unbalanced-rcu-locking +flag ~empty Rcu-unlock \ range(rcu-rscs) as unbalanced-rcu-locking diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index 8f23c74a96fd..ab9de9c1234b 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -95,7 +95,7 @@ acyclic pb as propagation * onward on the one hand and from the rcu_read_unlock() backwards on the * other hand. *) -let rscs = po ; crit^-1 ; po? +let rcu-rscsi = po ; rcu-rscs^-1 ; po? (* * The synchronize_rcu() strong fence is special in that it can order not @@ -109,10 +109,10 @@ let rcu-link = hb* ; pb* ; prop * critical sections (joined by rcu-link) acts as a generalized strong fence. *) let rec rcu-fence = gp | - (gp ; rcu-link ; rscs) | - (rscs ; rcu-link ; gp) | - (gp ; rcu-link ; rcu-fence ; rcu-link ; rscs) | - (rscs ; rcu-link ; rcu-fence ; rcu-link ; gp) | + (gp ; rcu-link ; rcu-rscsi) | + (rcu-rscsi ; rcu-link ; gp) | + (gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | + (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; gp) | (rcu-fence ; rcu-link ; rcu-fence) (* rb orders instructions just as pb does *) -- cgit v1.2.3-59-g8ed1b From 284749b0aebbf3ab26ff92198545aea36165f6bf Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 15 Nov 2018 11:19:58 -0500 Subject: tools/memory-model: Refactor some RCU relations In preparation for adding support for SRCU, refactor the definitions of rcu-fence, rcu-rscsi, rcu-link, and rb by moving the po and po? terms from the first two to the second two. An rcu-gp relation is added; it is equivalent to gp with the po and po? terms removed. This is necessary because for SRCU, we will have to use the loc relation to check that the terms at the start and end of each disjunct in the definition of rcu-fence refer to the same srcu_struct location. If these terms are hidden behind po and po?, there's no way to carry out this check. Signed-off-by: Alan Stern Signed-off-by: Paul E. McKenney Tested-by: Andrea Parri --- tools/memory-model/linux-kernel.cat | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index ab9de9c1234b..b8e6197f05af 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -91,32 +91,37 @@ acyclic pb as propagation (*******) (* - * Effect of read-side critical section proceeds from the rcu_read_lock() - * onward on the one hand and from the rcu_read_unlock() backwards on the + * Effects of read-side critical sections proceed from the rcu_read_unlock() + * backwards on the one hand, and from the rcu_read_lock() forwards on the * other hand. + * + * In the definition of rcu-fence below, the po term at the left-hand side + * of each disjunct and the po? term at the right-hand end have been factored + * out. They have been moved into the definitions of rcu-link and rb. *) -let rcu-rscsi = po ; rcu-rscs^-1 ; po? +let rcu-gp = [Sync-rcu] (* Compare with gp *) +let rcu-rscsi = rcu-rscs^-1 (* * The synchronize_rcu() strong fence is special in that it can order not * one but two non-rf relations, but only in conjunction with an RCU * read-side critical section. *) -let rcu-link = hb* ; pb* ; prop +let rcu-link = po? ; hb* ; pb* ; prop ; po (* * Any sequence containing at least as many grace periods as RCU read-side * critical sections (joined by rcu-link) acts as a generalized strong fence. *) -let rec rcu-fence = gp | - (gp ; rcu-link ; rcu-rscsi) | - (rcu-rscsi ; rcu-link ; gp) | - (gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | - (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; gp) | +let rec rcu-fence = rcu-gp | + (rcu-gp ; rcu-link ; rcu-rscsi) | + (rcu-rscsi ; rcu-link ; rcu-gp) | + (rcu-gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | + (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; rcu-gp) | (rcu-fence ; rcu-link ; rcu-fence) (* rb orders instructions just as pb does *) -let rb = prop ; rcu-fence ; hb* ; pb* +let rb = prop ; po ; rcu-fence ; po? ; hb* ; pb* irreflexive rb as rcu -- cgit v1.2.3-59-g8ed1b From a3f600d92da564ad35f237c8aeab268ca49377cc Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 15 Nov 2018 11:20:37 -0500 Subject: tools/memory-model: Add SRCU support Add support for SRCU. Herd creates srcu events and linux-kernel.def associates them with three possible annotations (srcu-lock, srcu-unlock, and sync-srcu) corresponding to the API routines srcu_read_lock(), srcu_read_unlock(), and synchronize_srcu(). The linux-kernel.bell file now declares the annotations and determines matching lock/unlock pairs delimiting SRCU read-side critical sections, and it also checks for synchronize_srcu() calls inside an RCU critical section (which would generate a "sleeping in atomic context" error in real kernel code). The linux-kernel.cat file now adds SRCU-induced ordering, analogous to the existing RCU-induced ordering, to the gp and rcu-fence relations. Curiously enough, these small changes to the model's .cat code are all that is needed to describe SRCU. Portions of this patch (linux-kernel.def and the first hunk in linux-kernel.bell) were written by Luc Maranget. Signed-off-by: Alan Stern CC: Luc Maranget Signed-off-by: Paul E. McKenney Tested-by: Andrea Parri --- tools/memory-model/linux-kernel.bell | 25 +++++++++++++++++++++++++ tools/memory-model/linux-kernel.cat | 18 ++++++++++++++---- tools/memory-model/linux-kernel.def | 5 +++++ 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 353c8d68e030..9c42cd9ddcb4 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell @@ -33,6 +33,12 @@ enum Barriers = 'wmb (*smp_wmb*) || 'after-unlock-lock (*smp_mb__after_unlock_lock*) instructions F[Barriers] +(* SRCU *) +enum SRCU = 'srcu-lock || 'srcu-unlock || 'sync-srcu +instructions SRCU[SRCU] +(* All srcu events *) +let Srcu = Srcu-lock | Srcu-unlock | Sync-srcu + (* Compute matching pairs of nested Rcu-lock and Rcu-unlock *) let rcu-rscs = let rec unmatched-locks = Rcu-lock \ domain(matched) @@ -48,3 +54,22 @@ let rcu-rscs = let rec (* Validate nesting *) flag ~empty Rcu-lock \ domain(rcu-rscs) as unbalanced-rcu-locking flag ~empty Rcu-unlock \ range(rcu-rscs) as unbalanced-rcu-locking + +(* Compute matching pairs of nested Srcu-lock and Srcu-unlock *) +let srcu-rscs = let rec + unmatched-locks = Srcu-lock \ domain(matched) + and unmatched-unlocks = Srcu-unlock \ range(matched) + and unmatched = unmatched-locks | unmatched-unlocks + and unmatched-po = ([unmatched] ; po ; [unmatched]) & loc + and unmatched-locks-to-unlocks = + ([unmatched-locks] ; po ; [unmatched-unlocks]) & loc + and matched = matched | (unmatched-locks-to-unlocks \ + (unmatched-po ; unmatched-po)) + in matched + +(* Validate nesting *) +flag ~empty Srcu-lock \ domain(srcu-rscs) as unbalanced-srcu-locking +flag ~empty Srcu-unlock \ range(srcu-rscs) as unbalanced-srcu-locking + +(* Check for use of synchronize_srcu() inside an RCU critical section *) +flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index b8e6197f05af..8dcb37835b61 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -33,7 +33,7 @@ let mb = ([M] ; fencerel(Mb) ; [M]) | ([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) | ([M] ; po ; [UL] ; (co | po) ; [LKW] ; fencerel(After-unlock-lock) ; [M]) -let gp = po ; [Sync-rcu] ; po? +let gp = po ; [Sync-rcu | Sync-srcu] ; po? let strong-fence = mb | gp @@ -92,15 +92,18 @@ acyclic pb as propagation (* * Effects of read-side critical sections proceed from the rcu_read_unlock() - * backwards on the one hand, and from the rcu_read_lock() forwards on the - * other hand. + * or srcu_read_unlock() backwards on the one hand, and from the + * rcu_read_lock() or srcu_read_lock() forwards on the other hand. * * In the definition of rcu-fence below, the po term at the left-hand side * of each disjunct and the po? term at the right-hand end have been factored * out. They have been moved into the definitions of rcu-link and rb. + * This was necessary in order to apply the "& loc" tests correctly. *) let rcu-gp = [Sync-rcu] (* Compare with gp *) +let srcu-gp = [Sync-srcu] let rcu-rscsi = rcu-rscs^-1 +let srcu-rscsi = srcu-rscs^-1 (* * The synchronize_rcu() strong fence is special in that it can order not @@ -112,12 +115,19 @@ let rcu-link = po? ; hb* ; pb* ; prop ; po (* * Any sequence containing at least as many grace periods as RCU read-side * critical sections (joined by rcu-link) acts as a generalized strong fence. + * Likewise for SRCU grace periods and read-side critical sections, provided + * the synchronize_srcu() and srcu_read_[un]lock() calls refer to the same + * struct srcu_struct location. *) -let rec rcu-fence = rcu-gp | +let rec rcu-fence = rcu-gp | srcu-gp | (rcu-gp ; rcu-link ; rcu-rscsi) | + ((srcu-gp ; rcu-link ; srcu-rscsi) & loc) | (rcu-rscsi ; rcu-link ; rcu-gp) | + ((srcu-rscsi ; rcu-link ; srcu-gp) & loc) | (rcu-gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | + ((srcu-gp ; rcu-link ; rcu-fence ; rcu-link ; srcu-rscsi) & loc) | (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; rcu-gp) | + ((srcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; srcu-gp) & loc) | (rcu-fence ; rcu-link ; rcu-fence) (* rb orders instructions just as pb does *) diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def index b27911cc087d..1d6a120cde14 100644 --- a/tools/memory-model/linux-kernel.def +++ b/tools/memory-model/linux-kernel.def @@ -47,6 +47,11 @@ rcu_read_unlock() { __fence{rcu-unlock}; } synchronize_rcu() { __fence{sync-rcu}; } synchronize_rcu_expedited() { __fence{sync-rcu}; } +// SRCU +srcu_read_lock(X) __srcu{srcu-lock}(X) +srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X); } +synchronize_srcu(X) { __srcu{sync-srcu}(X); } + // Atomic atomic_read(X) READ_ONCE(*X) atomic_set(X,V) { WRITE_ONCE(*X,V); } -- cgit v1.2.3-59-g8ed1b From ad9fd20b6dadb0cb14551477fcebe0fdf2e697dd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 26 Nov 2018 14:26:43 -0800 Subject: tools/memory-model: Update README for addition of SRCU This commit updates the section on LKMM limitations to no longer say that SRCU is not modeled, but instead describe how LKMM's modeling of SRCU departs from the Linux-kernel implementation. TL;DR: There is no known valid use case that cares about the Linux kernel's ability to have partially overlapping SRCU read-side critical sections. Signed-off-by: Paul E. McKenney Acked-by: Andrea Parri --- tools/memory-model/README | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tools/memory-model/README b/tools/memory-model/README index 0f2c366518c6..9d7d4f23503f 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -221,8 +221,29 @@ The Linux-kernel memory model has the following limitations: additional call_rcu() process to the site of the emulated rcu-barrier(). - e. Sleepable RCU (SRCU) is not modeled. It can be - emulated, but perhaps not simply. + e. Although sleepable RCU (SRCU) is now modeled, there + are some subtle differences between its semantics and + those in the Linux kernel. For example, the kernel + might interpret the following sequence as two partially + overlapping SRCU read-side critical sections: + + 1 r1 = srcu_read_lock(&my_srcu); + 2 do_something_1(); + 3 r2 = srcu_read_lock(&my_srcu); + 4 do_something_2(); + 5 srcu_read_unlock(&my_srcu, r1); + 6 do_something_3(); + 7 srcu_read_unlock(&my_srcu, r2); + + In contrast, LKMM will interpret this as a nested pair of + SRCU read-side critical sections, with the outer critical + section spanning lines 1-7 and the inner critical section + spanning lines 3-5. + + This difference would be more of a concern had anyone + identified a reasonable use case for partially overlapping + SRCU read-side critical sections. For more information, + please see: https://paulmck.livejournal.com/40593.html f. Reader-writer locking is not modeled. It can be emulated in litmus tests using atomic read-modify-write -- cgit v1.2.3-59-g8ed1b From 648e717586f2a832687fe44e2e0afb7a6fdea232 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 11 Dec 2018 11:38:53 -0500 Subject: tools/memory-model: Update Documentation/explanation.txt to include SRCU support The recent commit adding support for SRCU to the Linux Kernel Memory Model ended up changing the names and meanings of several relations. This patch updates the explanation.txt documentation file to reflect those changes. It also revises the statement of the RCU Guarantee to a more accurate form, and it adds a short paragraph mentioning the new support for SRCU. Signed-off-by: Alan Stern Cc: Akira Yokosawa Cc: Andrea Parri Cc: Boqun Feng Cc: Daniel Lustig Cc: David Howells Cc: Jade Alglave Cc: Luc Maranget Cc: Nicholas Piggin Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Will Deacon Signed-off-by: Paul E. McKenney Acked-by: Andrea Parri --- tools/memory-model/Documentation/explanation.txt | 289 ++++++++++++----------- 1 file changed, 152 insertions(+), 137 deletions(-) diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index 35bff92cc773..68caa9a976d0 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -27,7 +27,7 @@ Explanation of the Linux-Kernel Memory Consistency Model 19. AND THEN THERE WAS ALPHA 20. THE HAPPENS-BEFORE RELATION: hb 21. THE PROPAGATES-BEFORE RELATION: pb - 22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb + 22. RCU RELATIONS: rcu-link, rcu-gp, rcu-rscsi, rcu-fence, and rb 23. LOCKING 24. ODDS AND ENDS @@ -1430,8 +1430,8 @@ they execute means that it cannot have cycles. This requirement is the content of the LKMM's "propagation" axiom. -RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb ----------------------------------------------------- +RCU RELATIONS: rcu-link, rcu-gp, rcu-rscsi, rcu-fence, and rb +------------------------------------------------------------- RCU (Read-Copy-Update) is a powerful synchronization mechanism. It rests on two concepts: grace periods and read-side critical sections. @@ -1446,17 +1446,19 @@ As far as memory models are concerned, RCU's main feature is its Grace-Period Guarantee, which states that a critical section can never span a full grace period. In more detail, the Guarantee says: - If a critical section starts before a grace period then it - must end before the grace period does. In addition, every - store that propagates to the critical section's CPU before the - end of the critical section must propagate to every CPU before - the end of the grace period. + For any critical section C and any grace period G, at least + one of the following statements must hold: - If a critical section ends after a grace period ends then it - must start after the grace period does. In addition, every - store that propagates to the grace period's CPU before the - start of the grace period must propagate to every CPU before - the start of the critical section. +(1) C ends before G does, and in addition, every store that + propagates to C's CPU before the end of C must propagate to + every CPU before G ends. + +(2) G starts before C does, and in addition, every store that + propagates to G's CPU before the start of G must propagate + to every CPU before C starts. + +In particular, it is not possible for a critical section to both start +before and end after a grace period. Here is a simple example of RCU in action: @@ -1483,10 +1485,11 @@ The Grace Period Guarantee tells us that when this code runs, it will never end with r1 = 1 and r2 = 0. The reasoning is as follows. r1 = 1 means that P0's store to x propagated to P1 before P1 called synchronize_rcu(), so P0's critical section must have started before -P1's grace period. On the other hand, r2 = 0 means that P0's store to -y, which occurs before the end of the critical section, did not -propagate to P1 before the end of the grace period, violating the -Guarantee. +P1's grace period, contrary to part (2) of the Guarantee. On the +other hand, r2 = 0 means that P0's store to y, which occurs before the +end of the critical section, did not propagate to P1 before the end of +the grace period, contrary to part (1). Together the results violate +the Guarantee. In the kernel's implementations of RCU, the requirements for stores to propagate to every CPU are fulfilled by placing strong fences at @@ -1504,11 +1507,11 @@ before" or "ends after" a grace period? Some aspects of the meaning are pretty obvious, as in the example above, but the details aren't entirely clear. The LKMM formalizes this notion by means of the rcu-link relation. rcu-link encompasses a very general notion of -"before": Among other things, X ->rcu-link Z includes cases where X -happens-before or is equal to some event Y which is equal to or comes -before Z in the coherence order. When Y = Z this says that X ->rfe Z -implies X ->rcu-link Z. In addition, when Y = X it says that X ->fr Z -and X ->co Z each imply X ->rcu-link Z. +"before": If E and F are RCU fence events (i.e., rcu_read_lock(), +rcu_read_unlock(), or synchronize_rcu()) then among other things, +E ->rcu-link F includes cases where E is po-before some memory-access +event X, F is po-after some memory-access event Y, and we have any of +X ->rfe Y, X ->co Y, or X ->fr Y. The formal definition of the rcu-link relation is more than a little obscure, and we won't give it here. It is closely related to the pb @@ -1516,171 +1519,173 @@ relation, and the details don't matter unless you want to comb through a somewhat lengthy formal proof. Pretty much all you need to know about rcu-link is the information in the preceding paragraph. -The LKMM also defines the gp and rscs relations. They bring grace -periods and read-side critical sections into the picture, in the +The LKMM also defines the rcu-gp and rcu-rscsi relations. They bring +grace periods and read-side critical sections into the picture, in the following way: - E ->gp F means there is a synchronize_rcu() fence event S such - that E ->po S and either S ->po F or S = F. In simple terms, - there is a grace period po-between E and F. + E ->rcu-gp F means that E and F are in fact the same event, + and that event is a synchronize_rcu() fence (i.e., a grace + period). - E ->rscs F means there is a critical section delimited by an - rcu_read_lock() fence L and an rcu_read_unlock() fence U, such - that E ->po U and either L ->po F or L = F. You can think of - this as saying that E and F are in the same critical section - (in fact, it also allows E to be po-before the start of the - critical section and F to be po-after the end). + E ->rcu-rscsi F means that E and F are the rcu_read_unlock() + and rcu_read_lock() fence events delimiting some read-side + critical section. (The 'i' at the end of the name emphasizes + that this relation is "inverted": It links the end of the + critical section to the start.) If we think of the rcu-link relation as standing for an extended -"before", then X ->gp Y ->rcu-link Z says that X executes before a -grace period which ends before Z executes. (In fact it covers more -than this, because it also includes cases where X executes before a -grace period and some store propagates to Z's CPU before Z executes -but doesn't propagate to some other CPU until after the grace period -ends.) Similarly, X ->rscs Y ->rcu-link Z says that X is part of (or -before the start of) a critical section which starts before Z -executes. - -The LKMM goes on to define the rcu-fence relation as a sequence of gp -and rscs links separated by rcu-link links, in which the number of gp -links is >= the number of rscs links. For example: +"before", then X ->rcu-gp Y ->rcu-link Z roughly says that X is a +grace period which ends before Z begins. (In fact it covers more than +this, because it also includes cases where some store propagates to +Z's CPU before Z begins but doesn't propagate to some other CPU until +after X ends.) Similarly, X ->rcu-rscsi Y ->rcu-link Z says that X is +the end of a critical section which starts before Z begins. + +The LKMM goes on to define the rcu-fence relation as a sequence of +rcu-gp and rcu-rscsi links separated by rcu-link links, in which the +number of rcu-gp links is >= the number of rcu-rscsi links. For +example: - X ->gp Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V + X ->rcu-gp Y ->rcu-link Z ->rcu-rscsi T ->rcu-link U ->rcu-gp V would imply that X ->rcu-fence V, because this sequence contains two -gp links and only one rscs link. (It also implies that X ->rcu-fence T -and Z ->rcu-fence V.) On the other hand: +rcu-gp links and one rcu-rscsi link. (It also implies that +X ->rcu-fence T and Z ->rcu-fence V.) On the other hand: - X ->rscs Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V + X ->rcu-rscsi Y ->rcu-link Z ->rcu-rscsi T ->rcu-link U ->rcu-gp V does not imply X ->rcu-fence V, because the sequence contains only -one gp link but two rscs links. +one rcu-gp link but two rcu-rscsi links. The rcu-fence relation is important because the Grace Period Guarantee means that rcu-fence acts kind of like a strong fence. In particular, -if W is a write and we have W ->rcu-fence Z, the Guarantee says that W -will propagate to every CPU before Z executes. +E ->rcu-fence F implies not only that E begins before F ends, but also +that any write po-before E will propagate to every CPU before any +instruction po-after F can execute. (However, it does not imply that +E must execute before F; in fact, each synchronize_rcu() fence event +is linked to itself by rcu-fence as a degenerate case.) To prove this in full generality requires some intellectual effort. We'll consider just a very simple case: - W ->gp X ->rcu-link Y ->rscs Z. + G ->rcu-gp W ->rcu-link Z ->rcu-rscsi F. -This formula means that there is a grace period G and a critical -section C such that: +This formula means that G and W are the same event (a grace period), +and there are events X, Y and a read-side critical section C such that: - 1. W is po-before G; + 1. G = W is po-before or equal to X; - 2. X is equal to or po-after G; + 2. X comes "before" Y in some sense (including rfe, co and fr); - 3. X comes "before" Y in some sense; + 2. Y is po-before Z; - 4. Y is po-before the end of C; + 4. Z is the rcu_read_unlock() event marking the end of C; - 5. Z is equal to or po-after the start of C. + 5. F is the rcu_read_lock() event marking the start of C. -From 2 - 4 we deduce that the grace period G ends before the critical -section C. Then the second part of the Grace Period Guarantee says -not only that G starts before C does, but also that W (which executes -on G's CPU before G starts) must propagate to every CPU before C -starts. In particular, W propagates to every CPU before Z executes -(or finishes executing, in the case where Z is equal to the -rcu_read_lock() fence event which starts C.) This sort of reasoning -can be expanded to handle all the situations covered by rcu-fence. +From 1 - 4 we deduce that the grace period G ends before the critical +section C. Then part (2) of the Grace Period Guarantee says not only +that G starts before C does, but also that any write which executes on +G's CPU before G starts must propagate to every CPU before C starts. +In particular, the write propagates to every CPU before F finishes +executing and hence before any instruction po-after F can execute. +This sort of reasoning can be extended to handle all the situations +covered by rcu-fence. Finally, the LKMM defines the RCU-before (rb) relation in terms of rcu-fence. This is done in essentially the same way as the pb relation was defined in terms of strong-fence. We will omit the -details; the end result is that E ->rb F implies E must execute before -F, just as E ->pb F does (and for much the same reasons). +details; the end result is that E ->rb F implies E must execute +before F, just as E ->pb F does (and for much the same reasons). Putting this all together, the LKMM expresses the Grace Period Guarantee by requiring that the rb relation does not contain a cycle. -Equivalently, this "rcu" axiom requires that there are no events E and -F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, the -axiom requires that there are no cycles consisting of gp and rscs -alternating with rcu-link, where the number of gp links is >= the -number of rscs links. +Equivalently, this "rcu" axiom requires that there are no events E +and F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, +the axiom requires that there are no cycles consisting of rcu-gp and +rcu-rscsi alternating with rcu-link, where the number of rcu-gp links +is >= the number of rcu-rscsi links. Justifying the axiom isn't easy, but it is in fact a valid formalization of the Grace Period Guarantee. We won't attempt to go through the detailed argument, but the following analysis gives a -taste of what is involved. Suppose we have a violation of the first -part of the Guarantee: A critical section starts before a grace -period, and some store propagates to the critical section's CPU before -the end of the critical section but doesn't propagate to some other -CPU until after the end of the grace period. +taste of what is involved. Suppose both parts of the Guarantee are +violated: A critical section starts before a grace period, and some +store propagates to the critical section's CPU before the end of the +critical section but doesn't propagate to some other CPU until after +the end of the grace period. Putting symbols to these ideas, let L and U be the rcu_read_lock() and rcu_read_unlock() fence events delimiting the critical section in question, and let S be the synchronize_rcu() fence event for the grace period. Saying that the critical section starts before S means there -are events E and F where E is po-after L (which marks the start of the -critical section), E is "before" F in the sense of the rcu-link -relation, and F is po-before the grace period S: +are events Q and R where Q is po-after L (which marks the start of the +critical section), Q is "before" R in the sense used by the rcu-link +relation, and R is po-before the grace period S. Thus we have: - L ->po E ->rcu-link F ->po S. + L ->rcu-link S. -Let W be the store mentioned above, let Z come before the end of the +Let W be the store mentioned above, let Y come before the end of the critical section and witness that W propagates to the critical -section's CPU by reading from W, and let Y on some arbitrary CPU be a -witness that W has not propagated to that CPU, where Y happens after +section's CPU by reading from W, and let Z on some arbitrary CPU be a +witness that W has not propagated to that CPU, where Z happens after some event X which is po-after S. Symbolically, this amounts to: - S ->po X ->hb* Y ->fr W ->rf Z ->po U. + S ->po X ->hb* Z ->fr W ->rf Y ->po U. -The fr link from Y to W indicates that W has not propagated to Y's CPU -at the time that Y executes. From this, it can be shown (see the -discussion of the rcu-link relation earlier) that X and Z are related -by rcu-link, yielding: +The fr link from Z to W indicates that W has not propagated to Z's CPU +at the time that Z executes. From this, it can be shown (see the +discussion of the rcu-link relation earlier) that S and U are related +by rcu-link: - S ->po X ->rcu-link Z ->po U. + S ->rcu-link U. -The formulas say that S is po-between F and X, hence F ->gp X. They -also say that Z comes before the end of the critical section and E -comes after its start, hence Z ->rscs E. From all this we obtain: +Since S is a grace period we have S ->rcu-gp S, and since L and U are +the start and end of the critical section C we have U ->rcu-rscsi L. +From this we obtain: - F ->gp X ->rcu-link Z ->rscs E ->rcu-link F, + S ->rcu-gp S ->rcu-link U ->rcu-rscsi L ->rcu-link S, a forbidden cycle. Thus the "rcu" axiom rules out this violation of the Grace Period Guarantee. For something a little more down-to-earth, let's see how the axiom works out in practice. Consider the RCU code example from above, this -time with statement labels added to the memory access instructions: +time with statement labels added: int x, y; P0() { - rcu_read_lock(); - W: WRITE_ONCE(x, 1); - X: WRITE_ONCE(y, 1); - rcu_read_unlock(); + L: rcu_read_lock(); + X: WRITE_ONCE(x, 1); + Y: WRITE_ONCE(y, 1); + U: rcu_read_unlock(); } P1() { int r1, r2; - Y: r1 = READ_ONCE(x); - synchronize_rcu(); - Z: r2 = READ_ONCE(y); + Z: r1 = READ_ONCE(x); + S: synchronize_rcu(); + W: r2 = READ_ONCE(y); } -If r2 = 0 at the end then P0's store at X overwrites the value that -P1's load at Z reads from, so we have Z ->fre X and thus Z ->rcu-link X. -In addition, there is a synchronize_rcu() between Y and Z, so therefore -we have Y ->gp Z. +If r2 = 0 at the end then P0's store at Y overwrites the value that +P1's load at W reads from, so we have W ->fre Y. Since S ->po W and +also Y ->po U, we get S ->rcu-link U. In addition, S ->rcu-gp S +because S is a grace period. -If r1 = 1 at the end then P1's load at Y reads from P0's store at W, -so we have W ->rcu-link Y. In addition, W and X are in the same critical -section, so therefore we have X ->rscs W. +If r1 = 1 at the end then P1's load at Z reads from P0's store at X, +so we have X ->rfe Z. Together with L ->po X and Z ->po S, this +yields L ->rcu-link S. And since L and U are the start and end of a +critical section, we have U ->rcu-rscsi L. -Then X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X is a forbidden cycle, -violating the "rcu" axiom. Hence the outcome is not allowed by the -LKMM, as we would expect. +Then U ->rcu-rscsi L ->rcu-link S ->rcu-gp S ->rcu-link U is a +forbidden cycle, violating the "rcu" axiom. Hence the outcome is not +allowed by the LKMM, as we would expect. For contrast, let's see what can happen in a more complicated example: @@ -1690,51 +1695,52 @@ For contrast, let's see what can happen in a more complicated example: { int r0; - rcu_read_lock(); - W: r0 = READ_ONCE(x); - X: WRITE_ONCE(y, 1); - rcu_read_unlock(); + L0: rcu_read_lock(); + r0 = READ_ONCE(x); + WRITE_ONCE(y, 1); + U0: rcu_read_unlock(); } P1() { int r1; - Y: r1 = READ_ONCE(y); - synchronize_rcu(); - Z: WRITE_ONCE(z, 1); + r1 = READ_ONCE(y); + S1: synchronize_rcu(); + WRITE_ONCE(z, 1); } P2() { int r2; - rcu_read_lock(); - U: r2 = READ_ONCE(z); - V: WRITE_ONCE(x, 1); - rcu_read_unlock(); + L2: rcu_read_lock(); + r2 = READ_ONCE(z); + WRITE_ONCE(x, 1); + U2: rcu_read_unlock(); } If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows -that W ->rscs X ->rcu-link Y ->gp Z ->rcu-link U ->rscs V ->rcu-link W. -However this cycle is not forbidden, because the sequence of relations -contains fewer instances of gp (one) than of rscs (two). Consequently -the outcome is allowed by the LKMM. The following instruction timing -diagram shows how it might actually occur: +that U0 ->rcu-rscsi L0 ->rcu-link S1 ->rcu-gp S1 ->rcu-link U2 ->rcu-rscsi +L2 ->rcu-link U0. However this cycle is not forbidden, because the +sequence of relations contains fewer instances of rcu-gp (one) than of +rcu-rscsi (two). Consequently the outcome is allowed by the LKMM. +The following instruction timing diagram shows how it might actually +occur: P0 P1 P2 -------------------- -------------------- -------------------- rcu_read_lock() -X: WRITE_ONCE(y, 1) - Y: r1 = READ_ONCE(y) +WRITE_ONCE(y, 1) + r1 = READ_ONCE(y) synchronize_rcu() starts . rcu_read_lock() - . V: WRITE_ONCE(x, 1) -W: r0 = READ_ONCE(x) . + . WRITE_ONCE(x, 1) +r0 = READ_ONCE(x) . rcu_read_unlock() . synchronize_rcu() ends - Z: WRITE_ONCE(z, 1) - U: r2 = READ_ONCE(z) + WRITE_ONCE(z, 1) + r2 = READ_ONCE(z) rcu_read_unlock() This requires P0 and P2 to execute their loads and stores out of @@ -1744,6 +1750,15 @@ section in P0 both starts before P1's grace period does and ends before it does, and the critical section in P2 both starts after P1's grace period does and ends after it does. +Addendum: The LKMM now supports SRCU (Sleepable Read-Copy-Update) in +addition to normal RCU. The ideas involved are much the same as +above, with new relations srcu-gp and srcu-rscsi added to represent +SRCU grace periods and read-side critical sections. There is a +restriction on the srcu-gp and srcu-rscsi links that can appear in an +rcu-fence sequence (the srcu-rscsi links must be paired with srcu-gp +links having the same SRCU domain with proper nesting); the details +are relatively unimportant. + LOCKING ------- -- cgit v1.2.3-59-g8ed1b From 9393998e9ee094f99d18783cc85c489e20f0e0e7 Mon Sep 17 00:00:00 2001 From: Luc Maranget Date: Thu, 27 Dec 2018 16:27:12 +0100 Subject: tools/memory-model: Dynamically check SRCU lock-to-unlock matching This commit checks that the return value of srcu_read_lock() is passed to the matching srcu_read_unlock(), where "matching" is determined by nesting. This check operates as follows: 1. srcu_read_lock() creates an integer token, which is stored into the generated events. 2. srcu_read_unlock() records its second (token) argument into the generated event. 3. A new herd primitive 'different-values' filters out pairs of events with identical values from the relation passed as its argument. 4. The bell file applies the above primitive to the (srcu) read-side-critical-section relation 'srcu-rscs' and flags non-empty results. BEWARE: Works only with herd version 7.51+6 and onwards. Signed-off-by: Luc Maranget Signed-off-by: Paul E. McKenney [ paulmck: Apply Andrea Parri's off-list feedback. ] Acked-by: Alan Stern --- tools/memory-model/linux-kernel.bell | 3 +++ tools/memory-model/linux-kernel.cat | 2 ++ tools/memory-model/linux-kernel.def | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 9c42cd9ddcb4..def9131d3d8e 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell @@ -73,3 +73,6 @@ flag ~empty Srcu-unlock \ range(srcu-rscs) as unbalanced-srcu-locking (* Check for use of synchronize_srcu() inside an RCU critical section *) flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep + +(* Validate SRCU dynamic match *) +flag ~empty different-values(srcu-rscs) as srcu-bad-nesting diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index 8dcb37835b61..95bf45f1215f 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ (* + * Requires herd version 7.51+6 or higher. + * * Copyright (C) 2015 Jade Alglave , * Copyright (C) 2016 Luc Maranget for Inria * Copyright (C) 2017 Alan Stern , diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def index 1d6a120cde14..0c3f0ef486f4 100644 --- a/tools/memory-model/linux-kernel.def +++ b/tools/memory-model/linux-kernel.def @@ -49,7 +49,7 @@ synchronize_rcu_expedited() { __fence{sync-rcu}; } // SRCU srcu_read_lock(X) __srcu{srcu-lock}(X) -srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X); } +srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X,Y); } synchronize_srcu(X) { __srcu{sync-srcu}(X); } // Atomic -- cgit v1.2.3-59-g8ed1b From 034fb712a620c84efa78e2889845d5dea95f688f Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Thu, 31 Jan 2019 08:08:40 -0800 Subject: tools/memory-model: Avoid duplicating herdtools versions Currently, herdtools version information appears no fewer than three times in the LKMM source, which is difficult to maintain. This commit therefore places the required version in one place, namely the tools/memory-model/README file. Signed-off-by: Andrea Parri Signed-off-by: Paul E. McKenney Acked-by: Alan Stern --- tools/memory-model/README | 8 ++++++-- tools/memory-model/linux-kernel.cat | 2 -- tools/memory-model/lock.cat | 3 --- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/memory-model/README b/tools/memory-model/README index 9d7d4f23503f..2b87f3971548 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -20,13 +20,17 @@ that litmus test to be exercised within the Linux kernel. REQUIREMENTS ============ -Version 7.49 of the "herd7" and "klitmus7" tools must be downloaded -separately: +Version 7.52 or higher of the "herd7" and "klitmus7" tools must be +downloaded separately: https://github.com/herd/herdtools7 See "herdtools7/INSTALL.md" for installation instructions. +Note that although these tools usually provide backwards compatibility, +this is not absolutely guaranteed. Therefore, if a later version does +not work, please try using the exact version called out above. + ================== BASIC USAGE: HERD7 diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index 95bf45f1215f..8dcb37835b61 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ (* - * Requires herd version 7.51+6 or higher. - * * Copyright (C) 2015 Jade Alglave , * Copyright (C) 2016 Luc Maranget for Inria * Copyright (C) 2017 Alan Stern , diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat index 305ded17e741..a059d1a6d8a2 100644 --- a/tools/memory-model/lock.cat +++ b/tools/memory-model/lock.cat @@ -6,9 +6,6 @@ (* * Generate coherence orders and handle lock operations - * - * Warning: spin_is_locked() crashes herd7 versions strictly before 7.48. - * spin_is_locked() is functional from herd7 version 7.49. *) include "cross.cat" -- cgit v1.2.3-59-g8ed1b From 487ecc460732cab178897e81a7f5ef6183eff143 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 25 Jan 2019 06:55:47 +0900 Subject: sched/Documentation/kokr: Update Korean translation to update wake_up() & co. memory-barrier guarantees Translate this commit to Korean: 7696f9910a9a ("sched/Documentation: Update wake_up() & co. memory-barrier guarantees") Signed-off-by: SeongJae Park Reviewed-by: Yunjae Lee Signed-off-by: Paul E. McKenney --- .../translations/ko_KR/memory-barriers.txt | 43 +++++++++++++--------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index 7f01fb1c1084..4a6cf4d58cbf 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -2146,33 +2146,40 @@ set_current_state() 는 다음의 것들로 감싸질 수도 있습니다: event_indicated = 1; wake_up_process(event_daemon); -wake_up() 류에 의해 쓰기 메모리 배리어가 내포됩니다. 만약 그것들이 뭔가를 -깨운다면요. 이 배리어는 태스크 상태가 지워지기 전에 수행되므로, 이벤트를 -알리기 위한 STORE 와 태스크 상태를 TASK_RUNNING 으로 설정하는 STORE 사이에 -위치하게 됩니다. +wake_up() 이 무언가를 깨우게 되면, 이 함수는 범용 메모리 배리어를 수행합니다. +이 함수가 아무것도 깨우지 않는다면 메모리 배리어는 수행될 수도, 수행되지 않을 +수도 있습니다; 이 경우에 메모리 배리어를 수행할 거라 오해해선 안됩니다. 이 +배리어는 태스크 상태가 접근되기 전에 수행되는데, 자세히 말하면 이 이벤트를 +알리기 위한 STORE 와 TASK_RUNNING 으로 상태를 쓰는 STORE 사이에 수행됩니다: - CPU 1 CPU 2 + CPU 1 (Sleeper) CPU 2 (Waker) =============================== =============================== set_current_state(); STORE event_indicated smp_store_mb(); wake_up(); - STORE current->state <쓰기 배리어> - <범용 배리어> STORE current->state - LOAD event_indicated + STORE current->state ... + <범용 배리어> <범용 배리어> + LOAD event_indicated if ((LOAD task->state) & TASK_NORMAL) + STORE task->state -한번더 말합니다만, 이 쓰기 메모리 배리어는 이 코드가 정말로 뭔가를 깨울 때에만 -실행됩니다. 이걸 설명하기 위해, X 와 Y 는 모두 0 으로 초기화 되어 있다는 가정 -하에 아래의 이벤트 시퀀스를 생각해 봅시다: +여기서 "task" 는 깨어나지는 쓰레드이고 CPU 1 의 "current" 와 같습니다. + +반복하지만, wake_up() 이 무언가를 정말 깨운다면 범용 메모리 배리어가 수행될 +것이 보장되지만, 그렇지 않다면 그런 보장이 없습니다. 이걸 이해하기 위해, X 와 +Y 는 모두 0 으로 초기화 되어 있다는 가정 하에 아래의 이벤트 시퀀스를 생각해 +봅시다: CPU 1 CPU 2 =============================== =============================== - X = 1; STORE event_indicated + X = 1; Y = 1; smp_mb(); wake_up(); - Y = 1; wait_event(wq, Y == 1); - wake_up(); load from Y sees 1, no memory barrier - load from X might see 0 + LOAD Y LOAD X + +정말로 깨우기가 행해졌다면, 두 로드 중 (최소한) 하나는 1 을 보게 됩니다. +반면에, 실제 깨우기가 행해지지 않았다면, 두 로드 모두 0을 볼 수도 있습니다. -위 예제에서의 경우와 달리 깨우기가 정말로 행해졌다면, CPU 2 의 X 로드는 1 을 -본다고 보장될 수 있을 겁니다. +wake_up_process() 는 항상 범용 메모리 배리어를 수행합니다. 이 배리어 역시 +태스크 상태가 접근되기 전에 수행됩니다. 특히, 앞의 예제 코드에서 wake_up() 이 +wake_up_process() 로 대체된다면 두 로드 중 하나는 1을 볼 것이 보장됩니다. 사용 가능한 깨우기류 함수들로 다음과 같은 것들이 있습니다: @@ -2192,6 +2199,8 @@ wake_up() 류에 의해 쓰기 메모리 배리어가 내포됩니다. 만약 wake_up_poll(); wake_up_process(); +메모리 순서규칙 관점에서, 이 함수들은 모두 wake_up() 과 같거나 보다 강한 순서 +보장을 제공합니다. [!] 잠재우는 코드와 깨우는 코드에 내포되는 메모리 배리어들은 깨우기 전에 이루어진 스토어를 잠재우는 코드가 set_current_state() 를 호출한 후에 행하는 -- cgit v1.2.3-59-g8ed1b From db467147f131b8ab799066b9779addb8603d3baf Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 25 Jan 2019 06:55:48 +0900 Subject: locking/memory-barriers/kokr: Update Korean translation to replace smp_cond_acquire() with smp_cond_load_acquire() Transalte this commit to Korean: 2f359c7ea554 ("locking/memory-barriers: Replace smp_cond_acquire() with smp_cond_load_acquire()") Signed-off-by: SeongJae Park Reviewed-by: Yunjae Lee Signed-off-by: Paul E. McKenney --- Documentation/translations/ko_KR/memory-barriers.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index 4a6cf4d58cbf..db0b9d8619f1 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -493,10 +493,8 @@ CPU 에게 기대할 수 있는 최소한의 보장사항 몇가지가 있습니 이 타입의 오퍼레이션은 단방향의 투과성 배리어처럼 동작합니다. ACQUIRE 오퍼레이션 뒤의 모든 메모리 오퍼레이션들이 ACQUIRE 오퍼레이션 후에 일어난 것으로 시스템의 나머지 컴포넌트들에 보이게 될 것이 보장됩니다. - LOCK 오퍼레이션과 smp_load_acquire(), smp_cond_acquire() 오퍼레이션도 - ACQUIRE 오퍼레이션에 포함됩니다. smp_cond_acquire() 오퍼레이션은 컨트롤 - 의존성과 smp_rmb() 를 사용해서 ACQUIRE 의 의미적 요구사항(semantic)을 - 충족시킵니다. + LOCK 오퍼레이션과 smp_load_acquire(), smp_cond_load_acquire() 오퍼레이션도 + ACQUIRE 오퍼레이션에 포함됩니다. ACQUIRE 오퍼레이션 앞의 메모리 오퍼레이션들은 ACQUIRE 오퍼레이션 완료 후에 수행된 것처럼 보일 수 있습니다. -- cgit v1.2.3-59-g8ed1b From f1887143f5984f23d2360f2efed6ef481bb41117 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 11 Feb 2019 18:09:43 +0100 Subject: Documentation/atomic_t: Clarify signed vs unsigned Clarify the whole signed vs unsigned issue for atomic_t. There has been enough confusion on this topic to warrant a few explicit words I feel. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Acked-by: Boqun Feng Signed-off-by: Paul E. McKenney --- Documentation/atomic_t.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt index 913396ac5824..dca3fb0554db 100644 --- a/Documentation/atomic_t.txt +++ b/Documentation/atomic_t.txt @@ -56,6 +56,23 @@ Barriers: smp_mb__{before,after}_atomic() +TYPES (signed vs unsigned) +----- + +While atomic_t, atomic_long_t and atomic64_t use int, long and s64 +respectively (for hysterical raisins), the kernel uses -fno-strict-overflow +(which implies -fwrapv) and defines signed overflow to behave like +2s-complement. + +Therefore, an explicitly unsigned variant of the atomic ops is strictly +unnecessary and we can simply cast, there is no UB. + +There was a bug in UBSAN prior to GCC-8 that would generate UB warnings for +signed types. + +With this we also conform to the C/C++ _Atomic behaviour and things like +P1236R1. + SEMANTICS --------- -- cgit v1.2.3-59-g8ed1b From 4fea6ef0b219d66b8a901fea1744745a1ed2f79b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Jan 2019 14:48:09 -0800 Subject: doc: Remove obsolete RCU update functions from RCU documentation Now that synchronize_rcu_bh, synchronize_rcu_bh_expedited, call_rcu_bh, rcu_barrier_bh, synchronize_sched, synchronize_sched_expedited, call_rcu_sched, rcu_barrier_sched, get_state_synchronize_sched, and cond_synchronize_sched are obsolete, let's remove them from the documentation aside from a small historical section. Signed-off-by: Paul E. McKenney --- .../Design/Data-Structures/Data-Structures.html | 3 +- .../Expedited-Grace-Periods.html | 4 +- .../Memory-Ordering/Tree-RCU-Memory-Ordering.html | 5 +- Documentation/RCU/NMI-RCU.txt | 13 ++-- Documentation/RCU/UP.txt | 6 +- Documentation/RCU/checklist.txt | 76 ++++++++++------------ Documentation/RCU/rcu.txt | 8 +-- Documentation/RCU/rcubarrier.txt | 27 ++++---- 8 files changed, 66 insertions(+), 76 deletions(-) diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html index 18f179807563..c30c1957c7e6 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html @@ -155,8 +155,7 @@ keeping lock contention under control at all tree levels regardless of the level of loading on the system.

RCU updaters wait for normal grace periods by registering -RCU callbacks, either directly via call_rcu() and -friends (namely call_rcu_bh() and call_rcu_sched()), +RCU callbacks, either directly via call_rcu() or indirectly via synchronize_rcu() and friends. RCU callbacks are represented by rcu_head structures, which are queued on rcu_data structures while they are diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html index 19e7a5fb6b73..57300db4b5ff 100644 --- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html +++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html @@ -56,6 +56,7 @@ sections. RCU-preempt Expedited Grace Periods

+CONFIG_PREEMPT=y kernels implement RCU-preempt. The overall flow of the handling of a given CPU by an RCU-preempt expedited grace period is shown in the following diagram: @@ -139,6 +140,7 @@ or offline, among other things. RCU-sched Expedited Grace Periods

+CONFIG_PREEMPT=n kernels implement RCU-sched. The overall flow of the handling of a given CPU by an RCU-sched expedited grace period is shown in the following diagram: @@ -146,7 +148,7 @@ expedited grace period is shown in the following diagram:

As with RCU-preempt, RCU-sched's -synchronize_sched_expedited() ignores offline and +synchronize_rcu_expedited() ignores offline and idle CPUs, again because they are in remotely detectable quiescent states. However, because the diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html index 8d21af02b1f0..c64f8d26609f 100644 --- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html +++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html @@ -34,12 +34,11 @@ Similarly, any code that happens before the beginning of a given RCU grace period is guaranteed to see the effects of all accesses following the end of that grace period that are within RCU read-side critical sections. -

This guarantee is particularly pervasive for synchronize_sched(), -for which RCU-sched read-side critical sections include any region +

Note well that RCU-sched read-side critical sections include any region of code for which preemption is disabled. Given that each individual machine instruction can be thought of as an extremely small region of preemption-disabled code, one can think of -synchronize_sched() as smp_mb() on steroids. +synchronize_rcu() as smp_mb() on steroids.

RCU updaters use this guarantee by splitting their updates into two phases, one of which is executed before the grace period and diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt index 687777f83b23..881353fd5bff 100644 --- a/Documentation/RCU/NMI-RCU.txt +++ b/Documentation/RCU/NMI-RCU.txt @@ -81,18 +81,19 @@ currently executing on some other CPU. We therefore cannot free up any data structures used by the old NMI handler until execution of it completes on all other CPUs. -One way to accomplish this is via synchronize_sched(), perhaps as +One way to accomplish this is via synchronize_rcu(), perhaps as follows: unset_nmi_callback(); - synchronize_sched(); + synchronize_rcu(); kfree(my_nmi_data); -This works because synchronize_sched() blocks until all CPUs complete -any preemption-disabled segments of code that they were executing. -Since NMI handlers disable preemption, synchronize_sched() is guaranteed +This works because (as of v4.20) synchronize_rcu() blocks until all +CPUs complete any preemption-disabled segments of code that they were +executing. +Since NMI handlers disable preemption, synchronize_rcu() is guaranteed not to return until all ongoing NMI handlers exit. It is therefore safe -to free up the handler's data as soon as synchronize_sched() returns. +to free up the handler's data as soon as synchronize_rcu() returns. Important note: for this to work, the architecture in question must invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively. diff --git a/Documentation/RCU/UP.txt b/Documentation/RCU/UP.txt index 90ec5341ee98..53bde717017b 100644 --- a/Documentation/RCU/UP.txt +++ b/Documentation/RCU/UP.txt @@ -86,10 +86,8 @@ even on a UP system. So do not do it! Even on a UP system, the RCU infrastructure -must- respect grace periods, and -must- invoke callbacks from a known environment in which no locks are held. -It -is- safe for synchronize_sched() and synchronize_rcu_bh() to return -immediately on an UP system. It is also safe for synchronize_rcu() -to return immediately on UP systems, except when running preemptable -RCU. +Note that it -is- safe for synchronize_rcu() to return immediately on +UP systems, including !PREEMPT SMP builds running on UP systems. Quick Quiz #3: Why can't synchronize_rcu() return immediately on UP systems running preemptable RCU? diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 6f469864d9f5..fcc59fea5cd4 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -182,16 +182,13 @@ over a rather long period of time, but improvements are always welcome! when publicizing a pointer to a structure that can be traversed by an RCU read-side critical section. -5. If call_rcu(), or a related primitive such as call_rcu_bh(), - call_rcu_sched(), or call_srcu() is used, the callback function - will be called from softirq context. In particular, it cannot - block. +5. If call_rcu() or call_srcu() is used, the callback function will + be called from softirq context. In particular, it cannot block. -6. Since synchronize_rcu() can block, it cannot be called from - any sort of irq context. The same rule applies for - synchronize_rcu_bh(), synchronize_sched(), synchronize_srcu(), - synchronize_rcu_expedited(), synchronize_rcu_bh_expedited(), - synchronize_sched_expedite(), and synchronize_srcu_expedited(). +6. Since synchronize_rcu() can block, it cannot be called + from any sort of irq context. The same rule applies + for synchronize_srcu(), synchronize_rcu_expedited(), and + synchronize_srcu_expedited(). The expedited forms of these primitives have the same semantics as the non-expedited forms, but expediting is both expensive and @@ -212,20 +209,20 @@ over a rather long period of time, but improvements are always welcome! of the system, especially to real-time workloads running on the rest of the system. -7. If the updater uses call_rcu() or synchronize_rcu(), then the - corresponding readers must use rcu_read_lock() and - rcu_read_unlock(). If the updater uses call_rcu_bh() or - synchronize_rcu_bh(), then the corresponding readers must - use rcu_read_lock_bh() and rcu_read_unlock_bh(). If the - updater uses call_rcu_sched() or synchronize_sched(), then - the corresponding readers must disable preemption, possibly - by calling rcu_read_lock_sched() and rcu_read_unlock_sched(). - If the updater uses synchronize_srcu() or call_srcu(), then - the corresponding readers must use srcu_read_lock() and +7. As of v4.20, a given kernel implements only one RCU flavor, + which is RCU-sched for PREEMPT=n and RCU-preempt for PREEMPT=y. + If the updater uses call_rcu() or synchronize_rcu(), + then the corresponding readers my use rcu_read_lock() and + rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(), + or any pair of primitives that disables and re-enables preemption, + for example, rcu_read_lock_sched() and rcu_read_unlock_sched(). + If the updater uses synchronize_srcu() or call_srcu(), + then the corresponding readers must use srcu_read_lock() and srcu_read_unlock(), and with the same srcu_struct. The rules for the expedited primitives are the same as for their non-expedited counterparts. Mixing things up will result in confusion and - broken kernels. + broken kernels, and has even resulted in an exploitable security + issue. One exception to this rule: rcu_read_lock() and rcu_read_unlock() may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() @@ -288,8 +285,7 @@ over a rather long period of time, but improvements are always welcome! d. Periodically invoke synchronize_rcu(), permitting a limited number of updates per grace period. - The same cautions apply to call_rcu_bh(), call_rcu_sched(), - call_srcu(), and kfree_rcu(). + The same cautions apply to call_srcu() and kfree_rcu(). Note that although these primitives do take action to avoid memory exhaustion when any given CPU has too many callbacks, a determined @@ -336,12 +332,12 @@ over a rather long period of time, but improvements are always welcome! to safely access and/or modify that data structure. RCU callbacks are -usually- executed on the same CPU that executed - the corresponding call_rcu(), call_rcu_bh(), or call_rcu_sched(), - but are by -no- means guaranteed to be. For example, if a given - CPU goes offline while having an RCU callback pending, then that - RCU callback will execute on some surviving CPU. (If this was - not the case, a self-spawning RCU callback would prevent the - victim CPU from ever going offline.) + the corresponding call_rcu() or call_srcu(). but are by -no- + means guaranteed to be. For example, if a given CPU goes offline + while having an RCU callback pending, then that RCU callback + will execute on some surviving CPU. (If this was not the case, + a self-spawning RCU callback would prevent the victim CPU from + ever going offline.) 13. Unlike other forms of RCU, it -is- permissible to block in an SRCU read-side critical section (demarked by srcu_read_lock() @@ -381,8 +377,7 @@ over a rather long period of time, but improvements are always welcome! SRCU's expedited primitive (synchronize_srcu_expedited()) never sends IPIs to other CPUs, so it is easier on - real-time workloads than is synchronize_rcu_expedited(), - synchronize_rcu_bh_expedited() or synchronize_sched_expedited(). + real-time workloads than is synchronize_rcu_expedited(). Note that rcu_dereference() and rcu_assign_pointer() relate to SRCU just as they do to other forms of RCU. @@ -428,22 +423,19 @@ over a rather long period of time, but improvements are always welcome! These debugging aids can help you find problems that are otherwise extremely difficult to spot. -17. If you register a callback using call_rcu(), call_rcu_bh(), - call_rcu_sched(), or call_srcu(), and pass in a function defined - within a loadable module, then it in necessary to wait for - all pending callbacks to be invoked after the last invocation - and before unloading that module. Note that it is absolutely - -not- sufficient to wait for a grace period! The current (say) - synchronize_rcu() implementation waits only for all previous - callbacks registered on the CPU that synchronize_rcu() is running - on, but it is -not- guaranteed to wait for callbacks registered - on other CPUs. +17. If you register a callback using call_rcu() or call_srcu(), + and pass in a function defined within a loadable module, + then it in necessary to wait for all pending callbacks to + be invoked after the last invocation and before unloading + that module. Note that it is absolutely -not- sufficient to + wait for a grace period! The current (say) synchronize_rcu() + implementation waits only for all previous callbacks registered + on the CPU that synchronize_rcu() is running on, but it is -not- + guaranteed to wait for callbacks registered on other CPUs. You instead need to use one of the barrier functions: o call_rcu() -> rcu_barrier() - o call_rcu_bh() -> rcu_barrier() - o call_rcu_sched() -> rcu_barrier() o call_srcu() -> srcu_barrier() However, these barrier functions are absolutely -not- guaranteed diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt index 721b3e426515..c818cf65c5a9 100644 --- a/Documentation/RCU/rcu.txt +++ b/Documentation/RCU/rcu.txt @@ -52,10 +52,10 @@ o If I am running on a uniprocessor kernel, which can only do one o How can I see where RCU is currently used in the Linux kernel? Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu", - "rcu_read_lock_bh", "rcu_read_unlock_bh", "call_rcu_bh", - "srcu_read_lock", "srcu_read_unlock", "synchronize_rcu", - "synchronize_net", "synchronize_srcu", and the other RCU - primitives. Or grab one of the cscope databases from: + "rcu_read_lock_bh", "rcu_read_unlock_bh", "srcu_read_lock", + "srcu_read_unlock", "synchronize_rcu", "synchronize_net", + "synchronize_srcu", and the other RCU primitives. Or grab one + of the cscope databases from: http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt index 5d7759071a3e..a2782df69732 100644 --- a/Documentation/RCU/rcubarrier.txt +++ b/Documentation/RCU/rcubarrier.txt @@ -83,16 +83,15 @@ Pseudo-code using rcu_barrier() is as follows: 2. Execute rcu_barrier(). 3. Allow the module to be unloaded. -There are also rcu_barrier_bh(), rcu_barrier_sched(), and srcu_barrier() -functions for the other flavors of RCU, and you of course must match -the flavor of rcu_barrier() with that of call_rcu(). If your module -uses multiple flavors of call_rcu(), then it must also use multiple +There is also an srcu_barrier() function for SRCU, and you of course +must match the flavor of rcu_barrier() with that of call_rcu(). If your +module uses multiple flavors of call_rcu(), then it must also use multiple flavors of rcu_barrier() when unloading that module. For example, if -it uses call_rcu_bh(), call_srcu() on srcu_struct_1, and call_srcu() on +it uses call_rcu(), call_srcu() on srcu_struct_1, and call_srcu() on srcu_struct_2(), then the following three lines of code will be required when unloading: - 1 rcu_barrier_bh(); + 1 rcu_barrier(); 2 srcu_barrier(&srcu_struct_1); 3 srcu_barrier(&srcu_struct_2); @@ -185,12 +184,12 @@ module invokes call_rcu() from timers, you will need to first cancel all the timers, and only then invoke rcu_barrier() to wait for any remaining RCU callbacks to complete. -Of course, if you module uses call_rcu_bh(), you will need to invoke -rcu_barrier_bh() before unloading. Similarly, if your module uses -call_rcu_sched(), you will need to invoke rcu_barrier_sched() before -unloading. If your module uses call_rcu(), call_rcu_bh(), -and- -call_rcu_sched(), then you will need to invoke each of rcu_barrier(), -rcu_barrier_bh(), and rcu_barrier_sched(). +Of course, if you module uses call_rcu(), you will need to invoke +rcu_barrier() before unloading. Similarly, if your module uses +call_srcu(), you will need to invoke srcu_barrier() before unloading, +and on the same srcu_struct structure. If your module uses call_rcu() +-and- call_srcu(), then you will need to invoke rcu_barrier() -and- +srcu_barrier(). Implementing rcu_barrier() @@ -223,8 +222,8 @@ shown below. Note that the final "1" in on_each_cpu()'s argument list ensures that all the calls to rcu_barrier_func() will have completed before on_each_cpu() returns. Line 9 then waits for the completion. -This code was rewritten in 2008 to support rcu_barrier_bh() and -rcu_barrier_sched() in addition to the original rcu_barrier(). +This code was rewritten in 2008 and several times thereafter, but this +still gives the general idea. The rcu_barrier_func() runs on each CPU, where it invokes call_rcu() to post an RCU callback, as follows: -- cgit v1.2.3-59-g8ed1b From 0fa201d1618e0b39a60b1045aa1b323f9d351721 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Tue, 29 Jan 2019 15:05:46 -0700 Subject: doc: Repair some whitespace damage A diagram in whatisRCU.txt has space character before tabs. This commit therefore makes this diagram consistent with elsewhere in the document: Use one leading tab, followed by spaces for any additional whitespace required. Signed-off-by: Tycho Andersen Signed-off-by: Paul E. McKenney --- Documentation/RCU/whatisRCU.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 1ace20815bb1..981651a8b65d 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -310,7 +310,7 @@ reader, updater, and reclaimer. rcu_assign_pointer() - +--------+ + +--------+ +---------------------->| reader |---------+ | +--------+ | | | | @@ -318,12 +318,12 @@ reader, updater, and reclaimer. | | | rcu_read_lock() | | | rcu_read_unlock() | rcu_dereference() | | - +---------+ | | - | updater |<---------------------+ | - +---------+ V + +---------+ | | + | updater |<----------------+ | + +---------+ V | +-----------+ +----------------------------------->| reclaimer | - +-----------+ + +-----------+ Defer: synchronize_rcu() & call_rcu() -- cgit v1.2.3-59-g8ed1b From d1b493bbe101d85c86970f1b6c0401d4c1c473ed Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Feb 2019 07:51:24 -0800 Subject: doc: Describe choice of rcu_dereference() APIs and __rcu usage Reported-by: Andrew Morton Signed-off-by: Paul E. McKenney --- Documentation/RCU/rcu_dereference.txt | 103 ++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt index ab96227bad42..bf699e8cfc75 100644 --- a/Documentation/RCU/rcu_dereference.txt +++ b/Documentation/RCU/rcu_dereference.txt @@ -351,3 +351,106 @@ garbage values. In short, rcu_dereference() is -not- optional when you are going to dereference the resulting pointer. + + +WHICH MEMBER OF THE rcu_dereference() FAMILY SHOULD YOU USE? + +First, please avoid using rcu_dereference_raw() and also please avoid +using rcu_dereference_check() and rcu_dereference_protected() with a +second argument with a constant value of 1 (or true, for that matter). +With that caution out of the way, here is some guidance for which +member of the rcu_dereference() to use in various situations: + +1. If the access needs to be within an RCU read-side critical + section, use rcu_dereference(). With the new consolidated + RCU flavors, an RCU read-side critical section is entered + using rcu_read_lock(), anything that disables bottom halves, + anything that disables interrupts, or anything that disables + preemption. + +2. If the access might be within an RCU read-side critical section + on the one hand, or protected by (say) my_lock on the other, + use rcu_dereference_check(), for example: + + p1 = rcu_dereference_check(p->rcu_protected_pointer, + lockdep_is_held(&my_lock)); + + +3. If the access might be within an RCU read-side critical section + on the one hand, or protected by either my_lock or your_lock on + the other, again use rcu_dereference_check(), for example: + + p1 = rcu_dereference_check(p->rcu_protected_pointer, + lockdep_is_held(&my_lock) || + lockdep_is_held(&your_lock)); + +4. If the access is on the update side, so that it is always protected + by my_lock, use rcu_dereference_protected(): + + p1 = rcu_dereference_protected(p->rcu_protected_pointer, + lockdep_is_held(&my_lock)); + + This can be extended to handle multiple locks as in #3 above, + and both can be extended to check other conditions as well. + +5. If the protection is supplied by the caller, and is thus unknown + to this code, that is the rare case when rcu_dereference_raw() + is appropriate. In addition, rcu_dereference_raw() might be + appropriate when the lockdep expression would be excessively + complex, except that a better approach in that case might be to + take a long hard look at your synchronization design. Still, + there are data-locking cases where any one of a very large number + of locks or reference counters suffices to protect the pointer, + so rcu_dereference_raw() does have its place. + + However, its place is probably quite a bit smaller than one + might expect given the number of uses in the current kernel. + Ditto for its synonym, rcu_dereference_check( ... , 1), and + its close relative, rcu_dereference_protected(... , 1). + + +SPARSE CHECKING OF RCU-PROTECTED POINTERS + +The sparse static-analysis tool checks for direct access to RCU-protected +pointers, which can result in "interesting" bugs due to compiler +optimizations involving invented loads and perhaps also load tearing. +For example, suppose someone mistakenly does something like this: + + p = q->rcu_protected_pointer; + do_something_with(p->a); + do_something_else_with(p->b); + +If register pressure is high, the compiler might optimize "p" out +of existence, transforming the code to something like this: + + do_something_with(q->rcu_protected_pointer->a); + do_something_else_with(q->rcu_protected_pointer->b); + +This could fatally disappoint your code if q->rcu_protected_pointer +changed in the meantime. Nor is this a theoretical problem: Exactly +this sort of bug cost Paul E. McKenney (and several of his innocent +colleagues) a three-day weekend back in the early 1990s. + +Load tearing could of course result in dereferencing a mashup of a pair +of pointers, which also might fatally disappoint your code. + +These problems could have been avoided simply by making the code instead +read as follows: + + p = rcu_dereference(q->rcu_protected_pointer); + do_something_with(p->a); + do_something_else_with(p->b); + +Unfortunately, these sorts of bugs can be extremely hard to spot during +review. This is where the sparse tool comes into play, along with the +"__rcu" marker. If you mark a pointer declaration, whether in a structure +or as a formal parameter, with "__rcu", which tells sparse to complain if +this pointer is accessed directly. It will also cause sparse to complain +if a pointer not marked with "__rcu" is accessed using rcu_dereference() +and friends. For example, ->rcu_protected_pointer might be declared as +follows: + + struct foo __rcu *rcu_protected_pointer; + +Use of "__rcu" is opt-in. If you choose not to use it, then you should +ignore the sparse warnings. -- cgit v1.2.3-59-g8ed1b From 884b429ae66758bd2e006dc5fd56757e0fde896d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 6 Mar 2019 11:24:35 -0800 Subject: doc: Fix typos and otherwise modernize checklist.txt This commit fixes some issues with Documentation/RCU/checklist.txt. Signed-off-by: Paul E. McKenney --- Documentation/RCU/checklist.txt | 43 ++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index fcc59fea5cd4..e98ff261a438 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -318,7 +318,7 @@ over a rather long period of time, but improvements are always welcome! 11. Any lock acquired by an RCU callback must be acquired elsewhere with softirq disabled, e.g., via spin_lock_irqsave(), - spin_lock_bh(), etc. Failing to disable irq on a given + spin_lock_bh(), etc. Failing to disable softirq on a given acquisition of that lock will result in deadlock as soon as the RCU softirq handler happens to run your RCU callback while interrupting that acquisition's critical section. @@ -331,13 +331,16 @@ over a rather long period of time, but improvements are always welcome! must use whatever locking or other synchronization is required to safely access and/or modify that data structure. - RCU callbacks are -usually- executed on the same CPU that executed - the corresponding call_rcu() or call_srcu(). but are by -no- - means guaranteed to be. For example, if a given CPU goes offline - while having an RCU callback pending, then that RCU callback - will execute on some surviving CPU. (If this was not the case, - a self-spawning RCU callback would prevent the victim CPU from - ever going offline.) + Do not assume that RCU callbacks will be executed on the same + CPU that executed the corresponding call_rcu() or call_srcu(). + For example, if a given CPU goes offline while having an RCU + callback pending, then that RCU callback will execute on some + surviving CPU. (If this was not the case, a self-spawning RCU + callback would prevent the victim CPU from ever going offline.) + Furthermore, CPUs designated by rcu_nocbs= might well -always- + have their RCU callbacks executed on some other CPUs, in fact, + for some real-time workloads, this is the whole point of using + the rcu_nocbs= kernel boot parameter. 13. Unlike other forms of RCU, it -is- permissible to block in an SRCU read-side critical section (demarked by srcu_read_lock() @@ -379,8 +382,9 @@ over a rather long period of time, but improvements are always welcome! never sends IPIs to other CPUs, so it is easier on real-time workloads than is synchronize_rcu_expedited(). - Note that rcu_dereference() and rcu_assign_pointer() relate to - SRCU just as they do to other forms of RCU. + Note that rcu_assign_pointer() relates to SRCU just as it does to + other forms of RCU, but instead of rcu_dereference() you should + use srcu_dereference() in order to avoid lockdep splats. 14. The whole point of call_rcu(), synchronize_rcu(), and friends is to wait until all pre-existing readers have finished before @@ -400,6 +404,9 @@ over a rather long period of time, but improvements are always welcome! read-side critical sections. It is the responsibility of the RCU update-side primitives to deal with this. + For SRCU readers, you can use smp_mb__after_srcu_read_unlock() + immediately after an srcu_read_unlock() to get a full barrier. + 16. Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the __rcu sparse checks to validate your RCU code. These can help find problems as follows: @@ -423,15 +430,15 @@ over a rather long period of time, but improvements are always welcome! These debugging aids can help you find problems that are otherwise extremely difficult to spot. -17. If you register a callback using call_rcu() or call_srcu(), - and pass in a function defined within a loadable module, - then it in necessary to wait for all pending callbacks to - be invoked after the last invocation and before unloading - that module. Note that it is absolutely -not- sufficient to - wait for a grace period! The current (say) synchronize_rcu() - implementation waits only for all previous callbacks registered - on the CPU that synchronize_rcu() is running on, but it is -not- +17. If you register a callback using call_rcu() or call_srcu(), and + pass in a function defined within a loadable module, then it in + necessary to wait for all pending callbacks to be invoked after + the last invocation and before unloading that module. Note that + it is absolutely -not- sufficient to wait for a grace period! + The current (say) synchronize_rcu() implementation is -not- guaranteed to wait for callbacks registered on other CPUs. + Or even on the current CPU if that CPU recently went offline + and came back online. You instead need to use one of the barrier functions: -- cgit v1.2.3-59-g8ed1b From e85e6a21b2b5f31148cc3f2e785262b37c3e1ec7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Jan 2019 15:30:15 -0800 Subject: rcu: Unconditionally expedite during suspend/hibernate The rcu_pm_notify() function refuses to switch to/from expedited grace periods on systems with more than 256 CPUs due to the serialized initialization of expedited grace periods. However, expedited grace periods are now initialized in parallel, removing this concern. This commit therefore removes the checks from rcu_pm_notify(), so that expedited grace periods are used unconditionally during suspend/resume and hibernate/wake operations. As always, real-time workloads wishing to completely avoid expedited grace periods can use the rcupdate.rcu_normal= kernel parameter. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index acd6ccf56faf..95e3250b7b6e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3559,13 +3559,11 @@ static int rcu_pm_notify(struct notifier_block *self, switch (action) { case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: - if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ - rcu_expedite_gp(); + rcu_expedite_gp(); break; case PM_POST_HIBERNATION: case PM_POST_SUSPEND: - if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ - rcu_unexpedite_gp(); + rcu_unexpedite_gp(); break; default: break; -- cgit v1.2.3-59-g8ed1b From 671a63517cf983ad8eaa324167165cef245ab744 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 19 Jan 2019 11:14:18 -0500 Subject: rcu: Avoid unnecessary softirq when system is idle When there are no callbacks pending on an idle system, I noticed that RCU softirq is continuously firing. During this the cpu_no_qs is set to false, and core_needs_qs is set to true indefinitely. This causes rcu_process_callbacks to be repeatedly called, even though the node corresponding to the CPU has that CPU's mask bit cleared and the system is idle. I believe the race is when such mask clearing is done during idle CPU scan of the quiescent state forcing stage in the kthread instead of the softirq. Since the rnp mask is cleared, but the flags on the CPU's rdp are not cleared, the CPU thinks it still needs to report to core RCU. Cure this by clearing the core_needs_qs flag when the CPU detects that its node is already updated which will avoid the unwanted softirq raises to the benefit of real-time systems. Test: Ran rcutorture for various tree RCU configs. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 95e3250b7b6e..2f78a115d34c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2296,6 +2296,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp) } mask = rdp->grpmask; if ((rnp->qsmask & mask) == 0) { + rdp->core_needs_qs = false; raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } else { rdp->core_needs_qs = false; -- cgit v1.2.3-59-g8ed1b From 18d7e40679ef574e428f893101be1c0035e95ee3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 24 Jan 2019 21:14:37 +0300 Subject: rcu: rcu_qs -- Use raise_softirq_irqoff to not save irqs twice The rcu_qs is disabling IRQs by self so no need to do the same in raise_softirq but instead we can save some cycles using raise_softirq_irqoff directly. CC: Paul E. McKenney Signed-off-by: Cyrill Gorcunov Signed-off-by: Paul E. McKenney --- kernel/rcu/tiny.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 911bd9076d43..477b4eb44af5 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -52,7 +52,7 @@ void rcu_qs(void) local_irq_save(flags); if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) { rcu_ctrlblk.donetail = rcu_ctrlblk.curtail; - raise_softirq(RCU_SOFTIRQ); + raise_softirq_irqoff(RCU_SOFTIRQ); } local_irq_restore(flags); } -- cgit v1.2.3-59-g8ed1b From 884157cef0acf05648fe921d80c680afababb428 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 Feb 2019 07:21:29 -0800 Subject: rcu: Make exit_rcu() handle non-preempted RCU readers The purpose of exit_rcu() is to handle cases where buggy code causes a task to exit within an RCU read-side critical section. It currently does that in the case where said RCU read-side critical section was preempted at least once, but fails to handle cases where preemption did not occur. This case needs to be handled because otherwise the final context switch away from the exiting task will incorrectly behave as if task exit were instead a preemption of an RCU read-side critical section, and will therefore queue the exiting task. The exiting task will have exited, and thus won't ever execute rcu_read_unlock(), which means that it will remain queued forever, blocking all subsequent grace periods, and eventually resulting in OOM. Although this is arguably better than letting grace periods proceed and having a later rcu_read_unlock() access the now-freed task structure that once belonged to the exiting tasks, it would obviously be better to correctly handle this case. This commit therefore sets ->rcu_read_lock_nesting to 1 in that case, so that the subsequence call to __rcu_read_unlock() causes the exiting task to exit its dangling RCU read-side critical section. Note that deferred quiescent states need not be considered. The reason is that removing the task from the ->blkd_tasks[] list in the call to rcu_preempt_deferred_qs() handles the per-task component of any deferred quiescent state, and all other components of any deferred quiescent state are associated with the CPU, which isn't going anywhere until some later CPU-hotplug operation, which will report any remaining deferred quiescent states from within the rcu_report_dead() function. Note also that negative values of ->rcu_read_lock_nesting need not be considered. First, these won't show up in exit_rcu() unless there is a serious bug in RCU, and second, setting ->rcu_read_lock_nesting sets the state so that the RCU read-side critical section will be exited normally. Again, this code has no effect unless there has been some prior bug that prevents a task from leaving an RCU read-side critical section before exiting. Furthermore, there have been no reports of the bug fixed by this commit appearing in production. This commit is therefore absolutely -not- recommended for backporting to -stable. Reported-by: ABHISHEK DUBEY Reported-by: BHARATH Y MOURYA Reported-by: Aravinda Prasad Signed-off-by: Paul E. McKenney Tested-by: ABHISHEK DUBEY --- kernel/rcu/tree_plugin.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 97dba50f6fb2..d408661d5fb7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -804,19 +804,25 @@ static void rcu_flavor_sched_clock_irq(int user) /* * Check for a task exiting while in a preemptible-RCU read-side - * critical section, clean up if so. No need to issue warnings, - * as debug_check_no_locks_held() already does this if lockdep - * is enabled. + * critical section, clean up if so. No need to issue warnings, as + * debug_check_no_locks_held() already does this if lockdep is enabled. + * Besides, if this function does anything other than just immediately + * return, there was a bug of some sort. Spewing warnings from this + * function is like as not to simply obscure important prior warnings. */ void exit_rcu(void) { struct task_struct *t = current; - if (likely(list_empty(¤t->rcu_node_entry))) + if (unlikely(!list_empty(¤t->rcu_node_entry))) { + t->rcu_read_lock_nesting = 1; + barrier(); + t->rcu_read_unlock_special.b.blocked = true; + } else if (unlikely(t->rcu_read_lock_nesting)) { + t->rcu_read_lock_nesting = 1; + } else { return; - t->rcu_read_lock_nesting = 1; - barrier(); - t->rcu_read_unlock_special.b.blocked = true; + } __rcu_read_unlock(); rcu_preempt_deferred_qs(current); } -- cgit v1.2.3-59-g8ed1b From 3ffe3d1adc0b6cfb9b24db9995a537bb0aa30a8b Mon Sep 17 00:00:00 2001 From: Liu Song Date: Thu, 21 Feb 2019 22:13:27 +0800 Subject: rcu: Set rcutree.kthread_prio sysfs access to read-only The rcutree.kthread_prio kernel-boot parameter is used to set the priority for boost (rcub), per-CPU (rcuc), and grace-period (rcu_preempt or rcu_sched) kthreads. It is also used by rcutorture to check whether it is possible to meaningfully test RCU priority boosting. However, all of these cases will either ignore or be confused by any post-boot changes to rcutree.kthread_prio. Note that the user really can change the priorities of all of these kthreads using chrt, given sufficient privileges. Therefore, the read-write nature of sysfs access to rcutree.kthread_prio is thus at best an attractive nuisance. This commit therefore changes sysfs access to rcutree.kthread_prio to be read-only. Signed-off-by: Liu Song Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2f78a115d34c..296131450414 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -149,7 +149,7 @@ static void sync_sched_exp_online_cleanup(int cpu); /* rcuc/rcub kthread realtime priority */ static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0; -module_param(kthread_prio, int, 0644); +module_param(kthread_prio, int, 0444); /* Delay in jiffies for grace-period initialization delays, debug only. */ -- cgit v1.2.3-59-g8ed1b From bdead419fb1ddd9df99be9f98b40631ae697a623 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Feb 2019 12:31:06 -0800 Subject: MAINTAINERS: RCU now has its own email list This commit makes rcu@vger.kernel.org be the official list for RCU-related topics. Signed-off-by: Paul E. McKenney --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e17ebf70b548..1924b52937a6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13031,7 +13031,7 @@ M: Josh Triplett R: Steven Rostedt R: Mathieu Desnoyers R: Lai Jiangshan -L: linux-kernel@vger.kernel.org +L: rcu@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git F: tools/testing/selftests/rcutorture @@ -13079,7 +13079,7 @@ R: Steven Rostedt R: Mathieu Desnoyers R: Lai Jiangshan R: Joel Fernandes -L: linux-kernel@vger.kernel.org +L: rcu@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git @@ -14234,7 +14234,7 @@ M: "Paul E. McKenney" M: Josh Triplett R: Steven Rostedt R: Mathieu Desnoyers -L: linux-kernel@vger.kernel.org +L: rcu@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git -- cgit v1.2.3-59-g8ed1b From 9145b505bbbd87c6c8d4854ec411daeb33ed3deb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 16 Mar 2019 16:42:49 -0700 Subject: MAINTAINERS: Add -rcu branch name ("dev") Signed-off-by: Paul E. McKenney --- MAINTAINERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1924b52937a6..a9b5270d006e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8983,7 +8983,7 @@ R: Daniel Lustig L: linux-kernel@vger.kernel.org L: linux-arch@vger.kernel.org S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: tools/memory-model/ F: Documentation/atomic_bitops.txt F: Documentation/atomic_t.txt @@ -13033,7 +13033,7 @@ R: Mathieu Desnoyers R: Lai Jiangshan L: rcu@vger.kernel.org S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: tools/testing/selftests/rcutorture RDC R-321X SoC @@ -13082,7 +13082,7 @@ R: Joel Fernandes L: rcu@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: Documentation/RCU/ X: Documentation/RCU/torture.txt F: include/linux/rcu* @@ -14237,7 +14237,7 @@ R: Mathieu Desnoyers L: rcu@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: include/linux/srcu*.h F: kernel/rcu/srcu*.c @@ -15684,7 +15684,7 @@ M: "Paul E. McKenney" M: Josh Triplett L: linux-kernel@vger.kernel.org S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: Documentation/RCU/torture.txt F: kernel/torture.c F: kernel/rcu/rcutorture.c -- cgit v1.2.3-59-g8ed1b From b2eb85b49a576515fb845cb12568b173c2bedffc Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Sat, 2 Mar 2019 17:25:19 +0900 Subject: rcu: Move common code out of if-else block As the result of recent addition of "rdp->core_needs_qs = false;" in the "if" block, now both branches of the if-else have the same assignment. Factor it out and reduce line count. Signed-off-by: Akira Yokosawa Cc: Joel Fernandes Signed-off-by: Paul E. McKenney Acked-by: Joel Fernandes (Google) --- kernel/rcu/tree.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 296131450414..5aefd36ac648 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2295,12 +2295,10 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp) return; } mask = rdp->grpmask; + rdp->core_needs_qs = false; if ((rnp->qsmask & mask) == 0) { - rdp->core_needs_qs = false; raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } else { - rdp->core_needs_qs = false; - /* * This GP can't end until cpu checks in, so all of our * callbacks can be processed during the next GP. -- cgit v1.2.3-59-g8ed1b From da8739f23fadf05809c6c37c327367b229467045 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 5 Mar 2019 15:28:19 -0800 Subject: rcu: Allow rcu_nocbs= to specify all CPUs Currently, the rcu_nocbs= kernel boot parameter requires that a specific list of CPUs be specified, and has no way to say "all of them". As noted by user RavFX in a comment to Phoronix topic 1002538, this is an inconvenient side effect of the removal of the RCU_NOCB_CPU_ALL Kconfig option. This commit therefore enables the rcu_nocbs= kernel boot parameter to be given the string "all", as in "rcu_nocbs=all" to specify that all CPUs on the system are to have their RCU callbacks offloaded. Another approach would be to make cpulist_parse() check for "all", but there are uses of cpulist_parse() that do other checking, which could conflict with an "all". This commit therefore focuses on the specific use of cpulist_parse() in rcu_nocb_setup(). Just a note to other people who would like changes to Linux-kernel RCU: If you send your requests to me directly, they might get fixed somewhat faster. RavFX's comment was posted on January 22, 2018 and I first saw it on March 5, 2019. And the only reason that I found it -at- -all- was that I was looking for projects using RCU, and my search engine showed me that Phoronix comment quite by accident. Your choice, though! ;-) Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 4 +++- kernel/rcu/tree_plugin.h | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2b8ee90bb644..d377a2166b79 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3623,7 +3623,9 @@ see CONFIG_RAS_CEC help text. rcu_nocbs= [KNL] - The argument is a cpu list, as described above. + The argument is a cpu list, as described above, + except that the string "all" can be used to + specify every CPU on the system. In kernels built with CONFIG_RCU_NOCB_CPU=y, set the specified list of CPUs to be no-callback CPUs. diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index d408661d5fb7..ed4a6dabf31d 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1776,7 +1776,10 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp) static int __init rcu_nocb_setup(char *str) { alloc_bootmem_cpumask_var(&rcu_nocb_mask); - cpulist_parse(str, rcu_nocb_mask); + if (!strcasecmp(str, "all")) + cpumask_setall(rcu_nocb_mask); + else + cpulist_parse(str, rcu_nocb_mask); return 1; } __setup("rcu_nocbs=", rcu_nocb_setup); -- cgit v1.2.3-59-g8ed1b From 497e42600b69aca1b799c840d2cfc7ad60bb8017 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 6 Mar 2019 14:47:56 -0800 Subject: rcu: Report error for bad rcu_nocbs= parameter values This commit prints a console message when cpulist_parse() reports a bad list of CPUs, and sets all CPUs' bits in that case. The reason for setting all CPUs' bits is that this is the safe(r) choice for real-time workloads, which would normally be the ones using the rcu_nocbs= kernel boot parameter. Either way, later RCU console log messages list the actual set of CPUs whose RCU callbacks will be offloaded. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index ed4a6dabf31d..f0aeb7416dcc 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1772,14 +1772,22 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp) */ -/* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */ +/* + * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. + * The string after the "rcu_nocbs=" is either "all" for all CPUs, or a + * comma-separated list of CPUs and/or CPU ranges. If an invalid list is + * given, a warning is emitted and all CPUs are offloaded. + */ static int __init rcu_nocb_setup(char *str) { alloc_bootmem_cpumask_var(&rcu_nocb_mask); if (!strcasecmp(str, "all")) cpumask_setall(rcu_nocb_mask); else - cpulist_parse(str, rcu_nocb_mask); + if (cpulist_parse(str, rcu_nocb_mask)) { + pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n"); + cpumask_setall(rcu_nocb_mask); + } return 1; } __setup("rcu_nocbs=", rcu_nocb_setup); -- cgit v1.2.3-59-g8ed1b From 0f58d2ac2c87d27006c2b610668ebc4ff1f7c3ba Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 8 Mar 2019 15:16:18 +0530 Subject: rcu: Fix self-wakeups for grace-period kthread The current rcu_gp_kthread_wake() function uses in_interrupt() and thus does a self-wakeup from all interrupt contexts, including the pointless case where the GP kthread happens to be running with bottom halves disabled, along with the impossible case where the GP kthread is running within an NMI handler (you are not supposed to invoke rcu_gp_kthread_wake() from within an NMI handler. This commit therefore replaces the in_interrupt() with in_irq(), so that the self-wakeups happen only from handlers for hardware interrupts and softirqs. This also makes the code match the comment. Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney Acked-by: Steven Rostedt (VMware) --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5aefd36ac648..139fa1f5c537 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1585,7 +1585,7 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) static void rcu_gp_kthread_wake(void) { if ((current == rcu_state.gp_kthread && - !in_interrupt() && !in_serving_softirq()) || + !in_irq() && !in_serving_softirq()) || !READ_ONCE(rcu_state.gp_flags) || !rcu_state.gp_kthread) return; -- cgit v1.2.3-59-g8ed1b From 6973032a602ee678c98644a30d57ebf9c72dd6d3 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Mon, 11 Mar 2019 15:16:11 +0530 Subject: rcu: Default jiffies_to_sched_qs to jiffies_till_sched_qs The current code only calls adjust_jiffies_till_sched_qs() if jiffies_till_sched_qs is left at its default value, so when the jiffies_till_sched_qs kernel-boot parameter actually is specified, jiffies_to_sched_qs will be left with the value zero, which will result in useless slowdowns of cond_resched(). This commit therefore changes rcu_init_geometry() to unconditionally invoke adjust_jiffies_till_sched_qs(), which ensures that jiffies_to_sched_qs will be initialized in all cases, thus maintaining good cond_resched() performance. Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 139fa1f5c537..466299c3d2da 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3739,8 +3739,7 @@ static void __init rcu_init_geometry(void) jiffies_till_first_fqs = d; if (jiffies_till_next_fqs == ULONG_MAX) jiffies_till_next_fqs = d; - if (jiffies_till_sched_qs == ULONG_MAX) - adjust_jiffies_till_sched_qs(); + adjust_jiffies_till_sched_qs(); /* If the compile-time values are accurate, just leave. */ if (rcu_fanout_leaf == RCU_FANOUT_LEAF && -- cgit v1.2.3-59-g8ed1b From b699cce1604e828f19c39845252626eb78cdf38a Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Mon, 11 Mar 2019 17:28:03 +0530 Subject: rcu: Do a single rhp->func read in rcu_head_after_call_rcu() The rcu_head_after_call_rcu() function reads the rhp->func pointer twice, which can result in a false-positive WARN_ON_ONCE() if the callback were passed to call_rcu() between the two reads. Although racing rcu_head_after_call_rcu() with call_rcu() is to be a dubious use case (the return value is not reliable in that case), intermittent and irreproducible warnings are also quite dubious. This commit therefore uses a single READ_ONCE() to pick up the value of rhp->func once, then tests that value twice, thus guaranteeing consistent processing within rcu_head_after_call_rcu()(). Neverthless, racing rcu_head_after_call_rcu() with call_rcu() is still a dubious use case. Signed-off-by: Neeraj Upadhyay [ paulmck: Add blank line after declaration per checkpatch.pl. ] Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6cdb1db776cf..922bb6848813 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -878,9 +878,11 @@ static inline void rcu_head_init(struct rcu_head *rhp) static inline bool rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) { - if (READ_ONCE(rhp->func) == f) + rcu_callback_t func = READ_ONCE(rhp->func); + + if (func == f) return true; - WARN_ON_ONCE(READ_ONCE(rhp->func) != (rcu_callback_t)~0L); + WARN_ON_ONCE(func != (rcu_callback_t)~0L); return false; } -- cgit v1.2.3-59-g8ed1b From 85f2b60c4321b088ba08ec9a05b8a7b68e4ada2a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 Mar 2019 15:45:13 -0700 Subject: rcu: Update jiffies_to_sched_qs and adjust_jiffies_till_sched_qs() comments This commit better documents the jiffies_to_sched_qs default-value strategy used by adjust_jiffies_till_sched_qs() Reported-by: Joel Fernandes Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 466299c3d2da..e117732bcd5d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -406,7 +406,7 @@ static bool rcu_kick_kthreads; */ static ulong jiffies_till_sched_qs = ULONG_MAX; module_param(jiffies_till_sched_qs, ulong, 0444); -static ulong jiffies_to_sched_qs; /* Adjusted version of above if not default */ +static ulong jiffies_to_sched_qs; /* See adjust_jiffies_till_sched_qs(). */ module_param(jiffies_to_sched_qs, ulong, 0444); /* Display only! */ /* @@ -424,6 +424,7 @@ static void adjust_jiffies_till_sched_qs(void) WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs); return; } + /* Otherwise, set to third fqs scan, but bound below on large system. */ j = READ_ONCE(jiffies_till_first_fqs) + 2 * READ_ONCE(jiffies_till_next_fqs); if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV) -- cgit v1.2.3-59-g8ed1b From 5d8a752e31aaa4c9703f201956a40b45ed791217 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Wed, 20 Mar 2019 03:33:00 +0000 Subject: rcu: Fix force_qs_rnp() header comment Previously, threads blocked on offlining CPUS were migrated to the root rcu_node structure, thus requiring RCU priority boosting on this structure. However, since commit d19fb8d1f3f6 ("rcu: Don't migrate blocked tasks even if all corresponding CPUs offline"), RCU does not migrate blocked tasks. Consequently, RCU no longer does RCU priority boosting on the root rcu_node structure as of commit 1be0085b515e ("rcu: Don't initiate RCU priority boosting on root rcu_node"). This commit therefore brings comments for the force_qs_rnp() function's header comment in line with this new no-root-boosting reality. Signed-off-by: Zhouyi Zhou [ paulmck: Also remove obsolete comment on suppressing new grace periods. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e117732bcd5d..abc8512ceb5f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2548,11 +2548,11 @@ void rcu_sched_clock_irq(int user) } /* - * Scan the leaf rcu_node structures, processing dyntick state for any that - * have not yet encountered a quiescent state, using the function specified. - * Also initiate boosting for any threads blocked on the root rcu_node. - * - * The caller must have suppressed start of new grace periods. + * Scan the leaf rcu_node structures. For each structure on which all + * CPUs have reported a quiescent state and on which there are tasks + * blocking the current grace period, initiate RCU priority boosting. + * Otherwise, invoke the specified function to check dyntick state for + * each CPU that has not yet reported a quiescent state. */ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) { -- cgit v1.2.3-59-g8ed1b From a2badefa8574a7a424a72f67a3bd43248141f76a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 16:29:50 -0700 Subject: rcu: Eliminate redundant NULL-pointer check Because rcu_wake_cond() checks for a null task_struct pointer, there is no need for its callers to do so. This commit eliminates the redundant check. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index f0aeb7416dcc..81d3cd821891 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1191,8 +1191,6 @@ static int rcu_boost_kthread(void *arg) static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) __releases(rnp->lock) { - struct task_struct *t; - raw_lockdep_assert_held_rcu_node(rnp); if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { raw_spin_unlock_irqrestore_rcu_node(rnp, flags); @@ -1206,9 +1204,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) if (rnp->exp_tasks == NULL) rnp->boost_tasks = rnp->gp_tasks; raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - t = rnp->boost_kthread_task; - if (t) - rcu_wake_cond(t, rnp->boost_kthread_status); + rcu_wake_cond(rnp->boost_kthread_task, + rnp->boost_kthread_status); } else { raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } -- cgit v1.2.3-59-g8ed1b From f1a98045abd824df833354b309b5fa64ff95f792 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 23 Mar 2019 09:19:12 -0700 Subject: rcu: Fix typo in tree_exp.h comment This commit changes a rcu_exp_handler() comment from rcu_preempt_defer_qs() to rcu_preempt_deferred_qs() in order to better match reality. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 4c2a0189e748..ec4fb93a5dbe 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -648,7 +648,7 @@ static void rcu_exp_handler(void *unused) * * If the CPU is fully enabled (or if some buggy RCU-preempt * read-side critical section is being used from idle), just - * invoke rcu_preempt_defer_qs() to immediately report the + * invoke rcu_preempt_deferred_qs() to immediately report the * quiescent state. We cannot use rcu_read_unlock_special() * because we are in an interrupt handler, which will cause that * function to take an early exit without doing anything. -- cgit v1.2.3-59-g8ed1b From add0d37b4f1e77de7d170ece43c8d765572a1eab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Mar 2019 10:22:22 -0700 Subject: rcu: Correct READ_ONCE()/WRITE_ONCE() for ->rcu_read_unlock_special The task_struct structure's ->rcu_read_unlock_special field is only ever read or written by the owning task, but it is accessed both at process and interrupt levels. It may therefore be accessed using plain reads and writes while interrupts are disabled, but must be accessed using READ_ONCE() and WRITE_ONCE() or better otherwise. This commit makes a few adjustments to align with this discipline. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 2 +- kernel/rcu/tree_plugin.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index ec4fb93a5dbe..1ee0782213b8 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -633,7 +633,7 @@ static void rcu_exp_handler(void *unused) raw_spin_lock_irqsave_rcu_node(rnp, flags); if (rnp->expmask & rdp->grpmask) { rdp->deferred_qs = true; - WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, true); + t->rcu_read_unlock_special.b.exp_hint = true; } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 81d3cd821891..6ddb3c05e88f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -285,7 +285,7 @@ static void rcu_qs(void) TPS("cpuqs")); __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false); barrier(); /* Coordinate with rcu_flavor_sched_clock_irq(). */ - current->rcu_read_unlock_special.b.need_qs = false; + WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false); } } @@ -817,7 +817,7 @@ void exit_rcu(void) if (unlikely(!list_empty(¤t->rcu_node_entry))) { t->rcu_read_lock_nesting = 1; barrier(); - t->rcu_read_unlock_special.b.blocked = true; + WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true); } else if (unlikely(t->rcu_read_lock_nesting)) { t->rcu_read_lock_nesting = 1; } else { -- cgit v1.2.3-59-g8ed1b From 5cdfd174ea6c2dc1d331b61bdc9572698658600a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Feb 2019 10:44:33 -0800 Subject: srcu: Check for in-flight callbacks in _cleanup_srcu_struct() If someone fails to drain the corresponding SRCU callbacks (for example, by failing to invoke srcu_barrier()) before invoking either cleanup_srcu_struct() or cleanup_srcu_struct_quiesced(), the resulting diagnostic is an ambiguous use-after-free diagnostic, and even then only if you are running something like KASAN. This commit therefore improves SRCU diagnostics by adding checks for in-flight callbacks at _cleanup_srcu_struct() time. Note that these diagnostics can still be defeated, for example, by invoking call_srcu() concurrently with cleanup_srcu_struct(). Which is a really bad idea, but sometimes all too easy to do. But even then, these diagnostics have at least some probability of catching the problem. Reported-by: Sagi Grimberg Reported-by: Bart Van Assche Signed-off-by: Paul E. McKenney Tested-by: Bart Van Assche --- kernel/rcu/srcutree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index a60b8ba9e1ac..4f30f3ecabc1 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -387,6 +387,8 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) del_timer_sync(&sdp->delay_work); flush_work(&sdp->work); } + if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist))) + return; /* Forgot srcu_barrier(), so just leak it! */ } if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || WARN_ON(srcu_readers_active(ssp))) { -- cgit v1.2.3-59-g8ed1b From f5ad3991493c69d203d42b94d32349b54c58a3f1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 13 Feb 2019 13:54:37 -0800 Subject: srcu: Remove cleanup_srcu_struct_quiesced() The cleanup_srcu_struct_quiesced() function was added because NVME used WQ_MEM_RECLAIM workqueues and SRCU did not, which meant that NVME workqueues waiting on SRCU workqueues could result in deadlocks during low-memory conditions. However, SRCU now also has WQ_MEM_RECLAIM workqueues, so there is no longer a potential for deadlock. Furthermore, it turns out to be extremely hard to use cleanup_srcu_struct_quiesced() correctly due to the fact that SRCU callback invocation accesses the srcu_struct structure's per-CPU data area just after callbacks are invoked. Therefore, the usual practice of using srcu_barrier() to wait for callbacks to be invoked before invoking cleanup_srcu_struct_quiesced() fails because SRCU's callback-invocation workqueue handler might be delayed, which can result in cleanup_srcu_struct_quiesced() being invoked (and thus freeing the per-CPU data) before the SRCU's callback-invocation workqueue handler is finished using that per-CPU data. Nor is this a theoretical problem: KASAN emitted use-after-free warnings because of this problem on actual runs. In short, NVME can now safely invoke cleanup_srcu_struct(), which avoids the use-after-free scenario. And cleanup_srcu_struct_quiesced() is quite difficult to use safely. This commit therefore removes cleanup_srcu_struct_quiesced(), switching its sole user back to cleanup_srcu_struct(). This effectively reverts the following pair of commits: f7194ac32ca2 ("srcu: Add cleanup_srcu_struct_quiesced()") 4317228ad9b8 ("nvme: Avoid flush dependency in delete controller flow") Reported-by: Bart Van Assche Signed-off-by: Paul E. McKenney Reviewed-by: Bart Van Assche Tested-by: Bart Van Assche --- drivers/nvme/host/core.c | 2 +- include/linux/srcu.h | 36 +----------------------------------- kernel/rcu/rcutorture.c | 7 +------ kernel/rcu/srcutiny.c | 9 +++------ kernel/rcu/srcutree.c | 30 ++++++++++++------------------ 5 files changed, 18 insertions(+), 66 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 470601980794..739c5b4830d7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -388,7 +388,7 @@ static void nvme_free_ns_head(struct kref *ref) nvme_mpath_remove_disk(head); ida_simple_remove(&head->subsys->ns_ida, head->instance); list_del_init(&head->entry); - cleanup_srcu_struct_quiesced(&head->srcu); + cleanup_srcu_struct(&head->srcu); nvme_put_subsystem(head->subsys); kfree(head); } diff --git a/include/linux/srcu.h b/include/linux/srcu.h index c495b2d51569..e432cc92c73d 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -56,45 +56,11 @@ struct srcu_struct { }; void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, void (*func)(struct rcu_head *head)); -void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced); +void cleanup_srcu_struct(struct srcu_struct *ssp); int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); void synchronize_srcu(struct srcu_struct *ssp); -/** - * cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @ssp: structure to clean up. - * - * Must invoke this after you are finished using a given srcu_struct that - * was initialized via init_srcu_struct(), else you leak memory. - */ -static inline void cleanup_srcu_struct(struct srcu_struct *ssp) -{ - _cleanup_srcu_struct(ssp, false); -} - -/** - * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure - * @ssp: structure to clean up. - * - * Must invoke this after you are finished using a given srcu_struct that - * was initialized via init_srcu_struct(), else you leak memory. Also, - * all grace-period processing must have completed. - * - * "Completed" means that the last synchronize_srcu() and - * synchronize_srcu_expedited() calls must have returned before the call - * to cleanup_srcu_struct_quiesced(). It also means that the callback - * from the last call_srcu() must have been invoked before the call to - * cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help - * with this last. Violating these rules will get you a WARN_ON() splat - * (with high probability, anyway), and will also cause the srcu_struct - * to be leaked. - */ -static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *ssp) -{ - _cleanup_srcu_struct(ssp, true); -} - #ifdef CONFIG_DEBUG_LOCK_ALLOC /** diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index f14d1b18a74f..d2b226110835 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -592,12 +592,7 @@ static void srcu_torture_init(void) static void srcu_torture_cleanup(void) { - static DEFINE_TORTURE_RANDOM(rand); - - if (torture_random(&rand) & 0x800) - cleanup_srcu_struct(&srcu_ctld); - else - cleanup_srcu_struct_quiesced(&srcu_ctld); + cleanup_srcu_struct(&srcu_ctld); srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */ } diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 5d4a39a6505a..44d6606b8325 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -76,19 +76,16 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. */ -void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) +void cleanup_srcu_struct(struct srcu_struct *ssp) { WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]); - if (quiesced) - WARN_ON(work_pending(&ssp->srcu_work)); - else - flush_work(&ssp->srcu_work); + flush_work(&ssp->srcu_work); WARN_ON(ssp->srcu_gp_running); WARN_ON(ssp->srcu_gp_waiting); WARN_ON(ssp->srcu_cb_head); WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail); } -EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); +EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /* * Removes the count for the old reader from the appropriate element of diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 4f30f3ecabc1..9b761e546de8 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -360,8 +360,14 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp) return SRCU_INTERVAL; } -/* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */ -void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) +/** + * cleanup_srcu_struct - deconstruct a sleep-RCU structure + * @ssp: structure to clean up. + * + * Must invoke this after you are finished using a given srcu_struct that + * was initialized via init_srcu_struct(), else you leak memory. + */ +void cleanup_srcu_struct(struct srcu_struct *ssp) { int cpu; @@ -369,24 +375,12 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) return; /* Just leak it! */ if (WARN_ON(srcu_readers_active(ssp))) return; /* Just leak it! */ - if (quiesced) { - if (WARN_ON(delayed_work_pending(&ssp->work))) - return; /* Just leak it! */ - } else { - flush_delayed_work(&ssp->work); - } + flush_delayed_work(&ssp->work); for_each_possible_cpu(cpu) { struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); - if (quiesced) { - if (WARN_ON(timer_pending(&sdp->delay_work))) - return; /* Just leak it! */ - if (WARN_ON(work_pending(&sdp->work))) - return; /* Just leak it! */ - } else { - del_timer_sync(&sdp->delay_work); - flush_work(&sdp->work); - } + del_timer_sync(&sdp->delay_work); + flush_work(&sdp->work); if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist))) return; /* Forgot srcu_barrier(), so just leak it! */ } @@ -399,7 +393,7 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) free_percpu(ssp->sda); ssp->sda = NULL; } -EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); +EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /* * Counts the new reader in the appropriate per-CPU element of the -- cgit v1.2.3-59-g8ed1b From 10462d6f58fb6dbde7563e9343505d98d5bfba3d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 16:10:57 -0800 Subject: rcu: Move RCU CPU stall-warning code out of update.c The RCU CPU stall-warning code for normal grace periods is currently scattered across three files, due to earlier Tiny RCU support for RCU CPU stall warnings and for old Kconfig options that have long since been retired. Given that it is hard for the lead RCU maintainer to find relevant stall-warning code, it would be good to consolidate it. This commit starts this process by moving stall-warning code from kernel/rcu/update.c to a new kernel/rcu/tree_stall.h file. Note that the definitions of rcu_cpu_stall_suppress and rcu_cpu_stall_timeout must remain in kernel/rcu/update.h to provide compatibility for kernel boot parameter lists. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcu.h | 1 + kernel/rcu/tree.c | 1 + kernel/rcu/tree_stall.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ kernel/rcu/update.c | 59 +-------------------------------------------- 4 files changed, 66 insertions(+), 58 deletions(-) create mode 100644 kernel/rcu/tree_stall.h diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index acee72c0b24b..4b58c907b4b7 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -233,6 +233,7 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) #ifdef CONFIG_RCU_STALL_COMMON extern int rcu_cpu_stall_suppress; +extern int rcu_cpu_stall_timeout; int rcu_jiffies_till_stall_check(void); #define rcu_ftrace_dump_stall_suppress() \ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index acd6ccf56faf..424d50ccf9e6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3858,5 +3858,6 @@ void __init rcu_init(void) srcu_init(); } +#include "tree_stall.h" #include "tree_exp.h" #include "tree_plugin.h" diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h new file mode 100644 index 000000000000..682189f4d083 --- /dev/null +++ b/kernel/rcu/tree_stall.h @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * RCU CPU stall warnings for normal RCU grace periods + * + * Copyright IBM Corporation, 2019 + * + * Author: Paul E. McKenney + */ + + +#ifdef CONFIG_PROVE_RCU +#define RCU_STALL_DELAY_DELTA (5 * HZ) +#else +#define RCU_STALL_DELAY_DELTA 0 +#endif + +int rcu_jiffies_till_stall_check(void) +{ + int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout); + + /* + * Limit check must be consistent with the Kconfig limits + * for CONFIG_RCU_CPU_STALL_TIMEOUT. + */ + if (till_stall_check < 3) { + WRITE_ONCE(rcu_cpu_stall_timeout, 3); + till_stall_check = 3; + } else if (till_stall_check > 300) { + WRITE_ONCE(rcu_cpu_stall_timeout, 300); + till_stall_check = 300; + } + return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; +} +EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check); + +void rcu_sysrq_start(void) +{ + if (!rcu_cpu_stall_suppress) + rcu_cpu_stall_suppress = 2; +} + +void rcu_sysrq_end(void) +{ + if (rcu_cpu_stall_suppress == 2) + rcu_cpu_stall_suppress = 0; +} + +static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) +{ + rcu_cpu_stall_suppress = 1; + return NOTIFY_DONE; +} + +static struct notifier_block rcu_panic_block = { + .notifier_call = rcu_panic, +}; + +static int __init check_cpu_stall_init(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); + return 0; +} +early_initcall(check_cpu_stall_init); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index cbaa976c5945..c3bf44ba42e5 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -424,68 +424,11 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); #endif #ifdef CONFIG_RCU_STALL_COMMON - -#ifdef CONFIG_PROVE_RCU -#define RCU_STALL_DELAY_DELTA (5 * HZ) -#else -#define RCU_STALL_DELAY_DELTA 0 -#endif - int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress); -static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; - module_param(rcu_cpu_stall_suppress, int, 0644); +int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; module_param(rcu_cpu_stall_timeout, int, 0644); - -int rcu_jiffies_till_stall_check(void) -{ - int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout); - - /* - * Limit check must be consistent with the Kconfig limits - * for CONFIG_RCU_CPU_STALL_TIMEOUT. - */ - if (till_stall_check < 3) { - WRITE_ONCE(rcu_cpu_stall_timeout, 3); - till_stall_check = 3; - } else if (till_stall_check > 300) { - WRITE_ONCE(rcu_cpu_stall_timeout, 300); - till_stall_check = 300; - } - return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; -} -EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check); - -void rcu_sysrq_start(void) -{ - if (!rcu_cpu_stall_suppress) - rcu_cpu_stall_suppress = 2; -} - -void rcu_sysrq_end(void) -{ - if (rcu_cpu_stall_suppress == 2) - rcu_cpu_stall_suppress = 0; -} - -static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) -{ - rcu_cpu_stall_suppress = 1; - return NOTIFY_DONE; -} - -static struct notifier_block rcu_panic_block = { - .notifier_call = rcu_panic, -}; - -static int __init check_cpu_stall_init(void) -{ - atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); - return 0; -} -early_initcall(check_cpu_stall_init); - #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ #ifdef CONFIG_TASKS_RCU -- cgit v1.2.3-59-g8ed1b From 3fc3d1709fc75995ee09ad4f35f160cf360b397b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 16:34:47 -0800 Subject: rcu: Move RCU CPU stall-warning code out of tree_plugin.h The RCU CPU stall-warning code for normal grace periods is currently scattered across two files, due to earlier Tiny RCU support for RCU CPU stall warnings and for old Kconfig options that have long since been retired. Given that it is hard for the lead RCU maintainer to find relevant stall-warning code, it would be good to consolidate it. This commit continues this process by moving stall-warning code from kernel/rcu/tree_plugin.c to a new kernel/rcu/tree_stall.h file. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 90 --------------------------------------------- kernel/rcu/tree_stall.h | 95 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 90 deletions(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 97dba50f6fb2..7fa3bc4d481b 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -642,79 +642,6 @@ static void rcu_read_unlock_special(struct task_struct *t) rcu_preempt_deferred_qs_irqrestore(t, flags); } -/* - * Dump detailed information for all tasks blocking the current RCU - * grace period on the specified rcu_node structure. - */ -static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) -{ - unsigned long flags; - struct task_struct *t; - - raw_spin_lock_irqsave_rcu_node(rnp, flags); - if (!rcu_preempt_blocked_readers_cgp(rnp)) { - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - return; - } - t = list_entry(rnp->gp_tasks->prev, - struct task_struct, rcu_node_entry); - list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { - /* - * We could be printing a lot while holding a spinlock. - * Avoid triggering hard lockup. - */ - touch_nmi_watchdog(); - sched_show_task(t); - } - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); -} - -/* - * Dump detailed information for all tasks blocking the current RCU - * grace period. - */ -static void rcu_print_detail_task_stall(void) -{ - struct rcu_node *rnp = rcu_get_root(); - - rcu_print_detail_task_stall_rnp(rnp); - rcu_for_each_leaf_node(rnp) - rcu_print_detail_task_stall_rnp(rnp); -} - -static void rcu_print_task_stall_begin(struct rcu_node *rnp) -{ - pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", - rnp->level, rnp->grplo, rnp->grphi); -} - -static void rcu_print_task_stall_end(void) -{ - pr_cont("\n"); -} - -/* - * Scan the current list of tasks blocked within RCU read-side critical - * sections, printing out the tid of each. - */ -static int rcu_print_task_stall(struct rcu_node *rnp) -{ - struct task_struct *t; - int ndetected = 0; - - if (!rcu_preempt_blocked_readers_cgp(rnp)) - return 0; - rcu_print_task_stall_begin(rnp); - t = list_entry(rnp->gp_tasks->prev, - struct task_struct, rcu_node_entry); - list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { - pr_cont(" P%d", t->pid); - ndetected++; - } - rcu_print_task_stall_end(); - return ndetected; -} - /* * Scan the current list of tasks blocked within RCU read-side critical * sections, printing out the tid of each that is blocking the current @@ -979,23 +906,6 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) } static void rcu_preempt_deferred_qs(struct task_struct *t) { } -/* - * Because preemptible RCU does not exist, we never have to check for - * tasks blocked within RCU read-side critical sections. - */ -static void rcu_print_detail_task_stall(void) -{ -} - -/* - * Because preemptible RCU does not exist, we never have to check for - * tasks blocked within RCU read-side critical sections. - */ -static int rcu_print_task_stall(struct rcu_node *rnp) -{ - return 0; -} - /* * Because preemptible RCU does not exist, we never have to check for * tasks blocked within RCU read-side critical sections that are diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 682189f4d083..6f5f94944f49 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -61,3 +61,98 @@ static int __init check_cpu_stall_init(void) return 0; } early_initcall(check_cpu_stall_init); + +#ifdef CONFIG_PREEMPT + +/* + * Dump detailed information for all tasks blocking the current RCU + * grace period on the specified rcu_node structure. + */ +static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) +{ + unsigned long flags; + struct task_struct *t; + + raw_spin_lock_irqsave_rcu_node(rnp, flags); + if (!rcu_preempt_blocked_readers_cgp(rnp)) { + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + return; + } + t = list_entry(rnp->gp_tasks->prev, + struct task_struct, rcu_node_entry); + list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { + /* + * We could be printing a lot while holding a spinlock. + * Avoid triggering hard lockup. + */ + touch_nmi_watchdog(); + sched_show_task(t); + } + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); +} + +/* + * Dump detailed information for all tasks blocking the current RCU + * grace period. + */ +static void rcu_print_detail_task_stall(void) +{ + struct rcu_node *rnp = rcu_get_root(); + + rcu_print_detail_task_stall_rnp(rnp); + rcu_for_each_leaf_node(rnp) + rcu_print_detail_task_stall_rnp(rnp); +} + +static void rcu_print_task_stall_begin(struct rcu_node *rnp) +{ + pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", + rnp->level, rnp->grplo, rnp->grphi); +} + +static void rcu_print_task_stall_end(void) +{ + pr_cont("\n"); +} + +/* + * Scan the current list of tasks blocked within RCU read-side critical + * sections, printing out the tid of each. + */ +static int rcu_print_task_stall(struct rcu_node *rnp) +{ + struct task_struct *t; + int ndetected = 0; + + if (!rcu_preempt_blocked_readers_cgp(rnp)) + return 0; + rcu_print_task_stall_begin(rnp); + t = list_entry(rnp->gp_tasks->prev, + struct task_struct, rcu_node_entry); + list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { + pr_cont(" P%d", t->pid); + ndetected++; + } + rcu_print_task_stall_end(); + return ndetected; +} + +#else /* #ifdef CONFIG_PREEMPT */ + +/* + * Because preemptible RCU does not exist, we never have to check for + * tasks blocked within RCU read-side critical sections. + */ +static void rcu_print_detail_task_stall(void) +{ +} + +/* + * Because preemptible RCU does not exist, we never have to check for + * tasks blocked within RCU read-side critical sections. + */ +static int rcu_print_task_stall(struct rcu_node *rnp) +{ + return 0; +} +#endif /* #else #ifdef CONFIG_PREEMPT */ -- cgit v1.2.3-59-g8ed1b From 32255d51b6ed00de2b88970ceea8db0ec3bae6f8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 16:57:41 -0800 Subject: rcu: Move RCU CPU stall-warning code out of tree.c This commit completes the process of consolidating the code for RCU CPU stall warnings for normal grace periods by moving the remaining such code from kernel/rcu/tree.c to kernel/rcu/tree_stall.h. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 291 ----------------------------------------------- kernel/rcu/tree.h | 10 +- kernel/rcu/tree_stall.h | 292 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 299 insertions(+), 294 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 424d50ccf9e6..001dd05f6e38 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -102,8 +102,6 @@ int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; /* Number of rcu_nodes at specified level. */ int num_rcu_lvl[] = NUM_RCU_LVL_INIT; int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ -/* panic() on RCU Stall sysctl. */ -int sysctl_panic_on_rcu_stall __read_mostly; /* Commandeer a sysrq key to dump RCU's tree. */ static bool sysrq_rcu; module_param(sysrq_rcu, bool, 0444); @@ -1167,295 +1165,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) return 0; } -static void record_gp_stall_check_time(void) -{ - unsigned long j = jiffies; - unsigned long j1; - - rcu_state.gp_start = j; - j1 = rcu_jiffies_till_stall_check(); - /* Record ->gp_start before ->jiffies_stall. */ - smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */ - rcu_state.jiffies_resched = j + j1 / 2; - rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); -} - -/* - * Complain about starvation of grace-period kthread. - */ -static void rcu_check_gp_kthread_starvation(void) -{ - struct task_struct *gpk = rcu_state.gp_kthread; - unsigned long j; - - j = jiffies - READ_ONCE(rcu_state.gp_activity); - if (j > 2 * HZ) { - pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", - rcu_state.name, j, - (long)rcu_seq_current(&rcu_state.gp_seq), - READ_ONCE(rcu_state.gp_flags), - gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, - gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1); - if (gpk) { - pr_err("RCU grace-period kthread stack dump:\n"); - sched_show_task(gpk); - wake_up_process(gpk); - } - } -} - -/* - * Dump stacks of all tasks running on stalled CPUs. First try using - * NMIs, but fall back to manual remote stack tracing on architectures - * that don't support NMI-based stack dumps. The NMI-triggered stack - * traces are more accurate because they are printed by the target CPU. - */ -static void rcu_dump_cpu_stacks(void) -{ - int cpu; - unsigned long flags; - struct rcu_node *rnp; - - rcu_for_each_leaf_node(rnp) { - raw_spin_lock_irqsave_rcu_node(rnp, flags); - for_each_leaf_node_possible_cpu(rnp, cpu) - if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) - if (!trigger_single_cpu_backtrace(cpu)) - dump_cpu_task(cpu); - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - } -} - -/* - * If too much time has passed in the current grace period, and if - * so configured, go kick the relevant kthreads. - */ -static void rcu_stall_kick_kthreads(void) -{ - unsigned long j; - - if (!rcu_kick_kthreads) - return; - j = READ_ONCE(rcu_state.jiffies_kick_kthreads); - if (time_after(jiffies, j) && rcu_state.gp_kthread && - (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) { - WARN_ONCE(1, "Kicking %s grace-period kthread\n", - rcu_state.name); - rcu_ftrace_dump(DUMP_ALL); - wake_up_process(rcu_state.gp_kthread); - WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ); - } -} - -static void panic_on_rcu_stall(void) -{ - if (sysctl_panic_on_rcu_stall) - panic("RCU Stall\n"); -} - -static void print_other_cpu_stall(unsigned long gp_seq) -{ - int cpu; - unsigned long flags; - unsigned long gpa; - unsigned long j; - int ndetected = 0; - struct rcu_node *rnp = rcu_get_root(); - long totqlen = 0; - - /* Kick and suppress, if so configured. */ - rcu_stall_kick_kthreads(); - if (rcu_cpu_stall_suppress) - return; - - /* - * OK, time to rat on our buddy... - * See Documentation/RCU/stallwarn.txt for info on how to debug - * RCU CPU stall warnings. - */ - pr_err("INFO: %s detected stalls on CPUs/tasks:", rcu_state.name); - print_cpu_stall_info_begin(); - rcu_for_each_leaf_node(rnp) { - raw_spin_lock_irqsave_rcu_node(rnp, flags); - ndetected += rcu_print_task_stall(rnp); - if (rnp->qsmask != 0) { - for_each_leaf_node_possible_cpu(rnp, cpu) - if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { - print_cpu_stall_info(cpu); - ndetected++; - } - } - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - } - - print_cpu_stall_info_end(); - for_each_possible_cpu(cpu) - totqlen += rcu_get_n_cbs_cpu(cpu); - pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", - smp_processor_id(), (long)(jiffies - rcu_state.gp_start), - (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); - if (ndetected) { - rcu_dump_cpu_stacks(); - - /* Complain about tasks blocking the grace period. */ - rcu_print_detail_task_stall(); - } else { - if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) { - pr_err("INFO: Stall ended before state dump start\n"); - } else { - j = jiffies; - gpa = READ_ONCE(rcu_state.gp_activity); - pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", - rcu_state.name, j - gpa, j, gpa, - READ_ONCE(jiffies_till_next_fqs), - rcu_get_root()->qsmask); - /* In this case, the current CPU might be at fault. */ - sched_show_task(current); - } - } - /* Rewrite if needed in case of slow consoles. */ - if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall))) - WRITE_ONCE(rcu_state.jiffies_stall, - jiffies + 3 * rcu_jiffies_till_stall_check() + 3); - - rcu_check_gp_kthread_starvation(); - - panic_on_rcu_stall(); - - rcu_force_quiescent_state(); /* Kick them all. */ -} - -static void print_cpu_stall(void) -{ - int cpu; - unsigned long flags; - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - struct rcu_node *rnp = rcu_get_root(); - long totqlen = 0; - - /* Kick and suppress, if so configured. */ - rcu_stall_kick_kthreads(); - if (rcu_cpu_stall_suppress) - return; - - /* - * OK, time to rat on ourselves... - * See Documentation/RCU/stallwarn.txt for info on how to debug - * RCU CPU stall warnings. - */ - pr_err("INFO: %s self-detected stall on CPU", rcu_state.name); - print_cpu_stall_info_begin(); - raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); - print_cpu_stall_info(smp_processor_id()); - raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); - print_cpu_stall_info_end(); - for_each_possible_cpu(cpu) - totqlen += rcu_get_n_cbs_cpu(cpu); - pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n", - jiffies - rcu_state.gp_start, - (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); - - rcu_check_gp_kthread_starvation(); - - rcu_dump_cpu_stacks(); - - raw_spin_lock_irqsave_rcu_node(rnp, flags); - /* Rewrite if needed in case of slow consoles. */ - if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall))) - WRITE_ONCE(rcu_state.jiffies_stall, - jiffies + 3 * rcu_jiffies_till_stall_check() + 3); - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - - panic_on_rcu_stall(); - - /* - * Attempt to revive the RCU machinery by forcing a context switch. - * - * A context switch would normally allow the RCU state machine to make - * progress and it could be we're stuck in kernel space without context - * switches for an entirely unreasonable amount of time. - */ - set_tsk_need_resched(current); - set_preempt_need_resched(); -} - -static void check_cpu_stall(struct rcu_data *rdp) -{ - unsigned long gs1; - unsigned long gs2; - unsigned long gps; - unsigned long j; - unsigned long jn; - unsigned long js; - struct rcu_node *rnp; - - if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) || - !rcu_gp_in_progress()) - return; - rcu_stall_kick_kthreads(); - j = jiffies; - - /* - * Lots of memory barriers to reject false positives. - * - * The idea is to pick up rcu_state.gp_seq, then - * rcu_state.jiffies_stall, then rcu_state.gp_start, and finally - * another copy of rcu_state.gp_seq. These values are updated in - * the opposite order with memory barriers (or equivalent) during - * grace-period initialization and cleanup. Now, a false positive - * can occur if we get an new value of rcu_state.gp_start and a old - * value of rcu_state.jiffies_stall. But given the memory barriers, - * the only way that this can happen is if one grace period ends - * and another starts between these two fetches. This is detected - * by comparing the second fetch of rcu_state.gp_seq with the - * previous fetch from rcu_state.gp_seq. - * - * Given this check, comparisons of jiffies, rcu_state.jiffies_stall, - * and rcu_state.gp_start suffice to forestall false positives. - */ - gs1 = READ_ONCE(rcu_state.gp_seq); - smp_rmb(); /* Pick up ->gp_seq first... */ - js = READ_ONCE(rcu_state.jiffies_stall); - smp_rmb(); /* ...then ->jiffies_stall before the rest... */ - gps = READ_ONCE(rcu_state.gp_start); - smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */ - gs2 = READ_ONCE(rcu_state.gp_seq); - if (gs1 != gs2 || - ULONG_CMP_LT(j, js) || - ULONG_CMP_GE(gps, js)) - return; /* No stall or GP completed since entering function. */ - rnp = rdp->mynode; - jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; - if (rcu_gp_in_progress() && - (READ_ONCE(rnp->qsmask) & rdp->grpmask) && - cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { - - /* We haven't checked in, so go dump stack. */ - print_cpu_stall(); - - } else if (rcu_gp_in_progress() && - ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && - cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { - - /* They had a few time units to dump stack, so complain. */ - print_other_cpu_stall(gs2); - } -} - -/** - * rcu_cpu_stall_reset - prevent further stall warnings in current grace period - * - * Set the stall-warning timeout way off into the future, thus preventing - * any RCU CPU stall-warning messages from appearing in the current set of - * RCU grace periods. - * - * The caller must disable hard irqs. - */ -void rcu_cpu_stall_reset(void) -{ - WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2); -} - /* Trace-event wrapper function for trace_rcu_future_grace_period. */ static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, unsigned long gp_seq_req, const char *s) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index bb4f995f2d3f..3c4e26fff806 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -393,15 +393,13 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name; int rcu_dynticks_snap(struct rcu_data *rdp); -/* Forward declarations for rcutree_plugin.h */ +/* Forward declarations for tree_plugin.h */ static void rcu_bootup_announce(void); static void rcu_qs(void); static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static bool rcu_preempt_has_tasks(struct rcu_node *rnp); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -static void rcu_print_detail_task_stall(void); -static int rcu_print_task_stall(struct rcu_node *rnp); static int rcu_print_task_exp_stall(struct rcu_node *rnp); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); static void rcu_flavor_sched_clock_irq(int user); @@ -445,3 +443,9 @@ static void rcu_bind_gp_kthread(void); static bool rcu_nohz_full_cpu(void); static void rcu_dynticks_task_enter(void); static void rcu_dynticks_task_exit(void); + +/* Forward declarations for tree_stall.h */ +static void rcu_print_detail_task_stall(void); +static int rcu_print_task_stall(struct rcu_node *rnp); +static void record_gp_stall_check_time(void); +static void check_cpu_stall(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 6f5f94944f49..e0e73f493363 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -8,6 +8,9 @@ */ +/* panic() on RCU Stall sysctl. */ +int sysctl_panic_on_rcu_stall __read_mostly; + #ifdef CONFIG_PROVE_RCU #define RCU_STALL_DELAY_DELTA (5 * HZ) #else @@ -156,3 +159,292 @@ static int rcu_print_task_stall(struct rcu_node *rnp) return 0; } #endif /* #else #ifdef CONFIG_PREEMPT */ + +static void record_gp_stall_check_time(void) +{ + unsigned long j = jiffies; + unsigned long j1; + + rcu_state.gp_start = j; + j1 = rcu_jiffies_till_stall_check(); + /* Record ->gp_start before ->jiffies_stall. */ + smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */ + rcu_state.jiffies_resched = j + j1 / 2; + rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); +} + +/* + * Complain about starvation of grace-period kthread. + */ +static void rcu_check_gp_kthread_starvation(void) +{ + struct task_struct *gpk = rcu_state.gp_kthread; + unsigned long j; + + j = jiffies - READ_ONCE(rcu_state.gp_activity); + if (j > 2 * HZ) { + pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", + rcu_state.name, j, + (long)rcu_seq_current(&rcu_state.gp_seq), + READ_ONCE(rcu_state.gp_flags), + gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, + gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1); + if (gpk) { + pr_err("RCU grace-period kthread stack dump:\n"); + sched_show_task(gpk); + wake_up_process(gpk); + } + } +} + +/* + * Dump stacks of all tasks running on stalled CPUs. First try using + * NMIs, but fall back to manual remote stack tracing on architectures + * that don't support NMI-based stack dumps. The NMI-triggered stack + * traces are more accurate because they are printed by the target CPU. + */ +static void rcu_dump_cpu_stacks(void) +{ + int cpu; + unsigned long flags; + struct rcu_node *rnp; + + rcu_for_each_leaf_node(rnp) { + raw_spin_lock_irqsave_rcu_node(rnp, flags); + for_each_leaf_node_possible_cpu(rnp, cpu) + if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) + if (!trigger_single_cpu_backtrace(cpu)) + dump_cpu_task(cpu); + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + } +} + +/* + * If too much time has passed in the current grace period, and if + * so configured, go kick the relevant kthreads. + */ +static void rcu_stall_kick_kthreads(void) +{ + unsigned long j; + + if (!rcu_kick_kthreads) + return; + j = READ_ONCE(rcu_state.jiffies_kick_kthreads); + if (time_after(jiffies, j) && rcu_state.gp_kthread && + (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) { + WARN_ONCE(1, "Kicking %s grace-period kthread\n", + rcu_state.name); + rcu_ftrace_dump(DUMP_ALL); + wake_up_process(rcu_state.gp_kthread); + WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ); + } +} + +static void panic_on_rcu_stall(void) +{ + if (sysctl_panic_on_rcu_stall) + panic("RCU Stall\n"); +} + +static void print_other_cpu_stall(unsigned long gp_seq) +{ + int cpu; + unsigned long flags; + unsigned long gpa; + unsigned long j; + int ndetected = 0; + struct rcu_node *rnp = rcu_get_root(); + long totqlen = 0; + + /* Kick and suppress, if so configured. */ + rcu_stall_kick_kthreads(); + if (rcu_cpu_stall_suppress) + return; + + /* + * OK, time to rat on our buddy... + * See Documentation/RCU/stallwarn.txt for info on how to debug + * RCU CPU stall warnings. + */ + pr_err("INFO: %s detected stalls on CPUs/tasks:", rcu_state.name); + print_cpu_stall_info_begin(); + rcu_for_each_leaf_node(rnp) { + raw_spin_lock_irqsave_rcu_node(rnp, flags); + ndetected += rcu_print_task_stall(rnp); + if (rnp->qsmask != 0) { + for_each_leaf_node_possible_cpu(rnp, cpu) + if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { + print_cpu_stall_info(cpu); + ndetected++; + } + } + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + } + + print_cpu_stall_info_end(); + for_each_possible_cpu(cpu) + totqlen += rcu_get_n_cbs_cpu(cpu); + pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", + smp_processor_id(), (long)(jiffies - rcu_state.gp_start), + (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); + if (ndetected) { + rcu_dump_cpu_stacks(); + + /* Complain about tasks blocking the grace period. */ + rcu_print_detail_task_stall(); + } else { + if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) { + pr_err("INFO: Stall ended before state dump start\n"); + } else { + j = jiffies; + gpa = READ_ONCE(rcu_state.gp_activity); + pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", + rcu_state.name, j - gpa, j, gpa, + READ_ONCE(jiffies_till_next_fqs), + rcu_get_root()->qsmask); + /* In this case, the current CPU might be at fault. */ + sched_show_task(current); + } + } + /* Rewrite if needed in case of slow consoles. */ + if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall))) + WRITE_ONCE(rcu_state.jiffies_stall, + jiffies + 3 * rcu_jiffies_till_stall_check() + 3); + + rcu_check_gp_kthread_starvation(); + + panic_on_rcu_stall(); + + rcu_force_quiescent_state(); /* Kick them all. */ +} + +static void print_cpu_stall(void) +{ + int cpu; + unsigned long flags; + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + struct rcu_node *rnp = rcu_get_root(); + long totqlen = 0; + + /* Kick and suppress, if so configured. */ + rcu_stall_kick_kthreads(); + if (rcu_cpu_stall_suppress) + return; + + /* + * OK, time to rat on ourselves... + * See Documentation/RCU/stallwarn.txt for info on how to debug + * RCU CPU stall warnings. + */ + pr_err("INFO: %s self-detected stall on CPU", rcu_state.name); + print_cpu_stall_info_begin(); + raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); + print_cpu_stall_info(smp_processor_id()); + raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); + print_cpu_stall_info_end(); + for_each_possible_cpu(cpu) + totqlen += rcu_get_n_cbs_cpu(cpu); + pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n", + jiffies - rcu_state.gp_start, + (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); + + rcu_check_gp_kthread_starvation(); + + rcu_dump_cpu_stacks(); + + raw_spin_lock_irqsave_rcu_node(rnp, flags); + /* Rewrite if needed in case of slow consoles. */ + if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall))) + WRITE_ONCE(rcu_state.jiffies_stall, + jiffies + 3 * rcu_jiffies_till_stall_check() + 3); + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + + panic_on_rcu_stall(); + + /* + * Attempt to revive the RCU machinery by forcing a context switch. + * + * A context switch would normally allow the RCU state machine to make + * progress and it could be we're stuck in kernel space without context + * switches for an entirely unreasonable amount of time. + */ + set_tsk_need_resched(current); + set_preempt_need_resched(); +} + +static void check_cpu_stall(struct rcu_data *rdp) +{ + unsigned long gs1; + unsigned long gs2; + unsigned long gps; + unsigned long j; + unsigned long jn; + unsigned long js; + struct rcu_node *rnp; + + if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) || + !rcu_gp_in_progress()) + return; + rcu_stall_kick_kthreads(); + j = jiffies; + + /* + * Lots of memory barriers to reject false positives. + * + * The idea is to pick up rcu_state.gp_seq, then + * rcu_state.jiffies_stall, then rcu_state.gp_start, and finally + * another copy of rcu_state.gp_seq. These values are updated in + * the opposite order with memory barriers (or equivalent) during + * grace-period initialization and cleanup. Now, a false positive + * can occur if we get an new value of rcu_state.gp_start and a old + * value of rcu_state.jiffies_stall. But given the memory barriers, + * the only way that this can happen is if one grace period ends + * and another starts between these two fetches. This is detected + * by comparing the second fetch of rcu_state.gp_seq with the + * previous fetch from rcu_state.gp_seq. + * + * Given this check, comparisons of jiffies, rcu_state.jiffies_stall, + * and rcu_state.gp_start suffice to forestall false positives. + */ + gs1 = READ_ONCE(rcu_state.gp_seq); + smp_rmb(); /* Pick up ->gp_seq first... */ + js = READ_ONCE(rcu_state.jiffies_stall); + smp_rmb(); /* ...then ->jiffies_stall before the rest... */ + gps = READ_ONCE(rcu_state.gp_start); + smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */ + gs2 = READ_ONCE(rcu_state.gp_seq); + if (gs1 != gs2 || + ULONG_CMP_LT(j, js) || + ULONG_CMP_GE(gps, js)) + return; /* No stall or GP completed since entering function. */ + rnp = rdp->mynode; + jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; + if (rcu_gp_in_progress() && + (READ_ONCE(rnp->qsmask) & rdp->grpmask) && + cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { + + /* We haven't checked in, so go dump stack. */ + print_cpu_stall(); + + } else if (rcu_gp_in_progress() && + ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && + cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { + + /* They had a few time units to dump stack, so complain. */ + print_other_cpu_stall(gs2); + } +} + +/** + * rcu_cpu_stall_reset - prevent further stall warnings in current grace period + * + * Set the stall-warning timeout way off into the future, thus preventing + * any RCU CPU stall-warning messages from appearing in the current set of + * RCU grace periods. + * + * The caller must disable hard irqs. + */ +void rcu_cpu_stall_reset(void) +{ + WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2); +} -- cgit v1.2.3-59-g8ed1b From 21d0d79ab051bf9facb9960a30e58b93a31c75a5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 20:36:45 -0800 Subject: rcu: Inline RCU task stall-warning helper functions The rcu_print_detail_task_stall(), rcu_print_task_stall_begin(), and rcu_print_task_stall_end() functions were defined to allow long-gone Kconfig options to provide an abbreviated RCU CPU stall warning printout. This commit saves a few lines of code by inlining them into their sole callers. While in the area, a useless call of rcu_print_detail_task_stall_rnp() on the root rcu_node structure was eliminated. If there is only one rcu_node structure, its tasks get printed twice, but if there are more, the root rcu_node structure is guaranteed to have an empty list of blocked tasks, hence the uselessness. (Long ago, root rcu_node structures with non-empty ->blkd_tasks lists could happen, but no longer.) Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 1 - kernel/rcu/tree_stall.h | 36 +++++++----------------------------- 2 files changed, 7 insertions(+), 30 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 3c4e26fff806..c6df9a13dd06 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -445,7 +445,6 @@ static void rcu_dynticks_task_enter(void); static void rcu_dynticks_task_exit(void); /* Forward declarations for tree_stall.h */ -static void rcu_print_detail_task_stall(void); static int rcu_print_task_stall(struct rcu_node *rnp); static void record_gp_stall_check_time(void); static void check_cpu_stall(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index e0e73f493363..b476786b8ef7 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -94,30 +94,6 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } -/* - * Dump detailed information for all tasks blocking the current RCU - * grace period. - */ -static void rcu_print_detail_task_stall(void) -{ - struct rcu_node *rnp = rcu_get_root(); - - rcu_print_detail_task_stall_rnp(rnp); - rcu_for_each_leaf_node(rnp) - rcu_print_detail_task_stall_rnp(rnp); -} - -static void rcu_print_task_stall_begin(struct rcu_node *rnp) -{ - pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", - rnp->level, rnp->grplo, rnp->grphi); -} - -static void rcu_print_task_stall_end(void) -{ - pr_cont("\n"); -} - /* * Scan the current list of tasks blocked within RCU read-side critical * sections, printing out the tid of each. @@ -129,14 +105,15 @@ static int rcu_print_task_stall(struct rcu_node *rnp) if (!rcu_preempt_blocked_readers_cgp(rnp)) return 0; - rcu_print_task_stall_begin(rnp); + pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", + rnp->level, rnp->grplo, rnp->grphi); t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { pr_cont(" P%d", t->pid); ndetected++; } - rcu_print_task_stall_end(); + pr_cont("\n"); return ndetected; } @@ -146,7 +123,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp) * Because preemptible RCU does not exist, we never have to check for * tasks blocked within RCU read-side critical sections. */ -static void rcu_print_detail_task_stall(void) +static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) { } @@ -253,7 +230,7 @@ static void print_other_cpu_stall(unsigned long gp_seq) unsigned long gpa; unsigned long j; int ndetected = 0; - struct rcu_node *rnp = rcu_get_root(); + struct rcu_node *rnp; long totqlen = 0; /* Kick and suppress, if so configured. */ @@ -291,7 +268,8 @@ static void print_other_cpu_stall(unsigned long gp_seq) rcu_dump_cpu_stacks(); /* Complain about tasks blocking the grace period. */ - rcu_print_detail_task_stall(); + rcu_for_each_leaf_node(rnp) + rcu_print_detail_task_stall_rnp(rnp); } else { if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) { pr_err("INFO: Stall ended before state dump start\n"); -- cgit v1.2.3-59-g8ed1b From d87cda5094585b7a0f62075de68266cb9c1b35ca Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 20:51:49 -0800 Subject: rcu: Move rcu_print_task_exp_stall() to tree_exp.h Because expedited CPU stall warnings are contained within the kernel/rcu/tree_exp.h file, rcu_print_task_exp_stall() should live there too. This commit carries out the required code motion. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 32 ++++++++++++++++++++++++++++++++ kernel/rcu/tree_plugin.h | 31 ------------------------------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 4c2a0189e748..7be3e085ddd6 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -10,6 +10,7 @@ #include static void rcu_exp_handler(void *unused); +static int rcu_print_task_exp_stall(struct rcu_node *rnp); /* * Record the start of an expedited grace period. @@ -670,6 +671,27 @@ static void sync_sched_exp_online_cleanup(int cpu) { } +/* + * Scan the current list of tasks blocked within RCU read-side critical + * sections, printing out the tid of each that is blocking the current + * expedited grace period. + */ +static int rcu_print_task_exp_stall(struct rcu_node *rnp) +{ + struct task_struct *t; + int ndetected = 0; + + if (!rnp->exp_tasks) + return 0; + t = list_entry(rnp->exp_tasks->prev, + struct task_struct, rcu_node_entry); + list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { + pr_cont(" P%d", t->pid); + ndetected++; + } + return ndetected; +} + #else /* #ifdef CONFIG_PREEMPT_RCU */ /* Invoked on each online non-idle CPU for expedited quiescent state. */ @@ -709,6 +731,16 @@ static void sync_sched_exp_online_cleanup(int cpu) WARN_ON_ONCE(ret); } +/* + * Because preemptible RCU does not exist, we never have to check for + * tasks blocked within RCU read-side critical sections that are + * blocking the current expedited grace period. + */ +static int rcu_print_task_exp_stall(struct rcu_node *rnp) +{ + return 0; +} + #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /** diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7fa3bc4d481b..72519c57f656 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -642,27 +642,6 @@ static void rcu_read_unlock_special(struct task_struct *t) rcu_preempt_deferred_qs_irqrestore(t, flags); } -/* - * Scan the current list of tasks blocked within RCU read-side critical - * sections, printing out the tid of each that is blocking the current - * expedited grace period. - */ -static int rcu_print_task_exp_stall(struct rcu_node *rnp) -{ - struct task_struct *t; - int ndetected = 0; - - if (!rnp->exp_tasks) - return 0; - t = list_entry(rnp->exp_tasks->prev, - struct task_struct, rcu_node_entry); - list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { - pr_cont(" P%d", t->pid); - ndetected++; - } - return ndetected; -} - /* * Check that the list of blocked tasks for the newly completed grace * period is in fact empty. It is a serious bug to complete a grace @@ -906,16 +885,6 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) } static void rcu_preempt_deferred_qs(struct task_struct *t) { } -/* - * Because preemptible RCU does not exist, we never have to check for - * tasks blocked within RCU read-side critical sections that are - * blocking the current expedited grace period. - */ -static int rcu_print_task_exp_stall(struct rcu_node *rnp) -{ - return 0; -} - /* * Because there is no preemptible RCU, there can be no readers blocked, * so there is no need to check for blocked tasks. So check only for -- cgit v1.2.3-59-g8ed1b From 40e69ac7d0c5a19ea14656bc3131c55719baec96 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 20:58:58 -0800 Subject: rcu: Inline RCU stall-warning info helper functions The print_cpu_stall_info_begin() and print_cpu_stall_info_end() print a single character each onto the console, and are a holdover from a time when RCU CPU stall warning messages could be abbreviated using a long-gone Kconfig option. This commit therefore adds these single characters to already-printed strings in the calling functions, and then eliminates both print_cpu_stall_info_begin() and print_cpu_stall_info_end(). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 2 -- kernel/rcu/tree_plugin.h | 12 ------------ kernel/rcu/tree_stall.h | 12 ++++-------- 3 files changed, 4 insertions(+), 22 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index c6df9a13dd06..d73472af49e7 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -416,9 +416,7 @@ static void rcu_prepare_for_idle(void); static bool rcu_preempt_has_tasks(struct rcu_node *rnp); static bool rcu_preempt_need_deferred_qs(struct task_struct *t); static void rcu_preempt_deferred_qs(struct task_struct *t); -static void print_cpu_stall_info_begin(void); static void print_cpu_stall_info(int cpu); -static void print_cpu_stall_info_end(void); static void zero_cpu_stall_ticks(struct rcu_data *rdp); static bool rcu_nocb_cpu_needs_barrier(int cpu); static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 72519c57f656..2df5bb04fd7a 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1550,12 +1550,6 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ -/* Initiate the stall-info list. */ -static void print_cpu_stall_info_begin(void) -{ - pr_cont("\n"); -} - /* * Print out diagnostic information for the specified stalled CPU. * @@ -1606,12 +1600,6 @@ static void print_cpu_stall_info(int cpu) fast_no_hz); } -/* Terminate the stall-info list. */ -static void print_cpu_stall_info_end(void) -{ - pr_err("\t"); -} - /* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */ static void zero_cpu_stall_ticks(struct rcu_data *rdp) { diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index b476786b8ef7..7ef3b596e45f 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -243,8 +243,7 @@ static void print_other_cpu_stall(unsigned long gp_seq) * See Documentation/RCU/stallwarn.txt for info on how to debug * RCU CPU stall warnings. */ - pr_err("INFO: %s detected stalls on CPUs/tasks:", rcu_state.name); - print_cpu_stall_info_begin(); + pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name); rcu_for_each_leaf_node(rnp) { raw_spin_lock_irqsave_rcu_node(rnp, flags); ndetected += rcu_print_task_stall(rnp); @@ -258,10 +257,9 @@ static void print_other_cpu_stall(unsigned long gp_seq) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } - print_cpu_stall_info_end(); for_each_possible_cpu(cpu) totqlen += rcu_get_n_cbs_cpu(cpu); - pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", + pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", smp_processor_id(), (long)(jiffies - rcu_state.gp_start), (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); if (ndetected) { @@ -314,15 +312,13 @@ static void print_cpu_stall(void) * See Documentation/RCU/stallwarn.txt for info on how to debug * RCU CPU stall warnings. */ - pr_err("INFO: %s self-detected stall on CPU", rcu_state.name); - print_cpu_stall_info_begin(); + pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name); raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); print_cpu_stall_info(smp_processor_id()); raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); - print_cpu_stall_info_end(); for_each_possible_cpu(cpu) totqlen += rcu_get_n_cbs_cpu(cpu); - pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n", + pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n", jiffies - rcu_state.gp_start, (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); -- cgit v1.2.3-59-g8ed1b From 59b73a27681c5841440391f970a9a085228ba975 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 21:05:17 -0800 Subject: rcu: Move FAST_NO_HZ stall-warning code to tree_stall.h This commit further consolidates the stall-warning code by moving print_cpu_stall_info() and its helper functions along with zero_cpu_stall_ticks() to kernel/rcu/tree_stall.h. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 1 - kernel/rcu/tree_plugin.h | 80 ------------------------------------------------ kernel/rcu/tree_stall.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 81 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index d73472af49e7..49bf3b00bb50 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -416,7 +416,6 @@ static void rcu_prepare_for_idle(void); static bool rcu_preempt_has_tasks(struct rcu_node *rnp); static bool rcu_preempt_need_deferred_qs(struct task_struct *t); static void rcu_preempt_deferred_qs(struct task_struct *t); -static void print_cpu_stall_info(int cpu); static void zero_cpu_stall_ticks(struct rcu_data *rdp); static bool rcu_nocb_cpu_needs_barrier(int cpu); static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 2df5bb04fd7a..a1f9d7c15bd8 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1528,86 +1528,6 @@ static void rcu_cleanup_after_idle(void) #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -#ifdef CONFIG_RCU_FAST_NO_HZ - -static void print_cpu_stall_fast_no_hz(char *cp, int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - - sprintf(cp, "last_accelerate: %04lx/%04lx, Nonlazy posted: %c%c%c", - rdp->last_accelerate & 0xffff, jiffies & 0xffff, - ".l"[rdp->all_lazy], - ".L"[!rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)], - ".D"[!rdp->tick_nohz_enabled_snap]); -} - -#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ - -static void print_cpu_stall_fast_no_hz(char *cp, int cpu) -{ - *cp = '\0'; -} - -#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ - -/* - * Print out diagnostic information for the specified stalled CPU. - * - * If the specified CPU is aware of the current RCU grace period, then - * print the number of scheduling clock interrupts the CPU has taken - * during the time that it has been aware. Otherwise, print the number - * of RCU grace periods that this CPU is ignorant of, for example, "1" - * if the CPU was aware of the previous grace period. - * - * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info. - */ -static void print_cpu_stall_info(int cpu) -{ - unsigned long delta; - char fast_no_hz[72]; - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); - char *ticks_title; - unsigned long ticks_value; - - /* - * We could be printing a lot while holding a spinlock. Avoid - * triggering hard lockup. - */ - touch_nmi_watchdog(); - - ticks_value = rcu_seq_ctr(rcu_state.gp_seq - rdp->gp_seq); - if (ticks_value) { - ticks_title = "GPs behind"; - } else { - ticks_title = "ticks this GP"; - ticks_value = rdp->ticks_this_gp; - } - print_cpu_stall_fast_no_hz(fast_no_hz, cpu); - delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq); - pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n", - cpu, - "O."[!!cpu_online(cpu)], - "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], - "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)], - !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' : - rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' : - "!."[!delta], - ticks_value, ticks_title, - rcu_dynticks_snap(rdp) & 0xfff, - rdp->dynticks_nesting, rdp->dynticks_nmi_nesting, - rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), - READ_ONCE(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart, - fast_no_hz); -} - -/* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */ -static void zero_cpu_stall_ticks(struct rcu_data *rdp) -{ - rdp->ticks_this_gp = 0; - rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); - WRITE_ONCE(rdp->last_fqs_resched, jiffies); -} - #ifdef CONFIG_RCU_NOCB_CPU /* diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 7ef3b596e45f..19b915380a6f 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -223,6 +223,86 @@ static void panic_on_rcu_stall(void) panic("RCU Stall\n"); } +#ifdef CONFIG_RCU_FAST_NO_HZ + +static void print_cpu_stall_fast_no_hz(char *cp, int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + + sprintf(cp, "last_accelerate: %04lx/%04lx, Nonlazy posted: %c%c%c", + rdp->last_accelerate & 0xffff, jiffies & 0xffff, + ".l"[rdp->all_lazy], + ".L"[!rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)], + ".D"[!rdp->tick_nohz_enabled_snap]); +} + +#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ + +static void print_cpu_stall_fast_no_hz(char *cp, int cpu) +{ + *cp = '\0'; +} + +#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ + +/* + * Print out diagnostic information for the specified stalled CPU. + * + * If the specified CPU is aware of the current RCU grace period, then + * print the number of scheduling clock interrupts the CPU has taken + * during the time that it has been aware. Otherwise, print the number + * of RCU grace periods that this CPU is ignorant of, for example, "1" + * if the CPU was aware of the previous grace period. + * + * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info. + */ +static void print_cpu_stall_info(int cpu) +{ + unsigned long delta; + char fast_no_hz[72]; + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + char *ticks_title; + unsigned long ticks_value; + + /* + * We could be printing a lot while holding a spinlock. Avoid + * triggering hard lockup. + */ + touch_nmi_watchdog(); + + ticks_value = rcu_seq_ctr(rcu_state.gp_seq - rdp->gp_seq); + if (ticks_value) { + ticks_title = "GPs behind"; + } else { + ticks_title = "ticks this GP"; + ticks_value = rdp->ticks_this_gp; + } + print_cpu_stall_fast_no_hz(fast_no_hz, cpu); + delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq); + pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n", + cpu, + "O."[!!cpu_online(cpu)], + "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], + "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)], + !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' : + rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' : + "!."[!delta], + ticks_value, ticks_title, + rcu_dynticks_snap(rdp) & 0xfff, + rdp->dynticks_nesting, rdp->dynticks_nmi_nesting, + rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), + READ_ONCE(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart, + fast_no_hz); +} + +/* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */ +static void zero_cpu_stall_ticks(struct rcu_data *rdp) +{ + rdp->ticks_this_gp = 0; + rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); + WRITE_ONCE(rdp->last_fqs_resched, jiffies); +} + static void print_other_cpu_stall(unsigned long gp_seq) { int cpu; -- cgit v1.2.3-59-g8ed1b From e23344c2ca42d0083596bb39964675bef00ad691 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 12 Jan 2019 09:35:44 -0800 Subject: rcu: Organize functions in tree_stall.h This commit does only code movement, removal of now-unneeded forward declarations, and addition of comments. It organizes the functions that implement RCU CPU stall warnings for normal grace periods into three categories: 1. Control of RCU CPU stall warnings, including computing timeouts. 2. Interaction of stall warnings with grace periods. 3. Actual printing of the RCU CPU stall-warning messages. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 1 - kernel/rcu/tree_stall.h | 180 ++++++++++++++++++++++++++---------------------- 2 files changed, 97 insertions(+), 84 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 49bf3b00bb50..099410dbcbe9 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -442,6 +442,5 @@ static void rcu_dynticks_task_enter(void); static void rcu_dynticks_task_exit(void); /* Forward declarations for tree_stall.h */ -static int rcu_print_task_stall(struct rcu_node *rnp); static void record_gp_stall_check_time(void); static void check_cpu_stall(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 19b915380a6f..03ed47883d8a 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -7,6 +7,9 @@ * Author: Paul E. McKenney */ +////////////////////////////////////////////////////////////////////////////// +// +// Controlling CPU stall warnings, including delay calculation. /* panic() on RCU Stall sysctl. */ int sysctl_panic_on_rcu_stall __read_mostly; @@ -17,6 +20,7 @@ int sysctl_panic_on_rcu_stall __read_mostly; #define RCU_STALL_DELAY_DELTA 0 #endif +/* Limit-check stall timeouts specified at boottime and runtime. */ int rcu_jiffies_till_stall_check(void) { int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout); @@ -36,6 +40,7 @@ int rcu_jiffies_till_stall_check(void) } EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check); +/* Don't do RCU CPU stall warnings during long sysrq printouts. */ void rcu_sysrq_start(void) { if (!rcu_cpu_stall_suppress) @@ -48,6 +53,7 @@ void rcu_sysrq_end(void) rcu_cpu_stall_suppress = 0; } +/* Don't print RCU CPU stall warnings during a kernel panic. */ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) { rcu_cpu_stall_suppress = 1; @@ -65,6 +71,78 @@ static int __init check_cpu_stall_init(void) } early_initcall(check_cpu_stall_init); +/* If so specified via sysctl, panic, yielding cleaner stall-warning output. */ +static void panic_on_rcu_stall(void) +{ + if (sysctl_panic_on_rcu_stall) + panic("RCU Stall\n"); +} + +/** + * rcu_cpu_stall_reset - prevent further stall warnings in current grace period + * + * Set the stall-warning timeout way off into the future, thus preventing + * any RCU CPU stall-warning messages from appearing in the current set of + * RCU grace periods. + * + * The caller must disable hard irqs. + */ +void rcu_cpu_stall_reset(void) +{ + WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2); +} + +////////////////////////////////////////////////////////////////////////////// +// +// Interaction with RCU grace periods + +/* Start of new grace period, so record stall time (and forcing times). */ +static void record_gp_stall_check_time(void) +{ + unsigned long j = jiffies; + unsigned long j1; + + rcu_state.gp_start = j; + j1 = rcu_jiffies_till_stall_check(); + /* Record ->gp_start before ->jiffies_stall. */ + smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */ + rcu_state.jiffies_resched = j + j1 / 2; + rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); +} + +/* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */ +static void zero_cpu_stall_ticks(struct rcu_data *rdp) +{ + rdp->ticks_this_gp = 0; + rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); + WRITE_ONCE(rdp->last_fqs_resched, jiffies); +} + +/* + * If too much time has passed in the current grace period, and if + * so configured, go kick the relevant kthreads. + */ +static void rcu_stall_kick_kthreads(void) +{ + unsigned long j; + + if (!rcu_kick_kthreads) + return; + j = READ_ONCE(rcu_state.jiffies_kick_kthreads); + if (time_after(jiffies, j) && rcu_state.gp_kthread && + (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) { + WARN_ONCE(1, "Kicking %s grace-period kthread\n", + rcu_state.name); + rcu_ftrace_dump(DUMP_ALL); + wake_up_process(rcu_state.gp_kthread); + WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ); + } +} + +////////////////////////////////////////////////////////////////////////////// +// +// Printing RCU CPU stall warnings + #ifdef CONFIG_PREEMPT /* @@ -137,43 +215,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp) } #endif /* #else #ifdef CONFIG_PREEMPT */ -static void record_gp_stall_check_time(void) -{ - unsigned long j = jiffies; - unsigned long j1; - - rcu_state.gp_start = j; - j1 = rcu_jiffies_till_stall_check(); - /* Record ->gp_start before ->jiffies_stall. */ - smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */ - rcu_state.jiffies_resched = j + j1 / 2; - rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); -} - -/* - * Complain about starvation of grace-period kthread. - */ -static void rcu_check_gp_kthread_starvation(void) -{ - struct task_struct *gpk = rcu_state.gp_kthread; - unsigned long j; - - j = jiffies - READ_ONCE(rcu_state.gp_activity); - if (j > 2 * HZ) { - pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", - rcu_state.name, j, - (long)rcu_seq_current(&rcu_state.gp_seq), - READ_ONCE(rcu_state.gp_flags), - gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, - gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1); - if (gpk) { - pr_err("RCU grace-period kthread stack dump:\n"); - sched_show_task(gpk); - wake_up_process(gpk); - } - } -} - /* * Dump stacks of all tasks running on stalled CPUs. First try using * NMIs, but fall back to manual remote stack tracing on architectures @@ -196,33 +237,6 @@ static void rcu_dump_cpu_stacks(void) } } -/* - * If too much time has passed in the current grace period, and if - * so configured, go kick the relevant kthreads. - */ -static void rcu_stall_kick_kthreads(void) -{ - unsigned long j; - - if (!rcu_kick_kthreads) - return; - j = READ_ONCE(rcu_state.jiffies_kick_kthreads); - if (time_after(jiffies, j) && rcu_state.gp_kthread && - (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) { - WARN_ONCE(1, "Kicking %s grace-period kthread\n", - rcu_state.name); - rcu_ftrace_dump(DUMP_ALL); - wake_up_process(rcu_state.gp_kthread); - WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ); - } -} - -static void panic_on_rcu_stall(void) -{ - if (sysctl_panic_on_rcu_stall) - panic("RCU Stall\n"); -} - #ifdef CONFIG_RCU_FAST_NO_HZ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) @@ -295,12 +309,26 @@ static void print_cpu_stall_info(int cpu) fast_no_hz); } -/* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */ -static void zero_cpu_stall_ticks(struct rcu_data *rdp) +/* Complain about starvation of grace-period kthread. */ +static void rcu_check_gp_kthread_starvation(void) { - rdp->ticks_this_gp = 0; - rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); - WRITE_ONCE(rdp->last_fqs_resched, jiffies); + struct task_struct *gpk = rcu_state.gp_kthread; + unsigned long j; + + j = jiffies - READ_ONCE(rcu_state.gp_activity); + if (j > 2 * HZ) { + pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", + rcu_state.name, j, + (long)rcu_seq_current(&rcu_state.gp_seq), + READ_ONCE(rcu_state.gp_flags), + gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, + gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1); + if (gpk) { + pr_err("RCU grace-period kthread stack dump:\n"); + sched_show_task(gpk); + wake_up_process(gpk); + } + } } static void print_other_cpu_stall(unsigned long gp_seq) @@ -488,17 +516,3 @@ static void check_cpu_stall(struct rcu_data *rdp) print_other_cpu_stall(gs2); } } - -/** - * rcu_cpu_stall_reset - prevent further stall warnings in current grace period - * - * Set the stall-warning timeout way off into the future, thus preventing - * any RCU CPU stall-warning messages from appearing in the current set of - * RCU grace periods. - * - * The caller must disable hard irqs. - */ -void rcu_cpu_stall_reset(void) -{ - WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2); -} -- cgit v1.2.3-59-g8ed1b From 7ac1907c9e7ba5f80d3c298fe9d2dbf620566a49 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 14 Jan 2019 10:19:20 -0800 Subject: rcu: Move irq-disabled stall-warning checking to tree_stall.h The rcu_iw_handler() function's sole purpose in life is to indicate whether a stalled CPU had interrupts disabled, so it belongs in kernel/rcu/tree_stall.h. This commit therefore makes that move, clarifying its header comment while in the area. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 21 --------------------- kernel/rcu/tree.h | 1 + kernel/rcu/tree_stall.h | 20 ++++++++++++++++++++ 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 001dd05f6e38..929531ed168c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1031,27 +1031,6 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) return 0; } -/* - * Handler for the irq_work request posted when a grace period has - * gone on for too long, but not yet long enough for an RCU CPU - * stall warning. Set state appropriately, but just complain if - * there is unexpected state on entry. - */ -static void rcu_iw_handler(struct irq_work *iwp) -{ - struct rcu_data *rdp; - struct rcu_node *rnp; - - rdp = container_of(iwp, struct rcu_data, rcu_iw); - rnp = rdp->mynode; - raw_spin_lock_rcu_node(rnp); - if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) { - rdp->rcu_iw_gp_seq = rnp->gp_seq; - rdp->rcu_iw_pending = false; - } - raw_spin_unlock_rcu_node(rnp); -} - /* * Return true if the specified CPU has passed through a quiescent * state by virtue of being in or having passed through an dynticks diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 099410dbcbe9..f882ce3ca5a5 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -443,4 +443,5 @@ static void rcu_dynticks_task_exit(void); /* Forward declarations for tree_stall.h */ static void record_gp_stall_check_time(void); +static void rcu_iw_handler(struct irq_work *iwp); static void check_cpu_stall(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 03ed47883d8a..526e223e41ce 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -139,6 +139,26 @@ static void rcu_stall_kick_kthreads(void) } } +/* + * Handler for the irq_work request posted about halfway into the RCU CPU + * stall timeout, and used to detect excessive irq disabling. Set state + * appropriately, but just complain if there is unexpected state on entry. + */ +static void rcu_iw_handler(struct irq_work *iwp) +{ + struct rcu_data *rdp; + struct rcu_node *rnp; + + rdp = container_of(iwp, struct rcu_data, rcu_iw); + rnp = rdp->mynode; + raw_spin_lock_rcu_node(rnp); + if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) { + rdp->rcu_iw_gp_seq = rnp->gp_seq; + rdp->rcu_iw_pending = false; + } + raw_spin_unlock_rcu_node(rnp); +} + ////////////////////////////////////////////////////////////////////////////// // // Printing RCU CPU stall warnings -- cgit v1.2.3-59-g8ed1b From b51bcbbf16ef0ea352e8b924dd8638112e4037a5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Jan 2019 07:01:33 -0800 Subject: rcu: Move forward-progress checkers into tree_stall.h This commit further consolidates stall-warning functionality by moving forward-progress checkers into kernel/rcu/tree_stall.h, updating a comment or two while in the area. More specifically, this commit moves show_rcu_gp_kthreads(), rcu_check_gp_start_stall(), rcu_fwd_progress_check(), sysrq_rcu, sysrq_show_rcu(), sysrq_rcudump_op, and rcu_sysrq_init() from kernel/rcu/tree.c to kernel/rcu/tree_stall.h. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 166 ---------------------------------------------- kernel/rcu/tree.h | 2 + kernel/rcu/tree_stall.h | 171 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 173 insertions(+), 166 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 929531ed168c..4cb1ebd93b0c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -102,9 +102,6 @@ int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; /* Number of rcu_nodes at specified level. */ int num_rcu_lvl[] = NUM_RCU_LVL_INIT; int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ -/* Commandeer a sysrq key to dump RCU's tree. */ -static bool sysrq_rcu; -module_param(sysrq_rcu, bool, 0444); /* * The rcu_scheduler_active variable is initialized to the value @@ -510,74 +507,6 @@ static const char *gp_state_getname(short gs) return gp_state_names[gs]; } -/* - * Show the state of the grace-period kthreads. - */ -void show_rcu_gp_kthreads(void) -{ - int cpu; - unsigned long j; - unsigned long ja; - unsigned long jr; - unsigned long jw; - struct rcu_data *rdp; - struct rcu_node *rnp; - - j = jiffies; - ja = j - READ_ONCE(rcu_state.gp_activity); - jr = j - READ_ONCE(rcu_state.gp_req_activity); - jw = j - READ_ONCE(rcu_state.gp_wake_time); - pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n", - rcu_state.name, gp_state_getname(rcu_state.gp_state), - rcu_state.gp_state, - rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL, - ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq), - (long)READ_ONCE(rcu_state.gp_seq), - (long)READ_ONCE(rcu_get_root()->gp_seq_needed), - READ_ONCE(rcu_state.gp_flags)); - rcu_for_each_node_breadth_first(rnp) { - if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed)) - continue; - pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n", - rnp->grplo, rnp->grphi, (long)rnp->gp_seq, - (long)rnp->gp_seq_needed); - if (!rcu_is_leaf_node(rnp)) - continue; - for_each_leaf_node_possible_cpu(rnp, cpu) { - rdp = per_cpu_ptr(&rcu_data, cpu); - if (rdp->gpwrap || - ULONG_CMP_GE(rcu_state.gp_seq, - rdp->gp_seq_needed)) - continue; - pr_info("\tcpu %d ->gp_seq_needed %ld\n", - cpu, (long)rdp->gp_seq_needed); - } - } - /* sched_show_task(rcu_state.gp_kthread); */ -} -EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); - -/* Dump grace-period-request information due to commandeered sysrq. */ -static void sysrq_show_rcu(int key) -{ - show_rcu_gp_kthreads(); -} - -static struct sysrq_key_op sysrq_rcudump_op = { - .handler = sysrq_show_rcu, - .help_msg = "show-rcu(y)", - .action_msg = "Show RCU tree", - .enable_mask = SYSRQ_ENABLE_DUMP, -}; - -static int __init rcu_sysrq_init(void) -{ - if (sysrq_rcu) - return register_sysrq_key('y', &sysrq_rcudump_op); - return 0; -} -early_initcall(rcu_sysrq_init); - /* * Send along grace-period-related data for rcutorture diagnostics. */ @@ -2323,101 +2252,6 @@ void rcu_force_quiescent_state(void) } EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); -/* - * This function checks for grace-period requests that fail to motivate - * RCU to come out of its idle mode. - */ -void -rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, - const unsigned long gpssdelay) -{ - unsigned long flags; - unsigned long j; - struct rcu_node *rnp_root = rcu_get_root(); - static atomic_t warned = ATOMIC_INIT(0); - - if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() || - ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed)) - return; - j = jiffies; /* Expensive access, and in common case don't get here. */ - if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || - time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || - atomic_read(&warned)) - return; - - raw_spin_lock_irqsave_rcu_node(rnp, flags); - j = jiffies; - if (rcu_gp_in_progress() || - ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || - time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || - time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || - atomic_read(&warned)) { - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - return; - } - /* Hold onto the leaf lock to make others see warned==1. */ - - if (rnp_root != rnp) - raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ - j = jiffies; - if (rcu_gp_in_progress() || - ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || - time_before(j, rcu_state.gp_req_activity + gpssdelay) || - time_before(j, rcu_state.gp_activity + gpssdelay) || - atomic_xchg(&warned, 1)) { - raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */ - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - return; - } - WARN_ON(1); - if (rnp_root != rnp) - raw_spin_unlock_rcu_node(rnp_root); - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - show_rcu_gp_kthreads(); -} - -/* - * Do a forward-progress check for rcutorture. This is normally invoked - * due to an OOM event. The argument "j" gives the time period during - * which rcutorture would like progress to have been made. - */ -void rcu_fwd_progress_check(unsigned long j) -{ - unsigned long cbs; - int cpu; - unsigned long max_cbs = 0; - int max_cpu = -1; - struct rcu_data *rdp; - - if (rcu_gp_in_progress()) { - pr_info("%s: GP age %lu jiffies\n", - __func__, jiffies - rcu_state.gp_start); - show_rcu_gp_kthreads(); - } else { - pr_info("%s: Last GP end %lu jiffies ago\n", - __func__, jiffies - rcu_state.gp_end); - preempt_disable(); - rdp = this_cpu_ptr(&rcu_data); - rcu_check_gp_start_stall(rdp->mynode, rdp, j); - preempt_enable(); - } - for_each_possible_cpu(cpu) { - cbs = rcu_get_n_cbs_cpu(cpu); - if (!cbs) - continue; - if (max_cpu < 0) - pr_info("%s: callbacks", __func__); - pr_cont(" %d: %lu", cpu, cbs); - if (cbs <= max_cbs) - continue; - max_cbs = cbs; - max_cpu = cpu; - } - if (max_cpu >= 0) - pr_cont("\n"); -} -EXPORT_SYMBOL_GPL(rcu_fwd_progress_check); - /* Perform RCU core processing work for the current CPU. */ static __latent_entropy void rcu_core(struct softirq_action *unused) { diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index f882ce3ca5a5..e253d11af3c4 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -445,3 +445,5 @@ static void rcu_dynticks_task_exit(void); static void record_gp_stall_check_time(void); static void rcu_iw_handler(struct irq_work *iwp); static void check_cpu_stall(struct rcu_data *rdp); +static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, + const unsigned long gpssdelay); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 526e223e41ce..9e3db08d02bc 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -536,3 +536,174 @@ static void check_cpu_stall(struct rcu_data *rdp) print_other_cpu_stall(gs2); } } + +////////////////////////////////////////////////////////////////////////////// +// +// RCU forward-progress mechanisms, including of callback invocation. + + +/* + * Show the state of the grace-period kthreads. + */ +void show_rcu_gp_kthreads(void) +{ + int cpu; + unsigned long j; + unsigned long ja; + unsigned long jr; + unsigned long jw; + struct rcu_data *rdp; + struct rcu_node *rnp; + + j = jiffies; + ja = j - READ_ONCE(rcu_state.gp_activity); + jr = j - READ_ONCE(rcu_state.gp_req_activity); + jw = j - READ_ONCE(rcu_state.gp_wake_time); + pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n", + rcu_state.name, gp_state_getname(rcu_state.gp_state), + rcu_state.gp_state, + rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL, + ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq), + (long)READ_ONCE(rcu_state.gp_seq), + (long)READ_ONCE(rcu_get_root()->gp_seq_needed), + READ_ONCE(rcu_state.gp_flags)); + rcu_for_each_node_breadth_first(rnp) { + if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed)) + continue; + pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n", + rnp->grplo, rnp->grphi, (long)rnp->gp_seq, + (long)rnp->gp_seq_needed); + if (!rcu_is_leaf_node(rnp)) + continue; + for_each_leaf_node_possible_cpu(rnp, cpu) { + rdp = per_cpu_ptr(&rcu_data, cpu); + if (rdp->gpwrap || + ULONG_CMP_GE(rcu_state.gp_seq, + rdp->gp_seq_needed)) + continue; + pr_info("\tcpu %d ->gp_seq_needed %ld\n", + cpu, (long)rdp->gp_seq_needed); + } + } + /* sched_show_task(rcu_state.gp_kthread); */ +} +EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); + +/* + * This function checks for grace-period requests that fail to motivate + * RCU to come out of its idle mode. + */ +static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, + const unsigned long gpssdelay) +{ + unsigned long flags; + unsigned long j; + struct rcu_node *rnp_root = rcu_get_root(); + static atomic_t warned = ATOMIC_INIT(0); + + if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() || + ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed)) + return; + j = jiffies; /* Expensive access, and in common case don't get here. */ + if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || + time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || + atomic_read(&warned)) + return; + + raw_spin_lock_irqsave_rcu_node(rnp, flags); + j = jiffies; + if (rcu_gp_in_progress() || + ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || + time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || + time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || + atomic_read(&warned)) { + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + return; + } + /* Hold onto the leaf lock to make others see warned==1. */ + + if (rnp_root != rnp) + raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ + j = jiffies; + if (rcu_gp_in_progress() || + ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || + time_before(j, rcu_state.gp_req_activity + gpssdelay) || + time_before(j, rcu_state.gp_activity + gpssdelay) || + atomic_xchg(&warned, 1)) { + raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */ + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + return; + } + WARN_ON(1); + if (rnp_root != rnp) + raw_spin_unlock_rcu_node(rnp_root); + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + show_rcu_gp_kthreads(); +} + +/* + * Do a forward-progress check for rcutorture. This is normally invoked + * due to an OOM event. The argument "j" gives the time period during + * which rcutorture would like progress to have been made. + */ +void rcu_fwd_progress_check(unsigned long j) +{ + unsigned long cbs; + int cpu; + unsigned long max_cbs = 0; + int max_cpu = -1; + struct rcu_data *rdp; + + if (rcu_gp_in_progress()) { + pr_info("%s: GP age %lu jiffies\n", + __func__, jiffies - rcu_state.gp_start); + show_rcu_gp_kthreads(); + } else { + pr_info("%s: Last GP end %lu jiffies ago\n", + __func__, jiffies - rcu_state.gp_end); + preempt_disable(); + rdp = this_cpu_ptr(&rcu_data); + rcu_check_gp_start_stall(rdp->mynode, rdp, j); + preempt_enable(); + } + for_each_possible_cpu(cpu) { + cbs = rcu_get_n_cbs_cpu(cpu); + if (!cbs) + continue; + if (max_cpu < 0) + pr_info("%s: callbacks", __func__); + pr_cont(" %d: %lu", cpu, cbs); + if (cbs <= max_cbs) + continue; + max_cbs = cbs; + max_cpu = cpu; + } + if (max_cpu >= 0) + pr_cont("\n"); +} +EXPORT_SYMBOL_GPL(rcu_fwd_progress_check); + +/* Commandeer a sysrq key to dump RCU's tree. */ +static bool sysrq_rcu; +module_param(sysrq_rcu, bool, 0444); + +/* Dump grace-period-request information due to commandeered sysrq. */ +static void sysrq_show_rcu(int key) +{ + show_rcu_gp_kthreads(); +} + +static struct sysrq_key_op sysrq_rcudump_op = { + .handler = sysrq_show_rcu, + .help_msg = "show-rcu(y)", + .action_msg = "Show RCU tree", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; + +static int __init rcu_sysrq_init(void) +{ + if (sysrq_rcu) + return register_sysrq_key('y', &sysrq_rcudump_op); + return 0; +} +early_initcall(rcu_sysrq_init); -- cgit v1.2.3-59-g8ed1b From 6c70e9cd5f3c6d93f3e1da6d101073e898f39170 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 8 Mar 2019 11:57:48 -0800 Subject: rcu: Fix nohz status in stall warning The Documentation/RCU/stallwarn.txt file says that stall warnings print "D" if dyntick-idle processing is enabled, but the code in print_cpu_stall_fast_no_hz() prints "." instead. This commit therefore reverses the sense of the test to make the code match the documentation. Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_stall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 9e3db08d02bc..f65a73a97323 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -267,7 +267,7 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) rdp->last_accelerate & 0xffff, jiffies & 0xffff, ".l"[rdp->all_lazy], ".L"[!rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)], - ".D"[!rdp->tick_nohz_enabled_snap]); + ".D"[!!rdp->tick_nohz_enabled_snap]); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ -- cgit v1.2.3-59-g8ed1b From 24aca4aea4f0179e0e56cf9ec610c27d07702945 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Jan 2019 19:23:00 -0800 Subject: torture: Don't try to offline the last CPU If there is only one online CPU, it doesn't make sense to try to offline it, as any such attempt is guaranteed to fail. This commit therefore check for this condition and refuses to attempt the nonsensical. Reported-by: Su Yue Signed-off-by: Paul E. McKenney Tested-By: Su Yue --- kernel/torture.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/torture.c b/kernel/torture.c index 8faa1a9aaeb9..17b2be9bde12 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -88,6 +88,8 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes, if (!cpu_online(cpu) || !cpu_is_hotpluggable(cpu)) return false; + if (num_online_cpus() <= 1) + return false; /* Can't offline the last CPU. */ if (verbose > 1) pr_alert("%s" TORTURE_FLAG -- cgit v1.2.3-59-g8ed1b From fef141f6195be5829c76ff061cc5263badc39a89 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 Feb 2019 07:14:09 -0800 Subject: tools/.../rcutorture: Convert to SPDX license identifier Replace the license boiler plate with a SPDX license identifier. While in the area, update an email address and add copyright notices. Signed-off-by: Paul E. McKenney --- .../testing/selftests/rcutorture/bin/configNR_CPUS.sh | 17 ++--------------- .../selftests/rcutorture/bin/config_override.sh | 17 ++--------------- tools/testing/selftests/rcutorture/bin/configcheck.sh | 18 +++--------------- tools/testing/selftests/rcutorture/bin/configinit.sh | 17 ++--------------- tools/testing/selftests/rcutorture/bin/cpus2use.sh | 17 ++--------------- tools/testing/selftests/rcutorture/bin/functions.sh | 17 ++--------------- tools/testing/selftests/rcutorture/bin/jitter.sh | 17 ++--------------- tools/testing/selftests/rcutorture/bin/kvm-build.sh | 17 ++--------------- .../selftests/rcutorture/bin/kvm-find-errors.sh | 5 +++++ .../selftests/rcutorture/bin/kvm-recheck-lock.sh | 17 ++--------------- .../selftests/rcutorture/bin/kvm-recheck-rcu.sh | 17 ++--------------- .../rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh | 17 ++--------------- .../selftests/rcutorture/bin/kvm-recheck-rcuperf.sh | 17 ++--------------- tools/testing/selftests/rcutorture/bin/kvm-recheck.sh | 17 ++--------------- .../testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 17 ++--------------- tools/testing/selftests/rcutorture/bin/kvm.sh | 17 ++--------------- tools/testing/selftests/rcutorture/bin/mkinitrd.sh | 15 +-------------- tools/testing/selftests/rcutorture/bin/parse-build.sh | 17 ++--------------- .../testing/selftests/rcutorture/bin/parse-console.sh | 17 ++--------------- .../selftests/rcutorture/configs/lock/ver_functions.sh | 17 ++--------------- .../selftests/rcutorture/configs/rcu/ver_functions.sh | 17 ++--------------- .../rcutorture/configs/rcuperf/ver_functions.sh | 17 ++--------------- 22 files changed, 47 insertions(+), 314 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh b/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh index 43540f1828cc..2deea2169fc2 100755 --- a/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh +++ b/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Extract the number of CPUs expected from the specified Kconfig-file # fragment by checking CONFIG_SMP and CONFIG_NR_CPUS. If the specified @@ -7,23 +8,9 @@ # # Usage: configNR_CPUS.sh config-frag # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2013 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney cf=$1 if test ! -r $cf diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh index ef7fcbac3d42..90016c359e83 100755 --- a/tools/testing/selftests/rcutorture/bin/config_override.sh +++ b/tools/testing/selftests/rcutorture/bin/config_override.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # config_override.sh base override # @@ -6,23 +7,9 @@ # that conflict with any in override, concatenating what remains and # sending the result to standard output. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2017 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney base=$1 if test -r $base diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh index 197deece7c7c..5b25524d0366 100755 --- a/tools/testing/selftests/rcutorture/bin/configcheck.sh +++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh @@ -1,23 +1,11 @@ #!/bin/bash -# Usage: configcheck.sh .config .config-template -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. +# SPDX-License-Identifier: GPL-2.0+ # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. +# Usage: configcheck.sh .config .config-template # # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney T=${TMPDIR-/tmp}/abat-chk-config.sh.$$ trap 'rm -rf $T' 0 diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh index 65541c21a544..40359486b3a8 100755 --- a/tools/testing/selftests/rcutorture/bin/configinit.sh +++ b/tools/testing/selftests/rcutorture/bin/configinit.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Usage: configinit.sh config-spec-file build-output-dir results-dir # @@ -14,23 +15,9 @@ # for example, "O=/tmp/foo". If this argument is omitted, the .config # file will be generated directly in the current directory. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2013 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney T=${TMPDIR-/tmp}/configinit.sh.$$ trap 'rm -rf $T' 0 diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh index bb99cde3f5f9..ff7102212703 100755 --- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh +++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh @@ -1,26 +1,13 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Get an estimate of how CPU-hoggy to be. # # Usage: cpus2use.sh # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2013 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney ncpus=`grep '^processor' /proc/cpuinfo | wc -l` idlecpus=`mpstat | tail -1 | \ diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index 65f6655026f0..6bcb8b5b2ff2 100644 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -1,24 +1,11 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Shell functions for the rest of the scripts. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2013 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney # bootparam_hotplug_cpu bootparam-string # diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh index 3633828375e3..435b60933985 100755 --- a/tools/testing/selftests/rcutorture/bin/jitter.sh +++ b/tools/testing/selftests/rcutorture/bin/jitter.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Alternate sleeping and spinning on randomly selected CPUs. The purpose # of this script is to inflict random OS jitter on a concurrently running @@ -11,23 +12,9 @@ # sleepmax: Maximum microseconds to sleep, defaults to one second. # spinmax: Maximum microseconds to spin, defaults to one millisecond. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2016 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney me=$(($1 * 1000)) duration=$2 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index 9115fcdb5617..c27a0bbb9c02 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -1,26 +1,13 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Build a kvm-ready Linux kernel from the tree in the current directory. # # Usage: kvm-build.sh config-template build-dir resdir # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney config_template=${1} if test -z "$config_template" -o ! -f "$config_template" -o ! -r "$config_template" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh index 98f650c9bf54..8426fe1f15ee 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ # # Invoke a text editor on all console.log files for all runs with diagnostics, # that is, on all such files having a console.log.diags counterpart. @@ -10,6 +11,10 @@ # # The "directory" above should end with the date/time directory, for example, # "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27". +# +# Copyright (C) IBM Corporation, 2018 +# +# Author: Paul E. McKenney rundir="${1}" if test -z "$rundir" -o ! -d "$rundir" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh index 2de92f43ee8c..f3a7a5e2b89d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh @@ -1,26 +1,13 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Analyze a given results directory for locktorture progress. # # Usage: kvm-recheck-lock.sh resdir # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2014 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney i="$1" if test -d "$i" -a -r "$i" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index 0fa8a61ccb7b..2a7f3f4756a7 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -1,26 +1,13 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Analyze a given results directory for rcutorture progress. # # Usage: kvm-recheck-rcu.sh resdir # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2014 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney i="$1" if test -d "$i" -a -r "$i" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh index 8948f7926b21..7d3c2be66c64 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Analyze a given results directory for rcuperf performance measurements, # looking for ftrace data. Exits with 0 if data was found, analyzed, and @@ -7,23 +8,9 @@ # # Usage: kvm-recheck-rcuperf-ftrace.sh resdir # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2016 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney i="$1" . functions.sh diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh index ccebf772fa1e..db0375a57f28 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh @@ -1,26 +1,13 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Analyze a given results directory for rcuperf performance measurements. # # Usage: kvm-recheck-rcuperf.sh resdir # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2016 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney i="$1" if test -d "$i" -a -r "$i" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index c9bab57a77eb..2adde6aaafdb 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Given the results directories for previous KVM-based torture runs, # check the build and console output for errors. Given a directory @@ -6,23 +7,9 @@ # # Usage: kvm-recheck.sh resdir ... # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH . functions.sh diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 58ca758a5786..0eb1ec16d78a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Run a kvm-based test of the specified tree on the specified configs. # Fully automated run and error checking, no graphics console. @@ -20,23 +21,9 @@ # # More sophisticated argument parsing is clearly needed. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$ trap 'rm -rf $T' 0 diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 19864f1cb27a..8f1e337b9b54 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Run a series of tests under KVM. By default, this series is specified # by the relevant CFLIST file, but can be overridden by the --configs @@ -6,23 +7,9 @@ # # Usage: kvm.sh [ options ] # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney scriptname=$0 args="$*" diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh index 83552bb007b4..6fa9bd1ddc09 100755 --- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -1,21 +1,8 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Create an initrd directory if one does not already exist. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2013 # # Author: Connor Shu diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh index 24fe5f822b28..0701b3bf6ade 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-build.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Check the build output from an rcutorture run for goodness. # The "file" is a pathname on the local system, and "title" is @@ -8,23 +9,9 @@ # # Usage: parse-build.sh file title # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney F=$1 title=$2 diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 84933f6aed77..4508373a922f 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Check the console output from an rcutorture run for oopses. # The "file" is a pathname on the local system, and "title" is @@ -6,23 +7,9 @@ # # Usage: parse-console.sh file title # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney T=${TMPDIR-/tmp}/parse-console.sh.$$ file="$1" diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh index 80eb646e1319..d3e4b2971f92 100644 --- a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh @@ -1,24 +1,11 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Kernel-version-dependent shell functions for the rest of the scripts. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2014 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney # locktorture_param_onoff bootparam-string config-file # diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh index 7bab8246392b..effa415f9b92 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh @@ -1,24 +1,11 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Kernel-version-dependent shell functions for the rest of the scripts. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2013 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney # rcutorture_param_n_barrier_cbs bootparam-string # diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh index d36b8fd6f0fc..777d5b0c190f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh @@ -1,24 +1,11 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Torture-suite-dependent shell functions for the rest of the scripts. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2015 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney # per_version_boot_params bootparam-string config-file seconds # -- cgit v1.2.3-59-g8ed1b From a3b0e1e59ef1757488ef05b66bc376eaf7b06ada Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Feb 2019 15:06:13 -0800 Subject: rcutorture: Make rcutorture_extend_mask() comment match the code The code actually rarely uses more than one type of RCU read-side protection, as is actually desired given that we need some reasonable probability of preempting RCU read-side critical sections, which cannot happen with multiple types of protection. This comment therefore adjusts the comment. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index f14d1b18a74f..2453229ba15a 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1160,7 +1160,7 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) unsigned long randmask2 = randmask1 >> 3; WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT); - /* Most of the time lots of bits, half the time only one bit. */ + /* Mostly only one bit (need preemption!), sometimes lots of bits. */ if (!(randmask1 & 0x7)) mask = mask & randmask2; else -- cgit v1.2.3-59-g8ed1b From f47cb1bb0da23162f4c17e0c0023df4889ecb492 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Feb 2019 15:59:43 -0800 Subject: rcutorture: Remove ->ext_irq_conflict field Back when there was a separate RCU-bh flavor, the ->ext_irq_conflict field was used to prevent executing local_bh_enable() while interrupts were disabled. However, there is no longer an RCU-bh flavor, so this commit removes the no-longer-needed ->ext_irq_conflict field. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 2453229ba15a..21ab3c7eb221 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -299,7 +299,6 @@ struct rcu_torture_ops { int irq_capable; int can_boost; int extendables; - int ext_irq_conflict; const char *name; }; @@ -1170,10 +1169,6 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) ((!(mask & RCUTORTURE_RDR_BH) && (oldmask & RCUTORTURE_RDR_BH)) || (!(mask & RCUTORTURE_RDR_RBH) && (oldmask & RCUTORTURE_RDR_RBH)))) mask |= RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH; - if ((mask & RCUTORTURE_RDR_IRQ) && - !(mask & cur_ops->ext_irq_conflict) && - (oldmask & cur_ops->ext_irq_conflict)) - mask |= cur_ops->ext_irq_conflict; /* Or if readers object. */ return mask ?: RCUTORTURE_RDR_RCU; } -- cgit v1.2.3-59-g8ed1b From d44ac1bebc47e46d62019808a893582f56496a98 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Sat, 9 Mar 2019 00:40:45 +0530 Subject: rcutorture: Fix expected forward progress duration in OOM notifier The rcutorture_oom_notify() function has a misplaced close parenthesis that results in increasingly long delays in rcu_fwd_progress_check()'s checking for various RCU forward-progress problems. This commit therefore puts the parenthesis in the right place. Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 21ab3c7eb221..b42682b94cb7 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1843,7 +1843,7 @@ static int rcutorture_oom_notify(struct notifier_block *self, WARN(1, "%s invoked upon OOM during forward-progress testing.\n", __func__); rcu_torture_fwd_cb_hist(); - rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rcu_fwd_startat) / 2)); + rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rcu_fwd_startat)) / 2); WRITE_ONCE(rcu_fwd_emergency_stop, true); smp_mb(); /* Emergency stop before free and wait to avoid hangs. */ pr_info("%s: Freed %lu RCU callbacks.\n", -- cgit v1.2.3-59-g8ed1b From b813afae7ab6a5e91b4e16cc567331d9c2ae1f04 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 09:27:28 -0700 Subject: rcutorture: Fix cleanup path for invalid torture_type strings If the specified rcutorture.torture_type is not in the rcu_torture_init() function's torture_ops[] array, rcutorture prints some console messages and then invokes rcu_torture_cleanup() to set state so that a future torture test can run. However, rcu_torture_cleanup() also attempts to end the test that didn't actually start, and in doing so relies on the value of cur_ops, a value that is not particularly relevant in this case. This can result in confusing output or even follow-on failures due to attempts to use facilities that have not been properly initialized. This commit therefore sets the value of cur_ops to NULL in this case and inserts a check near the beginning of rcu_torture_cleanup(), thus avoiding relying on an irrelevant cur_ops value. Reported-by: kernel test robot Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index b42682b94cb7..e3c0f57ab0aa 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2089,6 +2089,10 @@ rcu_torture_cleanup(void) cur_ops->cb_barrier(); return; } + if (!cur_ops) { + torture_cleanup_end(); + return; + } rcu_torture_barrier_cleanup(); torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task); @@ -2262,6 +2266,7 @@ rcu_torture_init(void) pr_cont("\n"); WARN_ON(!IS_MODULE(CONFIG_RCU_TORTURE_TEST)); firsterr = -EINVAL; + cur_ops = NULL; goto unwind; } if (cur_ops->fqs == NULL && fqs_duration != 0) { -- cgit v1.2.3-59-g8ed1b From ad092c027713a68a34168942a5ef422e42e039f4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 10:26:41 -0700 Subject: rcuperf: Fix cleanup path for invalid perf_type strings If the specified rcuperf.perf_type is not in the rcu_perf_init() function's perf_ops[] array, rcuperf prints some console messages and then invokes rcu_perf_cleanup() to set state so that a future torture test can run. However, rcu_perf_cleanup() also attempts to end the test that didn't actually start, and in doing so relies on the value of cur_ops, a value that is not particularly relevant in this case. This can result in confusing output or even follow-on failures due to attempts to use facilities that have not been properly initialized. This commit therefore sets the value of cur_ops to NULL in this case and inserts a check near the beginning of rcu_perf_cleanup(), thus avoiding relying on an irrelevant cur_ops value. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcuperf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index c29761152874..7a6890b23c5f 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -494,6 +494,10 @@ rcu_perf_cleanup(void) if (torture_cleanup_begin()) return; + if (!cur_ops) { + torture_cleanup_end(); + return; + } if (reader_tasks) { for (i = 0; i < nrealreaders; i++) @@ -614,6 +618,7 @@ rcu_perf_init(void) pr_cont("\n"); WARN_ON(!IS_MODULE(CONFIG_RCU_PERF_TEST)); firsterr = -EINVAL; + cur_ops = NULL; goto unwind; } if (cur_ops->init) -- cgit v1.2.3-59-g8ed1b From a9d6938ddb7f892552013b93e4842fc1a538628d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 13:30:01 -0700 Subject: locktorture: NULL cxt.lwsa and cxt.lrsa to allow bad-arg detection Currently, lock_torture_cleanup() uses the values of cxt.lwsa and cxt.lrsa to detect bad parameters that prevented locktorture from initializing, let alone running. In this case, lock_torture_cleanup() does no cleanup aside from invoking torture_cleanup_begin() and torture_cleanup_end(), as required to permit future torture tests to run. However, this heuristic fails if the run with bad parameters was preceded by a previous run that actually ran: In this case, both cxt.lwsa and cxt.lrsa will remain non-zero, which means that the current lock_torture_cleanup() invocation will be unable to detect the fact that it should skip cleanup, which can result in charming outcomes such as double frees. This commit therefore NULLs out both cxt.lwsa and cxt.lrsa at the end of any run that actually ran. Signed-off-by: Paul E. McKenney Cc: Davidlohr Bueso Cc: Josh Triplett --- kernel/locking/locktorture.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index ad40a2617063..80a463d31a8d 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -829,7 +829,9 @@ static void lock_torture_cleanup(void) "End of test: SUCCESS"); kfree(cxt.lwsa); + cxt.lwsa = NULL; kfree(cxt.lrsa); + cxt.lrsa = NULL; end: torture_cleanup_end(); -- cgit v1.2.3-59-g8ed1b From 164a4daaeaecb69b0373f20eead9626026352963 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Mar 2019 13:09:33 -0700 Subject: torture: Suppress false-positive CONFIG_INITRAMFS_SOURCE complaint The scripting must supply the CONFIG_INITRAMFS_SOURCE Kconfig option so that kbuild can find the desired initrd, but the configcheck.sh script gets confused by this option because it takes a string instead of the expected y/n/m. This causes checkconfig.sh to complain about CONFIG_INITRAMFS_SOURCE in the torture-test output (though not in the summary). As more people use rcutorture, the resulting confusion is an increasing concern. This commit therefore suppresses this false-positive warning by filtering CONFIG_INITRAMFS_SOURCE from within the checkconfig.sh script. Reported-by: Joel Fernandes Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/configcheck.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh index 5b25524d0366..31584cee84d7 100755 --- a/tools/testing/selftests/rcutorture/bin/configcheck.sh +++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh @@ -14,6 +14,7 @@ mkdir $T cat $1 > $T/.config cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' | +grep -v '^CONFIG_INITRAMFS_SOURCE' | awk ' { print "if grep -q \"" $0 "\" < '"$T/.config"'"; -- cgit v1.2.3-59-g8ed1b From 1755ecedc485e8a898ac27b90be664784ddb6b57 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Jan 2019 14:50:29 -0800 Subject: doc/kprobes: Update obsolete RCU update functions The RCU flavors have been consolidated, so this commit replaces mentions of the now-obsolete synchronize_sched() function with synchronize_rcu(). Signed-off-by: Paul E. McKenney Cc: "Naveen N. Rao" Cc: Anil S Keshavamurthy Cc: "David S. Miller" Cc: Jonathan Corbet Cc: Acked-by: Masami Hiramatsu --- Documentation/kprobes.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 10f4499e677c..ee60e519438a 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -243,10 +243,10 @@ Optimization ^^^^^^^^^^^^ The Kprobe-optimizer doesn't insert the jump instruction immediately; -rather, it calls synchronize_sched() for safety first, because it's +rather, it calls synchronize_rcu() for safety first, because it's possible for a CPU to be interrupted in the middle of executing the -optimized region [3]_. As you know, synchronize_sched() can ensure -that all interruptions that were active when synchronize_sched() +optimized region [3]_. As you know, synchronize_rcu() can ensure +that all interruptions that were active when synchronize_rcu() was called are done, but only if CONFIG_PREEMPT=n. So, this version of kprobe optimization supports only kernels with CONFIG_PREEMPT=n [4]_. -- cgit v1.2.3-59-g8ed1b From a5220e7d2e1b13e62c0d5eab3fbfaef401186e3b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 19 Mar 2019 13:25:03 -0700 Subject: tools/memory-model: Add support for synchronize_srcu_expedited() Given that synchronize_rcu_expedited() is supported, this commit adds support for synchronize_srcu_expedited(). Signed-off-by: Paul E. McKenney Acked-by: Andrea Parri --- tools/memory-model/linux-kernel.def | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def index 0c3f0ef486f4..551eeaa389d4 100644 --- a/tools/memory-model/linux-kernel.def +++ b/tools/memory-model/linux-kernel.def @@ -51,6 +51,7 @@ synchronize_rcu_expedited() { __fence{sync-rcu}; } srcu_read_lock(X) __srcu{srcu-lock}(X) srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X,Y); } synchronize_srcu(X) { __srcu{sync-srcu}(X); } +synchronize_srcu_expedited(X) { __srcu{sync-srcu}(X); } // Atomic atomic_read(X) READ_ONCE(*X) -- cgit v1.2.3-59-g8ed1b From bee58fe346750d53e3cf141554eea6164c8c748a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Jan 2019 14:52:05 -0800 Subject: net/ipv4/netfilter: Update comment from call_rcu_bh() to call_rcu() The RCU flavors have been consolidated, so this commit replaces a comment's mention of call_rcu_bh() with call_rcu(). Signed-off-by: Paul E. McKenney Cc: Florian Westphal Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: Cc: Cc: Acked-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 835d50b279f5..a2a88ab07f7b 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -56,7 +56,7 @@ struct clusterip_config { #endif enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */ - struct rcu_head rcu; /* for call_rcu_bh */ + struct rcu_head rcu; /* for call_rcu */ struct net *net; /* netns for pernet list */ char ifname[IFNAMSIZ]; /* device ifname */ }; -- cgit v1.2.3-59-g8ed1b