From 486e259340fc4c60474f2c14703e3b3634bb58ca Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 6 Jan 2012 14:11:30 -0800 Subject: rcu: Avoid waking up CPUs having only kfree_rcu() callbacks When CONFIG_RCU_FAST_NO_HZ is enabled, RCU will allow a given CPU to enter dyntick-idle mode even if it still has RCU callbacks queued. RCU avoids system hangs in this case by scheduling a timer for several jiffies in the future. However, if all of the callbacks on that CPU are from kfree_rcu(), there is no reason to wake the CPU up, as it is not a problem to defer freeing of memory. This commit therefore tracks the number of callbacks on a given CPU that are from kfree_rcu(), and avoids scheduling the timer if all of a given CPU's callbacks are from kfree_rcu(). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/rcupdate.h') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 81c04f4348ec..a67d5f1072ea 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -841,7 +841,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) /* See the kfree_rcu() header comment. */ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); - call_rcu(head, (rcu_callback)offset); + kfree_call_rcu(head, (rcu_callback)offset); } /** -- cgit v1.3-14-g43fede From 50406b98b6372e7de21d903d2cf3914e9d64e094 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 12 Jan 2012 13:49:19 -0800 Subject: rcu: Make rcu_sleep_check() also check rcu_lock_map Although it is OK to be preempted in an RCU read-side critical section for TREE_PREEMPT_RCU, it is definitely not OK to be preempted, block, or might_sleep() within an RCU read-side critical section for TREE_RCU. Unfortunately, rcu_might_sleep() currently only checks for RCU-bh and RCU-sched read-side critical sections. This commit therefore makes rcu_might_sleep() check for RCU read-side critical sections, but only in TREE_RCU builds. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux/rcupdate.h') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a67d5f1072ea..6df0ae197810 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -381,8 +381,22 @@ extern int rcu_my_thread_group_empty(void); } \ } while (0) +#if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) +static inline void rcu_preempt_sleep_check(void) +{ + rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), + "Illegal context switch in RCU read-side " + "critical section"); +} +#else /* #ifdef CONFIG_PROVE_RCU */ +static inline void rcu_preempt_sleep_check(void) +{ +} +#endif /* #else #ifdef CONFIG_PROVE_RCU */ + #define rcu_sleep_check() \ do { \ + rcu_preempt_sleep_check(); \ rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ "Illegal context switch in RCU-bh" \ " read-side critical section"); \ -- cgit v1.3-14-g43fede From 5e1ee6e1016763812018bf5c5e966992821dc47e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 12 Jan 2012 17:21:20 -0800 Subject: rcu: Note that rcu_access_pointer() can be used for teardown There is no convenient expression for rcu_deference_protected() when it is used in tearing down multilinked structures following a grace period. For example, suppose that an element containing an RCU-protected pointer to a second element is removed from an enclosing RCU-protected data structure, then the write-side lock is released, and finally synchronize_rcu() is invoked to wait for a grace period. Then it is necessary to traverse the pointer in order to free up the second element. But we are not in an RCU read-side critical section and we are holding no locks, so the usual rcu_dereference_check() and rcu_dereference_protected() primitives are not appropriate. Neither is rcu_dereference_raw(), as it is intended for use in data structures where the user defines the locking design (for example, list_head). So this responsibility is added to rcu_access_pointer()'s list, and this commit updates rcu_assign_pointer()'s header comment accordingly. Suggested-by: David Howells Signed-off-by: Paul E. McKenney Acked-by: David Howells --- include/linux/rcupdate.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux/rcupdate.h') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6df0ae197810..f409529ff35a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -484,6 +484,13 @@ static inline void rcu_preempt_sleep_check(void) * NULL. Although rcu_access_pointer() may also be used in cases where * update-side locks prevent the value of the pointer from changing, you * should instead use rcu_dereference_protected() for this use case. + * + * It is also permissible to use rcu_access_pointer() when read-side + * access to the pointer was removed at least one grace period ago, as + * is the case in the context of the RCU callback that is freeing up + * the data, or after a synchronize_rcu() returns. This can be useful + * when tearing down multi-linked structures after a grace period + * has elapsed. */ #define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) -- cgit v1.3-14-g43fede From c0d6d01bffdce19fa19baad6cb8cc3eed7bfd6f5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 23 Jan 2012 12:41:26 -0800 Subject: rcu: Check for illegal use of RCU from offlined CPUs Although it is legal to use RCU during early boot, it is anything but legal to use RCU at runtime from an offlined CPU. After all, RCU explicitly ignores offlined CPUs. This commit therefore adds checks for runtime use of RCU from offlined CPUs. These checks are not perfect, in particular, they can be subverted through use of things like rcu_dereference_raw(). Note that it is not possible to put checks in rcu_read_lock() and friends due to the fact that these primitives are used in code that might be used under either RCU or lock-based protection, which means that checking rcu_read_lock() gets you fat piles of false positives. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 19 +++++++++++++++++++ include/linux/srcu.h | 11 +++++++---- kernel/rcupdate.c | 5 +++++ kernel/rcutree.c | 29 +++++++++++++++++++++++++++++ kernel/rcutree_plugin.h | 1 + 5 files changed, 61 insertions(+), 4 deletions(-) (limited to 'include/linux/rcupdate.h') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f409529ff35a..146d37d31778 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -226,6 +226,15 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) +bool rcu_lockdep_current_cpu_online(void); +#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ +static inline bool rcu_lockdep_current_cpu_online(void) +{ + return 1; +} +#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC #ifdef CONFIG_PROVE_RCU @@ -270,6 +279,9 @@ extern int debug_lockdep_rcu_enabled(void); * occur in the same context, for example, it is illegal to invoke * rcu_read_unlock() in process context if the matching rcu_read_lock() * was invoked from within an irq handler. + * + * Note that rcu_read_lock() is disallowed if the CPU is either idle or + * offline from an RCU perspective, so check for those as well. */ static inline int rcu_read_lock_held(void) { @@ -277,6 +289,8 @@ static inline int rcu_read_lock_held(void) return 1; if (rcu_is_cpu_idle()) return 0; + if (!rcu_lockdep_current_cpu_online()) + return 0; return lock_is_held(&rcu_lock_map); } @@ -313,6 +327,9 @@ extern int rcu_read_lock_bh_held(void); * notice an extended quiescent state to other CPUs that started a grace * period. Otherwise we would delay any grace period as long as we run in * the idle task. + * + * Similarly, we avoid claiming an SRCU read lock held if the current + * CPU is offline. */ #ifdef CONFIG_PREEMPT_COUNT static inline int rcu_read_lock_sched_held(void) @@ -323,6 +340,8 @@ static inline int rcu_read_lock_sched_held(void) return 1; if (rcu_is_cpu_idle()) return 0; + if (!rcu_lockdep_current_cpu_online()) + return 0; if (debug_locks) lockdep_opinion = lock_is_held(&rcu_sched_lock_map); return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); diff --git a/include/linux/srcu.h b/include/linux/srcu.h index e1b005918bbb..9a323728e60c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -99,15 +99,18 @@ long srcu_batches_completed(struct srcu_struct *sp); * power mode. This way we can notice an extended quiescent state to * other CPUs that started a grace period. Otherwise we would delay any * grace period as long as we run in the idle task. + * + * Similarly, we avoid claiming an SRCU read lock held if the current + * CPU is offline. */ static inline int srcu_read_lock_held(struct srcu_struct *sp) { - if (rcu_is_cpu_idle()) - return 0; - if (!debug_lockdep_rcu_enabled()) return 1; - + if (rcu_is_cpu_idle()) + return 0; + if (!rcu_lockdep_current_cpu_online()) + return 0; return lock_is_held(&sp->dep_map); } diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 2bc4e135ff23..a86f1741cc27 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -88,6 +88,9 @@ EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); * section. * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. + * + * Note that rcu_read_lock() is disallowed if the CPU is either idle or + * offline from an RCU perspective, so check for those as well. */ int rcu_read_lock_bh_held(void) { @@ -95,6 +98,8 @@ int rcu_read_lock_bh_held(void) return 1; if (rcu_is_cpu_idle()) return 0; + if (!rcu_lockdep_current_cpu_online()) + return 0; return in_softirq() || irqs_disabled(); } EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index dccd2f78db4e..bcf7db2f2fd2 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -591,6 +591,35 @@ int rcu_is_cpu_idle(void) } EXPORT_SYMBOL(rcu_is_cpu_idle); +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Is the current CPU online? Disable preemption to avoid false positives + * that could otherwise happen due to the current CPU number being sampled, + * this task being preempted, its old CPU being taken offline, resuming + * on some other CPU, then determining that its old CPU is now offline. + * It is OK to use RCU on an offline processor during initial boot, hence + * the check for rcu_scheduler_fully_active. + * + * Disable checking if in an NMI handler because we cannot safely report + * errors from NMI handlers anyway. + */ +bool rcu_lockdep_current_cpu_online(void) +{ + bool ret; + + if (in_nmi()) + return 1; + preempt_disable(); + ret = cpu_online(smp_processor_id()) || + !rcu_scheduler_fully_active; + preempt_enable(); + return ret; +} +EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); + +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + #endif /* #ifdef CONFIG_PROVE_RCU */ /** diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index aa93b074bb2f..cecea84f4f3f 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1946,6 +1946,7 @@ void synchronize_sched_expedited(void) /* Note that atomic_inc_return() implies full memory barrier. */ firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); get_online_cpus(); + WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); /* * Each pass through the following loop attempts to force a -- cgit v1.3-14-g43fede From bde23c6892878e48f64de668660778991bc2fb56 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Feb 2012 10:30:46 -0800 Subject: rcu: Convert WARN_ON_ONCE() in rcu_lock_acquire() to lockdep The WARN_ON_ONCE() in rcu_lock_acquire() results in infinite recursion on S390, and also doesn't print very much information. Remove this. Updated patch to add lockdep-RCU assertions to RCU's read-side primitives. Signed-off-by: Heiko Carstens Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 14 ++++++++++++-- include/linux/srcu.h | 4 ++++ 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux/rcupdate.h') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 146d37d31778..6ee663c8745a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -248,13 +248,11 @@ static inline int rcu_is_cpu_idle(void) static inline void rcu_lock_acquire(struct lockdep_map *map) { - WARN_ON_ONCE(rcu_is_cpu_idle()); lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); } static inline void rcu_lock_release(struct lockdep_map *map) { - WARN_ON_ONCE(rcu_is_cpu_idle()); lock_release(map, 1, _THIS_IP_); } @@ -699,6 +697,8 @@ static inline void rcu_read_lock(void) __rcu_read_lock(); __acquire(RCU); rcu_lock_acquire(&rcu_lock_map); + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "rcu_read_lock() used illegally while idle"); } /* @@ -718,6 +718,8 @@ static inline void rcu_read_lock(void) */ static inline void rcu_read_unlock(void) { + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "rcu_read_unlock() used illegally while idle"); rcu_lock_release(&rcu_lock_map); __release(RCU); __rcu_read_unlock(); @@ -745,6 +747,8 @@ static inline void rcu_read_lock_bh(void) local_bh_disable(); __acquire(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "rcu_read_lock_bh() used illegally while idle"); } /* @@ -754,6 +758,8 @@ static inline void rcu_read_lock_bh(void) */ static inline void rcu_read_unlock_bh(void) { + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); local_bh_enable(); @@ -777,6 +783,8 @@ static inline void rcu_read_lock_sched(void) preempt_disable(); __acquire(RCU_SCHED); rcu_lock_acquire(&rcu_sched_lock_map); + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "rcu_read_lock_sched() used illegally while idle"); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ @@ -793,6 +801,8 @@ static inline notrace void rcu_read_lock_sched_notrace(void) */ static inline void rcu_read_unlock_sched(void) { + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "rcu_read_unlock_sched() used illegally while idle"); rcu_lock_release(&rcu_sched_lock_map); __release(RCU_SCHED); preempt_enable(); diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 9a323728e60c..d3d5fa54f25e 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -172,6 +172,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) int retval = __srcu_read_lock(sp); rcu_lock_acquire(&(sp)->dep_map); + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "srcu_read_lock() used illegally while idle"); return retval; } @@ -185,6 +187,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp) { + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "srcu_read_unlock() used illegally while idle"); rcu_lock_release(&(sp)->dep_map); __srcu_read_unlock(sp, idx); } -- cgit v1.3-14-g43fede From 8a2ecf474d3ee8dd5d001490349e422cec52f39f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 Feb 2012 15:42:04 -0800 Subject: rcu: Add RCU_NONIDLE() for idle-loop RCU read-side critical sections RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden in the inner idle loop, that is, between the rcu_idle_enter() and the rcu_idle_exit() -- RCU will happily ignore any such read-side critical sections. However, things like powertop need tracepoints in the inner idle loop. This commit therefore provides an RCU_NONIDLE() macro that can be used to wrap code in the idle loop that requires RCU read-side critical sections. Suggested-by: Steven Rostedt Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: Deepthi Dharwar --- include/linux/rcupdate.h | 27 +++++++++++++++++++++++++++ kernel/rcutiny.c | 2 ++ kernel/rcutree.c | 2 ++ 3 files changed, 31 insertions(+) (limited to 'include/linux/rcupdate.h') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6ee663c8745a..937217425c47 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -190,6 +190,33 @@ extern void rcu_idle_exit(void); extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); +/** + * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers + * @a: Code that RCU needs to pay attention to. + * + * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden + * in the inner idle loop, that is, between the rcu_idle_enter() and + * the rcu_idle_exit() -- RCU will happily ignore any such read-side + * critical sections. However, things like powertop need tracepoints + * in the inner idle loop. + * + * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU()) + * will tell RCU that it needs to pay attending, invoke its argument + * (in this example, a call to the do_something_with_RCU() function), + * and then tell RCU to go back to ignoring this CPU. It is permissible + * to nest RCU_NONIDLE() wrappers, but the nesting level is currently + * quite limited. If deeper nesting is required, it will be necessary + * to adjust DYNTICK_TASK_NESTING_VALUE accordingly. + * + * This macro may be used from process-level code only. + */ +#define RCU_NONIDLE(a) \ + do { \ + rcu_idle_exit(); \ + do { a; } while (0); \ + rcu_idle_enter(); \ + } while (0) + /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index c8b0e1582c04..37a5444204d2 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -97,6 +97,7 @@ void rcu_idle_enter(void) rcu_idle_enter_common(oldval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_idle_enter); /* * Exit an interrupt handler towards idle. @@ -153,6 +154,7 @@ void rcu_idle_exit(void) rcu_idle_exit_common(oldval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_idle_exit); /* * Enter an interrupt handler, moving away from idle. diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 92b47760edf3..eacc10b03531 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -402,6 +402,7 @@ void rcu_idle_enter(void) rcu_idle_enter_common(rdtp, oldval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_idle_enter); /** * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle @@ -493,6 +494,7 @@ void rcu_idle_exit(void) rcu_idle_exit_common(rdtp, oldval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_idle_exit); /** * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle -- cgit v1.3-14-g43fede