From b3fbab0571eb09746cc0283648165ec00efc8eb2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 24 May 2011 08:31:09 -0700 Subject: rcu: Restore checks for blocking in RCU read-side critical sections Long ago, using TREE_RCU with PREEMPT would result in "scheduling while atomic" diagnostics if you blocked in an RCU read-side critical section. However, PREEMPT now implies TREE_PREEMPT_RCU, which defeats this diagnostic. This commit therefore adds a replacement diagnostic based on PROVE_RCU. Because rcu_lockdep_assert() and lockdep_rcu_dereference() are now being used for things that have nothing to do with rcu_dereference(), rename lockdep_rcu_dereference() to lockdep_rcu_suspicious() and add a third argument that is a string indicating what is suspicious. This third argument is passed in from a new third argument to rcu_lockdep_assert(). Update all calls to rcu_lockdep_assert() to add an informative third argument. Also, add a pair of rcu_lockdep_assert() calls from within rcu_note_context_switch(), one complaining if a context switch occurs in an RCU-bh read-side critical section and another complaining if a context switch occurs in an RCU-sched read-side critical section. These are present only if the PROVE_RCU kernel parameter is enabled. Finally, fix some checkpatch whitespace complaints in lockdep.c. Again, you must enable PROVE_RCU to see these new diagnostics. But you are enabling PROVE_RCU to check out new RCU uses in any case, aren't you? Signed-off-by: Paul E. McKenney --- include/linux/lockdep.h | 2 +- include/linux/rcupdate.h | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index ef820a3c378b..b6a56e37284c 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -548,7 +548,7 @@ do { \ #endif #ifdef CONFIG_PROVE_RCU -extern void lockdep_rcu_dereference(const char *file, const int line); +void lockdep_rcu_suspicious(const char *file, const int line, const char *s); #endif #endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8f4f881a0ad8..8e7470d8b676 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -297,19 +297,31 @@ extern int rcu_my_thread_group_empty(void); /** * rcu_lockdep_assert - emit lockdep splat if specified condition not met * @c: condition to check + * @s: informative message */ -#define rcu_lockdep_assert(c) \ +#define rcu_lockdep_assert(c, s) \ do { \ static bool __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ __warned = true; \ - lockdep_rcu_dereference(__FILE__, __LINE__); \ + lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ } \ } while (0) +#define rcu_sleep_check() \ + do { \ + rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ + "Illegal context switch in RCU-bh" \ + " read-side critical section"); \ + rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \ + "Illegal context switch in RCU-sched"\ + " read-side critical section"); \ + } while (0) + #else /* #ifdef CONFIG_PROVE_RCU */ -#define rcu_lockdep_assert(c) do { } while (0) +#define rcu_lockdep_assert(c, s) do { } while (0) +#define rcu_sleep_check() do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ @@ -338,14 +350,16 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_check(p, c, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - rcu_lockdep_assert(c); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \ + " usage"); \ rcu_dereference_sparse(p, space); \ smp_read_barrier_depends(); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ - rcu_lockdep_assert(c); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \ + " usage"); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) @@ -359,7 +373,9 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_index_check(p, c) \ ({ \ typeof(p) _________p1 = ACCESS_ONCE(p); \ - rcu_lockdep_assert(c); \ + rcu_lockdep_assert(c, \ + "suspicious rcu_dereference_index_check()" \ + " usage"); \ smp_read_barrier_depends(); \ (_________p1); \ }) -- cgit v1.3-8-gc7d7 From 990987511c200877bb20201772d5de46644151f2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 31 May 2011 21:03:55 -0700 Subject: rcu: Move rcu_head definition to types.h Take a first step towards untangling Linux kernel header files by placing the struct rcu_head definition into include/linux/types.h and including include/linux/types.h in include/linux/rcupdate.h where struct rcu_head used to be defined. The actual inclusion point for include/linux/types.h is with the rest of the #include directives rather than at the point where struct rcu_head used to be defined, as suggested by Mathieu Desnoyers. Once this is in place, then header files that need only rcu_head can include types.h rather than rcupdate.h. Signed-off-by: Paul E. McKenney Cc: Paul Gortmaker Acked-by: Mathieu Desnoyers --- include/linux/rcupdate.h | 11 +---------- include/linux/types.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8e7470d8b676..87bd390df73f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -33,6 +33,7 @@ #ifndef __LINUX_RCUPDATE_H #define __LINUX_RCUPDATE_H +#include #include #include #include @@ -64,16 +65,6 @@ static inline void rcutorture_record_progress(unsigned long vernum) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) -/** - * struct rcu_head - callback structure for use with RCU - * @next: next update requests in a list - * @func: actual update function to call after the grace period. - */ -struct rcu_head { - struct rcu_head *next; - void (*func)(struct rcu_head *head); -}; - /* Exported common interfaces */ extern void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); diff --git a/include/linux/types.h b/include/linux/types.h index 176da8c1fbb1..57a97234bec1 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -238,6 +238,16 @@ struct ustat { char f_fpack[6]; }; +/** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list + * @func: actual update function to call after the grace period. + */ +struct rcu_head { + struct rcu_head *next; + void (*func)(struct rcu_head *head); +}; + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ #endif /* _LINUX_TYPES_H */ -- cgit v1.3-8-gc7d7 From 2c42818e962e2858334bf45bfc56662b3752df34 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 26 May 2011 22:14:36 -0700 Subject: rcu: Abstract common code for RCU grace-period-wait primitives Pull the code that waits for an RCU grace period into a single function, which is then called by synchronize_rcu() and friends in the case of TREE_RCU and TREE_PREEMPT_RCU, and from rcu_barrier() and friends in the case of TINY_RCU and TINY_PREEMPT_RCU. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 130 ++++++++++++++++++++++++++--------------------- include/linux/rcutiny.h | 16 +++++- include/linux/rcutree.h | 2 + kernel/rcupdate.c | 21 +++++++- kernel/rcutiny.c | 28 ---------- kernel/rcutiny_plugin.h | 14 ----- kernel/rcutree.c | 22 +------- kernel/rcutree_plugin.h | 11 +--- 8 files changed, 113 insertions(+), 131 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 87bd390df73f..ae5327de41aa 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -66,11 +66,73 @@ static inline void rcutorture_record_progress(unsigned long vernum) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) /* Exported common interfaces */ + +#ifdef CONFIG_PREEMPT_RCU + +/** + * call_rcu() - Queue an RCU callback for invocation after a grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + */ +extern void call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *head)); + +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +/* In classic RCU, call_rcu() is just call_rcu_sched(). */ +#define call_rcu call_rcu_sched + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + +/** + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. call_rcu_bh() assumes + * that the read-side critical sections end on completion of a softirq + * handler. This means that read-side critical sections in process + * context must not be interrupted by softirqs. This interface is to be + * used when most of the read-side critical sections are in softirq context. + * RCU read-side critical sections are delimited by : + * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. + * OR + * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. + * These may be nested. + */ +extern void call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *head)); + +/** + * call_rcu_sched() - Queue an RCU for invocation after sched grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. call_rcu_sched() assumes + * that the read-side critical sections end on enabling of preemption + * or on voluntary preemption. + * RCU read-side critical sections are delimited by : + * - rcu_read_lock_sched() and rcu_read_unlock_sched(), + * OR + * anything that disables preemption. + * These may be nested. + */ extern void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); + extern void synchronize_sched(void); -extern void rcu_barrier_bh(void); -extern void rcu_barrier_sched(void); static inline void __rcu_read_lock_bh(void) { @@ -143,6 +205,15 @@ static inline void rcu_exit_nohz(void) #endif /* #else #ifdef CONFIG_NO_HZ */ +/* + * Infrastructure to implement the synchronize_() primitives in + * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. + */ + +typedef void call_rcu_func_t(struct rcu_head *head, + void (*func)(struct rcu_head *head)); +void wait_rcu_gp(call_rcu_func_t crf); + #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) @@ -723,61 +794,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define RCU_INIT_POINTER(p, v) \ p = (typeof(*v) __force __rcu *)(v) -/* Infrastructure to implement the synchronize_() primitives. */ - -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; - -extern void wakeme_after_rcu(struct rcu_head *head); - -#ifdef CONFIG_PREEMPT_RCU - -/** - * call_rcu() - Queue an RCU callback for invocation after a grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all pre-existing RCU read-side - * critical sections have completed. However, the callback function - * might well execute concurrently with RCU read-side critical sections - * that started after call_rcu() was invoked. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. - */ -extern void call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *head)); - -#else /* #ifdef CONFIG_PREEMPT_RCU */ - -/* In classic RCU, call_rcu() is just call_rcu_sched(). */ -#define call_rcu call_rcu_sched - -#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ - -/** - * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. call_rcu_bh() assumes - * that the read-side critical sections end on completion of a softirq - * handler. This means that read-side critical sections in process - * context must not be interrupted by softirqs. This interface is to be - * used when most of the read-side critical sections are in softirq context. - * RCU read-side critical sections are delimited by : - * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. - * OR - * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. - * These may be nested. - */ -extern void call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *head)); - /* * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally * by call_rcu() and rcu callback execution, and are therefore not part of the diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 52b3e0281fd0..4eab233a00cd 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -31,6 +31,16 @@ static inline void rcu_init(void) { } +static inline void rcu_barrier_bh(void) +{ + wait_rcu_gp(call_rcu_bh); +} + +static inline void rcu_barrier_sched(void) +{ + wait_rcu_gp(call_rcu_sched); +} + #ifdef CONFIG_TINY_RCU static inline void synchronize_rcu_expedited(void) @@ -45,9 +55,13 @@ static inline void rcu_barrier(void) #else /* #ifdef CONFIG_TINY_RCU */ -void rcu_barrier(void); void synchronize_rcu_expedited(void); +static inline void rcu_barrier(void) +{ + wait_rcu_gp(call_rcu); +} + #endif /* #else #ifdef CONFIG_TINY_RCU */ static inline void synchronize_rcu_bh(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index e65d06634dd8..67458468f1a8 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -67,6 +67,8 @@ static inline void synchronize_rcu_bh_expedited(void) } extern void rcu_barrier(void); +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index ddddb320be61..09b3b1b54e02 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -94,11 +94,16 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; + /* * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. */ -void wakeme_after_rcu(struct rcu_head *head) +static void wakeme_after_rcu(struct rcu_head *head) { struct rcu_synchronize *rcu; @@ -106,6 +111,20 @@ void wakeme_after_rcu(struct rcu_head *head) complete(&rcu->completion); } +void wait_rcu_gp(call_rcu_func_t crf) +{ + struct rcu_synchronize rcu; + + init_rcu_head_on_stack(&rcu.head); + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + crf(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); +} +EXPORT_SYMBOL_GPL(wait_rcu_gp); + #ifdef CONFIG_PROVE_RCU /* * wrapper function to avoid #include problems. diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 7bbac7d0f5ab..f544e343256a 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -281,34 +281,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu_bh); -void rcu_barrier_bh(void) -{ - struct rcu_synchronize rcu; - - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_bh(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); -} -EXPORT_SYMBOL_GPL(rcu_barrier_bh); - -void rcu_barrier_sched(void) -{ - struct rcu_synchronize rcu; - - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_sched(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); -} -EXPORT_SYMBOL_GPL(rcu_barrier_sched); - /* * Spawn the kthread that invokes RCU callbacks. */ diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index f259c676195f..6b0cedb383e0 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -697,20 +697,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu); -void rcu_barrier(void) -{ - struct rcu_synchronize rcu; - - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); -} -EXPORT_SYMBOL_GPL(rcu_barrier); - /* * synchronize_rcu - wait until a grace period has elapsed. * diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ba06207b1dd3..a7c6bce1af83 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1613,18 +1613,9 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); */ void synchronize_sched(void) { - struct rcu_synchronize rcu; - if (rcu_blocking_is_gp()) return; - - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_sched(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); + wait_rcu_gp(call_rcu_sched); } EXPORT_SYMBOL_GPL(synchronize_sched); @@ -1639,18 +1630,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched); */ void synchronize_rcu_bh(void) { - struct rcu_synchronize rcu; - if (rcu_blocking_is_gp()) return; - - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_bh(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); + wait_rcu_gp(call_rcu_bh); } EXPORT_SYMBOL_GPL(synchronize_rcu_bh); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 970329853dc5..43daa46bc6f2 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -656,18 +656,9 @@ EXPORT_SYMBOL_GPL(call_rcu); */ void synchronize_rcu(void) { - struct rcu_synchronize rcu; - if (!rcu_scheduler_active) return; - - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); + wait_rcu_gp(call_rcu); } EXPORT_SYMBOL_GPL(synchronize_rcu); -- cgit v1.3-8-gc7d7 From 29c00b4a1d9e277786120032aa8364631820d863 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Jun 2011 15:53:19 -0700 Subject: rcu: Add event-tracing for RCU callback invocation There was recently some controversy about the overhead of invoking RCU callbacks. Add TRACE_EVENT()s to obtain fine-grained timings for the start and stop of a batch of callbacks and also for each callback invoked. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 50 ----------------------- include/trace/events/rcu.h | 98 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/rcu.h | 79 +++++++++++++++++++++++++++++++++++++ kernel/rcupdate.c | 5 +++ kernel/rcutiny.c | 26 +++++++++++- kernel/rcutree.c | 15 +++++-- 6 files changed, 219 insertions(+), 54 deletions(-) create mode 100644 include/trace/events/rcu.h create mode 100644 kernel/rcu.h (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ae5327de41aa..dd2bc2c6a285 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -794,44 +794,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define RCU_INIT_POINTER(p, v) \ p = (typeof(*v) __force __rcu *)(v) -/* - * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally - * by call_rcu() and rcu callback execution, and are therefore not part of the - * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. - */ - -#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD -# define STATE_RCU_HEAD_READY 0 -# define STATE_RCU_HEAD_QUEUED 1 - -extern struct debug_obj_descr rcuhead_debug_descr; - -static inline void debug_rcu_head_queue(struct rcu_head *head) -{ - WARN_ON_ONCE((unsigned long)head & 0x3); - debug_object_activate(head, &rcuhead_debug_descr); - debug_object_active_state(head, &rcuhead_debug_descr, - STATE_RCU_HEAD_READY, - STATE_RCU_HEAD_QUEUED); -} - -static inline void debug_rcu_head_unqueue(struct rcu_head *head) -{ - debug_object_active_state(head, &rcuhead_debug_descr, - STATE_RCU_HEAD_QUEUED, - STATE_RCU_HEAD_READY); - debug_object_deactivate(head, &rcuhead_debug_descr); -} -#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -static inline void debug_rcu_head_queue(struct rcu_head *head) -{ -} - -static inline void debug_rcu_head_unqueue(struct rcu_head *head) -{ -} -#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ - static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) { return offset < 4096; @@ -850,18 +812,6 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) call_rcu(head, (rcu_callback)offset); } -extern void kfree(const void *); - -static inline void __rcu_reclaim(struct rcu_head *head) -{ - unsigned long offset = (unsigned long)head->func; - - if (__is_kfree_rcu_offset(offset)) - kfree((void *)head - offset); - else - head->func(head); -} - /** * kfree_rcu() - kfree an object after a grace period. * @ptr: pointer to kfree diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h new file mode 100644 index 000000000000..db3f6e9e63e6 --- /dev/null +++ b/include/trace/events/rcu.h @@ -0,0 +1,98 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rcu + +#if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RCU_H + +#include + +/* + * Tracepoint for calling rcu_do_batch, performed to start callback invocation: + */ +TRACE_EVENT(rcu_batch_start, + + TP_PROTO(long callbacks_ready, int blimit), + + TP_ARGS(callbacks_ready, blimit), + + TP_STRUCT__entry( + __field( long, callbacks_ready ) + __field( int, blimit ) + ), + + TP_fast_assign( + __entry->callbacks_ready = callbacks_ready; + __entry->blimit = blimit; + ), + + TP_printk("CBs=%ld bl=%d", __entry->callbacks_ready, __entry->blimit) +); + +/* + * Tracepoint for the invocation of a single RCU callback + */ +TRACE_EVENT(rcu_invoke_callback, + + TP_PROTO(struct rcu_head *rhp), + + TP_ARGS(rhp), + + TP_STRUCT__entry( + __field( void *, rhp ) + __field( void *, func ) + ), + + TP_fast_assign( + __entry->rhp = rhp; + __entry->func = rhp->func; + ), + + TP_printk("rhp=%p func=%pf", __entry->rhp, __entry->func) +); + +/* + * Tracepoint for the invocation of a single RCU kfree callback + */ +TRACE_EVENT(rcu_invoke_kfree_callback, + + TP_PROTO(struct rcu_head *rhp, unsigned long offset), + + TP_ARGS(rhp, offset), + + TP_STRUCT__entry( + __field(void *, rhp ) + __field(unsigned long, offset ) + ), + + TP_fast_assign( + __entry->rhp = rhp; + __entry->offset = offset; + ), + + TP_printk("rhp=%p func=%ld", __entry->rhp, __entry->offset) +); + +/* + * Tracepoint for leaving rcu_do_batch, performed after callback invocation: + */ +TRACE_EVENT(rcu_batch_end, + + TP_PROTO(int callbacks_invoked), + + TP_ARGS(callbacks_invoked), + + TP_STRUCT__entry( + __field( int, callbacks_invoked ) + ), + + TP_fast_assign( + __entry->callbacks_invoked = callbacks_invoked; + ), + + TP_printk("CBs-invoked=%d", __entry->callbacks_invoked) +); + +#endif /* _TRACE_RCU_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/rcu.h b/kernel/rcu.h new file mode 100644 index 000000000000..7bc16436aba0 --- /dev/null +++ b/kernel/rcu.h @@ -0,0 +1,79 @@ +/* + * Read-Copy Update definitions shared among RCU implementations. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2011 + * + * Author: Paul E. McKenney + */ + +#ifndef __LINUX_RCU_H +#define __LINUX_RCU_H + +/* + * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally + * by call_rcu() and rcu callback execution, and are therefore not part of the + * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. + */ + +#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD +# define STATE_RCU_HEAD_READY 0 +# define STATE_RCU_HEAD_QUEUED 1 + +extern struct debug_obj_descr rcuhead_debug_descr; + +static inline void debug_rcu_head_queue(struct rcu_head *head) +{ + WARN_ON_ONCE((unsigned long)head & 0x3); + debug_object_activate(head, &rcuhead_debug_descr); + debug_object_active_state(head, &rcuhead_debug_descr, + STATE_RCU_HEAD_READY, + STATE_RCU_HEAD_QUEUED); +} + +static inline void debug_rcu_head_unqueue(struct rcu_head *head) +{ + debug_object_active_state(head, &rcuhead_debug_descr, + STATE_RCU_HEAD_QUEUED, + STATE_RCU_HEAD_READY); + debug_object_deactivate(head, &rcuhead_debug_descr); +} +#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +static inline void debug_rcu_head_queue(struct rcu_head *head) +{ +} + +static inline void debug_rcu_head_unqueue(struct rcu_head *head) +{ +} +#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ + +extern void kfree(const void *); + +static inline void __rcu_reclaim(struct rcu_head *head) +{ + unsigned long offset = (unsigned long)head->func; + + if (__is_kfree_rcu_offset(offset)) { + trace_rcu_invoke_kfree_callback(head, offset); + kfree((void *)head - offset); + } else { + trace_rcu_invoke_callback(head); + head->func(head); + } +} + +#endif /* __LINUX_RCU_H */ diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 09b3b1b54e02..ca0d23b6b3e8 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -46,6 +46,11 @@ #include #include +#define CREATE_TRACE_POINTS +#include + +#include "rcu.h" + #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; struct lockdep_map rcu_lock_map = diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index f544e343256a..19453ba1392e 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -37,6 +37,25 @@ #include #include +#ifdef CONFIG_RCU_TRACE + +#include + +#else /* #ifdef CONFIG_RCU_TRACE */ + +/* No by-default tracing in TINY_RCU: Keep TINY_RCU tiny! */ +static void trace_rcu_invoke_kfree_callback(struct rcu_head *rhp, + unsigned long offset) +{ +} +static void trace_rcu_invoke_callback(struct rcu_head *head) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_TRACE */ + +#include "rcu.h" + /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ static struct task_struct *rcu_kthread_task; static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); @@ -161,11 +180,15 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(int cb_count = 0); /* If no RCU callbacks ready to invoke, just return. */ - if (&rcp->rcucblist == rcp->donetail) + if (&rcp->rcucblist == rcp->donetail) { + RCU_TRACE(trace_rcu_batch_start(0, -1)); + RCU_TRACE(trace_rcu_batch_end(0)); return; + } /* Move the ready-to-invoke callbacks to a local list. */ local_irq_save(flags); + RCU_TRACE(trace_rcu_batch_start(0, -1)); list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; *rcp->donetail = NULL; @@ -187,6 +210,7 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(cb_count++); } RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); + RCU_TRACE(trace_rcu_batch_end(cb_count)); } /* diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a7c6bce1af83..45dcc2036a1e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -52,6 +52,9 @@ #include #include "rcutree.h" +#include + +#include "rcu.h" /* Data structures. */ @@ -1190,17 +1193,22 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) { unsigned long flags; struct rcu_head *next, *list, **tail; - int count; + int bl, count; /* If no callbacks are ready, just return.*/ - if (!cpu_has_callbacks_ready_to_invoke(rdp)) + if (!cpu_has_callbacks_ready_to_invoke(rdp)) { + trace_rcu_batch_start(0, 0); + trace_rcu_batch_end(0); return; + } /* * Extract the list of ready callbacks, disabling to prevent * races with call_rcu() from interrupt handlers. */ local_irq_save(flags); + bl = rdp->blimit; + trace_rcu_batch_start(rdp->qlen, bl); list = rdp->nxtlist; rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; *rdp->nxttail[RCU_DONE_TAIL] = NULL; @@ -1218,11 +1226,12 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) debug_rcu_head_unqueue(list); __rcu_reclaim(list); list = next; - if (++count >= rdp->blimit) + if (++count >= bl) break; } local_irq_save(flags); + trace_rcu_batch_end(count); /* Update count, and requeue any remaining callbacks. */ rdp->qlen -= count; -- cgit v1.3-8-gc7d7 From 300df91ca9358f7f09298eec9503c12b32054ef7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 18 Jun 2011 22:26:31 -0700 Subject: rcu: Event-trace markers for computing RCU CPU utilization This commit adds the trace_rcu_utilization() marker that is to be used to allow postprocessing scripts compute RCU's CPU utilization, give or take event-trace overhead. Note that we do not include RCU's dyntick-idle interface because event tracing requires RCU protection, which is not available in dyntick-idle mode. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 73 +++++++++++++++++++++++++++++++++------------- kernel/rcutree.c | 16 +++++++++- 2 files changed, 68 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index db3f6e9e63e6..ab458eb689fb 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -7,29 +7,58 @@ #include /* - * Tracepoint for calling rcu_do_batch, performed to start callback invocation: + * Tracepoint for start/end markers used for utilization calculations. + * By convention, the string is of the following forms: + * + * "Start " -- Mark the start of the specified activity, + * such as "context switch". Nesting is permitted. + * "End " -- Mark the end of the specified activity. + */ +TRACE_EVENT(rcu_utilization, + + TP_PROTO(char *s), + + TP_ARGS(s), + + TP_STRUCT__entry( + __field(char *, s) + ), + + TP_fast_assign( + __entry->s = s; + ), + + TP_printk("%s", __entry->s) +); + +/* + * Tracepoint for marking the beginning rcu_do_batch, performed to start + * RCU callback invocation. The first argument is the total number of + * callbacks (including those that are not yet ready to be invoked), + * and the second argument is the current RCU-callback batch limit. */ TRACE_EVENT(rcu_batch_start, - TP_PROTO(long callbacks_ready, int blimit), + TP_PROTO(long qlen, int blimit), - TP_ARGS(callbacks_ready, blimit), + TP_ARGS(qlen, blimit), TP_STRUCT__entry( - __field( long, callbacks_ready ) - __field( int, blimit ) + __field(long, qlen) + __field(int, blimit) ), TP_fast_assign( - __entry->callbacks_ready = callbacks_ready; - __entry->blimit = blimit; + __entry->qlen = qlen; + __entry->blimit = blimit; ), - TP_printk("CBs=%ld bl=%d", __entry->callbacks_ready, __entry->blimit) + TP_printk("CBs=%ld bl=%d", __entry->qlen, __entry->blimit) ); /* - * Tracepoint for the invocation of a single RCU callback + * Tracepoint for the invocation of a single RCU callback function. + * The argument is a pointer to the RCU callback itself. */ TRACE_EVENT(rcu_invoke_callback, @@ -38,20 +67,23 @@ TRACE_EVENT(rcu_invoke_callback, TP_ARGS(rhp), TP_STRUCT__entry( - __field( void *, rhp ) - __field( void *, func ) + __field(void *, rhp) + __field(void *, func) ), TP_fast_assign( - __entry->rhp = rhp; - __entry->func = rhp->func; + __entry->rhp = rhp; + __entry->func = rhp->func; ), TP_printk("rhp=%p func=%pf", __entry->rhp, __entry->func) ); /* - * Tracepoint for the invocation of a single RCU kfree callback + * Tracepoint for the invocation of a single RCU callback of the special + * kfree() form. The first argument is a pointer to the RCU callback + * and the second argument is the offset of the callback within the + * enclosing RCU-protected data structure. */ TRACE_EVENT(rcu_invoke_kfree_callback, @@ -60,12 +92,12 @@ TRACE_EVENT(rcu_invoke_kfree_callback, TP_ARGS(rhp, offset), TP_STRUCT__entry( - __field(void *, rhp ) - __field(unsigned long, offset ) + __field(void *, rhp) + __field(unsigned long, offset) ), TP_fast_assign( - __entry->rhp = rhp; + __entry->rhp = rhp; __entry->offset = offset; ), @@ -73,7 +105,8 @@ TRACE_EVENT(rcu_invoke_kfree_callback, ); /* - * Tracepoint for leaving rcu_do_batch, performed after callback invocation: + * Tracepoint for exiting rcu_do_batch after RCU callbacks have been + * invoked. The first argument is the number of callbacks actually invoked. */ TRACE_EVENT(rcu_batch_end, @@ -82,11 +115,11 @@ TRACE_EVENT(rcu_batch_end, TP_ARGS(callbacks_invoked), TP_STRUCT__entry( - __field( int, callbacks_invoked ) + __field(int, callbacks_invoked) ), TP_fast_assign( - __entry->callbacks_invoked = callbacks_invoked; + __entry->callbacks_invoked = callbacks_invoked; ), TP_printk("CBs-invoked=%d", __entry->callbacks_invoked) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 45dcc2036a1e..2a9643bd6ae9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -184,8 +184,10 @@ void rcu_bh_qs(int cpu) */ void rcu_note_context_switch(int cpu) { + trace_rcu_utilization("Start context switch"); rcu_sched_qs(cpu); rcu_preempt_note_context_switch(cpu); + trace_rcu_utilization("End context switch"); } EXPORT_SYMBOL_GPL(rcu_note_context_switch); @@ -1275,6 +1277,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) */ void rcu_check_callbacks(int cpu, int user) { + trace_rcu_utilization("Start scheduler-tick"); if (user || (idle_cpu(cpu) && rcu_scheduler_active && !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { @@ -1308,6 +1311,7 @@ void rcu_check_callbacks(int cpu, int user) rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) invoke_rcu_core(); + trace_rcu_utilization("End scheduler-tick"); } #ifdef CONFIG_SMP @@ -1369,10 +1373,14 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) unsigned long flags; struct rcu_node *rnp = rcu_get_root(rsp); - if (!rcu_gp_in_progress(rsp)) + trace_rcu_utilization("Start fqs"); + if (!rcu_gp_in_progress(rsp)) { + trace_rcu_utilization("End fqs"); return; /* No grace period in progress, nothing to force. */ + } if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ + trace_rcu_utilization("End fqs"); return; /* Someone else is already on the job. */ } if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) @@ -1421,11 +1429,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ rsp->fqs_need_gp = 0; rcu_start_gp(rsp, flags); /* releases rnp->lock */ + trace_rcu_utilization("End fqs"); return; } raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ unlock_fqs_ret: raw_spin_unlock_irqrestore(&rsp->fqslock, flags); + trace_rcu_utilization("End fqs"); } #else /* #ifdef CONFIG_SMP */ @@ -1481,6 +1491,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) */ static void rcu_process_callbacks(struct softirq_action *unused) { + trace_rcu_utilization("Start RCU core"); __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); @@ -1488,6 +1499,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ rcu_needs_cpu_flush(); + trace_rcu_utilization("End RCU core"); } /* @@ -1910,6 +1922,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; + trace_rcu_utilization("Start CPU hotplug"); switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: @@ -1945,6 +1958,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, default: break; } + trace_rcu_utilization("End CPU hotplug"); return NOTIFY_OK; } -- cgit v1.3-8-gc7d7 From 72fe701b70e6ced35d734b676c13efbc8fc769a9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Jun 2011 01:14:54 -0700 Subject: rcu: Add RCU type to callback-invocation tracing Add a string to the rcu_batch_start() and rcu_batch_end() trace messages that indicates the RCU type ("rcu_sched", "rcu_bh", or "rcu_preempt"). The trace messages for the actual invocations themselves are not marked, as it should be clear from the rcu_batch_start() and rcu_batch_end() events before and after. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 28 ++++++++++++++++++---------- kernel/rcutiny.c | 8 ++++---- kernel/rcutree.c | 8 ++++---- 3 files changed, 26 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index ab458eb689fb..508824e5a77d 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -33,27 +33,31 @@ TRACE_EVENT(rcu_utilization, /* * Tracepoint for marking the beginning rcu_do_batch, performed to start - * RCU callback invocation. The first argument is the total number of - * callbacks (including those that are not yet ready to be invoked), - * and the second argument is the current RCU-callback batch limit. + * RCU callback invocation. The first argument is the RCU flavor, + * the second is the total number of callbacks (including those that + * are not yet ready to be invoked), and the third argument is the + * current RCU-callback batch limit. */ TRACE_EVENT(rcu_batch_start, - TP_PROTO(long qlen, int blimit), + TP_PROTO(char *rcuname, long qlen, int blimit), - TP_ARGS(qlen, blimit), + TP_ARGS(rcuname, qlen, blimit), TP_STRUCT__entry( + __field(char *, rcuname) __field(long, qlen) __field(int, blimit) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->qlen = qlen; __entry->blimit = blimit; ), - TP_printk("CBs=%ld bl=%d", __entry->qlen, __entry->blimit) + TP_printk("%s CBs=%ld bl=%d", + __entry->rcuname, __entry->qlen, __entry->blimit) ); /* @@ -106,23 +110,27 @@ TRACE_EVENT(rcu_invoke_kfree_callback, /* * Tracepoint for exiting rcu_do_batch after RCU callbacks have been - * invoked. The first argument is the number of callbacks actually invoked. + * invoked. The first argument is the name of the RCU flavor and + * the second argument is number of callbacks actually invoked. */ TRACE_EVENT(rcu_batch_end, - TP_PROTO(int callbacks_invoked), + TP_PROTO(char *rcuname, int callbacks_invoked), - TP_ARGS(callbacks_invoked), + TP_ARGS(rcuname, callbacks_invoked), TP_STRUCT__entry( + __field(char *, rcuname) __field(int, callbacks_invoked) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->callbacks_invoked = callbacks_invoked; ), - TP_printk("CBs-invoked=%d", __entry->callbacks_invoked) + TP_printk("%s CBs-invoked=%d", + __entry->rcuname, __entry->callbacks_invoked) ); #endif /* _TRACE_RCU_H */ diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 0d28974b78f4..1c37bdd464f1 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -168,14 +168,14 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) /* If no RCU callbacks ready to invoke, just return. */ if (&rcp->rcucblist == rcp->donetail) { - RCU_TRACE(trace_rcu_batch_start(0, -1)); - RCU_TRACE(trace_rcu_batch_end(0)); + RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); + RCU_TRACE(trace_rcu_batch_end(rcp->name, 0)); return; } /* Move the ready-to-invoke callbacks to a local list. */ local_irq_save(flags); - RCU_TRACE(trace_rcu_batch_start(0, -1)); + RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; *rcp->donetail = NULL; @@ -197,7 +197,7 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(cb_count++); } RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); - RCU_TRACE(trace_rcu_batch_end(cb_count)); + RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count)); } /* diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b953e2c72e25..eb6e731088a0 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1199,8 +1199,8 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* If no callbacks are ready, just return.*/ if (!cpu_has_callbacks_ready_to_invoke(rdp)) { - trace_rcu_batch_start(0, 0); - trace_rcu_batch_end(0); + trace_rcu_batch_start(rsp->name, 0, 0); + trace_rcu_batch_end(rsp->name, 0); return; } @@ -1210,7 +1210,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) */ local_irq_save(flags); bl = rdp->blimit; - trace_rcu_batch_start(rdp->qlen, bl); + trace_rcu_batch_start(rsp->name, rdp->qlen, bl); list = rdp->nxtlist; rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; *rdp->nxttail[RCU_DONE_TAIL] = NULL; @@ -1233,7 +1233,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) } local_irq_save(flags); - trace_rcu_batch_end(count); + trace_rcu_batch_end(rsp->name, count); /* Update count, and requeue any remaining callbacks. */ rdp->qlen -= count; -- cgit v1.3-8-gc7d7 From 385680a9487d2f85382ad6d74e2a15837e47bfd9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Jun 2011 22:43:26 -0700 Subject: rcu: Add event-trace markers to TREE_RCU kthreads Add event-trace markers to TREE_RCU kthreads to allow including these kthread's CPU time in the utilization calculations. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 3 +++ kernel/rcutree_plugin.h | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 508824e5a77d..ac52aba00a3e 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -13,6 +13,9 @@ * "Start " -- Mark the start of the specified activity, * such as "context switch". Nesting is permitted. * "End " -- Mark the end of the specified activity. + * + * An "@" character within "" is a comment character: Data + * reduction scripts will ignore the "@" and the remainder of the line. */ TRACE_EVENT(rcu_utilization, diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ecd48a2e3eeb..94d9ca1e4061 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1219,9 +1219,12 @@ static int rcu_boost_kthread(void *arg) int spincnt = 0; int more2boost; + trace_rcu_utilization("Start boost kthread@init"); for (;;) { rnp->boost_kthread_status = RCU_KTHREAD_WAITING; + trace_rcu_utilization("End boost kthread@rcu_wait"); rcu_wait(rnp->boost_tasks || rnp->exp_tasks); + trace_rcu_utilization("Start boost kthread@rcu_wait"); rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; more2boost = rcu_boost(rnp); if (more2boost) @@ -1229,11 +1232,14 @@ static int rcu_boost_kthread(void *arg) else spincnt = 0; if (spincnt > 10) { + trace_rcu_utilization("End boost kthread@rcu_yield"); rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); + trace_rcu_utilization("Start boost kthread@rcu_yield"); spincnt = 0; } } /* NOTREACHED */ + trace_rcu_utilization("End boost kthread@notreached"); return 0; } @@ -1490,9 +1496,12 @@ static int rcu_cpu_kthread(void *arg) char work; char *workp = &per_cpu(rcu_cpu_has_work, cpu); + trace_rcu_utilization("Start CPU kthread@init"); for (;;) { *statusp = RCU_KTHREAD_WAITING; + trace_rcu_utilization("End CPU kthread@rcu_wait"); rcu_wait(*workp != 0 || kthread_should_stop()); + trace_rcu_utilization("Start CPU kthread@rcu_wait"); local_bh_disable(); if (rcu_cpu_kthread_should_stop(cpu)) { local_bh_enable(); @@ -1513,11 +1522,14 @@ static int rcu_cpu_kthread(void *arg) spincnt = 0; if (spincnt > 10) { *statusp = RCU_KTHREAD_YIELDING; + trace_rcu_utilization("End CPU kthread@rcu_yield"); rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); + trace_rcu_utilization("Start CPU kthread@rcu_yield"); spincnt = 0; } } *statusp = RCU_KTHREAD_STOPPED; + trace_rcu_utilization("End CPU kthread@term"); return 0; } -- cgit v1.3-8-gc7d7 From 965a002b4f1a458c5dcb334ec29f48a0046faa25 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 18 Jun 2011 09:55:39 -0700 Subject: rcu: Make TINY_RCU also use softirq for RCU_BOOST=n This patch #ifdefs TINY_RCU kthreads out of the kernel unless RCU_BOOST=y, thus eliminating context-switch overhead if RCU priority boosting has not been configured. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 4 ++ kernel/rcutiny.c | 74 +++++--------------------------- kernel/rcutiny_plugin.h | 110 ++++++++++++++++++++++++++++++++++++------------ 3 files changed, 97 insertions(+), 91 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4eab233a00cd..00b7a5e493d2 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,9 +27,13 @@ #include +#ifdef CONFIG_RCU_BOOST static inline void rcu_init(void) { } +#else /* #ifdef CONFIG_RCU_BOOST */ +void rcu_init(void); +#endif /* #else #ifdef CONFIG_RCU_BOOST */ static inline void rcu_barrier_bh(void) { diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 1c37bdd464f1..c9321d86999b 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -43,16 +43,11 @@ #include "rcu.h" -/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ -static struct task_struct *rcu_kthread_task; -static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); -static unsigned long have_rcu_kthread_work; - /* Forward declarations for rcutiny_plugin.h. */ struct rcu_ctrlblk; -static void invoke_rcu_kthread(void); -static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); -static int rcu_kthread(void *arg); +static void invoke_rcu_callbacks(void); +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); +static void rcu_process_callbacks(struct softirq_action *unused); static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); @@ -101,16 +96,6 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) return 0; } -/* - * Wake up rcu_kthread() to process callbacks now eligible for invocation - * or to boost readers. - */ -static void invoke_rcu_kthread(void) -{ - have_rcu_kthread_work = 1; - wake_up(&rcu_kthread_wq); -} - /* * Record an rcu quiescent state. And an rcu_bh quiescent state while we * are at it, given that any rcu quiescent state is also an rcu_bh @@ -123,7 +108,7 @@ void rcu_sched_qs(int cpu) local_irq_save(flags); if (rcu_qsctr_help(&rcu_sched_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) - invoke_rcu_kthread(); + invoke_rcu_callbacks(); local_irq_restore(flags); } @@ -136,7 +121,7 @@ void rcu_bh_qs(int cpu) local_irq_save(flags); if (rcu_qsctr_help(&rcu_bh_ctrlblk)) - invoke_rcu_kthread(); + invoke_rcu_callbacks(); local_irq_restore(flags); } @@ -160,7 +145,7 @@ void rcu_check_callbacks(int cpu, int user) * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure * whose grace period has elapsed. */ -static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) { struct rcu_head *next, *list; unsigned long flags; @@ -200,36 +185,11 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count)); } -/* - * This kthread invokes RCU callbacks whose grace periods have - * elapsed. It is awakened as needed, and takes the place of the - * RCU_SOFTIRQ that was used previously for this purpose. - * This is a kthread, but it is never stopped, at least not until - * the system goes down. - */ -static int rcu_kthread(void *arg) +static void rcu_process_callbacks(struct softirq_action *unused) { - unsigned long work; - unsigned long morework; - unsigned long flags; - - for (;;) { - wait_event_interruptible(rcu_kthread_wq, - have_rcu_kthread_work != 0); - morework = rcu_boost(); - local_irq_save(flags); - work = have_rcu_kthread_work; - have_rcu_kthread_work = morework; - local_irq_restore(flags); - if (work) { - rcu_process_callbacks(&rcu_sched_ctrlblk); - rcu_process_callbacks(&rcu_bh_ctrlblk); - rcu_preempt_process_callbacks(); - } - schedule_timeout_interruptible(1); /* Leave CPU for others. */ - } - - return 0; /* Not reached, but needed to shut gcc up. */ + __rcu_process_callbacks(&rcu_sched_ctrlblk); + __rcu_process_callbacks(&rcu_bh_ctrlblk); + rcu_preempt_process_callbacks(); } /* @@ -291,17 +251,3 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) __call_rcu(head, func, &rcu_bh_ctrlblk); } EXPORT_SYMBOL_GPL(call_rcu_bh); - -/* - * Spawn the kthread that invokes RCU callbacks. - */ -static int __init rcu_spawn_kthreads(void) -{ - struct sched_param sp; - - rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); - sp.sched_priority = RCU_BOOST_PRIO; - sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); - return 0; -} -early_initcall(rcu_spawn_kthreads); diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 791ddf7c99ab..02aa7139861c 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -245,6 +245,13 @@ static void show_tiny_preempt_stats(struct seq_file *m) #include "rtmutex_common.h" +#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO + +/* Controls for rcu_kthread() kthread. */ +static struct task_struct *rcu_kthread_task; +static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); +static unsigned long have_rcu_kthread_work; + /* * Carry out RCU priority boosting on the task indicated by ->boost_tasks, * and advance ->boost_tasks to the next task in the ->blkd_tasks list. @@ -332,7 +339,7 @@ static int rcu_initiate_boost(void) if (rcu_preempt_ctrlblk.exp_tasks == NULL) rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; - invoke_rcu_kthread(); + invoke_rcu_callbacks(); } else RCU_TRACE(rcu_initiate_boost_trace()); return 1; @@ -350,14 +357,6 @@ static void rcu_preempt_boost_start_gp(void) #else /* #ifdef CONFIG_RCU_BOOST */ -/* - * If there is no RCU priority boosting, we don't boost. - */ -static int rcu_boost(void) -{ - return 0; -} - /* * If there is no RCU priority boosting, we don't initiate boosting, * but we do indicate whether there are blocked readers blocking the @@ -425,7 +424,7 @@ static void rcu_preempt_cpu_qs(void) /* If there are done callbacks, cause them to be invoked. */ if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) - invoke_rcu_kthread(); + invoke_rcu_callbacks(); } /* @@ -646,7 +645,7 @@ static void rcu_preempt_check_callbacks(void) rcu_preempt_cpu_qs(); if (&rcu_preempt_ctrlblk.rcb.rcucblist != rcu_preempt_ctrlblk.rcb.donetail) - invoke_rcu_kthread(); + invoke_rcu_callbacks(); if (rcu_preempt_gp_in_progress() && rcu_cpu_blocking_cur_gp() && rcu_preempt_running_reader()) @@ -672,7 +671,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) */ static void rcu_preempt_process_callbacks(void) { - rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); + __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); } /* @@ -847,15 +846,6 @@ static void show_tiny_preempt_stats(struct seq_file *m) #endif /* #ifdef CONFIG_RCU_TRACE */ -/* - * Because preemptible RCU does not exist, it is never necessary to - * boost preempted RCU readers. - */ -static int rcu_boost(void) -{ - return 0; -} - /* * Because preemptible RCU does not exist, it never has any callbacks * to check. @@ -882,6 +872,78 @@ static void rcu_preempt_process_callbacks(void) #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ +#ifdef CONFIG_RCU_BOOST + +/* + * Wake up rcu_kthread() to process callbacks now eligible for invocation + * or to boost readers. + */ +static void invoke_rcu_callbacks(void) +{ + have_rcu_kthread_work = 1; + wake_up(&rcu_kthread_wq); +} + +/* + * This kthread invokes RCU callbacks whose grace periods have + * elapsed. It is awakened as needed, and takes the place of the + * RCU_SOFTIRQ that is used for this purpose when boosting is disabled. + * This is a kthread, but it is never stopped, at least not until + * the system goes down. + */ +static int rcu_kthread(void *arg) +{ + unsigned long work; + unsigned long morework; + unsigned long flags; + + for (;;) { + wait_event_interruptible(rcu_kthread_wq, + have_rcu_kthread_work != 0); + morework = rcu_boost(); + local_irq_save(flags); + work = have_rcu_kthread_work; + have_rcu_kthread_work = morework; + local_irq_restore(flags); + if (work) + rcu_process_callbacks(NULL); + schedule_timeout_interruptible(1); /* Leave CPU for others. */ + } + + return 0; /* Not reached, but needed to shut gcc up. */ +} + +/* + * Spawn the kthread that invokes RCU callbacks. + */ +static int __init rcu_spawn_kthreads(void) +{ + struct sched_param sp; + + rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); + sp.sched_priority = RCU_BOOST_PRIO; + sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); + return 0; +} +early_initcall(rcu_spawn_kthreads); + +#else /* #ifdef CONFIG_RCU_BOOST */ + +/* + * Start up softirq processing of callbacks. + */ +void invoke_rcu_callbacks(void) +{ + raise_softirq(RCU_SOFTIRQ); +} + +void rcu_init(void) +{ + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); +} + +#endif /* #else #ifdef CONFIG_RCU_BOOST */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC #include @@ -897,12 +959,6 @@ void __init rcu_scheduler_starting(void) #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -#ifdef CONFIG_RCU_BOOST -#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO -#else /* #ifdef CONFIG_RCU_BOOST */ -#define RCU_BOOST_PRIO 1 -#endif /* #else #ifdef CONFIG_RCU_BOOST */ - #ifdef CONFIG_RCU_TRACE #ifdef CONFIG_RCU_BOOST -- cgit v1.3-8-gc7d7 From d4c08f2ac311a360230eef7e5395b0ec8d8f0670 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 25 Jun 2011 06:36:56 -0700 Subject: rcu: Add grace-period, quiescent-state, and call_rcu trace events Add trace events to record grace-period start and end, quiescent states, CPUs noticing grace-period start and end, grace-period initialization, call_rcu() invocation, tasks blocking in RCU read-side critical sections, tasks exiting those same critical sections, force_quiescent_state() detection of dyntick-idle and offline CPUs, CPUs entering and leaving dyntick-idle mode (except from NMIs), CPUs coming online and going offline, and CPUs being kicked for staying in dyntick-idle mode for too long (as in many weeks, even on 32-bit systems). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney rcu: Add the rcu flavor to callback trace events The earlier trace events for registering RCU callbacks and for invoking them did not include the RCU flavor (rcu_bh, rcu_preempt, or rcu_sched). This commit adds the RCU flavor to those trace events. Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 345 +++++++++++++++++++++++++++++++++++++++++++-- kernel/rcu.h | 6 +- kernel/rcutiny.c | 4 +- kernel/rcutree.c | 45 +++++- kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 22 ++- 6 files changed, 399 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index ac52aba00a3e..669fbd62ec25 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -24,7 +24,7 @@ TRACE_EVENT(rcu_utilization, TP_ARGS(s), TP_STRUCT__entry( - __field(char *, s) + __field(char *, s) ), TP_fast_assign( @@ -34,6 +34,297 @@ TRACE_EVENT(rcu_utilization, TP_printk("%s", __entry->s) ); +#ifdef CONFIG_RCU_TRACE + +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) + +/* + * Tracepoint for grace-period events: starting and ending a grace + * period ("start" and "end", respectively), a CPU noting the start + * of a new grace period or the end of an old grace period ("cpustart" + * and "cpuend", respectively), a CPU passing through a quiescent + * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" + * and "cpuofl", respectively), and a CPU being kicked for being too + * long in dyntick-idle mode ("kick"). + */ +TRACE_EVENT(rcu_grace_period, + + TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent), + + TP_ARGS(rcuname, gpnum, gpevent), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(char *, gpevent) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->gpevent = gpevent; + ), + + TP_printk("%s %lu %s", + __entry->rcuname, __entry->gpnum, __entry->gpevent) +); + +/* + * Tracepoint for grace-period-initialization events. These are + * distinguished by the type of RCU, the new grace-period number, the + * rcu_node structure level, the starting and ending CPU covered by the + * rcu_node structure, and the mask of CPUs that will be waited for. + * All but the type of RCU are extracted from the rcu_node structure. + */ +TRACE_EVENT(rcu_grace_period_init, + + TP_PROTO(char *rcuname, unsigned long gpnum, u8 level, + int grplo, int grphi, unsigned long qsmask), + + TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(u8, level) + __field(int, grplo) + __field(int, grphi) + __field(unsigned long, qsmask) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->level = level; + __entry->grplo = grplo; + __entry->grphi = grphi; + __entry->qsmask = qsmask; + ), + + TP_printk("%s %lu %u %d %d %lx", + __entry->rcuname, __entry->gpnum, __entry->level, + __entry->grplo, __entry->grphi, __entry->qsmask) +); + +/* + * Tracepoint for tasks blocking within preemptible-RCU read-side + * critical sections. Track the type of RCU (which one day might + * include SRCU), the grace-period number that the task is blocking + * (the current or the next), and the task's PID. + */ +TRACE_EVENT(rcu_preempt_task, + + TP_PROTO(char *rcuname, int pid, unsigned long gpnum), + + TP_ARGS(rcuname, pid, gpnum), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(int, pid) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->pid = pid; + ), + + TP_printk("%s %lu %d", + __entry->rcuname, __entry->gpnum, __entry->pid) +); + +/* + * Tracepoint for tasks that blocked within a given preemptible-RCU + * read-side critical section exiting that critical section. Track the + * type of RCU (which one day might include SRCU) and the task's PID. + */ +TRACE_EVENT(rcu_unlock_preempted_task, + + TP_PROTO(char *rcuname, unsigned long gpnum, int pid), + + TP_ARGS(rcuname, gpnum, pid), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(int, pid) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->pid = pid; + ), + + TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) +); + +/* + * Tracepoint for quiescent-state-reporting events. These are + * distinguished by the type of RCU, the grace-period number, the + * mask of quiescent lower-level entities, the rcu_node structure level, + * the starting and ending CPU covered by the rcu_node structure, and + * whether there are any blocked tasks blocking the current grace period. + * All but the type of RCU are extracted from the rcu_node structure. + */ +TRACE_EVENT(rcu_quiescent_state_report, + + TP_PROTO(char *rcuname, unsigned long gpnum, + unsigned long mask, unsigned long qsmask, + u8 level, int grplo, int grphi, int gp_tasks), + + TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(unsigned long, mask) + __field(unsigned long, qsmask) + __field(u8, level) + __field(int, grplo) + __field(int, grphi) + __field(u8, gp_tasks) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->mask = mask; + __entry->qsmask = qsmask; + __entry->level = level; + __entry->grplo = grplo; + __entry->grphi = grphi; + __entry->gp_tasks = gp_tasks; + ), + + TP_printk("%s %lu %lx>%lx %u %d %d %u", + __entry->rcuname, __entry->gpnum, + __entry->mask, __entry->qsmask, __entry->level, + __entry->grplo, __entry->grphi, __entry->gp_tasks) +); + +/* + * Tracepoint for quiescent states detected by force_quiescent_state(). + * These trace events include the type of RCU, the grace-period number + * that was blocked by the CPU, the CPU itself, and the type of quiescent + * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, + * or "kick" when kicking a CPU that has been in dyntick-idle mode for + * too long. + */ +TRACE_EVENT(rcu_fqs, + + TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent), + + TP_ARGS(rcuname, gpnum, cpu, qsevent), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(int, cpu) + __field(char *, qsevent) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->cpu = cpu; + __entry->qsevent = qsevent; + ), + + TP_printk("%s %lu %d %s", + __entry->rcuname, __entry->gpnum, + __entry->cpu, __entry->qsevent) +); + +#endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */ + +/* + * Tracepoint for dyntick-idle entry/exit events. These take a string + * as argument: "Start" for entering dyntick-idle mode and "End" for + * leaving it. + */ +TRACE_EVENT(rcu_dyntick, + + TP_PROTO(char *polarity), + + TP_ARGS(polarity), + + TP_STRUCT__entry( + __field(char *, polarity) + ), + + TP_fast_assign( + __entry->polarity = polarity; + ), + + TP_printk("%s", __entry->polarity) +); + +/* + * Tracepoint for the registration of a single RCU callback function. + * The first argument is the type of RCU, the second argument is + * a pointer to the RCU callback itself, and the third element is the + * new RCU callback queue length for the current CPU. + */ +TRACE_EVENT(rcu_callback, + + TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen), + + TP_ARGS(rcuname, rhp, qlen), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(void *, rhp) + __field(void *, func) + __field(long, qlen) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->rhp = rhp; + __entry->func = rhp->func; + __entry->qlen = qlen; + ), + + TP_printk("%s rhp=%p func=%pf %ld", + __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen) +); + +/* + * Tracepoint for the registration of a single RCU callback of the special + * kfree() form. The first argument is the RCU type, the second argument + * is a pointer to the RCU callback, the third argument is the offset + * of the callback within the enclosing RCU-protected data structure, + * and the fourth argument is the new RCU callback queue length for the + * current CPU. + */ +TRACE_EVENT(rcu_kfree_callback, + + TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset, + long qlen), + + TP_ARGS(rcuname, rhp, offset, qlen), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(void *, rhp) + __field(unsigned long, offset) + __field(long, qlen) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->rhp = rhp; + __entry->offset = offset; + __entry->qlen = qlen; + ), + + TP_printk("%s rhp=%p func=%ld %ld", + __entry->rcuname, __entry->rhp, __entry->offset, + __entry->qlen) +); + /* * Tracepoint for marking the beginning rcu_do_batch, performed to start * RCU callback invocation. The first argument is the RCU flavor, @@ -65,50 +356,58 @@ TRACE_EVENT(rcu_batch_start, /* * Tracepoint for the invocation of a single RCU callback function. - * The argument is a pointer to the RCU callback itself. + * The first argument is the type of RCU, and the second argument is + * a pointer to the RCU callback itself. */ TRACE_EVENT(rcu_invoke_callback, - TP_PROTO(struct rcu_head *rhp), + TP_PROTO(char *rcuname, struct rcu_head *rhp), - TP_ARGS(rhp), + TP_ARGS(rcuname, rhp), TP_STRUCT__entry( - __field(void *, rhp) - __field(void *, func) + __field(char *, rcuname) + __field(void *, rhp) + __field(void *, func) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->func = rhp->func; ), - TP_printk("rhp=%p func=%pf", __entry->rhp, __entry->func) + TP_printk("%s rhp=%p func=%pf", + __entry->rcuname, __entry->rhp, __entry->func) ); /* * Tracepoint for the invocation of a single RCU callback of the special - * kfree() form. The first argument is a pointer to the RCU callback - * and the second argument is the offset of the callback within the - * enclosing RCU-protected data structure. + * kfree() form. The first argument is the RCU flavor, the second + * argument is a pointer to the RCU callback, and the third argument + * is the offset of the callback within the enclosing RCU-protected + * data structure. */ TRACE_EVENT(rcu_invoke_kfree_callback, - TP_PROTO(struct rcu_head *rhp, unsigned long offset), + TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset), - TP_ARGS(rhp, offset), + TP_ARGS(rcuname, rhp, offset), TP_STRUCT__entry( - __field(void *, rhp) + __field(char *, rcuname) + __field(void *, rhp) __field(unsigned long, offset) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->offset = offset; ), - TP_printk("rhp=%p func=%ld", __entry->rhp, __entry->offset) + TP_printk("%s rhp=%p func=%ld", + __entry->rcuname, __entry->rhp, __entry->offset) ); /* @@ -136,6 +435,24 @@ TRACE_EVENT(rcu_batch_end, __entry->rcuname, __entry->callbacks_invoked) ); +#else /* #ifdef CONFIG_RCU_TRACE */ + +#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) +#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0) +#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) +#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) +#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) +#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) +#define trace_rcu_dyntick(polarity) do { } while (0) +#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) +#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) +#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) +#define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) +#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) +#define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) + +#endif /* #else #ifdef CONFIG_RCU_TRACE */ + #endif /* _TRACE_RCU_H */ /* This part must be outside protection */ diff --git a/kernel/rcu.h b/kernel/rcu.h index d7f00ec8b47b..f600868d550d 100644 --- a/kernel/rcu.h +++ b/kernel/rcu.h @@ -69,15 +69,15 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) extern void kfree(const void *); -static inline void __rcu_reclaim(struct rcu_head *head) +static inline void __rcu_reclaim(char *rn, struct rcu_head *head) { unsigned long offset = (unsigned long)head->func; if (__is_kfree_rcu_offset(offset)) { - RCU_TRACE(trace_rcu_invoke_kfree_callback(head, offset)); + RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); kfree((void *)head - offset); } else { - RCU_TRACE(trace_rcu_invoke_callback(head)); + RCU_TRACE(trace_rcu_invoke_callback(rn, head)); head->func(head); } } diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index c9321d86999b..da775c87f27f 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -147,6 +147,7 @@ void rcu_check_callbacks(int cpu, int user) */ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) { + char *rn = NULL; struct rcu_head *next, *list; unsigned long flags; RCU_TRACE(int cb_count = 0); @@ -171,12 +172,13 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) local_irq_restore(flags); /* Invoke the callbacks on the local list. */ + RCU_TRACE(rn = rcp->name); while (list) { next = list->next; prefetch(next); debug_rcu_head_unqueue(list); local_bh_disable(); - __rcu_reclaim(list); + __rcu_reclaim(rn, list); local_bh_enable(); list = next; RCU_TRACE(cb_count++); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4e24399cabcf..7e0282949f8a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -166,6 +166,8 @@ void rcu_sched_qs(int cpu) rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); + if (rdp->passed_quiesc == 0) + trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); rdp->passed_quiesc = 1; } @@ -175,6 +177,8 @@ void rcu_bh_qs(int cpu) rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); + if (rdp->passed_quiesc == 0) + trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); rdp->passed_quiesc = 1; } @@ -319,6 +323,7 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) * trust its state not to change because interrupts are disabled. */ if (cpu_is_offline(rdp->cpu)) { + trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); rdp->offline_fqs++; return 1; } @@ -359,6 +364,7 @@ void rcu_enter_nohz(void) local_irq_restore(flags); return; } + trace_rcu_dyntick("Start"); /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ smp_mb__before_atomic_inc(); /* See above. */ atomic_inc(&rdtp->dynticks); @@ -396,6 +402,7 @@ void rcu_exit_nohz(void) /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ smp_mb__after_atomic_inc(); /* See above. */ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); + trace_rcu_dyntick("End"); local_irq_restore(flags); } @@ -501,6 +508,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * of the current RCU grace period. */ if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { + trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); rdp->dynticks_fqs++; return 1; } @@ -683,6 +691,7 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct * go looking for one. */ rdp->gpnum = rnp->gpnum; + trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); if (rnp->qsmask & rdp->grpmask) { rdp->qs_pending = 1; rdp->passed_quiesc = 0; @@ -746,6 +755,7 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat /* Remember that we saw this grace-period completion. */ rdp->completed = rnp->completed; + trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); /* * If we were in an extended quiescent state, we may have @@ -856,6 +866,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) /* Advance to a new grace period and initialize state. */ rsp->gpnum++; + trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; @@ -870,6 +881,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ rcu_start_gp_per_cpu(rsp, rnp, rdp); rcu_preempt_boost_start_gp(rnp); + trace_rcu_grace_period_init(rsp->name, rnp->gpnum, + rnp->level, rnp->grplo, + rnp->grphi, rnp->qsmask); raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } @@ -906,6 +920,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) if (rnp == rdp->mynode) rcu_start_gp_per_cpu(rsp, rnp, rdp); rcu_preempt_boost_start_gp(rnp); + trace_rcu_grace_period_init(rsp->name, rnp->gpnum, + rnp->level, rnp->grplo, + rnp->grphi, rnp->qsmask); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } @@ -939,6 +956,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) if (gp_duration > rsp->gp_max) rsp->gp_max = gp_duration; rsp->completed = rsp->gpnum; + trace_rcu_grace_period(rsp->name, rsp->completed, "end"); rsp->signaled = RCU_GP_IDLE; rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ } @@ -967,6 +985,10 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, return; } rnp->qsmask &= ~mask; + trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, + mask, rnp->qsmask, rnp->level, + rnp->grplo, rnp->grphi, + !!rnp->gp_tasks); if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { /* Other bits still set at this level, so done. */ @@ -1135,11 +1157,20 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) if (rnp->qsmaskinit != 0) { if (rnp != rdp->mynode) raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + else + trace_rcu_grace_period(rsp->name, + rnp->gpnum + 1 - + !!(rnp->qsmask & mask), + "cpuofl"); break; } - if (rnp == rdp->mynode) + if (rnp == rdp->mynode) { + trace_rcu_grace_period(rsp->name, + rnp->gpnum + 1 - + !!(rnp->qsmask & mask), + "cpuofl"); need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); - else + } else raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ mask = rnp->grpmask; rnp = rnp->parent; @@ -1226,7 +1257,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) next = list->next; prefetch(next); debug_rcu_head_unqueue(list); - __rcu_reclaim(list); + __rcu_reclaim(rsp->name, list); list = next; if (++count >= bl) break; @@ -1552,6 +1583,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), rdp->nxttail[RCU_NEXT_TAIL] = &head->next; rdp->qlen++; + if (__is_kfree_rcu_offset((unsigned long)func)) + trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, + rdp->qlen); + else + trace_rcu_callback(rsp->name, head, rdp->qlen); + /* If interrupts were disabled, don't dive into RCU core. */ if (irqs_disabled_flags(flags)) { local_irq_restore(flags); @@ -1850,6 +1887,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->dynticks = &per_cpu(rcu_dynticks, cpu); #endif /* #ifdef CONFIG_NO_HZ */ rdp->cpu = cpu; + rdp->rsp = rsp; raw_spin_unlock_irqrestore(&rnp->lock, flags); } @@ -1898,6 +1936,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->gpnum = rnp->completed; /* if GP in progress... */ rdp->completed = rnp->completed; rdp->passed_quiesc_completed = rnp->completed - 1; + trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); } raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ rnp = rnp->parent; diff --git a/kernel/rcutree.h b/kernel/rcutree.h index eee6c9406b46..d11a0065321c 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -299,6 +299,7 @@ struct rcu_data { unsigned long n_rp_need_nothing; int cpu; + struct rcu_state *rsp; }; /* Values for signaled field in struct rcu_state. */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 94d9ca1e4061..bdb2e82f78d3 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -124,6 +124,8 @@ static void rcu_preempt_qs(int cpu) rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); + if (rdp->passed_quiesc == 0) + trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); rdp->passed_quiesc = 1; current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; } @@ -190,6 +192,11 @@ static void rcu_preempt_note_context_switch(int cpu) if (rnp->qsmask & rdp->grpmask) rnp->gp_tasks = &t->rcu_node_entry; } + trace_rcu_preempt_task(rdp->rsp->name, + t->pid, + (rnp->qsmask & rdp->grpmask) + ? rnp->gpnum + : rnp->gpnum + 1); raw_spin_unlock_irqrestore(&rnp->lock, flags); } else if (t->rcu_read_lock_nesting < 0 && t->rcu_read_unlock_special) { @@ -344,6 +351,8 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ np = rcu_next_node_entry(t, rnp); list_del_init(&t->rcu_node_entry); + trace_rcu_unlock_preempted_task("rcu_preempt", + rnp->gpnum, t->pid); if (&t->rcu_node_entry == rnp->gp_tasks) rnp->gp_tasks = np; if (&t->rcu_node_entry == rnp->exp_tasks) @@ -364,10 +373,17 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) * we aren't waiting on any CPUs, report the quiescent state. * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. */ - if (empty) - raw_spin_unlock_irqrestore(&rnp->lock, flags); - else + if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { + trace_rcu_quiescent_state_report("preempt_rcu", + rnp->gpnum, + 0, rnp->qsmask, + rnp->level, + rnp->grplo, + rnp->grphi, + !!rnp->gp_tasks); rcu_report_unblock_qs_rnp(rnp, flags); + } else + raw_spin_unlock_irqrestore(&rnp->lock, flags); #ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ -- cgit v1.3-8-gc7d7 From 22507ed9b9d587486fb4681e93a8c58837738a25 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 18 Jul 2011 16:54:51 -0700 Subject: rcu: Remove unused and redundant interfaces The rcu_dereference_bh_protected() and rcu_dereference_sched_protected() macros are synonyms for rcu_dereference_protected() and are not used anywhere in mainline. This commit therefore removes them. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dd2bc2c6a285..caf22e84a743 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -578,26 +578,6 @@ extern int rcu_my_thread_group_empty(void); #define rcu_dereference_protected(p, c) \ __rcu_dereference_protected((p), (c), __rcu) -/** - * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * This is the RCU-bh counterpart to rcu_dereference_protected(). - */ -#define rcu_dereference_bh_protected(p, c) \ - __rcu_dereference_protected((p), (c), __rcu) - -/** - * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * This is the RCU-sched counterpart to rcu_dereference_protected(). - */ -#define rcu_dereference_sched_protected(p, c) \ - __rcu_dereference_protected((p), (c), __rcu) - /** * rcu_dereference() - fetch RCU-protected pointer for dereferencing -- cgit v1.3-8-gc7d7 From fc0763f53e3ff6a6bfa66934662a3446b9ca6f16 Mon Sep 17 00:00:00 2001 From: "Shi, Alex" Date: Thu, 28 Jul 2011 14:56:12 +0800 Subject: nohz: Remove nohz_cpu_mask RCU no longer uses this global variable, nor does anyone else. This commit therefore removes this variable. This reduces memory footprint and also removes some atomic instructions and memory barriers from the dyntick-idle path. Signed-off-by: Alex Shi Signed-off-by: Paul E. McKenney --- include/linux/sched.h | 1 - kernel/sched.c | 11 ----------- kernel/time/tick-sched.c | 6 ------ 3 files changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ac2c0578e0f..6ee91e20353b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -270,7 +270,6 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(int cpu); -extern cpumask_var_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern void select_nohz_load_balancer(int stop_tick); extern int get_nohz_timer_target(void); diff --git a/kernel/sched.c b/kernel/sched.c index e24cebe0e6cb..3e5525630459 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5979,15 +5979,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) ftrace_graph_init_idle_task(idle, cpu); } -/* - * In a system that switches off the HZ timer nohz_cpu_mask - * indicates which cpus entered this state. This is used - * in the rcu update to wait only for active cpus. For system - * which do not switch off the HZ timer nohz_cpu_mask should - * always be CPU_BITS_NONE. - */ -cpumask_var_t nohz_cpu_mask; - /* * Increase the granularity value when there are more CPUs, * because with more CPUs the 'effective latency' as visible @@ -8200,8 +8191,6 @@ void __init sched_init(void) */ current->sched_class = &fair_sched_class; - /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ - zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); #ifdef CONFIG_SMP zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); #ifdef CONFIG_NO_HZ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d5097c44b407..eb98e55196b9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -139,7 +139,6 @@ static void tick_nohz_update_jiffies(ktime_t now) struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); unsigned long flags; - cpumask_clear_cpu(cpu, nohz_cpu_mask); ts->idle_waketime = now; local_irq_save(flags); @@ -389,9 +388,6 @@ void tick_nohz_stop_sched_tick(int inidle) else expires.tv64 = KTIME_MAX; - if (delta_jiffies > 1) - cpumask_set_cpu(cpu, nohz_cpu_mask); - /* Skip reprogram of event if its not changed */ if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) goto out; @@ -441,7 +437,6 @@ void tick_nohz_stop_sched_tick(int inidle) * softirq. */ tick_do_update_jiffies64(ktime_get()); - cpumask_clear_cpu(cpu, nohz_cpu_mask); } raise_softirq_irqoff(TIMER_SOFTIRQ); out: @@ -524,7 +519,6 @@ void tick_nohz_restart_sched_tick(void) /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); - cpumask_clear_cpu(cpu, nohz_cpu_mask); #ifndef CONFIG_VIRT_CPU_ACCOUNTING /* -- cgit v1.3-8-gc7d7 From d322f45ceed525daa9401154590bbae3222cfefb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 31 Jul 2011 22:09:25 -0700 Subject: rcu: Make rcu_assign_pointer() unconditionally insert a memory barrier Recent changes to gcc give warning messages on rcu_assign_pointers()'s checks that allow it to determine when it is OK to omit the memory barrier. Stephen Hemminger tried a number of gcc tricks to silence this warning, but #pragmas and CPP macros do not work together in the way that would be required to make this work. However, we now have RCU_INIT_POINTER(), which already omits this memory barrier, and which therefore may be used when assigning NULL to an RCU-protected pointer that is accessible to readers. This commit therefore makes rcu_assign_pointer() unconditionally emit the memory barrier. Reported-by: Stephen Hemminger Signed-off-by: Eric Dumazet Acked-by: David S. Miller Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index caf22e84a743..d7accc5f8892 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -443,9 +443,7 @@ extern int rcu_my_thread_group_empty(void); }) #define __rcu_assign_pointer(p, v, space) \ ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - smp_wmb(); \ + smp_wmb(); \ (p) = (typeof(*v) __force space *)(v); \ }) -- cgit v1.3-8-gc7d7 From 6846c0c54074d47927c90eab4a805115e1ae3292 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 31 Jul 2011 22:33:02 -0700 Subject: rcu: Improve rcu_assign_pointer() and RCU_INIT_POINTER() documentation The differences between rcu_assign_pointer() and RCU_INIT_POINTER() are subtle, and it is easy to use the the cheaper RCU_INIT_POINTER() when the more-expensive rcu_assign_pointer() should have been used instead. The consequences of this mistake are quite severe. This commit therefore carefully lays out the situations in which it it permissible to use RCU_INIT_POINTER(). Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 47 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d7accc5f8892..af186e260c43 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -754,11 +754,18 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * any prior initialization. Returns the value assigned. * * Inserts memory barriers on architectures that require them - * (pretty much all of them other than x86), and also prevents - * the compiler from reordering the code that initializes the - * structure after the pointer assignment. More importantly, this - * call documents which pointers will be dereferenced by RCU read-side - * code. + * (which is most of them), and also prevents the compiler from + * reordering the code that initializes the structure after the pointer + * assignment. More importantly, this call documents which pointers + * will be dereferenced by RCU read-side code. + * + * In some special cases, you may use RCU_INIT_POINTER() instead + * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due + * to the fact that it does not constrain either the CPU or the compiler. + * That said, using RCU_INIT_POINTER() when you should have used + * rcu_assign_pointer() is a very bad thing that results in + * impossible-to-diagnose memory corruption. So please be careful. + * See the RCU_INIT_POINTER() comment header for details. */ #define rcu_assign_pointer(p, v) \ __rcu_assign_pointer((p), (v), __rcu) @@ -766,8 +773,34 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * RCU_INIT_POINTER() - initialize an RCU protected pointer * - * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep - * splats. + * Initialize an RCU-protected pointer in special cases where readers + * do not need ordering constraints on the CPU or the compiler. These + * special cases are: + * + * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or- + * 2. The caller has taken whatever steps are required to prevent + * RCU readers from concurrently accessing this pointer -or- + * 3. The referenced data structure has already been exposed to + * readers either at compile time or via rcu_assign_pointer() -and- + * a. You have not made -any- reader-visible changes to + * this structure since then -or- + * b. It is OK for readers accessing this structure from its + * new location to see the old state of the structure. (For + * example, the changes were to statistical counters or to + * other state where exact synchronization is not required.) + * + * Failure to follow these rules governing use of RCU_INIT_POINTER() will + * result in impossible-to-diagnose memory corruption. As in the structures + * will look OK in crash dumps, but any concurrent RCU readers might + * see pre-initialized values of the referenced data structure. So + * please be very careful how you use RCU_INIT_POINTER()!!! + * + * If you are creating an RCU-protected linked structure that is accessed + * by a single external-to-structure RCU-protected pointer, then you may + * use RCU_INIT_POINTER() to initialize the internal RCU-protected + * pointers, but you must use rcu_assign_pointer() to initialize the + * external-to-structure pointer -after- you have completely initialized + * the reader-accessible portions of the linked structure. */ #define RCU_INIT_POINTER(p, v) \ p = (typeof(*v) __force __rcu *)(v) -- cgit v1.3-8-gc7d7 From 6206ab9bab620fc0fbbed30ce20d145b0b3d1840 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 1 Aug 2011 06:22:11 -0700 Subject: rcu: Move __rcu_read_unlock()'s barrier() within if-statement We only need to constrain the compiler if we are actually exiting the top-level RCU read-side critical section. This commit therefore moves the first barrier() cal in __rcu_read_unlock() to inside the "if" statement, thus avoiding needless register flushes for inner rcu_read_unlock() calls. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 14 ++------------ kernel/rcutree_plugin.h | 2 +- 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index af186e260c43..2cf4226ade7e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -134,16 +134,6 @@ extern void call_rcu_sched(struct rcu_head *head, extern void synchronize_sched(void); -static inline void __rcu_read_lock_bh(void) -{ - local_bh_disable(); -} - -static inline void __rcu_read_unlock_bh(void) -{ - local_bh_enable(); -} - #ifdef CONFIG_PREEMPT_RCU extern void __rcu_read_lock(void); @@ -686,7 +676,7 @@ static inline void rcu_read_unlock(void) */ static inline void rcu_read_lock_bh(void) { - __rcu_read_lock_bh(); + local_bh_disable(); __acquire(RCU_BH); rcu_read_acquire_bh(); } @@ -700,7 +690,7 @@ static inline void rcu_read_unlock_bh(void) { rcu_read_release_bh(); __release(RCU_BH); - __rcu_read_unlock_bh(); + local_bh_enable(); } /** diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 4bac5a29fb69..ed70f6bf4c31 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -415,10 +415,10 @@ void __rcu_read_unlock(void) { struct task_struct *t = current; - barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ if (t->rcu_read_lock_nesting != 1) --t->rcu_read_lock_nesting; else { + barrier(); /* critical section before exit code. */ t->rcu_read_lock_nesting = INT_MIN; barrier(); /* assign before ->rcu_read_unlock_special load */ if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) -- cgit v1.3-8-gc7d7 From 82e78d80fc392ac7e98326bc8beeb8a679913ffd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 4 Aug 2011 07:55:34 -0700 Subject: rcu: Simplify unboosting checks Commit 7765be (Fix RCU_BOOST race handling current->rcu_read_unlock_special) introduced a new ->rcu_boosted field in the task structure. This is redundant because the existing ->rcu_boost_mutex will be non-NULL at any time that ->rcu_boosted is nonzero. Therefore, this commit removes ->rcu_boosted and tests ->rcu_boost_mutex instead. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/sched.h | 3 --- kernel/rcutree_plugin.h | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6ee91e20353b..acca43560805 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1259,9 +1259,6 @@ struct task_struct { #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; -#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) - int rcu_boosted; -#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */ struct list_head rcu_node_entry; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TREE_PREEMPT_RCU diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ed70f6bf4c31..eeb38ee8ebba 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -306,6 +306,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) int empty_exp; unsigned long flags; struct list_head *np; +#ifdef CONFIG_RCU_BOOST + struct rt_mutex *rbmp = NULL; +#endif /* #ifdef CONFIG_RCU_BOOST */ struct rcu_node *rnp; int special; @@ -351,6 +354,7 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ np = rcu_next_node_entry(t, rnp); list_del_init(&t->rcu_node_entry); + t->rcu_blocked_node = NULL; trace_rcu_unlock_preempted_task("rcu_preempt", rnp->gpnum, t->pid); if (&t->rcu_node_entry == rnp->gp_tasks) @@ -360,13 +364,12 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) #ifdef CONFIG_RCU_BOOST if (&t->rcu_node_entry == rnp->boost_tasks) rnp->boost_tasks = np; - /* Snapshot and clear ->rcu_boosted with rcu_node lock held. */ - if (t->rcu_boosted) { - special |= RCU_READ_UNLOCK_BOOSTED; - t->rcu_boosted = 0; + /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ + if (t->rcu_boost_mutex) { + rbmp = t->rcu_boost_mutex; + t->rcu_boost_mutex = NULL; } #endif /* #ifdef CONFIG_RCU_BOOST */ - t->rcu_blocked_node = NULL; /* * If this was the last task on the current list, and if @@ -387,10 +390,8 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) #ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ - if (special & RCU_READ_UNLOCK_BOOSTED) { - rt_mutex_unlock(t->rcu_boost_mutex); - t->rcu_boost_mutex = NULL; - } + if (rbmp) + rt_mutex_unlock(rbmp); #endif /* #ifdef CONFIG_RCU_BOOST */ /* @@ -1206,7 +1207,6 @@ static int rcu_boost(struct rcu_node *rnp) t = container_of(tb, struct task_struct, rcu_node_entry); rt_mutex_init_proxy_locked(&mtx, t); t->rcu_boost_mutex = &mtx; - t->rcu_boosted = 1; raw_spin_unlock_irqrestore(&rnp->lock, flags); rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ -- cgit v1.3-8-gc7d7