From 486e259340fc4c60474f2c14703e3b3634bb58ca Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 6 Jan 2012 14:11:30 -0800 Subject: rcu: Avoid waking up CPUs having only kfree_rcu() callbacks When CONFIG_RCU_FAST_NO_HZ is enabled, RCU will allow a given CPU to enter dyntick-idle mode even if it still has RCU callbacks queued. RCU avoids system hangs in this case by scheduling a timer for several jiffies in the future. However, if all of the callbacks on that CPU are from kfree_rcu(), there is no reason to wake the CPU up, as it is not a problem to defer freeing of memory. This commit therefore tracks the number of callbacks on a given CPU that are from kfree_rcu(), and avoids scheduling the timer if all of a given CPU's callbacks are from kfree_rcu(). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcu.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/rcu.h') diff --git a/kernel/rcu.h b/kernel/rcu.h index aa88baab5f78..a074b0b43fc2 100644 --- a/kernel/rcu.h +++ b/kernel/rcu.h @@ -76,16 +76,18 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) extern void kfree(const void *); -static inline void __rcu_reclaim(char *rn, struct rcu_head *head) +static inline bool __rcu_reclaim(char *rn, struct rcu_head *head) { unsigned long offset = (unsigned long)head->func; if (__is_kfree_rcu_offset(offset)) { RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); kfree((void *)head - offset); + return 1; } else { RCU_TRACE(trace_rcu_invoke_callback(rn, head)); head->func(head); + return 0; } } -- cgit v1.2.3-59-g8ed1b From ce5df97be530e4746bf9a4ac14589a1cfdfd8efc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 Feb 2012 08:26:06 -0800 Subject: rcu: Remove redundant check for rcu_head misalignment There is now an unconditional check for rcu_head misalignment in __call_rcu(), so remove the old conditional one in debug_rcu_head_queue(). Reported-by: Josh Triplett Signed-off-by: Paul E. McKenney --- kernel/rcu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/rcu.h') diff --git a/kernel/rcu.h b/kernel/rcu.h index a074b0b43fc2..30876f4063b6 100644 --- a/kernel/rcu.h +++ b/kernel/rcu.h @@ -50,7 +50,6 @@ extern struct debug_obj_descr rcuhead_debug_descr; static inline void debug_rcu_head_queue(struct rcu_head *head) { - WARN_ON_ONCE((unsigned long)head & 0x3); debug_object_activate(head, &rcuhead_debug_descr); debug_object_active_state(head, &rcuhead_debug_descr, STATE_RCU_HEAD_READY, -- cgit v1.2.3-59-g8ed1b From 29e37d814188ac8d60f2120583704d3ef6d634b4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 17 Nov 2011 16:55:56 -0800 Subject: rcu: Allow nesting of rcu_idle_enter() and rcu_idle_exit() Use of RCU in the idle loop is incorrect, quite a few instances of just that have made their way into mainline, primarily event tracing. The problem with RCU read-side critical sections on CPUs that RCU believes to be idle is that RCU is completely ignoring the CPU, along with any attempts and RCU read-side critical sections. The approaches of eliminating the offending uses and of pushing the definition of idle down beyond the offending uses have both proved impractical. The new approach is to encapsulate offending uses of RCU with rcu_idle_exit() and rcu_idle_enter(), but this requires nesting for code that is invoked both during idle and and during normal execution. Therefore, this commit modifies rcu_idle_enter() and rcu_idle_exit() to permit nesting. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: Deepthi Dharwar --- kernel/rcu.h | 21 ++++++++++++++++++++- kernel/rcutiny.c | 16 ++++++++++++---- kernel/rcutree.c | 21 ++++++++++++++------- 3 files changed, 46 insertions(+), 12 deletions(-) (limited to 'kernel/rcu.h') diff --git a/kernel/rcu.h b/kernel/rcu.h index 30876f4063b6..8ba99cdc6515 100644 --- a/kernel/rcu.h +++ b/kernel/rcu.h @@ -33,8 +33,27 @@ * Process-level increment to ->dynticks_nesting field. This allows for * architectures that use half-interrupts and half-exceptions from * process context. + * + * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH + * that counts the number of process-based reasons why RCU cannot + * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE + * is the value used to increment or decrement this field. + * + * The rest of the bits could in principle be used to count interrupts, + * but this would mean that a negative-one value in the interrupt + * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field. + * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK + * that is set to DYNTICK_TASK_FLAG upon initial exit from idle. + * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon + * initial exit from idle. */ -#define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1) +#define DYNTICK_TASK_NEST_WIDTH 7 +#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1) +#define DYNTICK_TASK_NEST_MASK (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1) +#define DYNTICK_TASK_FLAG ((DYNTICK_TASK_NEST_VALUE / 8) * 2) +#define DYNTICK_TASK_MASK ((DYNTICK_TASK_NEST_VALUE / 8) * 3) +#define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \ + DYNTICK_TASK_FLAG) /* * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 4eb34fcc2a75..c8b0e1582c04 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -53,7 +53,7 @@ static void __call_rcu(struct rcu_head *head, #include "rcutiny_plugin.h" -static long long rcu_dynticks_nesting = DYNTICK_TASK_NESTING; +static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ static void rcu_idle_enter_common(long long oldval) @@ -88,7 +88,12 @@ void rcu_idle_enter(void) local_irq_save(flags); oldval = rcu_dynticks_nesting; - rcu_dynticks_nesting = 0; + WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); + if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == + DYNTICK_TASK_NEST_VALUE) + rcu_dynticks_nesting = 0; + else + rcu_dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; rcu_idle_enter_common(oldval); local_irq_restore(flags); } @@ -140,8 +145,11 @@ void rcu_idle_exit(void) local_irq_save(flags); oldval = rcu_dynticks_nesting; - WARN_ON_ONCE(oldval != 0); - rcu_dynticks_nesting = DYNTICK_TASK_NESTING; + WARN_ON_ONCE(rcu_dynticks_nesting < 0); + if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) + rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE; + else + rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; rcu_idle_exit_common(oldval); local_irq_restore(flags); } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index df0e3c1bb68e..92b47760edf3 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -198,7 +198,7 @@ void rcu_note_context_switch(int cpu) EXPORT_SYMBOL_GPL(rcu_note_context_switch); DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { - .dynticks_nesting = DYNTICK_TASK_NESTING, + .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, .dynticks = ATOMIC_INIT(1), }; @@ -394,7 +394,11 @@ void rcu_idle_enter(void) local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); oldval = rdtp->dynticks_nesting; - rdtp->dynticks_nesting = 0; + WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); + if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) + rdtp->dynticks_nesting = 0; + else + rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; rcu_idle_enter_common(rdtp, oldval); local_irq_restore(flags); } @@ -467,7 +471,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) * Exit idle mode, in other words, -enter- the mode in which RCU * read-side critical sections can occur. * - * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to + * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to * allow for the possibility of usermode upcalls messing up our count * of interrupt nesting level during the busy period that is just * now starting. @@ -481,8 +485,11 @@ void rcu_idle_exit(void) local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); oldval = rdtp->dynticks_nesting; - WARN_ON_ONCE(oldval != 0); - rdtp->dynticks_nesting = DYNTICK_TASK_NESTING; + WARN_ON_ONCE(oldval < 0); + if (oldval & DYNTICK_TASK_NEST_MASK) + rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; + else + rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; rcu_idle_exit_common(rdtp, oldval); local_irq_restore(flags); } @@ -2253,7 +2260,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->qlen_lazy = 0; rdp->qlen = 0; rdp->dynticks = &per_cpu(rcu_dynticks, cpu); - WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); + WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); rdp->cpu = cpu; rdp->rsp = rsp; @@ -2281,7 +2288,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; - rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING; + rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; atomic_set(&rdp->dynticks->dynticks, (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); rcu_prepare_for_idle_init(cpu); -- cgit v1.2.3-59-g8ed1b