From 353af9c9a866b07492b15a4efd18ec93123d3a1d Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 20 Dec 2012 09:35:02 -0800 Subject: rcu: Prevent soft-lockup complaints about no-CBs CPUs The wait_event() at the head of the rcu_nocb_kthread() can result in soft-lockup complaints if the CPU in question does not register RCU callbacks for an extended period. This commit therefore changes the wait_event() to a wait_event_interruptible(). Reported-by: Frederic Weisbecker Signed-off-by: Paul Gortmaker Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index f6e5ec2932b4..43dba2d798ac 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2366,10 +2366,11 @@ static int rcu_nocb_kthread(void *arg) for (;;) { /* If not polling, wait for next batch of callbacks. */ if (!rcu_nocb_poll) - wait_event(rdp->nocb_wq, rdp->nocb_head); + wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head); list = ACCESS_ONCE(rdp->nocb_head); if (!list) { schedule_timeout_interruptible(1); + flush_signals(current); continue; } -- cgit v1.2.3-59-g8ed1b From 1b0048a44c502c5ab850203e6e0a6498d7d8676d Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 20 Dec 2012 13:19:22 -0800 Subject: rcu: Make rcu_nocb_poll an early_param instead of module_param The as-documented rcu_nocb_poll will fail to enable this feature for two reasons. (1) there is an extra "s" in the documented name which is not in the code, and (2) since it uses module_param, it really is expecting a prefix, akin to "rcutree.fanout_leaf" and the prefix isn't documented. However, there are several reasons why we might not want to simply fix the typo and add the prefix: 1) we'd end up with rcutree.rcu_nocb_poll, and rather probably make a change to rcutree.nocb_poll 2) if we did #1, then the prefix wouldn't be consistent with the rcu_nocbs= parameter (i.e. one with, one without prefix) 3) the use of module_param in a header file is less than desired, since it isn't immediately obvious that it will get processed via rcutree.c and get the prefix from that (although use of module_param_named() could clarify that.) 4) the implied export of /sys/module/rcutree/parameters/rcu_nocb_poll data to userspace via module_param() doesn't really buy us anything, as it is read-only and we can tell if it is enabled already without it, since there is a printk at early boot telling us so. In light of all that, just change it from a module_param() to an early_setup() call, and worry about adding it to /sys later on if we decide to allow a dynamic setting of it. Also change the variable to be tagged as read_mostly, since it will only ever be fiddled with at most, once at boot. Signed-off-by: Paul Gortmaker Signed-off-by: Paul E. McKenney --- Documentation/kernel-parameters.txt | 2 +- kernel/rcutree_plugin.h | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 363e348bff9b..6c723811c0a0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2438,7 +2438,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. real-time workloads. It can also improve energy efficiency for asymmetric multiprocessors. - rcu_nocbs_poll [KNL,BOOT] + rcu_nocb_poll [KNL,BOOT] Rather than requiring that offloaded CPUs (specified by rcu_nocbs= above) explicitly awaken the corresponding "rcuoN" kthreads, diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 43dba2d798ac..c1cc7e17ff9d 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -40,8 +40,7 @@ #ifdef CONFIG_RCU_NOCB_CPU static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ -static bool rcu_nocb_poll; /* Offload kthread are to poll. */ -module_param(rcu_nocb_poll, bool, 0444); +static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ static char __initdata nocb_buf[NR_CPUS * 5]; #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ @@ -2159,6 +2158,13 @@ static int __init rcu_nocb_setup(char *str) } __setup("rcu_nocbs=", rcu_nocb_setup); +static int __init parse_rcu_nocb_poll(char *arg) +{ + rcu_nocb_poll = 1; + return 0; +} +early_param("rcu_nocb_poll", parse_rcu_nocb_poll); + /* Is the specified CPU a no-CPUs CPU? */ static bool is_nocb_cpu(int cpu) { -- cgit v1.2.3-59-g8ed1b From aa7f67304d1a03180f463258aa6f15a8b434e77d Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Mon, 14 Jan 2013 11:55:31 -0600 Subject: sched/rt: Use root_domain of rt_rq not current processor When the system has multiple domains do_sched_rt_period_timer() can run on any CPU and may iterate over all rt_rq in cpu_online_mask. This means when balance_runtime() is run for a given rt_rq that rt_rq may be in a different rd than the current processor. Thus if we use smp_processor_id() to get rd in do_balance_runtime() we may borrow runtime from a rt_rq that is not part of our rd. This changes do_balance_runtime to get the rd from the passed in rt_rq ensuring that we borrow runtime only from the correct rd for the given rt_rq. This fixes a BUG at kernel/sched/rt.c:687! in __disable_runtime when we try reclaim runtime lent to other rt_rq but runtime has been lent to a rt_rq in another rd. Signed-off-by: Shawn Bohrer Acked-by: Steven Rostedt Acked-by: Mike Galbraith Cc: peterz@infradead.org Cc: Link: http://lkml.kernel.org/r/1358186131-29494-1-git-send-email-sbohrer@rgmadvisors.com Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 418feb01344e..4f02b2847357 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -566,7 +566,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) static int do_balance_runtime(struct rt_rq *rt_rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); - struct root_domain *rd = cpu_rq(smp_processor_id())->rd; + struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd; int i, weight, more = 0; u64 rt_period; -- cgit v1.2.3-59-g8ed1b From 38dc3348e36d6cbe6ad51d771e4db948cda5b0e3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Jan 2013 14:14:22 +0000 Subject: sched: Fix warning in kernel/sched/fair.c a4c96ae319 "sched: Unthrottle rt runqueues in __disable_runtime()" turned the unthrottle_offline_cfs_rqs function into a static symbol, which now triggers a warning about it being potentially unused: kernel/sched/fair.c:2055:13: warning: 'unthrottle_offline_cfs_rqs' defined but not used [-Wunused-function] Marking it __maybe_unused shuts up the gcc warning and lets the compiler safely drop the function body when it's not being used. To reproduce, build the ARM bcm2835_defconfig. Signed-off-by: Arnd Bergmann Cc: Peter Boonstoppel Cc: Peter Zijlstra Cc: Paul Turner Cc: linux-arm-kernel@list.infradead.org Link: http://lkml.kernel.org/r/1359123276-15833-6-git-send-email-arnd@arndb.de Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5eea8707234a..81fa53643409 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2663,7 +2663,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) hrtimer_cancel(&cfs_b->slack_timer); } -static void unthrottle_offline_cfs_rqs(struct rq *rq) +static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) { struct cfs_rq *cfs_rq; -- cgit v1.2.3-59-g8ed1b From cff3c124a7e82ca0ea1d6864b27ef18c403c0773 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Jan 2013 14:14:23 +0000 Subject: sched/debug: Fix format string for 32-bit platforms The type returned from atomic64_t can be either unsigned long or unsigned long long, depending on the architecture. Using a cast to unsigned long long lets us use the same format string for all architectures. Without this patch, building with scheduler debugging enabled results in: kernel/sched/debug.c: In function 'print_cfs_rq': kernel/sched/debug.c:225:2: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'long long int' [-Wformat] kernel/sched/debug.c:225:2: warning: format '%ld' expects argument of type 'long int', but argument 3 has type 'long long int' [-Wformat] Signed-off-by: Arnd Bergmann Cc: Peter Zijlstra Cc: Paul Turner Cc: linux-arm-kernel@list.infradead.org Link: http://lkml.kernel.org/r/1359123276-15833-7-git-send-email-arnd@arndb.de Signed-off-by: Ingo Molnar --- kernel/sched/debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 2cd3c1b4e582..7ae4c4c5420e 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -222,8 +222,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) cfs_rq->runnable_load_avg); SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg", cfs_rq->blocked_load_avg); - SEQ_printf(m, " .%-30s: %ld\n", "tg_load_avg", - atomic64_read(&cfs_rq->tg->load_avg)); + SEQ_printf(m, " .%-30s: %lld\n", "tg_load_avg", + (unsigned long long)atomic64_read(&cfs_rq->tg->load_avg)); SEQ_printf(m, " .%-30s: %lld\n", "tg_load_contrib", cfs_rq->tg_load_contrib); SEQ_printf(m, " .%-30s: %d\n", "tg_runnable_contrib", -- cgit v1.2.3-59-g8ed1b From f44310b98ddb7f0d06550d73ed67df5865e3eda5 Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Sat, 26 Jan 2013 15:53:57 +0800 Subject: smp: Fix SMP function call empty cpu mask race I get the following warning every day with v3.7, once or twice a day: [ 2235.186027] WARNING: at /mnt/sda7/kernel/linux/arch/x86/kernel/apic/ipi.c:109 default_send_IPI_mask_logical+0x2f/0xb8() As explained by Linus as well: | | Once we've done the "list_add_rcu()" to add it to the | queue, we can have (another) IPI to the target CPU that can | now see it and clear the mask. | | So by the time we get to actually send the IPI, the mask might | have been cleared by another IPI. | This patch also fixes a system hang problem, if the data->cpumask gets cleared after passing this point: if (WARN_ONCE(!mask, "empty IPI mask")) return; then the problem in commit 83d349f35e1a ("x86: don't send an IPI to the empty set of CPU's") will happen again. Signed-off-by: Wang YanQing Acked-by: Linus Torvalds Acked-by: Jan Beulich Cc: Paul E. McKenney Cc: Andrew Morton Cc: peterz@infradead.org Cc: mina86@mina86.org Cc: srivatsa.bhat@linux.vnet.ibm.com Cc: Link: http://lkml.kernel.org/r/20130126075357.GA3205@udknight [ Tidied up the changelog and the comment in the code. ] Signed-off-by: Ingo Molnar --- kernel/smp.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/smp.c b/kernel/smp.c index 29dd40a9f2f4..69f38bd98b42 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -33,6 +33,7 @@ struct call_function_data { struct call_single_data csd; atomic_t refs; cpumask_var_t cpumask; + cpumask_var_t cpumask_ipi; }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data); @@ -56,6 +57,9 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, cpu_to_node(cpu))) return notifier_from_errno(-ENOMEM); + if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL, + cpu_to_node(cpu))) + return notifier_from_errno(-ENOMEM); break; #ifdef CONFIG_HOTPLUG_CPU @@ -65,6 +69,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_DEAD: case CPU_DEAD_FROZEN: free_cpumask_var(cfd->cpumask); + free_cpumask_var(cfd->cpumask_ipi); break; #endif }; @@ -526,6 +531,12 @@ void smp_call_function_many(const struct cpumask *mask, return; } + /* + * After we put an entry into the list, data->cpumask + * may be cleared again when another CPU sends another IPI for + * a SMP function call, so data->cpumask will be zero. + */ + cpumask_copy(data->cpumask_ipi, data->cpumask); raw_spin_lock_irqsave(&call_function.lock, flags); /* * Place entry at the _HEAD_ of the list, so that any cpu still @@ -549,7 +560,7 @@ void smp_call_function_many(const struct cpumask *mask, smp_mb(); /* Send a message to all CPUs in the map */ - arch_send_call_function_ipi_mask(data->cpumask); + arch_send_call_function_ipi_mask(data->cpumask_ipi); /* Optionally wait for the CPUs to complete */ if (wait) -- cgit v1.2.3-59-g8ed1b From ff0d05bf73620eb7dc8aee7423e992ef87870bdf Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 31 Jan 2013 14:27:03 +1100 Subject: Revert "console: implement lockdep support for console_lock" This reverts commit daee779718a319ff9f83e1ba3339334ac650bb22. I'll requeue this after the console locking fixes, so lockdep is useful again for people until fbcon is fixed. Signed-off-by: Dave Airlie --- kernel/printk.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 357f714ddd49..267ce780abe8 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -87,12 +87,6 @@ static DEFINE_SEMAPHORE(console_sem); struct console *console_drivers; EXPORT_SYMBOL_GPL(console_drivers); -#ifdef CONFIG_LOCKDEP -static struct lockdep_map console_lock_dep_map = { - .name = "console_lock" -}; -#endif - /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. It's @@ -1924,7 +1918,6 @@ void console_lock(void) return; console_locked = 1; console_may_schedule = 1; - mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_); } EXPORT_SYMBOL(console_lock); @@ -1946,7 +1939,6 @@ int console_trylock(void) } console_locked = 1; console_may_schedule = 0; - mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_); return 1; } EXPORT_SYMBOL(console_trylock); @@ -2107,7 +2099,6 @@ skip: local_irq_restore(flags); } console_locked = 0; - mutex_release(&console_lock_dep_map, 1, _RET_IP_); /* Release the exclusive_console once it is used */ if (unlikely(exclusive_console)) -- cgit v1.2.3-59-g8ed1b From 0231bb5336758426b44ccd798ccd3c5419c95d58 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 1 Feb 2013 11:23:45 +0100 Subject: perf: Fix event group context move When we have group with mixed events (hw/sw) we want to end up with group leader being in hw context. So if group leader is initialy sw event, we move all the events under hw context. The move is done for each event by removing it from its context and adding it back into proper one. As a part of the removal the event is automatically disabled, which is not what we want at this stage of creating groups. The fix is to initialize event state after removal from sw context. This fix resulted from the following discussion: http://thread.gmane.org/gmane.linux.kernel.perf.user/1144 Reported-by: Andreas Hollmann Signed-off-by: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Link: http://lkml.kernel.org/r/1359714225-4231-1-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 301079d06f24..7b6646a8c067 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -907,6 +907,15 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) ctx->nr_stat++; } +/* + * Initialize event state based on the perf_event_attr::disabled. + */ +static inline void perf_event__state_init(struct perf_event *event) +{ + event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF : + PERF_EVENT_STATE_INACTIVE; +} + /* * Called at perf_event creation and when events are attached/detached from a * group. @@ -6179,8 +6188,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, event->overflow_handler = overflow_handler; event->overflow_handler_context = context; - if (attr->disabled) - event->state = PERF_EVENT_STATE_OFF; + perf_event__state_init(event); pmu = NULL; @@ -6609,9 +6617,17 @@ SYSCALL_DEFINE5(perf_event_open, mutex_lock(&gctx->mutex); perf_remove_from_context(group_leader); + + /* + * Removing from the context ends up with disabled + * event. What we want here is event in the initial + * startup state, ready to be add into new context. + */ + perf_event__state_init(group_leader); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { perf_remove_from_context(sibling); + perf_event__state_init(sibling); put_ctx(gctx); } mutex_unlock(&gctx->mutex); -- cgit v1.2.3-59-g8ed1b