aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-03-27 09:46:03 +0100
committerIngo Molnar <mingo@kernel.org>2015-03-27 09:46:03 +0100
commit072e5a1cfabca7276744d24726e094d85721df5c (patch)
treec7237fa143f72273aa8a8179f741c24b9072001c /kernel
parentperf/x86/intel: Add INST_RETIRED.ALL workarounds (diff)
parentperf: Fix irq_work 'tail' recursion (diff)
downloadlinux-dev-072e5a1cfabca7276744d24726e094d85721df5c.tar.xz
linux-dev-072e5a1cfabca7276744d24726e094d85721df5c.zip
Merge branch 'perf/urgent' into perf/core, to pick up fixes and to refresh the tree
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpuset.c9
-rw-r--r--kernel/events/core.c12
-rw-r--r--kernel/irq/manage.c7
-rw-r--r--kernel/irq/pm.c7
-rw-r--r--kernel/livepatch/core.c43
-rw-r--r--kernel/locking/rtmutex.c1
-rw-r--r--kernel/module.c4
-rw-r--r--kernel/printk/console_cmdline.h2
-rw-r--r--kernel/printk/printk.c1
-rw-r--r--kernel/sched/idle.c54
-rw-r--r--kernel/sys.c3
-rw-r--r--kernel/trace/ftrace.c40
-rw-r--r--kernel/workqueue.c56
13 files changed, 182 insertions, 57 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1d1fe9361d29..fc7f4748d34a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -548,9 +548,6 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
rcu_read_lock();
cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
- if (cp == root_cs)
- continue;
-
/* skip the whole subtree if @cp doesn't have any CPU */
if (cpumask_empty(cp->cpus_allowed)) {
pos_css = css_rightmost_descendant(pos_css);
@@ -873,7 +870,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
* If it becomes empty, inherit the effective mask of the
* parent, which is guaranteed to have some CPUs.
*/
- if (cpumask_empty(new_cpus))
+ if (cgroup_on_dfl(cp->css.cgroup) && cpumask_empty(new_cpus))
cpumask_copy(new_cpus, parent->effective_cpus);
/* Skip the whole subtree if the cpumask remains the same. */
@@ -1129,7 +1126,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
* If it becomes empty, inherit the effective mask of the
* parent, which is guaranteed to have some MEMs.
*/
- if (nodes_empty(*new_mems))
+ if (cgroup_on_dfl(cp->css.cgroup) && nodes_empty(*new_mems))
*new_mems = parent->effective_mems;
/* Skip the whole subtree if the nodemask remains the same. */
@@ -1979,7 +1976,9 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
spin_lock_irq(&callback_lock);
cs->mems_allowed = parent->mems_allowed;
+ cs->effective_mems = parent->mems_allowed;
cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+ cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
spin_unlock_irq(&callback_lock);
out_unlock:
mutex_unlock(&cpuset_mutex);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8bb20cc39a92..9a5f339a0e2d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3610,7 +3610,7 @@ static void put_event(struct perf_event *event)
ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
WARN_ON_ONCE(ctx->parent_ctx);
perf_remove_from_context(event, true);
- mutex_unlock(&ctx->mutex);
+ perf_event_ctx_unlock(event, ctx);
_free_event(event);
}
@@ -4593,6 +4593,13 @@ static void perf_pending_event(struct irq_work *entry)
{
struct perf_event *event = container_of(entry,
struct perf_event, pending);
+ int rctx;
+
+ rctx = perf_swevent_get_recursion_context();
+ /*
+ * If we 'fail' here, that's OK, it means recursion is already disabled
+ * and we won't recurse 'further'.
+ */
if (event->pending_disable) {
event->pending_disable = 0;
@@ -4603,6 +4610,9 @@ static void perf_pending_event(struct irq_work *entry)
event->pending_wakeup = 0;
perf_event_wakeup(event);
}
+
+ if (rctx >= 0)
+ perf_swevent_put_recursion_context(rctx);
}
/*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 196a06fbc122..886d09e691d5 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1474,8 +1474,13 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
* otherwise we'll have trouble later trying to figure out
* which interrupt is which (messes up the interrupt freeing
* logic etc).
+ *
+ * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and
+ * it cannot be set along with IRQF_NO_SUSPEND.
*/
- if ((irqflags & IRQF_SHARED) && !dev_id)
+ if (((irqflags & IRQF_SHARED) && !dev_id) ||
+ (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) ||
+ ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND)))
return -EINVAL;
desc = irq_to_desc(irq);
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 3ca532592704..5204a6d1b985 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -43,9 +43,12 @@ void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action)
if (action->flags & IRQF_NO_SUSPEND)
desc->no_suspend_depth++;
+ else if (action->flags & IRQF_COND_SUSPEND)
+ desc->cond_suspend_depth++;
WARN_ON_ONCE(desc->no_suspend_depth &&
- desc->no_suspend_depth != desc->nr_actions);
+ (desc->no_suspend_depth +
+ desc->cond_suspend_depth) != desc->nr_actions);
}
/*
@@ -61,6 +64,8 @@ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action)
if (action->flags & IRQF_NO_SUSPEND)
desc->no_suspend_depth--;
+ else if (action->flags & IRQF_COND_SUSPEND)
+ desc->cond_suspend_depth--;
}
static bool suspend_device_irq(struct irq_desc *desc, int irq)
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index ff7f47d026ac..3f9f1d6b4c2e 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -89,16 +89,28 @@ static bool klp_is_object_loaded(struct klp_object *obj)
/* sets obj->mod if object is not vmlinux and module is found */
static void klp_find_object_module(struct klp_object *obj)
{
+ struct module *mod;
+
if (!klp_is_module(obj))
return;
mutex_lock(&module_mutex);
/*
- * We don't need to take a reference on the module here because we have
- * the klp_mutex, which is also taken by the module notifier. This
- * prevents any module from unloading until we release the klp_mutex.
+ * We do not want to block removal of patched modules and therefore
+ * we do not take a reference here. The patches are removed by
+ * a going module handler instead.
+ */
+ mod = find_module(obj->name);
+ /*
+ * Do not mess work of the module coming and going notifiers.
+ * Note that the patch might still be needed before the going handler
+ * is called. Module functions can be called even in the GOING state
+ * until mod->exit() finishes. This is especially important for
+ * patches that modify semantic of the functions.
*/
- obj->mod = find_module(obj->name);
+ if (mod && mod->klp_alive)
+ obj->mod = mod;
+
mutex_unlock(&module_mutex);
}
@@ -248,11 +260,12 @@ static int klp_find_external_symbol(struct module *pmod, const char *name,
/* first, check if it's an exported symbol */
preempt_disable();
sym = find_symbol(name, NULL, NULL, true, true);
- preempt_enable();
if (sym) {
*addr = sym->value;
+ preempt_enable();
return 0;
}
+ preempt_enable();
/* otherwise check if it's in another .o within the patch module */
return klp_find_object_symbol(pmod->name, name, addr);
@@ -314,12 +327,12 @@ static void notrace klp_ftrace_handler(unsigned long ip,
rcu_read_lock();
func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
stack_node);
- rcu_read_unlock();
-
if (WARN_ON_ONCE(!func))
- return;
+ goto unlock;
klp_arch_set_pc(regs, (unsigned long)func->new_func);
+unlock:
+ rcu_read_unlock();
}
static int klp_disable_func(struct klp_func *func)
@@ -731,7 +744,7 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
func->state = KLP_DISABLED;
return kobject_init_and_add(&func->kobj, &klp_ktype_func,
- obj->kobj, func->old_name);
+ obj->kobj, "%s", func->old_name);
}
/* parts of the initialization that is done only when the object is loaded */
@@ -766,6 +779,7 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj)
return -EINVAL;
obj->state = KLP_DISABLED;
+ obj->mod = NULL;
klp_find_object_module(obj);
@@ -807,7 +821,7 @@ static int klp_init_patch(struct klp_patch *patch)
patch->state = KLP_DISABLED;
ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch,
- klp_root_kobj, patch->mod->name);
+ klp_root_kobj, "%s", patch->mod->name);
if (ret)
goto unlock;
@@ -960,6 +974,15 @@ static int klp_module_notify(struct notifier_block *nb, unsigned long action,
mutex_lock(&klp_mutex);
+ /*
+ * Each module has to know that the notifier has been called.
+ * We never know what module will get patched by a new patch.
+ */
+ if (action == MODULE_STATE_COMING)
+ mod->klp_alive = true;
+ else /* MODULE_STATE_GOING */
+ mod->klp_alive = false;
+
list_for_each_entry(patch, &klp_patches, list) {
for (obj = patch->objs; obj->funcs; obj++) {
if (!klp_is_module(obj) || strcmp(obj->name, mod->name))
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index e16e5542bf13..6357265a31ad 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1193,6 +1193,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
if (unlikely(ret)) {
+ __set_current_state(TASK_RUNNING);
if (rt_mutex_has_waiters(lock))
remove_waiter(lock, &waiter);
rt_mutex_handle_deadlock(ret, chwalk, &waiter);
diff --git a/kernel/module.c b/kernel/module.c
index b34813f725e9..b3d634ed06c9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -56,7 +56,6 @@
#include <linux/async.h>
#include <linux/percpu.h>
#include <linux/kmemleak.h>
-#include <linux/kasan.h>
#include <linux/jump_label.h>
#include <linux/pfn.h>
#include <linux/bsearch.h>
@@ -1814,7 +1813,6 @@ static void unset_module_init_ro_nx(struct module *mod) { }
void __weak module_memfree(void *module_region)
{
vfree(module_region);
- kasan_module_free(module_region);
}
void __weak module_arch_cleanup(struct module *mod)
@@ -2313,11 +2311,13 @@ static void layout_symtab(struct module *mod, struct load_info *info)
info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
mod->core_size += strtab_size;
+ mod->core_size = debug_align(mod->core_size);
/* Put string table section at end of init part of module. */
strsect->sh_flags |= SHF_ALLOC;
strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
info->index.str) | INIT_OFFSET_MASK;
+ mod->init_size = debug_align(mod->init_size);
pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
}
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
index cbd69d842341..2ca4a8b5fe57 100644
--- a/kernel/printk/console_cmdline.h
+++ b/kernel/printk/console_cmdline.h
@@ -3,7 +3,7 @@
struct console_cmdline
{
- char name[8]; /* Name of the driver */
+ char name[16]; /* Name of the driver */
int index; /* Minor dev. to use */
char *options; /* Options for the driver */
#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 01cfd69c54c6..bb0635bd74f2 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2464,6 +2464,7 @@ void register_console(struct console *newcon)
for (i = 0, c = console_cmdline;
i < MAX_CMDLINECONSOLES && c->name[0];
i++, c++) {
+ BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
if (strcmp(c->name, newcon->name) != 0)
continue;
if (newcon->index >= 0 &&
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 94b2d7b88a27..80014a178342 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -82,6 +82,7 @@ static void cpuidle_idle_call(void)
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
int next_state, entered_state;
unsigned int broadcast;
+ bool reflect;
/*
* Check if the idle task must be rescheduled. If it is the
@@ -105,6 +106,9 @@ static void cpuidle_idle_call(void)
*/
rcu_idle_enter();
+ if (cpuidle_not_available(drv, dev))
+ goto use_default;
+
/*
* Suspend-to-idle ("freeze") is a system state in which all user space
* has been frozen, all I/O devices have been suspended and the only
@@ -115,30 +119,24 @@ static void cpuidle_idle_call(void)
* until a proper wakeup interrupt happens.
*/
if (idle_should_freeze()) {
- cpuidle_enter_freeze();
- local_irq_enable();
- goto exit_idle;
- }
+ entered_state = cpuidle_enter_freeze(drv, dev);
+ if (entered_state >= 0) {
+ local_irq_enable();
+ goto exit_idle;
+ }
- /*
- * Ask the cpuidle framework to choose a convenient idle state.
- * Fall back to the default arch idle method on errors.
- */
- next_state = cpuidle_select(drv, dev);
- if (next_state < 0) {
-use_default:
+ reflect = false;
+ next_state = cpuidle_find_deepest_state(drv, dev);
+ } else {
+ reflect = true;
/*
- * We can't use the cpuidle framework, let's use the default
- * idle routine.
+ * Ask the cpuidle framework to choose a convenient idle state.
*/
- if (current_clr_polling_and_test())
- local_irq_enable();
- else
- arch_cpu_idle();
-
- goto exit_idle;
+ next_state = cpuidle_select(drv, dev);
}
-
+ /* Fall back to the default arch idle method on errors. */
+ if (next_state < 0)
+ goto use_default;
/*
* The idle task must be scheduled, it is pointless to
@@ -183,7 +181,8 @@ use_default:
/*
* Give the governor an opportunity to reflect on the outcome
*/
- cpuidle_reflect(dev, entered_state);
+ if (reflect)
+ cpuidle_reflect(dev, entered_state);
exit_idle:
__current_set_polling();
@@ -196,6 +195,19 @@ exit_idle:
rcu_idle_exit();
start_critical_timings();
+ return;
+
+use_default:
+ /*
+ * We can't use the cpuidle framework, let's use the default
+ * idle routine.
+ */
+ if (current_clr_polling_and_test())
+ local_irq_enable();
+ else
+ arch_cpu_idle();
+
+ goto exit_idle;
}
/*
diff --git a/kernel/sys.c b/kernel/sys.c
index 667b2e62fad2..a03d9cd23ed7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1108,6 +1108,7 @@ DECLARE_RWSEM(uts_sem);
/*
* Work around broken programs that cannot handle "Linux 3.0".
* Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
+ * And we map 4.x to 2.6.60+x, so 4.0 would be 2.6.60.
*/
static int override_release(char __user *release, size_t len)
{
@@ -1127,7 +1128,7 @@ static int override_release(char __user *release, size_t len)
break;
rest++;
}
- v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
+ v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 60;
copy = clamp_t(size_t, len, 1, sizeof(buf));
copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
ret = copy_to_user(release, buf, copy + 1);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 45e5cb143d17..4f228024055b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1059,6 +1059,12 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
static struct pid * const ftrace_swapper_pid = &init_struct_pid;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static int ftrace_graph_active;
+#else
+# define ftrace_graph_active 0
+#endif
+
#ifdef CONFIG_DYNAMIC_FTRACE
static struct ftrace_ops *removed_ops;
@@ -2041,8 +2047,12 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
if (!ftrace_rec_count(rec))
rec->flags = 0;
else
- /* Just disable the record (keep REGS state) */
- rec->flags &= ~FTRACE_FL_ENABLED;
+ /*
+ * Just disable the record, but keep the ops TRAMP
+ * and REGS states. The _EN flags must be disabled though.
+ */
+ rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN |
+ FTRACE_FL_REGS_EN);
}
return FTRACE_UPDATE_MAKE_NOP;
@@ -2688,24 +2698,36 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
static void ftrace_startup_sysctl(void)
{
+ int command;
+
if (unlikely(ftrace_disabled))
return;
/* Force update next time */
saved_ftrace_func = NULL;
/* ftrace_start_up is true if we want ftrace running */
- if (ftrace_start_up)
- ftrace_run_update_code(FTRACE_UPDATE_CALLS);
+ if (ftrace_start_up) {
+ command = FTRACE_UPDATE_CALLS;
+ if (ftrace_graph_active)
+ command |= FTRACE_START_FUNC_RET;
+ ftrace_startup_enable(command);
+ }
}
static void ftrace_shutdown_sysctl(void)
{
+ int command;
+
if (unlikely(ftrace_disabled))
return;
/* ftrace_start_up is true if ftrace is running */
- if (ftrace_start_up)
- ftrace_run_update_code(FTRACE_DISABLE_CALLS);
+ if (ftrace_start_up) {
+ command = FTRACE_DISABLE_CALLS;
+ if (ftrace_graph_active)
+ command |= FTRACE_STOP_FUNC_RET;
+ ftrace_run_update_code(command);
+ }
}
static cycle_t ftrace_update_time;
@@ -5558,12 +5580,12 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
if (ftrace_enabled) {
- ftrace_startup_sysctl();
-
/* we are starting ftrace again */
if (ftrace_ops_list != &ftrace_list_end)
update_ftrace_function();
+ ftrace_startup_sysctl();
+
} else {
/* stopping ftrace calls (just send to ftrace_stub) */
ftrace_trace_function = ftrace_stub;
@@ -5590,8 +5612,6 @@ static struct ftrace_ops graph_ops = {
ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
};
-static int ftrace_graph_active;
-
int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
{
return 0;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f28849394791..41ff75b478c6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2728,19 +2728,57 @@ bool flush_work(struct work_struct *work)
}
EXPORT_SYMBOL_GPL(flush_work);
+struct cwt_wait {
+ wait_queue_t wait;
+ struct work_struct *work;
+};
+
+static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+ struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
+
+ if (cwait->work != key)
+ return 0;
+ return autoremove_wake_function(wait, mode, sync, key);
+}
+
static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
{
+ static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
unsigned long flags;
int ret;
do {
ret = try_to_grab_pending(work, is_dwork, &flags);
/*
- * If someone else is canceling, wait for the same event it
- * would be waiting for before retrying.
+ * If someone else is already canceling, wait for it to
+ * finish. flush_work() doesn't work for PREEMPT_NONE
+ * because we may get scheduled between @work's completion
+ * and the other canceling task resuming and clearing
+ * CANCELING - flush_work() will return false immediately
+ * as @work is no longer busy, try_to_grab_pending() will
+ * return -ENOENT as @work is still being canceled and the
+ * other canceling task won't be able to clear CANCELING as
+ * we're hogging the CPU.
+ *
+ * Let's wait for completion using a waitqueue. As this
+ * may lead to the thundering herd problem, use a custom
+ * wake function which matches @work along with exclusive
+ * wait and wakeup.
*/
- if (unlikely(ret == -ENOENT))
- flush_work(work);
+ if (unlikely(ret == -ENOENT)) {
+ struct cwt_wait cwait;
+
+ init_wait(&cwait.wait);
+ cwait.wait.func = cwt_wakefn;
+ cwait.work = work;
+
+ prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
+ TASK_UNINTERRUPTIBLE);
+ if (work_is_canceling(work))
+ schedule();
+ finish_wait(&cancel_waitq, &cwait.wait);
+ }
} while (unlikely(ret < 0));
/* tell other tasks trying to grab @work to back off */
@@ -2749,6 +2787,16 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
flush_work(work);
clear_work_data(work);
+
+ /*
+ * Paired with prepare_to_wait() above so that either
+ * waitqueue_active() is visible here or !work_is_canceling() is
+ * visible there.
+ */
+ smp_mb();
+ if (waitqueue_active(&cancel_waitq))
+ __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
+
return ret;
}