Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c                                                    2
-rw-r--r--  kernel/cpu.c                                                       2
-rw-r--r--  kernel/debug/kdb/kdb_main.c                                       12
-rw-r--r--  kernel/early_res.c                                                 6
-rw-r--r--  kernel/exec_domain.c                                              18
-rw-r--r--  kernel/futex.c                                                    17
-rw-r--r--  kernel/irq/manage.c                                                3
-rw-r--r--  kernel/kexec.c                                                     7
-rw-r--r--  kernel/module.c                                                  348
-rw-r--r--  kernel/perf_event.c                                              356
-rw-r--r--  kernel/power/Kconfig                                               9
-rw-r--r--  kernel/power/Makefile                                              2
-rw-r--r--  kernel/power/nvs.c (renamed from kernel/power/hibernate_nvs.c)   24
-rw-r--r--  kernel/power/suspend.c                                             6
-rw-r--r--  kernel/sched.c                                                   161
-rw-r--r--  kernel/sched_fair.c                                               24
-rw-r--r--  kernel/softirq.c                                                   2
-rw-r--r--  kernel/stop_machine.c                                              2
-rw-r--r--  kernel/sysctl.c                                                    8
-rw-r--r--  kernel/time/tick-sched.c                                          21
-rw-r--r--  kernel/timer.c                                                     2
-rw-r--r--  kernel/trace/blktrace.c                                            2
-rw-r--r--  kernel/trace/trace_event_perf.c                                   19
-rw-r--r--  kernel/trace/trace_kprobe.c                                        4
-rw-r--r--  kernel/trace/trace_syscalls.c                                      4
25 files changed, 639 insertions, 422 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 422cb19f156e..3ac6f5b0a64b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4598,7 +4598,7 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
parent_css = parent->subsys[subsys_id];
child_css = child->subsys[subsys_id];
parent_id = parent_css->id;
- depth = parent_id->depth;
+ depth = parent_id->depth + 1;
child_id = get_new_cssid(ss, depth);
if (IS_ERR(child_id))
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8b92539b4754..97d1b426a4ac 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -34,7 +34,7 @@ void cpu_maps_update_done(void)
mutex_unlock(&cpu_add_remove_lock);
}
-static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
+static RAW_NOTIFIER_HEAD(cpu_chain);
/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
* Should always be manipulated under cpu_add_remove_lock
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index b724c791b6d4..184cd8209c36 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1857,12 +1857,6 @@ static int kdb_ef(int argc, const char **argv)
}
#if defined(CONFIG_MODULES)
-/* modules using other modules */
-struct module_use {
- struct list_head list;
- struct module *module_which_uses;
-};
-
/*
* kdb_lsmod - This function implements the 'lsmod' command. Lists
* currently loaded kernel modules.
@@ -1894,9 +1888,9 @@ static int kdb_lsmod(int argc, const char **argv)
{
struct module_use *use;
kdb_printf(" [ ");
- list_for_each_entry(use, &mod->modules_which_use_me,
- list)
- kdb_printf("%s ", use->module_which_uses->name);
+ list_for_each_entry(use, &mod->source_list,
+ source_list)
+ kdb_printf("%s ", use->target->name);
kdb_printf("]\n");
}
#endif
diff --git a/kernel/early_res.c b/kernel/early_res.c
index 31aa9332ef3f..7bfae887f211 100644
--- a/kernel/early_res.c
+++ b/kernel/early_res.c
@@ -7,6 +7,8 @@
#include <linux/bootmem.h>
#include <linux/mm.h>
#include <linux/early_res.h>
+#include <linux/slab.h>
+#include <linux/kmemleak.h>
/*
* Early reserved memory areas.
@@ -319,6 +321,8 @@ void __init free_early(u64 start, u64 end)
struct early_res *r;
int i;
+ kmemleak_free_part(__va(start), end - start);
+
i = find_overlapped_early(start, end);
r = &early_res[i];
if (i >= max_early_res || r->end != end || r->start != start)
@@ -333,6 +337,8 @@ void __init free_early_partial(u64 start, u64 end)
struct early_res *r;
int i;
+ kmemleak_free_part(__va(start), end - start);
+
if (start == end)
return;
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index c35452cadded..dd62f8e714ca 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -27,7 +27,7 @@ static struct exec_domain *exec_domains = &default_exec_domain;
static DEFINE_RWLOCK(exec_domains_lock);
-static u_long ident_map[32] = {
+static unsigned long ident_map[32] = {
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
@@ -56,10 +56,10 @@ default_handler(int segment, struct pt_regs *regp)
}
static struct exec_domain *
-lookup_exec_domain(u_long personality)
+lookup_exec_domain(unsigned int personality)
{
- struct exec_domain * ep;
- u_long pers = personality(personality);
+ unsigned int pers = personality(personality);
+ struct exec_domain *ep;
read_lock(&exec_domains_lock);
for (ep = exec_domains; ep; ep = ep->next) {
@@ -70,7 +70,7 @@ lookup_exec_domain(u_long personality)
#ifdef CONFIG_MODULES
read_unlock(&exec_domains_lock);
- request_module("personality-%ld", pers);
+ request_module("personality-%d", pers);
read_lock(&exec_domains_lock);
for (ep = exec_domains; ep; ep = ep->next) {
@@ -135,7 +135,7 @@ unregister:
}
int
-__set_personality(u_long personality)
+__set_personality(unsigned int personality)
{
struct exec_domain *ep, *oep;
@@ -188,9 +188,9 @@ static int __init proc_execdomains_init(void)
module_init(proc_execdomains_init);
#endif
-SYSCALL_DEFINE1(personality, u_long, personality)
+SYSCALL_DEFINE1(personality, unsigned int, personality)
{
- u_long old = current->personality;
+ unsigned int old = current->personality;
if (personality != 0xffffffff) {
set_personality(personality);
@@ -198,7 +198,7 @@ SYSCALL_DEFINE1(personality, u_long, personality)
return -EINVAL;
}
- return (long)old;
+ return old;
}
diff --git a/kernel/futex.c b/kernel/futex.c
index e7a35f1039e7..6a3a5fa1526d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -429,20 +429,11 @@ static void free_pi_state(struct futex_pi_state *pi_state)
static struct task_struct * futex_find_get_task(pid_t pid)
{
struct task_struct *p;
- const struct cred *cred = current_cred(), *pcred;
rcu_read_lock();
p = find_task_by_vpid(pid);
- if (!p) {
- p = ERR_PTR(-ESRCH);
- } else {
- pcred = __task_cred(p);
- if (cred->euid != pcred->euid &&
- cred->euid != pcred->uid)
- p = ERR_PTR(-ESRCH);
- else
- get_task_struct(p);
- }
+ if (p)
+ get_task_struct(p);
rcu_read_unlock();
@@ -564,8 +555,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
if (!pid)
return -ESRCH;
p = futex_find_get_task(pid);
- if (IS_ERR(p))
- return PTR_ERR(p);
+ if (!p)
+ return -ESRCH;
/*
* We need to look at the task state flags to figure out,
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3164ba7ce151..e1497481fe8a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -456,6 +456,9 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
/* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK);
desc->status |= flags;
+
+ if (chip != desc->chip)
+ irq_chip_set_defaults(desc->chip);
}
return ret;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 474a84715eac..131b1703936f 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1089,9 +1089,10 @@ void crash_kexec(struct pt_regs *regs)
size_t crash_get_memory_size(void)
{
- size_t size;
+ size_t size = 0;
mutex_lock(&kexec_mutex);
- size = crashk_res.end - crashk_res.start + 1;
+ if (crashk_res.end != crashk_res.start)
+ size = crashk_res.end - crashk_res.start + 1;
mutex_unlock(&kexec_mutex);
return size;
}
@@ -1134,7 +1135,7 @@ int crash_shrink_memory(unsigned long new_size)
free_reserved_phys_range(end, crashk_res.end);
- if (start == end)
+ if ((start == end) && (crashk_res.parent != NULL))
release_resource(&crashk_res);
crashk_res.end = end - 1;
diff --git a/kernel/module.c b/kernel/module.c
index 333fbcc96978..5d2d28197c82 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -72,7 +72,11 @@
/* If this is set, the section belongs in the init part of the module */
#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
-/* List of modules, protected by module_mutex or preempt_disable
+/*
+ * Mutex protects:
+ * 1) List of modules (also safely readable with preempt_disable),
+ * 2) module_use links,
+ * 3) module_addr_min/module_addr_max.
* (delete uses stop_machine/add uses RCU list operations). */
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
@@ -90,7 +94,8 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
static BLOCKING_NOTIFIER_HEAD(module_notify_list);
-/* Bounds of module allocation, for speeding __module_address */
+/* Bounds of module allocation, for speeding __module_address.
+ * Protected by module_mutex. */
static unsigned long module_addr_min = -1UL, module_addr_max = 0;
int register_module_notifier(struct notifier_block * nb)
@@ -329,7 +334,7 @@ static bool find_symbol_in_section(const struct symsearch *syms,
}
/* Find a symbol and return it, along with, (optional) crc and
- * (optional) module which owns it */
+ * (optional) module which owns it. Needs preempt disabled or module_mutex. */
const struct kernel_symbol *find_symbol(const char *name,
struct module **owner,
const unsigned long **crc,
@@ -403,7 +408,7 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
const char *secstrings)
{
- return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
+ return find_sec(hdr, sechdrs, secstrings, ".data..percpu");
}
static void percpu_modcopy(struct module *mod,
@@ -523,7 +528,8 @@ static void module_unload_init(struct module *mod)
{
int cpu;
- INIT_LIST_HEAD(&mod->modules_which_use_me);
+ INIT_LIST_HEAD(&mod->source_list);
+ INIT_LIST_HEAD(&mod->target_list);
for_each_possible_cpu(cpu) {
per_cpu_ptr(mod->refptr, cpu)->incs = 0;
per_cpu_ptr(mod->refptr, cpu)->decs = 0;
@@ -535,20 +541,13 @@ static void module_unload_init(struct module *mod)
mod->waiter = current;
}
-/* modules using other modules */
-struct module_use
-{
- struct list_head list;
- struct module *module_which_uses;
-};
-
/* Does a already use b? */
static int already_uses(struct module *a, struct module *b)
{
struct module_use *use;
- list_for_each_entry(use, &b->modules_which_use_me, list) {
- if (use->module_which_uses == a) {
+ list_for_each_entry(use, &b->source_list, source_list) {
+ if (use->source == a) {
DEBUGP("%s uses %s!\n", a->name, b->name);
return 1;
}
@@ -557,62 +556,68 @@ static int already_uses(struct module *a, struct module *b)
return 0;
}
-/* Module a uses b */
-int use_module(struct module *a, struct module *b)
+/*
+ * Module a uses b
+ * - we add 'a' as a "source", 'b' as a "target" of module use
+ * - the module_use is added to the list of 'b' sources (so
+ * 'b' can walk the list to see who sourced them), and of 'a'
+ * targets (so 'a' can see what modules it targets).
+ */
+static int add_module_usage(struct module *a, struct module *b)
{
struct module_use *use;
- int no_warn, err;
- if (b == NULL || already_uses(a, b)) return 1;
+ DEBUGP("Allocating new usage for %s.\n", a->name);
+ use = kmalloc(sizeof(*use), GFP_ATOMIC);
+ if (!use) {
+ printk(KERN_WARNING "%s: out of memory loading\n", a->name);
+ return -ENOMEM;
+ }
+
+ use->source = a;
+ use->target = b;
+ list_add(&use->source_list, &b->source_list);
+ list_add(&use->target_list, &a->target_list);
+ return 0;
+}
- /* If we're interrupted or time out, we fail. */
- if (wait_event_interruptible_timeout(
- module_wq, (err = strong_try_module_get(b)) != -EBUSY,
- 30 * HZ) <= 0) {
- printk("%s: gave up waiting for init of module %s.\n",
- a->name, b->name);
+/* Module a uses b: caller needs module_mutex() */
+int ref_module(struct module *a, struct module *b)
+{
+ int err;
+
+ if (b == NULL || already_uses(a, b))
return 0;
- }
- /* If strong_try_module_get() returned a different error, we fail. */
+ /* If module isn't available, we fail. */
+ err = strong_try_module_get(b);
if (err)
- return 0;
+ return err;
- DEBUGP("Allocating new usage for %s.\n", a->name);
- use = kmalloc(sizeof(*use), GFP_ATOMIC);
- if (!use) {
- printk("%s: out of memory loading\n", a->name);
+ err = add_module_usage(a, b);
+ if (err) {
module_put(b);
- return 0;
+ return err;
}
-
- use->module_which_uses = a;
- list_add(&use->list, &b->modules_which_use_me);
- no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name);
- return 1;
+ return 0;
}
-EXPORT_SYMBOL_GPL(use_module);
+EXPORT_SYMBOL_GPL(ref_module);
/* Clear the unload stuff of the module. */
static void module_unload_free(struct module *mod)
{
- struct module *i;
-
- list_for_each_entry(i, &modules, list) {
- struct module_use *use;
+ struct module_use *use, *tmp;
- list_for_each_entry(use, &i->modules_which_use_me, list) {
- if (use->module_which_uses == mod) {
- DEBUGP("%s unusing %s\n", mod->name, i->name);
- module_put(i);
- list_del(&use->list);
- kfree(use);
- sysfs_remove_link(i->holders_dir, mod->name);
- /* There can be at most one match. */
- break;
- }
- }
+ mutex_lock(&module_mutex);
+ list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) {
+ struct module *i = use->target;
+ DEBUGP("%s unusing %s\n", mod->name, i->name);
+ module_put(i);
+ list_del(&use->source_list);
+ list_del(&use->target_list);
+ kfree(use);
}
+ mutex_unlock(&module_mutex);
}
#ifdef CONFIG_MODULE_FORCE_UNLOAD
@@ -735,7 +740,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
goto out;
}
- if (!list_empty(&mod->modules_which_use_me)) {
+ if (!list_empty(&mod->source_list)) {
/* Other modules depend on us: get rid of them first. */
ret = -EWOULDBLOCK;
goto out;
@@ -779,13 +784,14 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_GOING, mod);
async_synchronize_full();
- mutex_lock(&module_mutex);
+
/* Store the name of the last unloaded module for diagnostic purposes */
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
ddebug_remove_module(mod->name);
- free_module(mod);
- out:
+ free_module(mod);
+ return 0;
+out:
mutex_unlock(&module_mutex);
return ret;
}
@@ -799,9 +805,9 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
/* Always include a trailing , so userspace can differentiate
between this and the old multi-field proc format. */
- list_for_each_entry(use, &mod->modules_which_use_me, list) {
+ list_for_each_entry(use, &mod->source_list, source_list) {
printed_something = 1;
- seq_printf(m, "%s,", use->module_which_uses->name);
+ seq_printf(m, "%s,", use->source->name);
}
if (mod->init != NULL && mod->exit == NULL) {
@@ -880,11 +886,11 @@ static inline void module_unload_free(struct module *mod)
{
}
-int use_module(struct module *a, struct module *b)
+int ref_module(struct module *a, struct module *b)
{
- return strong_try_module_get(b) == 0;
+ return strong_try_module_get(b);
}
-EXPORT_SYMBOL_GPL(use_module);
+EXPORT_SYMBOL_GPL(ref_module);
static inline void module_unload_init(struct module *mod)
{
@@ -1001,6 +1007,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
{
const unsigned long *crc;
+ /* Since this should be found in kernel (which can't be removed),
+ * no locking is necessary. */
if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL,
&crc, true, false))
BUG();
@@ -1043,29 +1051,62 @@ static inline int same_magic(const char *amagic, const char *bmagic,
}
#endif /* CONFIG_MODVERSIONS */
-/* Resolve a symbol for this module. I.e. if we find one, record usage.
- Must be holding module_mutex. */
+/* Resolve a symbol for this module. I.e. if we find one, record usage. */
static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
unsigned int versindex,
const char *name,
- struct module *mod)
+ struct module *mod,
+ char ownername[])
{
struct module *owner;
const struct kernel_symbol *sym;
const unsigned long *crc;
+ int err;
+ mutex_lock(&module_mutex);
sym = find_symbol(name, &owner, &crc,
!(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
- /* use_module can fail due to OOM,
- or module initialization or unloading */
- if (sym) {
- if (!check_version(sechdrs, versindex, name, mod, crc, owner)
- || !use_module(mod, owner))
- sym = NULL;
+ if (!sym)
+ goto unlock;
+
+ if (!check_version(sechdrs, versindex, name, mod, crc, owner)) {
+ sym = ERR_PTR(-EINVAL);
+ goto getname;
}
+
+ err = ref_module(mod, owner);
+ if (err) {
+ sym = ERR_PTR(err);
+ goto getname;
+ }
+
+getname:
+ /* We must make copy under the lock if we failed to get ref. */
+ strncpy(ownername, module_name(owner), MODULE_NAME_LEN);
+unlock:
+ mutex_unlock(&module_mutex);
return sym;
}
+static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs,
+ unsigned int versindex,
+ const char *name,
+ struct module *mod)
+{
+ const struct kernel_symbol *ksym;
+ char ownername[MODULE_NAME_LEN];
+
+ if (wait_event_interruptible_timeout(module_wq,
+ !IS_ERR(ksym = resolve_symbol(sechdrs, versindex, name,
+ mod, ownername)) ||
+ PTR_ERR(ksym) != -EBUSY,
+ 30 * HZ) <= 0) {
+ printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n",
+ mod->name, ownername);
+ }
+ return ksym;
+}
+
/*
* /sys/module/foo/sections stuff
* J. Corbet <corbet@lwn.net>
@@ -1295,7 +1336,34 @@ static inline void remove_notes_attrs(struct module *mod)
#endif
#ifdef CONFIG_SYSFS
-int module_add_modinfo_attrs(struct module *mod)
+static void add_usage_links(struct module *mod)
+{
+#ifdef CONFIG_MODULE_UNLOAD
+ struct module_use *use;
+ int nowarn;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(use, &mod->target_list, target_list) {
+ nowarn = sysfs_create_link(use->target->holders_dir,
+ &mod->mkobj.kobj, mod->name);
+ }
+ mutex_unlock(&module_mutex);
+#endif
+}
+
+static void del_usage_links(struct module *mod)
+{
+#ifdef CONFIG_MODULE_UNLOAD
+ struct module_use *use;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(use, &mod->target_list, target_list)
+ sysfs_remove_link(use->target->holders_dir, mod->name);
+ mutex_unlock(&module_mutex);
+#endif
+}
+
+static int module_add_modinfo_attrs(struct module *mod)
{
struct module_attribute *attr;
struct module_attribute *temp_attr;
@@ -1321,7 +1389,7 @@ int module_add_modinfo_attrs(struct module *mod)
return error;
}
-void module_remove_modinfo_attrs(struct module *mod)
+static void module_remove_modinfo_attrs(struct module *mod)
{
struct module_attribute *attr;
int i;
@@ -1337,7 +1405,7 @@ void module_remove_modinfo_attrs(struct module *mod)
kfree(mod->modinfo_attrs);
}
-int mod_sysfs_init(struct module *mod)
+static int mod_sysfs_init(struct module *mod)
{
int err;
struct kobject *kobj;
@@ -1371,12 +1439,16 @@ out:
return err;
}
-int mod_sysfs_setup(struct module *mod,
+static int mod_sysfs_setup(struct module *mod,
struct kernel_param *kparam,
unsigned int num_params)
{
int err;
+ err = mod_sysfs_init(mod);
+ if (err)
+ goto out;
+
mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj);
if (!mod->holders_dir) {
err = -ENOMEM;
@@ -1391,6 +1463,8 @@ int mod_sysfs_setup(struct module *mod,
if (err)
goto out_unreg_param;
+ add_usage_links(mod);
+
kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
return 0;
@@ -1400,6 +1474,7 @@ out_unreg_holders:
kobject_put(mod->holders_dir);
out_unreg:
kobject_put(&mod->mkobj.kobj);
+out:
return err;
}
@@ -1410,14 +1485,40 @@ static void mod_sysfs_fini(struct module *mod)
#else /* CONFIG_SYSFS */
+static inline int mod_sysfs_init(struct module *mod)
+{
+ return 0;
+}
+
+static inline int mod_sysfs_setup(struct module *mod,
+ struct kernel_param *kparam,
+ unsigned int num_params)
+{
+ return 0;
+}
+
+static inline int module_add_modinfo_attrs(struct module *mod)
+{
+ return 0;
+}
+
+static inline void module_remove_modinfo_attrs(struct module *mod)
+{
+}
+
static void mod_sysfs_fini(struct module *mod)
{
}
+static void del_usage_links(struct module *mod)
+{
+}
+
#endif /* CONFIG_SYSFS */
static void mod_kobject_remove(struct module *mod)
{
+ del_usage_links(mod);
module_remove_modinfo_attrs(mod);
module_param_sysfs_remove(mod);
kobject_put(mod->mkobj.drivers_dir);
@@ -1436,13 +1537,15 @@ static int __unlink_module(void *_mod)
return 0;
}
-/* Free a module, remove from lists, etc (must hold module_mutex). */
+/* Free a module, remove from lists, etc. */
static void free_module(struct module *mod)
{
trace_module_free(mod);
/* Delete from various lists */
+ mutex_lock(&module_mutex);
stop_machine(__unlink_module, mod, NULL);
+ mutex_unlock(&module_mutex);
remove_notes_attrs(mod);
remove_sect_attrs(mod);
mod_kobject_remove(mod);
@@ -1493,6 +1596,8 @@ EXPORT_SYMBOL_GPL(__symbol_get);
/*
* Ensure that an exported symbol [global namespace] does not already exist
* in the kernel or in some other module's exported symbol table.
+ *
+ * You must hold the module_mutex.
*/
static int verify_export_symbols(struct module *mod)
{
@@ -1558,21 +1663,23 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
break;
case SHN_UNDEF:
- ksym = resolve_symbol(sechdrs, versindex,
- strtab + sym[i].st_name, mod);
+ ksym = resolve_symbol_wait(sechdrs, versindex,
+ strtab + sym[i].st_name,
+ mod);
/* Ok if resolved. */
- if (ksym) {
+ if (ksym && !IS_ERR(ksym)) {
sym[i].st_value = ksym->value;
break;
}
/* Ok if weak. */
- if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK)
+ if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK)
break;
- printk(KERN_WARNING "%s: Unknown symbol %s\n",
- mod->name, strtab + sym[i].st_name);
- ret = -ENOENT;
+ printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n",
+ mod->name, strtab + sym[i].st_name,
+ PTR_ERR(ksym));
+ ret = PTR_ERR(ksym) ?: -ENOENT;
break;
default:
@@ -1955,16 +2062,24 @@ static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num)
#endif
}
+static void dynamic_debug_remove(struct _ddebug *debug)
+{
+ if (debug)
+ ddebug_remove_module(debug->modname);
+}
+
static void *module_alloc_update_bounds(unsigned long size)
{
void *ret = module_alloc(size);
if (ret) {
+ mutex_lock(&module_mutex);
/* Update module bounds. */
if ((unsigned long)ret < module_addr_min)
module_addr_min = (unsigned long)ret;
if ((unsigned long)ret + size > module_addr_max)
module_addr_max = (unsigned long)ret + size;
+ mutex_unlock(&module_mutex);
}
return ret;
}
@@ -2014,6 +2129,9 @@ static noinline struct module *load_module(void __user *umod,
long err = 0;
void *ptr = NULL; /* Stops spurious gcc warning */
unsigned long symoffs, stroffs, *strmap;
+ void __percpu *percpu;
+ struct _ddebug *debug = NULL;
+ unsigned int num_debug = 0;
mm_segment_t old_fs;
@@ -2138,11 +2256,6 @@ static noinline struct module *load_module(void __user *umod,
goto free_mod;
}
- if (find_module(mod->name)) {
- err = -EEXIST;
- goto free_mod;
- }
-
mod->state = MODULE_STATE_COMING;
/* Allow arches to frob section contents and sizes. */
@@ -2158,6 +2271,8 @@ static noinline struct module *load_module(void __user *umod,
goto free_mod;
sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
}
+ /* Keep this around for failure path. */
+ percpu = mod_percpu(mod);
/* Determine total sizes, and put offsets in sh_entsize. For now
this is done generically; there doesn't appear to be any
@@ -2231,11 +2346,6 @@ static noinline struct module *load_module(void __user *umod,
/* Now we've moved module, initialize linked lists, etc. */
module_unload_init(mod);
- /* add kobject, so we can reference it. */
- err = mod_sysfs_init(mod);
- if (err)
- goto free_unload;
-
/* Set up license info based on the info section */
set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
@@ -2360,11 +2470,6 @@ static noinline struct module *load_module(void __user *umod,
goto cleanup;
}
- /* Find duplicate symbols */
- err = verify_export_symbols(mod);
- if (err < 0)
- goto cleanup;
-
/* Set up and sort exception table */
mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
sizeof(*mod->extable), &mod->num_exentries);
@@ -2379,15 +2484,9 @@ static noinline struct module *load_module(void __user *umod,
kfree(strmap);
strmap = NULL;
- if (!mod->taints) {
- struct _ddebug *debug;
- unsigned int num_debug;
-
+ if (!mod->taints)
debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
sizeof(*debug), &num_debug);
- if (debug)
- dynamic_debug_setup(debug, num_debug);
- }
err = module_finalize(hdr, sechdrs, mod);
if (err < 0)
@@ -2423,7 +2522,22 @@ static noinline struct module *load_module(void __user *umod,
* function to insert in a way safe to concurrent readers.
* The mutex protects against concurrent writers.
*/
+ mutex_lock(&module_mutex);
+ if (find_module(mod->name)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ if (debug)
+ dynamic_debug_setup(debug, num_debug);
+
+ /* Find duplicate symbols */
+ err = verify_export_symbols(mod);
+ if (err < 0)
+ goto ddebug;
+
list_add_rcu(&mod->list, &modules);
+ mutex_unlock(&module_mutex);
err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);
if (err < 0)
@@ -2432,6 +2546,7 @@ static noinline struct module *load_module(void __user *umod,
err = mod_sysfs_setup(mod, mod->kp, mod->num_kp);
if (err < 0)
goto unlink;
+
add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
@@ -2444,15 +2559,17 @@ static noinline struct module *load_module(void __user *umod,
return mod;
unlink:
+ mutex_lock(&module_mutex);
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
+ ddebug:
+ dynamic_debug_remove(debug);
+ unlock:
+ mutex_unlock(&module_mutex);
synchronize_sched();
module_arch_cleanup(mod);
cleanup:
free_modinfo(mod);
- kobject_del(&mod->mkobj.kobj);
- kobject_put(&mod->mkobj.kobj);
- free_unload:
module_unload_free(mod);
#if defined(CONFIG_MODULE_UNLOAD)
free_percpu(mod->refptr);
@@ -2463,7 +2580,7 @@ static noinline struct module *load_module(void __user *umod,
module_free(mod, mod->module_core);
/* mod will be freed with core. Don't access it beyond this line! */
free_percpu:
- percpu_modfree(mod);
+ free_percpu(percpu);
free_mod:
kfree(args);
kfree(strmap);
@@ -2499,19 +2616,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
if (!capable(CAP_SYS_MODULE) || modules_disabled)
return -EPERM;
- /* Only one module load at a time, please */
- if (mutex_lock_interruptible(&module_mutex) != 0)
- return -EINTR;
-
/* Do all the hard work */
mod = load_module(umod, len, uargs);
- if (IS_ERR(mod)) {
- mutex_unlock(&module_mutex);
+ if (IS_ERR(mod))
return PTR_ERR(mod);
- }
-
- /* Drop lock so they can recurse */
- mutex_unlock(&module_mutex);
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_COMING, mod);
@@ -2528,9 +2636,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
module_put(mod);
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_GOING, mod);
- mutex_lock(&module_mutex);
free_module(mod);
- mutex_unlock(&module_mutex);
wake_up(&module_wq);
return ret;
}
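
Note on the module dependency rework above: each "a uses b" relationship is now a single struct module_use linked into two lists at once, so b can walk the modules that use it (its source_list) and a can walk the modules it depends on (its target_list) without scanning every loaded module, with module_mutex serializing the link updates instead of the whole module load. A minimal, self-contained sketch of that bookkeeping with simplified types (mod, mod_use and link_usage are illustrative names, not the kernel's real struct module API):

    #include <linux/list.h>
    #include <linux/slab.h>

    struct mod;                             /* stand-in for struct module */

    struct mod_use {                        /* one "a uses b" edge */
            struct list_head source_list;   /* hangs off b->source_list */
            struct list_head target_list;   /* hangs off a->target_list */
            struct mod *source;             /* a: the module doing the using */
            struct mod *target;             /* b: the module being used */
    };

    struct mod {
            struct list_head source_list;   /* who uses me: entries link here via mod_use.source_list */
            struct list_head target_list;   /* whom I use: entries link here via mod_use.target_list */
    };

    /* Record that a uses b; caller holds the equivalent of module_mutex. */
    static int link_usage(struct mod *a, struct mod *b)
    {
            struct mod_use *use = kmalloc(sizeof(*use), GFP_ATOMIC);

            if (!use)
                    return -ENOMEM;
            use->source = a;
            use->target = b;
            list_add(&use->source_list, &b->source_list);
            list_add(&use->target_list, &a->target_list);
            return 0;
    }
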
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index bd7ce8ca5bb9..ff86c558af4c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -283,14 +283,15 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
static void
list_add_event(struct perf_event *event, struct perf_event_context *ctx)
{
- struct perf_event *group_leader = event->group_leader;
+ WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
+ event->attach_state |= PERF_ATTACH_CONTEXT;
/*
- * Depending on whether it is a standalone or sibling event,
- * add it straight to the context's event list, or to the group
- * leader's sibling list:
+ * If we're a stand alone event or group leader, we go to the context
+ * list, group events are kept attached to the group so that
+ * perf_group_detach can, at all times, locate all siblings.
*/
- if (group_leader == event) {
+ if (event->group_leader == event) {
struct list_head *list;
if (is_software_event(event))
@@ -298,13 +299,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
list = ctx_group_list(event, ctx);
list_add_tail(&event->group_entry, list);
- } else {
- if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
- !is_software_event(event))
- group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
-
- list_add_tail(&event->group_entry, &group_leader->sibling_list);
- group_leader->nr_siblings++;
}
list_add_rcu(&event->event_entry, &ctx->event_list);
@@ -313,6 +307,24 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
ctx->nr_stat++;
}
+static void perf_group_attach(struct perf_event *event)
+{
+ struct perf_event *group_leader = event->group_leader;
+
+ WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP);
+ event->attach_state |= PERF_ATTACH_GROUP;
+
+ if (group_leader == event)
+ return;
+
+ if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
+ !is_software_event(event))
+ group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+
+ list_add_tail(&event->group_entry, &group_leader->sibling_list);
+ group_leader->nr_siblings++;
+}
+
/*
* Remove a event from the lists for its context.
* Must be called with ctx->mutex and ctx->lock held.
@@ -320,17 +332,22 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
static void
list_del_event(struct perf_event *event, struct perf_event_context *ctx)
{
- if (list_empty(&event->group_entry))
+ /*
+ * We can have double detach due to exit/hot-unplug + close.
+ */
+ if (!(event->attach_state & PERF_ATTACH_CONTEXT))
return;
+
+ event->attach_state &= ~PERF_ATTACH_CONTEXT;
+
ctx->nr_events--;
if (event->attr.inherit_stat)
ctx->nr_stat--;
- list_del_init(&event->group_entry);
list_del_rcu(&event->event_entry);
- if (event->group_leader != event)
- event->group_leader->nr_siblings--;
+ if (event->group_leader == event)
+ list_del_init(&event->group_entry);
update_group_times(event);
@@ -345,21 +362,39 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
event->state = PERF_EVENT_STATE_OFF;
}
-static void
-perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx)
+static void perf_group_detach(struct perf_event *event)
{
struct perf_event *sibling, *tmp;
+ struct list_head *list = NULL;
+
+ /*
+ * We can have double detach due to exit/hot-unplug + close.
+ */
+ if (!(event->attach_state & PERF_ATTACH_GROUP))
+ return;
+
+ event->attach_state &= ~PERF_ATTACH_GROUP;
+
+ /*
+ * If this is a sibling, remove it from its group.
+ */
+ if (event->group_leader != event) {
+ list_del_init(&event->group_entry);
+ event->group_leader->nr_siblings--;
+ return;
+ }
+
+ if (!list_empty(&event->group_entry))
+ list = &event->group_entry;
/*
* If this was a group event with sibling events then
* upgrade the siblings to singleton events by adding them
- * to the context list directly:
+ * to whatever list we are on.
*/
list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
- struct list_head *list;
-
- list = ctx_group_list(event, ctx);
- list_move_tail(&sibling->group_entry, list);
+ if (list)
+ list_move_tail(&sibling->group_entry, list);
sibling->group_leader = sibling;
/* Inherit group flags from the previous leader */
@@ -652,8 +687,11 @@ group_sched_in(struct perf_event *group_event,
if (txn)
pmu->start_txn(pmu);
- if (event_sched_in(group_event, cpuctx, ctx))
+ if (event_sched_in(group_event, cpuctx, ctx)) {
+ if (txn)
+ pmu->cancel_txn(pmu);
return -EAGAIN;
+ }
/*
* Schedule in siblings as one group (if any):
@@ -675,9 +713,6 @@ group_sched_in(struct perf_event *group_event,
}
group_error:
- if (txn)
- pmu->cancel_txn(pmu);
-
/*
* Groups can be scheduled in as one unit only, so undo any
* partial group before returning:
@@ -689,6 +724,9 @@ group_error:
}
event_sched_out(group_event, cpuctx, ctx);
+ if (txn)
+ pmu->cancel_txn(pmu);
+
return -EAGAIN;
}
@@ -727,6 +765,7 @@ static void add_event_to_ctx(struct perf_event *event,
struct perf_event_context *ctx)
{
list_add_event(event, ctx);
+ perf_group_attach(event);
event->tstamp_enabled = ctx->time;
event->tstamp_running = ctx->time;
event->tstamp_stopped = ctx->time;
@@ -1468,6 +1507,9 @@ do { \
divisor = nsec * frequency;
}
+ if (!divisor)
+ return dividend;
+
return div64_u64(dividend, divisor);
}
@@ -1490,7 +1532,7 @@ static int perf_event_start(struct perf_event *event)
static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
{
struct hw_perf_event *hwc = &event->hw;
- u64 period, sample_period;
+ s64 period, sample_period;
s64 delta;
period = perf_calculate_period(event, nsec, count);
@@ -1841,6 +1883,7 @@ static void free_event_rcu(struct rcu_head *head)
}
static void perf_pending_sync(struct perf_event *event);
+static void perf_mmap_data_put(struct perf_mmap_data *data);
static void free_event(struct perf_event *event)
{
@@ -1856,9 +1899,9 @@ static void free_event(struct perf_event *event)
atomic_dec(&nr_task_events);
}
- if (event->output) {
- fput(event->output->filp);
- event->output = NULL;
+ if (event->data) {
+ perf_mmap_data_put(event->data);
+ event->data = NULL;
}
if (event->destroy)
@@ -1893,8 +1936,8 @@ int perf_event_release_kernel(struct perf_event *event)
*/
mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
raw_spin_lock_irq(&ctx->lock);
+ perf_group_detach(event);
list_del_event(event, ctx);
- perf_destroy_group(event, ctx);
raw_spin_unlock_irq(&ctx->lock);
mutex_unlock(&ctx->mutex);
@@ -2175,7 +2218,27 @@ unlock:
return ret;
}
-static int perf_event_set_output(struct perf_event *event, int output_fd);
+static const struct file_operations perf_fops;
+
+static struct perf_event *perf_fget_light(int fd, int *fput_needed)
+{
+ struct file *file;
+
+ file = fget_light(fd, fput_needed);
+ if (!file)
+ return ERR_PTR(-EBADF);
+
+ if (file->f_op != &perf_fops) {
+ fput_light(file, *fput_needed);
+ *fput_needed = 0;
+ return ERR_PTR(-EBADF);
+ }
+
+ return file->private_data;
+}
+
+static int perf_event_set_output(struct perf_event *event,
+ struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -2202,7 +2265,23 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return perf_event_period(event, (u64 __user *)arg);
case PERF_EVENT_IOC_SET_OUTPUT:
- return perf_event_set_output(event, arg);
+ {
+ struct perf_event *output_event = NULL;
+ int fput_needed = 0;
+ int ret;
+
+ if (arg != -1) {
+ output_event = perf_fget_light(arg, &fput_needed);
+ if (IS_ERR(output_event))
+ return PTR_ERR(output_event);
+ }
+
+ ret = perf_event_set_output(event, output_event);
+ if (output_event)
+ fput_light(output_event->filp, fput_needed);
+
+ return ret;
+ }
case PERF_EVENT_IOC_SET_FILTER:
return perf_event_set_filter(event, (void __user *)arg);
@@ -2335,8 +2414,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
unsigned long size;
int i;
- WARN_ON(atomic_read(&event->mmap_count));
-
size = sizeof(struct perf_mmap_data);
size += nr_pages * sizeof(void *);
@@ -2452,8 +2529,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
unsigned long size;
void *all_buf;
- WARN_ON(atomic_read(&event->mmap_count));
-
size = sizeof(struct perf_mmap_data);
size += sizeof(void *);
@@ -2536,7 +2611,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
if (!data->watermark)
data->watermark = max_size / 2;
-
+ atomic_set(&data->refcount, 1);
rcu_assign_pointer(event->data, data);
}
@@ -2548,13 +2623,26 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
perf_mmap_data_free(data);
}
-static void perf_mmap_data_release(struct perf_event *event)
+static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
{
- struct perf_mmap_data *data = event->data;
+ struct perf_mmap_data *data;
+
+ rcu_read_lock();
+ data = rcu_dereference(event->data);
+ if (data) {
+ if (!atomic_inc_not_zero(&data->refcount))
+ data = NULL;
+ }
+ rcu_read_unlock();
+
+ return data;
+}
- WARN_ON(atomic_read(&event->mmap_count));
+static void perf_mmap_data_put(struct perf_mmap_data *data)
+{
+ if (!atomic_dec_and_test(&data->refcount))
+ return;
- rcu_assign_pointer(event->data, NULL);
call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
}
@@ -2569,15 +2657,18 @@ static void perf_mmap_close(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;
- WARN_ON_ONCE(event->ctx->parent_ctx);
if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
unsigned long size = perf_data_size(event->data);
- struct user_struct *user = current_user();
+ struct user_struct *user = event->mmap_user;
+ struct perf_mmap_data *data = event->data;
atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
- vma->vm_mm->locked_vm -= event->data->nr_locked;
- perf_mmap_data_release(event);
+ vma->vm_mm->locked_vm -= event->mmap_locked;
+ rcu_assign_pointer(event->data, NULL);
mutex_unlock(&event->mmap_mutex);
+
+ perf_mmap_data_put(data);
+ free_uid(user);
}
}
@@ -2629,13 +2720,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
WARN_ON_ONCE(event->ctx->parent_ctx);
mutex_lock(&event->mmap_mutex);
- if (event->output) {
- ret = -EINVAL;
- goto unlock;
- }
-
- if (atomic_inc_not_zero(&event->mmap_count)) {
- if (nr_pages != event->data->nr_pages)
+ if (event->data) {
+ if (event->data->nr_pages == nr_pages)
+ atomic_inc(&event->data->refcount);
+ else
ret = -EINVAL;
goto unlock;
}
@@ -2667,21 +2755,23 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
WARN_ON(event->data);
data = perf_mmap_data_alloc(event, nr_pages);
- ret = -ENOMEM;
- if (!data)
+ if (!data) {
+ ret = -ENOMEM;
goto unlock;
+ }
- ret = 0;
perf_mmap_data_init(event, data);
-
- atomic_set(&event->mmap_count, 1);
- atomic_long_add(user_extra, &user->locked_vm);
- vma->vm_mm->locked_vm += extra;
- event->data->nr_locked = extra;
if (vma->vm_flags & VM_WRITE)
event->data->writable = 1;
+ atomic_long_add(user_extra, &user->locked_vm);
+ event->mmap_locked = extra;
+ event->mmap_user = get_current_user();
+ vma->vm_mm->locked_vm += event->mmap_locked;
+
unlock:
+ if (!ret)
+ atomic_inc(&event->mmap_count);
mutex_unlock(&event->mmap_mutex);
vma->vm_flags |= VM_RESERVED;
@@ -2977,6 +3067,7 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
len -= size;
handle->addr += size;
+ buf += size;
handle->size -= size;
if (!handle->size) {
struct perf_mmap_data *data = handle->data;
@@ -2993,7 +3084,6 @@ int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size,
int nmi, int sample)
{
- struct perf_event *output_event;
struct perf_mmap_data *data;
unsigned long tail, offset, head;
int have_lost;
@@ -3010,10 +3100,6 @@ int perf_output_begin(struct perf_output_handle *handle,
if (event->parent)
event = event->parent;
- output_event = rcu_dereference(event->output);
- if (output_event)
- event = output_event;
-
data = rcu_dereference(event->data);
if (!data)
goto out;
@@ -3972,13 +4058,6 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
}
}
-static void perf_swevent_unthrottle(struct perf_event *event)
-{
- /*
- * Nothing to do, we already reset hwc->interrupts.
- */
-}
-
static void perf_swevent_add(struct perf_event *event, u64 nr,
int nmi, struct perf_sample_data *data,
struct pt_regs *regs)
@@ -4193,11 +4272,22 @@ static void perf_swevent_disable(struct perf_event *event)
hlist_del_rcu(&event->hlist_entry);
}
+static void perf_swevent_void(struct perf_event *event)
+{
+}
+
+static int perf_swevent_int(struct perf_event *event)
+{
+ return 0;
+}
+
static const struct pmu perf_ops_generic = {
.enable = perf_swevent_enable,
.disable = perf_swevent_disable,
+ .start = perf_swevent_int,
+ .stop = perf_swevent_void,
.read = perf_swevent_read,
- .unthrottle = perf_swevent_unthrottle,
+ .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
};
/*
@@ -4478,8 +4568,10 @@ static int swevent_hlist_get(struct perf_event *event)
static const struct pmu perf_ops_tracepoint = {
.enable = perf_trace_enable,
.disable = perf_trace_disable,
+ .start = perf_swevent_int,
+ .stop = perf_swevent_void,
.read = perf_swevent_read,
- .unthrottle = perf_swevent_unthrottle,
+ .unthrottle = perf_swevent_void,
};
static int perf_tp_filter_match(struct perf_event *event,
@@ -4912,39 +5004,17 @@ err_size:
goto out;
}
-static int perf_event_set_output(struct perf_event *event, int output_fd)
+static int
+perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
{
- struct perf_event *output_event = NULL;
- struct file *output_file = NULL;
- struct perf_event *old_output;
- int fput_needed = 0;
+ struct perf_mmap_data *data = NULL, *old_data = NULL;
int ret = -EINVAL;
- /*
- * Don't allow output of inherited per-task events. This would
- * create performance issues due to cross cpu access.
- */
- if (event->cpu == -1 && event->attr.inherit)
- return -EINVAL;
-
- if (!output_fd)
+ if (!output_event)
goto set;
- output_file = fget_light(output_fd, &fput_needed);
- if (!output_file)
- return -EBADF;
-
- if (output_file->f_op != &perf_fops)
- goto out;
-
- output_event = output_file->private_data;
-
- /* Don't chain output fds */
- if (output_event->output)
- goto out;
-
- /* Don't set an output fd when we already have an output channel */
- if (event->data)
+ /* don't allow circular references */
+ if (event == output_event)
goto out;
/*
@@ -4959,26 +5029,28 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
if (output_event->cpu == -1 && output_event->ctx != event->ctx)
goto out;
- atomic_long_inc(&output_file->f_count);
-
set:
mutex_lock(&event->mmap_mutex);
- old_output = event->output;
- rcu_assign_pointer(event->output, output_event);
- mutex_unlock(&event->mmap_mutex);
+ /* Can't redirect output if we've got an active mmap() */
+ if (atomic_read(&event->mmap_count))
+ goto unlock;
- if (old_output) {
- /*
- * we need to make sure no existing perf_output_*()
- * is still referencing this event.
- */
- synchronize_rcu();
- fput(old_output->filp);
+ if (output_event) {
+ /* get the buffer we want to redirect to */
+ data = perf_mmap_data_get(output_event);
+ if (!data)
+ goto unlock;
}
+ old_data = event->data;
+ rcu_assign_pointer(event->data, data);
ret = 0;
+unlock:
+ mutex_unlock(&event->mmap_mutex);
+
+ if (old_data)
+ perf_mmap_data_put(old_data);
out:
- fput_light(output_file, fput_needed);
return ret;
}
@@ -4994,7 +5066,7 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event_attr __user *, attr_uptr,
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{
- struct perf_event *event, *group_leader;
+ struct perf_event *event, *group_leader = NULL, *output_event = NULL;
struct perf_event_attr attr;
struct perf_event_context *ctx;
struct file *event_file = NULL;
@@ -5034,19 +5106,25 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_fd;
}
+ if (group_fd != -1) {
+ group_leader = perf_fget_light(group_fd, &fput_needed);
+ if (IS_ERR(group_leader)) {
+ err = PTR_ERR(group_leader);
+ goto err_put_context;
+ }
+ group_file = group_leader->filp;
+ if (flags & PERF_FLAG_FD_OUTPUT)
+ output_event = group_leader;
+ if (flags & PERF_FLAG_FD_NO_GROUP)
+ group_leader = NULL;
+ }
+
/*
* Look up the group leader (we will attach this event to it):
*/
- group_leader = NULL;
- if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
+ if (group_leader) {
err = -EINVAL;
- group_file = fget_light(group_fd, &fput_needed);
- if (!group_file)
- goto err_put_context;
- if (group_file->f_op != &perf_fops)
- goto err_put_context;
- group_leader = group_file->private_data;
/*
* Do not allow a recursive hierarchy (this new sibling
* becoming part of another group-sibling):
@@ -5068,9 +5146,16 @@ SYSCALL_DEFINE5(perf_event_open,
event = perf_event_alloc(&attr, cpu, ctx, group_leader,
NULL, NULL, GFP_KERNEL);
- err = PTR_ERR(event);
- if (IS_ERR(event))
+ if (IS_ERR(event)) {
+ err = PTR_ERR(event);
goto err_put_context;
+ }
+
+ if (output_event) {
+ err = perf_event_set_output(event, output_event);
+ if (err)
+ goto err_free_put_context;
+ }
event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
if (IS_ERR(event_file)) {
@@ -5078,12 +5163,6 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_free_put_context;
}
- if (flags & PERF_FLAG_FD_OUTPUT) {
- err = perf_event_set_output(event, group_fd);
- if (err)
- goto err_fput_free_put_context;
- }
-
event->filp = event_file;
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
@@ -5097,12 +5176,16 @@ SYSCALL_DEFINE5(perf_event_open,
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
+ /*
+ * Drop the reference on the group_event after placing the
+ * new event on the sibling_list. This ensures destruction
+ * of the group leader will find the pointer to itself in
+ * perf_group_detach().
+ */
fput_light(group_file, fput_needed);
fd_install(event_fd, event_file);
return event_fd;
-err_fput_free_put_context:
- fput(event_file);
err_free_put_context:
free_event(event);
err_put_context:
@@ -5420,6 +5503,7 @@ static void perf_free_event(struct perf_event *event,
fput(parent->filp);
+ perf_group_detach(event);
list_del_event(event, ctx);
free_event(event);
}
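
Note on the perf mmap buffer changes above: the old event->output redirection pointer is replaced by giving struct perf_mmap_data its own reference count, taken under rcu_read_lock() with atomic_inc_not_zero() (perf_mmap_data_get) and dropped with atomic_dec_and_test() followed by an RCU-deferred free (perf_mmap_data_put); PERF_EVENT_IOC_SET_OUTPUT then simply points the redirected event at the other event's buffer. A minimal sketch of the same get/put pattern on a hypothetical RCU-published object (buf, buf_get and buf_put are illustrative names, not the perf API):

    #include <linux/kernel.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <asm/atomic.h>

    struct buf {
            atomic_t refcount;              /* set to 1 when the buffer is published */
            struct rcu_head rcu_head;
    };

    static void buf_free_rcu(struct rcu_head *head)
    {
            kfree(container_of(head, struct buf, rcu_head));
    }

    /* Take a reference on the currently published buffer, if any. */
    static struct buf *buf_get(struct buf **slot)
    {
            struct buf *b;

            rcu_read_lock();
            b = rcu_dereference(*slot);
            if (b && !atomic_inc_not_zero(&b->refcount))
                    b = NULL;               /* raced with the final put */
            rcu_read_unlock();
            return b;
    }

    /* Drop a reference; the last one frees the buffer after a grace period. */
    static void buf_put(struct buf *b)
    {
            if (atomic_dec_and_test(&b->refcount))
                    call_rcu(&b->rcu_head, buf_free_rcu);
    }
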
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 5c36ea9d55d2..ca6066a6952e 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -99,9 +99,13 @@ config PM_SLEEP_ADVANCED_DEBUG
depends on PM_ADVANCED_DEBUG
default n
+config SUSPEND_NVS
+ bool
+
config SUSPEND
bool "Suspend to RAM and standby"
depends on PM && ARCH_SUSPEND_POSSIBLE
+ select SUSPEND_NVS if HAS_IOMEM
default y
---help---
Allow the system to enter sleep states in which main memory is
@@ -130,13 +134,10 @@ config SUSPEND_FREEZER
Turning OFF this setting is NOT recommended! If in doubt, say Y.
-config HIBERNATION_NVS
- bool
-
config HIBERNATION
bool "Hibernation (aka 'suspend to disk')"
depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
- select HIBERNATION_NVS if HAS_IOMEM
+ select SUSPEND_NVS if HAS_IOMEM
---help---
Enable the suspend to disk (STD) functionality, which is usually
called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 524e058dcf06..f9063c6b185d 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -10,6 +10,6 @@ obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
block_io.o
-obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o
+obj-$(CONFIG_SUSPEND_NVS) += nvs.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/nvs.c
index fdcad9ed5a7b..1836db60bbb6 100644
--- a/kernel/power/hibernate_nvs.c
+++ b/kernel/power/nvs.c
@@ -15,7 +15,7 @@
/*
* Platforms, like ACPI, may want us to save some memory used by them during
- * hibernation and to restore the contents of this memory during the subsequent
+ * suspend and to restore the contents of this memory during the subsequent
* resume. The code below implements a mechanism allowing us to do that.
*/
@@ -30,7 +30,7 @@ struct nvs_page {
static LIST_HEAD(nvs_list);
/**
- * hibernate_nvs_register - register platform NVS memory region to save
+ * suspend_nvs_register - register platform NVS memory region to save
* @start - physical address of the region
* @size - size of the region
*
@@ -38,7 +38,7 @@ static LIST_HEAD(nvs_list);
* things so that the data from page-aligned addresses in this region will
* be copied into separate RAM pages.
*/
-int hibernate_nvs_register(unsigned long start, unsigned long size)
+int suspend_nvs_register(unsigned long start, unsigned long size)
{
struct nvs_page *entry, *next;
@@ -68,9 +68,9 @@ int hibernate_nvs_register(unsigned long start, unsigned long size)
}
/**
- * hibernate_nvs_free - free data pages allocated for saving NVS regions
+ * suspend_nvs_free - free data pages allocated for saving NVS regions
*/
-void hibernate_nvs_free(void)
+void suspend_nvs_free(void)
{
struct nvs_page *entry;
@@ -86,16 +86,16 @@ void hibernate_nvs_free(void)
}
/**
- * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
+ * suspend_nvs_alloc - allocate memory necessary for saving NVS regions
*/
-int hibernate_nvs_alloc(void)
+int suspend_nvs_alloc(void)
{
struct nvs_page *entry;
list_for_each_entry(entry, &nvs_list, node) {
entry->data = (void *)__get_free_page(GFP_KERNEL);
if (!entry->data) {
- hibernate_nvs_free();
+ suspend_nvs_free();
return -ENOMEM;
}
}
@@ -103,9 +103,9 @@ int hibernate_nvs_alloc(void)
}
/**
- * hibernate_nvs_save - save NVS memory regions
+ * suspend_nvs_save - save NVS memory regions
*/
-void hibernate_nvs_save(void)
+void suspend_nvs_save(void)
{
struct nvs_page *entry;
@@ -119,12 +119,12 @@ void hibernate_nvs_save(void)
}
/**
- * hibernate_nvs_restore - restore NVS memory regions
+ * suspend_nvs_restore - restore NVS memory regions
*
* This function is going to be called with interrupts disabled, so it
* cannot iounmap the virtual addresses used to access the NVS region.
*/
-void hibernate_nvs_restore(void)
+void suspend_nvs_restore(void)
{
struct nvs_page *entry;
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 56e7dbb8b996..f37cb7dd4402 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -16,6 +16,12 @@
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
#include "power.h"
diff --git a/kernel/sched.c b/kernel/sched.c
index d48408142503..f52a8801b7a2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD;
*/
struct task_group init_task_group;
-/* return group to which a task belongs */
-static inline struct task_group *task_group(struct task_struct *p)
-{
- struct task_group *tg;
-
-#ifdef CONFIG_CGROUP_SCHED
- tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
- struct task_group, css);
-#else
- tg = &init_task_group;
-#endif
- return tg;
-}
-
-/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
-{
- /*
- * Strictly speaking this rcu_read_lock() is not needed since the
- * task_group is tied to the cgroup, which in turn can never go away
- * as long as there are tasks attached to it.
- *
- * However since task_group() uses task_subsys_state() which is an
- * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
- */
- rcu_read_lock();
-#ifdef CONFIG_FAIR_GROUP_SCHED
- p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
- p->se.parent = task_group(p)->se[cpu];
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
- p->rt.rt_rq = task_group(p)->rt_rq[cpu];
- p->rt.parent = task_group(p)->rt_se[cpu];
-#endif
- rcu_read_unlock();
-}
-
-#else
-
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline struct task_group *task_group(struct task_struct *p)
-{
- return NULL;
-}
-
#endif /* CONFIG_CGROUP_SCHED */
/* CFS-related fields in a runqueue */
@@ -544,6 +498,8 @@ struct rq {
struct root_domain *rd;
struct sched_domain *sd;
+ unsigned long cpu_power;
+
unsigned char idle_at_tick;
/* For active balancing */
int post_schedule;
@@ -642,6 +598,49 @@ static inline int cpu_of(struct rq *rq)
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() (&__raw_get_cpu_var(runqueues))
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this tasks belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification
+ * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * holds that lock for each task it moves into the cgroup. Therefore
+ * by holding that lock, we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+ struct cgroup_subsys_state *css;
+
+ css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+ lockdep_is_held(&task_rq(p)->lock));
+ return container_of(css, struct task_group, css);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
+ p->se.parent = task_group(p)->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ p->rt.rt_rq = task_group(p)->rt_rq[cpu];
+ p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
inline void update_rq_clock(struct rq *rq)
{
if (!rq->skip_clock_update)
@@ -1255,6 +1254,12 @@ static void sched_avg_update(struct rq *rq)
s64 period = sched_avg_period();
while ((s64)(rq->clock - rq->age_stamp) > period) {
+ /*
+ * Inline assembly required to prevent the compiler
+ * optimising this loop into a divmod call.
+ * See __iter_div_u64_rem() for another example of this.
+ */
+ asm("" : "+rm" (rq->age_stamp));
rq->age_stamp += period;
rq->rt_avg /= 2;
}
@@ -1499,24 +1504,9 @@ static unsigned long target_load(int cpu, int type)
return max(rq->cpu_load[type-1], total);
}
-static struct sched_group *group_of(int cpu)
-{
- struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);
-
- if (!sd)
- return NULL;
-
- return sd->groups;
-}
-
static unsigned long power_of(int cpu)
{
- struct sched_group *group = group_of(cpu);
-
- if (!group)
- return SCHED_LOAD_SCALE;
-
- return group->cpu_power;
+ return cpu_rq(cpu)->cpu_power;
}
static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
@@ -1673,9 +1663,6 @@ static void update_shares(struct sched_domain *sd)
static void update_h_load(long cpu)
{
- if (root_task_group_empty())
- return;
-
walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
}
@@ -1854,8 +1841,8 @@ static void dec_nr_running(struct rq *rq)
static void set_load_weight(struct task_struct *p)
{
if (task_has_rt_policy(p)) {
- p->se.load.weight = prio_to_weight[0] * 2;
- p->se.load.inv_weight = prio_to_wmult[0] >> 1;
+ p->se.load.weight = 0;
+ p->se.load.inv_weight = WMULT_CONST;
return;
}
@@ -2507,7 +2494,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
if (p->sched_class->task_fork)
p->sched_class->task_fork(p);
+ /*
+ * The child is not yet in the pid-hash so no cgroup attach races,
+ * and the cgroup is pinned to this child due to cgroup_fork()
+ * is ran before sched_fork().
+ *
+ * Silence PROVE_RCU.
+ */
+ rcu_read_lock();
set_task_cpu(p, cpu);
+ rcu_read_unlock();
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
if (likely(sched_info_on()))
@@ -2877,9 +2873,9 @@ unsigned long nr_iowait(void)
return sum;
}
-unsigned long nr_iowait_cpu(void)
+unsigned long nr_iowait_cpu(int cpu)
{
- struct rq *this = this_rq();
+ struct rq *this = cpu_rq(cpu);
return atomic_read(&this->nr_iowait);
}
@@ -4478,16 +4474,6 @@ recheck:
}
if (user) {
-#ifdef CONFIG_RT_GROUP_SCHED
- /*
- * Do not allow realtime tasks into groups that have no runtime
- * assigned.
- */
- if (rt_bandwidth_enabled() && rt_policy(policy) &&
- task_group(p)->rt_bandwidth.rt_runtime == 0)
- return -EPERM;
-#endif
-
retval = security_task_setscheduler(p, policy, param);
if (retval)
return retval;
@@ -4503,6 +4489,22 @@ recheck:
* runqueue lock must be held.
*/
rq = __task_rq_lock(p);
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ if (user) {
+ /*
+ * Do not allow realtime tasks into groups that have no runtime
+ * assigned.
+ */
+ if (rt_bandwidth_enabled() && rt_policy(policy) &&
+ task_group(p)->rt_bandwidth.rt_runtime == 0) {
+ __task_rq_unlock(rq);
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ return -EPERM;
+ }
+ }
+#endif
+
/* recheck policy now with rq lock held */
if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
policy = oldpolicy = -1;
@@ -7605,6 +7607,7 @@ void __init sched_init(void)
#ifdef CONFIG_SMP
rq->sd = NULL;
rq->rd = NULL;
+ rq->cpu_power = SCHED_LOAD_SCALE;
rq->post_schedule = 0;
rq->active_balance = 0;
rq->next_balance = jiffies;
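
One detail worth noting in the sched.c hunks above: power_of() now reads a per-runqueue cpu_power field (initialized to SCHED_LOAD_SCALE and refreshed by update_cpu_power()) instead of chasing the sched-domain group pointer, and the empty inline asm added to sched_avg_update() is a compiler barrier on rq->age_stamp that keeps gcc from collapsing the catch-up loop into a 64-bit divmod, the same trick __iter_div_u64_rem() relies on. A stripped-down illustration of that barrier pattern (catch_up and its parameters are illustrative names):

    #include <linux/types.h>

    /* Advance 'stamp' towards 'now' one period at a time. The empty asm
     * makes 'stamp' opaque to the optimizer, so the loop stays a loop of
     * additions rather than being folded into an expensive division. */
    static void catch_up(u64 *stamp, u64 now, s64 period)
    {
            while ((s64)(now - *stamp) > period) {
                    asm("" : "+rm" (*stamp));
                    *stamp += period;
            }
    }
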
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 217e4a9393e4..a878b5332daa 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1225,7 +1225,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
unsigned long this_load, load;
int idx, this_cpu, prev_cpu;
unsigned long tl_per_task;
- unsigned int imbalance;
struct task_group *tg;
unsigned long weight;
int balanced;
@@ -1241,6 +1240,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
* effect of the currently running task from the load
* of the current CPU:
*/
+ rcu_read_lock();
if (sync) {
tg = task_group(current);
weight = current->se.load.weight;
@@ -1252,8 +1252,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
tg = task_group(p);
weight = p->se.load.weight;
- imbalance = 100 + (sd->imbalance_pct - 100) / 2;
-
/*
* In low-load situations, where prev_cpu is idle and this_cpu is idle
* due to the sync cause above having dropped this_load to 0, we'll
@@ -1263,9 +1261,22 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
* Otherwise check if either cpus are near enough in load to allow this
* task to be woken on this_cpu.
*/
- balanced = !this_load ||
- 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
- imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
+ if (this_load) {
+ unsigned long this_eff_load, prev_eff_load;
+
+ this_eff_load = 100;
+ this_eff_load *= power_of(prev_cpu);
+ this_eff_load *= this_load +
+ effective_load(tg, this_cpu, weight, weight);
+
+ prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
+ prev_eff_load *= power_of(this_cpu);
+ prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
+
+ balanced = this_eff_load <= prev_eff_load;
+ } else
+ balanced = true;
+ rcu_read_unlock();
/*
* If the currently running task will sleep within
@@ -2298,6 +2309,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
if (!power)
power = 1;
+ cpu_rq(cpu)->cpu_power = power;
sdg->cpu_power = power;
}
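
The wake_affine() rework above replaces the single percentage comparison with a cross-multiplied form in which each side is scaled by the other CPU's power (the new rq->cpu_power kept up to date by update_cpu_power()), so waking a task onto a weaker this_cpu has to clear a higher bar. A standalone sketch of the resulting arithmetic, with invented load and power numbers; only the shape of the comparison mirrors the patch:

#include <stdbool.h>
#include <stdio.h>

static bool balanced(unsigned long this_load, unsigned long this_eff,
		     unsigned long prev_load, unsigned long prev_eff,
		     unsigned long this_power, unsigned long prev_power,
		     unsigned int imbalance_pct)
{
	unsigned long this_eff_load, prev_eff_load;

	if (!this_load)
		return true;

	this_eff_load = 100;
	this_eff_load *= prev_power;		/* scale by the *other* cpu */
	this_eff_load *= this_load + this_eff;

	prev_eff_load = 100 + (imbalance_pct - 100) / 2;
	prev_eff_load *= this_power;
	prev_eff_load *= prev_load + prev_eff;

	return this_eff_load <= prev_eff_load;
}

int main(void)
{
	/* equal cpu_power: behaves like the old percentage test -> 1 */
	printf("%d\n", balanced(1024, 0, 1024, 0, 1024, 1024, 125));
	/* this_cpu has half the power: pulling the task here is harder -> 0 */
	printf("%d\n", balanced(1024, 0, 1024, 0, 512, 1024, 125));
	return 0;
}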
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 825e1126008f..07b4f1b1a73a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -850,7 +850,7 @@ static __init int spawn_ksoftirqd(void)
void *cpu = (void *)(long)smp_processor_id();
int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
- BUG_ON(err == NOTIFY_BAD);
+ BUG_ON(err != NOTIFY_OK);
cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
register_cpu_notifier(&cpu_nfb);
return 0;
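
The BUG_ON() above becomes stricter: a notifier callback that fails via notifier_from_errno() returns a stop value that is not literally NOTIFY_BAD, so the old "err == NOTIFY_BAD" test could let such a failure slide. A standalone illustration, with the notifier constants and the errno encoding reproduced from memory and therefore to be treated as approximate:

#include <stdio.h>

#define NOTIFY_DONE		0x0000
#define NOTIFY_OK		0x0001
#define NOTIFY_STOP_MASK	0x8000
#define NOTIFY_BAD		(NOTIFY_STOP_MASK | 0x0002)

/* rough equivalent of the kernel's notifier_from_errno() */
static int notifier_from_errno(int err)
{
	return err ? (NOTIFY_STOP_MASK | (NOTIFY_BAD - err)) : NOTIFY_OK;
}

int main(void)
{
	int err = notifier_from_errno(-12);	/* callback failed with -ENOMEM */

	printf("err == NOTIFY_BAD: %d\n", err == NOTIFY_BAD);	/* 0: old test missed it */
	printf("err != NOTIFY_OK:  %d\n", err != NOTIFY_OK);	/* 1: new test catches it */
	return 0;
}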
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index b4e7431e7c78..70f8d90331e9 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -321,7 +321,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
- case CPU_DEAD:
+ case CPU_POST_DEAD:
{
struct cpu_stop_work *work;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 997080f00e0b..d24f761f4876 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1471,12 +1471,12 @@ static struct ctl_table fs_table[] = {
},
#endif
{
- .procname = "pipe-max-pages",
- .data = &pipe_max_pages,
+ .procname = "pipe-max-size",
+ .data = &pipe_max_size,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .extra1 = &two,
+ .proc_handler = &pipe_proc_fn,
+ .extra1 = &pipe_min_size,
},
/*
* NOTE: do not add new entries to this table unless you have read
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1d7b9bc1c034..813993b5fb61 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -154,14 +154,14 @@ static void tick_nohz_update_jiffies(ktime_t now)
* Updates the per cpu time idle statistics counters
*/
static void
-update_ts_time_stats(struct tick_sched *ts, ktime_t now, u64 *last_update_time)
+update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
{
ktime_t delta;
if (ts->idle_active) {
delta = ktime_sub(now, ts->idle_entrytime);
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
- if (nr_iowait_cpu() > 0)
+ if (nr_iowait_cpu(cpu) > 0)
ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
ts->idle_entrytime = now;
}
@@ -175,19 +175,19 @@ static void tick_nohz_stop_idle(int cpu, ktime_t now)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
- update_ts_time_stats(ts, now, NULL);
+ update_ts_time_stats(cpu, ts, now, NULL);
ts->idle_active = 0;
sched_clock_idle_wakeup_event(0);
}
-static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
+static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
{
ktime_t now;
now = ktime_get();
- update_ts_time_stats(ts, now, NULL);
+ update_ts_time_stats(cpu, ts, now, NULL);
ts->idle_entrytime = now;
ts->idle_active = 1;
@@ -216,7 +216,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
if (!tick_nohz_enabled)
return -1;
- update_ts_time_stats(ts, ktime_get(), last_update_time);
+ update_ts_time_stats(cpu, ts, ktime_get(), last_update_time);
return ktime_to_us(ts->idle_sleeptime);
}
@@ -242,7 +242,7 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
if (!tick_nohz_enabled)
return -1;
- update_ts_time_stats(ts, ktime_get(), last_update_time);
+ update_ts_time_stats(cpu, ts, ktime_get(), last_update_time);
return ktime_to_us(ts->iowait_sleeptime);
}
@@ -284,7 +284,7 @@ void tick_nohz_stop_sched_tick(int inidle)
*/
ts->inidle = 1;
- now = tick_nohz_start_idle(ts);
+ now = tick_nohz_start_idle(cpu, ts);
/*
* If this cpu is offline and it is the one which updates
@@ -315,9 +315,6 @@ void tick_nohz_stop_sched_tick(int inidle)
goto end;
}
- if (nohz_ratelimit(cpu))
- goto end;
-
ts->idle_calls++;
/* Read jiffies and the time when jiffies were updated last */
do {
@@ -328,7 +325,7 @@ void tick_nohz_stop_sched_tick(int inidle)
} while (read_seqretry(&xtime_lock, seq));
if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
- arch_needs_cpu(cpu)) {
+ arch_needs_cpu(cpu) || nohz_ratelimit(cpu)) {
next_jiffies = last_jiffies + 1;
delta_jiffies = 1;
} else {
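
The tick-sched changes above thread the cpu number through update_ts_time_stats() so that nr_iowait_cpu(cpu), reworked in the sched.c hunk earlier, reads the runqueue being accounted rather than the caller's; that matters because get_cpu_idle_time_us() and get_cpu_iowait_time_us() can be called for a remote cpu. A tiny userspace analogue of the implicit-versus-explicit cpu argument (all names invented, illustration only):

#include <stdio.h>

#define NR_CPUS 4

static int nr_iowait[NR_CPUS] = { 0, 3, 0, 1 };
static int current_cpu;				/* stand-in for this_rq() */

static int nr_iowait_cpu_old(void)		/* implicit: caller's cpu only */
{
	return nr_iowait[current_cpu];
}

static int nr_iowait_cpu_new(int cpu)		/* explicit: works for any cpu */
{
	return nr_iowait[cpu];
}

int main(void)
{
	current_cpu = 0;
	/* caller wants cpu 1's iowait while running on cpu 0 */
	printf("old: %d (wrong cpu)\n", nr_iowait_cpu_old());
	printf("new: %d\n", nr_iowait_cpu_new(1));
	return 0;
}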
diff --git a/kernel/timer.c b/kernel/timer.c
index 2454172a80d3..ee305c8d4e18 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1717,7 +1717,7 @@ void __init init_timers(void)
init_timer_stats();
- BUG_ON(err == NOTIFY_BAD);
+ BUG_ON(err != NOTIFY_OK);
register_cpu_notifier(&timers_nb);
open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 36ea2b65dcdc..638711c17504 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -842,6 +842,7 @@ static void blk_add_trace_split(void *ignore,
/**
* blk_add_trace_remap - Add a trace for a remap operation
+ * @ignore: trace callback data parameter (not used)
* @q: queue the io is for
* @bio: the source bio
* @dev: target device
@@ -873,6 +874,7 @@ static void blk_add_trace_remap(void *ignore,
/**
* blk_add_trace_rq_remap - Add a trace for a request-remap operation
+ * @ignore: trace callback data parameter (not used)
* @q: queue the io is for
* @rq: the source request
* @dev: target device
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index cb6f365016e4..8a2b73f7c068 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -96,7 +96,9 @@ int perf_trace_init(struct perf_event *p_event)
mutex_lock(&event_mutex);
list_for_each_entry(tp_event, &ftrace_events, list) {
if (tp_event->event.type == event_id &&
- tp_event->class && tp_event->class->perf_probe &&
+ tp_event->class &&
+ (tp_event->class->perf_probe ||
+ tp_event->class->reg) &&
try_module_get(tp_event->mod)) {
ret = perf_trace_event_init(tp_event, p_event);
break;
@@ -116,7 +118,7 @@ int perf_trace_enable(struct perf_event *p_event)
if (WARN_ON_ONCE(!list))
return -EINVAL;
- list = per_cpu_ptr(list, smp_processor_id());
+ list = this_cpu_ptr(list);
hlist_add_head_rcu(&p_event->hlist_entry, list);
return 0;
@@ -132,8 +134,9 @@ void perf_trace_destroy(struct perf_event *p_event)
struct ftrace_event_call *tp_event = p_event->tp_event;
int i;
+ mutex_lock(&event_mutex);
if (--tp_event->perf_refcount > 0)
- return;
+ goto out;
if (tp_event->class->reg)
tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
@@ -142,6 +145,12 @@ void perf_trace_destroy(struct perf_event *p_event)
tp_event->class->perf_probe,
tp_event);
+ /*
+ * Ensure our callback won't be called anymore. See
+ * tracepoint_probe_unregister() and __DO_TRACE().
+ */
+ synchronize_sched();
+
free_percpu(tp_event->perf_events);
tp_event->perf_events = NULL;
@@ -151,6 +160,8 @@ void perf_trace_destroy(struct perf_event *p_event)
perf_trace_buf[i] = NULL;
}
}
+out:
+ mutex_unlock(&event_mutex);
}
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
@@ -169,7 +180,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
if (*rctxp < 0)
return NULL;
- raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id());
+ raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
/* zero the dead bytes from align to not leak stack to user */
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
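
perf_trace_destroy() above now decrements the reference count and frees the per-cpu buffers under event_mutex, the same lock perf_trace_init() takes, and the added synchronize_sched() waits for in-flight probe callbacks before those buffers go away. A minimal userspace sketch of the refcount-and-teardown-under-one-mutex part (pthreads; all names hypothetical, not the kernel code):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t event_mutex = PTHREAD_MUTEX_INITIALIZER;
static int refcount;
static int *per_event_buf;		/* stand-in for tp_event->perf_events */

static int event_init(void)
{
	int ret = 0;

	pthread_mutex_lock(&event_mutex);
	if (refcount++ == 0) {
		per_event_buf = malloc(sizeof(*per_event_buf));
		if (!per_event_buf) {
			refcount--;
			ret = -1;
		}
	}
	pthread_mutex_unlock(&event_mutex);
	return ret;
}

static void event_destroy(void)
{
	pthread_mutex_lock(&event_mutex);
	if (--refcount > 0)
		goto out;			/* mirrors the new "goto out" */
	free(per_event_buf);
	per_event_buf = NULL;
out:
	pthread_mutex_unlock(&event_mutex);
}

int main(void)
{
	event_init();
	event_init();
	event_destroy();		/* refcount 2 -> 1, buffer stays */
	printf("buf after first destroy:  %p\n", (void *)per_event_buf);
	event_destroy();		/* refcount 1 -> 0, buffer freed */
	printf("buf after second destroy: %p\n", (void *)per_event_buf);
	return 0;
}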
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index faf7cefd15da..f52b5f50299d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1359,7 +1359,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
- head = per_cpu_ptr(call->perf_events, smp_processor_id());
+ head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
}
@@ -1392,7 +1392,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
for (i = 0; i < tp->nr_args; i++)
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
- head = per_cpu_ptr(call->perf_events, smp_processor_id());
+ head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index d2c859cec9ea..34e35804304b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -519,7 +519,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
- head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id());
+ head = this_cpu_ptr(sys_data->enter_event->perf_events);
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}
@@ -595,7 +595,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
- head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id());
+ head = this_cpu_ptr(sys_data->exit_event->perf_events);
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}
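
The per_cpu_ptr(..., smp_processor_id()) to this_cpu_ptr(...) conversions in trace_event_perf.c, trace_kprobe.c and trace_syscalls.c above are equivalent rewrites: this_cpu_ptr() is simply the current-cpu shorthand. A small module-style sketch of the two accessors on a dynamically allocated per-cpu area (invented demo code, not taken from the patch):

/*
 * this_cpu_ptr(p) addresses the running cpu's slot of a per-cpu
 * allocation; per_cpu_ptr(p, cpu) addresses any named cpu's slot.
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/smp.h>

static int __percpu *counter;

static int __init percpu_demo_init(void)
{
	int cpu;

	counter = alloc_percpu(int);
	if (!counter)
		return -ENOMEM;

	/* bump this cpu's slot; preemption stays off while we touch it */
	cpu = get_cpu();
	(*this_cpu_ptr(counter))++;		/* == per_cpu_ptr(counter, cpu) */
	put_cpu();

	for_each_possible_cpu(cpu)
		pr_info("cpu%d: %d\n", cpu, *per_cpu_ptr(counter, cpu));

	return 0;
}

static void __exit percpu_demo_exit(void)
{
	free_percpu(counter);
}

module_init(percpu_demo_init);
module_exit(percpu_demo_exit);
MODULE_LICENSE("GPL");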