From d6cbf35dac8a3dadb9103379820c96d7c85df3d9 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 14 May 2013 19:44:20 +0800 Subject: cgroup: initialize xattr before calling d_instantiate() cgroup_create_file() calls d_instantiate(), which may decide to look at the xattrs on the file. Smack always does this and SELinux can be configured to do so. But cgroup_add_file() didn't initialize xattrs before calling cgroup_create_file(), which finally leads to dereferencing NULL dentry->d_fsdata. This bug has been there since cgroup xattr was introduced. Cc: # 3.8.x Reported-by: Ivan Bulatovic Reported-by: Casey Schaufler Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cgroup.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2a9926275f80..38b136553044 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2699,13 +2699,14 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, goto out; } + cfe->type = (void *)cft; + cfe->dentry = dentry; + dentry->d_fsdata = cfe; + simple_xattrs_init(&cfe->xattrs); + mode = cgroup_file_mode(cft); error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb); if (!error) { - cfe->type = (void *)cft; - cfe->dentry = dentry; - dentry->d_fsdata = cfe; - simple_xattrs_init(&cfe->xattrs); list_add_tail(&cfe->node, &parent->files); cfe = NULL; } -- cgit v1.3-14-g43fede From 6ed0106667d76589cb648c27edb4f4ffbf9d59ca Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 16 May 2013 20:48:49 +0900 Subject: tracing: Return -EBUSY when event_enable_func() fails to get module Since try_module_get() returns false( = 0) when it fails to pindown a module, event_enable_func() returns 0 which means "succeed". This can cause a kernel panic when the entry is removed, because the event is already released. This fixes the bug by returning -EBUSY, because the reason why it fails is that the module is being removed at that time. Link: http://lkml.kernel.org/r/20130516114848.13508.97899.stgit@mhiramat-M0-7522 Cc: Srikar Dronamraju Cc: Oleg Nesterov Cc: Tom Zanussi Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7a0cf68027cc..27963e2bf4bf 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2072,8 +2072,10 @@ event_enable_func(struct ftrace_hash *hash, out_reg: /* Don't let event modules unload while probe registered */ ret = try_module_get(file->event_call->mod); - if (!ret) + if (!ret) { + ret = -EBUSY; goto out_free; + } ret = __ftrace_event_enable_disable(file, 1, 1); if (ret < 0) -- cgit v1.3-14-g43fede From fbe06b7bae7c9cf6ab05168fce5ee93b2f4bae7c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 17 May 2013 11:49:10 -0700 Subject: x86, range: fix missing merge during add range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Christian found v3.9 does not work with E350 with EFI is enabled. [ 1.658832] Trying to unpack rootfs image as initramfs... [ 1.679935] BUG: unable to handle kernel paging request at ffff88006e3fd000 [ 1.686940] IP: [] memset+0x1f/0xb0 [ 1.692010] PGD 1f77067 PUD 1f7a067 PMD 61420067 PTE 0 but early memtest report all memory could be accessed without problem. early page table is set in following sequence: [ 0.000000] init_memory_mapping: [mem 0x00000000-0x000fffff] [ 0.000000] init_memory_mapping: [mem 0x6e600000-0x6e7fffff] [ 0.000000] init_memory_mapping: [mem 0x6c000000-0x6e5fffff] [ 0.000000] init_memory_mapping: [mem 0x00100000-0x6bffffff] [ 0.000000] init_memory_mapping: [mem 0x6e800000-0x6ea07fff] but later efi_enter_virtual_mode try set mapping again wrongly. [ 0.010644] pid_max: default: 32768 minimum: 301 [ 0.015302] init_memory_mapping: [mem 0x640c5000-0x6e3fcfff] that means it fails with pfn_range_is_mapped. It turns out that we have a bug in add_range_with_merge and it does not merge range properly when new add one fill the hole between two exsiting ranges. In the case when [mem 0x00100000-0x6bffffff] is the hole between [mem 0x00000000-0x000fffff] and [mem 0x6c000000-0x6e7fffff]. Fix the add_range_with_merge by calling itself recursively. Reported-by: "Christian König" Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/CAE9FiQVofGoSk7q5-0irjkBxemqK729cND4hov-1QCBJDhxpgQ@mail.gmail.com Cc: v3.9 Signed-off-by: H. Peter Anvin --- kernel/range.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/range.c b/kernel/range.c index 071b0ab455cb..eb911dbce267 100644 --- a/kernel/range.c +++ b/kernel/range.c @@ -48,9 +48,11 @@ int add_range_with_merge(struct range *range, int az, int nr_range, final_start = min(range[i].start, start); final_end = max(range[i].end, end); - range[i].start = final_start; - range[i].end = final_end; - return nr_range; + /* clear it and add it back for further merge */ + range[i].start = 0; + range[i].end = 0; + return add_range_with_merge(range, az, nr_range, + final_start, final_end); } /* Need to add it: */ -- cgit v1.3-14-g43fede From ca1643186d3dce6171d8f171e516b02496360a9e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 23 May 2013 11:51:10 -0400 Subject: tracing: Fix crash when ftrace=nop on the kernel command line If ftrace= is on the kernel command line, when that tracer is registered, it will be initiated by tracing_set_tracer() to execute that tracer. The nop tracer is just a stub tracer that is used to have no tracer enabled. It is assigned at early bootup as it is the default tracer. But if ftrace=nop is on the kernel command line, the registering of the nop tracer will call tracing_set_tracer() which will try to execute the nop tracer. But it expects tr->current_trace to be assigned something as it usually is assigned to the nop tracer. As it hasn't been assigned to anything yet, it causes the system to crash. The simple fix is to move the tr->current_trace = nop before registering the nop tracer. The functionality is still the same as the nop tracer doesn't do anything anyway. Reported-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ae6fa2d1cdf7..4d79485b3237 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6216,10 +6216,15 @@ __init static int tracer_alloc_buffers(void) trace_init_cmdlines(); - register_tracer(&nop_trace); - + /* + * register_tracer() might reference current_trace, so it + * needs to be set before we register anything. This is + * just a bootstrap of current_trace anyway. + */ global_trace.current_trace = &nop_trace; + register_tracer(&nop_trace); + /* All seems OK, enable tracing */ tracing_disabled = 0; -- cgit v1.3-14-g43fede From 7805d000db30a3787a4c969bab6ae4d8a5fd8ce6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 May 2013 10:50:24 +0900 Subject: cgroup: fix a subtle bug in descendant pre-order walk When cgroup_next_descendant_pre() initiates a walk, it checks whether the subtree root doesn't have any children and if not returns NULL. Later code assumes that the subtree isn't empty. This is broken because the subtree may become empty inbetween, which can lead to the traversal escaping the subtree by walking to the sibling of the subtree root. There's no reason to have the early exit path. Remove it along with the later assumption that the subtree isn't empty. This simplifies the code a bit and fixes the subtle bug. While at it, fix the comment of cgroup_for_each_descendant_pre() which was incorrectly referring to ->css_offline() instead of ->css_online(). Signed-off-by: Tejun Heo Reviewed-by: Michal Hocko Cc: stable@vger.kernel.org --- include/linux/cgroup.h | 2 +- kernel/cgroup.c | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5047355b9a0f..8bda1294c035 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -707,7 +707,7 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos); * * If a subsystem synchronizes against the parent in its ->css_online() and * before starting iterating, and synchronizes against @pos on each - * iteration, any descendant cgroup which finished ->css_offline() is + * iteration, any descendant cgroup which finished ->css_online() is * guaranteed to be visible in the future iterations. * * In other words, the following guarantees that a descendant can't escape diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 38b136553044..31e9ef319070 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2954,11 +2954,8 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, WARN_ON_ONCE(!rcu_read_lock_held()); /* if first iteration, pretend we just visited @cgroup */ - if (!pos) { - if (list_empty(&cgroup->children)) - return NULL; + if (!pos) pos = cgroup; - } /* visit the first child if exists */ next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling); @@ -2966,14 +2963,14 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, return next; /* no child, visit my or the closest ancestor's next sibling */ - do { + while (pos != cgroup) { next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling); if (&next->sibling != &pos->parent->children) return next; pos = pos->parent; - } while (pos != cgroup); + } return NULL; } -- cgit v1.3-14-g43fede From 387b8b3e37cb1c257fb607787f73815c30d22859 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 24 May 2013 15:55:25 -0700 Subject: auditfilter.c: fix kernel-doc warnings Fix kernel-doc warnings in kernel/auditfilter.c: Warning(kernel/auditfilter.c:1029): Excess function parameter 'loginuid' description in 'audit_receive_filter' Warning(kernel/auditfilter.c:1029): Excess function parameter 'sessionid' description in 'audit_receive_filter' Warning(kernel/auditfilter.c:1029): Excess function parameter 'sid' description in 'audit_receive_filter' Signed-off-by: Randy Dunlap Cc: Eric Paris Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/auditfilter.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 83a2970295d1..6bd4a90d1991 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1021,9 +1021,6 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re * @seq: netlink audit message sequence (serial) number * @data: payload data * @datasz: size of payload data - * @loginuid: loginuid of sender - * @sessionid: sessionid for netlink audit message - * @sid: SE Linux Security ID of sender */ int audit_receive_filter(int type, int pid, int seq, void *data, size_t datasz) { -- cgit v1.3-14-g43fede From 6721cb60022629ae76365551f05d9658b8d14c55 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 23 May 2013 14:21:36 -0400 Subject: ring-buffer: Do not poll non allocated cpu buffers The tracing infrastructure sets up for possible CPUs, but it uses the ring buffer polling, it is possible to call the ring buffer polling code with a CPU that hasn't been allocated. This will cause a kernel oops when it access a ring buffer cpu buffer that is part of the possible cpus but hasn't been allocated yet as the CPU has never been online. Reported-by: Mauro Carvalho Chehab Tested-by: Mauro Carvalho Chehab Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b59aea2c48c2..e444ff88f0a4 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -620,6 +620,9 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, if (cpu == RING_BUFFER_ALL_CPUS) work = &buffer->irq_work; else { + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return -EINVAL; + cpu_buffer = buffer->buffers[cpu]; work = &cpu_buffer->irq_work; } -- cgit v1.3-14-g43fede From 2a0ff3fbe39bc93f719ff857e5a359d9780579ff Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Sun, 26 May 2013 21:33:09 +0800 Subject: cgroup: warn about mismatching options of a new mount of an existing hierarchy With the new __DEVEL__sane_behavior mount option was introduced, if the root cgroup is alive with no xattr function, to mount a new cgroup with xattr will be rejected in terms of design which just fine. However, if the root cgroup does not mounted with __DEVEL__sane_hehavior, to create a new cgroup with xattr option will succeed although after that the EA function does not works as expected but will get ENOTSUPP for setting up attributes under either cgroup. e.g. setfattr: /cgroup2/test: Operation not supported Instead of keeping silence in this case, it's better to drop a log entry in warning level. That would be helpful to understand the reason behind the scene from the user's perspective, and this is essentially an improvement does not break the backward compatibilities. With this fix, above mount attemption will keep up works as usual but the following line cound be found at the system log: [ ...] cgroup: new mount options do not match the existing superblock tj: minor formatting / message updates. Signed-off-by: Jie Liu Reported-by: Alexey Kodanev Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- kernel/cgroup.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 31e9ef319070..a7c9e6ddb979 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1686,11 +1686,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, */ cgroup_drop_root(opts.new_root); - if (((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) && - root->flags != opts.flags) { - pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n"); - ret = -EINVAL; - goto drop_new_super; + if (root->flags != opts.flags) { + if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) { + pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n"); + ret = -EINVAL; + goto drop_new_super; + } else { + pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n"); + } } /* no subsys rebinding, so refcounts don't change */ -- cgit v1.3-14-g43fede