aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/events
diff options
context:
space:
mode:
authorGleb Natapov <gleb@redhat.com>2011-11-27 17:59:09 +0200
committerIngo Molnar <mingo@elte.hu>2011-12-06 08:34:02 +0100
commitb202952075f62603bea9bfb6ebc6b0420db11949 (patch)
tree9c8e0538b455e68b5c371caba5b1585ed0ef9d8a /kernel/events
parentperf: Fix enable_on_exec for sibling events (diff)
downloadlinux-dev-b202952075f62603bea9bfb6ebc6b0420db11949.tar.xz
linux-dev-b202952075f62603bea9bfb6ebc6b0420db11949.zip
perf, core: Rate limit perf_sched_events jump_label patching
jump_lable patching is very expensive operation that involves pausing all cpus. The patching of perf_sched_events jump_label is easily controllable from userspace by unprivileged user. When te user runs a loop like this: "while true; do perf stat -e cycles true; done" ... the performance of my test application that just increments a counter for one second drops by 4%. This is on a 16 cpu box with my test application using only one of them. An impact on a real server doing real work will be worse. Performance of KVM PMU drops nearly 50% due to jump_lable for "perf record" since KVM PMU implementation creates and destroys perf event frequently. This patch introduces a way to rate limit jump_label patching and uses it to fix the above problem. I believe that as jump_label use will spread the problem will become more common and thus solving it in a generic code is appropriate. Also fixing it in the perf code would result in moving jump_label accounting logic to perf code with all the ifdefs in case of JUMP_LABEL=n kernel. With this patch all details are nicely hidden inside jump_label code. Signed-off-by: Gleb Natapov <gleb@redhat.com> Acked-by: Jason Baron <jbaron@redhat.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20111127155909.GO2557@redhat.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/events')
-rw-r--r--kernel/events/core.c13
1 files changed, 8 insertions, 5 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3c1541d7a53d..3a3b1a18f490 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -128,7 +128,7 @@ enum event_type_t {
* perf_sched_events : >0 events exist
* perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
*/
-struct jump_label_key perf_sched_events __read_mostly;
+struct jump_label_key_deferred perf_sched_events __read_mostly;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static atomic_t nr_mmap_events __read_mostly;
@@ -2748,7 +2748,7 @@ static void free_event(struct perf_event *event)
if (!event->parent) {
if (event->attach_state & PERF_ATTACH_TASK)
- jump_label_dec(&perf_sched_events);
+ jump_label_dec_deferred(&perf_sched_events);
if (event->attr.mmap || event->attr.mmap_data)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
@@ -2759,7 +2759,7 @@ static void free_event(struct perf_event *event)
put_callchain_buffers();
if (is_cgroup_event(event)) {
atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
- jump_label_dec(&perf_sched_events);
+ jump_label_dec_deferred(&perf_sched_events);
}
}
@@ -5784,7 +5784,7 @@ done:
if (!event->parent) {
if (event->attach_state & PERF_ATTACH_TASK)
- jump_label_inc(&perf_sched_events);
+ jump_label_inc(&perf_sched_events.key);
if (event->attr.mmap || event->attr.mmap_data)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
@@ -6022,7 +6022,7 @@ SYSCALL_DEFINE5(perf_event_open,
* - that may need work on context switch
*/
atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
- jump_label_inc(&perf_sched_events);
+ jump_label_inc(&perf_sched_events.key);
}
/*
@@ -6868,6 +6868,9 @@ void __init perf_event_init(void)
ret = init_hw_breakpoint();
WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
+
+ /* do not patch jump label more than once per second */
+ jump_label_rate_limit(&perf_sched_events, HZ);
}
static int __init perf_event_sysfs_init(void)