Diffstat (limited to 'kernel/events/core.c')
-rw-r--r--  kernel/events/core.c | 374
1 file changed, 320 insertions(+), 54 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cd13a30f732..5f59d848171e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Performance events core code:
*
@@ -5,8 +6,6 @@
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
- *
- * For licensing details see kernel-base/COPYING
*/
#include <linux/fs.h>
@@ -385,6 +384,8 @@ static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
+static atomic_t nr_ksymbol_events __read_mostly;
+static atomic_t nr_bpf_events __read_mostly;
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
@@ -436,18 +437,18 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
- int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-
- if (ret || !write)
- return ret;
-
+ int ret;
+ int perf_cpu = sysctl_perf_cpu_time_max_percent;
/*
* If throttling is disabled don't allow the write:
*/
- if (sysctl_perf_cpu_time_max_percent == 100 ||
- sysctl_perf_cpu_time_max_percent == 0)
+ if (write && (perf_cpu == 100 || perf_cpu == 0))
return -EINVAL;
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (ret || !write)
+ return ret;
+
max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
update_perf_cpu_limits();
@@ -1171,7 +1172,7 @@ static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
static void get_ctx(struct perf_event_context *ctx)
{
- WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
+ refcount_inc(&ctx->refcount);
}
static void free_ctx(struct rcu_head *head)
@@ -1185,7 +1186,7 @@ static void free_ctx(struct rcu_head *head)
static void put_ctx(struct perf_event_context *ctx)
{
- if (atomic_dec_and_test(&ctx->refcount)) {
+ if (refcount_dec_and_test(&ctx->refcount)) {
if (ctx->parent_ctx)
put_ctx(ctx->parent_ctx);
if (ctx->task && ctx->task != TASK_TOMBSTONE)
@@ -1254,6 +1255,7 @@ static void put_ctx(struct perf_event_context *ctx)
* perf_event_context::lock
* perf_event::mmap_mutex
* mmap_sem
+ * perf_addr_filters_head::lock
*
* cpu_hotplug_lock
* pmus_lock
@@ -1267,7 +1269,7 @@ perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
again:
rcu_read_lock();
ctx = READ_ONCE(event->ctx);
- if (!atomic_inc_not_zero(&ctx->refcount)) {
+ if (!refcount_inc_not_zero(&ctx->refcount)) {
rcu_read_unlock();
goto again;
}
@@ -1400,7 +1402,7 @@ retry:
}
if (ctx->task == TASK_TOMBSTONE ||
- !atomic_inc_not_zero(&ctx->refcount)) {
+ !refcount_inc_not_zero(&ctx->refcount)) {
raw_spin_unlock(&ctx->lock);
ctx = NULL;
} else {
@@ -2797,7 +2799,7 @@ static int perf_event_stop(struct perf_event *event, int restart)
*
* (p1) when userspace mappings change as a result of (1) or (2) or (3) below,
* we update the addresses of corresponding vmas in
- * event::addr_filters_offs array and bump the event::addr_filters_gen;
+ * event::addr_filter_ranges array and bump the event::addr_filters_gen;
* (p2) when an event is scheduled in (pmu::add), it calls
* perf_event_addr_filters_sync() which calls pmu::addr_filters_sync()
* if the generation has changed since the previous call.
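/*
 * Not part of the diff above: a minimal sketch of the consumer side of
 * (p1)/(p2), assuming a hypothetical driver with an example_hw_set_range()
 * helper. Core updates event::addr_filter_ranges[] and bumps
 * event::addr_filters_gen in (p1); a PMU's pmu::addr_filters_sync()
 * callback then reads the resolved ranges once
 * perf_event_addr_filters_sync() notices the generation change in (p2).
 */
#include <linux/perf_event.h>

static void example_pmu_addr_filters_sync(struct perf_event *event)
{
	unsigned int i;

	for (i = 0; i < event->pmu->nr_addr_filters; i++) {
		/* hypothetical helper: program filter i into hardware */
		example_hw_set_range(i, event->addr_filter_ranges[i].start,
				     event->addr_filter_ranges[i].size);
	}
}

static int example_pmu_add(struct perf_event *event, int flags)
{
	/* (p2): picks up any range update done in (p1) */
	perf_event_addr_filters_sync(event);
	/* ... start the counter ... */
	return 0;
}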
@@ -4056,7 +4058,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
INIT_LIST_HEAD(&ctx->event_list);
INIT_LIST_HEAD(&ctx->pinned_active);
INIT_LIST_HEAD(&ctx->flexible_active);
- atomic_set(&ctx->refcount, 1);
+ refcount_set(&ctx->refcount, 1);
}
static struct perf_event_context *
@@ -4235,7 +4237,7 @@ static bool is_sb_event(struct perf_event *event)
if (attr->mmap || attr->mmap_data || attr->mmap2 ||
attr->comm || attr->comm_exec ||
- attr->task ||
+ attr->task || attr->ksymbol ||
attr->context_switch)
return true;
return false;
@@ -4305,6 +4307,10 @@ static void unaccount_event(struct perf_event *event)
dec = true;
if (has_branch_stack(event))
dec = true;
+ if (event->attr.ksymbol)
+ atomic_dec(&nr_ksymbol_events);
+ if (event->attr.bpf_event)
+ atomic_dec(&nr_bpf_events);
if (dec) {
if (!atomic_add_unless(&perf_sched_count, -1, 1))
@@ -4440,7 +4446,7 @@ static void _free_event(struct perf_event *event)
perf_event_free_bpf_prog(event);
perf_addr_filters_splice(event, NULL);
- kfree(event->addr_filters_offs);
+ kfree(event->addr_filter_ranges);
if (event->destroy)
event->destroy(event);
@@ -4963,6 +4969,11 @@ static void __perf_event_period(struct perf_event *event,
}
}
+static int perf_event_check_period(struct perf_event *event, u64 value)
+{
+ return event->pmu->check_period(event, value);
+}
+
static int perf_event_period(struct perf_event *event, u64 __user *arg)
{
u64 value;
@@ -4979,6 +4990,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
if (event->attr.freq && value > sysctl_perf_event_sample_rate)
return -EINVAL;
+ if (perf_event_check_period(event, value))
+ return -EINVAL;
+
event_function_call(event, __perf_event_period, &value);
return 0;
@@ -5388,7 +5402,7 @@ struct ring_buffer *ring_buffer_get(struct perf_event *event)
rcu_read_lock();
rb = rcu_dereference(event->rb);
if (rb) {
- if (!atomic_inc_not_zero(&rb->refcount))
+ if (!refcount_inc_not_zero(&rb->refcount))
rb = NULL;
}
rcu_read_unlock();
@@ -5398,7 +5412,7 @@ struct ring_buffer *ring_buffer_get(struct perf_event *event)
void ring_buffer_put(struct ring_buffer *rb)
{
- if (!atomic_dec_and_test(&rb->refcount))
+ if (!refcount_dec_and_test(&rb->refcount))
return;
WARN_ON_ONCE(!list_empty(&rb->event_list));
@@ -5463,7 +5477,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
/* this has to be the last one */
rb_free_aux(rb);
- WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
+ WARN_ON_ONCE(refcount_read(&rb->aux_refcount));
mutex_unlock(&event->mmap_mutex);
}
@@ -6489,7 +6503,7 @@ void perf_prepare_sample(struct perf_event_header *header,
data->phys_addr = perf_virt_to_phys(data->addr);
}
-static __always_inline void
+static __always_inline int
__perf_event_output(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs,
@@ -6499,13 +6513,15 @@ __perf_event_output(struct perf_event *event,
{
struct perf_output_handle handle;
struct perf_event_header header;
+ int err;
/* protect the callchain buffers */
rcu_read_lock();
perf_prepare_sample(&header, data, event, regs);
- if (output_begin(&handle, event, header.size))
+ err = output_begin(&handle, event, header.size);
+ if (err)
goto exit;
perf_output_sample(&handle, &header, data, event);
@@ -6514,6 +6530,7 @@ __perf_event_output(struct perf_event *event,
exit:
rcu_read_unlock();
+ return err;
}
void
@@ -6532,12 +6549,12 @@ perf_event_output_backward(struct perf_event *event,
__perf_event_output(event, data, regs, perf_output_begin_backward);
}
-void
+int
perf_event_output(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
- __perf_event_output(event, data, regs, perf_output_begin);
+ return __perf_event_output(event, data, regs, perf_output_begin);
}
/*
@@ -6678,7 +6695,8 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
if (filter->path.dentry) {
- event->addr_filters_offs[count] = 0;
+ event->addr_filter_ranges[count].start = 0;
+ event->addr_filter_ranges[count].size = 0;
restart++;
}
@@ -7358,28 +7376,47 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter,
return true;
}
+static bool perf_addr_filter_vma_adjust(struct perf_addr_filter *filter,
+ struct vm_area_struct *vma,
+ struct perf_addr_filter_range *fr)
+{
+ unsigned long vma_size = vma->vm_end - vma->vm_start;
+ unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+ struct file *file = vma->vm_file;
+
+ if (!perf_addr_filter_match(filter, file, off, vma_size))
+ return false;
+
+ if (filter->offset < off) {
+ fr->start = vma->vm_start;
+ fr->size = min(vma_size, filter->size - (off - filter->offset));
+ } else {
+ fr->start = vma->vm_start + filter->offset - off;
+ fr->size = min(vma->vm_end - fr->start, filter->size);
+ }
+
+ return true;
+}
+
static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
{
struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
struct vm_area_struct *vma = data;
- unsigned long off = vma->vm_pgoff << PAGE_SHIFT, flags;
- struct file *file = vma->vm_file;
struct perf_addr_filter *filter;
unsigned int restart = 0, count = 0;
+ unsigned long flags;
if (!has_addr_filter(event))
return;
- if (!file)
+ if (!vma->vm_file)
return;
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
- if (perf_addr_filter_match(filter, file, off,
- vma->vm_end - vma->vm_start)) {
- event->addr_filters_offs[count] = vma->vm_start;
+ if (perf_addr_filter_vma_adjust(filter, vma,
+ &event->addr_filter_ranges[count]))
restart++;
- }
count++;
}
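/*
 * Not part of the diff above: a worked example of the arithmetic in
 * perf_addr_filter_vma_adjust(), with made-up numbers. Assume a filter
 * covering file offsets [0x1000, 0x4000) (offset 0x1000, size 0x3000) and
 * a vma mapping file offsets [0x2000, 0x4000) at vm_start 0x7f0000002000:
 *
 *   off = 0x2000, vma_size = 0x2000, filter->offset (0x1000) < off, so
 *   fr->start = vm_start                       = 0x7f0000002000
 *   fr->size  = min(0x2000, 0x3000 - 0x1000)   = 0x2000
 *
 * With a filter at offset 0x3000, size 0x500, the same vma takes the
 * other branch:
 *
 *   fr->start = vm_start + 0x3000 - 0x2000     = 0x7f0000003000
 *   fr->size  = min(vm_end - fr->start, 0x500) = 0x500
 *
 * i.e. the helper clamps the filter to the part of the file this vma
 * actually maps, which is what __perf_addr_filters_adjust() stores in
 * event::addr_filter_ranges[].
 */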
@@ -7650,6 +7687,207 @@ static void perf_log_throttle(struct perf_event *event, int enable)
perf_output_end(&handle);
}
+/*
+ * ksymbol register/unregister tracking
+ */
+
+struct perf_ksymbol_event {
+ const char *name;
+ int name_len;
+ struct {
+ struct perf_event_header header;
+ u64 addr;
+ u32 len;
+ u16 ksym_type;
+ u16 flags;
+ } event_id;
+};
+
+static int perf_event_ksymbol_match(struct perf_event *event)
+{
+ return event->attr.ksymbol;
+}
+
+static void perf_event_ksymbol_output(struct perf_event *event, void *data)
+{
+ struct perf_ksymbol_event *ksymbol_event = data;
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ int ret;
+
+ if (!perf_event_ksymbol_match(event))
+ return;
+
+ perf_event_header__init_id(&ksymbol_event->event_id.header,
+ &sample, event);
+ ret = perf_output_begin(&handle, event,
+ ksymbol_event->event_id.header.size);
+ if (ret)
+ return;
+
+ perf_output_put(&handle, ksymbol_event->event_id);
+ __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len);
+ perf_event__output_id_sample(event, &handle, &sample);
+
+ perf_output_end(&handle);
+}
+
+void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
+ const char *sym)
+{
+ struct perf_ksymbol_event ksymbol_event;
+ char name[KSYM_NAME_LEN];
+ u16 flags = 0;
+ int name_len;
+
+ if (!atomic_read(&nr_ksymbol_events))
+ return;
+
+ if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX ||
+ ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
+ goto err;
+
+ strlcpy(name, sym, KSYM_NAME_LEN);
+ name_len = strlen(name) + 1;
+ while (!IS_ALIGNED(name_len, sizeof(u64)))
+ name[name_len++] = '\0';
+ BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64));
+
+ if (unregister)
+ flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER;
+
+ ksymbol_event = (struct perf_ksymbol_event){
+ .name = name,
+ .name_len = name_len,
+ .event_id = {
+ .header = {
+ .type = PERF_RECORD_KSYMBOL,
+ .size = sizeof(ksymbol_event.event_id) +
+ name_len,
+ },
+ .addr = addr,
+ .len = len,
+ .ksym_type = ksym_type,
+ .flags = flags,
+ },
+ };
+
+ perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL);
+ return;
+err:
+ WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
+}
+
+/*
+ * bpf program load/unload tracking
+ */
+
+struct perf_bpf_event {
+ struct bpf_prog *prog;
+ struct {
+ struct perf_event_header header;
+ u16 type;
+ u16 flags;
+ u32 id;
+ u8 tag[BPF_TAG_SIZE];
+ } event_id;
+};
+
+static int perf_event_bpf_match(struct perf_event *event)
+{
+ return event->attr.bpf_event;
+}
+
+static void perf_event_bpf_output(struct perf_event *event, void *data)
+{
+ struct perf_bpf_event *bpf_event = data;
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ int ret;
+
+ if (!perf_event_bpf_match(event))
+ return;
+
+ perf_event_header__init_id(&bpf_event->event_id.header,
+ &sample, event);
+ ret = perf_output_begin(&handle, event,
+ bpf_event->event_id.header.size);
+ if (ret)
+ return;
+
+ perf_output_put(&handle, bpf_event->event_id);
+ perf_event__output_id_sample(event, &handle, &sample);
+
+ perf_output_end(&handle);
+}
+
+static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
+ enum perf_bpf_event_type type)
+{
+ bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
+ char sym[KSYM_NAME_LEN];
+ int i;
+
+ if (prog->aux->func_cnt == 0) {
+ bpf_get_prog_name(prog, sym);
+ perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
+ (u64)(unsigned long)prog->bpf_func,
+ prog->jited_len, unregister, sym);
+ } else {
+ for (i = 0; i < prog->aux->func_cnt; i++) {
+ struct bpf_prog *subprog = prog->aux->func[i];
+
+ bpf_get_prog_name(subprog, sym);
+ perf_event_ksymbol(
+ PERF_RECORD_KSYMBOL_TYPE_BPF,
+ (u64)(unsigned long)subprog->bpf_func,
+ subprog->jited_len, unregister, sym);
+ }
+ }
+}
+
+void perf_event_bpf_event(struct bpf_prog *prog,
+ enum perf_bpf_event_type type,
+ u16 flags)
+{
+ struct perf_bpf_event bpf_event;
+
+ if (type <= PERF_BPF_EVENT_UNKNOWN ||
+ type >= PERF_BPF_EVENT_MAX)
+ return;
+
+ switch (type) {
+ case PERF_BPF_EVENT_PROG_LOAD:
+ case PERF_BPF_EVENT_PROG_UNLOAD:
+ if (atomic_read(&nr_ksymbol_events))
+ perf_event_bpf_emit_ksymbols(prog, type);
+ break;
+ default:
+ break;
+ }
+
+ if (!atomic_read(&nr_bpf_events))
+ return;
+
+ bpf_event = (struct perf_bpf_event){
+ .prog = prog,
+ .event_id = {
+ .header = {
+ .type = PERF_RECORD_BPF_EVENT,
+ .size = sizeof(bpf_event.event_id),
+ },
+ .type = type,
+ .flags = flags,
+ .id = prog->aux->id,
+ },
+ };
+
+ BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64));
+
+ memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
+ perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
+}
+
void perf_event_itrace_started(struct perf_event *event)
{
event->attach_state |= PERF_ATTACH_ITRACE;
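/*
 * Not part of the diff above: a minimal userspace sketch of parsing the
 * PERF_RECORD_KSYMBOL record emitted by perf_event_ksymbol(), assuming a
 * uapi linux/perf_event.h new enough to define the record type and flag.
 * The layout mirrors the event_id struct plus the u64-padded name.
 */
#include <stdint.h>
#include <stdio.h>
#include <linux/perf_event.h>

struct ksymbol_record {
	struct perf_event_header header;
	uint64_t addr;
	uint32_t len;
	uint16_t ksym_type;
	uint16_t flags;
	char name[];			/* NUL-terminated, padded to 8 bytes */
};

static void handle_ksymbol(const struct perf_event_header *hdr)
{
	const struct ksymbol_record *ks = (const void *)hdr;

	if (hdr->type != PERF_RECORD_KSYMBOL)
		return;

	printf("%s %s at 0x%llx, size %u\n",
	       (ks->flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER) ?
			"unregister" : "register",
	       ks->name, (unsigned long long)ks->addr, ks->len);
}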
@@ -8768,26 +9006,19 @@ static void perf_addr_filters_splice(struct perf_event *event,
* @filter; if so, adjust filter's address range.
* Called with mm::mmap_sem down for reading.
*/
-static unsigned long perf_addr_filter_apply(struct perf_addr_filter *filter,
- struct mm_struct *mm)
+static void perf_addr_filter_apply(struct perf_addr_filter *filter,
+ struct mm_struct *mm,
+ struct perf_addr_filter_range *fr)
{
struct vm_area_struct *vma;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
- struct file *file = vma->vm_file;
- unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
- unsigned long vma_size = vma->vm_end - vma->vm_start;
-
- if (!file)
- continue;
-
- if (!perf_addr_filter_match(filter, file, off, vma_size))
+ if (!vma->vm_file)
continue;
- return vma->vm_start;
+ if (perf_addr_filter_vma_adjust(filter, vma, fr))
+ return;
}
-
- return 0;
}
/*
@@ -8821,15 +9052,15 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
- event->addr_filters_offs[count] = 0;
+ event->addr_filter_ranges[count].start = 0;
+ event->addr_filter_ranges[count].size = 0;
/*
* Adjust base offset if the filter is associated to a binary
* that needs to be mapped:
*/
if (filter->path.dentry)
- event->addr_filters_offs[count] =
- perf_addr_filter_apply(filter, mm);
+ perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]);
count++;
}
@@ -9391,6 +9622,11 @@ static int perf_pmu_nop_int(struct pmu *pmu)
return 0;
}
+static int perf_event_nop_int(struct perf_event *event, u64 value)
+{
+ return 0;
+}
+
static DEFINE_PER_CPU(unsigned int, nop_txn_flags);
static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags)
@@ -9691,6 +9927,9 @@ got_cpu_context:
pmu->pmu_disable = perf_pmu_nop_void;
}
+ if (!pmu->check_period)
+ pmu->check_period = perf_event_nop_int;
+
if (!pmu->event_idx)
pmu->event_idx = perf_event_idx_default;
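/*
 * Not part of the diff above: a sketch of how a hypothetical PMU could use
 * the new callback. Returning non-zero from check_period makes the
 * PERF_EVENT_IOC_PERIOD path in perf_event_period() fail with -EINVAL;
 * PMUs that don't care keep the perf_event_nop_int default installed above.
 */
static int example_pmu_check_period(struct perf_event *event, u64 value)
{
	/* assumed hardware minimum, purely illustrative */
	return value < 128 ? -EINVAL : 0;
}

static struct pmu example_pmu = {
	/* ... other callbacks ... */
	.check_period	= example_pmu_check_period,
};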
@@ -9772,6 +10011,15 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
if (ctx)
perf_event_ctx_unlock(event->group_leader, ctx);
+ if (!ret) {
+ if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
+ event_has_any_exclude_flag(event)) {
+ if (event->destroy)
+ event->destroy(event);
+ ret = -EINVAL;
+ }
+ }
+
if (ret)
module_put(pmu->module);
@@ -9900,6 +10148,10 @@ static void account_event(struct perf_event *event)
inc = true;
if (is_cgroup_event(event))
inc = true;
+ if (event->attr.ksymbol)
+ atomic_inc(&nr_ksymbol_events);
+ if (event->attr.bpf_event)
+ atomic_inc(&nr_bpf_events);
if (inc) {
/*
@@ -10082,14 +10334,28 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
goto err_pmu;
if (has_addr_filter(event)) {
- event->addr_filters_offs = kcalloc(pmu->nr_addr_filters,
- sizeof(unsigned long),
- GFP_KERNEL);
- if (!event->addr_filters_offs) {
+ event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters,
+ sizeof(struct perf_addr_filter_range),
+ GFP_KERNEL);
+ if (!event->addr_filter_ranges) {
err = -ENOMEM;
goto err_per_task;
}
+ /*
+ * Clone the parent's vma offsets: they are valid until exec()
+ * even if the mm is not shared with the parent.
+ */
+ if (event->parent) {
+ struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+
+ raw_spin_lock_irq(&ifh->lock);
+ memcpy(event->addr_filter_ranges,
+ event->parent->addr_filter_ranges,
+ pmu->nr_addr_filters * sizeof(struct perf_addr_filter_range));
+ raw_spin_unlock_irq(&ifh->lock);
+ }
+
/* force hw sync on the address filters */
event->addr_filters_gen = 1;
}
@@ -10108,7 +10374,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
return event;
err_addr_filters:
- kfree(event->addr_filters_offs);
+ kfree(event->addr_filter_ranges);
err_per_task:
exclusive_event_destroy(event);
@@ -10391,7 +10657,7 @@ __perf_event_ctx_lock_double(struct perf_event *group_leader,
again:
rcu_read_lock();
gctx = READ_ONCE(group_leader->ctx);
- if (!atomic_inc_not_zero(&gctx->refcount)) {
+ if (!refcount_inc_not_zero(&gctx->refcount)) {
rcu_read_unlock();
goto again;
}