aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/events/core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-10 09:27:46 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-10 09:27:46 -0700
commit3871d93b82a4a6c1f4308064f046a544f16ada21 (patch)
tree99ef06b536798100cd7abb4753263505cc465215 /kernel/events/core.c
parentMerge tag 'sched-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parentperf/hw_breakpoint: Annotate tsk->perf_event_mutex vs ctx->mutex (diff)
downloadlinux-dev-3871d93b82a4a6c1f4308064f046a544f16ada21.tar.xz
linux-dev-3871d93b82a4a6c1f4308064f046a544f16ada21.zip
Merge tag 'perf-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events updates from Ingo Molnar: "PMU driver updates: - Add AMD Last Branch Record Extension Version 2 (LbrExtV2) feature support for Zen 4 processors. - Extend the perf ABI to provide branch speculation information, if available, and use this on CPUs that have it (eg. LbrExtV2). - Improve Intel PEBS TSC timestamp handling & integration. - Add Intel Raptor Lake S CPU support. - Add 'perf mem' and 'perf c2c' memory profiling support on AMD CPUs by utilizing IBS tagged load/store samples. - Clean up & optimize various x86 PMU details. HW breakpoints: - Big rework to optimize the code for systems with hundreds of CPUs and thousands of breakpoints: - Replace the nr_bp_mutex global mutex with the bp_cpuinfo_sem per-CPU rwsem that is read-locked during most of the key operations. - Improve the O(#cpus * #tasks) logic in toggle_bp_slot() and fetch_bp_busy_slots(). - Apply micro-optimizations & cleanups. - Misc cleanups & enhancements" * tag 'perf-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (75 commits) perf/hw_breakpoint: Annotate tsk->perf_event_mutex vs ctx->mutex perf: Fix pmu_filter_match() perf: Fix lockdep_assert_event_ctx() perf/x86/amd/lbr: Adjust LBR regardless of filtering perf/x86/utils: Fix uninitialized var in get_branch_type() perf/uapi: Define PERF_MEM_SNOOPX_PEER in kernel header file perf/x86/amd: Support PERF_SAMPLE_PHY_ADDR perf/x86/amd: Support PERF_SAMPLE_ADDR perf/x86/amd: Support PERF_SAMPLE_{WEIGHT|WEIGHT_STRUCT} perf/x86/amd: Support PERF_SAMPLE_DATA_SRC perf/x86/amd: Add IBS OP_DATA2 DataSrc bit definitions perf/mem: Introduce PERF_MEM_LVLNUM_{EXTN_MEM|IO} perf/x86/uncore: Add new Raptor Lake S support perf/x86/cstate: Add new Raptor Lake S support perf/x86/msr: Add new Raptor Lake S support perf/x86: Add new Raptor Lake S support bpf: Check flags for branch stack in bpf_read_branch_records helper perf, hw_breakpoint: Fix use-after-free if perf_event_open() fails perf: Use sample_flags for raw_data perf: Use sample_flags for addr ...
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r--kernel/events/core.c88
1 files changed, 59 insertions, 29 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ff4bffc502c6..43b0df997d13 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1468,6 +1468,8 @@ static void __update_context_time(struct perf_event_context *ctx, bool adv)
{
u64 now = perf_clock();
+ lockdep_assert_held(&ctx->lock);
+
if (adv)
ctx->time += now - ctx->timestamp;
ctx->timestamp = now;
@@ -2224,16 +2226,22 @@ static inline int __pmu_filter_match(struct perf_event *event)
static inline int pmu_filter_match(struct perf_event *event)
{
struct perf_event *sibling;
+ unsigned long flags;
+ int ret = 1;
if (!__pmu_filter_match(event))
return 0;
+ local_irq_save(flags);
for_each_sibling_event(sibling, event) {
- if (!__pmu_filter_match(sibling))
- return 0;
+ if (!__pmu_filter_match(sibling)) {
+ ret = 0;
+ break;
+ }
}
+ local_irq_restore(flags);
- return 1;
+ return ret;
}
static inline int
@@ -6794,11 +6802,10 @@ out_put:
static void __perf_event_header__init_id(struct perf_event_header *header,
struct perf_sample_data *data,
- struct perf_event *event)
+ struct perf_event *event,
+ u64 sample_type)
{
- u64 sample_type = event->attr.sample_type;
-
- data->type = sample_type;
+ data->type = event->attr.sample_type;
header->size += event->id_header_size;
if (sample_type & PERF_SAMPLE_TID) {
@@ -6827,7 +6834,7 @@ void perf_event_header__init_id(struct perf_event_header *header,
struct perf_event *event)
{
if (event->attr.sample_id_all)
- __perf_event_header__init_id(header, data, event);
+ __perf_event_header__init_id(header, data, event, event->attr.sample_type);
}
static void __perf_event__output_id_sample(struct perf_output_handle *handle,
@@ -6976,11 +6983,6 @@ static void perf_output_read(struct perf_output_handle *handle,
perf_output_read_one(handle, event, enabled, running);
}
-static inline bool perf_sample_save_hw_index(struct perf_event *event)
-{
- return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
-}
-
void perf_output_sample(struct perf_output_handle *handle,
struct perf_event_header *header,
struct perf_sample_data *data,
@@ -7062,14 +7064,14 @@ void perf_output_sample(struct perf_output_handle *handle,
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
- if (data->br_stack) {
+ if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
size_t size;
size = data->br_stack->nr
* sizeof(struct perf_branch_entry);
perf_output_put(handle, data->br_stack->nr);
- if (perf_sample_save_hw_index(event))
+ if (branch_sample_hw_index(event))
perf_output_put(handle, data->br_stack->hw_idx);
perf_output_copy(handle, data->br_stack->entries, size);
} else {
@@ -7312,6 +7314,7 @@ void perf_prepare_sample(struct perf_event_header *header,
struct pt_regs *regs)
{
u64 sample_type = event->attr.sample_type;
+ u64 filtered_sample_type;
header->type = PERF_RECORD_SAMPLE;
header->size = sizeof(*header) + event->header_size;
@@ -7319,7 +7322,12 @@ void perf_prepare_sample(struct perf_event_header *header,
header->misc = 0;
header->misc |= perf_misc_flags(regs);
- __perf_event_header__init_id(header, data, event);
+ /*
+ * Clear the sample flags that have already been done by the
+ * PMU driver.
+ */
+ filtered_sample_type = sample_type & ~data->sample_flags;
+ __perf_event_header__init_id(header, data, event, filtered_sample_type);
if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
data->ip = perf_instruction_pointer(regs);
@@ -7327,7 +7335,7 @@ void perf_prepare_sample(struct perf_event_header *header,
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int size = 1;
- if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+ if (filtered_sample_type & PERF_SAMPLE_CALLCHAIN)
data->callchain = perf_callchain(event, regs);
size += data->callchain->nr;
@@ -7339,7 +7347,7 @@ void perf_prepare_sample(struct perf_event_header *header,
struct perf_raw_record *raw = data->raw;
int size;
- if (raw) {
+ if (raw && (data->sample_flags & PERF_SAMPLE_RAW)) {
struct perf_raw_frag *frag = &raw->frag;
u32 sum = 0;
@@ -7355,6 +7363,7 @@ void perf_prepare_sample(struct perf_event_header *header,
frag->pad = raw->size - sum;
} else {
size = sizeof(u64);
+ data->raw = NULL;
}
header->size += size;
@@ -7362,8 +7371,8 @@ void perf_prepare_sample(struct perf_event_header *header,
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
int size = sizeof(u64); /* nr */
- if (data->br_stack) {
- if (perf_sample_save_hw_index(event))
+ if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
+ if (branch_sample_hw_index(event))
size += sizeof(u64);
size += data->br_stack->nr
@@ -7412,6 +7421,20 @@ void perf_prepare_sample(struct perf_event_header *header,
header->size += size;
}
+ if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE)
+ data->weight.full = 0;
+
+ if (filtered_sample_type & PERF_SAMPLE_DATA_SRC)
+ data->data_src.val = PERF_MEM_NA;
+
+ if (filtered_sample_type & PERF_SAMPLE_TRANSACTION)
+ data->txn = 0;
+
+ if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_DATA_PAGE_SIZE)) {
+ if (filtered_sample_type & PERF_SAMPLE_ADDR)
+ data->addr = 0;
+ }
+
if (sample_type & PERF_SAMPLE_REGS_INTR) {
/* regs dump ABI info */
int size = sizeof(u64);
@@ -7427,7 +7450,8 @@ void perf_prepare_sample(struct perf_event_header *header,
header->size += size;
}
- if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+ if (sample_type & PERF_SAMPLE_PHYS_ADDR &&
+ filtered_sample_type & PERF_SAMPLE_PHYS_ADDR)
data->phys_addr = perf_virt_to_phys(data->addr);
#ifdef CONFIG_CGROUP_PERF
@@ -9998,8 +10022,16 @@ static void bpf_overflow_handler(struct perf_event *event,
goto out;
rcu_read_lock();
prog = READ_ONCE(event->prog);
- if (prog)
+ if (prog) {
+ if (prog->call_get_stack &&
+ (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
+ !(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) {
+ data->callchain = perf_callchain(event, regs);
+ data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
+ }
+
ret = bpf_prog_run(prog, &ctx);
+ }
rcu_read_unlock();
out:
__this_cpu_dec(bpf_prog_active);
@@ -10025,7 +10057,7 @@ static int perf_event_set_bpf_handler(struct perf_event *event,
if (event->attr.precise_ip &&
prog->call_get_stack &&
- (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY) ||
+ (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) ||
event->attr.exclude_callchain_kernel ||
event->attr.exclude_callchain_user)) {
/*
@@ -10942,7 +10974,7 @@ static ssize_t nr_addr_filters_show(struct device *dev,
{
struct pmu *pmu = dev_get_drvdata(dev);
- return snprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters);
+ return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters);
}
DEVICE_ATTR_RO(nr_addr_filters);
@@ -10953,7 +10985,7 @@ type_show(struct device *dev, struct device_attribute *attr, char *page)
{
struct pmu *pmu = dev_get_drvdata(dev);
- return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
+ return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->type);
}
static DEVICE_ATTR_RO(type);
@@ -10964,7 +10996,7 @@ perf_event_mux_interval_ms_show(struct device *dev,
{
struct pmu *pmu = dev_get_drvdata(dev);
- return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
+ return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->hrtimer_interval_ms);
}
static DEFINE_MUTEX(mux_interval_mutex);
@@ -11718,11 +11750,9 @@ err_pmu:
event->destroy(event);
module_put(pmu->module);
err_ns:
- if (event->ns)
- put_pid_ns(event->ns);
if (event->hw.target)
put_task_struct(event->hw.target);
- kmem_cache_free(perf_event_cache, event);
+ call_rcu(&event->rcu_head, free_event_rcu);
return ERR_PTR(err);
}