diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2020-05-08 15:00:12 +0200 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2020-05-08 15:00:12 +0200 |
commit | 059c6d68cfc5f85ba3ab71d71a6de380016f7936 (patch) | |
tree | 32dc5be3d88cb56d8e38c1f093a130978e34b3ab /tools/perf/util/record.c | |
parent | perf/x86/intel/pt: Drop pointless NULL assignment. (diff) | |
parent | perf flamegraph: Use /bin/bash for report and record scripts (diff) | |
download | linux-dev-059c6d68cfc5f85ba3ab71d71a6de380016f7936.tar.xz linux-dev-059c6d68cfc5f85ba3ab71d71a6de380016f7936.zip |
Merge tag 'perf-core-for-mingo-5.8-20200506' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf updates from Arnaldo:
perf/core improvements and fixes:
perf record:
- Introduce --switch-output-event, which allows arbitrary events to be set up
and read from a side band thread; when they take place, a signal
is sent to the main 'perf record' thread, reusing the --switch-output
code to take perf.data snapshots from the --overwrite ring buffer, e.g.:
# perf record --overwrite -e sched:* \
--switch-output-event syscalls:*connect* \
workload
will take perf.data.YYYYMMDDHHMMSS snapshots up to around the
connect syscalls.
Stephane Eranian:
- Add --num-synthesize-threads option to control degree of parallelism of the
synthesize_mmap() code which is scanning /proc/PID/task/PID/maps and can be
time consuming. This mimics pre-existing behaviour in 'perf top'.
Intel PT:
Adrian Hunter:
- Add support for synthesizing branch stacks for regular events (cycles,
instructions, etc) from Intel PT data.
perf bench:
Ian Rogers:
- Add a multi-threaded synthesize benchmark.
- Add kallsyms parsing benchmark.
Tommi Rantala:
- Fix div-by-zero if runtime is zero.
perf synthetic events:
- Remove use of sscanf from /proc reading when parsing pre-existing
threads to generate synthetic PERF_RECORD_{FORK,MMAP,COMM,etc} events.
tools api:
- Add a lightweight buffered reading API.
libsymbols:
- Parse kallsyms using new lightweight buffered reading io API.
perf parse-events:
- Fix memory leaks found on parse_events.
perf mem2node:
- Avoid double free related to realloc().
perf stat:
Jin Yao:
- Zero all the 'ena' and 'run' array slot stats for interval mode.
- Improve runtime stat for interval mode
Kajol Jain:
- Enable Hz/hz printing for --metric-only option
- Enhance JSON/metric infrastructure to handle "?".
perf tests:
Kajol Jain:
- Added test for runtime param in metric expression.
Tommi Rantala:
- Fix data path in the session topology test.
perf vendor events power9:
Kajol Jain:
- Add hv_24x7 socket/chip level metric events
Coresight:
Leo Yan:
- Move definition of 'traceid_list' global variable from header file.
Mike Leach:
- Update to build with latest opencsd version.
perf pmu:
Shaokun Zhang:
- Fix function name in comment, it's get_cpuid_str(), not get_cpustr()
Stephane Eranian:
- Add perf_pmu__find_by_type() helper
perf script:
Stephane Eranian:
- Remove extraneous newline in perf_sample__fprintf_regs().
Ian Rogers:
- Avoid NULL dereference on symbol.
tools feature:
Stephane Eranian:
- Add support for detecting libpfm4.
perf symbol:
Thomas Richter:
- Fix kernel symbol address display in TUI verbose mode.
perf cgroup:
Tommi Rantala:
- Avoid needless closing of unopened fd
libperf:
He Zhe:
- Add NULL pointer check for cpu_map iteration and NULL
assignment for all_cpus.
Ian Rogers:
- Fix a refcount leak in evlist method.
Arnaldo Carvalho de Melo:
- Rename the code in tools/perf/util, i.e. perf tooling specific, that
operates on 'struct evsel' to evsel__, leaving the perf_evsel__
namespace for the routines in tools/lib/perf/ that operate on
'struct perf_evsel'.
tools/perf specific libraries:
Konstantin Khlebnikov:
- Fix reading new topology attribute "core_cpus"
- Simplify checking if SMT is active.
perf flamegraph:
Arnaldo Carvalho de Melo:
- Use /bin/bash for report and record scripts, just like all other
such scripts, fixing a package dependency bug in a Linaro
OpenEmbedded build checker.
perf evlist:
Jagadeesh Pagadala:
- Remove duplicate headers.
Miscellaneous:
Zou Wei:
- Remove unneeded semicolon in libtraceevent, 'perf c2c' and others.
- Fix warning about assignment of 0/1 to bool variable in 'perf report'
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util/record.c')
-rw-r--r-- | tools/perf/util/record.c | 173 |
1 files changed, 9 insertions, 164 deletions
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 6d3e3df6e2a1..a4cc11592f6b 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -10,174 +10,20 @@ #include <subcmd/parse-options.h> #include <perf/cpumap.h> #include "cloexec.h" +#include "util/perf_api_probe.h" #include "record.h" #include "../perf-sys.h" -typedef void (*setup_probe_fn_t)(struct evsel *evsel); - -static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) -{ - struct evlist *evlist; - struct evsel *evsel; - unsigned long flags = perf_event_open_cloexec_flag(); - int err = -EAGAIN, fd; - static pid_t pid = -1; - - evlist = evlist__new(); - if (!evlist) - return -ENOMEM; - - if (parse_events(evlist, str, NULL)) - goto out_delete; - - evsel = evlist__first(evlist); - - while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); - if (fd < 0) { - if (pid == -1 && errno == EACCES) { - pid = 0; - continue; - } - goto out_delete; - } - break; - } - close(fd); - - fn(evsel); - - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); - if (fd < 0) { - if (errno == EINVAL) - err = -EINVAL; - goto out_delete; - } - close(fd); - err = 0; - -out_delete: - evlist__delete(evlist); - return err; -} - -static bool perf_probe_api(setup_probe_fn_t fn) -{ - const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; - struct perf_cpu_map *cpus; - int cpu, ret, i = 0; - - cpus = perf_cpu_map__new(NULL); - if (!cpus) - return false; - cpu = cpus->map[0]; - perf_cpu_map__put(cpus); - - do { - ret = perf_do_probe_api(fn, cpu, try[i++]); - if (!ret) - return true; - } while (ret == -EAGAIN && try[i]); - - return false; -} - -static void perf_probe_sample_identifier(struct evsel *evsel) -{ - evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER; -} - -static void perf_probe_comm_exec(struct evsel *evsel) -{ - evsel->core.attr.comm_exec = 1; -} - -static void perf_probe_context_switch(struct evsel *evsel) -{ - 
evsel->core.attr.context_switch = 1; -} - -bool perf_can_sample_identifier(void) -{ - return perf_probe_api(perf_probe_sample_identifier); -} - -static bool perf_can_comm_exec(void) -{ - return perf_probe_api(perf_probe_comm_exec); -} - -bool perf_can_record_switch_events(void) -{ - return perf_probe_api(perf_probe_context_switch); -} - -bool perf_can_record_cpu_wide(void) -{ - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_CPU_CLOCK, - .exclude_kernel = 1, - }; - struct perf_cpu_map *cpus; - int cpu, fd; - - cpus = perf_cpu_map__new(NULL); - if (!cpus) - return false; - cpu = cpus->map[0]; - perf_cpu_map__put(cpus); - - fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); - if (fd < 0) - return false; - close(fd); - - return true; -} - -/* - * Architectures are expected to know if AUX area sampling is supported by the - * hardware. Here we check for kernel support. - */ -bool perf_can_aux_sample(void) -{ - struct perf_event_attr attr = { - .size = sizeof(struct perf_event_attr), - .exclude_kernel = 1, - /* - * Non-zero value causes the kernel to calculate the effective - * attribute size up to that byte. - */ - .aux_sample_size = 1, - }; - int fd; - - fd = sys_perf_event_open(&attr, -1, 0, -1, 0); - /* - * If the kernel attribute is big enough to contain aux_sample_size - * then we assume that it is supported. We are relying on the kernel to - * validate the attribute size before anything else that could be wrong. - */ - if (fd < 0 && errno == E2BIG) - return false; - if (fd >= 0) - close(fd); - - return true; -} - /* - * perf_evsel__config_leader_sampling() uses special rules for leader sampling. + * evsel__config_leader_sampling() uses special rules for leader sampling. * However, if the leader is an AUX area event, then assume the event to sample * is the next event. 
*/ -static struct evsel *perf_evsel__read_sampler(struct evsel *evsel, - struct evlist *evlist) +static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist) { struct evsel *leader = evsel->leader; - if (perf_evsel__is_aux_event(leader)) { + if (evsel__is_aux_event(leader)) { evlist__for_each_entry(evlist, evsel) { if (evsel->leader == leader && evsel != evsel->leader) return evsel; @@ -187,8 +33,7 @@ static struct evsel *perf_evsel__read_sampler(struct evsel *evsel, return leader; } -static void perf_evsel__config_leader_sampling(struct evsel *evsel, - struct evlist *evlist) +static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist) { struct perf_event_attr *attr = &evsel->core.attr; struct evsel *leader = evsel->leader; @@ -197,7 +42,7 @@ static void perf_evsel__config_leader_sampling(struct evsel *evsel, if (!leader->sample_read) return; - read_sampler = perf_evsel__read_sampler(evsel, evlist); + read_sampler = evsel__read_sampler(evsel, evlist); if (evsel == read_sampler) return; @@ -246,14 +91,14 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, use_comm_exec = perf_can_comm_exec(); evlist__for_each_entry(evlist, evsel) { - perf_evsel__config(evsel, opts, callchain); + evsel__config(evsel, opts, callchain); if (evsel->tracking && use_comm_exec) evsel->core.attr.comm_exec = 1; } /* Configure leader sampling here now that the sample type is known */ evlist__for_each_entry(evlist, evsel) - perf_evsel__config_leader_sampling(evsel, evlist); + evsel__config_leader_sampling(evsel, evlist); if (opts->full_auxtrace) { /* @@ -277,7 +122,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, if (sample_id) { evlist__for_each_entry(evlist, evsel) - perf_evsel__set_sample_id(evsel, use_sample_identifier); + evsel__set_sample_id(evsel, use_sample_identifier); } perf_evlist__set_id_pos(evlist); |