From f1cedfb82858c8a7ec21e45d0ce7b6e2ce9edea0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Sep 2019 11:50:15 -0300 Subject: perf env: Add routine to read the env->cpuid from the running machine In 'perf top' we use that cpuid when initializing the per arch annotation init routines (e.g. x86__annotate_init()) and in that case (live mode, 'perf top') we need to obtain it from the running machine, not from a perf.data file header. Provide a means to do that. Will be used by 'perf top' in a followup patch. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-h2wb3sx7u7znx6lqfezrh7ca@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 16 ++++++++++++++++ tools/perf/util/env.h | 1 + 2 files changed, 17 insertions(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 3baca06786fb..2a91a10ccfcc 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -2,6 +2,7 @@ #include "cpumap.h" #include "debug.h" #include "env.h" +#include "util/header.h" #include #include #include "bpf-event.h" @@ -256,6 +257,21 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return 0; } +int perf_env__read_cpuid(struct perf_env *env) +{ + char cpuid[128]; + int err = get_cpuid(cpuid, sizeof(cpuid)); + + if (err) + return err; + + free(env->cpuid); + env->cpuid = strdup(cpuid); + if (env->cpuid == NULL) + return ENOMEM; + return 0; +} + static int perf_env__read_arch(struct perf_env *env) { struct utsname uts; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index db40906e2937..a3059dc1abe5 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -104,6 +104,7 @@ void perf_env__exit(struct perf_env *env); int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); +int perf_env__read_cpuid(struct perf_env *env); int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); -- cgit 
v1.2.3-59-g8ed1b From 608127f73779bfc199158b61efdbdb690720e542 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Sep 2019 11:53:00 -0300 Subject: perf top: Initialize perf_env->cpuid, needed by the per arch annotation init routine Just read it so that later on the per arch init routine can use it, e.g. x86__annotate_init(). When using a perf.data file this is obtained from a header that was put there by 'perf record', and then it may be for another machine, another arch. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-4t4n3o8l8s0tc2b1pq53hyr4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1f60124eb19b..611d03030abc 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1560,6 +1560,17 @@ int cmd_top(int argc, const char **argv) status = perf_config(perf_top_config, &top); if (status) return status; + /* + * Since the per arch annotation init routine may need the cpuid, read + * it here, since we are not getting this from the perf.data header. + */ + status = perf_env__read_cpuid(&perf_env); + if (status) { + pr_err("Couldn't read the cpuid for this machine: %s\n", + str_error_r(errno, errbuf, sizeof(errbuf))); + goto out_delete_evlist; + } + top.evlist->env = &perf_env; argc = parse_options(argc, argv, options, top_usage, 0); if (argc) -- cgit v1.2.3-59-g8ed1b From c0e53476ab5087353547cbcd37f001d98941326c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Oct 2019 11:14:26 -0300 Subject: perf evlist: Adopt __set_tracepoint_handlers method from perf_session It all operates on the evsels in the session's evlist, so move it to the evlist layer to make it useful to tools not using perf_session, just evlists, like 'perf trace' in live mode. 
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-9oc53gnfi53vg82fvolkm85g@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 24 ++++++++++++++++++++++++ tools/perf/util/evlist.h | 7 +++++++ tools/perf/util/session.c | 29 ----------------------------- tools/perf/util/session.h | 6 +----- 4 files changed, 32 insertions(+), 34 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d277a98e62df..b4c43ac4583f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -186,6 +186,30 @@ void perf_evlist__splice_list_tail(struct evlist *evlist, } } +int __evlist__set_tracepoints_handlers(struct evlist *evlist, + const struct evsel_str_handler *assocs, size_t nr_assocs) +{ + struct evsel *evsel; + size_t i; + int err; + + for (i = 0; i < nr_assocs; i++) { + // Adding a handler for an event not in this evlist, just ignore it. + evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name); + if (evsel == NULL) + continue; + + err = -EEXIST; + if (evsel->handler != NULL) + goto out; + evsel->handler = assocs[i].handler; + } + + err = 0; +out: + return err; +} + void __perf_evlist__set_leader(struct list_head *list) { struct evsel *evsel, *leader; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 7cfe75522ba5..00eab9435847 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -118,6 +118,13 @@ void perf_evlist__stop_sb_thread(struct evlist *evlist); int perf_evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler); +int __evlist__set_tracepoints_handlers(struct evlist *evlist, + const struct evsel_str_handler *assocs, + size_t nr_assocs); + +#define evlist__set_tracepoints_handlers(evlist, array) \ + __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) + void __perf_evlist__set_sample_bit(struct evlist *evlist, enum perf_event_sample_format bit); void 
__perf_evlist__reset_sample_bit(struct evlist *evlist, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 061bb4d6a3f5..6cc32f5ec043 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2355,35 +2355,6 @@ void perf_session__fprintf_info(struct perf_session *session, FILE *fp, fprintf(fp, "# ========\n#\n"); } - -int __perf_session__set_tracepoints_handlers(struct perf_session *session, - const struct evsel_str_handler *assocs, - size_t nr_assocs) -{ - struct evsel *evsel; - size_t i; - int err; - - for (i = 0; i < nr_assocs; i++) { - /* - * Adding a handler for an event not in the session, - * just ignore it. - */ - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, assocs[i].name); - if (evsel == NULL) - continue; - - err = -EEXIST; - if (evsel->handler != NULL) - goto out; - evsel->handler = assocs[i].handler; - } - - err = 0; -out: - return err; -} - int perf_event__process_id_index(struct perf_session *session, union perf_event *event) { diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index b4c9428c18f0..8456e1d868fd 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -120,12 +120,8 @@ void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full); struct evsel_str_handler; -int __perf_session__set_tracepoints_handlers(struct perf_session *session, - const struct evsel_str_handler *assocs, - size_t nr_assocs); - #define perf_session__set_tracepoints_handlers(session, array) \ - __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array)) + __evlist__set_tracepoints_handlers(session->evlist, array, ARRAY_SIZE(array)) extern volatile int session_done; -- cgit v1.2.3-59-g8ed1b From 206d635aa594a5246cd181b3be39d1e3b2126f68 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Oct 2019 11:31:59 -0300 Subject: perf trace: Make evlist__set_evsel_handler() affect just entries without a handler Renaming it to 
evlist__set_default_evsel_handler(), to better reflect what we want to do, which is to set a default handler for events we still haven't set a custom handler, like the ones for "msr:write_msr", etc that are coming soon. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-e1bit7upnpmtsayh8039kfuw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bb5130d02155..ee330f50b450 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3858,12 +3858,14 @@ static int parse_pagefaults(const struct option *opt, const char *str, return 0; } -static void evlist__set_evsel_handler(struct evlist *evlist, void *handler) +static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler) { struct evsel *evsel; - evlist__for_each_entry(evlist, evsel) - evsel->handler = handler; + evlist__for_each_entry(evlist, evsel) { + if (evsel->handler == NULL) + evsel->handler = handler; + } } static int evlist__set_syscall_tp_fields(struct evlist *evlist) @@ -4287,7 +4289,7 @@ int cmd_trace(int argc, const char **argv) } if (trace.evlist->core.nr_entries > 0) { - evlist__set_evsel_handler(trace.evlist, trace__event_handler); + evlist__set_default_evsel_handler(trace.evlist, trace__event_handler); if (evlist__set_syscall_tp_fields(trace.evlist)) { perror("failed to set syscalls:* tracepoint fields"); goto out; -- cgit v1.2.3-59-g8ed1b From 9b2036cd329924082acfa5dec58deec12fa1f5e8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Oct 2019 15:16:33 -0300 Subject: perf trace: Separate 'struct syscall_fmt' definition from syscall_fmts variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As this has all the things needed to format tracepoints events, not just syscalls, that, after all, are just 
tracepoints with a set in stone ABI, i.e. order and number of parameters. For tracepoints we'll create a static struct syscall_fmt tracepoint_fmts[] array and will fill the ->arg[] entries with the beautifier for each positional argument and record the name, then, when we need it, we'll just check that the position has the same name, maybe even type, so that we can do some check that the tracepoint hasn't changed, if it has, we can even reorder things. Keep calling it syscall_fmt but use it as well for tracepoints, do it this way to minimize changes and reuse what is in place for syscalls, we'll see. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-2x1jgiev13zt4njaanlnne0d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ee330f50b450..cb853434d761 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -702,7 +702,7 @@ struct syscall_arg_fmt { bool show_zero; }; -static struct syscall_fmt { +struct syscall_fmt { const char *name; const char *alias; struct { @@ -714,7 +714,9 @@ static struct syscall_fmt { bool errpid; bool timeout; bool hexret; -} syscall_fmts[] = { +}; + +static struct syscall_fmt syscall_fmts[] = { { .name = "access", .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, { .name = "arch_prctl", -- cgit v1.2.3-59-g8ed1b From bcddbfc5c8c952175e9a5f1a4186685fa0338a14 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Oct 2019 15:27:55 -0300 Subject: perf trace: Generalize the syscall_fmt find routines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To allow them to be used with other stuff, such as tracepoints. 
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-od3gzg77ppqgnnrxqv40fvgx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index cb853434d761..313dfc1cefc5 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -966,24 +966,35 @@ static int syscall_fmt__cmp(const void *name, const void *fmtp) return strcmp(name, fmt->name); } +static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name) +{ + return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); +} + static struct syscall_fmt *syscall_fmt__find(const char *name) { const int nmemb = ARRAY_SIZE(syscall_fmts); - return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); + return __syscall_fmt__find(syscall_fmts, nmemb, name); } -static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias) +static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias) { - int i, nmemb = ARRAY_SIZE(syscall_fmts); + int i; for (i = 0; i < nmemb; ++i) { - if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0) - return &syscall_fmts[i]; + if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0) + return &fmts[i]; } return NULL; } +static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias) +{ + const int nmemb = ARRAY_SIZE(syscall_fmts); + return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias); +} + /* * is_exit: is this "exit" or "exit_group"? * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter. 
-- cgit v1.2.3-59-g8ed1b From 7e035929f3fec70d411fb660c434f4a7f8ca386d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Oct 2019 15:44:44 -0300 Subject: perf trace: Postpone parsing .perfconfig trace.add_events to after --verbose is processed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we add events via the '[trace]' section in perfconfig the command line options are not yet processed, so when something goes wrong with parsing those events and using --verbose is advised, we end up not getting any more verbosity by doing so. So just copy the trace.add_events string for later processing, after we processed --verbose and the other command line options. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-d6wbnz85ftqljdll6ynjyjd8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 313dfc1cefc5..3d54316639a4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -162,6 +162,7 @@ struct trace { bool force; bool vfs_getname; int trace_pgfaults; + char *perfconfig_events; struct { struct ordered_events data; u64 last; @@ -4044,15 +4045,11 @@ static int trace__config(const char *var, const char *value, void *arg) int err = 0; if (!strcmp(var, "trace.add_events")) { - struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", - "event selector. use 'perf list' to list available events", - parse_events_option); - /* - * We can't propagate parse_event_option() return, as it is 1 - * for failure while perf_config() expects -1. 
- */ - if (parse_events_option(&o, value, 0)) - err = -1; + trace->perfconfig_events = strdup(value); + if (trace->perfconfig_events == NULL) { + pr_err("Not enough memory for %s\n", "trace.add_events"); + return -1; + } } else if (!strcmp(var, "trace.show_timestamp")) { trace->show_tstamp = perf_config_bool(var, value); } else if (!strcmp(var, "trace.show_duration")) { @@ -4224,6 +4221,21 @@ int cmd_trace(int argc, const char **argv) argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); + /* + * Now that we have --verbose figured out, lets see if we need to parse + * events from .perfconfig, so that if those events fail parsing, say some + * BPF program fails, then we'll be able to use --verbose to see what went + * wrong in more detail. + */ + if (trace.perfconfig_events != NULL) { + struct parse_events_error parse_err = { .idx = 0, }; + + err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err); + if (err) { + parse_events_print_error(&parse_err, trace.perfconfig_events); + goto out; + } + } if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) { usage_with_options_msg(trace_usage, trace_options, @@ -4441,5 +4453,6 @@ out_close: if (output_name != NULL) fclose(trace.output); out: + zfree(&trace.perfconfig_events); return err; } -- cgit v1.2.3-59-g8ed1b From 8bd436b006d4493964c310606f2eb8e56680126c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 2 Oct 2019 12:54:07 -0300 Subject: perf trace augmented_syscalls: Do not show syscalls when none was asked for MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When not using augmented syscalls, i.e. 
not passing thru the command line an eBPF source or object file event that provides the __augmented_syscalls__ BPF_MAP_TYPE_PERF_EVENT_ARRAY, etc, as with: perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c or passing that augmented eBPF source/object via the trace.add_events in .perfconfig file, we were assuming that syscalls were asked for, differing from when not using augmented syscalls at all. This is confusing when using .perfconfig to hide the fact we're using the augmenter, i.e. using: # perf trace -e sched:* sleep 1 Will show both the scheduler tracepoints and the syscalls, where what we want is to show just the scheduler tracepoints. To see the scheduler tracepoints and some specific syscall strace-like formatting, one has to use: # perf trace -e sched:*,nanosleep sleep 1 Or, if wanting all the syscalls: # perf trace -e sched:* --syscalls sleep 1 This way 'perf trace' can be used to trace just a set of tracepoints while allowing for mixing with strace-like when desired, by simply adding to the mix the name of the syscalls to show in addition to the tracepoints. Fix it so that the behaviour using the eBPF based syscall augmenter is the same as when not using one. 
Testing: Before this patch, with this ~/.perfconfig: # egrep -B1 ^[[:space:]]+add_events ~/.perfconfig [trace] add_events = /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o # That points to this pre-compiled eBPF syscall augmenter: # file /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o: ELF 64-bit LSB relocatable, eBPF, version 1 (SYSV), with debug_info, not stripped And when asking for _only_ sched:sched_switch and sched:sched_wakeup we were unconditionally getting all the syscalls formatted strace-like: # perf trace -e sched:*switch,sched:*wakeup sleep 1 |& tail 0.633 fstat(3, 0x7fe11d030ac0) = 0 0.635 mmap(NULL, 217750512, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fe10fec5000 0.643 close(3) = 0 0.668 nanosleep(0x7fff649a3a90, NULL) ... 0.672 sched:sched_switch:prev_comm=sleep prev_pid=4417 prev_prio=120 prev_state=S ==> next_comm=swapper/6 next_pid=0 next_prio=120 1000.822 sched:sched_wakeup:comm=sleep pid=4417 prio=120 target_cpu=006 0.668 ... [continued]: nanosleep()) = 0 1000.923 close(1) = 0 1000.941 close(2) = 0 1000.974 exit_group(0) = ? 
# After the patch: # perf trace -e sched:*switch,sched:*wakeup sleep 1 0.000 sched:sched_wakeup:comm=perf pid=5529 prio=120 target_cpu=005 1.186 sched:sched_switch:prev_comm=sleep prev_pid=5529 prev_prio=120 prev_state=S ==> next_comm=swapper/5 next_pid=0 next_prio=120 1001.573 sched:sched_wakeup:comm=sleep pid=5529 prio=120 target_cpu=005 # If we add the "open*" syscalls to the mix then the eBPF augmented _will_ be used and these syscalls will be traced together with the specified sched tracepoints: # cd /sys/kernel/debug/tracing/events/syscalls/ # ls -1d sys_enter_open* sys_enter_open sys_enter_openat sys_enter_open_by_handle_at sys_enter_open_tree # # perf trace -e open*,sched:*switch,sched:*wakeup sleep 1 0.000 sched:sched_wakeup:comm=perf pid=5580 prio=120 target_cpu=005 0.590 openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 0.616 openat(AT_FDCWD, "/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 0.846 openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3 0.891 sched:sched_switch:prev_comm=sleep prev_pid=5580 prev_prio=120 prev_state=S ==> next_comm=swapper/5 next_pid=0 next_prio=120 1001.005 sched:sched_wakeup:comm=sleep pid=5580 prio=120 target_cpu=005 # And as we can see, the pathnames were collected via the eBPF augmenters. If we don't specify anything it'll trace all syscalls: # perf trace sleep 1 |& tail 0.299 brk(0x5597543a3000) = 0x5597543a3000 0.302 brk(NULL) = 0x5597543a3000 0.307 openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3 0.313 fstat(3, 0x7feece50cac0) = 0 0.315 mmap(NULL, 217750512, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7feec13a1000 0.323 close(3) = 0 0.354 nanosleep(0x7ffe338856e0, NULL) = 0 1000.641 close(1) = 0 1000.655 close(2) = 0 1000.673 exit_group(0) = ? 
# Ditto if we don't use .perfconfig's trace.add_events but instead pass just the augmenter as a command line event: # vim ~/.perfconfig # egrep -B1 ^[[:space:]]+add_events ~/.perfconfig # perf trace -e /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o sleep 1 |& tail 0.294 brk(0x55ae08ec3000) = 0x55ae08ec3000 0.297 brk(NULL) = 0x55ae08ec3000 0.302 openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3 0.309 fstat(3, 0x7f726488fac0) = 0 0.311 mmap(NULL, 217750512, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f7257724000 0.319 close(3) = 0 0.347 nanosleep(0x7ffe23643a70, NULL) = 0 1000.560 close(1) = 0 1000.575 close(2) = 0 1000.593 exit_group(0) = ? # As well as that + some syscall names for strace-like formatting: # perf trace -e socket,connect,/home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o ssh localhost 0.000 socket(PF_LOCAL, SOCK_STREAM|CLOEXEC|NONBLOCK, 0) = 3 0.021 connect(3, { .family: PF_LOCAL, path: /var/run/nscd/socket }, 110) = -1 ENOENT (No such file or directory) 0.034 socket(PF_LOCAL, SOCK_STREAM|CLOEXEC|NONBLOCK, 0) = 3 0.041 connect(3, { .family: PF_LOCAL, path: /var/run/nscd/socket }, 110) = -1 ENOENT (No such file or directory) 0.163 socket(PF_LOCAL, SOCK_STREAM, 0) = 4 0.185 connect(4, { .family: PF_LOCAL, path: /var/lib/sss/pipes/nss }, 110) = 0 0.670 socket(PF_LOCAL, SOCK_STREAM|CLOEXEC|NONBLOCK, 0) = 7 0.684 connect(7, { .family: PF_LOCAL, path: /var/run/nscd/socket }, 110) = -1 ENOENT (No such file or directory) 0.694 socket(PF_LOCAL, SOCK_STREAM|CLOEXEC|NONBLOCK, 0) = 7 0.701 connect(7, { .family: PF_LOCAL, path: /var/run/nscd/socket }, 110) = -1 ENOENT (No such file or directory) 0.994 socket(PF_LOCAL, SOCK_STREAM|CLOEXEC|NONBLOCK, 0) = 5 1.006 connect(5, { .family: PF_LOCAL, path: /var/run/nscd/socket }, 110) = -1 ENOENT (No such file or directory) 1.014 socket(PF_LOCAL, SOCK_STREAM|CLOEXEC|NONBLOCK, 0) = 5 1.022 connect(5, { .family: PF_LOCAL, path: /var/run/nscd/socket }, 110) = -1 ENOENT 
(No such file or directory) 1.068 socket(PF_INET, SOCK_STREAM, IPPROTO_TCP) = 5 1.087 connect(5, { .family: PF_INET, port: 22, addr: 127.0.0.1 }, 16) = 0 24.299 socket(PF_LOCAL, SOCK_STREAM, 0) = 6 24.337 connect(6, { .family: PF_LOCAL, path: /var/run/.heim_org.h5l.kcm-socket }, 110) = 0 28.441 socket(PF_LOCAL, SOCK_STREAM, 0) = 6 28.516 connect(6, { .family: PF_LOCAL, path: /var/run/.heim_org.h5l.kcm-socket }, 110) = 0 root@localhost's password:^C # Everything works without augmenters: # egrep -B1 ^[[:space:]]+add_events ~/.perfconfig # perf trace sleep 1 |& tail 0.261 brk(0x5635068ac000) = 0x5635068ac000 0.264 brk(NULL) = 0x5635068ac000 0.268 openat(AT_FDCWD, 0xdce642a0, O_RDONLY|O_CLOEXEC) = 3 0.275 fstat(3, 0x7f3fdce97ac0) = 0 0.277 mmap(NULL, 217750512, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f3fcfd2c000 0.284 close(3) = 0 0.310 nanosleep(0x7ffdaea6ecd0, NULL) = 0 1000.552 close(1) = 0 1000.565 close(2) = 0 1000.580 exit_group(0) = ? # # perf trace -e connect ssh localhost 0.000 connect(3, 0x58266930, 110) = -1 ENOENT (No such file or directory) 0.022 connect(3, 0x58266af0, 110) = -1 ENOENT (No such file or directory) 0.150 connect(4, 0x58266b00, 110) = 0 0.490 connect(7, 0x58264150, 110) = -1 ENOENT (No such file or directory) 0.505 connect(7, 0x58264300, 110) = -1 ENOENT (No such file or directory) 0.832 connect(5, 0x58266220, 110) = -1 ENOENT (No such file or directory) 0.847 connect(5, 0x582663e0, 110) = -1 ENOENT (No such file or directory) 0.899 connect(5, 0x95ba0630, 16) = 0 25.619 connect(6, 0x58266360, 110) = 0 40.564 connect(6, 0x58266330, 110) = 0 root@localhost's password: ^C # Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-624f6jxic04031tnt40va4dd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 105 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-trace.c 
b/tools/perf/builtin-trace.c index 3d54316639a4..6c7025370ec0 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3117,7 +3117,27 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) return err; } -#else + +static void trace__delete_augmented_syscalls(struct trace *trace) +{ + struct evsel *evsel, *tmp; + + evlist__remove(trace->evlist, trace->syscalls.events.augmented); + evsel__delete(trace->syscalls.events.augmented); + trace->syscalls.events.augmented = NULL; + + evlist__for_each_entry_safe(trace->evlist, tmp, evsel) { + if (evsel->bpf_obj == trace->bpf_obj) { + evlist__remove(trace->evlist, evsel); + evsel__delete(evsel); + } + + } + + bpf_object__close(trace->bpf_obj); + trace->bpf_obj = NULL; +} +#else // HAVE_LIBBPF_SUPPORT static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused) { return 0; @@ -3138,8 +3158,27 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_ { return 0; } + +static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused) +{ +} #endif // HAVE_LIBBPF_SUPPORT +static bool trace__only_augmented_syscalls_evsels(struct trace *trace) +{ + struct evsel *evsel; + + evlist__for_each_entry(trace->evlist, evsel) { + if (evsel == trace->syscalls.events.augmented || + evsel->bpf_obj == trace->bpf_obj) + continue; + + return false; + } + + return true; +} + static int trace__set_ev_qualifier_filter(struct trace *trace) { if (trace->syscalls.map) @@ -3316,7 +3355,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) goto out_error_sched_stat_runtime; - /* * If a global cgroup was set, apply it to all the events without an * explicit cgroup. 
I.e.: @@ -4221,6 +4259,22 @@ int cmd_trace(int argc, const char **argv) argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); + + /* + * Here we already passed thru trace__parse_events_option() and it has + * already figured out if -e syscall_name, if not but if --event + * foo:bar was used, the user is interested _just_ in those, say, + * tracepoint events, not in the strace-like syscall-name-based mode. + * + * This is important because we need to check if strace-like mode is + * needed to decide if we should filter out the eBPF + * __augmented_syscalls__ code, if it is in the mix, say, via + * .perfconfig trace.add_events, and filter those out. + */ + if (!trace.trace_syscalls && !trace.trace_pgfaults && + trace.evlist->core.nr_entries == 0 /* Was --events used? */) { + trace.trace_syscalls = true; + } /* * Now that we have --verbose figured out, lets see if we need to parse * events from .perfconfig, so that if those events fail parsing, say some @@ -4265,9 +4319,45 @@ int cmd_trace(int argc, const char **argv) trace.bpf_obj = evsel->bpf_obj; - trace__set_bpf_map_filtered_pids(&trace); - trace__set_bpf_map_syscalls(&trace); - trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented"); + /* + * If we have _just_ the augmenter event but don't have an + * explicit --syscalls, then assume we want all strace-like + * syscalls: + */ + if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace)) + trace.trace_syscalls = true; + /* + * So, if we have a syscall augmenter, but trace_syscalls, aka + * strace-like syscall tracing is not set, then we need to throw + * away the augmenter, i.e. all the events that were created + * from that BPF object file. + * + * This is more to fix the current .perfconfig trace.add_events + * style of setting up the strace-like eBPF based syscall point + * payload augmenter. 
+ * + * All this complexity will be avoided by adding an alternative + * to trace.add_events in the form of + * trace.bpf_augmented_syscalls, that will be only parsed if we + * need it. + * + * .perfconfig trace.add_events is still useful if we want, for + * instance, have msr_write.msr in some .perfconfig profile based + * 'perf trace --config determinism.profile' mode, where for some + * particular goal/workload type we want a set of events and + * output mode (with timings, etc) instead of having to add + * all via the command line. + * + * Also --config to specify an alternate .perfconfig file needs + * to be implemented. + */ + if (!trace.trace_syscalls) { + trace__delete_augmented_syscalls(&trace); + } else { + trace__set_bpf_map_filtered_pids(&trace); + trace__set_bpf_map_syscalls(&trace); + trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented"); + } } err = bpf__setup_stdout(trace.evlist); @@ -4410,11 +4500,6 @@ init_augmented_syscall_tp: if (trace.summary_only) trace.summary = trace.summary_only; - if (!trace.trace_syscalls && !trace.trace_pgfaults && - trace.evlist->core.nr_entries == 0 /* Was --events used? */) { - trace.trace_syscalls = true; - } - if (output_name != NULL) { err = trace__open_output(&trace, output_name); if (err < 0) { -- cgit v1.2.3-59-g8ed1b From 181ea40a24af9a60c2ee75d4f271980f6453f585 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Aug 2019 11:32:11 +0300 Subject: perf scripts python: exported-sql-viewer.py: Add LookupModel() Add LookupModel() to find a model in the model cache without creating it. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20190821083216.1340-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 61b3911d91e6..18ad04654adc 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -341,6 +341,15 @@ def LookupCreateModel(model_name, create_fn): model_cache_lock.release() return model +def LookupModel(model_name): + model_cache_lock.acquire() + try: + model = model_cache[model_name] + except: + model = None + model_cache_lock.release() + return model + # Find bar class FindBar(): -- cgit v1.2.3-59-g8ed1b From 42c303ff9a25c4b95a75f8f10d08661183497d41 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Aug 2019 11:32:12 +0300 Subject: perf scripts python: exported-sql-viewer.py: Add HBoxLayout and VBoxLayout Add layout classes HBoxLayout and VBoxLayout. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20190821083216.1340-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 41 ++++++++++++++++++------ 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 18ad04654adc..9767a5f802e5 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -980,20 +980,41 @@ class CallTreeModel(CallGraphModelBase): ids.insert(0, query.value(1)) return ids -# Vertical widget layout +# Horizontal layout -class VBox(): +class HBoxLayout(QHBoxLayout): - def __init__(self, w1, w2, w3=None): - self.vbox = QWidget() - self.vbox.setLayout(QVBoxLayout()) + def __init__(self, *children): + super(HBoxLayout, self).__init__() + + self.layout().setContentsMargins(0, 0, 0, 0) + for child in children: + if child.isWidgetType(): + self.layout().addWidget(child) + else: + self.layout().addLayout(child) + +# Vertical layout + +class VBoxLayout(QVBoxLayout): - self.vbox.layout().setContentsMargins(0, 0, 0, 0) + def __init__(self, *children): + super(VBoxLayout, self).__init__() - self.vbox.layout().addWidget(w1) - self.vbox.layout().addWidget(w2) - if w3: - self.vbox.layout().addWidget(w3) + self.layout().setContentsMargins(0, 0, 0, 0) + for child in children: + if child.isWidgetType(): + self.layout().addWidget(child) + else: + self.layout().addLayout(child) + +# Vertical layout widget + +class VBox(): + + def __init__(self, *children): + self.vbox = QWidget() + self.vbox.setLayout(VBoxLayout(*children)) def Widget(self): return self.vbox -- cgit v1.2.3-59-g8ed1b From 9a9dae36556e8f7689f68f05d169ac6c132c5f15 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Aug 2019 11:32:13 +0300 Subject: perf scripts python: exported-sql-viewer.py: Add global time range calculations Add 
calculations to determine a time range that encompasses all data. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20190821083216.1340-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 113 ++++++++++++++++++++++- 1 file changed, 109 insertions(+), 4 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 9767a5f802e5..0dcc9a03b1b0 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -2088,10 +2088,8 @@ class SampleTimeRangesDataItem(LineEditDataItem): QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1") if query.next(): self.last_id = int(query.value(0)) - self.last_time = int(query.value(1)) - QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1") - if query.next(): - self.first_time = int(query.value(0)) + self.first_time = int(glb.HostStartTime()) + self.last_time = int(glb.HostFinishTime()) if placeholder_text: placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time) @@ -3500,6 +3498,9 @@ class Glb(): self.have_disassembler = True except: self.have_disassembler = False + self.host_machine_id = 0 + self.host_start_time = 0 + self.host_finish_time = 0 def FileFromBuildId(self, build_id): file_name = self.buildid_dir + build_id[0:2] + "/" + build_id[2:] + "/elf" @@ -3532,6 +3533,110 @@ class Glb(): except: pass + def GetHostMachineId(self): + query = QSqlQuery(self.db) + QueryExec(query, "SELECT id FROM machines WHERE pid = -1") + if query.next(): + self.host_machine_id = query.value(0) + else: + self.host_machine_id = 0 + return self.host_machine_id + + def HostMachineId(self): + if self.host_machine_id: + return self.host_machine_id + return self.GetHostMachineId() + + def SelectValue(self, sql): + query = QSqlQuery(self.db) + try: + 
QueryExec(query, sql) + except: + return None + if query.next(): + return Decimal(query.value(0)) + return None + + def SwitchesMinTime(self, machine_id): + return self.SelectValue("SELECT time" + " FROM context_switches" + " WHERE time != 0 AND machine_id = " + str(machine_id) + + " ORDER BY id LIMIT 1") + + def SwitchesMaxTime(self, machine_id): + return self.SelectValue("SELECT time" + " FROM context_switches" + " WHERE time != 0 AND machine_id = " + str(machine_id) + + " ORDER BY id DESC LIMIT 1") + + def SamplesMinTime(self, machine_id): + return self.SelectValue("SELECT time" + " FROM samples" + " WHERE time != 0 AND machine_id = " + str(machine_id) + + " ORDER BY id LIMIT 1") + + def SamplesMaxTime(self, machine_id): + return self.SelectValue("SELECT time" + " FROM samples" + " WHERE time != 0 AND machine_id = " + str(machine_id) + + " ORDER BY id DESC LIMIT 1") + + def CallsMinTime(self, machine_id): + return self.SelectValue("SELECT calls.call_time" + " FROM calls" + " INNER JOIN threads ON threads.thread_id = calls.thread_id" + " WHERE calls.call_time != 0 AND threads.machine_id = " + str(machine_id) + + " ORDER BY calls.id LIMIT 1") + + def CallsMaxTime(self, machine_id): + return self.SelectValue("SELECT calls.return_time" + " FROM calls" + " INNER JOIN threads ON threads.thread_id = calls.thread_id" + " WHERE calls.return_time != 0 AND threads.machine_id = " + str(machine_id) + + " ORDER BY calls.return_time DESC LIMIT 1") + + def GetStartTime(self, machine_id): + t0 = self.SwitchesMinTime(machine_id) + t1 = self.SamplesMinTime(machine_id) + t2 = self.CallsMinTime(machine_id) + if t0 is None or (not(t1 is None) and t1 < t0): + t0 = t1 + if t0 is None or (not(t2 is None) and t2 < t0): + t0 = t2 + return t0 + + def GetFinishTime(self, machine_id): + t0 = self.SwitchesMaxTime(machine_id) + t1 = self.SamplesMaxTime(machine_id) + t2 = self.CallsMaxTime(machine_id) + if t0 is None or (not(t1 is None) and t1 > t0): + t0 = t1 + if t0 is None or (not(t2 is 
None) and t2 > t0): + t0 = t2 + return t0 + + def HostStartTime(self): + if self.host_start_time: + return self.host_start_time + self.host_start_time = self.GetStartTime(self.HostMachineId()) + return self.host_start_time + + def HostFinishTime(self): + if self.host_finish_time: + return self.host_finish_time + self.host_finish_time = self.GetFinishTime(self.HostMachineId()) + return self.host_finish_time + + def StartTime(self, machine_id): + if machine_id == self.HostMachineId(): + return self.HostStartTime() + return self.GetStartTime(machine_id) + + def FinishTime(self, machine_id): + if machine_id == self.HostMachineId(): + return self.HostFinishTime() + return self.GetFinishTime(machine_id) + # Database reference class DBRef(): -- cgit v1.2.3-59-g8ed1b From da4264f5cf8a6b798bf5494e08a5dae85a61c4c3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Aug 2019 11:32:14 +0300 Subject: perf scripts python: exported-sql-viewer.py: Tidy up Call tree call_time Record call_time on tree nodes and re-name the misnamed "count" parameter. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20190821083216.1340-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 0dcc9a03b1b0..06b8d55977bc 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -794,15 +794,16 @@ class CallGraphModel(CallGraphModelBase): class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): - def __init__(self, glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item): + def __init__(self, glb, params, row, comm_id, thread_id, calls_id, call_time, time, insn_cnt, cyc_cnt, branch_count, parent_item): super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item) self.comm_id = comm_id self.thread_id = thread_id self.calls_id = calls_id + self.call_time = call_time + self.time = time self.insn_cnt = insn_cnt self.cyc_cnt = cyc_cnt self.branch_count = branch_count - self.time = time def Select(self): self.query_done = True @@ -839,17 +840,17 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase): - def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item): - super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, call_time, time, insn_cnt, cyc_cnt, branch_count, parent_item): + super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, call_time, time, insn_cnt, cyc_cnt, branch_count, 
parent_item) dso = dsoname(dso) if self.params.have_ipc: insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt) cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt) br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count) ipc = CalcIPC(cyc_cnt, insn_cnt) - self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ] + self.data = [ name, dso, str(call_time), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ] else: - self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] + self.data = [ name, dso, str(call_time), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] self.dbid = calls_id # Call tree data model level two item @@ -857,7 +858,7 @@ class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase): class CallTreeLevelTwoItem(CallTreeLevelTwoPlusItemBase): def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item): - super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, parent_item) + super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, 0, parent_item) if self.params.have_ipc: self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""] else: -- cgit v1.2.3-59-g8ed1b From e69d5df75d74da14cbc8c96bbc1d9e86cc91ad0b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Aug 2019 11:32:15 +0300 Subject: perf scripts python: exported-sql-viewer.py: Add ability for Call tree to open at a specified task and time Add ability for Call tree to open at a specified task and time. 
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20190821083216.1340-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 44 +++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 06b8d55977bc..a5af52f422e6 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -1094,7 +1094,7 @@ class CallGraphWindow(TreeWindowBase): class CallTreeWindow(TreeWindowBase): - def __init__(self, glb, parent=None): + def __init__(self, glb, parent=None, thread_at_time=None): super(CallTreeWindow, self).__init__(parent) self.model = LookupCreateModel("Call Tree", lambda x=glb: CallTreeModel(x)) @@ -1112,6 +1112,48 @@ class CallTreeWindow(TreeWindowBase): AddSubWindow(glb.mainwindow.mdi_area, self, "Call Tree") + if thread_at_time: + self.DisplayThreadAtTime(*thread_at_time) + + def DisplayThreadAtTime(self, comm_id, thread_id, time): + parent = QModelIndex() + for dbid in (comm_id, thread_id): + found = False + n = self.model.rowCount(parent) + for row in xrange(n): + child = self.model.index(row, 0, parent) + if child.internalPointer().dbid == dbid: + found = True + self.view.setCurrentIndex(child) + parent = child + break + if not found: + return + found = False + while True: + n = self.model.rowCount(parent) + if not n: + return + last_child = None + for row in xrange(n): + child = self.model.index(row, 0, parent) + child_call_time = child.internalPointer().call_time + if child_call_time < time: + last_child = child + elif child_call_time == time: + self.view.setCurrentIndex(child) + return + elif child_call_time > time: + break + if not last_child: + if not found: + child = self.model.index(0, 0, parent) + self.view.setCurrentIndex(child) + return + found = True + 
self.view.setCurrentIndex(last_child) + parent = last_child + # Child data item finder class ChildDataItemFinder(): -- cgit v1.2.3-59-g8ed1b From b3700f21c2ede55aeab3aba728bce434051ec631 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Aug 2019 11:32:16 +0300 Subject: perf scripts python: exported-sql-viewer.py: Add Time chart by CPU Add a time chart based on context switch information. Context switch information was added to the database export fairly recently, so the chart menu option will only appear if context switch information is in the database. Refer to the Exported SQL Viewer Help option for more information about the chart. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20190821083216.1340-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 1333 +++++++++++++++++++++- 1 file changed, 1331 insertions(+), 2 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index a5af52f422e6..ebc6a2e5eae9 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -105,6 +105,9 @@ except ImportError: glb_nsz = 16 import re import os +import random +import copy +import math pyside_version_1 = True if not "--pyside-version-1" in sys.argv: @@ -1154,6 +1157,1301 @@ class CallTreeWindow(TreeWindowBase): self.view.setCurrentIndex(last_child) parent = last_child +# ExecComm() gets the comm_id of the command string that was set when the process exec'd i.e. 
the program name + +def ExecComm(db, thread_id, time): + query = QSqlQuery(db) + QueryExec(query, "SELECT comm_threads.comm_id, comms.c_time, comms.exec_flag" + " FROM comm_threads" + " INNER JOIN comms ON comms.id = comm_threads.comm_id" + " WHERE comm_threads.thread_id = " + str(thread_id) + + " ORDER BY comms.c_time, comms.id") + first = None + last = None + while query.next(): + if first is None: + first = query.value(0) + if query.value(2) and Decimal(query.value(1)) <= Decimal(time): + last = query.value(0) + if not(last is None): + return last + return first + +# Container for (x, y) data + +class XY(): + def __init__(self, x=0, y=0): + self.x = x + self.y = y + + def __str__(self): + return "XY({}, {})".format(str(self.x), str(self.y)) + +# Container for sub-range data + +class Subrange(): + def __init__(self, lo=0, hi=0): + self.lo = lo + self.hi = hi + + def __str__(self): + return "Subrange({}, {})".format(str(self.lo), str(self.hi)) + +# Graph data region base class + +class GraphDataRegion(object): + + def __init__(self, key, title = "", ordinal = ""): + self.key = key + self.title = title + self.ordinal = ordinal + +# Function to sort GraphDataRegion + +def GraphDataRegionOrdinal(data_region): + return data_region.ordinal + +# Attributes for a graph region + +class GraphRegionAttribute(): + + def __init__(self, colour): + self.colour = colour + +# Switch graph data region represents a task + +class SwitchGraphDataRegion(GraphDataRegion): + + def __init__(self, key, exec_comm_id, pid, tid, comm, thread_id, comm_id): + super(SwitchGraphDataRegion, self).__init__(key) + + self.title = str(pid) + " / " + str(tid) + " " + comm + # Order graph legend within exec comm by pid / tid / time + self.ordinal = str(pid).rjust(16) + str(exec_comm_id).rjust(8) + str(tid).rjust(16) + self.exec_comm_id = exec_comm_id + self.pid = pid + self.tid = tid + self.comm = comm + self.thread_id = thread_id + self.comm_id = comm_id + +# Graph data point + +class 
GraphDataPoint(): + + def __init__(self, data, index, x, y, altx=None, alty=None, hregion=None, vregion=None): + self.data = data + self.index = index + self.x = x + self.y = y + self.altx = altx + self.alty = alty + self.hregion = hregion + self.vregion = vregion + +# Graph data (single graph) base class + +class GraphData(object): + + def __init__(self, collection, xbase=Decimal(0), ybase=Decimal(0)): + self.collection = collection + self.points = [] + self.xbase = xbase + self.ybase = ybase + self.title = "" + + def AddPoint(self, x, y, altx=None, alty=None, hregion=None, vregion=None): + index = len(self.points) + + x = float(Decimal(x) - self.xbase) + y = float(Decimal(y) - self.ybase) + + self.points.append(GraphDataPoint(self, index, x, y, altx, alty, hregion, vregion)) + + def XToData(self, x): + return Decimal(x) + self.xbase + + def YToData(self, y): + return Decimal(y) + self.ybase + +# Switch graph data (for one CPU) + +class SwitchGraphData(GraphData): + + def __init__(self, db, collection, cpu, xbase): + super(SwitchGraphData, self).__init__(collection, xbase) + + self.cpu = cpu + self.title = "CPU " + str(cpu) + self.SelectSwitches(db) + + def SelectComms(self, db, thread_id, last_comm_id, start_time, end_time): + query = QSqlQuery(db) + QueryExec(query, "SELECT id, c_time" + " FROM comms" + " WHERE c_thread_id = " + str(thread_id) + + " AND exec_flag = TRUE" + " AND c_time >= " + str(start_time) + + " AND c_time <= " + str(end_time) + + " ORDER BY c_time, id") + while query.next(): + comm_id = query.value(0) + if comm_id == last_comm_id: + continue + time = query.value(1) + hregion = self.HRegion(db, thread_id, comm_id, time) + self.AddPoint(time, 1000, None, None, hregion) + + def SelectSwitches(self, db): + last_time = None + last_comm_id = None + last_thread_id = None + query = QSqlQuery(db) + QueryExec(query, "SELECT time, thread_out_id, thread_in_id, comm_out_id, comm_in_id, flags" + " FROM context_switches" + " WHERE machine_id = " + 
str(self.collection.machine_id) + + " AND cpu = " + str(self.cpu) + + " ORDER BY time, id") + while query.next(): + flags = int(query.value(5)) + if flags & 1: + # Schedule-out: detect and add exec's + if last_thread_id == query.value(1) and last_comm_id is not None and last_comm_id != query.value(3): + self.SelectComms(db, last_thread_id, last_comm_id, last_time, query.value(0)) + continue + # Schedule-in: add data point + if len(self.points) == 0: + start_time = self.collection.glb.StartTime(self.collection.machine_id) + hregion = self.HRegion(db, query.value(1), query.value(3), start_time) + self.AddPoint(start_time, 1000, None, None, hregion) + time = query.value(0) + comm_id = query.value(4) + thread_id = query.value(2) + hregion = self.HRegion(db, thread_id, comm_id, time) + self.AddPoint(time, 1000, None, None, hregion) + last_time = time + last_comm_id = comm_id + last_thread_id = thread_id + + def NewHRegion(self, db, key, thread_id, comm_id, time): + exec_comm_id = ExecComm(db, thread_id, time) + query = QSqlQuery(db) + QueryExec(query, "SELECT pid, tid FROM threads WHERE id = " + str(thread_id)) + if query.next(): + pid = query.value(0) + tid = query.value(1) + else: + pid = -1 + tid = -1 + query = QSqlQuery(db) + QueryExec(query, "SELECT comm FROM comms WHERE id = " + str(comm_id)) + if query.next(): + comm = query.value(0) + else: + comm = "" + return SwitchGraphDataRegion(key, exec_comm_id, pid, tid, comm, thread_id, comm_id) + + def HRegion(self, db, thread_id, comm_id, time): + key = str(thread_id) + ":" + str(comm_id) + hregion = self.collection.LookupHRegion(key) + if hregion is None: + hregion = self.NewHRegion(db, key, thread_id, comm_id, time) + self.collection.AddHRegion(key, hregion) + return hregion + +# Graph data collection (multiple related graphs) base class + +class GraphDataCollection(object): + + def __init__(self, glb): + self.glb = glb + self.data = [] + self.hregions = {} + self.xrangelo = None + self.xrangehi = None + 
self.yrangelo = None + self.yrangehi = None + self.dp = XY(0, 0) + + def AddGraphData(self, data): + self.data.append(data) + + def LookupHRegion(self, key): + if key in self.hregions: + return self.hregions[key] + return None + + def AddHRegion(self, key, hregion): + self.hregions[key] = hregion + +# Switch graph data collection (SwitchGraphData for each CPU) + +class SwitchGraphDataCollection(GraphDataCollection): + + def __init__(self, glb, db, machine_id): + super(SwitchGraphDataCollection, self).__init__(glb) + + self.machine_id = machine_id + self.cpus = self.SelectCPUs(db) + + self.xrangelo = glb.StartTime(machine_id) + self.xrangehi = glb.FinishTime(machine_id) + + self.yrangelo = Decimal(0) + self.yrangehi = Decimal(1000) + + for cpu in self.cpus: + self.AddGraphData(SwitchGraphData(db, self, cpu, self.xrangelo)) + + def SelectCPUs(self, db): + cpus = [] + query = QSqlQuery(db) + QueryExec(query, "SELECT DISTINCT cpu" + " FROM context_switches" + " WHERE machine_id = " + str(self.machine_id)) + while query.next(): + cpus.append(int(query.value(0))) + return sorted(cpus) + +# Switch graph data graphics item displays the graphed data + +class SwitchGraphDataGraphicsItem(QGraphicsItem): + + def __init__(self, data, graph_width, graph_height, attrs, event_handler, parent=None): + super(SwitchGraphDataGraphicsItem, self).__init__(parent) + + self.data = data + self.graph_width = graph_width + self.graph_height = graph_height + self.attrs = attrs + self.event_handler = event_handler + self.setAcceptHoverEvents(True) + + def boundingRect(self): + return QRectF(0, 0, self.graph_width, self.graph_height) + + def PaintPoint(self, painter, last, x): + if not(last is None or last.hregion.pid == 0 or x < self.attrs.subrange.x.lo): + if last.x < self.attrs.subrange.x.lo: + x0 = self.attrs.subrange.x.lo + else: + x0 = last.x + if x > self.attrs.subrange.x.hi: + x1 = self.attrs.subrange.x.hi + else: + x1 = x - 1 + x0 = self.attrs.XToPixel(x0) + x1 = 
self.attrs.XToPixel(x1) + + y0 = self.attrs.YToPixel(last.y) + + colour = self.attrs.region_attributes[last.hregion.key].colour + + width = x1 - x0 + 1 + if width < 2: + painter.setPen(colour) + painter.drawLine(x0, self.graph_height - y0, x0, self.graph_height) + else: + painter.fillRect(x0, self.graph_height - y0, width, self.graph_height - 1, colour) + + def paint(self, painter, option, widget): + last = None + for point in self.data.points: + self.PaintPoint(painter, last, point.x) + if point.x > self.attrs.subrange.x.hi: + break; + last = point + self.PaintPoint(painter, last, self.attrs.subrange.x.hi + 1) + + def BinarySearchPoint(self, target): + lower_pos = 0 + higher_pos = len(self.data.points) + while True: + pos = int((lower_pos + higher_pos) / 2) + val = self.data.points[pos].x + if target >= val: + lower_pos = pos + else: + higher_pos = pos + if higher_pos <= lower_pos + 1: + return lower_pos + + def XPixelToData(self, x): + x = self.attrs.PixelToX(x) + if x < self.data.points[0].x: + x = 0 + pos = 0 + low = True + else: + pos = self.BinarySearchPoint(x) + low = False + return (low, pos, self.data.XToData(x)) + + def EventToData(self, event): + no_data = (None,) * 4 + if len(self.data.points) < 1: + return no_data + x = event.pos().x() + if x < 0: + return no_data + low0, pos0, time_from = self.XPixelToData(x) + low1, pos1, time_to = self.XPixelToData(x + 1) + hregions = set() + hregion_times = [] + if not low1: + for i in xrange(pos0, pos1 + 1): + hregion = self.data.points[i].hregion + hregions.add(hregion) + if i == pos0: + time = time_from + else: + time = self.data.XToData(self.data.points[i].x) + hregion_times.append((hregion, time)) + return (time_from, time_to, hregions, hregion_times) + + def hoverMoveEvent(self, event): + time_from, time_to, hregions, hregion_times = self.EventToData(event) + if time_from is not None: + self.event_handler.PointEvent(self.data.cpu, time_from, time_to, hregions) + + def hoverLeaveEvent(self, event): + 
self.event_handler.NoPointEvent() + + def mousePressEvent(self, event): + if event.button() != Qt.RightButton: + super(SwitchGraphDataGraphicsItem, self).mousePressEvent(event) + return + time_from, time_to, hregions, hregion_times = self.EventToData(event) + if hregion_times: + self.event_handler.RightClickEvent(self.data.cpu, hregion_times, event.screenPos()) + +# X-axis graphics item + +class XAxisGraphicsItem(QGraphicsItem): + + def __init__(self, width, parent=None): + super(XAxisGraphicsItem, self).__init__(parent) + + self.width = width + self.max_mark_sz = 4 + self.height = self.max_mark_sz + 1 + + def boundingRect(self): + return QRectF(0, 0, self.width, self.height) + + def Step(self): + attrs = self.parentItem().attrs + subrange = attrs.subrange.x + t = subrange.hi - subrange.lo + s = (3.0 * t) / self.width + n = 1.0 + while s > n: + n = n * 10.0 + return n + + def PaintMarks(self, painter, at_y, lo, hi, step, i): + attrs = self.parentItem().attrs + x = lo + while x <= hi: + xp = attrs.XToPixel(x) + if i % 10: + if i % 5: + sz = 1 + else: + sz = 2 + else: + sz = self.max_mark_sz + i = 0 + painter.drawLine(xp, at_y, xp, at_y + sz) + x += step + i += 1 + + def paint(self, painter, option, widget): + # Using QPainter::drawLine(int x1, int y1, int x2, int y2) so x2 = width -1 + painter.drawLine(0, 0, self.width - 1, 0) + n = self.Step() + attrs = self.parentItem().attrs + subrange = attrs.subrange.x + if subrange.lo: + x_offset = n - (subrange.lo % n) + else: + x_offset = 0.0 + x = subrange.lo + x_offset + i = (x / n) % 10 + self.PaintMarks(painter, 0, x, subrange.hi, n, i) + + def ScaleDimensions(self): + n = self.Step() + attrs = self.parentItem().attrs + lo = attrs.subrange.x.lo + hi = (n * 10.0) + lo + width = attrs.XToPixel(hi) + if width > 500: + width = 0 + return (n, lo, hi, width) + + def PaintScale(self, painter, at_x, at_y): + n, lo, hi, width = self.ScaleDimensions() + if not width: + return + painter.drawLine(at_x, at_y, at_x + width, at_y) + 
self.PaintMarks(painter, at_y, lo, hi, n, 0) + + def ScaleWidth(self): + n, lo, hi, width = self.ScaleDimensions() + return width + + def ScaleHeight(self): + return self.height + + def ScaleUnit(self): + return self.Step() * 10 + +# Scale graphics item base class + +class ScaleGraphicsItem(QGraphicsItem): + + def __init__(self, axis, parent=None): + super(ScaleGraphicsItem, self).__init__(parent) + self.axis = axis + + def boundingRect(self): + scale_width = self.axis.ScaleWidth() + if not scale_width: + return QRectF() + return QRectF(0, 0, self.axis.ScaleWidth() + 100, self.axis.ScaleHeight()) + + def paint(self, painter, option, widget): + scale_width = self.axis.ScaleWidth() + if not scale_width: + return + self.axis.PaintScale(painter, 0, 5) + x = scale_width + 4 + painter.drawText(QPointF(x, 10), self.Text()) + + def Unit(self): + return self.axis.ScaleUnit() + + def Text(self): + return "" + +# Switch graph scale graphics item + +class SwitchScaleGraphicsItem(ScaleGraphicsItem): + + def __init__(self, axis, parent=None): + super(SwitchScaleGraphicsItem, self).__init__(axis, parent) + + def Text(self): + unit = self.Unit() + if unit >= 1000000000: + unit = int(unit / 1000000000) + us = "s" + elif unit >= 1000000: + unit = int(unit / 1000000) + us = "ms" + elif unit >= 1000: + unit = int(unit / 1000) + us = "us" + else: + unit = int(unit) + us = "ns" + return " = " + str(unit) + " " + us + +# Switch graph graphics item contains graph title, scale, x/y-axis, and the graphed data + +class SwitchGraphGraphicsItem(QGraphicsItem): + + def __init__(self, collection, data, attrs, event_handler, first, parent=None): + super(SwitchGraphGraphicsItem, self).__init__(parent) + self.collection = collection + self.data = data + self.attrs = attrs + self.event_handler = event_handler + + margin = 20 + title_width = 50 + + self.title_graphics = QGraphicsSimpleTextItem(data.title, self) + + self.title_graphics.setPos(margin, margin) + graph_width = 
attrs.XToPixel(attrs.subrange.x.hi) + 1 + graph_height = attrs.YToPixel(attrs.subrange.y.hi) + 1 + + self.graph_origin_x = margin + title_width + margin + self.graph_origin_y = graph_height + margin + + x_axis_size = 1 + y_axis_size = 1 + self.yline = QGraphicsLineItem(0, 0, 0, graph_height, self) + + self.x_axis = XAxisGraphicsItem(graph_width, self) + self.x_axis.setPos(self.graph_origin_x, self.graph_origin_y + 1) + + if first: + self.scale_item = SwitchScaleGraphicsItem(self.x_axis, self) + self.scale_item.setPos(self.graph_origin_x, self.graph_origin_y + 10) + + self.yline.setPos(self.graph_origin_x - y_axis_size, self.graph_origin_y - graph_height) + + self.axis_point = QGraphicsLineItem(0, 0, 0, 0, self) + self.axis_point.setPos(self.graph_origin_x - 1, self.graph_origin_y +1) + + self.width = self.graph_origin_x + graph_width + margin + self.height = self.graph_origin_y + margin + + self.graph = SwitchGraphDataGraphicsItem(data, graph_width, graph_height, attrs, event_handler, self) + self.graph.setPos(self.graph_origin_x, self.graph_origin_y - graph_height) + + if parent and 'EnableRubberBand' in dir(parent): + parent.EnableRubberBand(self.graph_origin_x, self.graph_origin_x + graph_width - 1, self) + + def boundingRect(self): + return QRectF(0, 0, self.width, self.height) + + def paint(self, painter, option, widget): + pass + + def RBXToPixel(self, x): + return self.attrs.PixelToX(x - self.graph_origin_x) + + def RBXRangeToPixel(self, x0, x1): + return (self.RBXToPixel(x0), self.RBXToPixel(x1 + 1)) + + def RBPixelToTime(self, x): + if x < self.data.points[0].x: + return self.data.XToData(0) + return self.data.XToData(x) + + def RBEventTimes(self, x0, x1): + x0, x1 = self.RBXRangeToPixel(x0, x1) + time_from = self.RBPixelToTime(x0) + time_to = self.RBPixelToTime(x1) + return (time_from, time_to) + + def RBEvent(self, x0, x1): + time_from, time_to = self.RBEventTimes(x0, x1) + self.event_handler.RangeEvent(time_from, time_to) + + def RBMoveEvent(self, x0, 
x1): + if x1 < x0: + x0, x1 = x1, x0 + self.RBEvent(x0, x1) + + def RBReleaseEvent(self, x0, x1, selection_state): + if x1 < x0: + x0, x1 = x1, x0 + x0, x1 = self.RBXRangeToPixel(x0, x1) + self.event_handler.SelectEvent(x0, x1, selection_state) + +# Graphics item to draw a vertical bracket (used to highlight "forward" sub-range) + +class VerticalBracketGraphicsItem(QGraphicsItem): + + def __init__(self, parent=None): + super(VerticalBracketGraphicsItem, self).__init__(parent) + + self.width = 0 + self.height = 0 + self.hide() + + def SetSize(self, width, height): + self.width = width + 1 + self.height = height + 1 + + def boundingRect(self): + return QRectF(0, 0, self.width, self.height) + + def paint(self, painter, option, widget): + colour = QColor(255, 255, 0, 32) + painter.fillRect(0, 0, self.width, self.height, colour) + x1 = self.width - 1 + y1 = self.height - 1 + painter.drawLine(0, 0, x1, 0) + painter.drawLine(0, 0, 0, 3) + painter.drawLine(x1, 0, x1, 3) + painter.drawLine(0, y1, x1, y1) + painter.drawLine(0, y1, 0, y1 - 3) + painter.drawLine(x1, y1, x1, y1 - 3) + +# Graphics item to contain graphs arranged vertically + +class VertcalGraphSetGraphicsItem(QGraphicsItem): + + def __init__(self, collection, attrs, event_handler, child_class, parent=None): + super(VertcalGraphSetGraphicsItem, self).__init__(parent) + + self.collection = collection + + self.top = 10 + + self.width = 0 + self.height = self.top + + self.rubber_band = None + self.rb_enabled = False + + first = True + for data in collection.data: + child = child_class(collection, data, attrs, event_handler, first, self) + child.setPos(0, self.height + 1) + rect = child.boundingRect() + if rect.right() > self.width: + self.width = rect.right() + self.height = self.height + rect.bottom() + 1 + first = False + + self.bracket = VerticalBracketGraphicsItem(self) + + def EnableRubberBand(self, xlo, xhi, rb_event_handler): + if self.rb_enabled: + return + self.rb_enabled = True + self.rb_in_view = False + 
self.setAcceptedMouseButtons(Qt.LeftButton) + self.rb_xlo = xlo + self.rb_xhi = xhi + self.rb_event_handler = rb_event_handler + self.mousePressEvent = self.MousePressEvent + self.mouseMoveEvent = self.MouseMoveEvent + self.mouseReleaseEvent = self.MouseReleaseEvent + + def boundingRect(self): + return QRectF(0, 0, self.width, self.height) + + def paint(self, painter, option, widget): + pass + + def RubberBandParent(self): + scene = self.scene() + view = scene.views()[0] + viewport = view.viewport() + return viewport + + def RubberBandSetGeometry(self, rect): + scene_rectf = self.mapRectToScene(QRectF(rect)) + scene = self.scene() + view = scene.views()[0] + poly = view.mapFromScene(scene_rectf) + self.rubber_band.setGeometry(poly.boundingRect()) + + def SetSelection(self, selection_state): + if self.rubber_band: + if selection_state: + self.RubberBandSetGeometry(selection_state) + self.rubber_band.show() + else: + self.rubber_band.hide() + + def SetBracket(self, rect): + if rect: + x, y, width, height = rect.x(), rect.y(), rect.width(), rect.height() + self.bracket.setPos(x, y) + self.bracket.SetSize(width, height) + self.bracket.show() + else: + self.bracket.hide() + + def RubberBandX(self, event): + x = event.pos().toPoint().x() + if x < self.rb_xlo: + x = self.rb_xlo + elif x > self.rb_xhi: + x = self.rb_xhi + else: + self.rb_in_view = True + return x + + def RubberBandRect(self, x): + if self.rb_origin.x() <= x: + width = x - self.rb_origin.x() + rect = QRect(self.rb_origin, QSize(width, self.height)) + else: + width = self.rb_origin.x() - x + top_left = QPoint(self.rb_origin.x() - width, self.rb_origin.y()) + rect = QRect(top_left, QSize(width, self.height)) + return rect + + def MousePressEvent(self, event): + self.rb_in_view = False + x = self.RubberBandX(event) + self.rb_origin = QPoint(x, self.top) + if self.rubber_band is None: + self.rubber_band = QRubberBand(QRubberBand.Rectangle, self.RubberBandParent()) + 
self.RubberBandSetGeometry(QRect(self.rb_origin, QSize(0, self.height))) + if self.rb_in_view: + self.rubber_band.show() + self.rb_event_handler.RBMoveEvent(x, x) + else: + self.rubber_band.hide() + + def MouseMoveEvent(self, event): + x = self.RubberBandX(event) + rect = self.RubberBandRect(x) + self.RubberBandSetGeometry(rect) + if self.rb_in_view: + self.rubber_band.show() + self.rb_event_handler.RBMoveEvent(self.rb_origin.x(), x) + + def MouseReleaseEvent(self, event): + x = self.RubberBandX(event) + if self.rb_in_view: + selection_state = self.RubberBandRect(x) + else: + selection_state = None + self.rb_event_handler.RBReleaseEvent(self.rb_origin.x(), x, selection_state) + +# Switch graph legend data model + +class SwitchGraphLegendModel(QAbstractTableModel): + + def __init__(self, collection, region_attributes, parent=None): + super(SwitchGraphLegendModel, self).__init__(parent) + + self.region_attributes = region_attributes + + self.child_items = sorted(collection.hregions.values(), key=GraphDataRegionOrdinal) + self.child_count = len(self.child_items) + + self.highlight_set = set() + + self.column_headers = ("pid", "tid", "comm") + + def rowCount(self, parent): + return self.child_count + + def headerData(self, section, orientation, role): + if role != Qt.DisplayRole: + return None + if orientation != Qt.Horizontal: + return None + return self.columnHeader(section) + + def index(self, row, column, parent): + return self.createIndex(row, column, self.child_items[row]) + + def columnCount(self, parent=None): + return len(self.column_headers) + + def columnHeader(self, column): + return self.column_headers[column] + + def data(self, index, role): + if role == Qt.BackgroundRole: + child = self.child_items[index.row()] + if child in self.highlight_set: + return self.region_attributes[child.key].colour + return None + if role == Qt.ForegroundRole: + child = self.child_items[index.row()] + if child in self.highlight_set: + return QColor(255, 255, 255) + return 
self.region_attributes[child.key].colour + if role != Qt.DisplayRole: + return None + hregion = self.child_items[index.row()] + col = index.column() + if col == 0: + return hregion.pid + if col == 1: + return hregion.tid + if col == 2: + return hregion.comm + return None + + def SetHighlight(self, row, set_highlight): + child = self.child_items[row] + top_left = self.createIndex(row, 0, child) + bottom_right = self.createIndex(row, len(self.column_headers) - 1, child) + self.dataChanged.emit(top_left, bottom_right) + + def Highlight(self, highlight_set): + for row in xrange(self.child_count): + child = self.child_items[row] + if child in self.highlight_set: + if child not in highlight_set: + self.SetHighlight(row, False) + elif child in highlight_set: + self.SetHighlight(row, True) + self.highlight_set = highlight_set + +# Switch graph legend is a table + +class SwitchGraphLegend(QWidget): + + def __init__(self, collection, region_attributes, parent=None): + super(SwitchGraphLegend, self).__init__(parent) + + self.data_model = SwitchGraphLegendModel(collection, region_attributes) + + self.model = QSortFilterProxyModel() + self.model.setSourceModel(self.data_model) + + self.view = QTableView() + self.view.setModel(self.model) + self.view.setEditTriggers(QAbstractItemView.NoEditTriggers) + self.view.verticalHeader().setVisible(False) + self.view.sortByColumn(-1, Qt.AscendingOrder) + self.view.setSortingEnabled(True) + self.view.resizeColumnsToContents() + self.view.resizeRowsToContents() + + self.vbox = VBoxLayout(self.view) + self.setLayout(self.vbox) + + sz1 = self.view.columnWidth(0) + self.view.columnWidth(1) + self.view.columnWidth(2) + 2 + sz1 = sz1 + self.view.verticalScrollBar().sizeHint().width() + self.saved_size = sz1 + + def resizeEvent(self, event): + self.saved_size = self.size().width() + super(SwitchGraphLegend, self).resizeEvent(event) + + def Highlight(self, highlight_set): + self.data_model.Highlight(highlight_set) + self.update() + + def 
changeEvent(self, event): + if event.type() == QEvent.FontChange: + self.view.resizeRowsToContents() + self.view.resizeColumnsToContents() + # Need to resize rows again after column resize + self.view.resizeRowsToContents() + super(SwitchGraphLegend, self).changeEvent(event) + +# Random colour generation + +def RGBColourTooLight(r, g, b): + if g > 230: + return True + if g <= 160: + return False + if r <= 180 and g <= 180: + return False + if r < 60: + return False + return True + +def GenerateColours(x): + cs = [0] + for i in xrange(1, x): + cs.append(int((255.0 / i) + 0.5)) + colours = [] + for r in cs: + for g in cs: + for b in cs: + # Exclude black and colours that look too light against a white background + if (r, g, b) == (0, 0, 0) or RGBColourTooLight(r, g, b): + continue + colours.append(QColor(r, g, b)) + return colours + +def GenerateNColours(n): + for x in xrange(2, n + 2): + colours = GenerateColours(x) + if len(colours) >= n: + return colours + return [] + +def GenerateNRandomColours(n, seed): + colours = GenerateNColours(n) + random.seed(seed) + random.shuffle(colours) + return colours + +# Graph attributes, in particular the scale and subrange that change when zooming + +class GraphAttributes(): + + def __init__(self, scale, subrange, region_attributes, dp): + self.scale = scale + self.subrange = subrange + self.region_attributes = region_attributes + # Rounding avoids errors due to finite floating point precision + self.dp = dp # data decimal places + self.Update() + + def XToPixel(self, x): + return int(round((x - self.subrange.x.lo) * self.scale.x, self.pdp.x)) + + def YToPixel(self, y): + return int(round((y - self.subrange.y.lo) * self.scale.y, self.pdp.y)) + + def PixelToXRounded(self, px): + return round((round(px, 0) / self.scale.x), self.dp.x) + self.subrange.x.lo + + def PixelToYRounded(self, py): + return round((round(py, 0) / self.scale.y), self.dp.y) + self.subrange.y.lo + + def PixelToX(self, px): + x = self.PixelToXRounded(px) + if 
self.pdp.x == 0: + rt = self.XToPixel(x) + if rt > px: + return x - 1 + return x + + def PixelToY(self, py): + y = self.PixelToYRounded(py) + if self.pdp.y == 0: + rt = self.YToPixel(y) + if rt > py: + return y - 1 + return y + + def ToPDP(self, dp, scale): + # Calculate pixel decimal places: + # (10 ** dp) is the minimum delta in the data + # scale it to get the minimum delta in pixels + # log10 gives the number of decimals places negatively + # subtrace 1 to divide by 10 + # round to the lower negative number + # change the sign to get the number of decimals positively + x = math.log10((10 ** dp) * scale) + if x < 0: + x -= 1 + x = -int(math.floor(x) - 0.1) + else: + x = 0 + return x + + def Update(self): + x = self.ToPDP(self.dp.x, self.scale.x) + y = self.ToPDP(self.dp.y, self.scale.y) + self.pdp = XY(x, y) # pixel decimal places + +# Switch graph splitter which divides the CPU graphs from the legend + +class SwitchGraphSplitter(QSplitter): + + def __init__(self, parent=None): + super(SwitchGraphSplitter, self).__init__(parent) + + self.first_time = False + + def resizeEvent(self, ev): + if self.first_time: + self.first_time = False + sz1 = self.widget(1).view.columnWidth(0) + self.widget(1).view.columnWidth(1) + self.widget(1).view.columnWidth(2) + 2 + sz1 = sz1 + self.widget(1).view.verticalScrollBar().sizeHint().width() + sz0 = self.size().width() - self.handleWidth() - sz1 + self.setSizes([sz0, sz1]) + elif not(self.widget(1).saved_size is None): + sz1 = self.widget(1).saved_size + sz0 = self.size().width() - self.handleWidth() - sz1 + self.setSizes([sz0, sz1]) + super(SwitchGraphSplitter, self).resizeEvent(ev) + +# Graph widget base class + +class GraphWidget(QWidget): + + graph_title_changed = Signal(object) + + def __init__(self, parent=None): + super(GraphWidget, self).__init__(parent) + + def GraphTitleChanged(self, title): + self.graph_title_changed.emit(title) + + def Title(self): + return "" + +# Display time in s, ms, us or ns + +def 
ToTimeStr(val): + val = Decimal(val) + if val >= 1000000000: + return "{} s".format((val / 1000000000).quantize(Decimal("0.000000001"))) + if val >= 1000000: + return "{} ms".format((val / 1000000).quantize(Decimal("0.000001"))) + if val >= 1000: + return "{} us".format((val / 1000).quantize(Decimal("0.001"))) + return "{} ns".format(val.quantize(Decimal("1"))) + +# Switch (i.e. context switch i.e. Time Chart by CPU) graph widget which contains the CPU graphs and the legend and control buttons + +class SwitchGraphWidget(GraphWidget): + + def __init__(self, glb, collection, parent=None): + super(SwitchGraphWidget, self).__init__(parent) + + self.glb = glb + self.collection = collection + + self.back_state = [] + self.forward_state = [] + self.selection_state = (None, None) + self.fwd_rect = None + self.start_time = self.glb.StartTime(collection.machine_id) + + i = 0 + hregions = collection.hregions.values() + colours = GenerateNRandomColours(len(hregions), 1013) + region_attributes = {} + for hregion in hregions: + if hregion.pid == 0 and hregion.tid == 0: + region_attributes[hregion.key] = GraphRegionAttribute(QColor(0, 0, 0)) + else: + region_attributes[hregion.key] = GraphRegionAttribute(colours[i]) + i = i + 1 + + # Default to entire range + xsubrange = Subrange(0.0, float(collection.xrangehi - collection.xrangelo) + 1.0) + ysubrange = Subrange(0.0, float(collection.yrangehi - collection.yrangelo) + 1.0) + subrange = XY(xsubrange, ysubrange) + + scale = self.GetScaleForRange(subrange) + + self.attrs = GraphAttributes(scale, subrange, region_attributes, collection.dp) + + self.item = VertcalGraphSetGraphicsItem(collection, self.attrs, self, SwitchGraphGraphicsItem) + + self.scene = QGraphicsScene() + self.scene.addItem(self.item) + + self.view = QGraphicsView(self.scene) + self.view.centerOn(0, 0) + self.view.setAlignment(Qt.AlignLeft | Qt.AlignTop) + + self.legend = SwitchGraphLegend(collection, region_attributes) + + self.splitter = SwitchGraphSplitter() + 
self.splitter.addWidget(self.view) + self.splitter.addWidget(self.legend) + + self.point_label = QLabel("") + self.point_label.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Fixed) + + self.back_button = QToolButton() + self.back_button.setIcon(self.style().standardIcon(QStyle.SP_ArrowLeft)) + self.back_button.setDisabled(True) + self.back_button.released.connect(lambda: self.Back()) + + self.forward_button = QToolButton() + self.forward_button.setIcon(self.style().standardIcon(QStyle.SP_ArrowRight)) + self.forward_button.setDisabled(True) + self.forward_button.released.connect(lambda: self.Forward()) + + self.zoom_button = QToolButton() + self.zoom_button.setText("Zoom") + self.zoom_button.setDisabled(True) + self.zoom_button.released.connect(lambda: self.Zoom()) + + self.hbox = HBoxLayout(self.back_button, self.forward_button, self.zoom_button, self.point_label) + + self.vbox = VBoxLayout(self.splitter, self.hbox) + + self.setLayout(self.vbox) + + def GetScaleForRangeX(self, xsubrange): + # Default graph 1000 pixels wide + dflt = 1000.0 + r = xsubrange.hi - xsubrange.lo + return dflt / r + + def GetScaleForRangeY(self, ysubrange): + # Default graph 50 pixels high + dflt = 50.0 + r = ysubrange.hi - ysubrange.lo + return dflt / r + + def GetScaleForRange(self, subrange): + # Default graph 1000 pixels wide, 50 pixels high + xscale = self.GetScaleForRangeX(subrange.x) + yscale = self.GetScaleForRangeY(subrange.y) + return XY(xscale, yscale) + + def PointEvent(self, cpu, time_from, time_to, hregions): + text = "CPU: " + str(cpu) + time_from = time_from.quantize(Decimal(1)) + rel_time_from = time_from - self.glb.StartTime(self.collection.machine_id) + text = text + " Time: " + str(time_from) + " (+" + ToTimeStr(rel_time_from) + ")" + self.point_label.setText(text) + self.legend.Highlight(hregions) + + def RightClickEvent(self, cpu, hregion_times, pos): + if not IsSelectable(self.glb.db, "calls", "WHERE parent_id >= 0"): + return + menu = QMenu(self.view) + for 
hregion, time in hregion_times: + thread_at_time = (hregion.exec_comm_id, hregion.thread_id, time) + menu_text = "Show Call Tree for {} {}:{} at {}".format(hregion.comm, hregion.pid, hregion.tid, time) + menu.addAction(CreateAction(menu_text, "Show Call Tree", lambda a=None, args=thread_at_time: self.RightClickSelect(args), self.view)) + menu.exec_(pos) + + def RightClickSelect(self, args): + CallTreeWindow(self.glb, self.glb.mainwindow, thread_at_time=args) + + def NoPointEvent(self): + self.point_label.setText("") + self.legend.Highlight({}) + + def RangeEvent(self, time_from, time_to): + time_from = time_from.quantize(Decimal(1)) + time_to = time_to.quantize(Decimal(1)) + if time_to <= time_from: + self.point_label.setText("") + return + rel_time_from = time_from - self.start_time + rel_time_to = time_to - self.start_time + text = " Time: " + str(time_from) + " (+" + ToTimeStr(rel_time_from) + ") to: " + str(time_to) + " (+" + ToTimeStr(rel_time_to) + ")" + text = text + " duration: " + ToTimeStr(time_to - time_from) + self.point_label.setText(text) + + def BackState(self): + return (self.attrs.subrange, self.attrs.scale, self.selection_state, self.fwd_rect) + + def PushBackState(self): + state = copy.deepcopy(self.BackState()) + self.back_state.append(state) + self.back_button.setEnabled(True) + + def PopBackState(self): + self.attrs.subrange, self.attrs.scale, self.selection_state, self.fwd_rect = self.back_state.pop() + self.attrs.Update() + if not self.back_state: + self.back_button.setDisabled(True) + + def PushForwardState(self): + state = copy.deepcopy(self.BackState()) + self.forward_state.append(state) + self.forward_button.setEnabled(True) + + def PopForwardState(self): + self.attrs.subrange, self.attrs.scale, self.selection_state, self.fwd_rect = self.forward_state.pop() + self.attrs.Update() + if not self.forward_state: + self.forward_button.setDisabled(True) + + def Title(self): + time_from = self.collection.xrangelo + 
Decimal(self.attrs.subrange.x.lo) + time_to = self.collection.xrangelo + Decimal(self.attrs.subrange.x.hi) + rel_time_from = time_from - self.start_time + rel_time_to = time_to - self.start_time + title = "+" + ToTimeStr(rel_time_from) + " to +" + ToTimeStr(rel_time_to) + title = title + " (" + ToTimeStr(time_to - time_from) + ")" + return title + + def Update(self): + selected_subrange, selection_state = self.selection_state + self.item.SetSelection(selection_state) + self.item.SetBracket(self.fwd_rect) + self.zoom_button.setDisabled(selected_subrange is None) + self.GraphTitleChanged(self.Title()) + self.item.update(self.item.boundingRect()) + + def Back(self): + if not self.back_state: + return + self.PushForwardState() + self.PopBackState() + self.Update() + + def Forward(self): + if not self.forward_state: + return + self.PushBackState() + self.PopForwardState() + self.Update() + + def SelectEvent(self, x0, x1, selection_state): + if selection_state is None: + selected_subrange = None + else: + if x1 - x0 < 1.0: + x1 += 1.0 + selected_subrange = Subrange(x0, x1) + self.selection_state = (selected_subrange, selection_state) + self.zoom_button.setDisabled(selected_subrange is None) + + def Zoom(self): + selected_subrange, selection_state = self.selection_state + if selected_subrange is None: + return + self.fwd_rect = selection_state + self.item.SetSelection(None) + self.PushBackState() + self.attrs.subrange.x = selected_subrange + self.forward_state = [] + self.forward_button.setDisabled(True) + self.selection_state = (None, None) + self.fwd_rect = None + self.attrs.scale.x = self.GetScaleForRangeX(self.attrs.subrange.x) + self.attrs.Update() + self.Update() + +# Slow initialization - perform non-GUI initialization in a separate thread and put up a modal message box while waiting + +class SlowInitClass(): + + def __init__(self, glb, title, init_fn): + self.init_fn = init_fn + self.done = False + self.result = None + + self.msg_box = QMessageBox(glb.mainwindow) 
+ self.msg_box.setText("Initializing " + title + ". Please wait.") + self.msg_box.setWindowTitle("Initializing " + title) + self.msg_box.setWindowIcon(glb.mainwindow.style().standardIcon(QStyle.SP_MessageBoxInformation)) + + self.init_thread = Thread(self.ThreadFn, glb) + self.init_thread.done.connect(lambda: self.Done(), Qt.QueuedConnection) + + self.init_thread.start() + + def Done(self): + self.msg_box.done(0) + + def ThreadFn(self, glb): + conn_name = "SlowInitClass" + str(os.getpid()) + db, dbname = glb.dbref.Open(conn_name) + self.result = self.init_fn(db) + self.done = True + return (True, 0) + + def Result(self): + while not self.done: + self.msg_box.exec_() + self.init_thread.wait() + return self.result + +def SlowInit(glb, title, init_fn): + init = SlowInitClass(glb, title, init_fn) + return init.Result() + +# Time chart by CPU window + +class TimeChartByCPUWindow(QMdiSubWindow): + + def __init__(self, glb, parent=None): + super(TimeChartByCPUWindow, self).__init__(parent) + + self.glb = glb + self.machine_id = glb.HostMachineId() + self.collection_name = "SwitchGraphDataCollection " + str(self.machine_id) + + collection = LookupModel(self.collection_name) + if collection is None: + collection = SlowInit(glb, "Time Chart", self.Init) + + self.widget = SwitchGraphWidget(glb, collection, self) + self.view = self.widget + + self.base_title = "Time Chart by CPU" + self.setWindowTitle(self.base_title + self.widget.Title()) + self.widget.graph_title_changed.connect(self.GraphTitleChanged) + + self.setWidget(self.widget) + + AddSubWindow(glb.mainwindow.mdi_area, self, self.windowTitle()) + + def Init(self, db): + return LookupCreateModel(self.collection_name, lambda : SwitchGraphDataCollection(self.glb, db, self.machine_id)) + + def GraphTitleChanged(self, title): + self.setWindowTitle(self.base_title + " : " + title) + # Child data item finder class ChildDataItemFinder(): @@ -3025,7 +4323,9 @@ p.c2 {

1.3 All branches

1.4 Selected branches

1.5 Top calls by elapsed time

-

2. Tables

+

2. Charts

+

2.1 Time chart by CPU

+

3. Tables

1. Reports

1.1 Context-Sensitive Call Graph

The result is a GUI window with a tree representing a context-sensitive @@ -3113,7 +4413,29 @@ N.B. Due to the granularity of timestamps, there could be no branches in any giv The Top calls by elapsed time report displays calls in descending order of time elapsed between when the function was called and when it returned. The data is reduced by various selection criteria. A dialog box displays available criteria which are AND'ed together. If not all data is fetched, a Fetch bar is provided. Ctrl-F displays a Find bar. -

2. Tables

+

2. Charts

+

2.1 Time chart by CPU

+This chart displays context switch information when that data is available. Refer to context_switches_view on the Tables menu. +

Features

+
    +
  1. Mouse over to highlight the task and show the time
  2. +
  3. Drag the mouse to select a region and zoom by pushing the Zoom button
  4. +
  5. Go back and forward by pressing the arrow buttons
  6. +
  7. If call information is available, right-click to show a call tree opened to that task and time. +Note, the call tree may take some time to appear, and there may not be call information for the task or time selected. +
  8. +
+

Important

+The graph can be misleading in the following respects: +
    +
  1. The graph shows the first task on each CPU as running from the beginning of the time range. +Because tracing might start on different CPUs at different times, that is not necessarily the case. +Refer to context_switches_view on the Tables menu to understand what data the graph is based upon.
  2. +
  3. Similarly, the last task on each CPU can be shown running longer than it really was. +Again, refer to context_switches_view on the Tables menu to understand what data the graph is based upon.
  4. +
  5. When the mouse is over a task, the highlighted task might not be visible on the legend without scrolling if the legend does not fit fully in the window
  6. +
+

3. Tables

The Tables menu shows all tables and views in the database. Most tables have an associated view which displays the information in a more friendly way. Not all data for large tables is fetched immediately. More records can be fetched using the Fetch bar provided. Columns can be sorted, @@ -3309,6 +4631,10 @@ class MainWindow(QMainWindow): if IsSelectable(glb.db, "calls"): reports_menu.addAction(CreateAction("&Top calls by elapsed time", "Create a new window displaying top calls by elapsed time", self.NewTopCalls, self)) + if IsSelectable(glb.db, "context_switches"): + charts_menu = menu.addMenu("&Charts") + charts_menu.addAction(CreateAction("&Time chart by CPU", "Create a new window displaying time charts by CPU", self.TimeChartByCPU, self)) + self.TableMenu(GetTableList(glb), menu) self.window_menu = WindowMenu(self.mdi_area, menu) @@ -3369,6 +4695,9 @@ class MainWindow(QMainWindow): label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")" reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewSelectedBranchView(x), self)) + def TimeChartByCPU(self): + TimeChartByCPUWindow(self.glb, self) + def TableMenu(self, tables, menu): table_menu = menu.addMenu("&Tables") for table in tables: -- cgit v1.2.3-59-g8ed1b From 06f84d1989b7e58d56fa2e448664585749d41221 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 1 Oct 2019 13:33:06 +0200 Subject: perf tools: Make usage of test_attr__* optional for perf-sys.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For users of perf-sys.h outside perf, e.g. samples/bpf/bpf_load.c, it's convenient not to depend on test_attr__*. After commit 91854f9a077e ("perf tools: Move everything related to sys_perf_event_open() to perf-sys.h"), all users of perf-sys.h will depend on test_attr__enabled and test_attr__open. 
This commit enables a user to define HAVE_ATTR_TEST to zero in order to omit the test dependency. Fixes: 91854f9a077e ("perf tools: Move everything related to sys_perf_event_open() to perf-sys.h") Signed-off-by: Björn Töpel Acked-by: Song Liu Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20191001113307.27796-2-bjorn.topel@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-sys.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index 63e4349a772a..15e458e150bd 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -15,7 +15,9 @@ void test_attr__init(void); void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, int fd, int group_fd, unsigned long flags); -#define HAVE_ATTR_TEST +#ifndef HAVE_ATTR_TEST +#define HAVE_ATTR_TEST 1 +#endif static inline int sys_perf_event_open(struct perf_event_attr *attr, @@ -27,7 +29,7 @@ sys_perf_event_open(struct perf_event_attr *attr, fd = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); -#ifdef HAVE_ATTR_TEST +#if HAVE_ATTR_TEST if (unlikely(test_attr__enabled)) test_attr__open(attr, pid, cpu, fd, group_fd, flags); #endif -- cgit v1.2.3-59-g8ed1b From fce9501aec6bdda45ef3a5e365a5e0de7de7fe2d Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 1 Oct 2019 13:33:07 +0200 Subject: samples/bpf: fix build by setting HAVE_ATTR_TEST to zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To remove that test_attr__{enabled/open} are used by perf-sys.h, we set HAVE_ATTR_TEST to zero. 
Signed-off-by: Björn Töpel Tested-by: KP Singh Acked-by: Song Liu Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20191001113307.27796-3-bjorn.topel@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- samples/bpf/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 1d9be26b4edd..42b571cde177 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -176,6 +176,7 @@ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf +KBUILD_HOSTCFLAGS += -DHAVE_ATTR_TEST=0 HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable -- cgit v1.2.3-59-g8ed1b From 3714437d3fcc7956cabcb0077f2a506b61160a56 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 Oct 2019 09:46:42 -0700 Subject: perf script: Allow --time with --reltime The original --reltime patch forbid --time with --reltime. But it turns out --time doesn't really care about --reltime, because the relative time is only used at final output, while the time filtering always works earlier on absolute time. So just remove the check and allow combining the two options. 
Fixes: 90b10f47c0ee ("perf script: Support relative time") Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lore.kernel.org/lkml/20191002164642.1719-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 67be8d31afab..1c797a948ada 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3605,11 +3605,6 @@ int cmd_script(int argc, const char **argv) } } - if (script.time_str && reltime) { - fprintf(stderr, "Don't combine --reltime with --time\n"); - return -1; - } - if (itrace_synth_opts.callchain && itrace_synth_opts.callchain_sz > scripting_max_stack) scripting_max_stack = itrace_synth_opts.callchain_sz; -- cgit v1.2.3-59-g8ed1b From 8d1d4ff5e239d9ef385444bc0d855127d7b32754 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 3 Oct 2019 15:57:42 -0300 Subject: perf trace: Factor out the initialization of syscal_arg_fmt->scnprintf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We set the default scnprint routines for the syscall args based on its type or on heuristics based on its names, now we'll use this for tracepoints as well, so move it out of syscall__set_arg_fmts() and into a routine that receive just an array of syscall_arg_fmt entries + the tracepoint format fields list. 
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-xs3x0zzyes06c7scdsjn01ty@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6c7025370ec0..d52dd2bad980 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1467,15 +1467,16 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) return 0; } -static int syscall__set_arg_fmts(struct syscall *sc) +static struct tep_format_field * +syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field) { - struct tep_format_field *field, *last_field = NULL; - int idx = 0, len; + struct tep_format_field *last_field = NULL; + int len; - for (field = sc->args; field; field = field->next, ++idx) { + for (; field; field = field->next, ++arg) { last_field = field; - if (sc->fmt && sc->fmt->arg[idx].scnprintf) + if (arg->scnprintf) continue; len = strlen(field->name); @@ -1483,13 +1484,13 @@ static int syscall__set_arg_fmts(struct syscall *sc) if (strcmp(field->type, "const char *") == 0 && ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) || strstr(field->name, "path") != NULL)) - sc->arg_fmt[idx].scnprintf = SCA_FILENAME; + arg->scnprintf = SCA_FILENAME; else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr")) - sc->arg_fmt[idx].scnprintf = SCA_PTR; + arg->scnprintf = SCA_PTR; else if (strcmp(field->type, "pid_t") == 0) - sc->arg_fmt[idx].scnprintf = SCA_PID; + arg->scnprintf = SCA_PID; else if (strcmp(field->type, "umode_t") == 0) - sc->arg_fmt[idx].scnprintf = SCA_MODE_T; + arg->scnprintf = SCA_MODE_T; else if ((strcmp(field->type, "int") == 0 || strcmp(field->type, "unsigned int") == 0 || strcmp(field->type, "long") == 0) && @@ -1501,10 +1502,17 @@ static int 
syscall__set_arg_fmts(struct syscall *sc) * 23 unsigned int * 7 unsigned long */ - sc->arg_fmt[idx].scnprintf = SCA_FD; + arg->scnprintf = SCA_FD; } } + return last_field; +} + +static int syscall__set_arg_fmts(struct syscall *sc) +{ + struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args); + if (last_field) sc->args_size = last_field->offset + last_field->size; -- cgit v1.2.3-59-g8ed1b From 947b843cf52a53f6b35aa1406e11884291f41597 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 3 Oct 2019 16:18:22 -0300 Subject: perf trace: Allocate an array of beautifiers for tracepoint args MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will work similar to the syscall args, we'll allocate an array of 'struct syscall_arg_fmt' for the tracepoint args and then init them using the same algorithm used for the defaults for syscall args, i.e. using its types and sometimes names as hints to find the right scnprintf routine to beautify them from numbers into strings. Next step is to stop using libtracevent to printf tracepoints, as we'll have more beautifiers than int provides, modulo perhaps some plugins. 
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-dcl135relxvf6ljisjg13aqg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d52dd2bad980..aa70602c2808 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1574,6 +1574,19 @@ static int trace__read_syscall_info(struct trace *trace, int id) return syscall__set_arg_fmts(sc); } +static int perf_evsel__init_tp_arg_scnprintf(struct evsel *evsel) +{ + int nr_args = evsel->tp_format->format.nr_fields; + + evsel->priv = calloc(nr_args, sizeof(struct syscall_arg_fmt)); + if (evsel->priv != NULL) { + syscall_arg_fmt__init_array(evsel->priv, evsel->tp_format->format.fields); + return 0; + } + + return -ENOMEM; +} + static int intcmp(const void *a, const void *b) { const int *one = a, *another = b; @@ -3936,8 +3949,10 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist) if (evsel->priv || !evsel->tp_format) continue; - if (strcmp(evsel->tp_format->system, "syscalls")) + if (strcmp(evsel->tp_format->system, "syscalls")) { + perf_evsel__init_tp_arg_scnprintf(evsel); continue; + } if (perf_evsel__init_syscall_tp(evsel)) return -1; -- cgit v1.2.3-59-g8ed1b From 3e0c9b2cfaed25599a0a5cbd40e37871bdb10523 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Oct 2019 11:30:41 -0300 Subject: perf trace: Move some scnprintf methods from syscall to syscall_arg_fmt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since all they operate on is on a syscall_arg_fmt instance, so move them to allow use it from the upcoming tracepoint fprintf routine. 
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-ynttrs1l75f0x9tk67spd7jd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index aa70602c2808..82d39ef43d9c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1715,22 +1715,22 @@ static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size, * as mount 'flags' argument that needs ignoring some magic flag, see comment * in tools/perf/trace/beauty/mount_flags.c */ -static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val) +static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val) { - if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val) - return sc->arg_fmt[arg->idx].mask_val(arg, val); + if (fmt && fmt->mask_val) + return fmt->mask_val(arg, val); return val; } -static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, - struct syscall_arg *arg, unsigned long val) +static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size, + struct syscall_arg *arg, unsigned long val) { - if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) { + if (fmt && fmt->scnprintf) { arg->val = val; - if (sc->arg_fmt[arg->idx].parm) - arg->parm = sc->arg_fmt[arg->idx].parm; - return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg); + if (fmt->parm) + arg->parm = fmt->parm; + return fmt->scnprintf(bf, size, arg); } return scnprintf(bf, size, "%ld", val); } @@ -1776,7 +1776,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, * Some syscall args need some mask, most don't and * return val untouched. 
*/ - val = syscall__mask_val(sc, &arg, val); + val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val); /* * Suppress this argument if its value is zero and @@ -1797,7 +1797,8 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, if (trace->show_arg_names) printed += scnprintf(bf + printed, size - printed, "%s: ", field->name); - printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); + printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], + bf + printed, size - printed, &arg, val); } } else if (IS_ERR(sc->tp_format)) { /* @@ -1812,7 +1813,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, if (printed) printed += scnprintf(bf + printed, size - printed, ", "); printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg); - printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); + printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val); next_arg: ++arg.idx; bit <<= 1; -- cgit v1.2.3-59-g8ed1b From 888ca854e275fcfbb13206d32bb01c0576fc5546 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Oct 2019 14:52:30 -0300 Subject: perf trace: Add the syscall_arg_fmt pointer to syscall_arg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that the scnprintf beautifiers can access it, as will be the case with the char array one in the following csets, that needs to know the number of elements in an array. 
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-01qmjqv6cb1nj1qy4khdexce@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 45 ++++++++++++++++++++-------------------- tools/perf/trace/beauty/beauty.h | 3 +++ 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 82d39ef43d9c..f30296c72415 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -86,6 +86,28 @@ # define F_LINUX_SPECIFIC_BASE 1024 #endif +struct syscall_arg_fmt { + size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); + unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val); + void *parm; + const char *name; + bool show_zero; +}; + +struct syscall_fmt { + const char *name; + const char *alias; + struct { + const char *sys_enter, + *sys_exit; + } bpf_prog_name; + struct syscall_arg_fmt arg[6]; + u8 nr_args; + bool errpid; + bool timeout; + bool hexret; +}; + struct trace { struct perf_tool tool; struct syscalltbl *sctbl; @@ -695,28 +717,6 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, #include "trace/beauty/socket_type.c" #include "trace/beauty/waitid_options.c" -struct syscall_arg_fmt { - size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); - unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val); - void *parm; - const char *name; - bool show_zero; -}; - -struct syscall_fmt { - const char *name; - const char *alias; - struct { - const char *sys_enter, - *sys_exit; - } bpf_prog_name; - struct syscall_arg_fmt arg[6]; - u8 nr_args; - bool errpid; - bool timeout; - bool hexret; -}; - static struct syscall_fmt syscall_fmts[] = { { .name = "access", .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, @@ -1771,6 +1771,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, if (arg.mask & 
bit) continue; + arg.fmt = &sc->arg_fmt[arg.idx]; val = syscall_arg__val(&arg, arg.idx); /* * Some syscall args need some mask, most don't and diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 7e06605f7c76..4cc4f6b3d4a1 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -78,6 +78,8 @@ struct augmented_arg { u64 value[]; }; +struct syscall_arg_fmt; + /** * @val: value of syscall argument being formatted * @args: All the args, use syscall_args__val(arg, nth) to access one @@ -94,6 +96,7 @@ struct augmented_arg { struct syscall_arg { unsigned long val; unsigned char *args; + struct syscall_arg_fmt *fmt; struct { struct augmented_arg *args; int size; -- cgit v1.2.3-59-g8ed1b From 9597945d7fb42460e9f2559d1273302ebde85bbf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Oct 2019 14:56:40 -0300 Subject: perf trace: Add array of chars scnprintf beautifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Needed for sched's tracepoints prev/next comm, where, unlike with syscalls, we are not dealing with an integer or pointer, but an array straight out from the ring buffer.
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-rlll7tmcqe1g4odtaifil5re@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f30296c72415..b3fb208cbd30 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -91,6 +91,7 @@ struct syscall_arg_fmt { unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val); void *parm; const char *name; + u16 nr_entries; // for arrays bool show_zero; }; @@ -522,6 +523,16 @@ size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *ar return scnprintf(bf, size, "%ld", arg->val); } +static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg) +{ + // XXX Hey, maybe for sched:sched_switch prev/next comm fields we can + // fill missing comms using thread__set_comm()... + // here or in a special syscall_arg__scnprintf_pid_sched_tp... 
+ return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries, arg->val); +} + +#define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array + static const char *bpf_cmd[] = { "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM", "MAP_GET_NEXT_KEY", "PROG_LOAD", @@ -1491,7 +1502,10 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field arg->scnprintf = SCA_PID; else if (strcmp(field->type, "umode_t") == 0) arg->scnprintf = SCA_MODE_T; - else if ((strcmp(field->type, "int") == 0 || + else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstarts(field->type, "char")) { + arg->scnprintf = SCA_CHAR_ARRAY; + arg->nr_entries = field->arraylen; + } else if ((strcmp(field->type, "int") == 0 || strcmp(field->type, "unsigned int") == 0 || strcmp(field->type, "long") == 0) && len >= 2 && strcmp(field->name + len - 2, "fd") == 0) { -- cgit v1.2.3-59-g8ed1b From 311baaf93c4b9e6a339722006d1a7c33e4283c0c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Oct 2019 15:01:30 -0300 Subject: perf trace: Enclose all events argument lists with () MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that they look a bit like normal strace-like syscall enter+exit lines. They will look even more when we switch from using libtraceevent's tep_print_event() routine in favour of using all the perf beautifiers used by the strace-like syscall enter+exit lines. 
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-y4fcej6v6u1m644nbxd2r4pg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b3fb208cbd30..297aeaa9f69d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2450,7 +2450,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, */ } - fprintf(trace->output, "%s:", evsel->name); + fprintf(trace->output, "%s(", evsel->name); if (perf_evsel__is_bpf_output(evsel)) { bpf_output__fprintf(trace, sample); @@ -2470,7 +2470,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, } newline: - fprintf(trace->output, "\n"); + fprintf(trace->output, ")\n"); if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); -- cgit v1.2.3-59-g8ed1b From f11b2803bb88655d90b88c787710b53100913bff Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Oct 2019 15:28:13 -0300 Subject: perf trace: Allow choosing how to augment the tracepoint arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far we used the libtraceevent printing routines when showing tracepoint arguments, but since 'perf trace' has a lot of beautifiers for syscall arguments, and since some of those can be used to augment tracepoint arguments, add a routine to make use of those beautifiers and allow the user to choose which one to use. 
The default now is to use the same beautifiers used for the strace-like sys_enter+sys_exit lines, but the user can choose the libtraceevent ones by either using the: perf trace --libtraceevent_print command line option, or by setting: # cat ~/.perfconfig [trace] tracepoint_beautifiers = libtraceevent For instance, here are some examples: # perf trace -e sched:*switch,*sleep,sched:*wakeup,exit*,sched:*exit sleep 1 0.000 sched:sched_wakeup(comm: "perf", pid: 5273 (perf), prio: 120, success: 1, target_cpu: 6) 0.621 nanosleep(rqtp: 0x7ffdd06d1140, rmtp: NULL) ... 0.628 sched:sched_switch(prev_comm: "sleep", prev_pid: 5273 (sleep), prev_prio: 120, prev_state: 1, next_comm: "swapper/6", next_pid: 0, next_prio: 120) 1000.879 sched:sched_wakeup(comm: "sleep", pid: 5273 (sleep), prio: 120, success: 1, target_cpu: 6) 0.621 ... [continued]: nanosleep()) = 0 1001.026 exit_group(error_code: 0) = ? 1001.216 sched:sched_process_exit(comm: "sleep", pid: 5273 (sleep), prio: 120) # And then using libtraceevent, as before: # perf trace --libtraceevent_print -e sched:*switch,*sleep,sched:*wakeup,exit*,sched:*exit sleep 1 0.000 sched:sched_wakeup(comm=perf pid=5288 prio=120 target_cpu=001) 0.739 nanosleep(rqtp: 0x7ffeba6c2f40, rmtp: NULL) ... 0.747 sched:sched_switch(prev_comm=sleep prev_pid=5288 prev_prio=120 prev_state=S ==> next_comm=swapper/1 next_pid=0 next_prio=120) 1000.902 sched:sched_wakeup(comm=sleep pid=5288 prio=120 target_cpu=001) 0.739 ... [continued]: nanosleep()) = 0 1001.012 exit_group(error_code: 0) = ? # The new default allocates an array of 'struct syscall_arg_fmt' for the tracepoint arguments and, just like with syscall arguments, tries to find suitable syscall_arg__scnprintf_NAME() routines to augment those tracepoint arguments based on their type (as in the tracefs "format" file), or even in their name + type, for instance arguments with names ending in "fd" with type "int" get the fd scnprintf beautifier attached, etc.
Soon this will take advantage of the kernel BTF information to augment enumerations based on the tracefs "format" type info. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-o8qdluotkcb3b1x2gjqrejcl@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 5 ++ tools/perf/Documentation/perf-trace.txt | 5 ++ tools/perf/builtin-trace.c | 83 ++++++++++++++++++++++++++++++-- 3 files changed, 90 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index c599623a1f3d..c4dd23c4b478 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -561,6 +561,11 @@ trace.*:: trace.show_zeros:: Do not suppress syscall arguments that are equal to zero. + trace.tracepoint_beautifiers:: + Use "libtraceevent" to use that library to augment the tracepoint arguments, + "libbeauty", the default, to use the same argument beautifiers used in the + strace-like sys_enter+sys_exit lines. + llvm.*:: llvm.clang-path:: Path to clang. If omit, search it from $PATH. diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 25b74fdb36fa..ba16cd5b680f 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -219,6 +219,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. may happen, for instance, when a thread gets migrated to a different CPU while processing a syscall. +--libtraceevent_print:: + Use libtraceevent to print tracepoint arguments. By default 'perf trace' uses + the same beautifiers used in the strace-like enter+exit lines to augment the + tracepoint arguments. + --map-dump:: Dump BPF maps setup by events passed via -e, for instance the augmented_raw_syscalls living in tools/perf/examples/bpf/augmented_raw_syscalls.c. 
For now this diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 297aeaa9f69d..8303d83cb93c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -175,6 +175,7 @@ struct trace { bool print_sample; bool show_tool_stats; bool trace_syscalls; + bool libtraceevent_print; bool kernel_syscallchains; s16 args_alignment; bool show_tstamp; @@ -2397,6 +2398,71 @@ static void bpf_output__fprintf(struct trace *trace, ++trace->nr_events_printed; } +static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample, + struct thread *thread, void *augmented_args, int augmented_args_size) +{ + char bf[2048]; + size_t size = sizeof(bf); + struct tep_format_field *field = evsel->tp_format->format.fields; + struct syscall_arg_fmt *arg = evsel->priv; + size_t printed = 0; + unsigned long val; + u8 bit = 1; + struct syscall_arg syscall_arg = { + .augmented = { + .size = augmented_args_size, + .args = augmented_args, + }, + .idx = 0, + .mask = 0, + .trace = trace, + .thread = thread, + .show_string_prefix = trace->show_string_prefix, + }; + + for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) { + if (syscall_arg.mask & bit) + continue; + + syscall_arg.fmt = arg; + if (field->flags & TEP_FIELD_IS_ARRAY) + val = (uintptr_t)(sample->raw_data + field->offset); + else + val = format_field__intval(field, sample, evsel->needs_swap); + /* + * Some syscall args need some mask, most don't and + * return val untouched. + */ + val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val); + + /* + * Suppress this argument if its value is zero and + * and we don't have a string associated in an + * strarray for it. + */ + if (val == 0 && + !trace->show_zeros && + !((arg->show_zero || + arg->scnprintf == SCA_STRARRAY || + arg->scnprintf == SCA_STRARRAYS) && + arg->parm)) + continue; + + printed += scnprintf(bf + printed, size - printed, "%s", printed ? 
", " : ""); + + /* + * XXX Perhaps we should have a show_tp_arg_names, + * leaving show_arg_names just for syscalls? + */ + if (1 || trace->show_arg_names) + printed += scnprintf(bf + printed, size - printed, "%s: ", field->name); + + printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val); + } + + return printed + fprintf(trace->output, "%s", bf); +} + static int trace__event_handler(struct trace *trace, struct evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -2457,9 +2523,13 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, } else if (evsel->tp_format) { if (strncmp(evsel->tp_format->name, "sys_enter_", 10) || trace__fprintf_sys_enter(trace, evsel, sample)) { - event_format__fprintf(evsel->tp_format, sample->cpu, - sample->raw_data, sample->raw_size, - trace->output); + if (trace->libtraceevent_print) { + event_format__fprintf(evsel->tp_format, sample->cpu, + sample->raw_data, sample->raw_size, + trace->output); + } else { + trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, 0); + } ++trace->nr_events_printed; if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) { @@ -4150,6 +4220,11 @@ static int trace__config(const char *var, const char *value, void *arg) int args_alignment = 0; if (perf_config_int(&args_alignment, var, value) == 0) trace->args_alignment = args_alignment; + } else if (!strcmp(var, "trace.tracepoint_beautifiers")) { + if (strcasecmp(value, "libtraceevent") == 0) + trace->libtraceevent_print = true; + else if (strcasecmp(value, "libbeauty") == 0) + trace->libtraceevent_print = false; } out: return err; @@ -4239,6 +4314,8 @@ int cmd_trace(int argc, const char **argv) OPT_CALLBACK(0, "call-graph", &trace.opts, "record_mode[,record_size]", record_callchain_help, &record_parse_callchain_opt), + OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print, + "Use libtraceevent to print the tracepoint 
arguments."), OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, "Show the kernel callchains on the syscall exit path"), OPT_ULONG(0, "max-events", &trace.max_events, -- cgit v1.2.3-59-g8ed1b From 444e2ff34df8f631cd83ae73bb56ef13cfb84b34 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Sep 2019 15:26:39 -0300 Subject: tools arch x86: Grab a copy of the file containing the MSR numbers We'll use it to generate a table and then convert the msr:{read,write}_msr 'msr' option in things like perf trace, script, etc. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-y1f4s0y1s43d4drh7pd2huzn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 857 +++++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 858 insertions(+) create mode 100644 tools/arch/x86/include/asm/msr-index.h diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h new file mode 100644 index 000000000000..20ce682a2540 --- /dev/null +++ b/tools/arch/x86/include/asm/msr-index.h @@ -0,0 +1,857 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MSR_INDEX_H +#define _ASM_X86_MSR_INDEX_H + +#include + +/* + * CPU model specific register (MSR) numbers. + * + * Do not add new entries to this file unless the definitions are shared + * between multiple compilation units. 
+ */ + +/* x86-64 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ +#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ + +/* EFER bits: */ +#define _EFER_SCE 0 /* SYSCALL/SYSRET */ +#define _EFER_LME 8 /* Long mode enable */ +#define _EFER_LMA 10 /* Long mode active (read-only) */ +#define _EFER_NX 11 /* No execute enable */ +#define _EFER_SVME 12 /* Enable virtualization */ +#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ +#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ + +#define EFER_SCE (1<<_EFER_SCE) +#define EFER_LME (1<<_EFER_LME) +#define EFER_LMA (1<<_EFER_LMA) +#define EFER_NX (1<<_EFER_NX) +#define EFER_SVME (1<<_EFER_SVME) +#define EFER_LMSLE (1<<_EFER_LMSLE) +#define EFER_FFXSR (1<<_EFER_FFXSR) + +/* Intel MSRs. 
Some also available on other CPUs */ + +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ +#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ + +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ + +#define MSR_PPIN_CTL 0x0000004e +#define MSR_PPIN 0x0000004f + +#define MSR_IA32_PERFCTR0 0x000000c1 +#define MSR_IA32_PERFCTR1 0x000000c2 +#define MSR_FSB_FREQ 0x000000cd +#define MSR_PLATFORM_INFO 0x000000ce +#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 +#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) + +#define MSR_IA32_UMWAIT_CONTROL 0xe1 +#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) +#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) +/* + * The time field is bit[31:2], but representing a 32bit value with + * bit[1:0] zero. + */ +#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) + +#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 +#define NHM_C3_AUTO_DEMOTE (1UL << 25) +#define NHM_C1_AUTO_DEMOTE (1UL << 26) +#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 28) + +#define MSR_MTRRcap 0x000000fe + +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. 
+ */ +#define ARCH_CAP_MDS_NO BIT(5) /* + * Not susceptible to + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ + +#define MSR_IA32_FLUSH_CMD 0x0000010b +#define L1D_FLUSH BIT(0) /* + * Writeback and invalidate the + * L1 data cache. + */ + +#define MSR_IA32_BBL_CR_CTL 0x00000119 +#define MSR_IA32_BBL_CR_CTL3 0x0000011e + +#define MSR_IA32_SYSENTER_CS 0x00000174 +#define MSR_IA32_SYSENTER_ESP 0x00000175 +#define MSR_IA32_SYSENTER_EIP 0x00000176 + +#define MSR_IA32_MCG_CAP 0x00000179 +#define MSR_IA32_MCG_STATUS 0x0000017a +#define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_IA32_MCG_EXT_CTL 0x000004d0 + +#define MSR_OFFCORE_RSP_0 0x000001a6 +#define MSR_OFFCORE_RSP_1 0x000001a7 +#define MSR_TURBO_RATIO_LIMIT 0x000001ad +#define MSR_TURBO_RATIO_LIMIT1 0x000001ae +#define MSR_TURBO_RATIO_LIMIT2 0x000001af + +#define MSR_LBR_SELECT 0x000001c8 +#define MSR_LBR_TOS 0x000001c9 +#define MSR_LBR_NHM_FROM 0x00000680 +#define MSR_LBR_NHM_TO 0x000006c0 +#define MSR_LBR_CORE_FROM 0x00000040 +#define MSR_LBR_CORE_TO 0x00000060 + +#define MSR_LBR_INFO_0 0x00000dc0 /* ... 
0xddf for _31 */ +#define LBR_INFO_MISPRED BIT_ULL(63) +#define LBR_INFO_IN_TX BIT_ULL(62) +#define LBR_INFO_ABORT BIT_ULL(61) +#define LBR_INFO_CYCLES 0xffff + +#define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_PEBS_DATA_CFG 0x000003f2 +#define MSR_IA32_DS_AREA 0x00000600 +#define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 + +#define MSR_IA32_RTIT_CTL 0x00000570 +#define RTIT_CTL_TRACEEN BIT(0) +#define RTIT_CTL_CYCLEACC BIT(1) +#define RTIT_CTL_OS BIT(2) +#define RTIT_CTL_USR BIT(3) +#define RTIT_CTL_PWR_EVT_EN BIT(4) +#define RTIT_CTL_FUP_ON_PTW BIT(5) +#define RTIT_CTL_FABRIC_EN BIT(6) +#define RTIT_CTL_CR3EN BIT(7) +#define RTIT_CTL_TOPA BIT(8) +#define RTIT_CTL_MTC_EN BIT(9) +#define RTIT_CTL_TSC_EN BIT(10) +#define RTIT_CTL_DISRETC BIT(11) +#define RTIT_CTL_PTW_EN BIT(12) +#define RTIT_CTL_BRANCH_EN BIT(13) +#define RTIT_CTL_MTC_RANGE_OFFSET 14 +#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET) +#define RTIT_CTL_CYC_THRESH_OFFSET 19 +#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET) +#define RTIT_CTL_PSB_FREQ_OFFSET 24 +#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET) +#define RTIT_CTL_ADDR0_OFFSET 32 +#define RTIT_CTL_ADDR0 (0x0full << RTIT_CTL_ADDR0_OFFSET) +#define RTIT_CTL_ADDR1_OFFSET 36 +#define RTIT_CTL_ADDR1 (0x0full << RTIT_CTL_ADDR1_OFFSET) +#define RTIT_CTL_ADDR2_OFFSET 40 +#define RTIT_CTL_ADDR2 (0x0full << RTIT_CTL_ADDR2_OFFSET) +#define RTIT_CTL_ADDR3_OFFSET 44 +#define RTIT_CTL_ADDR3 (0x0full << RTIT_CTL_ADDR3_OFFSET) +#define MSR_IA32_RTIT_STATUS 0x00000571 +#define RTIT_STATUS_FILTEREN BIT(0) +#define RTIT_STATUS_CONTEXTEN BIT(1) +#define RTIT_STATUS_TRIGGEREN BIT(2) +#define RTIT_STATUS_BUFFOVF BIT(3) +#define RTIT_STATUS_ERROR BIT(4) +#define RTIT_STATUS_STOPPED BIT(5) +#define RTIT_STATUS_BYTECNT_OFFSET 32 +#define RTIT_STATUS_BYTECNT (0x1ffffull << RTIT_STATUS_BYTECNT_OFFSET) +#define MSR_IA32_RTIT_ADDR0_A 0x00000580 +#define 
MSR_IA32_RTIT_ADDR0_B 0x00000581 +#define MSR_IA32_RTIT_ADDR1_A 0x00000582 +#define MSR_IA32_RTIT_ADDR1_B 0x00000583 +#define MSR_IA32_RTIT_ADDR2_A 0x00000584 +#define MSR_IA32_RTIT_ADDR2_B 0x00000585 +#define MSR_IA32_RTIT_ADDR3_A 0x00000586 +#define MSR_IA32_RTIT_ADDR3_B 0x00000587 +#define MSR_IA32_RTIT_CR3_MATCH 0x00000572 +#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 +#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 + +#define MSR_MTRRfix64K_00000 0x00000250 +#define MSR_MTRRfix16K_80000 0x00000258 +#define MSR_MTRRfix16K_A0000 0x00000259 +#define MSR_MTRRfix4K_C0000 0x00000268 +#define MSR_MTRRfix4K_C8000 0x00000269 +#define MSR_MTRRfix4K_D0000 0x0000026a +#define MSR_MTRRfix4K_D8000 0x0000026b +#define MSR_MTRRfix4K_E0000 0x0000026c +#define MSR_MTRRfix4K_E8000 0x0000026d +#define MSR_MTRRfix4K_F0000 0x0000026e +#define MSR_MTRRfix4K_F8000 0x0000026f +#define MSR_MTRRdefType 0x000002ff + +#define MSR_IA32_CR_PAT 0x00000277 + +#define MSR_IA32_DEBUGCTLMSR 0x000001d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x000001db +#define MSR_IA32_LASTBRANCHTOIP 0x000001dc +#define MSR_IA32_LASTINTFROMIP 0x000001dd +#define MSR_IA32_LASTINTTOIP 0x000001de + +/* DEBUGCTLMSR bits (others vary by model): */ +#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF_SHIFT 1 +#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_TR (1UL << 6) +#define DEBUGCTLMSR_BTS (1UL << 7) +#define DEBUGCTLMSR_BTINT (1UL << 8) +#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) +#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) +#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12) +#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 +#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) + +#define MSR_PEBS_FRONTEND 0x000003f7 + +#define MSR_IA32_POWER_CTL 0x000001fc + +#define MSR_IA32_MC0_CTL 0x00000400 +#define MSR_IA32_MC0_STATUS 0x00000401 +#define MSR_IA32_MC0_ADDR 0x00000402 +#define 
MSR_IA32_MC0_MISC 0x00000403 + +/* C-state Residency Counters */ +#define MSR_PKG_C3_RESIDENCY 0x000003f8 +#define MSR_PKG_C6_RESIDENCY 0x000003f9 +#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa +#define MSR_PKG_C7_RESIDENCY 0x000003fa +#define MSR_CORE_C3_RESIDENCY 0x000003fc +#define MSR_CORE_C6_RESIDENCY 0x000003fd +#define MSR_CORE_C7_RESIDENCY 0x000003fe +#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff +#define MSR_PKG_C2_RESIDENCY 0x0000060d +#define MSR_PKG_C8_RESIDENCY 0x00000630 +#define MSR_PKG_C9_RESIDENCY 0x00000631 +#define MSR_PKG_C10_RESIDENCY 0x00000632 + +/* Interrupt Response Limit */ +#define MSR_PKGC3_IRTL 0x0000060a +#define MSR_PKGC6_IRTL 0x0000060b +#define MSR_PKGC7_IRTL 0x0000060c +#define MSR_PKGC8_IRTL 0x00000633 +#define MSR_PKGC9_IRTL 0x00000634 +#define MSR_PKGC10_IRTL 0x00000635 + +/* Run Time Average Power Limiting (RAPL) Interface */ + +#define MSR_RAPL_POWER_UNIT 0x00000606 + +#define MSR_PKG_POWER_LIMIT 0x00000610 +#define MSR_PKG_ENERGY_STATUS 0x00000611 +#define MSR_PKG_PERF_STATUS 0x00000613 +#define MSR_PKG_POWER_INFO 0x00000614 + +#define MSR_DRAM_POWER_LIMIT 0x00000618 +#define MSR_DRAM_ENERGY_STATUS 0x00000619 +#define MSR_DRAM_PERF_STATUS 0x0000061b +#define MSR_DRAM_POWER_INFO 0x0000061c + +#define MSR_PP0_POWER_LIMIT 0x00000638 +#define MSR_PP0_ENERGY_STATUS 0x00000639 +#define MSR_PP0_POLICY 0x0000063a +#define MSR_PP0_PERF_STATUS 0x0000063b + +#define MSR_PP1_POWER_LIMIT 0x00000640 +#define MSR_PP1_ENERGY_STATUS 0x00000641 +#define MSR_PP1_POLICY 0x00000642 + +/* Config TDP MSRs */ +#define MSR_CONFIG_TDP_NOMINAL 0x00000648 +#define MSR_CONFIG_TDP_LEVEL_1 0x00000649 +#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A +#define MSR_CONFIG_TDP_CONTROL 0x0000064B +#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C + +#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D + +#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 +#define MSR_PKG_ANY_CORE_C0_RES 0x00000659 +#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A +#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 
0x0000065B + +#define MSR_CORE_C1_RES 0x00000660 +#define MSR_MODULE_C6_RES_MS 0x00000664 + +#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 +#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 + +#define MSR_ATOM_CORE_RATIOS 0x0000066a +#define MSR_ATOM_CORE_VIDS 0x0000066b +#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c +#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d + + +#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 +#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 +#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 + +/* Hardware P state interface */ +#define MSR_PPERF 0x0000064e +#define MSR_PERF_LIMIT_REASONS 0x0000064f +#define MSR_PM_ENABLE 0x00000770 +#define MSR_HWP_CAPABILITIES 0x00000771 +#define MSR_HWP_REQUEST_PKG 0x00000772 +#define MSR_HWP_INTERRUPT 0x00000773 +#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_STATUS 0x00000777 + +/* CPUID.6.EAX */ +#define HWP_BASE_BIT (1<<7) +#define HWP_NOTIFICATIONS_BIT (1<<8) +#define HWP_ACTIVITY_WINDOW_BIT (1<<9) +#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) +#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) + +/* IA32_HWP_CAPABILITIES */ +#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) +#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) +#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) +#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) + +/* IA32_HWP_REQUEST */ +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_EPP_PERFORMANCE 0x00 +#define HWP_EPP_BALANCE_PERFORMANCE 0x80 +#define HWP_EPP_BALANCE_POWERSAVE 0xC0 +#define HWP_EPP_POWERSAVE 0xFF +#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) + +/* IA32_HWP_STATUS */ +#define HWP_GUARANTEED_CHANGE(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) + +/* IA32_HWP_INTERRUPT */ +#define 
HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) + +#define MSR_AMD64_MC0_MASK 0xc0010044 + +#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) +#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) +#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) +#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) + +#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) + +/* These are consecutive and not in the normal 4er MCE bank block */ +#define MSR_IA32_MC0_CTL2 0x00000280 +#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) + +#define MSR_P6_PERFCTR0 0x000000c1 +#define MSR_P6_PERFCTR1 0x000000c2 +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +#define MSR_KNC_PERFCTR0 0x00000020 +#define MSR_KNC_PERFCTR1 0x00000021 +#define MSR_KNC_EVNTSEL0 0x00000028 +#define MSR_KNC_EVNTSEL1 0x00000029 + +/* Alternative perfctr range with full access. */ +#define MSR_IA32_PMC0 0x000004c1 + +/* Auto-reload via MSR instead of DS area */ +#define MSR_RELOAD_PMC0 0x000014c1 +#define MSR_RELOAD_FIXED_CTR0 0x00001309 + +/* + * AMD64 MSRs. Not complete. See the architecture manual for a more + * complete list. 
+ */ +#define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_TSC_RATIO 0xc0000104 +#define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_CPUID_FN_1 0xc0011004 +#define MSR_AMD64_PATCH_LOADER 0xc0010020 +#define MSR_AMD_PERF_CTL 0xc0010062 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 +#define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD64_LS_CFG 0xc0011020 +#define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_BU_CFG2 0xc001102a +#define MSR_AMD64_IBSFETCHCTL 0xc0011030 +#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSFETCH_REG_COUNT 3 +#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL< Date: Wed, 9 Oct 2019 11:22:43 -0300 Subject: perf beauty: Make strarray's offset be u64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need it for things like MSRs that are sparse and go over MAXINT. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-g8t2d0jr0mg3yimg2qrjkvlt@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/beauty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 4cc4f6b3d4a1..5ad7542b428b 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -7,7 +7,7 @@ #include struct strarray { - int offset; + u64 offset; int nr_entries; const char *prefix; const char **entries; -- cgit v1.2.3-59-g8ed1b From 693d345818e106318710ac150ae252b73765d0fa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Sep 2019 15:28:02 -0300 Subject: perf trace beauty: Add a x86 MSR cmd id->str table generator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without parameters it'll parse tools/arch/x86/include/asm/msr-index.h and 
output a table usable by tools, that will be wired up later to a libtraceevent plugin registered from perf's glue code: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh static const char *x86_MSRs[] = { [0x00000034] = "SMI_COUNT", [0x0000003a] = "IA32_FEATURE_CONTROL", [0x0000003b] = "IA32_TSC_ADJUST", [0x00000040] = "LBR_CORE_FROM", [0x00000048] = "IA32_SPEC_CTRL", [0x00000049] = "IA32_PRED_CMD", [0x0000010b] = "IA32_FLUSH_CMD", [0x0000010F] = "TSX_FORCE_ABORT", [0x00000198] = "IA32_PERF_STATUS", [0x00000199] = "IA32_PERF_CTL", [0x00000da0] = "IA32_XSS", [0x00000dc0] = "LBR_INFO_0", [0x00000ffc] = "IA32_BNDCFGS_RSVD", }; #define x86_64_specific_MSRs_offset 0xc0000080 static const char *x86_64_specific_MSRs[] = { [0xc0000080 - x86_64_specific_MSRs_offset] = "EFER", [0xc0000081 - x86_64_specific_MSRs_offset] = "STAR", [0xc0000082 - x86_64_specific_MSRs_offset] = "LSTAR", [0xc0000083 - x86_64_specific_MSRs_offset] = "CSTAR", [0xc0000084 - x86_64_specific_MSRs_offset] = "SYSCALL_MASK", [0xc0000103 - x86_64_specific_MSRs_offset] = "TSC_AUX", [0xc0000104 - x86_64_specific_MSRs_offset] = "AMD64_TSC_RATIO", }; #define x86_AMD_V_KVM_MSRs_offset 0xc0010000 static const char *x86_AMD_V_KVM_MSRs[] = { [0xc0010000 - x86_AMD_V_KVM_MSRs_offset] = "K7_EVNTSEL0", [0xc0010114 - x86_AMD_V_KVM_MSRs_offset] = "VM_CR", [0xc0010115 - x86_AMD_V_KVM_MSRs_offset] = "VM_IGNNE", [0xc0010117 - x86_AMD_V_KVM_MSRs_offset] = "VM_HSAVE_PA", [0xc0010240 - x86_AMD_V_KVM_MSRs_offset] = "F15H_NB_PERF_CTL", [0xc0010241 - x86_AMD_V_KVM_MSRs_offset] = "F15H_NB_PERF_CTR", [0xc0010280 - x86_AMD_V_KVM_MSRs_offset] = "F15H_PTSC", }; Then these will in turn be hooked up in a follow up patch to be used by strarrays__scnprintf(). 
Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-ja080xawx08kedez855usnon@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/tracepoints/x86_msr.sh | 40 ++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100755 tools/perf/trace/beauty/tracepoints/x86_msr.sh diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh new file mode 100755 index 000000000000..831c02cf0586 --- /dev/null +++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 1 ] ; then + arch_x86_header_dir=tools/arch/x86/include/asm/ +else + arch_x86_header_dir=$1 +fi + +x86_msr_index=${arch_x86_header_dir}/msr-index.h + +# Support all later, with some hash table, for now chop off +# Just the ones starting with 0x00000 so as to have a simple +# array. + +printf "static const char *x86_MSRs[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*' +egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|AMD64|IA32_TSCDEADLINE|IDT_FCR4)' | \ + sed -r "s/$regex/\2 \1/g" | sort -n | \ + xargs printf "\t[%s] = \"%s\",\n" +printf "};\n\n" + +# Remove MSR_K6_WHCR, clashes with MSR_LSTAR +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0xc0000[[:xdigit:]]+)[[:space:]]*.*' +printf "#define x86_64_specific_MSRs_offset " +egrep $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1 +printf "static const char *x86_64_specific_MSRs[] = {\n" +egrep $regex ${x86_msr_index} | \ + sed -r "s/$regex/\2 \1/g" | egrep -vw 'K6_WHCR' | sort -n | \ + xargs printf "\t[%s - x86_64_specific_MSRs_offset] = \"%s\",\n" +printf "};\n\n" + 
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0xc0010[[:xdigit:]]+)[[:space:]]*.*' +printf "#define x86_AMD_V_KVM_MSRs_offset " +egrep $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1 +printf "static const char *x86_AMD_V_KVM_MSRs[] = {\n" +egrep $regex ${x86_msr_index} | \ + sed -r "s/$regex/\2 \1/g" | sort -n | \ + xargs printf "\t[%s - x86_AMD_V_KVM_MSRs_offset] = \"%s\",\n" +printf "};\n" -- cgit v1.2.3-59-g8ed1b From fd21834704a678a583cf294aea47c7ed3fd9d8d2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Sep 2019 15:47:16 -0300 Subject: perf beauty: Hook up the x86 MSR table generator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way we generate the source with the table for later use by plugins, etc. I.e. after running: $ make -C tools/perf O=/tmp/build/perf We end up with: $ head /tmp/build/perf/trace/beauty/generated/x86_arch_MSRs_array.c static const char *x86_MSRs[] = { [0x00000000] = "IA32_P5_MC_ADDR", [0x00000001] = "IA32_P5_MC_TYPE", [0x00000010] = "IA32_TSC", [0x00000017] = "IA32_PLATFORM_ID", [0x0000001b] = "IA32_APICBASE", [0x00000020] = "KNC_PERFCTR0", [0x00000021] = "KNC_PERFCTR1", [0x00000028] = "KNC_EVNTSEL0", [0x00000029] = "KNC_EVNTSEL1", $ Now it's just a matter of using it, first in a libtraceevent plugin. At some point we should move tools/perf/trace/beauty to tools/beauty/, so that it can be used more generally and even made available externally like libbpf, libperf, libtraceevent, etc.
Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-b3rmutg4igcohx6kpo67qh4j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 902c792f326a..45c14dc24f4b 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -407,6 +407,7 @@ linux_uapi_dir := $(srctree)/tools/include/uapi/linux asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/ x86_arch_asm_uapi_dir := $(srctree)/tools/arch/x86/include/uapi/asm/ +x86_arch_asm_dir := $(srctree)/tools/arch/x86/include/asm/ beauty_outdir := $(OUTPUT)trace/beauty/generated beauty_ioctl_outdir := $(beauty_outdir)/ioctl @@ -543,6 +544,12 @@ x86_arch_prctl_code_tbl := $(srctree)/tools/perf/trace/beauty/x86_arch_prctl.sh $(x86_arch_prctl_code_array): $(x86_arch_asm_uapi_dir)/prctl.h $(x86_arch_prctl_code_tbl) $(Q)$(SHELL) '$(x86_arch_prctl_code_tbl)' $(x86_arch_asm_uapi_dir) > $@ +x86_arch_MSRs_array := $(beauty_outdir)/x86_arch_MSRs_array.c +x86_arch_MSRs_tbl := $(srctree)/tools/perf/trace/beauty/tracepoints/x86_msr.sh + +$(x86_arch_MSRs_array): $(x86_arch_asm_dir)/msr-index.h $(x86_arch_MSRs_tbl) + $(Q)$(SHELL) '$(x86_arch_MSRs_tbl)' $(x86_arch_asm_dir) > $@ + rename_flags_array := $(beauty_outdir)/rename_flags_array.c rename_flags_tbl := $(srctree)/tools/perf/trace/beauty/rename_flags.sh @@ -677,6 +684,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(perf_ioctl_array) \ $(prctl_option_array) \ $(usbdevfs_ioctl_array) \ + $(x86_arch_MSRs_array) \ $(x86_arch_prctl_code_array) \ $(rename_flags_array) \ $(arch_errno_name_array) \ @@ -981,6 +989,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(perf_ioctl_array) \ 
$(OUTPUT)$(prctl_option_array) \ $(OUTPUT)$(usbdevfs_ioctl_array) \ + $(OUTPUT)$(x86_arch_MSRs_array) \ $(OUTPUT)$(x86_arch_prctl_code_array) \ $(OUTPUT)$(rename_flags_array) \ $(OUTPUT)$(arch_errno_name_array) \ -- cgit v1.2.3-59-g8ed1b From 5d88099bc00dccddf5da18e25e1223f01644f7a2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Oct 2019 15:50:15 -0300 Subject: perf trace: Allow associating scnprintf routines with well known arg names For instance 'msr' appears in several tracepoints, so we can associate it with a single scnprintf() routine auto-generated from kernel headers, as will be done in followup patches. Start with an empty array of associations. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-89ptht6s5fez82lykuwq1eyb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8303d83cb93c..d52972ca6123 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1479,6 +1479,27 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) return 0; } +static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = { +}; + +static int syscall_arg_fmt__cmp(const void *name, const void *fmtp) +{ + const struct syscall_arg_fmt *fmt = fmtp; + return strcmp(name, fmt->name); +} + +static struct syscall_arg_fmt * +__syscall_arg_fmt__find_by_name(struct syscall_arg_fmt *fmts, const int nmemb, const char *name) +{ + return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp); +} + +static struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name) +{ + const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name); + return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name); +} + static struct tep_format_field * syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, 
struct tep_format_field *field) { @@ -1518,6 +1539,11 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field * 7 unsigned long */ arg->scnprintf = SCA_FD; + } else { + struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name); + + if (fmt) + arg->scnprintf = fmt->scnprintf; } } -- cgit v1.2.3-59-g8ed1b From 646b3e2cfbf2d9207b4dbfaade7a28351aa7edeb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Oct 2019 15:52:19 -0300 Subject: perf trace beauty: Add the glue for the autogenerated MSR arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to wrap those autogenerated string arrays with the strarrays__scnprintf() formatter, do it. Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-wqjz4kwi4a0ot6lsis3kc65j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 3 +++ tools/perf/trace/beauty/tracepoints/Build | 1 + tools/perf/trace/beauty/tracepoints/x86_msr.c | 34 +++++++++++++++++++++++++++ 4 files changed, 39 insertions(+) create mode 100644 tools/perf/trace/beauty/tracepoints/Build create mode 100644 tools/perf/trace/beauty/tracepoints/x86_msr.c diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index afa75a76f6b8..433dc39053a7 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -17,3 +17,4 @@ perf-y += sockaddr.o perf-y += socket.o perf-y += statx.o perf-y += sync_file_range.o +perf-y += tracepoints/ diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 5ad7542b428b..aa3fac8bd1be 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -114,6 +114,9 @@ unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx); size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, 
struct syscall_arg *arg); #define SCA_STRARRAY_FLAGS syscall_arg__scnprintf_strarray_flags +size_t syscall_arg__scnprintf_x86_MSR(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_X86_MSR syscall_arg__scnprintf_x86_MSR + size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STRARRAYS syscall_arg__scnprintf_strarrays diff --git a/tools/perf/trace/beauty/tracepoints/Build b/tools/perf/trace/beauty/tracepoints/Build new file mode 100644 index 000000000000..625a67663de3 --- /dev/null +++ b/tools/perf/trace/beauty/tracepoints/Build @@ -0,0 +1 @@ +perf-y += x86_msr.o diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.c b/tools/perf/trace/beauty/tracepoints/x86_msr.c new file mode 100644 index 000000000000..5e9ef5369fb5 --- /dev/null +++ b/tools/perf/trace/beauty/tracepoints/x86_msr.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * trace/beauty/x86_msr.c + * + * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo + */ + +#include "trace/beauty/beauty.h" + +#include "trace/beauty/generated/x86_arch_MSRs_array.c" + +static DEFINE_STRARRAY(x86_MSRs, "MSR_"); +static DEFINE_STRARRAY_OFFSET(x86_64_specific_MSRs, "MSR_", x86_64_specific_MSRs_offset); +static DEFINE_STRARRAY_OFFSET(x86_AMD_V_KVM_MSRs, "MSR_", x86_AMD_V_KVM_MSRs_offset); + +static struct strarray *x86_MSRs_tables[] = { + &strarray__x86_MSRs, + &strarray__x86_64_specific_MSRs, + &strarray__x86_AMD_V_KVM_MSRs, +}; + +static DEFINE_STRARRAYS(x86_MSRs_tables); + +static size_t x86_MSR__scnprintf(unsigned long msr, char *bf, size_t size, bool show_prefix) +{ + return strarrays__scnprintf(&strarrays__x86_MSRs_tables, bf, size, "%#x", show_prefix, msr); +} + +size_t syscall_arg__scnprintf_x86_MSR(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long flags = arg->val; + + return x86_MSR__scnprintf(flags, bf, size, arg->show_string_prefix); +} -- cgit v1.2.3-59-g8ed1b From c330ef2847eeedfa9d06f03836dfd4fc6727e855 Mon Sep 17 
00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Oct 2019 15:54:51 -0300 Subject: perf trace: Associate the "msr" tracepoint arg name with x86_MSR__scnprintf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that we can go from: # perf trace -e msr:write_msr --max-stack=16 sleep 1 0.000 sleep/6740 msr:write_msr(msr: 3221225728, val: 139636317451648) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) do_arch_prctl_64 ([kernel.kallsyms]) __x64_sys_arch_prctl ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64 ([kernel.kallsyms]) init_tls (/usr/lib64/ld-2.29.so) dl_main (/usr/lib64/ld-2.29.so) _dl_sysdep_start (/usr/lib64/ld-2.29.so) _dl_start (/usr/lib64/ld-2.29.so) # To: # perf trace -e msr:write_msr --max-stack=16 sleep 1 0.000 sleep/8519 msr:write_msr(msr: FS_BASE, val: 139878031705472) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) do_arch_prctl_64 ([kernel.kallsyms]) __x64_sys_arch_prctl ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64 ([kernel.kallsyms]) init_tls (/usr/lib64/ld-2.29.so) dl_main (/usr/lib64/ld-2.29.so) _dl_sysdep_start (/usr/lib64/ld-2.29.so) _dl_start (/usr/lib64/ld-2.29.so) # This, in reverse, will allow for symbolic system call/tracepoint filtering. 
Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-q1q4unmqja5ex7dy0kb5cjaa@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d52972ca6123..e9f132aa5a09 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1480,6 +1480,7 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) } static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = { + { .name = "msr", .scnprintf = SCA_X86_MSR, } }; static int syscall_arg_fmt__cmp(const void *name, const void *fmtp) -- cgit v1.2.3-59-g8ed1b From 05cea4492c9dd28439cc73de1047ab3b26033736 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Oct 2019 16:43:03 -0300 Subject: perf evlist: Factor out asprintf routine to build a tracepoint pid filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be used to append such lists to existing filters. 
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-798vlyqfqw938ehoe8etivx1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b4c43ac4583f..c1b46080426b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1053,6 +1053,9 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) struct evsel *evsel; int err = 0; + if (filter == NULL) + return -1; + evlist__for_each_entry(evlist, evsel) { if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) continue; @@ -1065,16 +1068,15 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) return err; } -int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) +static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids) { char *filter; - int ret = -1; size_t i; for (i = 0; i < npids; ++i) { if (i == 0) { if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) - return -1; + return NULL; } else { char *tmp; @@ -1086,8 +1088,17 @@ int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t * } } - ret = perf_evlist__set_tp_filter(evlist, filter); + return filter; out_free: + free(filter); + return NULL; +} + +int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) +{ + char *filter = asprintf__tp_filter_pids(npids, pids); + int ret = perf_evlist__set_tp_filter(evlist, filter); + free(filter); return ret; } -- cgit v1.2.3-59-g8ed1b From 53c92f73389d049d72b2e1d1cbc81c007241d422 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Oct 2019 16:52:17 -0300 Subject: perf evlist: Introduce append_tp_filter() method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be used by 'perf trace' to support 'perf 
trace --filter', we need to append to any pre-existing filter. When parse_filter() gets invoked to process --filter, it'll set the filter to that specified on the command line, later on, when we filter out 'perf trace' own pid to avoid an event feedback loop, we need to preserve the command line filter put in place by parse_filter(). Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-h9rot08qmxlnfmte0holt68x@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 20 ++++++++++++++++++++ tools/perf/util/evlist.h | 2 ++ 2 files changed, 22 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c1b46080426b..1650d242a1c8 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1068,6 +1068,26 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) return err; } +int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter) +{ + struct evsel *evsel; + int err = 0; + + if (filter == NULL) + return -1; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) + continue; + + err = perf_evsel__append_tp_filter(evsel, filter); + if (err) + break; + } + + return err; +} + static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids) { char *filter; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 00eab9435847..c58fd1908bfc 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -140,6 +140,8 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter); int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); +int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter); + struct evsel * perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id); -- cgit v1.2.3-59-g8ed1b From 
1827ab5ba8e1d0354cc36b3692444306ced01471 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Oct 2019 17:00:34 -0300 Subject: perf evlist: Introduce append_tp_filter_pid() and append_tp_filter_pids() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll need this to support 'perf trace -e tracepoint --filter=expr', as the command line tracepoint filter is attached to the preceding evsel, just like in 'perf record' and when we go to set pid filters, which we do at the minimum to filter 'perf trace' own syscalls, we need to append, not set the tp filter. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-daynpknni44ywuzi8iua57nn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 14 ++++++++++++++ tools/perf/util/evlist.h | 3 +++ 2 files changed, 17 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 1650d242a1c8..e33b46aca5cb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1128,6 +1128,20 @@ int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid) return perf_evlist__set_tp_filter_pids(evlist, 1, &pid); } +int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) +{ + char *filter = asprintf__tp_filter_pids(npids, pids); + int ret = perf_evlist__append_tp_filter(evlist, filter); + + free(filter); + return ret; +} + +int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid) +{ + return perf_evlist__append_tp_filter_pids(evlist, 1, &pid); +} + bool perf_evlist__valid_sample_type(struct evlist *evlist) { struct evsel *pos; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index c58fd1908bfc..13051409fd22 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -142,6 +142,9 @@ int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t * int
perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter); +int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); +int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); + struct evsel * perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id); -- cgit v1.2.3-59-g8ed1b From d4097f1937f2242d0aa0a7c654d2159a6895e5c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 8 Oct 2019 07:33:08 -0300 Subject: perf trace: Introduce --filter for tracepoint events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to what is in 'perf record', works just like there: # perf trace -e msr:* 328.297 :0/0 msr:write_msr(msr: FS_BASE, val: 140240388381888) 328.302 :0/0 msr:write_msr(msr: FS_BASE, val: 140240388381888) 328.306 :0/0 msr:write_msr(msr: FS_BASE, val: 140240388381888) 328.317 :0/0 msr:write_msr(msr: FS_BASE, val: 140240388381888) 328.322 :0/0 msr:write_msr(msr: FS_BASE, val: 140240388381888) 328.327 :0/0 msr:write_msr(msr: FS_BASE, val: 140240388381888) 328.331 :0/0 msr:write_msr(msr: FS_BASE, val: 140240388381888) 328.336 :0/0 msr:write_msr(msr: FS_BASE, val: 140240388381888) 328.340 :0/0 ^Cmsr:write_msr(msr: FS_BASE, val: 140240388381888) # So, for a system wide trace session looking at the write_msr tracepoint we see a flood of MSR_FS_BASE, we need to get the number for that: # grep FS_BASE /tmp/build/perf/trace/beauty/generated/x86_arch_MSRs_array.c [0xc0000100 - x86_64_specific_MSRs_offset] = "FS_BASE", # And then use it in a filter: # perf trace -e msr:* --filter="msr!=0xc0000100" 942.177 :0/0 msr:write_msr(msr: IA32_TSC_DEADLINE, val: 3056931068232) 942.199 :0/0 msr:write_msr(msr: IA32_TSC_DEADLINE, val: 3057135655252) 942.203 :0/0 msr:write_msr(msr: IA32_TSC_DEADLINE, val: 3056931068222) 942.231 :0/0 msr:write_msr(msr: IA32_TSC_DEADLINE, val: 3056998373022) 942.241 :0/0 msr:write_msr(msr: IA32_TSC_DEADLINE, val: 3056931068236) # 
Ok, lets filter that too, too noisy: # grep TSC_DEADLINE /tmp/build/perf/trace/beauty/generated/x86_arch_MSRs_array.c [0x000006E0] = "IA32_TSC_DEADLINE", # # perf trace -e msr:* --filter="msr!=0xc0000100 && msr!=0x6e0" -a sleep 0.1 0.000 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST) 0.066 CPU 0/KVM/4895 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) 0.070 CPU 0/KVM/4895 msr:write_msr(msr: 0x830, val: 34359740667) 0.099 CPU 0/KVM/4895 msr:read_msr(msr: IA32_SYSENTER_ESP, val: -2199021993472) 0.100 CPU 0/KVM/4895 msr:read_msr(msr: IA32_APICBASE, val: 4276096000) 0.101 CPU 0/KVM/4895 msr:read_msr(msr: IA32_DEBUGCTLMSR) 0.109 :0/0 msr:write_msr(msr: IA32_SPEC_CTRL) 1.000 :0/0 msr:write_msr(msr: 0x830, val: 17179871485) 18.893 :0/0 msr:write_msr(msr: 0x83f, val: 246) 28.810 :0/0 msr:write_msr(msr: 0x830, val: 68719479037) 40.117 CPU 0/KVM/4895 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) 40.127 CPU 0/KVM/4895 msr:read_msr(msr: IA32_DEBUGCTLMSR) 40.139 CPU 0/KVM/4895 msr:write_msr(msr: LSTAR, val: -2130661312) 40.141 CPU 0/KVM/4895 msr:write_msr(msr: SYSCALL_MASK, val: 14080) 40.142 CPU 0/KVM/4895 msr:write_msr(msr: TSC_AUX) 40.144 CPU 0/KVM/4895 msr:write_msr(msr: KERNEL_GS_BASE) 40.147 CPU 0/KVM/4895 msr:write_msr(msr: IA32_SPEC_CTRL) 40.148 CPU 0/KVM/4895 msr:write_msr(msr: IA32_FLUSH_CMD, val: 1) 40.151 CPU 0/KVM/4895 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) ^C # One can combine that with filtering pids as well: # perf trace -e msr:* --filter="msr!=0xc0000100 && msr!=0x6e0" --filter-pids 4895 -a sleep 0.09 0.000 :0/0 msr:write_msr(msr: 0x830, val: 4294969597) 0.291 gnome-terminal/2790 msr:write_msr(msr: SYSCALL_MASK, val: 292608) 0.294 gnome-terminal/2790 msr:write_msr(msr: LSTAR, val: -1935671280) 0.295 gnome-terminal/2790 msr:write_msr(msr: TSC_AUX, val: 6) 10.940 gnome-terminal/2790 msr:write_msr(msr: 0x830, val: 4294969597) 15.943 gnome-shell/2096 msr:write_msr(msr: 0x830, val: 4294969597) 16.975 :0/0 msr:write_msr(msr: 0x830, val: 4294969597) 19.560 :0/0 
msr:write_msr(msr: 0x83f, val: 246) 25.162 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST) 25.807 JS Watchdog/3635 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) 25.820 :0/0 msr:write_msr(msr: IA32_SPEC_CTRL) 25.941 gnome-terminal/2790 msr:write_msr(msr: 0x830, val: 4294969597) 26.941 gnome-terminal/2790 msr:write_msr(msr: 0x830, val: 4294969597) 29.942 gnome-terminal/2790 msr:write_msr(msr: 0x830, val: 4294969597) 45.313 :0/0 msr:write_msr(msr: 0x83f, val: 246) 56.945 gnome-terminal/2790 msr:write_msr(msr: 0x830, val: 4294969597) 60.946 gnome-terminal/2790 msr:write_msr(msr: 0x830, val: 4294969597) 74.096 JS Watchdog/8971 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) 74.130 :0/0 msr:write_msr(msr: IA32_SPEC_CTRL) 79.673 :0/0 msr:write_msr(msr: 0x83f, val: 246) 79.947 gnome-terminal/2790 msr:write_msr(msr: 0x830, val: 17179871485) # Or for just a pid, with callchains: # grep SYSCALL_MAS /tmp/build/perf/trace/beauty/generated/x86_arch_MSRs_array.c [0xc0000084 - x86_64_specific_MSRs_offset] = "SYSCALL_MASK", # perf trace -e msr:* --filter="msr==0xc0000084" --pid 2790 --call-graph=dwarf 0.000 gnome-terminal/2790 msr:write_msr(msr: SYSCALL_MASK, val: 292608) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) kvm_on_user_return ([kvm]) fire_user_return_notifiers ([kernel.kallsyms]) exit_to_usermode_loop ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64 ([kernel.kallsyms]) __GI___poll (inlined) 9299.073 gnome-terminal/2790 msr:write_msr(msr: SYSCALL_MASK, val: 292608) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) kvm_on_user_return ([kvm]) fire_user_return_notifiers ([kernel.kallsyms]) exit_to_usermode_loop ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64 ([kernel.kallsyms]) __GI___poll (inlined) 9348.374 gnome-terminal/2790 msr:write_msr(msr: SYSCALL_MASK, val: 292608) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) kvm_on_user_return ([kvm]) 
fire_user_return_notifiers ([kernel.kallsyms]) exit_to_usermode_loop ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64 ([kernel.kallsyms]) __GI___poll (inlined) # Ok, just another form of KVM to emit MSRs :-) Next step: eliminate those greps by getting the filter expression, looking for arg names, then for the arrays associated with it to do a reverse lookup. Also allow those filters to be associated with strace-like syscall names. After that: augment the 'val' arg for 'msr:write_msr' based on the first arg, 'msr'. Then, do that with eBPF too, not just with tracepoint filters. Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Marcelo Tosatti Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-95bfe5d4tzy5f66bx49d05rj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 5 +++++ tools/perf/builtin-trace.c | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index ba16cd5b680f..3bb89c2e9020 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -42,6 +42,11 @@ OPTIONS Prefixing with ! shows all syscalls but the ones specified. You may need to escape it. +--filter=:: + Event filter. This option should follow an event selector (-e) which + selects tracepoint event(s). + + -D msecs:: --delay msecs:: After starting the program, wait msecs before measuring.
This is useful to diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e9f132aa5a09..2c1968061b4b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3362,7 +3362,7 @@ static int trace__set_filter_loop_pids(struct trace *trace) thread = parent; } - err = perf_evlist__set_tp_filter_pids(trace->evlist, nr, pids); + err = perf_evlist__append_tp_filter_pids(trace->evlist, nr, pids); if (!err && trace->filter_pids.map) err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids); @@ -3379,8 +3379,8 @@ static int trace__set_filter_pids(struct trace *trace) * we fork the workload in perf_evlist__prepare_workload. */ if (trace->filter_pids.nr > 0) { - err = perf_evlist__set_tp_filter_pids(trace->evlist, trace->filter_pids.nr, - trace->filter_pids.entries); + err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr, + trace->filter_pids.entries); if (!err && trace->filter_pids.map) { err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr, trace->filter_pids.entries); @@ -4294,6 +4294,8 @@ int cmd_trace(int argc, const char **argv) OPT_CALLBACK('e', "event", &trace, "event", "event/syscall selector. 
use 'perf list' to list available events", trace__parse_events_option), + OPT_CALLBACK(0, "filter", &trace.evlist, "filter", + "event filter", parse_filter), OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), -- cgit v1.2.3-59-g8ed1b From 3f41b77843b338e836f52cc2d486be689d6cb9c1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Oct 2019 16:06:43 -0300 Subject: perf trace: Add a strtoul() method to 'struct syscall_arg_fmt' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will go from a string to a number, so that filter expressions can be constructed with strings and then, before applying the tracepoint filters (or eBPF, in the future) we can map those strings to numbers. The first one will be for 'msr' tracepoint arguments, but real quickly we will be able to reuse all strarrays for that. Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-wgqq48agcgr95b8dmn6fygtr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2c1968061b4b..faa5bf4a5a3a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -86,8 +86,12 @@ # define F_LINUX_SPECIFIC_BASE 1024 #endif +/* + * strtoul: Go from a string to a value, i.e. 
for msr: MSR_FS_BASE to 0xc0000100 + */ struct syscall_arg_fmt { size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); + bool (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val); unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val); void *parm; const char *name; @@ -1543,8 +1547,10 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field } else { struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name); - if (fmt) + if (fmt) { arg->scnprintf = fmt->scnprintf; + arg->strtoul = fmt->strtoul; + } } } -- cgit v1.2.3-59-g8ed1b From d0a3a1041005d9273d18669819e2a6dfed922a4d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Oct 2019 16:11:36 -0300 Subject: perf trace: Introduce a strtoul() method for 'struct strarrays' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And also for 'struct strarray', since it's needed to implement strarrays__strtoul(). This just traverses the entries and when finding a match, returns (offset + index), i.e. the value associated with the searched string. E.g. "EFER" (MSR_EFER) returns: # grep -w EFER -B2 /tmp/build/perf/trace/beauty/generated/x86_arch_MSRs_array.c #define x86_64_specific_MSRs_offset 0xc0000080 static const char *x86_64_specific_MSRs[] = { [0xc0000080 - x86_64_specific_MSRs_offset] = "EFER", # 0xc0000080 This will be auto-attached to 'struct syscall_arg_fmt' entries associated with strarrays as soon as we add a ->strarray and ->strarrays to 'struct syscall_arg_fmt'. 
Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-r2hpaahf8lishyb1owko9vs1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 28 ++++++++++++++++++++++++++++ tools/perf/trace/beauty/beauty.h | 5 +++++ 2 files changed, 33 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index faa5bf4a5a3a..50a1aeb997ae 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -477,6 +477,34 @@ size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const return printed; } +bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret) +{ + int i; + + for (i = 0; i < sa->nr_entries; ++i) { + if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') { + *ret = sa->offset + i; + return true; + } + } + + return false; +} + +bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret) +{ + int i; + + for (i = 0; i < sas->nr_entries; ++i) { + struct strarray *sa = sas->entries[i]; + + if (strarray__strtoul(sa, bf, size, ret)) + return true; + } + + return false; +} + size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg) { diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index aa3fac8bd1be..919ac4548bd8 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -5,6 +5,7 @@ #include #include #include +#include struct strarray { u64 offset; @@ -29,6 +30,8 @@ struct strarray { size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val); size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, bool show_prefix, unsigned long flags); +bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret); + struct trace; struct thread; @@ -51,6 +54,8 @@ struct 
strarrays { size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val); +bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret); + size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size); extern struct strarray strarray__socket_families; -- cgit v1.2.3-59-g8ed1b From 90df0249c2eae21f329760ee857575260926188a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Oct 2019 16:22:16 -0300 Subject: perf trace: Expand strings in filters to integers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that one can try things like: # perf trace -e msr:* --filter="msr!=FS_BASE && msr != IA32_TSC_DEADLINE && msr != 0x830 && msr != 0x83f && msr !=IA32_SPEC_CTRL" --filter-pids 3750 That, at this point in the patchset, without any strtoul in place for tracepoint arguments, will result in: No resolver (strtoul) for "msr" in "msr:read_msr", can't set filter "(msr!=FS_BASE && msr != IA32_TSC_DEADLINE && msr != 0x830 && msr != 0x83f && msr !=IA32_SPEC_CTRL) && (common_pid != 25407 && common_pid != 3750)" # See you in the next cset! 
Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-dx5j70fv2rgkeezd1cb3hv2p@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 130 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 50a1aeb997ae..515a800efc9c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3484,6 +3484,133 @@ static int ordered_events__deliver_event(struct ordered_events *oe, return __trace__deliver_event(trace, event->event); } +static struct syscall_arg_fmt *perf_evsel__syscall_arg_fmt(struct evsel *evsel, char *arg) +{ + struct tep_format_field *field; + struct syscall_arg_fmt *fmt = evsel->priv; + + if (evsel->tp_format == NULL || fmt == NULL) + return NULL; + + for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt) + if (strcmp(field->name, arg) == 0) + return fmt; + + return NULL; +} + +static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel) +{ + char *tok, *left = evsel->filter, *new_filter = evsel->filter; + + while ((tok = strpbrk(left, "=<>!")) != NULL) { + char *right = tok + 1, *right_end; + + if (*right == '=') + ++right; + + while (isspace(*right)) + ++right; + + if (*right == '\0') + break; + + while (!isalpha(*left)) + if (++left == tok) { + /* + * Bail out, can't find the name of the argument that is being + * used in the filter, let it try to set this filter, will fail later. 
+ */ + return 0; + } + + right_end = right + 1; + while (isalnum(*right_end) || *right_end == '_') + ++right_end; + + if (isalpha(*right)) { + struct syscall_arg_fmt *fmt; + int left_size = tok - left, + right_size = right_end - right; + char arg[128]; + + while (isspace(left[left_size - 1])) + --left_size; + + scnprintf(arg, sizeof(arg), "%.*s", left_size, left); + + fmt = perf_evsel__syscall_arg_fmt(evsel, arg); + if (fmt == NULL) { + pr_debug("\"%s\" not found in \"%s\", can't set filter \"%s\"\n", + arg, evsel->name, evsel->filter); + return -1; + } + + pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> ", + arg, (int)(right - tok), tok, right_size, right); + + if (fmt->strtoul) { + u64 val; + if (fmt->strtoul(right, right_size, NULL, &val)) { + char *n, expansion[19]; + int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val); + int expansion_offset = right - new_filter; + + pr_debug("%s", expansion); + + if (asprintf(&n, "%.*s%s%s", expansion_offset, new_filter, expansion, right_end) < 0) { + pr_debug(" out of memory!\n"); + free(new_filter); + return -1; + } + if (new_filter != evsel->filter) + free(new_filter); + left = n + expansion_offset + expansion_lenght; + new_filter = n; + } else { + pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n", + right_size, right, arg, evsel->name, evsel->filter); + return -1; + } + } else { + pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n", + arg, evsel->name, evsel->filter); + return -1; + } + + pr_debug("\n"); + } else { + left = right_end; + } + } + + if (new_filter != evsel->filter) { + pr_debug("New filter for %s: %s\n", evsel->name, new_filter); + perf_evsel__set_filter(evsel, new_filter); + free(new_filter); + } + + return 0; +} + +static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel) +{ + struct evlist *evlist = trace->evlist; + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if 
(evsel->filter == NULL) + continue; + + if (trace__expand_filter(trace, evsel)) { + *err_evsel = evsel; + return -1; + } + } + + return 0; +} + static int trace__run(struct trace *trace, int argc, const char **argv) { struct evlist *evlist = trace->evlist; @@ -3625,6 +3752,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) */ trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close")); + err = trace__expand_filters(trace, &evsel); + if (err) + goto out_delete_evlist; err = perf_evlist__apply_filters(evlist, &evsel); if (err < 0) goto out_error_apply_filters; -- cgit v1.2.3-59-g8ed1b From 728db198868c7b46db5e65717d4518aeb6523ccc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Oct 2019 16:25:02 -0300 Subject: perf beauty: Introduce strtoul() for x86 MSRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continuing from the previous cset comment, now that filter expression works: # perf trace -e msr:* --filter="msr!=FS_BASE && msr != IA32_TSC_DEADLINE && msr != 0x830 && msr != 0x83f && msr !=IA32_SPEC_CTRL" --filter-pids 3750 0.000 Timer/5033 msr:write_msr(msr: SYSCALL_MASK, val: 292608) 0.009 Timer/5033 msr:write_msr(msr: LSTAR, val: -1398800368) 0.010 Timer/5033 msr:write_msr(msr: TSC_AUX, val: 4) 0.050 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST) 45.661 gnome-terminal/12595 msr:write_msr(msr: SYSCALL_MASK, val: 292608) 45.672 gnome-terminal/12595 msr:write_msr(msr: LSTAR, val: -1398800368) 45.675 gnome-terminal/12595 msr:write_msr(msr: TSC_AUX, val: 3) 54.852 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST) 130.508 Timer/4050 msr:write_msr(msr: SYSCALL_MASK, val: 292608) 130.527 Timer/4050 msr:write_msr(msr: LSTAR, val: -1398800368) 130.531 Timer/4050 msr:write_msr(msr: TSC_AUX, val: 3) 140.924 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST) 164.738 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST) 603.578 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST) 620.809 :0/0 msr:read_msr(msr: 
IA32_TSC_ADJUST) 690.115 JS Watchdog/4259 msr:write_msr(msr: SYSCALL_MASK, val: 292608) 690.136 JS Watchdog/4259 msr:write_msr(msr: LSTAR, val: -1398800368) 690.141 JS Watchdog/4259 msr:write_msr(msr: TSC_AUX, val: 3) 690.186 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST) 759.016 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST) ^C[root@quaco ~]# Or look at the first 3 write_msr events for that IA32_TSC_DEADLINE to learn why it happens so often: # perf trace --max-events=3 --max-stack=8 -e msr:* --filter="msr==IA32_TSC_DEADLINE" --filter-pids 3750 0.000 :0/0 msr:write_msr(msr: IA32_TSC_DEADLINE, val: 19296732550862) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) lapic_next_deadline ([kernel.kallsyms]) clockevents_program_event ([kernel.kallsyms]) hrtimer_interrupt ([kernel.kallsyms]) smp_apic_timer_interrupt ([kernel.kallsyms]) apic_timer_interrupt ([kernel.kallsyms]) cpuidle_enter_state ([kernel.kallsyms]) 32.646 :0/0 msr:write_msr(msr: IA32_TSC_DEADLINE, val: 19296800134158) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) lapic_next_deadline ([kernel.kallsyms]) clockevents_program_event ([kernel.kallsyms]) hrtimer_start_range_ns ([kernel.kallsyms]) tick_nohz_restart_sched_tick ([kernel.kallsyms]) tick_nohz_idle_exit ([kernel.kallsyms]) do_idle ([kernel.kallsyms]) 32.802 :0/0 msr:write_msr(msr: IA32_TSC_DEADLINE, val: 19297507436922) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) lapic_next_deadline ([kernel.kallsyms]) clockevents_program_event ([kernel.kallsyms]) hrtimer_try_to_cancel ([kernel.kallsyms]) hrtimer_cancel ([kernel.kallsyms]) tick_nohz_restart_sched_tick ([kernel.kallsyms]) tick_nohz_idle_exit ([kernel.kallsyms]) # And if some of the strings can't be found: # trace -e msr:* --filter="msr!=SPECULATIVE_EXECUTION_PROBLEMS_SOLUTION && msr != IA32_TSC_DEADLINE && msr != 0x830 && msr != 0x83f && msr !=IA32_SPEC_CTRL" --filter-pids 3750 "SPECULATIVE_EXECUTION_PROBLEMS_SOLUTION" not 
found for "msr" in "msr:read_msr", can't set filter "(msr!=SPECULATIVE_EXECUTION_PROBLEMS_SOLUTION && msr != IA32_TSC_DEADLINE && msr != 0x830 && msr != 0x83f && msr !=IA32_SPEC_CTRL) && (common_pid != 28131 && common_pid != 3750)" # Next step is to automatically wire up the pre-existing strarrays, which there are quite a few. The strtoul() methods will be further enhanced to allow for looking at other arguments in a syscall/tracepoint, just like going from integer to string (scnprintf methods), so that those "val" lines for the msr tracepoints can be properly formatted or even resolved into some string. Cc: Adrian Hunter Cc: Brendan Gregg Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-4qaai5iqjgefd11k4ddm7qg8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 +- tools/perf/trace/beauty/beauty.h | 3 +++ tools/perf/trace/beauty/tracepoints/x86_msr.c | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 515a800efc9c..b627975d1c3e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1512,7 +1512,7 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) } static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = { - { .name = "msr", .scnprintf = SCA_X86_MSR, } + { .name = "msr", .scnprintf = SCA_X86_MSR, .strtoul = STUL_X86_MSR, } }; static int syscall_arg_fmt__cmp(const void *name, const void *fmtp) diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 919ac4548bd8..77ad80a399fd 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -122,6 +122,9 @@ size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct sysca size_t syscall_arg__scnprintf_x86_MSR(char *bf, size_t size, struct syscall_arg *arg); #define SCA_X86_MSR syscall_arg__scnprintf_x86_MSR +bool 
syscall_arg__strtoul_x86_MSR(char *bf, size_t size, struct syscall_arg *arg, u64 *ret); +#define STUL_X86_MSR syscall_arg__strtoul_x86_MSR + size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STRARRAYS syscall_arg__scnprintf_strarrays diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.c b/tools/perf/trace/beauty/tracepoints/x86_msr.c index 5e9ef5369fb5..6b8f129579d6 100644 --- a/tools/perf/trace/beauty/tracepoints/x86_msr.c +++ b/tools/perf/trace/beauty/tracepoints/x86_msr.c @@ -32,3 +32,8 @@ size_t syscall_arg__scnprintf_x86_MSR(char *bf, size_t size, struct syscall_arg return x86_MSR__scnprintf(flags, bf, size, arg->show_string_prefix); } + +bool syscall_arg__strtoul_x86_MSR(char *bf, size_t size, struct syscall_arg *arg __maybe_unused, u64 *ret) +{ + return strarrays__strtoul(&strarrays__x86_MSRs_tables, bf, size, ret); +} -- cgit v1.2.3-59-g8ed1b From 42466b9f29b415c254dc4c2f4618e2a96951a406 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Sep 2019 17:36:23 -0700 Subject: perf tools: Avoid 'sample_reg_masks' being const + weak Being const + weak breaks with some compilers that constant-propagate from the weak symbol. This behavior is outside of the specification, but in LLVM is chosen to match GCC's behavior. LLVM's implementation was set in this patch: https://github.com/llvm/llvm-project/commit/f49573d1eedcf1e44893d5a062ac1b72c8419646 A const + weak symbol is set to be weak_odr: https://llvm.org/docs/LangRef.html ODR is one definition rule, and given there is one constant definition constant-propagation is possible. It is possible to get this code to miscompile with LLVM when applying link time optimization. As compilers become more aggressive, this is likely to break in more instances. Move the definition of sample_reg_masks to the conditional part of perf_regs.h and guard usage with HAVE_PERF_REGS_SUPPORT. This avoids the weak symbol. 
Fix an issue when HAVE_PERF_REGS_SUPPORT isn't defined from patch v1. In v3, add perf_regs.c for architectures that HAVE_PERF_REGS_SUPPORT but don't declare sample_regs_masks. Further notes: Jiri asked: "Is this just a precaution or you actualy saw some breakage?" Ian answered: "We saw a breakage with clang with thinlto enabled for linking. Our compiler team had recently seen, and were surprised by, a similar issue and were able to dig out the weak ODR issue." Signed-off-by: Ian Rogers Reviewed-by: Nick Desaulniers Acked-by: Jiri Olsa Cc: Albert Ou Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: clang-built-linux@googlegroups.com Cc: Guo Ren Cc: Kan Liang Cc: linux-riscv@lists.infradead.org Cc: Mao Han Cc: Mark Rutland Cc: Namhyung Kim Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20191001003623.255186-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/Build | 2 ++ tools/perf/arch/arm/util/perf_regs.c | 6 ++++++ tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/perf_regs.c | 6 ++++++ tools/perf/arch/csky/util/Build | 2 ++ tools/perf/arch/csky/util/perf_regs.c | 6 ++++++ tools/perf/arch/riscv/util/Build | 2 ++ tools/perf/arch/riscv/util/perf_regs.c | 6 ++++++ tools/perf/arch/s390/util/Build | 1 + tools/perf/arch/s390/util/perf_regs.c | 6 ++++++ tools/perf/util/parse-regs-options.c | 8 ++++++-- tools/perf/util/perf_regs.c | 4 ---- tools/perf/util/perf_regs.h | 4 ++-- 13 files changed, 46 insertions(+), 8 deletions(-) create mode 100644 tools/perf/arch/arm/util/perf_regs.c create mode 100644 tools/perf/arch/arm64/util/perf_regs.c create mode 100644 tools/perf/arch/csky/util/perf_regs.c create mode 100644 tools/perf/arch/riscv/util/perf_regs.c create mode 100644 tools/perf/arch/s390/util/perf_regs.c diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index 296f0eac5e18..37fc63708966 100644 --- 
a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -1,3 +1,5 @@ +perf-y += perf_regs.o + perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm/util/perf_regs.c b/tools/perf/arch/arm/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/arm/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 3cde540d2fcf..0a7782c61209 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,4 +1,5 @@ perf-y += header.o +perf-y += perf_regs.o perf-y += sym-handling.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/csky/util/Build b/tools/perf/arch/csky/util/Build index 1160bb2332ba..7d3050134ae0 100644 --- a/tools/perf/arch/csky/util/Build +++ b/tools/perf/arch/csky/util/Build @@ -1,2 +1,4 @@ +perf-y += perf_regs.o + perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/csky/util/perf_regs.c b/tools/perf/arch/csky/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/csky/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git 
a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build index 1160bb2332ba..7d3050134ae0 100644 --- a/tools/perf/arch/riscv/util/Build +++ b/tools/perf/arch/riscv/util/Build @@ -1,2 +1,4 @@ +perf-y += perf_regs.o + perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/riscv/util/perf_regs.c b/tools/perf/arch/riscv/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/riscv/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 22797f043b84..3d9d0f4f72ca 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -1,5 +1,6 @@ perf-y += header.o perf-y += kvm-stat.o +perf-y += perf_regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/s390/util/perf_regs.c b/tools/perf/arch/s390/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/s390/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c index ef46c2848808..e687497b3aac 100644 --- a/tools/perf/util/parse-regs-options.c +++ b/tools/perf/util/parse-regs-options.c @@ -13,7 +13,7 @@ static int __parse_regs(const struct option *opt, const char *str, int unset, bool intr) { uint64_t *mode = (uint64_t *)opt->value; - const struct sample_reg *r; + const struct sample_reg *r = NULL; char *s, *os = NULL, *p; int ret = -1; uint64_t mask; @@ -46,19 +46,23 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr) if 
(!strcmp(s, "?")) { fprintf(stderr, "available registers: "); +#ifdef HAVE_PERF_REGS_SUPPORT for (r = sample_reg_masks; r->name; r++) { if (r->mask & mask) fprintf(stderr, "%s ", r->name); } +#endif fputc('\n', stderr); /* just printing available regs */ return -1; } +#ifdef HAVE_PERF_REGS_SUPPORT for (r = sample_reg_masks; r->name; r++) { if ((r->mask & mask) && !strcasecmp(s, r->name)) break; } - if (!r->name) { +#endif + if (!r || !r->name) { ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n", s, intr ? "-I" : "--user-regs="); goto error; diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 2774cec1f15f..5ee47ae1509c 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -3,10 +3,6 @@ #include "perf_regs.h" #include "event.h" -const struct sample_reg __weak sample_reg_masks[] = { - SMPL_REG_END -}; - int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, char **new_op __maybe_unused) { diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 47fe34e5f7d5..e014c2c038f4 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -15,8 +15,6 @@ struct sample_reg { #define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) } #define SMPL_REG_END { .name = NULL } -extern const struct sample_reg sample_reg_masks[]; - enum { SDT_ARG_VALID = 0, SDT_ARG_SKIP, @@ -27,6 +25,8 @@ uint64_t arch__intr_reg_mask(void); uint64_t arch__user_reg_mask(void); #ifdef HAVE_PERF_REGS_SUPPORT +extern const struct sample_reg sample_reg_masks[]; + #include #define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP)) -- cgit v1.2.3-59-g8ed1b From 26d5310ee28ad9395bd676f750d2ee3ddff3dcfd Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 9 Oct 2019 16:54:33 +0800 Subject: MAINTAINERS: Add entry for perf tool arm64 pmu-events files Will and I have an interest in reviewing the pmu-events changes related to arm64, so add a specific entry for this. 
Signed-off-by: John Garry Cc: Alexander Shishkin Cc: Florian Fainelli Cc: Jiri Olsa Cc: linuxarm@huawei.com Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Link: http://lore.kernel.org/lkml/1570611273-108281-1-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 55199ef7fa74..b50ddc863986 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12771,6 +12771,13 @@ F: arch/*/events/* F: arch/*/events/*/* F: tools/perf/ +PERFORMANCE EVENTS SUBSYSTEM ARM64 PMU EVENTS +R: John Garry +R: Will Deacon +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Supported +F: tools/perf/pmu-events/arch/arm64/ + PERSONALITY HANDLING M: Christoph Hellwig L: linux-abi-devel@lists.sourceforge.net -- cgit v1.2.3-59-g8ed1b From 353120b48d4f61288e4745b0c8a191784b11c0f4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:09 +0200 Subject: libperf: Add perf_mmap__init() function Add perf_mmap__init() function to initialize 'struct perf_mmap' objects. Add it to a new mmap.c source file, that will carry all the mmap related functions. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/Build | 1 + tools/perf/lib/include/internal/mmap.h | 2 ++ tools/perf/lib/mmap.c | 9 +++++++++ tools/perf/util/evlist.c | 5 ++--- 4 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 tools/perf/lib/mmap.c diff --git a/tools/perf/lib/Build b/tools/perf/lib/Build index c31f1c111f8f..2ef9a4ec6d99 100644 --- a/tools/perf/lib/Build +++ b/tools/perf/lib/Build @@ -3,6 +3,7 @@ libperf-y += cpumap.o libperf-y += threadmap.o libperf-y += evsel.o libperf-y += evlist.o +libperf-y += mmap.o libperf-y += zalloc.o libperf-y += xyarray.o libperf-y += lib.o diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index ba1e519c15b9..e25890de6a55 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -29,4 +29,6 @@ struct perf_mmap { char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); }; +void perf_mmap__init(struct perf_mmap *map, bool overwrite); + #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c new file mode 100644 index 000000000000..3da6177510e6 --- /dev/null +++ b/tools/perf/lib/mmap.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +void perf_mmap__init(struct perf_mmap *map, bool overwrite) +{ + map->fd = -1; + map->overwrite = overwrite; + refcount_set(&map->refcnt, 0); +} diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e33b46aca5cb..6c8de0865670 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -629,8 +629,6 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, return NULL; for (i = 0; i < evlist->core.nr_mmaps; i++) { - map[i].core.fd = -1; - map[i].core.overwrite = overwrite; /* * When the perf_mmap() call is made we grab 
one refcount, plus * one extra to let perf_mmap__consume() get the last @@ -640,8 +638,9 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and * thus does perf_mmap__get() on it. */ - refcount_set(&map[i].core.refcnt, 0); + perf_mmap__init(&map[i].core, overwrite); } + return map; } -- cgit v1.2.3-59-g8ed1b From e440979faf6ac8048e1792af383df6af78dd1cb0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:10 +0200 Subject: libperf: Add 'struct perf_mmap_param' Add libperf's version of mmap params 'struct perf_mmap_param' object with the basics: 'prot' and 'mask'. Encapsulate it in the current 'struct mmap_params' object. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/mmap.h | 5 +++++ tools/perf/util/evlist.c | 14 +++++++++----- tools/perf/util/mmap.c | 4 ++-- tools/perf/util/mmap.h | 3 ++- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index e25890de6a55..b26806b36bb6 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -29,6 +29,11 @@ struct perf_mmap { char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); }; +struct perf_mmap_param { + int prot; + int mask; +}; + void perf_mmap__init(struct perf_mmap *map, bool overwrite); #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6c8de0865670..3a19a7cb95b1 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -667,7 +667,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, int fd; int cpu; - mp->prot = PROT_READ | PROT_WRITE; + mp->core.prot = PROT_READ | PROT_WRITE; if (evsel->core.attr.write_backward) { 
output = _output_overwrite; maps = evlist->overwrite_mmap; @@ -680,7 +680,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } - mp->prot &= ~PROT_WRITE; + mp->core.prot &= ~PROT_WRITE; } if (evsel->core.system_wide && thread) @@ -921,8 +921,12 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, * Its value is decided by evsel's write_backward. * So &mp should not be passed through const pointer. */ - struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush, - .comp_level = comp_level }; + struct mmap_params mp = { + .nr_cblocks = nr_cblocks, + .affinity = affinity, + .flush = flush, + .comp_level = comp_level + }; if (!evlist->mmap) evlist->mmap = evlist__alloc_mmap(evlist, false); @@ -934,7 +938,7 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, evlist->core.mmap_len = evlist__mmap_size(pages); pr_debug("mmap size %zuB\n", evlist->core.mmap_len); - mp.mask = evlist->core.mmap_len - page_size - 1; + mp.core.mask = evlist->core.mmap_len - page_size - 1; auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len, auxtrace_pages, auxtrace_overwrite); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index a35dc57d5995..a496ced5ed2a 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -370,8 +370,8 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) */ refcount_set(&map->core.refcnt, 2); map->core.prev = 0; - map->core.mask = mp->mask; - map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, + map->core.mask = mp->core.mask; + map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->core.prot, MAP_SHARED, fd, 0); if (map->core.base == MAP_FAILED) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index e567c1c875bd..4ff75d8aeb05 100644 
--- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -37,7 +37,8 @@ struct mmap { }; struct mmap_params { - int prot, mask, nr_cblocks, affinity, flush, comp_level; + struct perf_mmap_param core; + int nr_cblocks, affinity, flush, comp_level; struct auxtrace_mmap_params auxtrace_mp; }; -- cgit v1.2.3-59-g8ed1b From bf59b3053e63783520c2810fc3f676553bc7eedd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:11 +0200 Subject: libperf: Adopt perf_mmap__mmap_len() function from tools/perf Move perf_mmap__mmap_len() from tools/perf to libperf, it will be used in the following patches. And rename the existing perf's function to mmap__mmap_len(). Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 ++-- tools/perf/lib/include/internal/mmap.h | 2 ++ tools/perf/lib/mmap.c | 6 ++++++ tools/perf/util/mmap.c | 20 ++++++++++---------- tools/perf/util/mmap.h | 2 +- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 23332861de6e..f05e8b7955e4 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -276,7 +276,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size if (record__comp_enabled(aio->rec)) { size = zstd_compress(aio->rec->session, aio->data + aio->size, - perf_mmap__mmap_len(map) - aio->size, + mmap__mmap_len(map) - aio->size, buf, size); } else { memcpy(aio->data + aio->size, buf, size); @@ -488,7 +488,7 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) struct record *rec = to; if (record__comp_enabled(rec)) { - size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size); + size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size); bf = map->data; } 
diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index b26806b36bb6..e7a67260940c 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -34,6 +34,8 @@ struct perf_mmap_param { int mask; }; +size_t perf_mmap__mmap_len(struct perf_mmap *map); + void perf_mmap__init(struct perf_mmap *map, bool overwrite); #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index 3da6177510e6..cc4284da4d99 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include void perf_mmap__init(struct perf_mmap *map, bool overwrite) { @@ -7,3 +8,8 @@ void perf_mmap__init(struct perf_mmap *map, bool overwrite) map->overwrite = overwrite; refcount_set(&map->refcnt, 0); } + +size_t perf_mmap__mmap_len(struct perf_mmap *map) +{ + return map->mask + 1 + page_size; +} diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index a496ced5ed2a..a8e81c4cbae8 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -23,9 +23,9 @@ #include "../perf.h" #include /* page_size */ -size_t perf_mmap__mmap_len(struct mmap *map) +size_t mmap__mmap_len(struct mmap *map) { - return map->core.mask + 1 + page_size; + return perf_mmap__mmap_len(&map->core); } /* When check_messup is true, 'end' must points to a good entry */ @@ -170,7 +170,7 @@ static int perf_mmap__aio_enabled(struct mmap *map) #ifdef HAVE_LIBNUMA_SUPPORT static int perf_mmap__aio_alloc(struct mmap *map, int idx) { - map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, + map->aio.data[idx] = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (map->aio.data[idx] == MAP_FAILED) { map->aio.data[idx] = NULL; @@ -183,7 +183,7 @@ static int perf_mmap__aio_alloc(struct mmap *map, int idx) static void perf_mmap__aio_free(struct mmap *map, int idx) { if 
(map->aio.data[idx]) { - munmap(map->aio.data[idx], perf_mmap__mmap_len(map)); + munmap(map->aio.data[idx], mmap__mmap_len(map)); map->aio.data[idx] = NULL; } } @@ -196,7 +196,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) { data = map->aio.data[idx]; - mmap_len = perf_mmap__mmap_len(map); + mmap_len = mmap__mmap_len(map); node_mask = 1UL << cpu__get_node(cpu); if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) { pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n", @@ -210,7 +210,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) #else /* !HAVE_LIBNUMA_SUPPORT */ static int perf_mmap__aio_alloc(struct mmap *map, int idx) { - map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); + map->aio.data[idx] = malloc(mmap__mmap_len(map)); if (map->aio.data[idx] == NULL) return -1; @@ -315,11 +315,11 @@ void perf_mmap__munmap(struct mmap *map) { perf_mmap__aio_munmap(map); if (map->data != NULL) { - munmap(map->data, perf_mmap__mmap_len(map)); + munmap(map->data, mmap__mmap_len(map)); map->data = NULL; } if (map->core.base != NULL) { - munmap(map->core.base, perf_mmap__mmap_len(map)); + munmap(map->core.base, mmap__mmap_len(map)); map->core.base = NULL; map->core.fd = -1; refcount_set(&map->core.refcnt, 0); @@ -371,7 +371,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) refcount_set(&map->core.refcnt, 2); map->core.prev = 0; map->core.mask = mp->core.mask; - map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->core.prot, + map->core.base = mmap(NULL, mmap__mmap_len(map), mp->core.prot, MAP_SHARED, fd, 0); if (map->core.base == MAP_FAILED) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", @@ -389,7 +389,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) map->comp_level = mp->comp_level; if (map->comp_level && !perf_mmap__aio_enabled(map)) { 
- map->data = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, + map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (map->data == MAP_FAILED) { pr_debug2("failed to mmap data buffer, error %d\n", diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 4ff75d8aeb05..2b97dc6d9ee2 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -67,7 +67,7 @@ union perf_event *perf_mmap__read_event(struct mmap *map); int perf_mmap__push(struct mmap *md, void *to, int push(struct mmap *map, void *to, void *buf, size_t size)); -size_t perf_mmap__mmap_len(struct mmap *map); +size_t mmap__mmap_len(struct mmap *map); int perf_mmap__read_init(struct mmap *md); void perf_mmap__read_done(struct mmap *map); -- cgit v1.2.3-59-g8ed1b From 32c261c070c222858148c2171698d2954242ddd9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:12 +0200 Subject: libperf: Adopt perf_mmap__mmap() function from tools/perf Move perf_mmap__mmap() from tools/perf to libperf, it will be used in the following patches. And rename the existing perf's function to mmap__mmap(). 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/mmap.h | 2 ++ tools/perf/lib/mmap.c | 18 ++++++++++++++++++ tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 12 +++--------- tools/perf/util/mmap.h | 2 +- 5 files changed, 25 insertions(+), 11 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index e7a67260940c..7067b70c6f61 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -37,5 +37,7 @@ struct perf_mmap_param { size_t perf_mmap__mmap_len(struct perf_mmap *map); void perf_mmap__init(struct perf_mmap *map, bool overwrite); +int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, + int fd, int cpu); #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index cc4284da4d99..b216a7db857f 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include @@ -13,3 +14,20 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) { return map->mask + 1 + page_size; } + +int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, + int fd, int cpu) +{ + map->prev = 0; + map->mask = mp->mask; + map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, + MAP_SHARED, fd, 0); + if (map->base == MAP_FAILED) { + map->base = NULL; + return -1; + } + + map->fd = fd; + map->cpu = cpu; + return 0; +} diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3a19a7cb95b1..f9781de0e61e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -695,7 +695,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, if (*output == -1) { *output = fd; - if (perf_mmap__mmap(&maps[idx], mp, *output, 
evlist_cpu) < 0) + if (mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0) return -1; } else { if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index a8e81c4cbae8..acef6e3f6b80 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -353,7 +353,7 @@ static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params CPU_SET(map->core.cpu, &map->affinity_mask); } -int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) { /* * The last one will be done at perf_mmap__consume(), so that we @@ -369,18 +369,12 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) * perf_evlist__filter_pollfd(). */ refcount_set(&map->core.refcnt, 2); - map->core.prev = 0; - map->core.mask = mp->core.mask; - map->core.base = mmap(NULL, mmap__mmap_len(map), mp->core.prot, - MAP_SHARED, fd, 0); - if (map->core.base == MAP_FAILED) { + + if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", errno); - map->core.base = NULL; return -1; } - map->core.fd = fd; - map->core.cpu = cpu; perf_mmap__setup_affinity_mask(map, mp); diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 2b97dc6d9ee2..a60e6ead7255 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -42,7 +42,7 @@ struct mmap_params { struct auxtrace_mmap_params auxtrace_mp; }; -int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); void perf_mmap__munmap(struct mmap *map); void perf_mmap__get(struct mmap *map); -- cgit v1.2.3-59-g8ed1b From e75710f063e29ae7715c57b45eb27c2d504b32ca Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:13 +0200 Subject: libperf: Adopt perf_mmap__get() function from tools/perf Move 
perf_mmap__get() from tools/perf to libperf in the internal header internal/mmap.h. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/lib/include/internal/mmap.h | 1 + tools/perf/lib/mmap.c | 5 +++++ tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 5 ----- tools/perf/util/mmap.h | 1 - 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f05e8b7955e4..025a12b57325 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -293,7 +293,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size * after started aio request completion or at record__aio_push() * if the request failed to start. */ - perf_mmap__get(map); + perf_mmap__get(&map->core); } aio->size += size; diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index 7067b70c6f61..2e68974bffb4 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -39,5 +39,6 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map); void perf_mmap__init(struct perf_mmap *map, bool overwrite); int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, int fd, int cpu); +void perf_mmap__get(struct perf_mmap *map); #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index b216a7db857f..b765e0505bb6 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -31,3 +31,8 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, map->cpu = cpu; return 0; } + +void perf_mmap__get(struct perf_mmap *map) +{ + refcount_inc(&map->refcnt); +} diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f9781de0e61e..dc5b36069d4c 100644 
--- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -701,7 +701,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) return -1; - perf_mmap__get(&maps[idx]); + perf_mmap__get(&maps[idx].core); } revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index acef6e3f6b80..be691b58d8ab 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -110,11 +110,6 @@ static bool perf_mmap__empty(struct mmap *map) return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base; } -void perf_mmap__get(struct mmap *map) -{ - refcount_inc(&map->core.refcnt); -} - void perf_mmap__put(struct mmap *map) { BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0); diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index a60e6ead7255..a73402ee8fe0 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -45,7 +45,6 @@ struct mmap_params { int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); void perf_mmap__munmap(struct mmap *map); -void perf_mmap__get(struct mmap *map); void perf_mmap__put(struct mmap *map); void perf_mmap__consume(struct mmap *map); -- cgit v1.2.3-59-g8ed1b From 59d7ea620b58fa7d107834a81528e3098f1c27b0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:14 +0200 Subject: libperf: Adopt perf_mmap__unmap() function from tools/perf Move perf_mmap__unmap() from tools/perf to libperf, to internal header internal/mmap.h. It will be used in the following patches. And rename the existing perf's function to mmap__munmap(). 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/mmap.h | 1 + tools/perf/lib/mmap.c | 10 ++++++++++ tools/perf/util/evlist.c | 4 ++-- tools/perf/util/mmap.c | 11 +++-------- tools/perf/util/mmap.h | 2 +- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index 2e68974bffb4..5c2ca9ab12cd 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -39,6 +39,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map); void perf_mmap__init(struct perf_mmap *map, bool overwrite); int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, int fd, int cpu); +void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index b765e0505bb6..6eb228d89206 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -32,6 +32,16 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, return 0; } +void perf_mmap__munmap(struct perf_mmap *map) +{ + if (map && map->base != NULL) { + munmap(map->base, perf_mmap__mmap_len(map)); + map->base = NULL; + map->fd = -1; + refcount_set(&map->refcnt, 0); + } +} + void perf_mmap__get(struct perf_mmap *map) { refcount_inc(&map->refcnt); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index dc5b36069d4c..0b877d39a660 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -601,11 +601,11 @@ static void evlist__munmap_nofree(struct evlist *evlist) if (evlist->mmap) for (i = 0; i < evlist->core.nr_mmaps; i++) - perf_mmap__munmap(&evlist->mmap[i]); + mmap__munmap(&evlist->mmap[i]); if (evlist->overwrite_mmap) for (i = 0; i < 
evlist->core.nr_mmaps; i++) - perf_mmap__munmap(&evlist->overwrite_mmap[i]); + mmap__munmap(&evlist->overwrite_mmap[i]); } void evlist__munmap(struct evlist *evlist) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index be691b58d8ab..2c73b5bcf74e 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -115,7 +115,7 @@ void perf_mmap__put(struct mmap *map) BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0); if (refcount_dec_and_test(&map->core.refcnt)) - perf_mmap__munmap(map); + mmap__munmap(map); } void perf_mmap__consume(struct mmap *map) @@ -306,19 +306,14 @@ static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused) } #endif -void perf_mmap__munmap(struct mmap *map) +void mmap__munmap(struct mmap *map) { + perf_mmap__munmap(&map->core); perf_mmap__aio_munmap(map); if (map->data != NULL) { munmap(map->data, mmap__mmap_len(map)); map->data = NULL; } - if (map->core.base != NULL) { - munmap(map->core.base, mmap__mmap_len(map)); - map->core.base = NULL; - map->core.fd = -1; - refcount_set(&map->core.refcnt, 0); - } auxtrace_mmap__munmap(&map->auxtrace_mmap); } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index a73402ee8fe0..6a18b2990059 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -43,7 +43,7 @@ struct mmap_params { }; int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); -void perf_mmap__munmap(struct mmap *map); +void mmap__munmap(struct mmap *map); void perf_mmap__put(struct mmap *map); -- cgit v1.2.3-59-g8ed1b From 80e53d1148231d7d4fdc4cd89e5393616b33bf82 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:15 +0200 Subject: libperf: Adopt perf_mmap__put() function from tools/perf Move perf_mmap__put() from tools/perf to libperf. Once perf_mmap__put() is moved, we need a way to call application related unmap code (AIO and aux related code for perf), when the map goes away. Add the perf_mmap::unmap callback to do that. 
The unmap path from perf is: perf_mmap__put (libperf) perf_mmap__munmap (libperf) map->unmap_cb -> perf_mmap__unmap_cb (perf) mmap__munmap (perf) Committer notes: Add missing linux/kernel.h to tools/perf/lib/mmap.c to get the BUG_ON definition. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 ++-- tools/perf/lib/include/internal/mmap.h | 31 +++++++++++++++++++------------ tools/perf/lib/mmap.c | 15 ++++++++++++++- tools/perf/util/evlist.c | 17 ++++++++++++----- tools/perf/util/mmap.c | 11 +---------- tools/perf/util/mmap.h | 2 -- 6 files changed, 48 insertions(+), 32 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 025a12b57325..2fb83aabbef5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -197,7 +197,7 @@ static int record__aio_complete(struct mmap *md, struct aiocb *cblock) * every aio write request started in record__aio_push() so * decrement it because the request is now complete. */ - perf_mmap__put(md); + perf_mmap__put(&md->core); rc = 1; } else { /* @@ -332,7 +332,7 @@ static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) * map->refcount is decremented in record__aio_complete() after * aio write operation finishes successfully. 
*/ - perf_mmap__put(map); + perf_mmap__put(&map->core); } return ret; diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index 5c2ca9ab12cd..bf9cc7d005ab 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -10,23 +10,28 @@ /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) +struct perf_mmap; + +typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map); + /** * struct perf_mmap - perf's ring buffer mmap details * * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this */ struct perf_mmap { - void *base; - int mask; - int fd; - int cpu; - refcount_t refcnt; - u64 prev; - u64 start; - u64 end; - bool overwrite; - u64 flush; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + void *base; + int mask; + int fd; + int cpu; + refcount_t refcnt; + u64 prev; + u64 start; + u64 end; + bool overwrite; + u64 flush; + libperf_unmap_cb_t unmap_cb; + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); }; struct perf_mmap_param { @@ -36,10 +41,12 @@ struct perf_mmap_param { size_t perf_mmap__mmap_len(struct perf_mmap *map); -void perf_mmap__init(struct perf_mmap *map, bool overwrite); +void perf_mmap__init(struct perf_mmap *map, bool overwrite, + libperf_unmap_cb_t unmap_cb); int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, int fd, int cpu); void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); +void perf_mmap__put(struct perf_mmap *map); #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index 6eb228d89206..89c1e0e8b897 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -2,11 +2,14 @@ #include #include #include +#include -void perf_mmap__init(struct perf_mmap *map, bool overwrite) +void perf_mmap__init(struct perf_mmap *map, bool overwrite, + libperf_unmap_cb_t unmap_cb) { map->fd = -1; map->overwrite = overwrite; + map->unmap_cb 
= unmap_cb; refcount_set(&map->refcnt, 0); } @@ -40,9 +43,19 @@ void perf_mmap__munmap(struct perf_mmap *map) map->fd = -1; refcount_set(&map->refcnt, 0); } + if (map && map->unmap_cb) + map->unmap_cb(map); } void perf_mmap__get(struct perf_mmap *map) { refcount_inc(&map->refcnt); } + +void perf_mmap__put(struct perf_mmap *map) +{ + BUG_ON(map->base && refcount_read(&map->refcnt) == 0); + + if (refcount_dec_and_test(&map->refcnt)) + perf_mmap__munmap(map); +} diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 0b877d39a660..4394a5a10ce9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -433,7 +433,7 @@ static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, struct mmap *map = fda->priv[fd].ptr; if (map) - perf_mmap__put(map); + perf_mmap__put(&map->core); } int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) @@ -601,11 +601,11 @@ static void evlist__munmap_nofree(struct evlist *evlist) if (evlist->mmap) for (i = 0; i < evlist->core.nr_mmaps; i++) - mmap__munmap(&evlist->mmap[i]); + perf_mmap__munmap(&evlist->mmap[i].core); if (evlist->overwrite_mmap) for (i = 0; i < evlist->core.nr_mmaps; i++) - mmap__munmap(&evlist->overwrite_mmap[i]); + perf_mmap__munmap(&evlist->overwrite_mmap[i].core); } void evlist__munmap(struct evlist *evlist) @@ -615,6 +615,13 @@ void evlist__munmap(struct evlist *evlist) zfree(&evlist->overwrite_mmap); } +static void perf_mmap__unmap_cb(struct perf_mmap *map) +{ + struct mmap *m = container_of(map, struct mmap, core); + + mmap__munmap(m); +} + static struct mmap *evlist__alloc_mmap(struct evlist *evlist, bool overwrite) { @@ -638,7 +645,7 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and * thus does perf_mmap__get() on it. 
*/ - perf_mmap__init(&map[i].core, overwrite); + perf_mmap__init(&map[i].core, overwrite, perf_mmap__unmap_cb); } return map; @@ -715,7 +722,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, */ if (!evsel->core.system_wide && perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) { - perf_mmap__put(&maps[idx]); + perf_mmap__put(&maps[idx].core); return -1; } diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 2c73b5bcf74e..9f150d50cea5 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -110,14 +110,6 @@ static bool perf_mmap__empty(struct mmap *map) return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base; } -void perf_mmap__put(struct mmap *map) -{ - BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0); - - if (refcount_dec_and_test(&map->core.refcnt)) - mmap__munmap(map); -} - void perf_mmap__consume(struct mmap *map) { if (!map->core.overwrite) { @@ -127,7 +119,7 @@ void perf_mmap__consume(struct mmap *map) } if (refcount_read(&map->core.refcnt) == 1 && perf_mmap__empty(map)) - perf_mmap__put(map); + perf_mmap__put(&map->core); } int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, @@ -308,7 +300,6 @@ static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused) void mmap__munmap(struct mmap *map) { - perf_mmap__munmap(&map->core); perf_mmap__aio_munmap(map); if (map->data != NULL) { munmap(map->data, mmap__mmap_len(map)); diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 6a18b2990059..78e3c4436ce8 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -45,8 +45,6 @@ struct mmap_params { int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); void mmap__munmap(struct mmap *map); -void perf_mmap__put(struct mmap *map); - void perf_mmap__consume(struct mmap *map); static inline u64 perf_mmap__read_head(struct mmap *mm) -- cgit v1.2.3-59-g8ed1b From 1d40ae4e1784bfa1646fd153ca022db21511284f Mon 
Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:16 +0200 Subject: perf tools: Use perf_mmap way to detect aux mmap We will move this code to libperf shortly, so we need to free it of 'struct auxtrace_mmap' usage, because it won't be available in libperf (for now). The perf_event_mmap_page::aux_size is set when the aux mmap is mapped, so the check is equivalent. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 9f150d50cea5..f246dd403507 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -107,7 +107,9 @@ union perf_event *perf_mmap__read_event(struct mmap *map) static bool perf_mmap__empty(struct mmap *map) { - return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base; + struct perf_event_mmap_page *pc = map->core.base; + + return perf_mmap__read_head(map) == map->core.prev && !pc->aux_size; } void perf_mmap__consume(struct mmap *map) -- cgit v1.2.3-59-g8ed1b From 7728fa0cfaeb7d25b12c8865c733359cc8e5fb13 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:17 +0200 Subject: libperf: Adopt perf_mmap__consume() function from tools/perf Move perf_mmap__consume() from tools/perf to libperf and export it in the perf/mmap.h header. Move also the needed helpers perf_mmap__write_tail(), perf_mmap__read_head() and perf_mmap__empty(). 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 3 ++- tools/perf/builtin-kvm.c | 5 +++-- tools/perf/builtin-top.c | 3 ++- tools/perf/builtin-trace.c | 3 ++- tools/perf/lib/Makefile | 5 +++-- tools/perf/lib/include/internal/mmap.h | 2 ++ tools/perf/lib/include/perf/mmap.h | 11 ++++++++++ tools/perf/lib/libperf.map | 1 + tools/perf/lib/mmap.c | 32 ++++++++++++++++++++++++++++ tools/perf/tests/backward-ring-buffer.c | 1 + tools/perf/tests/bpf.c | 1 + tools/perf/tests/code-reading.c | 3 ++- tools/perf/tests/keep-tracking.c | 3 ++- tools/perf/tests/mmap-basic.c | 3 ++- tools/perf/tests/openat-syscall-tp-fields.c | 3 ++- tools/perf/tests/perf-record.c | 3 ++- tools/perf/tests/sw-clock.c | 3 ++- tools/perf/tests/switch-tracking.c | 3 ++- tools/perf/tests/task-exit.c | 3 ++- tools/perf/util/evlist.c | 3 ++- tools/perf/util/mmap.c | 32 ++++++---------------------- tools/perf/util/mmap.h | 12 ----------- tools/perf/util/python.c | 3 ++- 23 files changed, 87 insertions(+), 54 deletions(-) create mode 100644 tools/perf/lib/include/perf/mmap.h diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index fa947952c16a..3397898824f6 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "debug.h" #include "parse-events.h" @@ -139,7 +140,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe comm2_time = sample.time; } next_event: - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } perf_mmap__read_done(md); } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 58a9e0989491..0c04c4c6c1eb 100644 --- a/tools/perf/builtin-kvm.c +++ 
b/tools/perf/builtin-kvm.c @@ -46,6 +46,7 @@ #include #include #include +#include static const char *get_filename_for_perf_kvm(void) { @@ -766,7 +767,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, while ((event = perf_mmap__read_event(md)) != NULL) { err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp); if (err) { - perf_mmap__consume(md); + perf_mmap__consume(&md->core); pr_err("Failed to parse sample\n"); return -1; } @@ -776,7 +777,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. */ - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (err) { pr_err("Failed to enqueue sample: %d\n", err); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 611d03030abc..5fcf15799415 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -82,6 +82,7 @@ #include #include +#include static volatile int done; static volatile int resize; @@ -883,7 +884,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) if (ret) break; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (top->qe.rotate) { pthread_mutex_lock(&top->qe.mutex); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b627975d1c3e..6a5708438566 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -77,6 +77,7 @@ #include #include +#include #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 @@ -3810,7 +3811,7 @@ again: if (err) goto out_disable; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (interrupted) goto out_disable; diff --git a/tools/perf/lib/Makefile b/tools/perf/lib/Makefile index 85ccb8c439a4..0889c9c3ec19 100644 --- a/tools/perf/lib/Makefile +++ b/tools/perf/lib/Makefile @@ -172,8 +172,9 @@ install_headers: $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \ $(call 
do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); - $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); + $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \ + $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \ + $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); install_pkgconfig: $(LIBPERF_PC) $(call QUIET_INSTALL, $(LIBPERF_PC)) \ diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index bf9cc7d005ab..ee536c4441bb 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -49,4 +49,6 @@ void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); void perf_mmap__put(struct perf_mmap *map); +u64 perf_mmap__read_head(struct perf_mmap *map); + #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/perf/lib/include/perf/mmap.h new file mode 100644 index 000000000000..d3678d1834d9 --- /dev/null +++ b/tools/perf/lib/include/perf/mmap.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LIBPERF_MMAP_H +#define __LIBPERF_MMAP_H + +#include + +struct perf_mmap; + +LIBPERF_API void perf_mmap__consume(struct perf_mmap *map); + +#endif /* __LIBPERF_MMAP_H */ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index ab8dbde1136c..d7b327f224e2 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -40,6 +40,7 @@ LIBPERF_0.0.1 { perf_evlist__next; perf_evlist__set_maps; perf_evlist__poll; + perf_mmap__consume; local: *; }; diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index 89c1e0e8b897..4cada1c89fdb 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include 
+#include +#include #include #include #include @@ -59,3 +62,32 @@ void perf_mmap__put(struct perf_mmap *map) if (refcount_dec_and_test(&map->refcnt)) perf_mmap__munmap(map); } + +static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) +{ + ring_buffer_write_tail(md->base, tail); +} + +u64 perf_mmap__read_head(struct perf_mmap *map) +{ + return ring_buffer_read_head(map->base); +} + +static bool perf_mmap__empty(struct perf_mmap *map) +{ + struct perf_event_mmap_page *pc = map->base; + + return perf_mmap__read_head(map) == map->prev && !pc->aux_size; +} + +void perf_mmap__consume(struct perf_mmap *map) +{ + if (!map->overwrite) { + u64 old = map->prev; + + perf_mmap__write_tail(map, old); + } + + if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map)) + perf_mmap__put(map); +} diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 338cd9faa835..13f9a060361a 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -13,6 +13,7 @@ #include "util/mmap.h" #include #include +#include #define NR_ITERS 111 diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 1eb0bffaed6c..6f0d239f8277 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "tests.h" #include "llvm.h" #include "debug.h" diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index f5764a3890b9..b5a57bb54c25 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "debug.h" #include "dso.h" @@ -430,7 +431,7 @@ static int process_events(struct machine *machine, struct evlist *evlist, while ((event = perf_mmap__read_event(md)) != NULL) { ret = process_event(machine, evlist, event, state); - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (ret < 0) return ret; } diff --git 
a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 92c7d591bcac..31c005e07b17 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "debug.h" #include "parse-events.h" @@ -46,7 +47,7 @@ static int find_comm(struct evlist *evlist, const char *comm) (pid_t)event->comm.tid == getpid() && strcmp(event->comm.comm, comm) == 0) found += 1; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } perf_mmap__read_done(md); } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 3a22dce991ba..b176acc4f52e 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * This test will generate random numbers of calls to some getpid syscalls, @@ -139,7 +140,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse goto out_delete_evlist; } nr_events[evsel->idx]++; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } perf_mmap__read_done(md); diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 2b5c46813053..bbf8ba320721 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -13,6 +13,7 @@ #include "debug.h" #include "util/mmap.h" #include +#include #ifndef O_DIRECTORY #define O_DIRECTORY 00200000 @@ -103,7 +104,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest ++nr_events; if (type != PERF_RECORD_SAMPLE) { - perf_mmap__consume(md); + perf_mmap__consume(&md->core); continue; } diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 437426be29e9..6ebbcc65749e 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -6,6 +6,7 @@ #include #include +#include #include "evlist.h" #include "evsel.h" #include "debug.h" @@ -276,7 
+277,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus ++errs; } - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } perf_mmap__read_done(md); } diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 84519df87f30..1aeb558010c1 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -15,6 +15,7 @@ #include "util/mmap.h" #include "util/thread_map.h" #include +#include #define NR_LOOPS 10000000 @@ -117,7 +118,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) total_periods += sample.period; nr_samples++; next_event: - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } perf_mmap__read_done(md); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index ffa592e0020e..55728b3da057 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "debug.h" #include "parse-events.h" @@ -275,7 +276,7 @@ static int process_events(struct evlist *evlist, while ((event = perf_mmap__read_event(md)) != NULL) { cnt += 1; ret = add_event(evlist, &events, event); - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (ret < 0) goto out_free_nodes; } diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index bce3a4cb4c89..a0f689594848 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -12,6 +12,7 @@ #include #include #include +#include static int exited; static int nr_exit; @@ -124,7 +125,7 @@ retry: if (event->header.type == PERF_RECORD_EXIT) nr_exit++; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } perf_mmap__read_done(md); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4394a5a10ce9..34ba47b9896b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -42,6 +42,7 @@ #include #include #include +#include #include @@ -1818,7 +1819,7 @@ static void 
*perf_evlist__poll_thread(void *arg) else pr_warning("cannot locate proper evsel for the side band event\n"); - perf_mmap__consume(map); + perf_mmap__consume(&map->core); got_data = true; } perf_mmap__read_done(map); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index f246dd403507..abe7cbe6c95f 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -13,6 +13,7 @@ #include #include #include // sysconf() +#include #ifdef HAVE_LIBNUMA_SUPPORT #include #endif @@ -95,7 +96,7 @@ union perf_event *perf_mmap__read_event(struct mmap *map) /* non-overwirte doesn't pause the ringbuffer */ if (!map->core.overwrite) - map->core.end = perf_mmap__read_head(map); + map->core.end = perf_mmap__read_head(&map->core); event = perf_mmap__read(map, &map->core.start, map->core.end); @@ -105,25 +106,6 @@ union perf_event *perf_mmap__read_event(struct mmap *map) return event; } -static bool perf_mmap__empty(struct mmap *map) -{ - struct perf_event_mmap_page *pc = map->core.base; - - return perf_mmap__read_head(map) == map->core.prev && !pc->aux_size; -} - -void perf_mmap__consume(struct mmap *map) -{ - if (!map->core.overwrite) { - u64 old = map->core.prev; - - perf_mmap__write_tail(map, old); - } - - if (refcount_read(&map->core.refcnt) == 1 && perf_mmap__empty(map)) - perf_mmap__put(&map->core); -} - int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, struct auxtrace_mmap_params *mp __maybe_unused, void *userpg __maybe_unused, @@ -420,7 +402,7 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) */ static int __perf_mmap__read_init(struct mmap *md) { - u64 head = perf_mmap__read_head(md); + u64 head = perf_mmap__read_head(&md->core); u64 old = md->core.prev; unsigned char *data = md->core.base + page_size; unsigned long size; @@ -437,7 +419,7 @@ static int __perf_mmap__read_init(struct mmap *md) WARN_ONCE(1, "failed to keep up with mmap data. 
(warn only once)\n"); md->core.prev = head; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); return -EAGAIN; } @@ -466,7 +448,7 @@ int perf_mmap__read_init(struct mmap *map) int perf_mmap__push(struct mmap *md, void *to, int push(struct mmap *map, void *to, void *buf, size_t size)) { - u64 head = perf_mmap__read_head(md); + u64 head = perf_mmap__read_head(&md->core); unsigned char *data = md->core.base + page_size; unsigned long size; void *buf; @@ -499,7 +481,7 @@ int perf_mmap__push(struct mmap *md, void *to, } md->core.prev = head; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); out: return rc; } @@ -518,5 +500,5 @@ void perf_mmap__read_done(struct mmap *map) if (!refcount_read(&map->core.refcnt)) return; - map->core.prev = perf_mmap__read_head(map); + map->core.prev = perf_mmap__read_head(&map->core); } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 78e3c4436ce8..89fb93267ff1 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -45,18 +45,6 @@ struct mmap_params { int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); void mmap__munmap(struct mmap *map); -void perf_mmap__consume(struct mmap *map); - -static inline u64 perf_mmap__read_head(struct mmap *mm) -{ - return ring_buffer_read_head(mm->core.base); -} - -static inline void perf_mmap__write_tail(struct mmap *md, u64 tail) -{ - ring_buffer_write_tail(md->core.base, tail); -} - union perf_event *perf_mmap__read_forward(struct mmap *map); union perf_event *perf_mmap__read_event(struct mmap *map); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 02460362256d..82a4fa6c87bd 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "evlist.h" #include "callchain.h" #include "evsel.h" @@ -1045,7 +1046,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, err = perf_evsel__parse_sample(evsel, event, &pevent->sample); /* 
Consume the even only after we parsed it out. */ - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (err) return PyErr_Format(PyExc_OSError, -- cgit v1.2.3-59-g8ed1b From 7c4d41824f9afc659ba425a41018546531cffd72 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:18 +0200 Subject: libperf: Adopt perf_mmap__read_init() from tools/perf Move perf_mmap__read_init() from tools/perf to libperf and export it in perf/mmap.h header. And add pr_debug2()/pr_debug3() macros support, because the code is using them. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/lib/include/perf/core.h | 2 + tools/perf/lib/include/perf/mmap.h | 1 + tools/perf/lib/internal.h | 2 + tools/perf/lib/libperf.map | 1 + tools/perf/lib/mmap.c | 84 ++++++++++++++++++++++++++++ tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 82 +-------------------------- tools/perf/util/mmap.h | 1 - tools/perf/util/python.c | 2 +- 23 files changed, 107 insertions(+), 98 deletions(-) diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 3397898824f6..6a0c3ff78e01 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -118,7 +118,7 @@ int 
test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 0c04c4c6c1eb..b6a8078dd446 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -760,7 +760,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, *mmap_time = ULLONG_MAX; md = &evlist->mmap[idx]; - err = perf_mmap__read_init(md); + err = perf_mmap__read_init(&md->core); if (err < 0) return (err == -EAGAIN) ? 0 : -1; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5fcf15799415..4a4bb7b20c39 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -870,7 +870,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) union perf_event *event; md = opts->overwrite ? 
&evlist->overwrite_mmap[idx] : &evlist->mmap[idx]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) return; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6a5708438566..cd69d68e7f1d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3801,7 +3801,7 @@ again: struct mmap *md; md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/lib/include/perf/core.h b/tools/perf/lib/include/perf/core.h index cfd70e720c1c..2a80e4b6f819 100644 --- a/tools/perf/lib/include/perf/core.h +++ b/tools/perf/lib/include/perf/core.h @@ -12,6 +12,8 @@ enum libperf_print_level { LIBPERF_WARN, LIBPERF_INFO, LIBPERF_DEBUG, + LIBPERF_DEBUG2, + LIBPERF_DEBUG3, }; typedef int (*libperf_print_fn_t)(enum libperf_print_level level, diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/perf/lib/include/perf/mmap.h index d3678d1834d9..646e9052b003 100644 --- a/tools/perf/lib/include/perf/mmap.h +++ b/tools/perf/lib/include/perf/mmap.h @@ -7,5 +7,6 @@ struct perf_mmap; LIBPERF_API void perf_mmap__consume(struct perf_mmap *map); +LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map); #endif /* __LIBPERF_MMAP_H */ diff --git a/tools/perf/lib/internal.h b/tools/perf/lib/internal.h index dc92f241732e..37db745e1502 100644 --- a/tools/perf/lib/internal.h +++ b/tools/perf/lib/internal.h @@ -14,5 +14,7 @@ do { \ #define pr_warning(fmt, ...) __pr(LIBPERF_WARN, fmt, ##__VA_ARGS__) #define pr_info(fmt, ...) __pr(LIBPERF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBPERF_DEBUG, fmt, ##__VA_ARGS__) +#define pr_debug2(fmt, ...) __pr(LIBPERF_DEBUG2, fmt, ##__VA_ARGS__) +#define pr_debug3(fmt, ...) 
__pr(LIBPERF_DEBUG3, fmt, ##__VA_ARGS__) #endif /* __LIBPERF_INTERNAL_H */ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index d7b327f224e2..bc3fbb213a3e 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -41,6 +41,7 @@ LIBPERF_0.0.1 { perf_evlist__set_maps; perf_evlist__poll; perf_mmap__consume; + perf_mmap__read_init; local: *; }; diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index 4cada1c89fdb..fdbc6c550dea 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -1,11 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include +#include #include #include #include #include #include #include +#include "internal.h" void perf_mmap__init(struct perf_mmap *map, bool overwrite, libperf_unmap_cb_t unmap_cb) @@ -91,3 +95,83 @@ void perf_mmap__consume(struct perf_mmap *map) if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map)) perf_mmap__put(map); } + +static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) +{ + struct perf_event_header *pheader; + u64 evt_head = *start; + int size = mask + 1; + + pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start); + pheader = (struct perf_event_header *)(buf + (*start & mask)); + while (true) { + if (evt_head - *start >= (unsigned int)size) { + pr_debug("Finished reading overwrite ring buffer: rewind\n"); + if (evt_head - *start > (unsigned int)size) + evt_head -= pheader->size; + *end = evt_head; + return 0; + } + + pheader = (struct perf_event_header *)(buf + (evt_head & mask)); + + if (pheader->size == 0) { + pr_debug("Finished reading overwrite ring buffer: get start\n"); + *end = evt_head; + return 0; + } + + evt_head += pheader->size; + pr_debug3("move evt_head: %"PRIx64"\n", evt_head); + } + WARN_ONCE(1, "Shouldn't get here\n"); + return -1; +} + +/* + * Report the start and end of the available data in ringbuffer + */ +static int __perf_mmap__read_init(struct perf_mmap *md) +{ + u64 head = 
perf_mmap__read_head(md); + u64 old = md->prev; + unsigned char *data = md->base + page_size; + unsigned long size; + + md->start = md->overwrite ? head : old; + md->end = md->overwrite ? old : head; + + if ((md->end - md->start) < md->flush) + return -EAGAIN; + + size = md->end - md->start; + if (size > (unsigned long)(md->mask) + 1) { + if (!md->overwrite) { + WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); + + md->prev = head; + perf_mmap__consume(md); + return -EAGAIN; + } + + /* + * Backward ring buffer is full. We still have a chance to read + * most of data from it. + */ + if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end)) + return -EINVAL; + } + + return 0; +} + +int perf_mmap__read_init(struct perf_mmap *map) +{ + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return -ENOENT; + + return __perf_mmap__read_init(map); +} diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 13f9a060361a..ff3a986983ab 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -38,7 +38,7 @@ static int count_samples(struct evlist *evlist, int *sample_count, struct mmap *map = &evlist->overwrite_mmap[i]; union perf_event *event; - perf_mmap__read_init(map); + perf_mmap__read_init(&map->core); while ((event = perf_mmap__read_event(map)) != NULL) { const u32 type = event->header.type; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 6f0d239f8277..73d26c63d624 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -185,7 +185,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), struct mmap *md; md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 
b5a57bb54c25..cf992e0b27ff 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -426,7 +426,7 @@ static int process_events(struct machine *machine, struct evlist *evlist, for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 31c005e07b17..e85da7e77269 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -39,7 +39,7 @@ static int find_comm(struct evlist *evlist, const char *comm) found = 0; for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { if (event->header.type == PERF_RECORD_COMM && diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index b176acc4f52e..77f42f0ac15d 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -114,7 +114,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse } md = &evlist->mmap[0]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) goto out_init; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index bbf8ba320721..d6a563120d93 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -93,7 +93,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest struct mmap *md; md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/perf-record.c 
b/tools/perf/tests/perf-record.c index 6ebbcc65749e..2587cb8b2c0f 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -171,7 +171,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus struct mmap *md; md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 1aeb558010c1..808669507c30 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -100,7 +100,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) evlist__disable(evlist); md = &evlist->mmap[0]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) goto out_init; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 55728b3da057..bedfdec34972 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -270,7 +270,7 @@ static int process_events(struct evlist *evlist, for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index a0f689594848..035d42375d4b 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -118,7 +118,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused retry: md = &evlist->mmap[0]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) goto out_init; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 34ba47b9896b..d9a4a4b188ed 100644 --- a/tools/perf/util/evlist.c +++ 
b/tools/perf/util/evlist.c @@ -1809,7 +1809,7 @@ static void *perf_evlist__poll_thread(void *arg) struct mmap *map = &evlist->mmap[i]; union perf_event *event; - if (perf_mmap__read_init(map)) + if (perf_mmap__read_init(&map->core)) continue; while ((event = perf_mmap__read_event(map)) != NULL) { struct evsel *evsel = perf_evlist__event2evsel(evlist, event); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index abe7cbe6c95f..59379118c2f1 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -365,86 +365,6 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) return perf_mmap__aio_mmap(map, mp); } -static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) -{ - struct perf_event_header *pheader; - u64 evt_head = *start; - int size = mask + 1; - - pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start); - pheader = (struct perf_event_header *)(buf + (*start & mask)); - while (true) { - if (evt_head - *start >= (unsigned int)size) { - pr_debug("Finished reading overwrite ring buffer: rewind\n"); - if (evt_head - *start > (unsigned int)size) - evt_head -= pheader->size; - *end = evt_head; - return 0; - } - - pheader = (struct perf_event_header *)(buf + (evt_head & mask)); - - if (pheader->size == 0) { - pr_debug("Finished reading overwrite ring buffer: get start\n"); - *end = evt_head; - return 0; - } - - evt_head += pheader->size; - pr_debug3("move evt_head: %"PRIx64"\n", evt_head); - } - WARN_ONCE(1, "Shouldn't get here\n"); - return -1; -} - -/* - * Report the start and end of the available data in ringbuffer - */ -static int __perf_mmap__read_init(struct mmap *md) -{ - u64 head = perf_mmap__read_head(&md->core); - u64 old = md->core.prev; - unsigned char *data = md->core.base + page_size; - unsigned long size; - - md->core.start = md->core.overwrite ? head : old; - md->core.end = md->core.overwrite ? 
old : head; - - if ((md->core.end - md->core.start) < md->core.flush) - return -EAGAIN; - - size = md->core.end - md->core.start; - if (size > (unsigned long)(md->core.mask) + 1) { - if (!md->core.overwrite) { - WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); - - md->core.prev = head; - perf_mmap__consume(&md->core); - return -EAGAIN; - } - - /* - * Backward ring buffer is full. We still have a chance to read - * most of data from it. - */ - if (overwrite_rb_find_range(data, md->core.mask, &md->core.start, &md->core.end)) - return -EINVAL; - } - - return 0; -} - -int perf_mmap__read_init(struct mmap *map) -{ - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->core.refcnt)) - return -ENOENT; - - return __perf_mmap__read_init(map); -} - int perf_mmap__push(struct mmap *md, void *to, int push(struct mmap *map, void *to, void *buf, size_t size)) { @@ -454,7 +374,7 @@ int perf_mmap__push(struct mmap *md, void *to, void *buf; int rc = 0; - rc = perf_mmap__read_init(md); + rc = perf_mmap__read_init(&md->core); if (rc < 0) return (rc == -EAGAIN) ? 
1 : -1; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 89fb93267ff1..6d818ef51f05 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -54,6 +54,5 @@ int perf_mmap__push(struct mmap *md, void *to, size_t mmap__mmap_len(struct mmap *map); -int perf_mmap__read_init(struct mmap *md); void perf_mmap__read_done(struct mmap *map); #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 82a4fa6c87bd..64eec2a239d4 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1023,7 +1023,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, if (!md) return NULL; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) goto end; event = perf_mmap__read_event(md); -- cgit v1.2.3-59-g8ed1b From 32fdc2ca7e2ae8ae5d0ff660ca7783acd8ee6396 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:19 +0200 Subject: libperf: Adopt perf_mmap__read_done() from tools/perf Move perf_mmap__read_done() from tools/perf to libperf and export it in the perf/mmap.h header. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-12-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/lib/include/perf/mmap.h | 1 + tools/perf/lib/libperf.map | 1 + tools/perf/lib/mmap.c | 17 +++++++++++++++++ tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 17 ----------------- tools/perf/util/mmap.h | 1 - 20 files changed, 34 insertions(+), 33 deletions(-) diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 6a0c3ff78e01..c90d925f7ae6 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -142,7 +142,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe next_event: perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } if (!comm1_time || !comm2_time) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index b6a8078dd446..4c087a8c9fed 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -794,7 +794,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, break; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); return n; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4a4bb7b20c39..1a54069ccd9c 100644 --- 
a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -894,7 +894,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) } } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } static void perf_top__mmap_read(struct perf_top *top) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index cd69d68e7f1d..23116289f710 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3821,7 +3821,7 @@ again: draining = true; } } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } if (trace->nr_events == before) { diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/perf/lib/include/perf/mmap.h index 646e9052b003..4f946e7f724b 100644 --- a/tools/perf/lib/include/perf/mmap.h +++ b/tools/perf/lib/include/perf/mmap.h @@ -8,5 +8,6 @@ struct perf_mmap; LIBPERF_API void perf_mmap__consume(struct perf_mmap *map); LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map); +LIBPERF_API void perf_mmap__read_done(struct perf_mmap *map); #endif /* __LIBPERF_MMAP_H */ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index bc3fbb213a3e..7e3ea2e9c917 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -42,6 +42,7 @@ LIBPERF_0.0.1 { perf_evlist__poll; perf_mmap__consume; perf_mmap__read_init; + perf_mmap__read_done; local: *; }; diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index fdbc6c550dea..97297cba44e3 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -175,3 +175,20 @@ int perf_mmap__read_init(struct perf_mmap *map) return __perf_mmap__read_init(map); } + +/* + * Mandatory for overwrite mode + * The direction of overwrite mode is backward. + * The last perf_mmap__read() will set tail to map->prev. + * Need to correct the map->prev to head which is the end of next read. + */ +void perf_mmap__read_done(struct perf_mmap *map) +{ + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. 
+ */ + if (!refcount_read(&map->refcnt)) + return; + + map->prev = perf_mmap__read_head(map); +} diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index ff3a986983ab..13e67cd213bd 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -54,7 +54,7 @@ static int count_samples(struct evlist *evlist, int *sample_count, return TEST_FAIL; } } - perf_mmap__read_done(map); + perf_mmap__read_done(&map->core); } return TEST_OK; } diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 73d26c63d624..fd45529e29c1 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -194,7 +194,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), if (type == PERF_RECORD_SAMPLE) count ++; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } if (count != expect) { diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index cf992e0b27ff..9947cda29bad 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -435,7 +435,7 @@ static int process_events(struct machine *machine, struct evlist *evlist, if (ret < 0) return ret; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } return 0; } diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index e85da7e77269..e950907f6f57 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -49,7 +49,7 @@ static int find_comm(struct evlist *evlist, const char *comm) found += 1; perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } return found; } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 77f42f0ac15d..bb15d405a42c 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -142,7 +142,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse nr_events[evsel->idx]++; 
perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); out_init: err = 0; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index d6a563120d93..c95eb1bbf396 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -124,7 +124,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest goto out_ok; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } if (nr_events == before) diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 2587cb8b2c0f..92a53be3b32b 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -279,7 +279,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } /* diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 808669507c30..ace20921ad55 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -120,7 +120,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) next_event: perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); out_init: if ((u64) nr_samples == total_periods) { diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index bedfdec34972..8400fb17c170 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -280,7 +280,7 @@ static int process_events(struct evlist *evlist, if (ret < 0) goto out_free_nodes; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } events_array = calloc(cnt, sizeof(struct event_node)); diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 035d42375d4b..c6a13948821c 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -127,7 
+127,7 @@ retry: perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); out_init: if (!exited || !nr_exit) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d9a4a4b188ed..6e070ee9ad39 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1822,7 +1822,7 @@ static void *perf_evlist__poll_thread(void *arg) perf_mmap__consume(&map->core); got_data = true; } - perf_mmap__read_done(map); + perf_mmap__read_done(&map->core); } if (draining && !got_data) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 59379118c2f1..2dedef9b06fd 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -405,20 +405,3 @@ int perf_mmap__push(struct mmap *md, void *to, out: return rc; } - -/* - * Mandatory for overwrite mode - * The direction of overwrite mode is backward. - * The last perf_mmap__read() will set tail to map->core.prev. - * Need to correct the map->core.prev to head which is the end of next read. - */ -void perf_mmap__read_done(struct mmap *map) -{ - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->core.refcnt)) - return; - - map->core.prev = perf_mmap__read_head(&map->core); -} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 6d818ef51f05..0b15702be1a5 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -54,5 +54,4 @@ int perf_mmap__push(struct mmap *md, void *to, size_t mmap__mmap_len(struct mmap *map); -void perf_mmap__read_done(struct mmap *map); #endif /*__PERF_MMAP_H */ -- cgit v1.2.3-59-g8ed1b From 151ed5d70da87720022e4171227733a008b3c719 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:20 +0200 Subject: libperf: Adopt perf_mmap__read_event() from tools/perf Move perf_mmap__read_event() from tools/perf to libperf and export it in the perf/mmap.h header. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-13-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/lib/include/perf/mmap.h | 2 + tools/perf/lib/libperf.map | 1 + tools/perf/lib/mmap.c | 79 ++++++++++++++++++++++++++++ tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 77 --------------------------- tools/perf/util/mmap.h | 2 - tools/perf/util/python.c | 2 +- 21 files changed, 98 insertions(+), 95 deletions(-) diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index c90d925f7ae6..909ead08a6f6 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -121,7 +121,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { struct perf_sample sample; if (event->header.type != PERF_RECORD_COMM || diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 4c087a8c9fed..858da896b518 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -764,7 +764,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, if (err < 0) return (err == -EAGAIN) ? 
0 : -1; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp); if (err) { perf_mmap__consume(&md->core); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1a54069ccd9c..d96f24c8770d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -873,7 +873,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) if (perf_mmap__read_init(&md->core) < 0) return; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { int ret; ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 23116289f710..144d417ddb22 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3804,7 +3804,7 @@ again: if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { ++trace->nr_events; err = trace__deliver_event(trace, event); diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/perf/lib/include/perf/mmap.h index 4f946e7f724b..9508ad90d8b9 100644 --- a/tools/perf/lib/include/perf/mmap.h +++ b/tools/perf/lib/include/perf/mmap.h @@ -5,9 +5,11 @@ #include struct perf_mmap; +union perf_event; LIBPERF_API void perf_mmap__consume(struct perf_mmap *map); LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map); LIBPERF_API void perf_mmap__read_done(struct perf_mmap *map); +LIBPERF_API union perf_event *perf_mmap__read_event(struct perf_mmap *map); #endif /* __LIBPERF_MMAP_H */ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index 7e3ea2e9c917..8bb0d73e0c6c 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -43,6 +43,7 @@ LIBPERF_0.0.1 { perf_mmap__consume; perf_mmap__read_init; 
perf_mmap__read_done; + perf_mmap__read_event; local: *; }; diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index 97297cba44e3..0752c193b0fb 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -3,9 +3,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -192,3 +194,80 @@ void perf_mmap__read_done(struct perf_mmap *map) map->prev = perf_mmap__read_head(map); } + +/* When check_messup is true, 'end' must points to a good entry */ +static union perf_event *perf_mmap__read(struct perf_mmap *map, + u64 *startp, u64 end) +{ + unsigned char *data = map->base + page_size; + union perf_event *event = NULL; + int diff = end - *startp; + + if (diff >= (int)sizeof(event->header)) { + size_t size; + + event = (union perf_event *)&data[*startp & map->mask]; + size = event->header.size; + + if (size < sizeof(event->header) || diff < (int)size) + return NULL; + + /* + * Event straddles the mmap boundary -- header should always + * be inside due to u64 alignment of output. + */ + if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { + unsigned int offset = *startp; + unsigned int len = min(sizeof(*event), size), cpy; + void *dst = map->event_copy; + + do { + cpy = min(map->mask + 1 - (offset & map->mask), len); + memcpy(dst, &data[offset & map->mask], cpy); + offset += cpy; + dst += cpy; + len -= cpy; + } while (len); + + event = (union perf_event *)map->event_copy; + } + + *startp += size; + } + + return event; +} + +/* + * Read event from ring buffer one by one. + * Return one event for each call. + * + * Usage: + * perf_mmap__read_init() + * while(event = perf_mmap__read_event()) { + * //process the event + * perf_mmap__consume() + * } + * perf_mmap__read_done() + */ +union perf_event *perf_mmap__read_event(struct perf_mmap *map) +{ + union perf_event *event; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. 
+ */ + if (!refcount_read(&map->refcnt)) + return NULL; + + /* non-overwirte doesn't pause the ringbuffer */ + if (!map->overwrite) + map->end = perf_mmap__read_head(map); + + event = perf_mmap__read(map, &map->start, map->end); + + if (!map->overwrite) + map->prev = map->start; + + return event; +} diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 13e67cd213bd..a4cd30c0beb3 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -39,7 +39,7 @@ static int count_samples(struct evlist *evlist, int *sample_count, union perf_event *event; perf_mmap__read_init(&map->core); - while ((event = perf_mmap__read_event(map)) != NULL) { + while ((event = perf_mmap__read_event(&map->core)) != NULL) { const u32 type = event->header.type; switch (type) { diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index fd45529e29c1..5d20bf8397f0 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -188,7 +188,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { const u32 type = event->header.type; if (type == PERF_RECORD_SAMPLE) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 9947cda29bad..1f017e1b2a55 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -429,7 +429,7 @@ static int process_events(struct machine *machine, struct evlist *evlist, if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { ret = process_event(machine, evlist, event, state); perf_mmap__consume(&md->core); if (ret < 0) diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index e950907f6f57..50a0c9fcde7d 
100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -41,7 +41,7 @@ static int find_comm(struct evlist *evlist, const char *comm) md = &evlist->mmap[i]; if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { if (event->header.type == PERF_RECORD_COMM && (pid_t)event->comm.pid == getpid() && (pid_t)event->comm.tid == getpid() && diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index bb15d405a42c..5f4c0dbb4715 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -117,7 +117,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse if (perf_mmap__read_init(&md->core) < 0) goto out_init; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { struct perf_sample sample; if (event->header.type != PERF_RECORD_SAMPLE) { diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index c95eb1bbf396..c6b2d7aab608 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -96,7 +96,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { const u32 type = event->header.type; int tp_flags; struct perf_sample sample; diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 92a53be3b32b..2195fc205e72 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -174,7 +174,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != 
NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { const u32 type = event->header.type; const char *name = perf_event__name(type); diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index ace20921ad55..bfb9986093d8 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -103,7 +103,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) if (perf_mmap__read_init(&md->core) < 0) goto out_init; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { struct perf_sample sample; if (event->header.type != PERF_RECORD_SAMPLE) diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 8400fb17c170..fcb0d03dba4e 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -273,7 +273,7 @@ static int process_events(struct evlist *evlist, if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { cnt += 1; ret = add_event(evlist, &events, event); perf_mmap__consume(&md->core); diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index c6a13948821c..4965f8b9055b 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -121,7 +121,7 @@ retry: if (perf_mmap__read_init(&md->core) < 0) goto out_init; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { if (event->header.type == PERF_RECORD_EXIT) nr_exit++; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6e070ee9ad39..a9b189ac859b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1811,7 +1811,7 @@ static void *perf_evlist__poll_thread(void *arg) if (perf_mmap__read_init(&map->core)) continue; - while ((event = perf_mmap__read_event(map)) != NULL) { + while ((event = 
perf_mmap__read_event(&map->core)) != NULL) { struct evsel *evsel = perf_evlist__event2evsel(evlist, event); if (evsel && evsel->side_band.cb) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 2dedef9b06fd..2a8bf0ab861c 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -29,83 +29,6 @@ size_t mmap__mmap_len(struct mmap *map) return perf_mmap__mmap_len(&map->core); } -/* When check_messup is true, 'end' must points to a good entry */ -static union perf_event *perf_mmap__read(struct mmap *map, - u64 *startp, u64 end) -{ - unsigned char *data = map->core.base + page_size; - union perf_event *event = NULL; - int diff = end - *startp; - - if (diff >= (int)sizeof(event->header)) { - size_t size; - - event = (union perf_event *)&data[*startp & map->core.mask]; - size = event->header.size; - - if (size < sizeof(event->header) || diff < (int)size) - return NULL; - - /* - * Event straddles the mmap boundary -- header should always - * be inside due to u64 alignment of output. - */ - if ((*startp & map->core.mask) + size != ((*startp + size) & map->core.mask)) { - unsigned int offset = *startp; - unsigned int len = min(sizeof(*event), size), cpy; - void *dst = map->core.event_copy; - - do { - cpy = min(map->core.mask + 1 - (offset & map->core.mask), len); - memcpy(dst, &data[offset & map->core.mask], cpy); - offset += cpy; - dst += cpy; - len -= cpy; - } while (len); - - event = (union perf_event *)map->core.event_copy; - } - - *startp += size; - } - - return event; -} - -/* - * Read event from ring buffer one by one. - * Return one event for each call. - * - * Usage: - * perf_mmap__read_init() - * while(event = perf_mmap__read_event()) { - * //process the event - * perf_mmap__consume() - * } - * perf_mmap__read_done() - */ -union perf_event *perf_mmap__read_event(struct mmap *map) -{ - union perf_event *event; - - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. 
- */ - if (!refcount_read(&map->core.refcnt)) - return NULL; - - /* non-overwirte doesn't pause the ringbuffer */ - if (!map->core.overwrite) - map->core.end = perf_mmap__read_head(&map->core); - - event = perf_mmap__read(map, &map->core.start, map->core.end); - - if (!map->core.overwrite) - map->core.prev = map->core.start; - - return event; -} - int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, struct auxtrace_mmap_params *mp __maybe_unused, void *userpg __maybe_unused, diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 0b15702be1a5..bee4e83f7109 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -47,8 +47,6 @@ void mmap__munmap(struct mmap *map); union perf_event *perf_mmap__read_forward(struct mmap *map); -union perf_event *perf_mmap__read_event(struct mmap *map); - int perf_mmap__push(struct mmap *md, void *to, int push(struct mmap *map, void *to, void *buf, size_t size)); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 64eec2a239d4..25118605f3f8 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1026,7 +1026,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, if (perf_mmap__read_init(&md->core) < 0) goto end; - event = perf_mmap__read_event(md); + event = perf_mmap__read_event(&md->core); if (event != NULL) { PyObject *pyevent = pyrf_event__new(event); struct pyrf_event *pevent = (struct pyrf_event *)pyevent; -- cgit v1.2.3-59-g8ed1b From d1a177595b3a824c72dacb0f9d1a4e5906eaef0e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:21 +0200 Subject: libperf: Adopt perf_evlist__mmap()/munmap() from tools/perf Add libperf's version of perf_evlist__mmap()/munmap() functions and exporting them in the perf/evlist.h header. It's the backbone of what we have in perf code. The following changes will add needed callbacks and then we'll finally switch the perf code to use libperf's version. 
Add mmap/mmap_ovw 'struct perf_mmap' object arrays to hold maps for libperf's evlist. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-14-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 236 +++++++++++++++++++++++++++++++ tools/perf/lib/include/internal/evlist.h | 2 + tools/perf/lib/include/perf/evlist.h | 3 + tools/perf/lib/libperf.map | 2 + 4 files changed, 243 insertions(+) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index d1496fee810c..250ad5752589 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -8,13 +8,20 @@ #include #include #include +#include +#include +#include +#include +#include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -103,6 +110,10 @@ perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev) void perf_evlist__delete(struct perf_evlist *evlist) { + if (evlist == NULL) + return; + + perf_evlist__munmap(evlist); free(evlist); } @@ -281,3 +292,228 @@ int perf_evlist__poll(struct perf_evlist *evlist, int timeout) { return fdarray__poll(&evlist->pollfd, timeout); } + +static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite) +{ + int i; + struct perf_mmap *map; + + evlist->nr_mmaps = perf_cpu_map__nr(evlist->cpus); + if (perf_cpu_map__empty(evlist->cpus)) + evlist->nr_mmaps = perf_thread_map__nr(evlist->threads); + + map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); + if (!map) + return NULL; + + for (i = 0; i < evlist->nr_mmaps; i++) { + /* + * When the perf_mmap() call is made we grab one refcount, plus + * one extra to let perf_mmap__consume() get the last + * events after all real references (perf_mmap__get()) are + * dropped. + * + * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and + * thus does perf_mmap__get() on it. 
+ */ + perf_mmap__init(&map[i], overwrite, NULL); + } + + return map; +} + +static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, + struct perf_evsel *evsel, int idx, int cpu, + int thread) +{ + struct perf_sample_id *sid = SID(evsel, cpu, thread); + + sid->idx = idx; + if (evlist->cpus && cpu >= 0) + sid->cpu = evlist->cpus->map[cpu]; + else + sid->cpu = -1; + if (!evsel->system_wide && evlist->threads && thread >= 0) + sid->tid = perf_thread_map__pid(evlist->threads, thread); + else + sid->tid = -1; +} + +static struct perf_mmap* +perf_evlist__map_get(struct perf_evlist *evlist, bool overwrite, int idx) +{ + struct perf_mmap *map = &evlist->mmap[idx]; + + if (overwrite) { + if (!evlist->mmap_ovw) { + evlist->mmap_ovw = perf_evlist__alloc_mmap(evlist, true); + if (!evlist->mmap_ovw) + return NULL; + } + map = &evlist->mmap_ovw[idx]; + } + + return map; +} + +#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) + +static int +mmap_per_evsel(struct perf_evlist *evlist, int idx, + struct perf_mmap_param *mp, int cpu_idx, + int thread, int *_output, int *_output_overwrite) +{ + int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); + struct perf_evsel *evsel; + int revent; + + perf_evlist__for_each_entry(evlist, evsel) { + bool overwrite = evsel->attr.write_backward; + struct perf_mmap *map; + int *output, fd, cpu; + + if (evsel->system_wide && thread) + continue; + + cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu); + if (cpu == -1) + continue; + + map = perf_evlist__map_get(evlist, overwrite, idx); + if (map == NULL) + return -ENOMEM; + + if (overwrite) { + mp->prot = PROT_READ; + output = _output_overwrite; + } else { + mp->prot = PROT_READ | PROT_WRITE; + output = _output; + } + + fd = FD(evsel, cpu, thread); + + if (*output == -1) { + *output = fd; + + if (perf_mmap__mmap(map, mp, *output, evlist_cpu) < 0) + return -1; + } else { + if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) + return -1; + + perf_mmap__get(map); + } + + revent = 
!overwrite ? POLLIN : 0; + + if (!evsel->system_wide && + perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) { + perf_mmap__put(map); + return -1; + } + + if (evsel->attr.read_format & PERF_FORMAT_ID) { + if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, + fd) < 0) + return -1; + perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, + thread); + } + } + + return 0; +} + +static int +mmap_per_thread(struct perf_evlist *evlist, struct perf_mmap_param *mp) +{ + int thread; + int nr_threads = perf_thread_map__nr(evlist->threads); + + for (thread = 0; thread < nr_threads; thread++) { + int output = -1; + int output_overwrite = -1; + + if (mmap_per_evsel(evlist, thread, mp, 0, thread, + &output, &output_overwrite)) + goto out_unmap; + } + + return 0; + +out_unmap: + perf_evlist__munmap(evlist); + return -1; +} + +static int +mmap_per_cpu(struct perf_evlist *evlist, struct perf_mmap_param *mp) +{ + int nr_threads = perf_thread_map__nr(evlist->threads); + int nr_cpus = perf_cpu_map__nr(evlist->cpus); + int cpu, thread; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + int output = -1; + int output_overwrite = -1; + + for (thread = 0; thread < nr_threads; thread++) { + if (mmap_per_evsel(evlist, cpu, mp, cpu, + thread, &output, &output_overwrite)) + goto out_unmap; + } + } + + return 0; + +out_unmap: + perf_evlist__munmap(evlist); + return -1; +} + +int perf_evlist__mmap(struct perf_evlist *evlist, int pages) +{ + struct perf_evsel *evsel; + const struct perf_cpu_map *cpus = evlist->cpus; + const struct perf_thread_map *threads = evlist->threads; + struct perf_mmap_param mp; + + if (!evlist->mmap) + evlist->mmap = perf_evlist__alloc_mmap(evlist, false); + if (!evlist->mmap) + return -ENOMEM; + + perf_evlist__for_each_entry(evlist, evsel) { + if ((evsel->attr.read_format & PERF_FORMAT_ID) && + evsel->sample_id == NULL && + perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) + return -ENOMEM; + } + + evlist->mmap_len = (pages + 1) * page_size; + mp.mask = 
evlist->mmap_len - page_size - 1; + + if (perf_cpu_map__empty(cpus)) + return mmap_per_thread(evlist, &mp); + + return mmap_per_cpu(evlist, &mp); +} + +void perf_evlist__munmap(struct perf_evlist *evlist) +{ + int i; + + if (evlist->mmap) { + for (i = 0; i < evlist->nr_mmaps; i++) + perf_mmap__munmap(&evlist->mmap[i]); + } + + if (evlist->mmap_ovw) { + for (i = 0; i < evlist->nr_mmaps; i++) + perf_mmap__munmap(&evlist->mmap_ovw[i]); + } + + zfree(&evlist->mmap); + zfree(&evlist->mmap_ovw); +} diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 9f440ab12b76..4438a19ceba3 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -22,6 +22,8 @@ struct perf_evlist { size_t mmap_len; struct fdarray pollfd; struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; + struct perf_mmap *mmap; + struct perf_mmap *mmap_ovw; }; int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); diff --git a/tools/perf/lib/include/perf/evlist.h b/tools/perf/lib/include/perf/evlist.h index 8a2ce0757ab2..28b6a12a8a2b 100644 --- a/tools/perf/lib/include/perf/evlist.h +++ b/tools/perf/lib/include/perf/evlist.h @@ -33,4 +33,7 @@ LIBPERF_API void perf_evlist__set_maps(struct perf_evlist *evlist, struct perf_thread_map *threads); LIBPERF_API int perf_evlist__poll(struct perf_evlist *evlist, int timeout); +LIBPERF_API int perf_evlist__mmap(struct perf_evlist *evlist, int pages); +LIBPERF_API void perf_evlist__munmap(struct perf_evlist *evlist); + #endif /* __LIBPERF_EVLIST_H */ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index 8bb0d73e0c6c..5a18fd1aacf2 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -40,6 +40,8 @@ LIBPERF_0.0.1 { perf_evlist__next; perf_evlist__set_maps; perf_evlist__poll; + perf_evlist__mmap; + perf_evlist__munmap; perf_mmap__consume; perf_mmap__read_init; perf_mmap__read_done; -- cgit v1.2.3-59-g8ed1b From 
0b5ea10d4c312f5b17af9d09187efb9418517bec Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:22 +0200 Subject: libperf: Introduce perf_evlist__mmap_ops() To be able to pass specific callbacks to evlist's mmap. There will be a specific call to this function from perf's evlist__mmap() and libperf's perf_evlist__mmap() functions in following changes. Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-15-jolsa@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/lib/evlist.c | 24 ++++++++++++++++++------ tools/perf/lib/include/internal/evlist.h | 8 ++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index 250ad5752589..88d63f5cd9ca 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -472,12 +472,16 @@ out_unmap: return -1; } -int perf_evlist__mmap(struct perf_evlist *evlist, int pages) +int perf_evlist__mmap_ops(struct perf_evlist *evlist, + struct perf_evlist_mmap_ops *ops, + struct perf_mmap_param *mp) { struct perf_evsel *evsel; const struct perf_cpu_map *cpus = evlist->cpus; const struct perf_thread_map *threads = evlist->threads; - struct perf_mmap_param mp; + + if (!ops) + return -EINVAL; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist, false); @@ -491,13 +495,21 @@ int perf_evlist__mmap(struct perf_evlist *evlist, int pages) return -ENOMEM; } + if (perf_cpu_map__empty(cpus)) + return mmap_per_thread(evlist, mp); + + return mmap_per_cpu(evlist, mp); +} + +int perf_evlist__mmap(struct perf_evlist *evlist, int pages) +{ + struct perf_mmap_param mp; + struct perf_evlist_mmap_ops ops; + evlist->mmap_len = (pages + 1) * page_size; mp.mask = evlist->mmap_len - page_size - 1; - if (perf_cpu_map__empty(cpus)) - return mmap_per_thread(evlist, &mp); - - return mmap_per_cpu(evlist, &mp); + return perf_evlist__mmap_ops(evlist, &ops, 
&mp); } void perf_evlist__munmap(struct perf_evlist *evlist) diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 4438a19ceba3..e5f092ff6202 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -11,6 +11,7 @@ struct perf_cpu_map; struct perf_thread_map; +struct perf_mmap_param; struct perf_evlist { struct list_head entries; @@ -26,10 +27,17 @@ struct perf_evlist { struct perf_mmap *mmap_ovw; }; +struct perf_evlist_mmap_ops { +}; + int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, void *ptr, short revent); +int perf_evlist__mmap_ops(struct perf_evlist *evlist, + struct perf_evlist_mmap_ops *ops, + struct perf_mmap_param *mp); + /** * __perf_evlist__for_each_entry - iterate thru all the evsels * @list: list_head instance to iterate -- cgit v1.2.3-59-g8ed1b From 1fcbb75cc574072ab457dbbaa74fc7064b691e86 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:23 +0200 Subject: libperf: Introduce perf_evlist_mmap_ops::idx callback Add the perf_evlist_mmap_ops::idx callback to be called in mmap_per_cpu() and mmap_per_thread() with current cpu and thread indexes. It's used by current aux code, so perf will use this callback to set the aux index. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-16-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 18 +++++++++++++----- tools/perf/lib/include/internal/evlist.h | 4 ++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index 88d63f5cd9ca..3832d3e9a3b4 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -426,7 +426,8 @@ mmap_per_evsel(struct perf_evlist *evlist, int idx, } static int -mmap_per_thread(struct perf_evlist *evlist, struct perf_mmap_param *mp) +mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + struct perf_mmap_param *mp) { int thread; int nr_threads = perf_thread_map__nr(evlist->threads); @@ -435,6 +436,9 @@ mmap_per_thread(struct perf_evlist *evlist, struct perf_mmap_param *mp) int output = -1; int output_overwrite = -1; + if (ops->idx) + ops->idx(evlist, mp, thread, false); + if (mmap_per_evsel(evlist, thread, mp, 0, thread, &output, &output_overwrite)) goto out_unmap; @@ -448,7 +452,8 @@ out_unmap: } static int -mmap_per_cpu(struct perf_evlist *evlist, struct perf_mmap_param *mp) +mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + struct perf_mmap_param *mp) { int nr_threads = perf_thread_map__nr(evlist->threads); int nr_cpus = perf_cpu_map__nr(evlist->cpus); @@ -458,6 +463,9 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_mmap_param *mp) int output = -1; int output_overwrite = -1; + if (ops->idx) + ops->idx(evlist, mp, cpu, true); + for (thread = 0; thread < nr_threads; thread++) { if (mmap_per_evsel(evlist, cpu, mp, cpu, thread, &output, &output_overwrite)) @@ -496,15 +504,15 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, } if (perf_cpu_map__empty(cpus)) - return mmap_per_thread(evlist, mp); + return mmap_per_thread(evlist, ops, mp); - return 
mmap_per_cpu(evlist, mp); + return mmap_per_cpu(evlist, ops, mp); } int perf_evlist__mmap(struct perf_evlist *evlist, int pages) { struct perf_mmap_param mp; - struct perf_evlist_mmap_ops ops; + struct perf_evlist_mmap_ops ops = { 0 }; evlist->mmap_len = (pages + 1) * page_size; mp.mask = evlist->mmap_len - page_size - 1; diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index e5f092ff6202..053f620696f3 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -27,7 +27,11 @@ struct perf_evlist { struct perf_mmap *mmap_ovw; }; +typedef void +(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool); + struct perf_evlist_mmap_ops { + perf_evlist_mmap__cb_idx_t idx; }; int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); -- cgit v1.2.3-59-g8ed1b From 3a8bb58121987a8405d6f96cd8815025e564605d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:24 +0200 Subject: libperf: Add perf_evlist_mmap_ops::get callback Add the perf_evlist_mmap_ops::get callback to be called in mmap_per_evsel() to get/allocate the 'struct perf_mmap' object. Add the libperf's perf_evlist__mmap_cb_get() function as libperf's get callback. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-17-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 18 ++++++++++-------- tools/perf/lib/include/internal/evlist.h | 3 +++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index 3832d3e9a3b4..4f49de5e8f7c 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -340,7 +340,7 @@ static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, } static struct perf_mmap* -perf_evlist__map_get(struct perf_evlist *evlist, bool overwrite, int idx) +perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx) { struct perf_mmap *map = &evlist->mmap[idx]; @@ -359,8 +359,8 @@ perf_evlist__map_get(struct perf_evlist *evlist, bool overwrite, int idx) #define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) static int -mmap_per_evsel(struct perf_evlist *evlist, int idx, - struct perf_mmap_param *mp, int cpu_idx, +mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + int idx, struct perf_mmap_param *mp, int cpu_idx, int thread, int *_output, int *_output_overwrite) { int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); @@ -379,7 +379,7 @@ mmap_per_evsel(struct perf_evlist *evlist, int idx, if (cpu == -1) continue; - map = perf_evlist__map_get(evlist, overwrite, idx); + map = ops->get(evlist, overwrite, idx); if (map == NULL) return -ENOMEM; @@ -439,7 +439,7 @@ mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, if (ops->idx) ops->idx(evlist, mp, thread, false); - if (mmap_per_evsel(evlist, thread, mp, 0, thread, + if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, &output, &output_overwrite)) goto out_unmap; } @@ -467,7 +467,7 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, ops->idx(evlist, mp, cpu, 
true); for (thread = 0; thread < nr_threads; thread++) { - if (mmap_per_evsel(evlist, cpu, mp, cpu, + if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, thread, &output, &output_overwrite)) goto out_unmap; } @@ -488,7 +488,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, const struct perf_cpu_map *cpus = evlist->cpus; const struct perf_thread_map *threads = evlist->threads; - if (!ops) + if (!ops || !ops->get) return -EINVAL; if (!evlist->mmap) @@ -512,7 +512,9 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, int perf_evlist__mmap(struct perf_evlist *evlist, int pages) { struct perf_mmap_param mp; - struct perf_evlist_mmap_ops ops = { 0 }; + struct perf_evlist_mmap_ops ops = { + .get = perf_evlist__mmap_cb_get, + }; evlist->mmap_len = (pages + 1) * page_size; mp.mask = evlist->mmap_len - page_size - 1; diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 053f620696f3..9bc3a21643ea 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -29,9 +29,12 @@ struct perf_evlist { typedef void (*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool); +typedef struct perf_mmap* +(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); struct perf_evlist_mmap_ops { perf_evlist_mmap__cb_idx_t idx; + perf_evlist_mmap__cb_get_t get; }; int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); -- cgit v1.2.3-59-g8ed1b From b5911e7ac28cb34f21b7380915ce98518078f114 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:25 +0200 Subject: libperf: Introduce perf_evlist_mmap_ops::mmap callback Add the perf_evlist_mmap_ops::mmap callback to be called in mmap_per_evsel() to actually mmap the map. Add libperf's perf_evlist__mmap_cb_mmap() function as libperf's mmap callback. A newly mmapped map gets its refcount set to 2 in mmap__mmap(); we follow that in the mmap callback.
We will move this to common place after we switch to perf_evlist__mmap(). Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-18-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 29 ++++++++++++++++++++++++++--- tools/perf/lib/include/internal/evlist.h | 3 +++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index 4f49de5e8f7c..b69722627779 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -358,6 +358,28 @@ perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx) #define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) +static int +perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp, + int output, int cpu) +{ + /* + * The last one will be done at perf_mmap__consume(), so that we + * make sure we don't prevent tools from consuming every last event in + * the ring buffer. + * + * I.e. we can get the POLLHUP meaning that the fd doesn't exist + * anymore, but the last events for it are still in the ring buffer, + * waiting to be consumed. + * + * Tools can chose to ignore this at their own discretion, but the + * evlist layer can't just drop it when filtering events in + * perf_evlist__filter_pollfd(). 
+ */ + refcount_set(&map->refcnt, 2); + + return perf_mmap__mmap(map, mp, output, cpu); +} + static int mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, @@ -396,7 +418,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, if (*output == -1) { *output = fd; - if (perf_mmap__mmap(map, mp, *output, evlist_cpu) < 0) + if (ops->mmap(map, mp, *output, evlist_cpu) < 0) return -1; } else { if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) @@ -488,7 +510,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, const struct perf_cpu_map *cpus = evlist->cpus; const struct perf_thread_map *threads = evlist->threads; - if (!ops || !ops->get) + if (!ops || !ops->get || !ops->mmap) return -EINVAL; if (!evlist->mmap) @@ -513,7 +535,8 @@ int perf_evlist__mmap(struct perf_evlist *evlist, int pages) { struct perf_mmap_param mp; struct perf_evlist_mmap_ops ops = { - .get = perf_evlist__mmap_cb_get, + .get = perf_evlist__mmap_cb_get, + .mmap = perf_evlist__mmap_cb_mmap, }; evlist->mmap_len = (pages + 1) * page_size; diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 9bc3a21643ea..b2019700cdc0 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -31,10 +31,13 @@ typedef void (*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool); typedef struct perf_mmap* (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); +typedef int +(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int); struct perf_evlist_mmap_ops { perf_evlist_mmap__cb_idx_t idx; perf_evlist_mmap__cb_get_t get; + perf_evlist_mmap__cb_mmap_t mmap; }; int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); -- cgit v1.2.3-59-g8ed1b From 9abd2ab2377613425e1c362694f85b110f5bace2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 
14:53:26 +0200 Subject: perf tools: Introduce perf_evlist__mmap_cb_idx() Add perf_evlist__mmap_cb_idx function to call auxtrace_mmap_params__set_idx() on each new index during perf_evlist__mmap_ops call. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-19-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a9b189ac859b..11716f2b965a 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -739,6 +739,17 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, return 0; } +static void +perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist, + struct perf_mmap_param *_mp, + int idx, bool per_cpu) +{ + struct evlist *evlist = container_of(_evlist, struct evlist, core); + struct mmap_params *mp = container_of(_mp, struct mmap_params, core); + + auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu); +} + static int evlist__mmap_per_cpu(struct evlist *evlist, struct mmap_params *mp) { @@ -935,6 +946,9 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, .flush = flush, .comp_level = comp_level }; + struct perf_evlist_mmap_ops ops __maybe_unused = { + .idx = perf_evlist__mmap_cb_idx, + }; if (!evlist->mmap) evlist->mmap = evlist__alloc_mmap(evlist, false); -- cgit v1.2.3-59-g8ed1b From bb1b1885e2f22afb8bc7981cd865fe4b0e3d975b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:27 +0200 Subject: perf evlist: Introduce perf_evlist__mmap_cb_get() Add the perf_evlist__mmap_cb_get() function to return the 'struct perf_mmap' object during perf_evlist__mmap_ops() call. The array of 'struct mmap' is allocated via evlist__alloc_mmap(), in this callback we simply return a pointer to the base object.
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-20-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 11716f2b965a..f50ee5cb6554 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -750,6 +750,29 @@ perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist, auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu); } +static struct perf_mmap* +perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx) +{ + struct evlist *evlist = container_of(_evlist, struct evlist, core); + struct mmap *maps = evlist->mmap; + + if (overwrite) { + maps = evlist->overwrite_mmap; + + if (!maps) { + maps = evlist__alloc_mmap(evlist, true); + if (!maps) + return NULL; + + evlist->overwrite_mmap = maps; + if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); + } + } + + return &maps[idx].core; +} + static int evlist__mmap_per_cpu(struct evlist *evlist, struct mmap_params *mp) { @@ -948,6 +971,7 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, }; struct perf_evlist_mmap_ops ops __maybe_unused = { .idx = perf_evlist__mmap_cb_idx, + .get = perf_evlist__mmap_cb_get, }; if (!evlist->mmap) -- cgit v1.2.3-59-g8ed1b From b80132b12a78ec71de2b3320cc49d4a0b2cd7c46 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:28 +0200 Subject: perf evlist: Introduce perf_evlist__mmap_cb_mmap() Add the perf_evlist__mmap_cb_mmap() function to call perf specific mmap__mmap() function during perf_evlist__mmap_ops() call. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-21-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f50ee5cb6554..d57b684b4b7b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -773,6 +773,16 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx) return &maps[idx].core; } +static int +perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp, + int output, int cpu) +{ + struct mmap *map = container_of(_map, struct mmap, core); + struct mmap_params *mp = container_of(_mp, struct mmap_params, core); + + return mmap__mmap(map, mp, output, cpu); +} + static int evlist__mmap_per_cpu(struct evlist *evlist, struct mmap_params *mp) { @@ -970,8 +980,9 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, .comp_level = comp_level }; struct perf_evlist_mmap_ops ops __maybe_unused = { - .idx = perf_evlist__mmap_cb_idx, - .get = perf_evlist__mmap_cb_get, + .idx = perf_evlist__mmap_cb_idx, + .get = perf_evlist__mmap_cb_get, + .mmap = perf_evlist__mmap_cb_mmap, }; if (!evlist->mmap) -- cgit v1.2.3-59-g8ed1b From 923d0f1868cb331d660fb569ecd00c39889905f6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:29 +0200 Subject: perf evlist: Switch to libperf's mmap interface Switch to the libperf mmap interface by calling directly perf_evlist__mmap_ops() and removing perf's evlist__mmap_per_* functions. By switching to libperf perf_evlist__mmap() we need to operate over 'struct perf_mmap' in evlist__add_pollfd, so make the related changes there. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-22-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 179 ++--------------------------------------------- 1 file changed, 4 insertions(+), 175 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d57b684b4b7b..3f4f11f27b94 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -431,10 +431,10 @@ int evlist__add_pollfd(struct evlist *evlist, int fd) static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, void *arg __maybe_unused) { - struct mmap *map = fda->priv[fd].ptr; + struct perf_mmap *map = fda->priv[fd].ptr; if (map) - perf_mmap__put(&map->core); + perf_mmap__put(map); } int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) @@ -448,22 +448,6 @@ int evlist__poll(struct evlist *evlist, int timeout) return perf_evlist__poll(&evlist->core, timeout); } -static void perf_evlist__set_sid_idx(struct evlist *evlist, - struct evsel *evsel, int idx, int cpu, - int thread) -{ - struct perf_sample_id *sid = SID(evsel, cpu, thread); - sid->idx = idx; - if (evlist->core.cpus && cpu >= 0) - sid->cpu = evlist->core.cpus->map[cpu]; - else - sid->cpu = -1; - if (!evsel->core.system_wide && evlist->core.threads && thread >= 0) - sid->tid = perf_thread_map__pid(evlist->core.threads, thread); - else - sid->tid = -1; -} - struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id) { struct hlist_head *head; @@ -652,93 +636,6 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, return map; } -static bool -perf_evlist__should_poll(struct evlist *evlist __maybe_unused, - struct evsel *evsel) -{ - if (evsel->core.attr.write_backward) - return false; - return true; -} - -static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, - struct mmap_params *mp, int cpu_idx, - int thread, 
int *_output, int *_output_overwrite) -{ - struct evsel *evsel; - int revent; - int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx); - - evlist__for_each_entry(evlist, evsel) { - struct mmap *maps = evlist->mmap; - int *output = _output; - int fd; - int cpu; - - mp->core.prot = PROT_READ | PROT_WRITE; - if (evsel->core.attr.write_backward) { - output = _output_overwrite; - maps = evlist->overwrite_mmap; - - if (!maps) { - maps = evlist__alloc_mmap(evlist, true); - if (!maps) - return -1; - evlist->overwrite_mmap = maps; - if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); - } - mp->core.prot &= ~PROT_WRITE; - } - - if (evsel->core.system_wide && thread) - continue; - - cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu); - if (cpu == -1) - continue; - - fd = FD(evsel, cpu, thread); - - if (*output == -1) { - *output = fd; - - if (mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0) - return -1; - } else { - if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) - return -1; - - perf_mmap__get(&maps[idx].core); - } - - revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; - - /* - * The system_wide flag causes a selected event to be opened - * always without a pid. Consequently it will never get a - * POLLHUP, but it is used for tracking in combination with - * other events, so it should not need to be polled anyway. - * Therefore don't add it for polling. 
- */ - if (!evsel->core.system_wide && - perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) { - perf_mmap__put(&maps[idx].core); - return -1; - } - - if (evsel->core.attr.read_format & PERF_FORMAT_ID) { - if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread, - fd) < 0) - return -1; - perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, - thread); - } - } - - return 0; -} - static void perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist, struct perf_mmap_param *_mp, @@ -783,61 +680,6 @@ perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp, return mmap__mmap(map, mp, output, cpu); } -static int evlist__mmap_per_cpu(struct evlist *evlist, - struct mmap_params *mp) -{ - int cpu, thread; - int nr_cpus = perf_cpu_map__nr(evlist->core.cpus); - int nr_threads = perf_thread_map__nr(evlist->core.threads); - - pr_debug2("perf event ring buffer mmapped per cpu\n"); - for (cpu = 0; cpu < nr_cpus; cpu++) { - int output = -1; - int output_overwrite = -1; - - auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, - true); - - for (thread = 0; thread < nr_threads; thread++) { - if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu, - thread, &output, &output_overwrite)) - goto out_unmap; - } - } - - return 0; - -out_unmap: - evlist__munmap_nofree(evlist); - return -1; -} - -static int evlist__mmap_per_thread(struct evlist *evlist, - struct mmap_params *mp) -{ - int thread; - int nr_threads = perf_thread_map__nr(evlist->core.threads); - - pr_debug2("perf event ring buffer mmapped per thread\n"); - for (thread = 0; thread < nr_threads; thread++) { - int output = -1; - int output_overwrite = -1; - - auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, - false); - - if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, - &output, &output_overwrite)) - goto out_unmap; - } - - return 0; - -out_unmap: - evlist__munmap_nofree(evlist); - return -1; -} - unsigned long perf_event_mlock_kb_in_pages(void) { unsigned long 
pages; @@ -965,9 +807,6 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush, int comp_level) { - struct evsel *evsel; - const struct perf_cpu_map *cpus = evlist->core.cpus; - const struct perf_thread_map *threads = evlist->core.threads; /* * Delay setting mp.prot: set it before calling perf_mmap__mmap. * Its value is decided by evsel's write_backward. @@ -979,7 +818,7 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, .flush = flush, .comp_level = comp_level }; - struct perf_evlist_mmap_ops ops __maybe_unused = { + struct perf_evlist_mmap_ops ops = { .idx = perf_evlist__mmap_cb_idx, .get = perf_evlist__mmap_cb_get, .mmap = perf_evlist__mmap_cb_mmap, @@ -1000,17 +839,7 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len, auxtrace_pages, auxtrace_overwrite); - evlist__for_each_entry(evlist, evsel) { - if ((evsel->core.attr.read_format & PERF_FORMAT_ID) && - evsel->core.sample_id == NULL && - perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr) < 0) - return -ENOMEM; - } - - if (perf_cpu_map__empty(cpus)) - return evlist__mmap_per_thread(evlist, &mp); - - return evlist__mmap_per_cpu(evlist, &mp); + return perf_evlist__mmap_ops(&evlist->core, &ops, &mp.core); } int evlist__mmap(struct evlist *evlist, unsigned int pages) -- cgit v1.2.3-59-g8ed1b From 285aaeac8c5d537b56b70169e21ac29ae5caa8e1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:30 +0200 Subject: libperf: Centralize map refcnt setting Currently when a new map is mmapped we set its refcnt to 2 in the perf_evlist_mmap_ops::mmap callback. Every mmap gets its refcnt set to 2 when it's first mmapped: - 1 for the current user, which will be taken out by a call to perf_evlist__munmap_filtered(), where we find out there's no more data coming from the kernel to this mmap.
- 1 for the drain code where in perf_mmap__consume() the mmap is released if it is empty. Move this common setup into libperf's generic code before the mmap callback is called. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-23-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 30 +++++++++++++++--------------- tools/perf/util/mmap.c | 15 --------------- 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index b69722627779..f9a802d2ceb5 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -362,21 +362,6 @@ static int perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp, int output, int cpu) { - /* - * The last one will be done at perf_mmap__consume(), so that we - * make sure we don't prevent tools from consuming every last event in - * the ring buffer. - * - * I.e. we can get the POLLHUP meaning that the fd doesn't exist - * anymore, but the last events for it are still in the ring buffer, - * waiting to be consumed. - * - * Tools can chose to ignore this at their own discretion, but the - * evlist layer can't just drop it when filtering events in - * perf_evlist__filter_pollfd(). - */ - refcount_set(&map->refcnt, 2); - return perf_mmap__mmap(map, mp, output, cpu); } @@ -418,6 +403,21 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, if (*output == -1) { *output = fd; + /* + * The last one will be done at perf_mmap__consume(), so that we + * make sure we don't prevent tools from consuming every last event in + * the ring buffer. + * + * I.e. we can get the POLLHUP meaning that the fd doesn't exist + * anymore, but the last events for it are still in the ring buffer, + * waiting to be consumed. 
+ * + * Tools can chose to ignore this at their own discretion, but the + * evlist layer can't just drop it when filtering events in + * perf_evlist__filter_pollfd(). + */ + refcount_set(&map->refcnt, 2); + if (ops->mmap(map, mp, *output, evlist_cpu) < 0) return -1; } else { diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 2a8bf0ab861c..063d1b93c53d 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -243,21 +243,6 @@ static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) { - /* - * The last one will be done at perf_mmap__consume(), so that we - * make sure we don't prevent tools from consuming every last event in - * the ring buffer. - * - * I.e. we can get the POLLHUP meaning that the fd doesn't exist - * anymore, but the last events for it are still in the ring buffer, - * waiting to be consumed. - * - * Tools can chose to ignore this at their own discretion, but the - * evlist layer can't just drop it when filtering events in - * perf_evlist__filter_pollfd(). - */ - refcount_set(&map->core.refcnt, 2); - if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", errno); -- cgit v1.2.3-59-g8ed1b From 230662e15ed6cc63ecf72ed1bffa3cadef486850 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:31 +0200 Subject: libperf: Move the pollfd allocation from tools/perf to libperf It's needed in libperf only, so move it to the perf_evlist__mmap_ops() function. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-24-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 5 +++++ tools/perf/util/evlist.c | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index f9a802d2ceb5..5ae1da97d2e6 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -34,6 +34,7 @@ void perf_evlist__init(struct perf_evlist *evlist) INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); evlist->nr_entries = 0; + fdarray__init(&evlist->pollfd, 64); } static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, @@ -114,6 +115,7 @@ void perf_evlist__delete(struct perf_evlist *evlist) return; perf_evlist__munmap(evlist); + fdarray__exit(&evlist->pollfd); free(evlist); } @@ -525,6 +527,9 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, return -ENOMEM; } + if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) + return -ENOMEM; + if (perf_cpu_map__empty(cpus)) return mmap_per_thread(evlist, ops, mp); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3f4f11f27b94..5192c6583c96 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -58,7 +58,6 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, { perf_evlist__init(&evlist->core); perf_evlist__set_maps(&evlist->core, cpus, threads); - fdarray__init(&evlist->core.pollfd, 64); evlist->workload.pid = -1; evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; } @@ -829,9 +828,6 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, if (!evlist->mmap) return -ENOMEM; - if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(&evlist->core) < 0) - return -ENOMEM; - evlist->core.mmap_len = evlist__mmap_size(pages); pr_debug("mmap size %zuB\n", evlist->core.mmap_len); 
mp.core.mask = evlist->core.mmap_len - page_size - 1; -- cgit v1.2.3-59-g8ed1b From 93dd6e2831ff399f7685aa2157b997b6392efac8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:32 +0200 Subject: libperf: Introduce perf_evlist__exit() Add the perf_evlist__exit() function; so far it's not exported and is added only for internal use by perf and libperf. Use it to release the cpus/threads maps and the pollfd array. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-25-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 12 +++++++++++- tools/perf/lib/include/internal/evlist.h | 2 ++ tools/perf/util/evlist.c | 6 +----- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index 5ae1da97d2e6..7ba98f0e6365 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -109,13 +109,23 @@ perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev) return next; } +void perf_evlist__exit(struct perf_evlist *evlist) +{ + perf_cpu_map__put(evlist->cpus); + perf_thread_map__put(evlist->threads); + evlist->cpus = NULL; + evlist->threads = NULL; + fdarray__exit(&evlist->pollfd); +} + void perf_evlist__delete(struct perf_evlist *evlist) { if (evlist == NULL) return; perf_evlist__munmap(evlist); - fdarray__exit(&evlist->pollfd); + perf_evlist__close(evlist); + perf_evlist__exit(evlist); free(evlist); } diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index b2019700cdc0..0721512ffb19 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -48,6 +48,8 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp); +void perf_evlist__exit(struct perf_evlist *evlist); + /** * __perf_evlist__for_each_entry - iterate thru
all the evsels * @list: list_head instance to iterate diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5192c6583c96..031ace3696a2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -138,7 +138,7 @@ void evlist__exit(struct evlist *evlist) { zfree(&evlist->mmap); zfree(&evlist->overwrite_mmap); - fdarray__exit(&evlist->core.pollfd); + perf_evlist__exit(&evlist->core); } void evlist__delete(struct evlist *evlist) @@ -148,10 +148,6 @@ void evlist__delete(struct evlist *evlist) evlist__munmap(evlist); evlist__close(evlist); - perf_cpu_map__put(evlist->core.cpus); - perf_thread_map__put(evlist->core.threads); - evlist->core.cpus = NULL; - evlist->core.threads = NULL; evlist__purge(evlist); evlist__exit(evlist); free(evlist); -- cgit v1.2.3-59-g8ed1b From 696f27c994ed056fd176ce9dc51c7988e148e4b0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:33 +0200 Subject: libperf: Introduce perf_evlist__purge() Add a static perf_evlist__purge() function to purge evsels from an evlist. Also add perf_evlist__for_each_entry_safe() which is used by perf_evlist__purge().
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-26-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 13 +++++++++++++ tools/perf/lib/include/internal/evlist.h | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index 7ba98f0e6365..9534ad9a572f 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -109,6 +109,18 @@ perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev) return next; } +static void perf_evlist__purge(struct perf_evlist *evlist) +{ + struct perf_evsel *pos, *n; + + perf_evlist__for_each_entry_safe(evlist, n, pos) { + list_del_init(&pos->node); + perf_evsel__delete(pos); + } + + evlist->nr_entries = 0; +} + void perf_evlist__exit(struct perf_evlist *evlist) { perf_cpu_map__put(evlist->cpus); @@ -125,6 +137,7 @@ void perf_evlist__delete(struct perf_evlist *evlist) perf_evlist__munmap(evlist); perf_evlist__close(evlist); + perf_evlist__purge(evlist); perf_evlist__exit(evlist); free(evlist); } diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 0721512ffb19..be0b25a70730 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -82,6 +82,24 @@ void perf_evlist__exit(struct perf_evlist *evlist); #define perf_evlist__for_each_entry_reverse(evlist, evsel) \ __perf_evlist__for_each_entry_reverse(&(evlist)->entries, evsel) +/** + * __perf_evlist__for_each_entry_safe - safely iterate thru all the evsels + * @list: list_head instance to iterate + * @tmp: struct evsel temp iterator + * @evsel: struct evsel iterator + */ +#define __perf_evlist__for_each_entry_safe(list, tmp, evsel) \ + list_for_each_entry_safe(evsel, tmp, list, node) + +/** + * perf_evlist__for_each_entry_safe - safely iterate thru all the evsels + * @evlist: 
evlist instance to iterate + * @evsel: struct evsel iterator + * @tmp: struct evsel temp iterator + */ +#define perf_evlist__for_each_entry_safe(evlist, tmp, evsel) \ + __perf_evlist__for_each_entry_safe(&(evlist)->entries, tmp, evsel) + static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) { return list_entry(evlist->entries.next, struct perf_evsel, node); -- cgit v1.2.3-59-g8ed1b From 84227cb11ff4d9815b9b1daf0c1a2bd7e9274c58 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:34 +0200 Subject: libperf: Adopt perf_evlist__filter_pollfd() from tools/perf Introduce the perf_evlist__filter_pollfd function and export it in the perf/evlist.h header, so that libperf users can check if the descriptor is still alive. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-27-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 15 +++++++++++++++ tools/perf/lib/include/perf/evlist.h | 2 ++ tools/perf/lib/libperf.map | 1 + tools/perf/util/evlist.c | 12 +----------- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index 9534ad9a572f..65045614c938 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -313,6 +313,21 @@ int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, return pos; } +static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, + void *arg __maybe_unused) +{ + struct perf_mmap *map = fda->priv[fd].ptr; + + if (map) + perf_mmap__put(map); +} + +int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) +{ + return fdarray__filter(&evlist->pollfd, revents_and_mask, + perf_evlist__munmap_filtered, NULL); +} + int perf_evlist__poll(struct perf_evlist *evlist, int timeout) { return fdarray__poll(&evlist->pollfd, timeout); diff --git 
a/tools/perf/lib/include/perf/evlist.h b/tools/perf/lib/include/perf/evlist.h index 28b6a12a8a2b..16f526e74d13 100644 --- a/tools/perf/lib/include/perf/evlist.h +++ b/tools/perf/lib/include/perf/evlist.h @@ -32,6 +32,8 @@ LIBPERF_API void perf_evlist__set_maps(struct perf_evlist *evlist, struct perf_cpu_map *cpus, struct perf_thread_map *threads); LIBPERF_API int perf_evlist__poll(struct perf_evlist *evlist, int timeout); +LIBPERF_API int perf_evlist__filter_pollfd(struct perf_evlist *evlist, + short revents_and_mask); LIBPERF_API int perf_evlist__mmap(struct perf_evlist *evlist, int pages); LIBPERF_API void perf_evlist__munmap(struct perf_evlist *evlist); diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index 5a18fd1aacf2..2184aba36c3f 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -42,6 +42,7 @@ LIBPERF_0.0.1 { perf_evlist__poll; perf_evlist__mmap; perf_evlist__munmap; + perf_evlist__filter_pollfd; perf_mmap__consume; perf_mmap__read_init; perf_mmap__read_done; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 031ace3696a2..21b77efa802c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -423,19 +423,9 @@ int evlist__add_pollfd(struct evlist *evlist, int fd) return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN); } -static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, - void *arg __maybe_unused) -{ - struct perf_mmap *map = fda->priv[fd].ptr; - - if (map) - perf_mmap__put(map); -} - int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) { - return fdarray__filter(&evlist->core.pollfd, revents_and_mask, - perf_evlist__munmap_filtered, NULL); + return perf_evlist__filter_pollfd(&evlist->core, revents_and_mask); } int evlist__poll(struct evlist *evlist, int timeout) -- cgit v1.2.3-59-g8ed1b From 55542113c690a567e728e40d4181d7d037fc21b0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 11 Oct 2019 14:21:55 +0200 Subject: perf tools: 
Propagate CFLAGS to libperf Andi reported that 'make DEBUG=1' does not propagate to the libperf code. The same is true for the other flags. Change the code to propagate the global build flags to the libperf compilation. Reported-by: Andi Kleen Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191011122155.15738-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 28 +++++++++++++++------------- tools/perf/Makefile.perf | 2 +- tools/perf/lib/core.c | 3 ++- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 46f7fba2306c..063202c53b64 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -188,7 +188,7 @@ endif # Treat warnings as errors unless directed not to ifneq ($(WERROR),0) - CFLAGS += -Werror + CORE_CFLAGS += -Werror CXXFLAGS += -Werror endif @@ -198,9 +198,9 @@ endif ifeq ($(DEBUG),0) ifeq ($(CC_NO_CLANG), 0) - CFLAGS += -O3 + CORE_CFLAGS += -O3 else - CFLAGS += -O6 + CORE_CFLAGS += -O6 endif endif @@ -245,12 +245,12 @@ FEATURE_CHECK_LDFLAGS-libaio = -lrt FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl -CFLAGS += -fno-omit-frame-pointer -CFLAGS += -ggdb3 -CFLAGS += -funwind-tables -CFLAGS += -Wall -CFLAGS += -Wextra -CFLAGS += -std=gnu99 +CORE_CFLAGS += -fno-omit-frame-pointer +CORE_CFLAGS += -ggdb3 +CORE_CFLAGS += -funwind-tables +CORE_CFLAGS += -Wall +CORE_CFLAGS += -Wextra +CORE_CFLAGS += -std=gnu99 CXXFLAGS += -std=gnu++11 -fno-exceptions -fno-rtti CXXFLAGS += -Wall @@ -272,12 +272,12 @@ include $(FEATURES_DUMP) endif ifeq ($(feature-stackprotector-all), 1) - CFLAGS += -fstack-protector-all + CORE_CFLAGS += -fstack-protector-all endif ifeq ($(DEBUG),0) ifeq ($(feature-fortify-source), 1) - CFLAGS += -D_FORTIFY_SOURCE=2 + CORE_CFLAGS += -D_FORTIFY_SOURCE=2 endif endif @@ -301,10 +301,12 @@ INC_FLAGS += -I$(src-perf)/util
INC_FLAGS += -I$(src-perf) INC_FLAGS += -I$(srctree)/tools/lib/ -CFLAGS += $(INC_FLAGS) +CORE_CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE + +CFLAGS += $(CORE_CFLAGS) $(INC_FLAGS) CXXFLAGS += $(INC_FLAGS) -CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE +LIBPERF_CFLAGS := $(CORE_CFLAGS) $(EXTRA_CFLAGS) ifeq ($(feature-sync-compare-and-swap), 1) CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 45c14dc24f4b..a099a8a89447 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -769,7 +769,7 @@ $(LIBBPF)-clean: $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null $(LIBPERF): FORCE - $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) $(OUTPUT)libperf.a + $(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a $(LIBPERF)-clean: $(call QUIET_CLEAN, libperf) diff --git a/tools/perf/lib/core.c b/tools/perf/lib/core.c index d0b9ae422b9f..58fc894b76c5 100644 --- a/tools/perf/lib/core.c +++ b/tools/perf/lib/core.c @@ -5,11 +5,12 @@ #include #include #include +#include #include #include #include "internal.h" -static int __base_pr(enum libperf_print_level level, const char *format, +static int __base_pr(enum libperf_print_level level __maybe_unused, const char *format, va_list args) { return vfprintf(stderr, format, args); -- cgit v1.2.3-59-g8ed1b From cebf7d51a6c3babc4d0589da7aec0de1af0a5691 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 25 Sep 2019 09:14:46 +0800 Subject: perf diff: Report noisy for cycles diff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch prints the stddev and hist for the cycles diff of a program block. It can help us understand whether the cycles data is noisy or not. This patch is inspired by Andi Kleen's patch: https://lwn.net/Articles/600471/ We create a new option, '--cycles-hist'.
Example: perf record -b ./div perf record -b ./div perf diff -c cycles # Baseline [Program Block Range] Cycles Diff Shared Object Symbol # ........ .......................................................... .... ................. ............................ # 46.72% [div.c:40 -> div.c:40] 0 div [.] main 46.72% [div.c:42 -> div.c:44] 0 div [.] main 46.72% [div.c:42 -> div.c:39] 0 div [.] main 20.54% [random_r.c:357 -> random_r.c:394] 1 libc-2.27.so [.] __random_r 20.54% [random_r.c:357 -> random_r.c:380] 0 libc-2.27.so [.] __random_r 20.54% [random_r.c:388 -> random_r.c:388] 0 libc-2.27.so [.] __random_r 20.54% [random_r.c:388 -> random_r.c:391] 0 libc-2.27.so [.] __random_r 17.04% [random.c:288 -> random.c:291] 0 libc-2.27.so [.] __random 17.04% [random.c:291 -> random.c:291] 0 libc-2.27.so [.] __random 17.04% [random.c:293 -> random.c:293] 0 libc-2.27.so [.] __random 17.04% [random.c:295 -> random.c:295] 0 libc-2.27.so [.] __random 17.04% [random.c:295 -> random.c:295] 0 libc-2.27.so [.] __random 17.04% [random.c:298 -> random.c:298] 0 libc-2.27.so [.] __random 8.40% [div.c:22 -> div.c:25] 0 div [.] compute_flag 8.40% [div.c:27 -> div.c:28] 0 div [.] compute_flag 5.14% [rand.c:26 -> rand.c:27] 0 libc-2.27.so [.] rand 5.14% [rand.c:28 -> rand.c:28] 0 libc-2.27.so [.] rand 2.15% [rand@plt+0 -> rand@plt+0] 0 div [.] 
rand@plt 0.00% [kernel.kallsyms] [k] __x86_indirect_thunk_rax 0.00% [do_mmap+714 -> do_mmap+732] -10 [kernel.kallsyms] [k] do_mmap 0.00% [do_mmap+737 -> do_mmap+765] 1 [kernel.kallsyms] [k] do_mmap 0.00% [do_mmap+262 -> do_mmap+299] 0 [kernel.kallsyms] [k] do_mmap 0.00% [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0] 7 [kernel.kallsyms] [k] __x86_indirect_thunk_r15 0.00% [native_sched_clock+0 -> native_sched_clock+119] -1 [kernel.kallsyms] [k] native_sched_clock 0.00% [native_write_msr+0 -> native_write_msr+16] -13 [kernel.kallsyms] [k] native_write_msr When we enable the option '--cycles-hist', the output is perf diff -c cycles --cycles-hist # Baseline [Program Block Range] Cycles Diff stddev/Hist Shared Object Symbol # ........ .......................................................... .... ................. ................. ............................ # 46.72% [div.c:40 -> div.c:40] 0 ± 37.8% ▁█▁▁██▁█ div [.] main 46.72% [div.c:42 -> div.c:44] 0 ± 49.4% ▁▁▂█▂▂▂▂ div [.] main 46.72% [div.c:42 -> div.c:39] 0 ± 24.1% ▃█▂▄▁▃▂▁ div [.] main 20.54% [random_r.c:357 -> random_r.c:394] 1 ± 33.5% ▅▂▁█▃▁▂▁ libc-2.27.so [.] __random_r 20.54% [random_r.c:357 -> random_r.c:380] 0 ± 39.4% ▁▁█▁██▅▁ libc-2.27.so [.] __random_r 20.54% [random_r.c:388 -> random_r.c:388] 0 libc-2.27.so [.] __random_r 20.54% [random_r.c:388 -> random_r.c:391] 0 ± 41.2% ▁▃▁▂█▄▃▁ libc-2.27.so [.] __random_r 17.04% [random.c:288 -> random.c:291] 0 ± 48.8% ▁▁▁▁███▁ libc-2.27.so [.] __random 17.04% [random.c:291 -> random.c:291] 0 ±100.0% ▁█▁▁▁▁▁▁ libc-2.27.so [.] __random 17.04% [random.c:293 -> random.c:293] 0 ±100.0% ▁█▁▁▁▁▁▁ libc-2.27.so [.] __random 17.04% [random.c:295 -> random.c:295] 0 ±100.0% ▁█▁▁▁▁▁▁ libc-2.27.so [.] __random 17.04% [random.c:295 -> random.c:295] 0 libc-2.27.so [.] __random 17.04% [random.c:298 -> random.c:298] 0 ± 75.6% ▃█▁▁▁▁▁▁ libc-2.27.so [.] __random 8.40% [div.c:22 -> div.c:25] 0 ± 42.1% ▁▃▁▁███▁ div [.] 
compute_flag 8.40% [div.c:27 -> div.c:28] 0 ± 41.8% ██▁▁▄▁▁▄ div [.] compute_flag 5.14% [rand.c:26 -> rand.c:27] 0 ± 37.8% ▁▁▁████▁ libc-2.27.so [.] rand 5.14% [rand.c:28 -> rand.c:28] 0 libc-2.27.so [.] rand 2.15% [rand@plt+0 -> rand@plt+0] 0 div [.] rand@plt 0.00% [kernel.kallsyms] [k] __x86_indirect_thunk_rax 0.00% [do_mmap+714 -> do_mmap+732] -10 [kernel.kallsyms] [k] do_mmap 0.00% [do_mmap+737 -> do_mmap+765] 1 [kernel.kallsyms] [k] do_mmap 0.00% [do_mmap+262 -> do_mmap+299] 0 [kernel.kallsyms] [k] do_mmap 0.00% [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0] 7 [kernel.kallsyms] [k] __x86_indirect_thunk_r15 0.00% [native_sched_clock+0 -> native_sched_clock+119] -1 ± 38.5% ▄█▁ [kernel.kallsyms] [k] native_sched_clock 0.00% [native_write_msr+0 -> native_write_msr+16] -13 ± 47.1% ▁█▇▃▁▁ [kernel.kallsyms] [k] native_write_msr v8: --- Rebase to perf/core branch v7: --- 1. v6 got Jiri's ACK. 2. Rebase to latest perf/core branch. v6: --- 1. Jiri provides better code for using data__hpp_register() in ui_init(). Use this code in v6. v5: --- 1. Refine the use of data__hpp_register() in ui_init() according to Jiri's suggestion. v4: --- 1. Rename the new option from '--noisy' to '--cycles-hist' 2. Remove the option '-n'. 3. Only update the spark value and stats when '--cycles-hist' is enabled. 4. Remove the code of printing '..'. v3: --- 1. Move the histogram to a separate column 2. Move the svals[] out of struct stats v2: --- Jiri got a compile error, CC builtin-diff.o builtin-diff.c: In function ‘compute_cycles_diff’: builtin-diff.c:712:10: error: taking the absolute value of unsigned type ‘u64’ {aka ‘long unsigned int’} has no effect [-Werror=absolute-value] 712 | labs(pair->block_info->cycles_spark[i] - | ^~~~ Because the result of u64 - u64 is still u64. Now we change the type of cycles_spark[] to s64. 
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190925011446.30678-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-diff.txt | 5 ++ tools/perf/builtin-diff.c | 143 +++++++++++++++++++++++++++++++++ tools/perf/util/Build | 1 + tools/perf/util/annotate.c | 4 + tools/perf/util/annotate.h | 2 + tools/perf/util/sort.h | 4 + tools/perf/util/spark.c | 34 ++++++++ tools/perf/util/spark.h | 8 ++ tools/perf/util/symbol.h | 2 + 9 files changed, 203 insertions(+) create mode 100644 tools/perf/util/spark.c create mode 100644 tools/perf/util/spark.h diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index d5cc15e651cf..f50ca0fef0a4 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -95,6 +95,11 @@ OPTIONS diff.compute config option. See COMPARISON METHODS section for more info. +--cycles-hist:: + Report a histogram and the standard deviation for cycles data. + It can help us to judge if the reported cycles data is noisy or + not. This option should be used with '-c cycles'. + -p:: --period:: Show period values for both compared hist entries. 
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index c37a78677955..5281629c27b1 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -23,6 +23,7 @@ #include "util/time-utils.h" #include "util/annotate.h" #include "util/map.h" +#include "util/spark.h" #include #include #include @@ -53,6 +54,7 @@ enum { PERF_HPP_DIFF__FORMULA, PERF_HPP_DIFF__DELTA_ABS, PERF_HPP_DIFF__CYCLES, + PERF_HPP_DIFF__CYCLES_HIST, PERF_HPP_DIFF__MAX_INDEX }; @@ -87,6 +89,7 @@ static bool force; static bool show_period; static bool show_formula; static bool show_baseline_only; +static bool cycles_hist; static unsigned int sort_compute = 1; static s64 compute_wdiff_w1; @@ -164,6 +167,10 @@ static struct header_column { [PERF_HPP_DIFF__CYCLES] = { .name = "[Program Block Range] Cycles Diff", .width = 70, + }, + [PERF_HPP_DIFF__CYCLES_HIST] = { + .name = "stddev/Hist", + .width = NUM_SPARKS + 9, } }; @@ -610,6 +617,9 @@ static void init_block_info(struct block_info *bi, struct symbol *sym, bi->cycles_aggr = ch->cycles_aggr; bi->num = ch->num; bi->num_aggr = ch->num_aggr; + + memcpy(bi->cycles_spark, ch->cycles_spark, + NUM_SPARKS * sizeof(u64)); } static int process_block_per_sym(struct hist_entry *he) @@ -689,6 +699,21 @@ static struct hist_entry *get_block_pair(struct hist_entry *he, return NULL; } +static void init_spark_values(unsigned long *svals, int num) +{ + for (int i = 0; i < num; i++) + svals[i] = 0; +} + +static void update_spark_value(unsigned long *svals, int num, + struct stats *stats, u64 val) +{ + int n = stats->n; + + if (n < num) + svals[n] = val; +} + static void compute_cycles_diff(struct hist_entry *he, struct hist_entry *pair) { @@ -697,6 +722,26 @@ static void compute_cycles_diff(struct hist_entry *he, pair->diff.cycles = pair->block_info->cycles_aggr / pair->block_info->num_aggr - he->block_info->cycles_aggr / he->block_info->num_aggr; + + if (!cycles_hist) + return; + + init_stats(&pair->diff.stats); + 
init_spark_values(pair->diff.svals, NUM_SPARKS); + + for (int i = 0; i < pair->block_info->num; i++) { + u64 val; + + if (i >= he->block_info->num || i >= NUM_SPARKS) + break; + + val = labs(pair->block_info->cycles_spark[i] - + he->block_info->cycles_spark[i]); + + update_spark_value(pair->diff.svals, NUM_SPARKS, + &pair->diff.stats, val); + update_stats(&pair->diff.stats, val); + } } } @@ -1255,6 +1300,9 @@ static const struct option options[] = { "Show period values."), OPT_BOOLEAN('F', "formula", &show_formula, "Show formula."), + OPT_BOOLEAN(0, "cycles-hist", &cycles_hist, + "Show cycles histogram and standard deviation " + "- WARNING: use only with -c cycles."), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), @@ -1462,6 +1510,90 @@ static int hpp__color_cycles(struct perf_hpp_fmt *fmt, return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES); } +static int all_zero(unsigned long *vals, int len) +{ + int i; + + for (i = 0; i < len; i++) + if (vals[i] != 0) + return 0; + return 1; +} + +static int print_cycles_spark(char *bf, int size, unsigned long *svals, u64 n) +{ + int printed; + + if (n <= 1) + return 0; + + if (n > NUM_SPARKS) + n = NUM_SPARKS; + if (all_zero(svals, n)) + return 0; + + printed = print_spark(bf, size, svals, n); + printed += scnprintf(bf + printed, size - printed, " "); + return printed; +} + +static int hpp__color_cycles_hist(struct perf_hpp_fmt *fmt, + struct perf_hpp *hpp, struct hist_entry *he) +{ + struct diff_hpp_fmt *dfmt = + container_of(fmt, struct diff_hpp_fmt, fmt); + struct hist_entry *pair = get_pair_fmt(he, dfmt); + struct block_hist *bh = container_of(he, struct block_hist, he); + struct block_hist *bh_pair; + struct hist_entry *block_he; + char spark[32], buf[128]; + double r; + int ret, pad; + + if (!pair) { + if (bh->block_idx) + hpp->skip = true; + + goto no_print; + } + + bh_pair = container_of(pair, struct block_hist, he); + + 
block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx); + if (!block_he) { + hpp->skip = true; + goto no_print; + } + + ret = print_cycles_spark(spark, sizeof(spark), block_he->diff.svals, + block_he->diff.stats.n); + + r = rel_stddev_stats(stddev_stats(&block_he->diff.stats), + avg_stats(&block_he->diff.stats)); + + if (ret) { + /* + * Padding spaces if number of sparks less than NUM_SPARKS + * otherwise the output is not aligned. + */ + pad = NUM_SPARKS - ((ret - 1) / 3); + scnprintf(buf, sizeof(buf), "%s%5.1f%% %s", "\u00B1", r, spark); + ret = scnprintf(hpp->buf, hpp->size, "%*s", + dfmt->header_width, buf); + + if (pad) { + ret += scnprintf(hpp->buf + ret, hpp->size - ret, + "%-*s", pad, " "); + } + + return ret; + } + +no_print: + return scnprintf(hpp->buf, hpp->size, "%*s", + dfmt->header_width, " "); +} + static void hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size) { @@ -1667,6 +1799,10 @@ static void data__hpp_register(struct data__file *d, int idx) fmt->color = hpp__color_cycles; fmt->sort = hist_entry__cmp_nop; break; + case PERF_HPP_DIFF__CYCLES_HIST: + fmt->color = hpp__color_cycles_hist; + fmt->sort = hist_entry__cmp_nop; + break; default: fmt->sort = hist_entry__cmp_nop; break; @@ -1692,10 +1828,14 @@ static int ui_init(void) * PERF_HPP_DIFF__DELTA * PERF_HPP_DIFF__RATIO * PERF_HPP_DIFF__WEIGHTED_DIFF + * PERF_HPP_DIFF__CYCLES */ data__hpp_register(d, i ? 
compute_2_hpp[compute] : PERF_HPP_DIFF__BASELINE); + if (cycles_hist && i) + data__hpp_register(d, PERF_HPP_DIFF__CYCLES_HIST); + /* * And the rest: * @@ -1850,6 +1990,9 @@ int cmd_diff(int argc, const char **argv) if (quiet) perf_quiet_option(); + if (cycles_hist && (compute != COMPUTE_CYCLES)) + usage_with_options(diff_usage, options); + symbol__annotation_init(); if (symbol__init(NULL) < 0) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8dcfca1a882f..39814b1806a6 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -95,6 +95,7 @@ perf-y += cloexec.o perf-y += call-path.o perf-y += rwsem.o perf-y += thread-stack.o +perf-y += spark.o perf-$(CONFIG_AUXTRACE) += auxtrace.o perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ perf-$(CONFIG_AUXTRACE) += intel-pt.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 4036c7f7b0fb..2b856b6b46f6 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -853,6 +853,10 @@ static int __symbol__account_cycles(struct cyc_hist *ch, ch[offset].start < start) return 0; } + + if (ch[offset].num < NUM_SPARKS) + ch[offset].cycles_spark[ch[offset].num] = cycles; + ch[offset].have_start = have_start; ch[offset].start = start; ch[offset].cycles += cycles; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index d76fd0e81f46..3528bd4f8f21 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -11,6 +11,7 @@ #include #include #include "symbol_conf.h" +#include "spark.h" struct hist_browser_timer; struct hist_entry; @@ -235,6 +236,7 @@ struct cyc_hist { u64 cycles_aggr; u64 cycles_max; u64 cycles_min; + s64 cycles_spark[NUM_SPARKS]; u32 num; u32 num_aggr; u8 have_start; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 7b93f34ac1f4..5aff9542d9b7 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -10,6 +10,8 @@ #include "callchain.h" #include "values.h" #include "hist.h" +#include "stat.h" +#include 
"spark.h" struct option; struct thread; @@ -71,6 +73,8 @@ struct hist_entry_diff { /* PERF_HPP_DIFF__CYCLES */ s64 cycles; }; + struct stats stats; + unsigned long svals[NUM_SPARKS]; }; struct hist_entry_ops { diff --git a/tools/perf/util/spark.c b/tools/perf/util/spark.c new file mode 100644 index 000000000000..70272a8b81a6 --- /dev/null +++ b/tools/perf/util/spark.c @@ -0,0 +1,34 @@ +#include +#include +#include +#include +#include "spark.h" +#include "stat.h" + +#define SPARK_SHIFT 8 + +/* Print spark lines on outf for numval values in val. */ +int print_spark(char *bf, int size, unsigned long *val, int numval) +{ + static const char *ticks[NUM_SPARKS] = { + "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█" + }; + int i, printed = 0; + unsigned long min = ULONG_MAX, max = 0, f; + + for (i = 0; i < numval; i++) { + if (val[i] < min) + min = val[i]; + if (val[i] > max) + max = val[i]; + } + f = ((max - min) << SPARK_SHIFT) / (NUM_SPARKS - 1); + if (f < 1) + f = 1; + for (i = 0; i < numval; i++) { + printed += scnprintf(bf + printed, size - printed, "%s", + ticks[((val[i] - min) << SPARK_SHIFT) / f]); + } + + return printed; +} diff --git a/tools/perf/util/spark.h b/tools/perf/util/spark.h new file mode 100644 index 000000000000..25402d7d7a64 --- /dev/null +++ b/tools/perf/util/spark.h @@ -0,0 +1,8 @@ +#ifndef SPARK_H +#define SPARK_H 1 + +#define NUM_SPARKS 8 + +int print_spark(char *bf, int size, unsigned long *val, int numval); + +#endif diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0b0c6b5b1899..cc2a89b99d3d 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -11,6 +11,7 @@ #include #include "path.h" #include "symbol_conf.h" +#include "spark.h" #ifdef HAVE_LIBELF_SUPPORT #include @@ -111,6 +112,7 @@ struct block_info { u64 end; u64 cycles; u64 cycles_aggr; + s64 cycles_spark[NUM_SPARKS]; int num; int num_aggr; refcount_t refcnt; -- cgit v1.2.3-59-g8ed1b