diff options
Diffstat (limited to 'tools/perf')
217 files changed, 8390 insertions, 2760 deletions
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 31824d5269cc..6e54979c2124 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -48,7 +48,7 @@ man5dir=$(mandir)/man5 man7dir=$(mandir)/man7 ASCIIDOC=asciidoc -ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf +ASCIIDOC_EXTRA += --unsafe -f asciidoc.conf ASCIIDOC_HTML = xhtml11 MANPAGE_XSL = manpage-normal.xsl XMLTO_EXTRA = @@ -59,7 +59,7 @@ HTML_REF = origin/html ifdef USE_ASCIIDOCTOR ASCIIDOC = asciidoctor -ASCIIDOC_EXTRA = -a compat-mode +ASCIIDOC_EXTRA += -a compat-mode ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual" ASCIIDOC_HTML = xhtml5 diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 82ff7dad40c2..e817179c5027 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -1,5 +1,5 @@ i synthesize instructions events - b synthesize branches events + b synthesize branches events (branch misses for Arm SPE) c synthesize branches events (calls only) r synthesize branches events (returns only) x synthesize transactions events @@ -9,8 +9,14 @@ of aux-output (refer to perf record) e synthesize error events d create a debug log + f synthesize first level cache events + m synthesize last level cache events + t synthesize TLB events + a synthesize remote access events g synthesize a call chain (use with i or x) + G synthesize a call chain on existing event records l synthesize last branch entries (use with i or x) + L synthesize last branch entries on existing event records s skip initial number of events The default is all events i.e. the same as --itrace=ibxwpe, @@ -31,6 +37,10 @@ Also the number of last branch entries (default 64, max. 1024) for instructions or transactions events can be specified. + Similar to options g and l, size may also be specified for options G and L. + On x86, note that G and L work poorly when data has been recorded with + large PEBS. Refer linkperf:perf-intel-pt[1] man page for details. + It is also possible to skip events generated (instructions, branches, transactions, ptwrite, power) at the beginning. This is useful to ignore initialization code. diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index 0921a3c67381..bad16512c48d 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -61,6 +61,9 @@ SUBSYSTEM 'epoll':: Eventpoll (epoll) stressing benchmarks. +'internals':: + Benchmark internal perf functionality. + 'all':: All benchmark subsystems. @@ -214,6 +217,11 @@ Suite for evaluating concurrent epoll_wait calls. *ctl*:: Suite for evaluating multiple epoll_ctl calls. +SUITES FOR 'internals' +~~~~~~~~~~~~~~~~~~~~~~ +*synthesize*:: +Suite for evaluating perf's event synthesis performance. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index e6150f21267d..98efdab5fbd4 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -40,7 +40,7 @@ RECORD OPTIONS -------------- -e:: --event=:: - Select the PMU event. Use 'perf mem record -e list' + Select the PMU event. Use 'perf c2c record -e list' to list available events. -v:: @@ -111,6 +111,17 @@ REPORT OPTIONS --display:: Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default. +--stitch-lbr:: + Show callgraph with stitched LBRs, which may have more complete + callgraph. The perf.data file must have been obtained using + perf c2c record --call-graph lbr. + Disabled by default. In common cases with call stack overflows, + it can recreate better call stacks than the default lbr call stack + output. But this approach is not full proof. There can be cases + where it creates incorrect call stacks from incorrect matches. + The known limitations include exception handing such as + setjmp/longjmp will have calls/returns not match. + C2C RECORD ---------- The perf c2c record command setup options related to HITM cacheline analysis diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index f16d8a71d3f5..c7d3df5798e2 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -667,6 +667,11 @@ convert.*:: Limit the size of ordered_events queue, so we could control allocation size of perf data files without proper finished round events. +stat.*:: + + stat.big-num:: + (boolean) Change the default for "--big-num". To make + "--no-big-num" the default, set "stat.big-num=false". intel-pt.*:: diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 456fdcbf26ac..f4cd49a7fcdb 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -69,22 +69,22 @@ And profiled with 'perf report' e.g. To also trace kernel space presents a problem, namely kernel self-modifying code. A fairly good kernel image is available in /proc/kcore but to get an accurate image a copy of /proc/kcore needs to be made under the same conditions -as the data capture. A script perf-with-kcore can do that, but beware that the -script makes use of 'sudo' to copy /proc/kcore. If you have perf installed -locally from the source tree you can do: +as the data capture. 'perf record' can make a copy of /proc/kcore if the option +--kcore is used, but access to /proc/kcore is restricted e.g. - ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls + sudo perf record -o pt_ls --kcore -e intel_pt// -- ls -which will create a directory named 'pt_ls' and put the perf.data file and -copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use -'perf report' becomes: +which will create a directory named 'pt_ls' and put the perf.data file (named +simply 'data') and copies of /proc/kcore, /proc/kallsyms and /proc/modules into +it. The other tools understand the directory format, so to use 'perf report' +becomes: - ~/libexec/perf-core/perf-with-kcore report pt_ls + sudo perf report -i pt_ls Because samples are synthesized after-the-fact, the sampling period can be selected for reporting. e.g. sample every microsecond - ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge + sudo perf report pt_ls --itrace=i1usge See the sections below for more information about the --itrace option. @@ -687,7 +687,7 @@ The v4.2 kernel introduced support for a context switch metadata event, PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes are scheduled out and in, just not by whom, which is left for the PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context, -which in turn requires CAP_SYS_ADMIN. +which in turn requires CAP_PERFMON or CAP_SYS_ADMIN. Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context switches") commit, that introduces these metadata events for further info. @@ -821,7 +821,9 @@ The letters are: e synthesize tracing error events d create a debug log g synthesize a call chain (use with i or x) + G synthesize a call chain on existing event records l synthesize last branch entries (use with i or x) + L synthesize last branch entries on existing event records s skip initial number of events "Instructions" events look like they were recorded by "perf record -e @@ -912,6 +914,39 @@ transactions events can be specified. e.g. Note that last branch entries are cleared for each sample, so there is no overlap from one sample to the next. +The G and L options are designed in particular for sample mode, and work much +like g and l but add call chain and branch stack to the other selected events +instead of synthesized events. For example, to record branch-misses events for +'ls' and then add a call chain derived from the Intel PT trace: + + perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' -- ls + perf report --itrace=Ge + +Although in fact G is a default for perf report, so that is the same as just: + + perf report + +One caveat with the G and L options is that they work poorly with "Large PEBS". +Large PEBS means PEBS records will be accumulated by hardware and the written +into the event buffer in one go. That reduces interrupts, but can give very +late timestamps. Because the Intel PT trace is synchronized by timestamps, +the PEBS events do not match the trace. Currently, Large PEBS is used only in +certain circumstances: + - hardware supports it + - PEBS is used + - event period is specified, instead of frequency + - the sample type is limited to the following flags: + PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | + PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | + PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | + PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_PERIOD (and sometimes) | PERF_SAMPLE_TIME +Because Intel PT sample mode uses a different sample type to the list above, +Large PEBS is not used with Intel PT sample mode. To avoid Large PEBS in other +cases, avoid specifying the event period i.e. avoid the 'perf record' -c option, +--count option, or 'period' config term. + To disable trace decoding entirely, use the option --no-itrace. It is also possible to skip events generated (instructions, branches, transactions) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 6345db33c533..376a50b3452d 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -115,6 +115,11 @@ raw encoding of 0x1A8 can be used: perf stat -e r1a8 -a sleep 1 perf record -e r1a8 ... +It's also possible to use pmu syntax: + + perf record -e r1a8 -a sleep 1 + perf record -e cpu/r1a8/ ... + You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. @@ -258,6 +263,9 @@ Normally all events in an event group sample, but with :S only the first event (the leader) samples, and it only reads the values of the other events in the group. +However, in the case AUX area events (e.g. Intel PT or CoreSight), the AUX +area event must be the leader, so then the second event samples, not the first. + OPTIONS ------- diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b3f3b3f1c161..fa8a5fcd27ab 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -458,7 +458,9 @@ This option sets the time out limit. The default value is 500 ms. --switch-events:: Record context switch events i.e. events of type PERF_RECORD_SWITCH or -PERF_RECORD_SWITCH_CPU_WIDE. +PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight) +switch events will be enabled automatically, which can be suppressed by +by the option --no-switch-events. --clang-path=PATH:: Path to clang binary to use for compiling BPF scriptlets. @@ -556,6 +558,19 @@ overhead. You can still switch them on with: --switch-output --no-no-buildid --no-no-buildid-cache +--switch-output-event:: +Events that will cause the switch of the perf.data file, auto-selecting +--switch-output=signal, the results are similar as internally the side band +thread will also send a SIGUSR2 to the main one. + +Uses the same syntax as --event, it will just not be recorded, serving only to +switch the perf.data file as soon as the --switch-output event is processed by +a separate sideband thread. + +This sideband thread is also used to other purposes, like processing the +PERF_RECORD_BPF_EVENT records as they happen, asking the kernel for extra BPF +information, etc. + --switch-max-files=N:: When rotating perf.data with --switch-output, only keep N files. @@ -596,6 +611,21 @@ Make a copy of /proc/kcore and place it into a directory with the perf data file Limit the sample data max size, <size> is expected to be a number with appended unit character - B/K/M/G +--num-thread-synthesize:: + The number of threads to run when synthesizing events for existing processes. + By default, the number of threads equals 1. + +ifdef::HAVE_LIBPFM[] +--pfm-events events:: +Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) +including support for event filters. For example '--pfm-events +inst_retired:any_p:u:c=1:i'. More than one event can be passed to the +option using the comma separator. Hardware events and generic hardware +events cannot be mixed together. The latter must be used with the -e +option. The -e option and this one can be mixed and matched. Events +can be grouped using the {} notation. +endif::HAVE_LIBPFM[] + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index f569b9ea4002..d068103690cc 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -488,6 +488,17 @@ include::itrace.txt[] This option extends the perf report to show reference callgraphs, which collected by reference event, in no callgraph event. +--stitch-lbr:: + Show callgraph with stitched LBRs, which may have more complete + callgraph. The perf.data file must have been obtained using + perf record --call-graph lbr. + Disabled by default. In common cases with call stack overflows, + it can recreate better call stacks than the default lbr call stack + output. But this approach is not full proof. There can be cases + where it creates incorrect call stacks from incorrect matches. + The known limitations include exception handing such as + setjmp/longjmp will have calls/returns not match. + --socket-filter:: Only report the samples on the processor socket that match with this filter diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 963487e82edc..372dfd110e6d 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -440,6 +440,17 @@ include::itrace.txt[] --show-on-off-events:: Show the --switch-on/off events too. +--stitch-lbr:: + Show callgraph with stitched LBRs, which may have more complete + callgraph. The perf.data file must have been obtained using + perf record --call-graph lbr. + Disabled by default. In common cases with call stack overflows, + it can recreate better call stacks than the default lbr call stack + output. But this approach is not full proof. There can be cases + where it creates incorrect call stacks from incorrect matches. + The known limitations include exception handing such as + setjmp/longjmp will have calls/returns not match. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 4d56586b2fb9..b029ee728a0b 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -71,6 +71,16 @@ report:: --tid=<tid>:: stat events on existing thread id (comma separated list) +ifdef::HAVE_LIBPFM[] +--pfm-events events:: +Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) +including support for event filters. For example '--pfm-events +inst_retired:any_p:u:c=1:i'. More than one event can be passed to the +option using the comma separator. Hardware events and generic hardware +events cannot be mixed together. The latter must be used with the -e +option. The -e option and this one can be mixed and matched. Events +can be grouped using the {} notation. +endif::HAVE_LIBPFM[] -a:: --all-cpus:: @@ -93,7 +103,9 @@ report:: -B:: --big-num:: - print large numbers with thousands' separators according to locale + print large numbers with thousands' separators according to locale. + Enabled by default. Use "--no-big-num" to disable. + Default setting can be changed with "perf config stat.big-num=false". -C:: --cpu=:: @@ -176,6 +188,8 @@ Print count deltas every N milliseconds (minimum: 1ms) The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. example: 'perf stat -I 1000 -e cycles -a sleep 5' +If the metric exists, it is calculated by the counts generated in this interval and the metric is printed after #. + --interval-count times:: Print count deltas for fixed number of times. This option should be used together with "-I" option. @@ -232,6 +246,25 @@ filter out the startup phase of the program, which is often very different. Print statistics of transactional execution if supported. +--metric-no-group:: +By default, events to compute a metric are placed in weak groups. The +group tries to enforce scheduling all or none of the events. The +--metric-no-group option places events outside of groups and may +increase the chance of the event being scheduled - leading to more +accuracy. However, as events may not be scheduled together accuracy +for metrics like instructions per cycle can be lower - as both metrics +may no longer be being measured at the same time. + +--metric-no-merge:: +By default metric events in different weak groups can be shared if one +group contains all the events needed by another. In such cases one +group will be eliminated reducing event multiplexing and making it so +that certain groups of metrics sum to 100%. A downside to sharing a +group is that the group may require multiplexing and so accuracy for a +small group that need not have multiplexing is lowered. This option +forbids the event merging logic from sharing events between groups and +may be used to increase accuracy in this case. + STAT RECORD ----------- Stores stat data into perf data file. diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 487737a725e9..ee2024691d46 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -319,6 +319,26 @@ Default is to monitor all CPUS. go straight to the histogram browser, just like 'perf top' with no events explicitely specified does. +--stitch-lbr:: + Show callgraph with stitched LBRs, which may have more complete + callgraph. The option must be used with --call-graph lbr recording. + Disabled by default. In common cases with call stack overflows, + it can recreate better call stacks than the default lbr call stack + output. But this approach is not full proof. There can be cases + where it creates incorrect call stacks from incorrect matches. + The known limitations include exception handing such as + setjmp/longjmp will have calls/returns not match. + +ifdef::HAVE_LIBPFM[] +--pfm-events events:: +Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) +including support for event filters. For example '--pfm-events +inst_retired:any_p:u:c=1:i'. More than one event can be passed to the +option using the comma separator. Hardware events and generic hardware +events cannot be mixed together. The latter must be used with the -e +option. The -e option and this one can be mixed and matched. Events +can be grouped using the {} notation. +endif::HAVE_LIBPFM[] INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index b0152e1095c5..b6472e463284 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -373,6 +373,22 @@ struct { Indicates that trace contains records of PERF_RECORD_COMPRESSED type that have perf_events records in compressed form. + HEADER_CPU_PMU_CAPS = 28, + + A list of cpu PMU capabilities. The format of data is as below. + +struct { + u32 nr_cpu_pmu_caps; + { + char name[]; + char value[]; + } [nr_cpu_pmu_caps] +}; + + +Example: + cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, diff --git a/tools/perf/Documentation/security.txt b/tools/perf/Documentation/security.txt new file mode 100644 index 000000000000..4fe3b8b1958f --- /dev/null +++ b/tools/perf/Documentation/security.txt @@ -0,0 +1,237 @@ +Overview +======== + +For general security related questions of perf_event_open() syscall usage, +performance monitoring and observability operations by Perf see here: +https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html + +Enabling LSM based mandatory access control (MAC) to perf_event_open() syscall +============================================================================== + +LSM hooks for mandatory access control for perf_event_open() syscall can be +used starting from Linux v5.3. Below are the steps to extend Fedora (v31) with +Targeted policy with perf_event_open() access control capabilities: + +1. Download selinux-policy SRPM package (e.g. selinux-policy-3.14.4-48.fc31.src.rpm on FC31) + and install it so rpmbuild directory would exist in the current working directory: + + # rpm -Uhv selinux-policy-3.14.4-48.fc31.src.rpm + +2. Get into rpmbuild/SPECS directory and unpack the source code: + + # rpmbuild -bp selinux-policy.spec + +3. Place patch below at rpmbuild/BUILD/selinux-policy-b86eaaf4dbcf2d51dd4432df7185c0eaf3cbcc02 + directory and apply it: + + # patch -p1 < selinux-policy-perf-events-perfmon.patch + patching file policy/flask/access_vectors + patching file policy/flask/security_classes + # cat selinux-policy-perf-events-perfmon.patch +diff -Nura a/policy/flask/access_vectors b/policy/flask/access_vectors +--- a/policy/flask/access_vectors 2020-02-04 18:19:53.000000000 +0300 ++++ b/policy/flask/access_vectors 2020-02-28 23:37:25.000000000 +0300 +@@ -174,6 +174,7 @@ + wake_alarm + block_suspend + audit_read ++ perfmon + } + + # +@@ -1099,3 +1100,15 @@ + + class xdp_socket + inherits socket ++ ++class perf_event ++{ ++ open ++ cpu ++ kernel ++ tracepoint ++ read ++ write ++} ++ ++ +diff -Nura a/policy/flask/security_classes b/policy/flask/security_classes +--- a/policy/flask/security_classes 2020-02-04 18:19:53.000000000 +0300 ++++ b/policy/flask/security_classes 2020-02-28 21:35:17.000000000 +0300 +@@ -200,4 +200,6 @@ + + class xdp_socket + ++class perf_event ++ + # FLASK + +4. Get into rpmbuild/SPECS directory and build policy packages from patched sources: + + # rpmbuild --noclean --noprep -ba selinux-policy.spec + + so you have this: + + # ls -alh rpmbuild/RPMS/noarch/ + total 33M + drwxr-xr-x. 2 root root 4.0K Mar 20 12:16 . + drwxr-xr-x. 3 root root 4.0K Mar 20 12:16 .. + -rw-r--r--. 1 root root 112K Mar 20 12:16 selinux-policy-3.14.4-48.fc31.noarch.rpm + -rw-r--r--. 1 root root 1.2M Mar 20 12:17 selinux-policy-devel-3.14.4-48.fc31.noarch.rpm + -rw-r--r--. 1 root root 2.3M Mar 20 12:17 selinux-policy-doc-3.14.4-48.fc31.noarch.rpm + -rw-r--r--. 1 root root 12M Mar 20 12:17 selinux-policy-minimum-3.14.4-48.fc31.noarch.rpm + -rw-r--r--. 1 root root 4.5M Mar 20 12:16 selinux-policy-mls-3.14.4-48.fc31.noarch.rpm + -rw-r--r--. 1 root root 111K Mar 20 12:16 selinux-policy-sandbox-3.14.4-48.fc31.noarch.rpm + -rw-r--r--. 1 root root 14M Mar 20 12:17 selinux-policy-targeted-3.14.4-48.fc31.noarch.rpm + +5. Install SELinux packages from Fedora repo, if not already done so, and + update with the patched rpms above: + + # rpm -Uhv rpmbuild/RPMS/noarch/selinux-policy-* + +6. Enable SELinux Permissive mode for Targeted policy, if not already done so: + + # cat /etc/selinux/config + + # This file controls the state of SELinux on the system. + # SELINUX= can take one of these three values: + # enforcing - SELinux security policy is enforced. + # permissive - SELinux prints warnings instead of enforcing. + # disabled - No SELinux policy is loaded. + SELINUX=permissive + # SELINUXTYPE= can take one of these three values: + # targeted - Targeted processes are protected, + # minimum - Modification of targeted policy. Only selected processes are protected. + # mls - Multi Level Security protection. + SELINUXTYPE=targeted + +7. Enable filesystem SELinux labeling at the next reboot: + + # touch /.autorelabel + +8. Reboot machine and it will label filesystems and load Targeted policy into the kernel; + +9. Login and check that dmesg output doesn't mention that perf_event class is unknown to SELinux subsystem; + +10. Check that SELinux is enabled and in Permissive mode + + # getenforce + Permissive + +11. Turn SELinux into Enforcing mode: + + # setenforce 1 + # getenforce + Enforcing + +Opening access to perf_event_open() syscall on Fedora with SELinux +================================================================== + +Access to performance monitoring and observability operations by Perf +can be limited for superuser or CAP_PERFMON or CAP_SYS_ADMIN privileged +processes. MAC policy settings (e.g. SELinux) can be loaded into the kernel +and prevent unauthorized access to perf_event_open() syscall. In such case +Perf tool provides a message similar to the one below: + + # perf stat + Error: + Access to performance monitoring and observability operations is limited. + Enforced MAC policy settings (SELinux) can limit access to performance + monitoring and observability operations. Inspect system audit records for + more perf_event access control information and adjusting the policy. + Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open + access to performance monitoring and observability operations for users + without CAP_PERFMON or CAP_SYS_ADMIN Linux capability. + perf_event_paranoid setting is -1: + -1: Allow use of (almost) all events by all users + Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK + >= 0: Disallow raw and ftrace function tracepoint access + >= 1: Disallow CPU event access + >= 2: Disallow kernel profiling + To make the adjusted perf_event_paranoid setting permanent preserve it + in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>) + +To make sure that access is limited by MAC policy settings inspect system +audit records using journalctl command or /var/log/audit/audit.log so the +output would contain AVC denied records related to perf_event: + + # journalctl --reverse --no-pager | grep perf_event + + python3[1318099]: SELinux is preventing perf from open access on the perf_event labeled unconfined_t. + If you believe that perf should be allowed open access on perf_event labeled unconfined_t by default. + setroubleshoot[1318099]: SELinux is preventing perf from open access on the perf_event labeled unconfined_t. For complete SELinux messages run: sealert -l 4595ce5b-e58f-462c-9d86-3bc2074935de + audit[1318098]: AVC avc: denied { open } for pid=1318098 comm="perf" scontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tcontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tclass=perf_event permissive=0 + +In order to open access to perf_event_open() syscall MAC policy settings can +require to be extended. On SELinux system this can be done by loading a special +policy module extending base policy settings. Perf related policy module can +be generated using the system audit records about blocking perf_event access. +Run the command below to generate my-perf.te policy extension file with +perf_event related rules: + + # ausearch -c 'perf' --raw | audit2allow -M my-perf && cat my-perf.te + + module my-perf 1.0; + + require { + type unconfined_t; + class perf_event { cpu kernel open read tracepoint write }; + } + + #============= unconfined_t ============== + allow unconfined_t self:perf_event { cpu kernel open read tracepoint write }; + +Now compile, pack and load my-perf.pp extension module into the kernel: + + # checkmodule -M -m -o my-perf.mod my-perf.te + # semodule_package -o my-perf.pp -m my-perf.mod + # semodule -X 300 -i my-perf.pp + +After all those taken steps above access to perf_event_open() syscall should +now be allowed by the policy settings. Check access running Perf like this: + + # perf stat + ^C + Performance counter stats for 'system wide': + + 36,387.41 msec cpu-clock # 7.999 CPUs utilized + 2,629 context-switches # 0.072 K/sec + 57 cpu-migrations # 0.002 K/sec + 1 page-faults # 0.000 K/sec + 263,721,559 cycles # 0.007 GHz + 175,746,713 instructions # 0.67 insn per cycle + 19,628,798 branches # 0.539 M/sec + 1,259,201 branch-misses # 6.42% of all branches + + 4.549061439 seconds time elapsed + +The generated perf-event.pp related policy extension module can be removed +from the kernel using this command: + + # semodule -X 300 -r my-perf + +Alternatively the module can be temporarily disabled and enabled back using +these two commands: + + # semodule -d my-perf + # semodule -e my-perf + +If something went wrong +======================= + +To turn SELinux into Permissive mode: + # setenforce 0 + +To fully disable SELinux during kernel boot [3] set kernel command line parameter selinux=0 + +To remove SELinux labeling from local filesystems: + # find / -mount -print0 | xargs -0 setfattr -h -x security.selinux + +To fully turn SELinux off a machine set SELINUX=disabled at /etc/selinux/config file and reboot; + +Links +===== + +[1] https://download-ib01.fedoraproject.org/pub/fedora/linux/updates/31/Everything/SRPMS/Packages/s/selinux-policy-3.14.4-49.fc31.src.rpm +[2] https://docs.fedoraproject.org/en-US/Fedora/11/html/Security-Enhanced_Linux/sect-Security-Enhanced_Linux-Working_with_SELinux-Enabling_and_Disabling_SELinux.html +[3] https://danwalsh.livejournal.com/10972.html diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 12a8204d63c6..877ca6be0ed0 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -23,12 +23,28 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) NO_PERF_REGS := 1 -NO_SYSCALL_TABLE := 1 + +ifneq ($(NO_SYSCALL_TABLE),1) + NO_SYSCALL_TABLE := 1 + + ifeq ($(SRCARCH),x86) + ifeq (${IS_64_BIT}, 1) + NO_SYSCALL_TABLE := 0 + endif + else + ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390)) + NO_SYSCALL_TABLE := 0 + endif + endif + + ifneq ($(NO_SYSCALL_TABLE),1) + CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT + endif +endif # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) NO_PERF_REGS := 0 - NO_SYSCALL_TABLE := 0 CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 endif @@ -37,7 +53,6 @@ endif ifeq ($(SRCARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) - NO_SYSCALL_TABLE := 0 CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -I$(OUTPUT)arch/x86/include/generated ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma @@ -55,7 +70,6 @@ endif ifeq ($(SRCARCH),arm64) NO_PERF_REGS := 0 - NO_SYSCALL_TABLE := 0 CFLAGS += -I$(OUTPUT)arch/arm64/include/generated LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif @@ -70,7 +84,6 @@ endif ifeq ($(ARCH),s390) NO_PERF_REGS := 0 - NO_SYSCALL_TABLE := 0 CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated endif @@ -78,10 +91,6 @@ ifeq ($(NO_PERF_REGS),0) $(call detected,CONFIG_PERF_REGS) endif -ifneq ($(NO_SYSCALL_TABLE),1) - CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT -endif - # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures @@ -346,7 +355,7 @@ ifndef NO_BIONIC endif ifeq ($(feature-eventfd), 1) - CFLAGS += -DHAVE_EVENTFD + CFLAGS += -DHAVE_EVENTFD_SUPPORT endif ifeq ($(feature-get_current_dir_name), 1) @@ -651,6 +660,7 @@ ifeq ($(NO_SYSCALL_TABLE),0) $(call detected,CONFIG_TRACE) else ifndef NO_LIBAUDIT + $(call feature_check,libaudit) ifneq ($(feature-libaudit), 1) msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); NO_LIBAUDIT := 1 @@ -1012,6 +1022,19 @@ ifdef LIBCLANGLLVM endif endif +ifdef LIBPFM4 + $(call feature_check,libpfm4) + ifeq ($(feature-libpfm4), 1) + CFLAGS += -DHAVE_LIBPFM + EXTLIBS += -lpfm + ASCIIDOC_EXTRA = -aHAVE_LIBPFM=1 + $(call detected,CONFIG_LIBPFM4) + else + msg := $(warning libpfm4 not found, disables libpfm4 support. Please install libpfm4-dev); + NO_LIBPFM4 := 1 + endif +endif + # Among the variables below, these: # perfexecdir # perf_include_dir diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d15a311408f1..86dbb51bb272 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -118,6 +118,12 @@ include ../scripts/utilities.mak # # Define LIBBPF_DYNAMIC to enable libbpf dynamic linking. # +# Define NO_SYSCALL_TABLE=1 to disable the use of syscall id to/from name tables +# generated from the kernel .tbl or unistd.h files and use, if available, libaudit +# for doing the conversions to/from strings/id. +# +# Define LIBPFM4 to enable libpfm4 events extension. +# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -188,7 +194,7 @@ AWK = awk # non-config cases config := 1 -NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf +NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help ifdef MAKECMDGOALS ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) @@ -278,6 +284,7 @@ strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) + PLUGINS_PATH=$(OUTPUT) BPF_PATH=$(OUTPUT) SUBCMD_PATH=$(OUTPUT) LIBPERF_PATH=$(OUTPUT) @@ -288,6 +295,7 @@ else endif else TE_PATH=$(TRACE_EVENT_DIR) + PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ API_PATH=$(LIB_DIR) BPF_PATH=$(BPF_DIR) SUBCMD_PATH=$(SUBCMD_DIR) @@ -297,7 +305,7 @@ endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT -LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)plugins/libtraceevent-dynamic-list +LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list # # The static build has no dynsym table, so this does not work for @@ -756,10 +764,10 @@ $(LIBTRACEEVENT): FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a libtraceevent_plugins: FORCE - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)plugins/libtraceevent-dynamic-list + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) @@ -832,7 +840,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html # 'make doc' should call 'make -C Documentation all' $(DOC_TARGETS): - $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all) + $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all) ASCIIDOC_EXTRA=$(ASCIIDOC_EXTRA) TAG_FOLDERS= . ../lib ../include TAG_FILES= ../../include/uapi/linux/perf_event.h @@ -959,7 +967,7 @@ install-python_ext: # 'make install-doc' should call 'make -C Documentation install' $(INSTALL_DOC_TARGETS): - $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=) + $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=) ASCIIDOC_EXTRA=$(ASCIIDOC_EXTRA) ### Cleaning rules diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 941f814820b8..cea5e33d61d2 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -23,6 +23,7 @@ #include "../../util/event.h" #include "../../util/evlist.h" #include "../../util/evsel.h" +#include "../../util/perf_api_probe.h" #include "../../util/evsel_config.h" #include "../../util/pmu.h" #include "../../util/cs-etm.h" @@ -215,7 +216,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, struct evsel *evsel) { char msg[BUFSIZ], path[PATH_MAX], *sink; - struct perf_evsel_config_term *term; + struct evsel_config_term *term; int ret = -EINVAL; u32 hash; @@ -223,7 +224,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, return 0; list_for_each_entry(term, &evsel->config_terms, list) { - if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) + if (term->type != EVSEL__CONFIG_TERM_DRV_CFG) continue; sink = term->val.str; @@ -232,7 +233,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, ret = perf_pmu__scan_file(pmu, path, "%x", &hash); if (ret != 1) { pr_err("failed to set sink \"%s\" on event %s with %d (%s)\n", - sink, perf_evsel__name(evsel), errno, + sink, evsel__name(evsel), errno, str_error_r(errno, msg, sizeof(msg))); return ret; } @@ -264,7 +265,8 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, ptr->evlist = evlist; ptr->snapshot_mode = opts->auxtrace_snapshot_mode; - if (perf_can_record_switch_events()) + if (!record_opts__no_switch_events(opts) && + perf_can_record_switch_events()) opts->record_switch_events = true; evlist__for_each_entry(evlist, evsel) { @@ -401,7 +403,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, * when a context switch happened. */ if (!perf_cpu_map__empty(cpus)) { - perf_evsel__set_sample_bit(cs_etm_evsel, CPU); + evsel__set_sample_bit(cs_etm_evsel, CPU); err = cs_etm_set_option(itr, cs_etm_evsel, ETM_OPT_CTXTID | ETM_OPT_TS); @@ -425,7 +427,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, /* In per-cpu case, always need the time of mmap events etc */ if (!perf_cpu_map__empty(cpus)) - perf_evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, TIME); } out: diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 27653be24447..e3593063b3d1 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -120,9 +120,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, */ perf_evlist__to_front(evlist, arm_spe_evsel); - perf_evsel__set_sample_bit(arm_spe_evsel, CPU); - perf_evsel__set_sample_bit(arm_spe_evsel, TIME); - perf_evsel__set_sample_bit(arm_spe_evsel, TID); + evsel__set_sample_bit(arm_spe_evsel, CPU); + evsel__set_sample_bit(arm_spe_evsel, TIME); + evsel__set_sample_bit(arm_spe_evsel, TID); /* Add dummy event to keep tracking */ err = parse_events(evlist, "dummy:u", NULL); @@ -134,9 +134,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; - perf_evsel__set_sample_bit(tracking_evsel, TIME); - perf_evsel__set_sample_bit(tracking_evsel, CPU); - perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); + evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, CPU); + evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); return 0; } diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c index 7623d85e77f3..a50941629649 100644 --- a/tools/perf/arch/arm64/util/unwind-libdw.c +++ b/tools/perf/arch/arm64/util/unwind-libdw.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include <elfutils/libdwfl.h> -#include "../../util/unwind-libdw.h" -#include "../../util/perf_regs.h" -#include "../../util/event.h" +#include "../../../util/unwind-libdw.h" +#include "../../../util/perf_regs.h" +#include "../../../util/event.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index e5c9504f8586..e86e210bf514 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -2,6 +2,7 @@ perf-y += header.o perf-y += kvm-stat.o perf-y += perf_regs.o perf-y += mem-events.o +perf-y += sym-handling.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 3b4cdfc5efd6..d4870074f14c 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -7,6 +7,8 @@ #include <string.h> #include <linux/stringify.h> #include "header.h" +#include "metricgroup.h" +#include <api/fs/fs.h> #define mfspr(rn) ({unsigned long rval; \ asm volatile("mfspr %0," __stringify(rn) \ @@ -44,3 +46,9 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused) return bufp; } + +int arch_get_runtimeparam(void) +{ + int count; + return sysfs__read_int("/devices/hv_24x7/interface/sockets", &count) < 0 ? 1 : count; +} diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index 16807269317c..eed9e5a42935 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -39,7 +39,7 @@ static void hcall_event_get_key(struct evsel *evsel, struct event_key *key) { key->info = 0; - key->key = perf_evsel__intval(evsel, sample, "req"); + key->key = evsel__intval(evsel, sample, "req"); } static const char *get_hcall_exit_reason(u64 exit_code) diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c index abf2dbc7f829..7b2d96ec28e3 100644 --- a/tools/perf/arch/powerpc/util/unwind-libdw.c +++ b/tools/perf/arch/powerpc/util/unwind-libdw.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <elfutils/libdwfl.h> #include <linux/kernel.h> -#include "../../util/unwind-libdw.h" -#include "../../util/perf_regs.h" -#include "../../util/event.h" +#include "../../../util/unwind-libdw.h" +#include "../../../util/perf_regs.h" +#include "../../../util/event.h" /* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */ static const int special_regs[3][2] = { diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index 0fd4e9f49ed0..34da89ced29a 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -30,7 +30,7 @@ static void event_icpt_insn_get_key(struct evsel *evsel, { unsigned long insn; - insn = perf_evsel__intval(evsel, sample, "instruction"); + insn = evsel__intval(evsel, sample, "instruction"); key->key = icpt_insn_decoder(insn); key->exit_reasons = sie_icpt_insn_codes; } @@ -39,7 +39,7 @@ static void event_sigp_get_key(struct evsel *evsel, struct perf_sample *sample, struct event_key *key) { - key->key = perf_evsel__intval(evsel, sample, "order_code"); + key->key = evsel__intval(evsel, sample, "order_code"); key->exit_reasons = sie_sigp_order_codes; } @@ -47,7 +47,7 @@ static void event_diag_get_key(struct evsel *evsel, struct perf_sample *sample, struct event_key *key) { - key->key = perf_evsel__intval(evsel, sample, "code"); + key->key = evsel__intval(evsel, sample, "code"); key->exit_reasons = sie_diagnose_codes; } @@ -55,7 +55,7 @@ static void event_icpt_prog_get_key(struct evsel *evsel, struct perf_sample *sample, struct event_key *key) { - key->key = perf_evsel__intval(evsel, sample, "code"); + key->key = evsel__intval(evsel, sample, "code"); key->exit_reasons = sie_icpt_prog_codes; } diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index ef43be9b6ec2..4e40402a4f81 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -55,6 +55,14 @@ int test__arch_unwind_sample(struct perf_sample *sample, return -1; } +#ifdef MEMORY_SANITIZER + /* + * Assignments to buf in the assembly function perf_regs_load aren't + * seen by memory sanitizer. Zero the memory to convince memory + * sanitizer the memory is initialized. + */ + memset(buf, 0, sizeof(u64) * PERF_REGS_MAX); +#endif perf_regs_load(buf); regs->abi = PERF_SAMPLE_REGS_ABI; regs->regs = buf; diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 909ead08a6f6..026d32ed078e 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -130,13 +130,11 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe goto next_event; if (strcmp(event->comm.comm, comm1) == 0) { - CHECK__(perf_evsel__parse_sample(evsel, event, - &sample)); + CHECK__(evsel__parse_sample(evsel, event, &sample)); comm1_time = sample.time; } if (strcmp(event->comm.comm, comm2) == 0) { - CHECK__(perf_evsel__parse_sample(evsel, event, - &sample)); + CHECK__(evsel__parse_sample(evsel, event, &sample)); comm2_time = sample.time; } next_event: diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 09f93800bffd..0dc09b5809c1 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, * AUX event. */ if (!perf_cpu_map__empty(cpus)) - perf_evsel__set_sample_bit(intel_bts_evsel, CPU); + evsel__set_sample_bit(intel_bts_evsel, CPU); } /* Add dummy event to keep tracking */ diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 1643aed8c4c8..839ef52c1ac2 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -25,6 +25,7 @@ #include "../../../util/pmu.h" #include "../../../util/debug.h" #include "../../../util/auxtrace.h" +#include "../../../util/perf_api_probe.h" #include "../../../util/record.h" #include "../../../util/target.h" #include "../../../util/tsc.h" @@ -58,7 +59,8 @@ struct intel_pt_recording { size_t priv_size; }; -static int intel_pt_parse_terms_with_default(struct list_head *formats, +static int intel_pt_parse_terms_with_default(const char *pmu_name, + struct list_head *formats, const char *str, u64 *config) { @@ -77,7 +79,8 @@ static int intel_pt_parse_terms_with_default(struct list_head *formats, goto out_free; attr.config = *config; - err = perf_pmu__config_terms(formats, &attr, terms, true, NULL); + err = perf_pmu__config_terms(pmu_name, formats, &attr, terms, true, + NULL); if (err) goto out_free; @@ -87,11 +90,12 @@ out_free: return err; } -static int intel_pt_parse_terms(struct list_head *formats, const char *str, - u64 *config) +static int intel_pt_parse_terms(const char *pmu_name, struct list_head *formats, + const char *str, u64 *config) { *config = 0; - return intel_pt_parse_terms_with_default(formats, str, config); + return intel_pt_parse_terms_with_default(pmu_name, formats, str, + config); } static u64 intel_pt_masked_bits(u64 mask, u64 bits) @@ -228,7 +232,8 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); - intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, buf, + &config); return config; } @@ -336,13 +341,16 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, if (priv_size != ptr->priv_size) return -EINVAL; - intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); - intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp", - &noretcomp_bit); - intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit); + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, + "tsc", &tsc_bit); + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, + "noretcomp", &noretcomp_bit); + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, + "mtc", &mtc_bit); mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format, "mtc_period"); - intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit); + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, + "cyc", &cyc_bit); intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d); @@ -420,8 +428,8 @@ static int intel_pt_track_switches(struct evlist *evlist) evsel = evlist__last(evlist); - perf_evsel__set_sample_bit(evsel, CPU); - perf_evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, TIME); evsel->core.system_wide = true; evsel->no_aux_samples = true; @@ -555,10 +563,9 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu, struct evsel *evsel) { - struct perf_evsel_config_term *term; u64 user_bits = 0, bits; + struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG); - term = perf_evsel__get_config_term(evsel, CFG_CHG); if (term) user_bits = term->val.cfg_chg; @@ -768,7 +775,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, } } - intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, + "tsc", &tsc_bit); if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit)) have_timing_info = true; @@ -779,7 +787,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * Per-cpu recording needs sched_switch events to distinguish different * threads. */ - if (have_timing_info && !perf_cpu_map__empty(cpus)) { + if (have_timing_info && !perf_cpu_map__empty(cpus) && + !record_opts__no_switch_events(opts)) { if (perf_can_record_switch_events()) { bool cpu_wide = !target__none(&opts->target) && !target__has_task(&opts->target); @@ -801,10 +810,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, switch_evsel->no_aux_samples = true; switch_evsel->immediate = true; - perf_evsel__set_sample_bit(switch_evsel, TID); - perf_evsel__set_sample_bit(switch_evsel, TIME); - perf_evsel__set_sample_bit(switch_evsel, CPU); - perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK); + evsel__set_sample_bit(switch_evsel, TID); + evsel__set_sample_bit(switch_evsel, TIME); + evsel__set_sample_bit(switch_evsel, CPU); + evsel__reset_sample_bit(switch_evsel, BRANCH_STACK); opts->record_switch_events = false; ptr->have_sched_switch = 3; @@ -838,7 +847,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * AUX event. */ if (!perf_cpu_map__empty(cpus)) - perf_evsel__set_sample_bit(intel_pt_evsel, CPU); + evsel__set_sample_bit(intel_pt_evsel, CPU); } /* Add dummy event to keep tracking */ @@ -862,11 +871,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, /* In per-cpu case, always need the time of mmap events etc */ if (!perf_cpu_map__empty(cpus)) { - perf_evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, TIME); /* And the CPU for switch events */ - perf_evsel__set_sample_bit(tracking_evsel, CPU); + evsel__set_sample_bit(tracking_evsel, CPU); } - perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); + evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); } /* @@ -874,7 +883,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * per-cpu with no sched_switch (except workload-only). */ if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) && - !target__none(&opts->target)) + !target__none(&opts->target) && + !intel_pt_evsel->core.attr.exclude_user) ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); return 0; diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index c0775c39227f..072920475b65 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -31,8 +31,8 @@ const char *kvm_exit_trace = "kvm:kvm_exit"; static void mmio_event_get_key(struct evsel *evsel, struct perf_sample *sample, struct event_key *key) { - key->key = perf_evsel__intval(evsel, sample, "gpa"); - key->info = perf_evsel__intval(evsel, sample, "type"); + key->key = evsel__intval(evsel, sample, "gpa"); + key->info = evsel__intval(evsel, sample, "type"); } #define KVM_TRACE_MMIO_READ_UNSATISFIED 0 @@ -48,7 +48,7 @@ static bool mmio_event_begin(struct evsel *evsel, /* MMIO write begin event in kernel. */ if (!strcmp(evsel->name, "kvm:kvm_mmio") && - perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) { + evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) { mmio_event_get_key(evsel, sample, key); return true; } @@ -65,7 +65,7 @@ static bool mmio_event_end(struct evsel *evsel, struct perf_sample *sample, /* MMIO read end event in kernel.*/ if (!strcmp(evsel->name, "kvm:kvm_mmio") && - perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) { + evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) { mmio_event_get_key(evsel, sample, key); return true; } @@ -94,8 +94,8 @@ static void ioport_event_get_key(struct evsel *evsel, struct perf_sample *sample, struct event_key *key) { - key->key = perf_evsel__intval(evsel, sample, "port"); - key->info = perf_evsel__intval(evsel, sample, "rw"); + key->key = evsel__intval(evsel, sample, "port"); + key->info = evsel__intval(evsel, sample, "rw"); } static bool ioport_event_begin(struct evsel *evsel, diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c index fda8f4206ee4..eea2bf87232b 100644 --- a/tools/perf/arch/x86/util/unwind-libdw.c +++ b/tools/perf/arch/x86/util/unwind-libdw.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include <elfutils/libdwfl.h> -#include "../../util/unwind-libdw.h" -#include "../../util/perf_regs.h" -#include "../../util/event.h" +#include "../../../util/unwind-libdw.h" +#include "../../../util/perf_regs.h" +#include "../../../util/event.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index e4e321b6f883..768e408757a0 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -6,9 +6,10 @@ perf-y += futex-wake.o perf-y += futex-wake-parallel.o perf-y += futex-requeue.o perf-y += futex-lock-pi.o - perf-y += epoll-wait.o perf-y += epoll-ctl.o +perf-y += synthesize.o +perf-y += kallsyms-parse.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 4aa6de1aa67d..61cae4966cae 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -41,9 +41,10 @@ int bench_futex_wake_parallel(int argc, const char **argv); int bench_futex_requeue(int argc, const char **argv); /* pi futexes */ int bench_futex_lock_pi(int argc, const char **argv); - int bench_epoll_wait(int argc, const char **argv); int bench_epoll_ctl(int argc, const char **argv); +int bench_synthesize(int argc, const char **argv); +int bench_kallsyms_parse(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index cadc18d42aa4..ca2d591aad8a 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -5,7 +5,7 @@ * Benchmark the various operations allowed for epoll_ctl(2). * The idea is to concurrently stress a single epoll instance */ -#ifdef HAVE_EVENTFD +#ifdef HAVE_EVENTFD_SUPPORT /* For the CLR_() macros */ #include <string.h> #include <pthread.h> @@ -412,4 +412,4 @@ int bench_epoll_ctl(int argc, const char **argv) errmem: err(EXIT_FAILURE, "calloc"); } -#endif // HAVE_EVENTFD +#endif // HAVE_EVENTFD_SUPPORT diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index f938c585d512..75dca9773186 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#ifdef HAVE_EVENTFD +#ifdef HAVE_EVENTFD_SUPPORT /* * Copyright (C) 2018 Davidlohr Bueso. * @@ -519,7 +519,8 @@ int bench_epoll_wait(int argc, const char **argv) qsort(worker, nthreads, sizeof(struct worker), cmpworker); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops / bench__runtime.tv_sec; + unsigned long t = bench__runtime.tv_sec > 0 ? + worker[i].ops / bench__runtime.tv_sec : 0; update_stats(&throughput_stats, t); @@ -539,4 +540,4 @@ int bench_epoll_wait(int argc, const char **argv) errmem: err(EXIT_FAILURE, "calloc"); } -#endif // HAVE_EVENTFD +#endif // HAVE_EVENTFD_SUPPORT diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 65eebe06c04d..915bf3da7ce2 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -205,7 +205,8 @@ int bench_futex_hash(int argc, const char **argv) pthread_mutex_destroy(&thread_lock); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops / bench__runtime.tv_sec; + unsigned long t = bench__runtime.tv_sec > 0 ? + worker[i].ops / bench__runtime.tv_sec : 0; update_stats(&throughput_stats, t); if (!silent) { if (nfutexes == 1) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 89fd8f325f38..bb25d8beb3b8 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -211,7 +211,8 @@ int bench_futex_lock_pi(int argc, const char **argv) pthread_mutex_destroy(&thread_lock); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops / bench__runtime.tv_sec; + unsigned long t = bench__runtime.tv_sec > 0 ? + worker[i].ops / bench__runtime.tv_sec : 0; update_stats(&throughput_stats, t); if (!silent) diff --git a/tools/perf/bench/kallsyms-parse.c b/tools/perf/bench/kallsyms-parse.c new file mode 100644 index 000000000000..2b0d0f980ae9 --- /dev/null +++ b/tools/perf/bench/kallsyms-parse.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Benchmark of /proc/kallsyms parsing. + * + * Copyright 2020 Google LLC. + */ +#include <stdlib.h> +#include "bench.h" +#include "../util/stat.h" +#include <linux/time64.h> +#include <subcmd/parse-options.h> +#include <symbol/kallsyms.h> + +static unsigned int iterations = 100; + +static const struct option options[] = { + OPT_UINTEGER('i', "iterations", &iterations, + "Number of iterations used to compute average"), + OPT_END() +}; + +static const char *const bench_usage[] = { + "perf bench internals kallsyms-parse <options>", + NULL +}; + +static int bench_process_symbol(void *arg __maybe_unused, + const char *name __maybe_unused, + char type __maybe_unused, + u64 start __maybe_unused) +{ + return 0; +} + +static int do_kallsyms_parse(void) +{ + struct timeval start, end, diff; + u64 runtime_us; + unsigned int i; + double time_average, time_stddev; + int err; + struct stats time_stats; + + init_stats(&time_stats); + + for (i = 0; i < iterations; i++) { + gettimeofday(&start, NULL); + err = kallsyms__parse("/proc/kallsyms", NULL, + bench_process_symbol); + if (err) + return err; + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&time_stats, runtime_us); + } + + time_average = avg_stats(&time_stats) / USEC_PER_MSEC; + time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC; + printf(" Average kallsyms__parse took: %.3f ms (+- %.3f ms)\n", + time_average, time_stddev); + return 0; +} + +int bench_kallsyms_parse(int argc, const char **argv) +{ + argc = parse_options(argc, argv, options, bench_usage, 0); + if (argc) { + usage_with_options(bench_usage, options); + exit(EXIT_FAILURE); + } + + return do_kallsyms_parse(); +} diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index 97e4a4fb3362..71d830d7b923 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -40,7 +40,7 @@ struct sender_context { unsigned int num_fds; int ready_out; int wakefd; - int out_fds[0]; + int out_fds[]; }; struct receiver_context { diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c new file mode 100644 index 000000000000..8d624aea1c5e --- /dev/null +++ b/tools/perf/bench/synthesize.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Benchmark synthesis of perf events such as at the start of a 'perf + * record'. Synthesis is done on the current process and the 'dummy' event + * handlers are invoked that support dump_trace but otherwise do nothing. + * + * Copyright 2019 Google LLC. + */ +#include <stdio.h> +#include "bench.h" +#include "../util/debug.h" +#include "../util/session.h" +#include "../util/stat.h" +#include "../util/synthetic-events.h" +#include "../util/target.h" +#include "../util/thread_map.h" +#include "../util/tool.h" +#include "../util/util.h" +#include <linux/atomic.h> +#include <linux/err.h> +#include <linux/time64.h> +#include <subcmd/parse-options.h> + +static unsigned int min_threads = 1; +static unsigned int max_threads = UINT_MAX; +static unsigned int single_iterations = 10000; +static unsigned int multi_iterations = 10; +static bool run_st; +static bool run_mt; + +static const struct option options[] = { + OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"), + OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"), + OPT_UINTEGER('m', "min-threads", &min_threads, + "Minimum number of threads in multithreaded bench"), + OPT_UINTEGER('M', "max-threads", &max_threads, + "Maximum number of threads in multithreaded bench"), + OPT_UINTEGER('i', "single-iterations", &single_iterations, + "Number of iterations used to compute single-threaded average"), + OPT_UINTEGER('I', "multi-iterations", &multi_iterations, + "Number of iterations used to compute multi-threaded average"), + OPT_END() +}; + +static const char *const bench_usage[] = { + "perf bench internals synthesize <options>", + NULL +}; + +static atomic_t event_count; + +static int process_synthesized_event(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + atomic_inc(&event_count); + return 0; +} + +static int do_run_single_threaded(struct perf_session *session, + struct perf_thread_map *threads, + struct target *target, bool data_mmap) +{ + const unsigned int nr_threads_synthesize = 1; + struct timeval start, end, diff; + u64 runtime_us; + unsigned int i; + double time_average, time_stddev, event_average, event_stddev; + int err; + struct stats time_stats, event_stats; + + init_stats(&time_stats); + init_stats(&event_stats); + + for (i = 0; i < single_iterations; i++) { + atomic_set(&event_count, 0); + gettimeofday(&start, NULL); + err = __machine__synthesize_threads(&session->machines.host, + NULL, + target, threads, + process_synthesized_event, + data_mmap, + nr_threads_synthesize); + if (err) + return err; + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&time_stats, runtime_us); + update_stats(&event_stats, atomic_read(&event_count)); + } + + time_average = avg_stats(&time_stats); + time_stddev = stddev_stats(&time_stats); + printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n", + data_mmap ? "data " : "", time_average, time_stddev); + + event_average = avg_stats(&event_stats); + event_stddev = stddev_stats(&event_stats); + printf(" Average num. events: %.3f (+- %.3f)\n", + event_average, event_stddev); + + printf(" Average time per event %.3f usec\n", + time_average / event_average); + return 0; +} + +static int run_single_threaded(void) +{ + struct perf_session *session; + struct target target = { + .pid = "self", + }; + struct perf_thread_map *threads; + int err; + + perf_set_singlethreaded(); + session = perf_session__new(NULL, false, NULL); + if (IS_ERR(session)) { + pr_err("Session creation failed.\n"); + return PTR_ERR(session); + } + threads = thread_map__new_by_pid(getpid()); + if (!threads) { + pr_err("Thread map creation failed.\n"); + err = -ENOMEM; + goto err_out; + } + + puts( +"Computing performance of single threaded perf event synthesis by\n" +"synthesizing events on the perf process itself:"); + + err = do_run_single_threaded(session, threads, &target, false); + if (err) + goto err_out; + + err = do_run_single_threaded(session, threads, &target, true); + +err_out: + if (threads) + perf_thread_map__put(threads); + + perf_session__delete(session); + return err; +} + +static int do_run_multi_threaded(struct target *target, + unsigned int nr_threads_synthesize) +{ + struct timeval start, end, diff; + u64 runtime_us; + unsigned int i; + double time_average, time_stddev, event_average, event_stddev; + int err; + struct stats time_stats, event_stats; + struct perf_session *session; + + init_stats(&time_stats); + init_stats(&event_stats); + for (i = 0; i < multi_iterations; i++) { + session = perf_session__new(NULL, false, NULL); + if (!session) + return -ENOMEM; + + atomic_set(&event_count, 0); + gettimeofday(&start, NULL); + err = __machine__synthesize_threads(&session->machines.host, + NULL, + target, NULL, + process_synthesized_event, + false, + nr_threads_synthesize); + if (err) { + perf_session__delete(session); + return err; + } + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&time_stats, runtime_us); + update_stats(&event_stats, atomic_read(&event_count)); + perf_session__delete(session); + } + + time_average = avg_stats(&time_stats); + time_stddev = stddev_stats(&time_stats); + printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n", + time_average, time_stddev); + + event_average = avg_stats(&event_stats); + event_stddev = stddev_stats(&event_stats); + printf(" Average num. events: %.3f (+- %.3f)\n", + event_average, event_stddev); + + printf(" Average time per event %.3f usec\n", + time_average / event_average); + return 0; +} + +static int run_multi_threaded(void) +{ + struct target target = { + .cpu_list = "0" + }; + unsigned int nr_threads_synthesize; + int err; + + if (max_threads == UINT_MAX) + max_threads = sysconf(_SC_NPROCESSORS_ONLN); + + puts( +"Computing performance of multi threaded perf event synthesis by\n" +"synthesizing events on CPU 0:"); + + for (nr_threads_synthesize = min_threads; + nr_threads_synthesize <= max_threads; + nr_threads_synthesize++) { + if (nr_threads_synthesize == 1) + perf_set_singlethreaded(); + else + perf_set_multithreaded(); + + printf(" Number of synthesis threads: %u\n", + nr_threads_synthesize); + + err = do_run_multi_threaded(&target, nr_threads_synthesize); + if (err) + return err; + } + perf_set_singlethreaded(); + return 0; +} + +int bench_synthesize(int argc, const char **argv) +{ + int err = 0; + + argc = parse_options(argc, argv, options, bench_usage, 0); + if (argc) { + usage_with_options(bench_usage, options); + exit(EXIT_FAILURE); + } + + /* + * If neither single threaded or multi-threaded are specified, default + * to running just single threaded. + */ + if (!run_st && !run_mt) + run_st = true; + + if (run_st) + err = run_single_threaded(); + + if (!err && run_mt) + err = run_multi_threaded(); + + return err; +} diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6c0a0412502e..4940d10074c3 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -212,11 +212,9 @@ static bool has_annotation(struct perf_annotate *ann) return ui__has_annotation() || ann->use_stdio2; } -static int perf_evsel__add_sample(struct evsel *evsel, - struct perf_sample *sample, - struct addr_location *al, - struct perf_annotate *ann, - struct machine *machine) +static int evsel__add_sample(struct evsel *evsel, struct perf_sample *sample, + struct addr_location *al, struct perf_annotate *ann, + struct machine *machine) { struct hists *hists = evsel__hists(evsel); struct hist_entry *he; @@ -278,7 +276,7 @@ static int process_sample_event(struct perf_tool *tool, goto out_put; if (!al.filtered && - perf_evsel__add_sample(evsel, sample, &al, ann, machine)) { + evsel__add_sample(evsel, sample, &al, ann, machine)) { pr_warning("problem incrementing symbol count, " "skipping event\n"); ret = -1; @@ -433,11 +431,10 @@ static int __cmd_annotate(struct perf_annotate *ann) total_nr_samples += nr_samples; hists__collapse_resort(hists, NULL); /* Don't sort callchain */ - perf_evsel__reset_sample_bit(pos, CALLCHAIN); - perf_evsel__output_resort(pos, NULL); + evsel__reset_sample_bit(pos, CALLCHAIN); + evsel__output_resort(pos, NULL); - if (symbol_conf.event_group && - !perf_evsel__is_group_leader(pos)) + if (symbol_conf.event_group && !evsel__is_group_leader(pos)) continue; hists__find_annotations(hists, pos, ann); diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index c06fe21c8613..cad31b1d3438 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -67,14 +67,20 @@ static struct bench futex_benchmarks[] = { { NULL, NULL, NULL } }; -#ifdef HAVE_EVENTFD +#ifdef HAVE_EVENTFD_SUPPORT static struct bench epoll_benchmarks[] = { { "wait", "Benchmark epoll concurrent epoll_waits", bench_epoll_wait }, { "ctl", "Benchmark epoll concurrent epoll_ctls", bench_epoll_ctl }, { "all", "Run all futex benchmarks", NULL }, { NULL, NULL, NULL } }; -#endif // HAVE_EVENTFD +#endif // HAVE_EVENTFD_SUPPORT + +static struct bench internals_benchmarks[] = { + { "synthesize", "Benchmark perf event synthesis", bench_synthesize }, + { "kallsyms-parse", "Benchmark kallsyms parsing", bench_kallsyms_parse }, + { NULL, NULL, NULL } +}; struct collection { const char *name; @@ -89,9 +95,10 @@ static struct collection collections[] = { { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks }, #endif {"futex", "Futex stressing benchmarks", futex_benchmarks }, -#ifdef HAVE_EVENTFD +#ifdef HAVE_EVENTFD_SUPPORT {"epoll", "Epoll stressing benchmarks", epoll_benchmarks }, #endif + { "internals", "Perf-internals benchmarks", internals_benchmarks }, { "all", "All benchmarks", NULL }, { NULL, NULL, NULL } }; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 246ac0b4d54f..d617d5682c68 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -95,6 +95,7 @@ struct perf_c2c { bool use_stdio; bool stats_only; bool symbol_full; + bool stitch_lbr; /* HITM shared clines stats */ struct c2c_stats hitm_stats; @@ -273,6 +274,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, return -1; } + if (c2c.stitch_lbr) + al.thread->lbr_stitch_enable = true; + ret = sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, sysctl_perf_event_max_stack); if (ret) @@ -1705,7 +1709,7 @@ static struct c2c_dimension *get_dimension(const char *name) if (!strcmp(dim->name, name)) return dim; - }; + } return NULL; } @@ -1921,7 +1925,7 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats) FILTER_HITM(tot_hitm); default: break; - }; + } #undef FILTER_HITM @@ -2255,8 +2259,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session) fprintf(out, "=================================================\n"); evlist__for_each_entry(evlist, evsel) { - fprintf(out, "%-36s: %s\n", first ? " Events" : "", - perf_evsel__name(evsel)); + fprintf(out, "%-36s: %s\n", first ? " Events" : "", evsel__name(evsel)); first = false; } fprintf(out, " Cachelines sort on : %s HITMs\n", @@ -2601,6 +2604,12 @@ static int setup_callchain(struct evlist *evlist) } } + if (c2c.stitch_lbr && (mode != CALLCHAIN_LBR)) { + ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" + "Please apply --call-graph lbr when recording.\n"); + c2c.stitch_lbr = false; + } + callchain_param.record_mode = mode; callchain_param.min_percent = 0; return 0; @@ -2752,6 +2761,8 @@ static int perf_c2c__report(int argc, const char **argv) OPT_STRING('c', "coalesce", &coalesce, "coalesce fields", "coalesce fields: pid,tid,iaddr,dso"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), + OPT_BOOLEAN(0, "stitch-lbr", &c2c.stitch_lbr, + "Enable LBR callgraph stitching approach"), OPT_PARENT(c2c_options), OPT_END() }; @@ -2876,8 +2887,15 @@ static int parse_record_events(const struct option *opt, { bool *event_set = (bool *) opt->value; + if (!strcmp(str, "list")) { + perf_mem_events__list(); + exit(0); + } + if (perf_mem_events__parse(str)) + exit(-1); + *event_set = true; - return perf_mem_events__parse(str); + return 0; } @@ -2947,7 +2965,7 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "-e"; rec_argv[i++] = perf_mem_events__name(j); - }; + } if (all_user) rec_argv[i++] = "--all-user"; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index c94a002f295e..f8c9bdd8269a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -467,7 +467,7 @@ static struct evsel *evsel_match(struct evsel *evsel, struct evsel *e; evlist__for_each_entry(evlist, e) { - if (perf_evsel__match2(evsel, e)) + if (evsel__match2(evsel, e)) return e; } @@ -981,7 +981,7 @@ static void data_process(void) if (!quiet) { fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n", - perf_evsel__name(evsel_base)); + evsel__name(evsel_base)); } first = false; @@ -990,7 +990,7 @@ static void data_process(void) data__fprintf(); /* Don't sort callchain for perf diff */ - perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN); + evsel__reset_sample_bit(evsel_base, CALLCHAIN); hists__process(hists_base); } @@ -1562,7 +1562,7 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair, default: BUG_ON(1); - }; + } } static void diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 440501994931..98e992801251 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -34,7 +34,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details return PTR_ERR(session); evlist__for_each_entry(session->evlist, pos) { - perf_evsel__fprintf(pos, details, stdout); + evsel__fprintf(pos, details, stdout); if (pos->core.attr.type == PERF_TYPE_TRACEPOINT) has_tracepoint = true; diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index d5adc417a4ca..2bfc1b0db536 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -45,6 +45,7 @@ struct filter_entry { char name[]; }; +static volatile int workload_exec_errno; static bool done; static void sig_handler(int sig __maybe_unused) @@ -63,7 +64,7 @@ static void ftrace__workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info __maybe_unused, void *ucontext __maybe_unused) { - /* workload_exec_errno = info->si_value.sival_int; */ + workload_exec_errno = info->si_value.sival_int; done = true; } @@ -284,10 +285,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) .events = POLLIN, }; - if (!perf_cap__capable(CAP_SYS_ADMIN)) { + if (!(perf_cap__capable(CAP_PERFMON) || + perf_cap__capable(CAP_SYS_ADMIN))) { pr_err("ftrace only works for %s!\n", #ifdef HAVE_LIBCAP_SUPPORT - "users with the SYS_ADMIN capability" + "users with the CAP_PERFMON or CAP_SYS_ADMIN capability" #else "root" #endif @@ -382,6 +384,14 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) write_tracing_file("tracing_on", "0"); + if (workload_exec_errno) { + const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf)); + /* flush stdout first so below error msg appears at the end. */ + fflush(stdout); + pr_err("workload failed: %s\n", emsg); + goto out_close_fd; + } + /* read remaining buffer contents */ while (true) { int n = read(trace_fd, buf, sizeof(buf)); @@ -396,7 +406,7 @@ out_close_fd: out_reset: reset_tracing_files(ftrace); out: - return done ? 0 : -1; + return (done && !workload_exec_errno) ? 0 : -1; } static int perf_ftrace_config(const char *var, const char *value, void *cb) @@ -493,7 +503,7 @@ int cmd_ftrace(int argc, const char **argv) argc = parse_options(argc, argv, ftrace_options, ftrace_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc && target__none(&ftrace.target)) - usage_with_options(ftrace_usage, ftrace_options); + ftrace.target.system_wide = true; ret = target__validate(&ftrace.target); if (ret) { diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 7e124a7b8bfd..4a6de4b03ac0 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -51,7 +51,7 @@ struct perf_inject { struct event_entry { struct list_head node; u32 tid; - union perf_event event[0]; + union perf_event event[]; }; static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) @@ -536,7 +536,7 @@ static int perf_inject__sched_stat(struct perf_tool *tool, union perf_event *event_sw; struct perf_sample sample_sw; struct perf_inject *inject = container_of(tool, struct perf_inject, tool); - u32 pid = perf_evsel__intval(evsel, sample, "pid"); + u32 pid = evsel__intval(evsel, sample, "pid"); list_for_each_entry(ent, &inject->samples, node) { if (pid == ent->tid) @@ -546,7 +546,7 @@ static int perf_inject__sched_stat(struct perf_tool *tool, return 0; found: event_sw = &ent->event[0]; - perf_evsel__parse_sample(evsel, event_sw, &sample_sw); + evsel__parse_sample(evsel, event_sw, &sample_sw); sample_sw.period = sample->period; sample_sw.time = sample->time; @@ -561,11 +561,10 @@ static void sig_handler(int sig __maybe_unused) session_done = 1; } -static int perf_evsel__check_stype(struct evsel *evsel, - u64 sample_type, const char *sample_msg) +static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg) { struct perf_event_attr *attr = &evsel->core.attr; - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); if (!(attr->sample_type & sample_type)) { pr_err("Samples for %s event do not have %s attribute set.", @@ -622,10 +621,10 @@ static int __cmd_inject(struct perf_inject *inject) struct evsel *evsel; evlist__for_each_entry(session->evlist, evsel) { - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); if (!strcmp(name, "sched:sched_switch")) { - if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID")) + if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID")) return -EINVAL; evsel->handler = perf_inject__sched_switch; @@ -684,14 +683,14 @@ static int __cmd_inject(struct perf_inject *inject) perf_header__clear_feat(&session->header, HEADER_AUXTRACE); - if (inject->itrace_synth_opts.last_branch) + if (inject->itrace_synth_opts.last_branch || + inject->itrace_synth_opts.add_last_branch) perf_header__set_feat(&session->header, HEADER_BRANCH_STACK); evsel = perf_evlist__id2evsel_strict(session->evlist, inject->aux_id); if (evsel) { - pr_debug("Deleting %s\n", - perf_evsel__name(evsel)); + pr_debug("Deleting %s\n", evsel__name(evsel)); evlist__remove(session->evlist, evsel); evsel__delete(evsel); } diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 003c85f5f56c..38a5ab683ebc 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -169,13 +169,12 @@ static int insert_caller_stat(unsigned long call_site, return 0; } -static int perf_evsel__process_alloc_event(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *sample) { - unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"), - call_site = perf_evsel__intval(evsel, sample, "call_site"); - int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"), - bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc"); + unsigned long ptr = evsel__intval(evsel, sample, "ptr"), + call_site = evsel__intval(evsel, sample, "call_site"); + int bytes_req = evsel__intval(evsel, sample, "bytes_req"), + bytes_alloc = evsel__intval(evsel, sample, "bytes_alloc"); if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) || insert_caller_stat(call_site, bytes_req, bytes_alloc)) @@ -188,14 +187,13 @@ static int perf_evsel__process_alloc_event(struct evsel *evsel, return 0; } -static int perf_evsel__process_alloc_node_event(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample) { - int ret = perf_evsel__process_alloc_event(evsel, sample); + int ret = evsel__process_alloc_event(evsel, sample); if (!ret) { int node1 = cpu__get_node(sample->cpu), - node2 = perf_evsel__intval(evsel, sample, "node"); + node2 = evsel__intval(evsel, sample, "node"); if (node1 != node2) nr_cross_allocs++; @@ -232,10 +230,9 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr, return NULL; } -static int perf_evsel__process_free_event(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_free_event(struct evsel *evsel, struct perf_sample *sample) { - unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"); + unsigned long ptr = evsel__intval(evsel, sample, "ptr"); struct alloc_stat *s_alloc, *s_caller; s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); @@ -784,13 +781,12 @@ static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample, return 0; } -static int perf_evsel__process_page_alloc_event(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_page_alloc_event(struct evsel *evsel, struct perf_sample *sample) { u64 page; - unsigned int order = perf_evsel__intval(evsel, sample, "order"); - unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags"); - unsigned int migrate_type = perf_evsel__intval(evsel, sample, + unsigned int order = evsel__intval(evsel, sample, "order"); + unsigned int gfp_flags = evsel__intval(evsel, sample, "gfp_flags"); + unsigned int migrate_type = evsel__intval(evsel, sample, "migratetype"); u64 bytes = kmem_page_size << order; u64 callsite; @@ -802,9 +798,9 @@ static int perf_evsel__process_page_alloc_event(struct evsel *evsel, }; if (use_pfn) - page = perf_evsel__intval(evsel, sample, "pfn"); + page = evsel__intval(evsel, sample, "pfn"); else - page = perf_evsel__intval(evsel, sample, "page"); + page = evsel__intval(evsel, sample, "page"); nr_page_allocs++; total_page_alloc_bytes += bytes; @@ -857,11 +853,10 @@ static int perf_evsel__process_page_alloc_event(struct evsel *evsel, return 0; } -static int perf_evsel__process_page_free_event(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_page_free_event(struct evsel *evsel, struct perf_sample *sample) { u64 page; - unsigned int order = perf_evsel__intval(evsel, sample, "order"); + unsigned int order = evsel__intval(evsel, sample, "order"); u64 bytes = kmem_page_size << order; struct page_stat *pstat; struct page_stat this = { @@ -869,9 +864,9 @@ static int perf_evsel__process_page_free_event(struct evsel *evsel, }; if (use_pfn) - page = perf_evsel__intval(evsel, sample, "pfn"); + page = evsel__intval(evsel, sample, "pfn"); else - page = perf_evsel__intval(evsel, sample, "page"); + page = evsel__intval(evsel, sample, "page"); nr_page_frees++; total_page_free_bytes += bytes; @@ -1371,15 +1366,15 @@ static int __cmd_kmem(struct perf_session *session) struct evsel *evsel; const struct evsel_str_handler kmem_tracepoints[] = { /* slab allocator */ - { "kmem:kmalloc", perf_evsel__process_alloc_event, }, - { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, }, - { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, }, - { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, }, - { "kmem:kfree", perf_evsel__process_free_event, }, - { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, + { "kmem:kmalloc", evsel__process_alloc_event, }, + { "kmem:kmem_cache_alloc", evsel__process_alloc_event, }, + { "kmem:kmalloc_node", evsel__process_alloc_node_event, }, + { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, }, + { "kmem:kfree", evsel__process_free_event, }, + { "kmem:kmem_cache_free", evsel__process_free_event, }, /* page allocator */ - { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, }, - { "kmem:mm_page_free", perf_evsel__process_page_free_event, }, + { "kmem:mm_page_alloc", evsel__process_page_alloc_event, }, + { "kmem:mm_page_free", evsel__process_page_free_event, }, }; if (!perf_session__has_traces(session, "kmem record")) @@ -1391,8 +1386,8 @@ static int __cmd_kmem(struct perf_session *session) } evlist__for_each_entry(session->evlist, evsel) { - if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") && - perf_evsel__field(evsel, "pfn")) { + if (!strcmp(evsel__name(evsel), "kmem:mm_page_alloc") && + evsel__field(evsel, "pfn")) { use_pfn = true; break; } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 577af4f3297a..95a77058023e 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -69,7 +69,7 @@ void exit_event_get_key(struct evsel *evsel, struct event_key *key) { key->info = 0; - key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason); + key->key = evsel__intval(evsel, sample, kvm_exit_reason); } bool kvm_exit_event(struct evsel *evsel) @@ -416,8 +416,7 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread, return NULL; } - vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, - vcpu_id_str); + vcpu_record->vcpu_id = evsel__intval(evsel, sample, vcpu_id_str); thread__set_priv(thread, vcpu_record); } @@ -1033,16 +1032,16 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) struct perf_event_attr *attr = &pos->core.attr; /* make sure these *are* set */ - perf_evsel__set_sample_bit(pos, TID); - perf_evsel__set_sample_bit(pos, TIME); - perf_evsel__set_sample_bit(pos, CPU); - perf_evsel__set_sample_bit(pos, RAW); + evsel__set_sample_bit(pos, TID); + evsel__set_sample_bit(pos, TIME); + evsel__set_sample_bit(pos, CPU); + evsel__set_sample_bit(pos, RAW); /* make sure these are *not*; want as small a sample as possible */ - perf_evsel__reset_sample_bit(pos, PERIOD); - perf_evsel__reset_sample_bit(pos, IP); - perf_evsel__reset_sample_bit(pos, CALLCHAIN); - perf_evsel__reset_sample_bit(pos, ADDR); - perf_evsel__reset_sample_bit(pos, READ); + evsel__reset_sample_bit(pos, PERIOD); + evsel__reset_sample_bit(pos, IP); + evsel__reset_sample_bit(pos, CALLCHAIN); + evsel__reset_sample_bit(pos, ADDR); + evsel__reset_sample_bit(pos, READ); attr->mmap = 0; attr->comm = 0; attr->task = 0; diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 965ef017496f..0a7fe4cb5555 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -42,7 +42,7 @@ int cmd_list(int argc, const char **argv) OPT_END() }; const char * const list_usage[] = { - "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]", + "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]", NULL }; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 474dfd59d7eb..f0a1dbacb46c 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -48,7 +48,7 @@ struct lock_stat { struct rb_node rb; /* used for sorting */ /* - * FIXME: perf_evsel__intval() returns u64, + * FIXME: evsel__intval() returns u64, * so address of lockdep_map should be dealed as 64bit. * Is there more better solution? */ @@ -404,9 +404,9 @@ static int report_lock_acquire_event(struct evsel *evsel, struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; - const char *name = perf_evsel__strval(evsel, sample, "name"); - u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); - int flag = perf_evsel__intval(evsel, sample, "flag"); + const char *name = evsel__strval(evsel, sample, "name"); + u64 tmp = evsel__intval(evsel, sample, "lockdep_addr"); + int flag = evsel__intval(evsel, sample, "flag"); memcpy(&addr, &tmp, sizeof(void *)); @@ -477,8 +477,8 @@ static int report_lock_acquired_event(struct evsel *evsel, struct thread_stat *ts; struct lock_seq_stat *seq; u64 contended_term; - const char *name = perf_evsel__strval(evsel, sample, "name"); - u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); + const char *name = evsel__strval(evsel, sample, "name"); + u64 tmp = evsel__intval(evsel, sample, "lockdep_addr"); memcpy(&addr, &tmp, sizeof(void *)); @@ -539,8 +539,8 @@ static int report_lock_contended_event(struct evsel *evsel, struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; - const char *name = perf_evsel__strval(evsel, sample, "name"); - u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); + const char *name = evsel__strval(evsel, sample, "name"); + u64 tmp = evsel__intval(evsel, sample, "lockdep_addr"); memcpy(&addr, &tmp, sizeof(void *)); @@ -594,8 +594,8 @@ static int report_lock_release_event(struct evsel *evsel, struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; - const char *name = perf_evsel__strval(evsel, sample, "name"); - u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); + const char *name = evsel__strval(evsel, sample, "name"); + u64 tmp = evsel__intval(evsel, sample, "lockdep_addr"); memcpy(&addr, &tmp, sizeof(void *)); @@ -657,32 +657,28 @@ static struct trace_lock_handler report_lock_ops = { static struct trace_lock_handler *trace_handler; -static int perf_evsel__process_lock_acquire(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample) { if (trace_handler->acquire_event) return trace_handler->acquire_event(evsel, sample); return 0; } -static int perf_evsel__process_lock_acquired(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample) { if (trace_handler->acquired_event) return trace_handler->acquired_event(evsel, sample); return 0; } -static int perf_evsel__process_lock_contended(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample) { if (trace_handler->contended_event) return trace_handler->contended_event(evsel, sample); return 0; } -static int perf_evsel__process_lock_release(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample) { if (trace_handler->release_event) return trace_handler->release_event(evsel, sample); @@ -775,7 +771,7 @@ static void dump_threads(void) pr_info("%10d: %s\n", st->tid, thread__comm_str(t)); node = rb_next(node); thread__put(t); - }; + } } static void dump_map(void) @@ -849,10 +845,10 @@ static void sort_result(void) } static const struct evsel_str_handler lock_tracepoints[] = { - { "lock:lock_acquire", perf_evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ - { "lock:lock_acquired", perf_evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ - { "lock:lock_contended", perf_evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ - { "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ + { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ + { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ }; static bool force; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index a13f5817d6fc..3523279af6af 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -38,26 +38,16 @@ static int parse_record_events(const struct option *opt, const char *str, int unset __maybe_unused) { struct perf_mem *mem = *(struct perf_mem **)opt->value; - int j; - if (strcmp(str, "list")) { - if (!perf_mem_events__parse(str)) { - mem->operation = 0; - return 0; - } - exit(-1); + if (!strcmp(str, "list")) { + perf_mem_events__list(); + exit(0); } + if (perf_mem_events__parse(str)) + exit(-1); - for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - struct perf_mem_event *e = &perf_mem_events[j]; - - fprintf(stderr, "%-13s%-*s%s\n", - e->tag, - verbose > 0 ? 25 : 0, - verbose > 0 ? perf_mem_events__name(j) : "", - e->supported ? ": available" : ""); - } - exit(0); + mem->operation = 0; + return 0; } static const char * const __usage[] = { @@ -123,7 +113,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "-e"; rec_argv[i++] = perf_mem_events__name(j); - }; + } if (all_user) rec_argv[i++] = "--all-user"; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 70548df2abb9..6b1507566770 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -364,6 +364,9 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs) for (k = 0; k < pev->ntevs; k++) { struct probe_trace_event *tev = &pev->tevs[k]; + /* Skipped events have no event name */ + if (!tev->event) + continue; /* We use tev's name for showing new events */ show_perf_probe_event(tev->group, tev->event, pev, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1ab349abe904..e108d90ae2ed 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -34,6 +34,7 @@ #include "util/tsc.h" #include "util/parse-branch-options.h" #include "util/parse-regs-options.h" +#include "util/perf_api_probe.h" #include "util/llvm-utils.h" #include "util/bpf-loader.h" #include "util/trigger.h" @@ -43,6 +44,8 @@ #include "util/time-utils.h" #include "util/units.h" #include "util/bpf-event.h" +#include "util/util.h" +#include "util/pfm.h" #include "asm/bug.h" #include "perf.h" @@ -50,9 +53,13 @@ #include <inttypes.h> #include <locale.h> #include <poll.h> +#include <pthread.h> #include <unistd.h> #include <sched.h> #include <signal.h> +#ifdef HAVE_EVENTFD_SUPPORT +#include <sys/eventfd.h> +#endif #include <sys/mman.h> #include <sys/wait.h> #include <sys/types.h> @@ -84,7 +91,10 @@ struct record { struct auxtrace_record *itr; struct evlist *evlist; struct perf_session *session; + struct evlist *sb_evlist; + pthread_t thread_id; int realtime_prio; + bool switch_output_event_set; bool no_buildid; bool no_buildid_set; bool no_buildid_cache; @@ -503,6 +513,20 @@ static int process_synthesized_event(struct perf_tool *tool, return record__write(rec, NULL, event, event->header.size); } +static int process_locked_synthesized_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER; + int ret; + + pthread_mutex_lock(&synth_lock); + ret = process_synthesized_event(tool, event, sample, machine); + pthread_mutex_unlock(&synth_lock); + return ret; +} + static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) { struct record *rec = to; @@ -518,6 +542,9 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) static volatile int signr = -1; static volatile int child_finished; +#ifdef HAVE_EVENTFD_SUPPORT +static int done_fd = -1; +#endif static void sig_handler(int sig) { @@ -527,6 +554,21 @@ static void sig_handler(int sig) signr = sig; done = 1; +#ifdef HAVE_EVENTFD_SUPPORT +{ + u64 tmp = 1; + /* + * It is possible for this signal handler to run after done is checked + * in the main loop, but before the perf counter fds are polled. If this + * happens, the poll() will continue to wait even though done is set, + * and will only break out if either another signal is received, or the + * counters are ready for read. To ensure the poll() doesn't sleep when + * done is set, use an eventfd (done_fd) to wake up the poll(). + */ + if (write(done_fd, &tmp, sizeof(tmp)) < 0) + pr_err("failed to signal wakeup fd, error: %m\n"); +} +#endif // HAVE_EVENTFD_SUPPORT } static void sigsegv_handler(int sig) @@ -805,19 +847,28 @@ static int record__open(struct record *rec) int rc = 0; /* - * For initial_delay we need to add a dummy event so that we can track - * PERF_RECORD_MMAP while we wait for the initial delay to enable the - * real events, the ones asked by the user. + * For initial_delay or system wide, we need to add a dummy event so + * that we can track PERF_RECORD_MMAP to cover the delay of waiting or + * event synthesis. */ - if (opts->initial_delay) { + if (opts->initial_delay || target__has_cpu(&opts->target)) { if (perf_evlist__add_dummy(evlist)) return -ENOMEM; + /* Disable tracking of mmaps on lead event. */ pos = evlist__first(evlist); pos->tracking = 0; + /* Set up dummy event. */ pos = evlist__last(evlist); pos->tracking = 1; - pos->core.attr.enable_on_exec = 1; + /* + * Enable the dummy event when the process is forked for + * initial_delay, immediately for system wide. + */ + if (opts->initial_delay) + pos->core.attr.enable_on_exec = 1; + else + pos->immediate = 1; } perf_evlist__config(evlist, opts, &callchain_param); @@ -825,7 +876,7 @@ static int record__open(struct record *rec) evlist__for_each_entry(evlist, pos) { try_again: if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { - if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { + if (evsel__fallback(pos, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); goto try_again; @@ -837,8 +888,7 @@ try_again: goto try_again; } rc = -errno; - perf_evsel__open_strerror(pos, &opts->target, - errno, msg, sizeof(msg)); + evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); ui__error("%s\n", msg); goto out; } @@ -859,7 +909,7 @@ try_again: if (perf_evlist__apply_filters(evlist, &pos)) { pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", - pos->filter, perf_evsel__name(pos), errno, + pos->filter, evsel__name(pos), errno, str_error_r(errno, msg, sizeof(msg))); rc = -1; goto out; @@ -1288,6 +1338,7 @@ static int record__synthesize(struct record *rec, bool tail) struct perf_tool *tool = &rec->tool; int fd = perf_data__fd(data); int err = 0; + event_op f = process_synthesized_event; if (rec->opts.tail_synthesize != tail) return 0; @@ -1402,13 +1453,67 @@ static int record__synthesize(struct record *rec, bool tail) if (err < 0) pr_warning("Couldn't synthesize cgroup events.\n"); + if (rec->opts.nr_threads_synthesize > 1) { + perf_set_multithreaded(); + f = process_locked_synthesized_event; + } + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads, - process_synthesized_event, opts->sample_address, - 1); + f, opts->sample_address, + rec->opts.nr_threads_synthesize); + + if (rec->opts.nr_threads_synthesize > 1) + perf_set_singlethreaded(); + out: return err; } +static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) +{ + struct record *rec = data; + pthread_kill(rec->thread_id, SIGUSR2); + return 0; +} + +static int record__setup_sb_evlist(struct record *rec) +{ + struct record_opts *opts = &rec->opts; + + if (rec->sb_evlist != NULL) { + /* + * We get here if --switch-output-event populated the + * sb_evlist, so associate a callback that will send a SIGUSR2 + * to the main thread. + */ + evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); + rec->thread_id = pthread_self(); + } + + if (!opts->no_bpf_event) { + if (rec->sb_evlist == NULL) { + rec->sb_evlist = evlist__new(); + + if (rec->sb_evlist == NULL) { + pr_err("Couldn't create side band evlist.\n."); + return -1; + } + } + + if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { + pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); + return -1; + } + } + + if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { + pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); + opts->no_bpf_event = true; + } + + return 0; +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -1420,7 +1525,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) struct perf_data *data = &rec->data; struct perf_session *session; bool disabled = false, draining = false; - struct evlist *sb_evlist = NULL; int fd; float ratio = 0; @@ -1465,6 +1569,20 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) pr_err("Compression initialization failed.\n"); return -1; } +#ifdef HAVE_EVENTFD_SUPPORT + done_fd = eventfd(0, EFD_NONBLOCK); + if (done_fd < 0) { + pr_err("Failed to create wakeup eventfd, error: %m\n"); + status = -1; + goto out_delete_session; + } + err = evlist__add_pollfd(rec->evlist, done_fd); + if (err < 0) { + pr_err("Failed to add wakeup eventfd to poll list\n"); + status = err; + goto out_delete_session; + } +#endif // HAVE_EVENTFD_SUPPORT session->header.env.comp_type = PERF_COMP_ZSTD; session->header.env.comp_level = rec->opts.comp_level; @@ -1546,21 +1664,17 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + err = -1; if (!rec->no_buildid && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { pr_err("Couldn't generate buildids. " "Use --no-buildid to profile anyway.\n"); - err = -1; goto out_child; } - if (!opts->no_bpf_event) - bpf_event__add_sb_event(&sb_evlist, &session->header.env); - - if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) { - pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); - opts->no_bpf_event = true; - } + err = record__setup_sb_evlist(rec); + if (err) + goto out_child; err = record__synthesize(rec, false); if (err < 0) @@ -1827,11 +1941,15 @@ out_child: } out_delete_session: +#ifdef HAVE_EVENTFD_SUPPORT + if (done_fd >= 0) + close(done_fd); +#endif zstd_fini(&session->zstd_data); perf_session__delete(session); if (!opts->no_bpf_event) - perf_evlist__stop_sb_thread(sb_evlist); + perf_evlist__stop_sb_thread(rec->sb_evlist); return status; } @@ -2142,10 +2260,19 @@ static int switch_output_setup(struct record *rec) }; unsigned long val; + /* + * If we're using --switch-output-events, then we imply its + * --switch-output=signal, as we'll send a SIGUSR2 from the side band + * thread to its parent. + */ + if (rec->switch_output_event_set) + goto do_signal; + if (!s->set) return 0; if (!strcmp(s->str, "signal")) { +do_signal: s->signal = true; pr_debug("switch-output with SIGUSR2 signal\n"); goto enabled; @@ -2232,6 +2359,7 @@ static struct record record = { .default_per_cpu = true, }, .mmap_flush = MMAP_FLUSH_DEFAULT, + .nr_threads_synthesize = 1, }, .tool = { .sample = process_sample_event, @@ -2374,8 +2502,9 @@ static struct option __record_options[] = { "Record namespaces events"), OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, "Record cgroup events"), - OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, - "Record context switch events"), + OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, + &record.opts.record_switch_events_set, + "Record context switch events"), OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, "Configure all used events to run in kernel space.", PARSE_OPT_EXCLUSIVE), @@ -2402,6 +2531,9 @@ static struct option __record_options[] = { &record.switch_output.set, "signal or size[BKMG] or time[smhd]", "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", "signal"), + OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", + "switch output event selector. use 'perf list' to list available events", + parse_events_option_new_evlist), OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, "Limit number of switch output generated files"), OPT_BOOLEAN(0, "dry-run", &dry_run, @@ -2421,6 +2553,14 @@ static struct option __record_options[] = { #endif OPT_CALLBACK(0, "max-size", &record.output_max_size, "size", "Limit the maximum size of the output file", parse_output_max_size), + OPT_UINTEGER(0, "num-thread-synthesize", + &record.opts.nr_threads_synthesize, + "number of threads to run for event synthesis"), +#ifdef HAVE_LIBPFM + OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", + "libpfm4 event selector. use 'perf list' to list available events", + parse_libpfm_events_option), +#endif OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 26d8fc27e427..b63b3fb2de70 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -47,7 +47,6 @@ #include "util/time-utils.h" #include "util/auxtrace.h" #include "util/units.h" -#include "util/branch.h" #include "util/util.h" // perf_tip() #include "ui/ui.h" #include "ui/progress.h" @@ -84,6 +83,7 @@ struct report { bool header_only; bool nonany_branch_mode; bool group_set; + bool stitch_lbr; int max_stack; struct perf_read_values show_threads_values; struct annotation_options annotation_opts; @@ -267,6 +267,9 @@ static int process_sample_event(struct perf_tool *tool, return -1; } + if (rep->stitch_lbr) + al.thread->lbr_stitch_enable = true; + if (symbol_conf.hide_unresolved && al.sym == NULL) goto out_put; @@ -317,7 +320,7 @@ static int process_read_event(struct perf_tool *tool, struct report *rep = container_of(tool, struct report, tool); if (rep->show_threads) { - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); int err = perf_read_values_add_value(&rep->show_threads_values, event->read.pid, event->read.tid, evsel->idx, @@ -339,12 +342,14 @@ static int report__setup_sample_type(struct report *rep) bool is_pipe = perf_data__is_pipe(session->data); if (session->itrace_synth_opts->callchain || + session->itrace_synth_opts->add_callchain || (!is_pipe && perf_header__has_feat(&session->header, HEADER_AUXTRACE) && !session->itrace_synth_opts->set)) sample_type |= PERF_SAMPLE_CALLCHAIN; - if (session->itrace_synth_opts->last_branch) + if (session->itrace_synth_opts->last_branch || + session->itrace_synth_opts->add_last_branch) sample_type |= PERF_SAMPLE_BRANCH_STACK; if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { @@ -396,15 +401,12 @@ static int report__setup_sample_type(struct report *rep) } } - if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { - if ((sample_type & PERF_SAMPLE_REGS_USER) && - (sample_type & PERF_SAMPLE_STACK_USER)) { - callchain_param.record_mode = CALLCHAIN_DWARF; - dwarf_callchain_users = true; - } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) - callchain_param.record_mode = CALLCHAIN_LBR; - else - callchain_param.record_mode = CALLCHAIN_FP; + callchain_param_setup(sample_type); + + if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { + ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" + "Please apply --call-graph lbr when recording.\n"); + rep->stitch_lbr = false; } /* ??? handle more cases than just ANY? */ @@ -447,10 +449,10 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report nr_events = hists->stats.total_non_filtered_period; } - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { struct evsel *pos; - perf_evsel__group_desc(evsel, buf, size); + evsel__group_desc(evsel, buf, size); evname = buf; for_each_group_member(pos, evsel) { @@ -525,10 +527,9 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist, evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); - const char *evname = perf_evsel__name(pos); + const char *evname = evsel__name(pos); - if (symbol_conf.event_group && - !perf_evsel__is_group_leader(pos)) + if (symbol_conf.event_group && !evsel__is_group_leader(pos)) continue; hists__fprintf_nr_sample_events(hists, rep, evname, stdout); @@ -670,8 +671,7 @@ static int report__collapse_hists(struct report *rep) break; /* Non-group events are considered as leader */ - if (symbol_conf.event_group && - !perf_evsel__is_group_leader(pos)) { + if (symbol_conf.event_group && !evsel__is_group_leader(pos)) { struct hists *leader_hists = evsel__hists(pos->leader); hists__match(leader_hists, hists); @@ -706,8 +706,7 @@ static void report__output_resort(struct report *rep) ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); evlist__for_each_entry(rep->session->evlist, pos) { - perf_evsel__output_resort_cb(pos, &prog, - hists__resort_cb, rep); + evsel__output_resort_cb(pos, &prog, hists__resort_cb, rep); } ui_progress__finish(); @@ -1080,6 +1079,26 @@ parse_percent_limit(const struct option *opt, const char *str, return 0; } +static int process_attr(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct evlist **pevlist) +{ + u64 sample_type; + int err; + + err = perf_event__process_attr(tool, event, pevlist); + if (err) + return err; + + /* + * Check if we need to enable callchains based + * on events sample_type. + */ + sample_type = perf_evlist__combined_sample_type(*pevlist); + callchain_param_setup(sample_type); + return 0; +} + int cmd_report(int argc, const char **argv) { struct perf_session *session; @@ -1110,7 +1129,7 @@ int cmd_report(int argc, const char **argv) .fork = perf_event__process_fork, .lost = perf_event__process_lost, .read = process_read_event, - .attr = perf_event__process_attr, + .attr = process_attr, .tracing_data = perf_event__process_tracing_data, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, @@ -1257,6 +1276,8 @@ int cmd_report(int argc, const char **argv) "Show full source file name path for source lines"), OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph, "Show callgraph from reference event"), + OPT_BOOLEAN(0, "stitch-lbr", &report.stitch_lbr, + "Enable LBR callgraph stitching approach"), OPT_INTEGER(0, "socket-filter", &report.socket_filter, "only show processor socket that match with this filter"), OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace, @@ -1332,7 +1353,7 @@ int cmd_report(int argc, const char **argv) if (symbol_conf.cumulate_callchain && !callchain_param.order_set) callchain_param.order = ORDER_CALLER; - if (itrace_synth_opts.callchain && + if ((itrace_synth_opts.callchain || itrace_synth_opts.add_callchain) && (int)itrace_synth_opts.callchain_sz > report.max_stack) report.max_stack = itrace_synth_opts.callchain_sz; @@ -1380,7 +1401,7 @@ repeat: goto error; } - if (itrace_synth_opts.last_branch) + if (itrace_synth_opts.last_branch || itrace_synth_opts.add_last_branch) has_br_stack = true; if (has_br_stack && branch_call_mode) @@ -1400,7 +1421,7 @@ repeat: } if (branch_call_mode) { callchain_param.key = CCKEY_ADDRESS; - callchain_param.branch_callstack = 1; + callchain_param.branch_callstack = true; symbol_conf.use_callchain = true; callchain_register_param(&callchain_param); if (sort_order == NULL) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 82fcc2c15fe4..459e4229945e 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -811,8 +811,8 @@ replay_wakeup_event(struct perf_sched *sched, struct evsel *evsel, struct perf_sample *sample, struct machine *machine __maybe_unused) { - const char *comm = perf_evsel__strval(evsel, sample, "comm"); - const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + const char *comm = evsel__strval(evsel, sample, "comm"); + const u32 pid = evsel__intval(evsel, sample, "pid"); struct task_desc *waker, *wakee; if (verbose > 0) { @@ -833,11 +833,11 @@ static int replay_switch_event(struct perf_sched *sched, struct perf_sample *sample, struct machine *machine __maybe_unused) { - const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"), - *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); - const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), - next_pid = perf_evsel__intval(evsel, sample, "next_pid"); - const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); + const char *prev_comm = evsel__strval(evsel, sample, "prev_comm"), + *next_comm = evsel__strval(evsel, sample, "next_comm"); + const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), + next_pid = evsel__intval(evsel, sample, "next_pid"); + const u64 prev_state = evsel__intval(evsel, sample, "prev_state"); struct task_desc *prev, __maybe_unused *next; u64 timestamp0, timestamp = sample->time; int cpu = sample->cpu; @@ -1106,9 +1106,9 @@ static int latency_switch_event(struct perf_sched *sched, struct perf_sample *sample, struct machine *machine) { - const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), - next_pid = perf_evsel__intval(evsel, sample, "next_pid"); - const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); + const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), + next_pid = evsel__intval(evsel, sample, "next_pid"); + const u64 prev_state = evsel__intval(evsel, sample, "prev_state"); struct work_atoms *out_events, *in_events; struct thread *sched_out, *sched_in; u64 timestamp0, timestamp = sample->time; @@ -1176,8 +1176,8 @@ static int latency_runtime_event(struct perf_sched *sched, struct perf_sample *sample, struct machine *machine) { - const u32 pid = perf_evsel__intval(evsel, sample, "pid"); - const u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); + const u32 pid = evsel__intval(evsel, sample, "pid"); + const u64 runtime = evsel__intval(evsel, sample, "runtime"); struct thread *thread = machine__findnew_thread(machine, -1, pid); struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); u64 timestamp = sample->time; @@ -1211,7 +1211,7 @@ static int latency_wakeup_event(struct perf_sched *sched, struct perf_sample *sample, struct machine *machine) { - const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + const u32 pid = evsel__intval(evsel, sample, "pid"); struct work_atoms *atoms; struct work_atom *atom; struct thread *wakee; @@ -1272,7 +1272,7 @@ static int latency_migrate_task_event(struct perf_sched *sched, struct perf_sample *sample, struct machine *machine) { - const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + const u32 pid = evsel__intval(evsel, sample, "pid"); u64 timestamp = sample->time; struct work_atoms *atoms; struct work_atom *atom; @@ -1526,7 +1526,7 @@ map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, struct perf_sample *sample, struct machine *machine) { - const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); + const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); struct thread *sched_in; struct thread_runtime *tr; int new_shortname; @@ -1670,8 +1670,8 @@ static int process_sched_switch_event(struct perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); int this_cpu = sample->cpu, err = 0; - u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), - next_pid = perf_evsel__intval(evsel, sample, "next_pid"); + u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), + next_pid = evsel__intval(evsel, sample, "next_pid"); if (sched->curr_pid[this_cpu] != (u32)-1) { /* @@ -1848,7 +1848,7 @@ static inline void print_sched_time(unsigned long long nsecs, int width) * returns runtime data for event, allocating memory for it the * first time it is used. */ -static struct evsel_runtime *perf_evsel__get_runtime(struct evsel *evsel) +static struct evsel_runtime *evsel__get_runtime(struct evsel *evsel) { struct evsel_runtime *r = evsel->priv; @@ -1863,10 +1863,9 @@ static struct evsel_runtime *perf_evsel__get_runtime(struct evsel *evsel) /* * save last time event was seen per cpu */ -static void perf_evsel__save_time(struct evsel *evsel, - u64 timestamp, u32 cpu) +static void evsel__save_time(struct evsel *evsel, u64 timestamp, u32 cpu) { - struct evsel_runtime *r = perf_evsel__get_runtime(evsel); + struct evsel_runtime *r = evsel__get_runtime(evsel); if (r == NULL) return; @@ -1890,9 +1889,9 @@ static void perf_evsel__save_time(struct evsel *evsel, } /* returns last time this event was seen on the given cpu */ -static u64 perf_evsel__get_time(struct evsel *evsel, u32 cpu) +static u64 evsel__get_time(struct evsel *evsel, u32 cpu) { - struct evsel_runtime *r = perf_evsel__get_runtime(evsel); + struct evsel_runtime *r = evsel__get_runtime(evsel); if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu)) return 0; @@ -2004,8 +2003,8 @@ static void timehist_print_sample(struct perf_sched *sched, u64 t, int state) { struct thread_runtime *tr = thread__priv(thread); - const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); - const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); + const char *next_comm = evsel__strval(evsel, sample, "next_comm"); + const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); u32 max_cpus = sched->max_cpu + 1; char tstr[64]; char nstr[30]; @@ -2136,8 +2135,8 @@ static bool is_idle_sample(struct perf_sample *sample, struct evsel *evsel) { /* pid 0 == swapper == idle task */ - if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) - return perf_evsel__intval(evsel, sample, "prev_pid") == 0; + if (strcmp(evsel__name(evsel), "sched:sched_switch") == 0) + return evsel__intval(evsel, sample, "prev_pid") == 0; return sample->pid == 0; } @@ -2334,7 +2333,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, itr->last_thread = thread; /* copy task callchain when entering to idle */ - if (perf_evsel__intval(evsel, sample, "next_pid") == 0) + if (evsel__intval(evsel, sample, "next_pid") == 0) save_idle_callchain(sched, itr, sample); } } @@ -2355,10 +2354,10 @@ static bool timehist_skip_sample(struct perf_sched *sched, } if (sched->idle_hist) { - if (strcmp(perf_evsel__name(evsel), "sched:sched_switch")) + if (strcmp(evsel__name(evsel), "sched:sched_switch")) rc = true; - else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 && - perf_evsel__intval(evsel, sample, "next_pid") != 0) + else if (evsel__intval(evsel, sample, "prev_pid") != 0 && + evsel__intval(evsel, sample, "next_pid") != 0) rc = true; } @@ -2409,7 +2408,7 @@ static int timehist_sched_wakeup_event(struct perf_tool *tool, struct thread *thread; struct thread_runtime *tr = NULL; /* want pid of awakened task not pid in sample */ - const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + const u32 pid = evsel__intval(evsel, sample, "pid"); thread = machine__findnew_thread(machine, 0, pid); if (thread == NULL) @@ -2445,8 +2444,8 @@ static void timehist_print_migration_event(struct perf_sched *sched, return; max_cpus = sched->max_cpu + 1; - ocpu = perf_evsel__intval(evsel, sample, "orig_cpu"); - dcpu = perf_evsel__intval(evsel, sample, "dest_cpu"); + ocpu = evsel__intval(evsel, sample, "orig_cpu"); + dcpu = evsel__intval(evsel, sample, "dest_cpu"); thread = machine__findnew_thread(machine, sample->pid, sample->tid); if (thread == NULL) @@ -2493,7 +2492,7 @@ static int timehist_migrate_task_event(struct perf_tool *tool, struct thread *thread; struct thread_runtime *tr = NULL; /* want pid of migrated task not pid in sample */ - const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + const u32 pid = evsel__intval(evsel, sample, "pid"); thread = machine__findnew_thread(machine, 0, pid); if (thread == NULL) @@ -2524,8 +2523,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, struct thread_runtime *tr = NULL; u64 tprev, t = sample->time; int rc = 0; - int state = perf_evsel__intval(evsel, sample, "prev_state"); - + int state = evsel__intval(evsel, sample, "prev_state"); if (machine__resolve(machine, &al, sample) < 0) { pr_err("problem processing %d event. skipping it\n", @@ -2549,7 +2547,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, goto out; } - tprev = perf_evsel__get_time(evsel, sample->cpu); + tprev = evsel__get_time(evsel, sample->cpu); /* * If start time given: @@ -2632,7 +2630,7 @@ out: tr->ready_to_run = 0; } - perf_evsel__save_time(evsel, sample->time, sample->cpu); + evsel__save_time(evsel, sample->time, sample->cpu); return rc; } @@ -2942,7 +2940,7 @@ static int timehist_check_attr(struct perf_sched *sched, struct evsel_runtime *er; list_for_each_entry(evsel, &evlist->core.entries, core.node) { - er = perf_evsel__get_runtime(evsel); + er = evsel__get_runtime(evsel); if (er == NULL) { pr_err("Failed to allocate memory for evsel runtime data\n"); return -1; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 1f57a7ecdf3d..5da243676f12 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -167,6 +167,7 @@ static struct { u64 fields; u64 invalid_fields; u64 user_set_fields; + u64 user_unset_fields; } output[OUTPUT_TYPE_MAX] = { [PERF_TYPE_HARDWARE] = { @@ -273,7 +274,7 @@ static struct evsel_script *perf_evsel_script__new(struct evsel *evsel, struct evsel_script *es = zalloc(sizeof(*es)); if (es != NULL) { - if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0) + if (asprintf(&es->filename, "%s.%s.dump", data->file.path, evsel__name(evsel)) < 0) goto out_free; es->fp = fopen(es->filename, "w"); if (es->fp == NULL) @@ -351,10 +352,8 @@ static const char *output_field2str(enum perf_output_field field) #define PRINT_FIELD(x) (output[output_type(attr->type)].fields & PERF_OUTPUT_##x) -static int perf_evsel__do_check_stype(struct evsel *evsel, - u64 sample_type, const char *sample_msg, - enum perf_output_field field, - bool allow_user_set) +static int evsel__do_check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg, + enum perf_output_field field, bool allow_user_set) { struct perf_event_attr *attr = &evsel->core.attr; int type = output_type(attr->type); @@ -366,7 +365,7 @@ static int perf_evsel__do_check_stype(struct evsel *evsel, if (output[type].user_set_fields & field) { if (allow_user_set) return 0; - evname = perf_evsel__name(evsel); + evname = evsel__name(evsel); pr_err("Samples for '%s' event do not have %s attribute set. " "Cannot print '%s' field.\n", evname, sample_msg, output_field2str(field)); @@ -375,7 +374,7 @@ static int perf_evsel__do_check_stype(struct evsel *evsel, /* user did not ask for it explicitly so remove from the default list */ output[type].fields &= ~field; - evname = perf_evsel__name(evsel); + evname = evsel__name(evsel); pr_debug("Samples for '%s' event do not have %s attribute set. " "Skipping '%s' field.\n", evname, sample_msg, output_field2str(field)); @@ -383,16 +382,13 @@ static int perf_evsel__do_check_stype(struct evsel *evsel, return 0; } -static int perf_evsel__check_stype(struct evsel *evsel, - u64 sample_type, const char *sample_msg, - enum perf_output_field field) +static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg, + enum perf_output_field field) { - return perf_evsel__do_check_stype(evsel, sample_type, sample_msg, field, - false); + return evsel__do_check_stype(evsel, sample_type, sample_msg, field, false); } -static int perf_evsel__check_attr(struct evsel *evsel, - struct perf_session *session) +static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *session) { struct perf_event_attr *attr = &evsel->core.attr; bool allow_user_set; @@ -404,32 +400,28 @@ static int perf_evsel__check_attr(struct evsel *evsel, HEADER_AUXTRACE); if (PRINT_FIELD(TRACE) && - !perf_session__has_traces(session, "record -R")) + !perf_session__has_traces(session, "record -R")) return -EINVAL; if (PRINT_FIELD(IP)) { - if (perf_evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP", - PERF_OUTPUT_IP)) + if (evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP", PERF_OUTPUT_IP)) return -EINVAL; } if (PRINT_FIELD(ADDR) && - perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", - PERF_OUTPUT_ADDR, allow_user_set)) + evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", PERF_OUTPUT_ADDR, allow_user_set)) return -EINVAL; if (PRINT_FIELD(DATA_SRC) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", - PERF_OUTPUT_DATA_SRC)) + evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC)) return -EINVAL; if (PRINT_FIELD(WEIGHT) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", - PERF_OUTPUT_WEIGHT)) + evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", PERF_OUTPUT_WEIGHT)) return -EINVAL; if (PRINT_FIELD(SYM) && - !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) { + !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) { pr_err("Display of symbols requested but neither sample IP nor " "sample address\navailable. Hence, no addresses to convert " "to symbols.\n"); @@ -441,7 +433,7 @@ static int perf_evsel__check_attr(struct evsel *evsel, return -EINVAL; } if (PRINT_FIELD(DSO) && - !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) { + !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) { pr_err("Display of DSO requested but no address to convert.\n"); return -EINVAL; } @@ -458,33 +450,27 @@ static int perf_evsel__check_attr(struct evsel *evsel, return -EINVAL; } if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", - PERF_OUTPUT_TID|PERF_OUTPUT_PID)) + evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", PERF_OUTPUT_TID|PERF_OUTPUT_PID)) return -EINVAL; if (PRINT_FIELD(TIME) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME", - PERF_OUTPUT_TIME)) + evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME", PERF_OUTPUT_TIME)) return -EINVAL; if (PRINT_FIELD(CPU) && - perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU", - PERF_OUTPUT_CPU, allow_user_set)) + evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU", PERF_OUTPUT_CPU, allow_user_set)) return -EINVAL; if (PRINT_FIELD(IREGS) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", - PERF_OUTPUT_IREGS)) + evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", PERF_OUTPUT_IREGS)) return -EINVAL; if (PRINT_FIELD(UREGS) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS", - PERF_OUTPUT_UREGS)) + evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS", PERF_OUTPUT_UREGS)) return -EINVAL; if (PRINT_FIELD(PHYS_ADDR) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", - PERF_OUTPUT_PHYS_ADDR)) + evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR)) return -EINVAL; return 0; @@ -604,8 +590,6 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val); } - fprintf(fp, "\n"); - return printed; } @@ -1697,6 +1681,7 @@ struct perf_script { bool show_cgroup_events; bool allocated; bool per_event_dump; + bool stitch_lbr; struct evswitch evswitch; struct perf_cpu_map *cpus; struct perf_thread_map *threads; @@ -1713,7 +1698,7 @@ static int perf_evlist__max_name_len(struct evlist *evlist) int max = 0; evlist__for_each_entry(evlist, evsel) { - int len = strlen(perf_evsel__name(evsel)); + int len = strlen(evsel__name(evsel)); max = MAX(len, max); } @@ -1887,7 +1872,7 @@ static void process_event(struct perf_script *script, fprintf(fp, "%10" PRIu64 " ", sample->period); if (PRINT_FIELD(EVNAME)) { - const char *evname = perf_evsel__name(evsel); + const char *evname = evsel__name(evsel); if (!script->name_width) script->name_width = perf_evlist__max_name_len(script->session->evlist); @@ -1923,6 +1908,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(IP)) { struct callchain_cursor *cursor = NULL; + if (script->stitch_lbr) + al->thread->lbr_stitch_enable = true; + if (symbol_conf.use_callchain && sample->callchain && thread__resolve_callchain(al->thread, &callchain_cursor, evsel, sample, NULL, NULL, scripting_max_stack) == 0) @@ -1946,7 +1934,7 @@ static void process_event(struct perf_script *script, else if (PRINT_FIELD(BRSTACKOFF)) perf_sample__fprintf_brstackoff(sample, thread, attr, fp); - if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) + if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) perf_sample__fprintf_bpf_output(sample, fp); perf_sample__fprintf_insn(sample, attr, thread, machine, fp); @@ -1975,7 +1963,7 @@ static struct scripting_ops *scripting_ops; static void __process_stat(struct evsel *counter, u64 tstamp) { int nthreads = perf_thread_map__nr(counter->core.threads); - int ncpus = perf_evsel__nr_cpus(counter); + int ncpus = evsel__nr_cpus(counter); int cpu, thread; static int header_printed; @@ -2001,7 +1989,7 @@ static void __process_stat(struct evsel *counter, u64 tstamp) counts->ena, counts->run, tstamp, - perf_evsel__name(counter)); + evsel__name(counter)); } } } @@ -2040,7 +2028,7 @@ static int cleanup_scripting(void) static bool filter_cpu(struct perf_sample *sample) { - if (cpu_list) + if (cpu_list && sample->cpu != (u32)-1) return !test_bit(sample->cpu, cpu_bitmap); return false; } @@ -2098,6 +2086,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, struct perf_script *scr = container_of(tool, struct perf_script, tool); struct evlist *evlist; struct evsel *evsel, *pos; + u64 sample_type; int err; static struct evsel_script *es; @@ -2130,49 +2119,89 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, return 0; } - set_print_ip_opts(&evsel->core.attr); - - if (evsel->core.attr.sample_type) + if (evsel->core.attr.sample_type) { err = perf_evsel__check_attr(evsel, scr->session); + if (err) + return err; + } - return err; + /* + * Check if we need to enable callchains based + * on events sample_type. + */ + sample_type = perf_evlist__combined_sample_type(evlist); + callchain_param_setup(sample_type); + + /* Enable fields for callchain entries */ + if (symbol_conf.use_callchain && + (sample_type & PERF_SAMPLE_CALLCHAIN || + sample_type & PERF_SAMPLE_BRANCH_STACK || + (sample_type & PERF_SAMPLE_REGS_USER && + sample_type & PERF_SAMPLE_STACK_USER))) { + int type = output_type(evsel->core.attr.type); + + if (!(output[type].user_unset_fields & PERF_OUTPUT_IP)) + output[type].fields |= PERF_OUTPUT_IP; + if (!(output[type].user_unset_fields & PERF_OUTPUT_SYM)) + output[type].fields |= PERF_OUTPUT_SYM; + } + set_print_ip_opts(&evsel->core.attr); + return 0; } -static int process_comm_event(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample, - struct machine *machine) +static int print_event_with_time(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine, + pid_t pid, pid_t tid, u64 timestamp) { - struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - int ret = -1; + struct thread *thread = NULL; - thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid); - if (thread == NULL) { - pr_debug("problem processing COMM event, skipping it.\n"); - return -1; + if (evsel && !evsel->core.attr.sample_id_all) { + sample->cpu = 0; + sample->time = timestamp; + sample->pid = pid; + sample->tid = tid; } - if (perf_event__process_comm(tool, event, sample, machine) < 0) - goto out; + if (filter_cpu(sample)) + return 0; - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = 0; - sample->tid = event->comm.tid; - sample->pid = event->comm.pid; - } - if (!filter_cpu(sample)) { + if (tid != -1) + thread = machine__findnew_thread(machine, pid, tid); + + if (thread && evsel) { perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_COMM, stdout); - perf_event__fprintf(event, stdout); + event->header.type, stdout); } - ret = 0; -out: + + perf_event__fprintf(event, stdout); + thread__put(thread); - return ret; + + return 0; +} + +static int print_event(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, struct machine *machine, + pid_t pid, pid_t tid) +{ + return print_event_with_time(tool, event, sample, machine, pid, tid, 0); +} + +static int process_comm_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + if (perf_event__process_comm(tool, event, sample, machine) < 0) + return -1; + + return print_event(tool, event, sample, machine, event->comm.pid, + event->comm.tid); } static int process_namespaces_event(struct perf_tool *tool, @@ -2180,37 +2209,11 @@ static int process_namespaces_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - int ret = -1; - - thread = machine__findnew_thread(machine, event->namespaces.pid, - event->namespaces.tid); - if (thread == NULL) { - pr_debug("problem processing NAMESPACES event, skipping it.\n"); - return -1; - } - if (perf_event__process_namespaces(tool, event, sample, machine) < 0) - goto out; + return -1; - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = 0; - sample->tid = event->namespaces.tid; - sample->pid = event->namespaces.pid; - } - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_NAMESPACES, stdout); - perf_event__fprintf(event, stdout); - } - ret = 0; -out: - thread__put(thread); - return ret; + return print_event(tool, event, sample, machine, event->namespaces.pid, + event->namespaces.tid); } static int process_cgroup_event(struct perf_tool *tool, @@ -2218,34 +2221,11 @@ static int process_cgroup_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - int ret = -1; - - thread = machine__findnew_thread(machine, sample->pid, sample->tid); - if (thread == NULL) { - pr_debug("problem processing CGROUP event, skipping it.\n"); - return -1; - } - if (perf_event__process_cgroup(tool, event, sample, machine) < 0) - goto out; + return -1; - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = 0; - } - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_CGROUP, stdout); - perf_event__fprintf(event, stdout); - } - ret = 0; -out: - thread__put(thread); - return ret; + return print_event(tool, event, sample, machine, sample->pid, + sample->tid); } static int process_fork_event(struct perf_tool *tool, @@ -2253,69 +2233,24 @@ static int process_fork_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - if (perf_event__process_fork(tool, event, sample, machine) < 0) return -1; - thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); - if (thread == NULL) { - pr_debug("problem processing FORK event, skipping it.\n"); - return -1; - } - - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = event->fork.time; - sample->tid = event->fork.tid; - sample->pid = event->fork.pid; - } - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_FORK, stdout); - perf_event__fprintf(event, stdout); - } - thread__put(thread); - - return 0; + return print_event_with_time(tool, event, sample, machine, + event->fork.pid, event->fork.tid, + event->fork.time); } static int process_exit_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine) { - int err = 0; - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - - thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); - if (thread == NULL) { - pr_debug("problem processing EXIT event, skipping it.\n"); + /* Print before 'exit' deletes anything */ + if (print_event_with_time(tool, event, sample, machine, event->fork.pid, + event->fork.tid, event->fork.time)) return -1; - } - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = 0; - sample->tid = event->fork.tid; - sample->pid = event->fork.pid; - } - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_EXIT, stdout); - perf_event__fprintf(event, stdout); - } - - if (perf_event__process_exit(tool, event, sample, machine) < 0) - err = -1; - - thread__put(thread); - return err; + return perf_event__process_exit(tool, event, sample, machine); } static int process_mmap_event(struct perf_tool *tool, @@ -2323,33 +2258,11 @@ static int process_mmap_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - if (perf_event__process_mmap(tool, event, sample, machine) < 0) return -1; - thread = machine__findnew_thread(machine, event->mmap.pid, event->mmap.tid); - if (thread == NULL) { - pr_debug("problem processing MMAP event, skipping it.\n"); - return -1; - } - - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = 0; - sample->tid = event->mmap.tid; - sample->pid = event->mmap.pid; - } - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_MMAP, stdout); - perf_event__fprintf(event, stdout); - } - thread__put(thread); - return 0; + return print_event(tool, event, sample, machine, event->mmap.pid, + event->mmap.tid); } static int process_mmap2_event(struct perf_tool *tool, @@ -2357,33 +2270,11 @@ static int process_mmap2_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - if (perf_event__process_mmap2(tool, event, sample, machine) < 0) return -1; - thread = machine__findnew_thread(machine, event->mmap2.pid, event->mmap2.tid); - if (thread == NULL) { - pr_debug("problem processing MMAP2 event, skipping it.\n"); - return -1; - } - - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = 0; - sample->tid = event->mmap2.tid; - sample->pid = event->mmap2.pid; - } - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_MMAP2, stdout); - perf_event__fprintf(event, stdout); - } - thread__put(thread); - return 0; + return print_event(tool, event, sample, machine, event->mmap2.pid, + event->mmap2.tid); } static int process_switch_event(struct perf_tool *tool, @@ -2391,10 +2282,7 @@ static int process_switch_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); if (perf_event__process_switch(tool, event, sample, machine) < 0) return -1; @@ -2405,20 +2293,8 @@ static int process_switch_event(struct perf_tool *tool, if (!script->show_switch_events) return 0; - thread = machine__findnew_thread(machine, sample->pid, - sample->tid); - if (thread == NULL) { - pr_debug("problem processing SWITCH event, skipping it.\n"); - return -1; - } - - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_SWITCH, stdout); - perf_event__fprintf(event, stdout); - } - thread__put(thread); - return 0; + return print_event(tool, event, sample, machine, sample->pid, + sample->tid); } static int @@ -2427,23 +2303,8 @@ process_lost_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - struct thread *thread; - - thread = machine__findnew_thread(machine, sample->pid, - sample->tid); - if (thread == NULL) - return -1; - - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_LOST, stdout); - perf_event__fprintf(event, stdout); - } - thread__put(thread); - return 0; + return print_event(tool, event, sample, machine, sample->pid, + sample->tid); } static int @@ -2462,33 +2323,11 @@ process_bpf_events(struct perf_tool *tool __maybe_unused, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - if (machine__process_ksymbol(machine, event, sample) < 0) return -1; - if (!evsel->core.attr.sample_id_all) { - perf_event__fprintf(event, stdout); - return 0; - } - - thread = machine__findnew_thread(machine, sample->pid, sample->tid); - if (thread == NULL) { - pr_debug("problem processing MMAP event, skipping it.\n"); - return -1; - } - - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - event->header.type, stdout); - perf_event__fprintf(event, stdout); - } - - thread__put(thread); - return 0; + return print_event(tool, event, sample, machine, sample->pid, + sample->tid); } static void sig_handler(int sig __maybe_unused) @@ -2619,7 +2458,7 @@ static int __cmd_script(struct perf_script *script) struct script_spec { struct list_head node; struct scripting_ops *ops; - char spec[0]; + char spec[]; }; static LIST_HEAD(script_specs); @@ -2857,9 +2696,11 @@ parse: if (change == REMOVE) { output[j].fields &= ~all_output_options[i].field; output[j].user_set_fields &= ~all_output_options[i].field; + output[j].user_unset_fields |= all_output_options[i].field; } else { output[j].fields |= all_output_options[i].field; output[j].user_set_fields |= all_output_options[i].field; + output[j].user_unset_fields &= ~all_output_options[i].field; } output[j].user_set = true; output[j].wildcard_set = true; @@ -3145,7 +2986,7 @@ static int check_ev_match(char *dir_name, char *scriptname, match = 0; evlist__for_each_entry(session->evlist, pos) { - if (!strcmp(perf_evsel__name(pos), evname)) { + if (!strcmp(evsel__name(pos), evname)) { match = 1; break; } @@ -3342,6 +3183,12 @@ static void script__setup_sample_type(struct perf_script *script) else callchain_param.record_mode = CALLCHAIN_FP; } + + if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { + pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" + "Please apply --call-graph lbr when recording.\n"); + script->stitch_lbr = false; + } } static int process_stat_round_event(struct perf_session *session, @@ -3465,7 +3312,10 @@ static int parse_xed(const struct option *opt __maybe_unused, const char *str __maybe_unused, int unset __maybe_unused) { - force_pager("xed -F insn: -A -64 | less"); + if (isatty(1)) + force_pager("xed -F insn: -A -64 | less"); + else + force_pager("xed -F insn: -A -64"); return 0; } @@ -3653,6 +3503,8 @@ int cmd_script(int argc, const char **argv) "file", "file saving guest os /proc/kallsyms"), OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules, "file", "file saving guest os /proc/modules"), + OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr, + "Enable LBR callgraph stitching approach"), OPTS_EVSWITCH(&script.evswitch), OPT_END() }; @@ -3709,7 +3561,7 @@ int cmd_script(int argc, const char **argv) return -1; } - if (itrace_synth_opts.callchain && + if ((itrace_synth_opts.callchain || itrace_synth_opts.add_callchain) && itrace_synth_opts.callchain_sz > scripting_max_stack) scripting_max_stack = itrace_synth_opts.callchain_sz; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ec053dc1e35c..9be020e0098a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -66,6 +66,7 @@ #include "util/time-utils.h" #include "util/top.h" #include "util/affinity.h" +#include "util/pfm.h" #include "asm/bug.h" #include <linux/time64.h> @@ -189,6 +190,59 @@ static struct perf_stat_config stat_config = { .big_num = true, }; +static bool cpus_map_matched(struct evsel *a, struct evsel *b) +{ + if (!a->core.cpus && !b->core.cpus) + return true; + + if (!a->core.cpus || !b->core.cpus) + return false; + + if (a->core.cpus->nr != b->core.cpus->nr) + return false; + + for (int i = 0; i < a->core.cpus->nr; i++) { + if (a->core.cpus->map[i] != b->core.cpus->map[i]) + return false; + } + + return true; +} + +static void evlist__check_cpu_maps(struct evlist *evlist) +{ + struct evsel *evsel, *pos, *leader; + char buf[1024]; + + evlist__for_each_entry(evlist, evsel) { + leader = evsel->leader; + + /* Check that leader matches cpus with each member. */ + if (leader == evsel) + continue; + if (cpus_map_matched(leader, evsel)) + continue; + + /* If there's mismatch disable the group and warn user. */ + WARN_ONCE(1, "WARNING: grouped events cpus do not match, disabling group:\n"); + evsel__group_desc(leader, buf, sizeof(buf)); + pr_warning(" %s\n", buf); + + if (verbose) { + cpu_map__snprint(leader->core.cpus, buf, sizeof(buf)); + pr_warning(" %s: %s\n", leader->name, buf); + cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf)); + pr_warning(" %s: %s\n", evsel->name, buf); + } + + for_each_group_evsel(pos, leader) { + pos->leader = pos; + pos->core.nr_members = 0; + } + evsel->leader->core.nr_members = 0; + } +} + static inline void diff_timespec(struct timespec *r, struct timespec *a, struct timespec *b) { @@ -238,9 +292,8 @@ static int write_stat_round_event(u64 tm, u64 type) #define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y) -static int -perf_evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread, - struct perf_counts_values *count) +static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread, + struct perf_counts_values *count) { struct perf_sample_id *sid = SID(counter, cpu, thread); @@ -259,7 +312,7 @@ static int read_single_counter(struct evsel *counter, int cpu, count->val = val; return 0; } - return perf_evsel__read_counter(counter, cpu, thread); + return evsel__read_counter(counter, cpu, thread); } /* @@ -284,7 +337,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) /* * The leader's group read loads data into its group members - * (via perf_evsel__read_counter()) and sets their count->loaded. + * (via evsel__read_counter()) and sets their count->loaded. */ if (!perf_counts__is_loaded(counter->counts, cpu, thread) && read_single_counter(counter, cpu, thread, rs)) { @@ -297,7 +350,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) perf_counts__set_loaded(counter->counts, cpu, thread, false); if (STAT_RECORD) { - if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { + if (evsel__write_stat_event(counter, cpu, thread, count)) { pr_err("failed to write stat event\n"); return -1; } @@ -306,7 +359,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) if (verbose > 1) { fprintf(stat_config.output, "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - perf_evsel__name(counter), + evsel__name(counter), cpu, count->val, count->ena, count->run); } @@ -315,14 +368,14 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) return 0; } -static void read_counters(struct timespec *rs) +static int read_affinity_counters(struct timespec *rs) { struct evsel *counter; struct affinity affinity; int i, ncpus, cpu; if (affinity__setup(&affinity) < 0) - return; + return -1; ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus); if (!target__has_cpu(&target) || target__has_per_thread(&target)) @@ -342,6 +395,15 @@ static void read_counters(struct timespec *rs) } } affinity__cleanup(&affinity); + return 0; +} + +static void read_counters(struct timespec *rs) +{ + struct evsel *counter; + + if (!stat_config.summary && (read_affinity_counters(rs) < 0)) + return; evlist__for_each_entry(evsel_list, counter) { if (counter->err) @@ -352,6 +414,46 @@ static void read_counters(struct timespec *rs) } } +static int runtime_stat_new(struct perf_stat_config *config, int nthreads) +{ + int i; + + config->stats = calloc(nthreads, sizeof(struct runtime_stat)); + if (!config->stats) + return -1; + + config->stats_num = nthreads; + + for (i = 0; i < nthreads; i++) + runtime_stat__init(&config->stats[i]); + + return 0; +} + +static void runtime_stat_delete(struct perf_stat_config *config) +{ + int i; + + if (!config->stats) + return; + + for (i = 0; i < config->stats_num; i++) + runtime_stat__exit(&config->stats[i]); + + zfree(&config->stats); +} + +static void runtime_stat_reset(struct perf_stat_config *config) +{ + int i; + + if (!config->stats) + return; + + for (i = 0; i < config->stats_num; i++) + perf_stat__reset_shadow_per_stat(&config->stats[i]); +} + static void process_interval(void) { struct timespec ts, rs; @@ -359,6 +461,8 @@ static void process_interval(void) clock_gettime(CLOCK_MONOTONIC, &ts); diff_timespec(&rs, &ts, &ref_time); + perf_stat__reset_shadow_per_stat(&rt_stat); + runtime_stat_reset(&stat_config); read_counters(&rs); if (STAT_RECORD) { @@ -367,7 +471,7 @@ static void process_interval(void) } init_stats(&walltime_nsecs_stats); - update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000); + update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL); print_counters(&rs, 0, NULL); } @@ -409,7 +513,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf workload_exec_errno = info->si_value.sival_int; } -static bool perf_evsel__should_store_id(struct evsel *counter) +static bool evsel__should_store_id(struct evsel *counter) { return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID; } @@ -454,7 +558,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) errno == ENXIO) { if (verbose > 0) ui__warning("%s event is not supported by the kernel.\n", - perf_evsel__name(counter)); + evsel__name(counter)); counter->supported = false; /* * errored is a sticky flag that means one of the counter's @@ -465,7 +569,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) if ((counter->leader != counter) || !(counter->leader->core.nr_members > 1)) return COUNTER_SKIP; - } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { + } else if (evsel__fallback(counter, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); return COUNTER_RETRY; @@ -483,8 +587,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) } } - perf_evsel__open_strerror(counter, &target, - errno, msg, sizeof(msg)); + evsel__open_strerror(counter, &target, errno, msg, sizeof(msg)); ui__error("%s\n", msg); if (child_pid != -1) @@ -604,7 +707,7 @@ try_again: if (!counter->reset_group) continue; try_again_reset: - pr_debug2("reopening weak %s\n", perf_evsel__name(counter)); + pr_debug2("reopening weak %s\n", evsel__name(counter)); if (create_perf_stat_counter(counter, &stat_config, &target, counter->cpu_iter - 1) < 0) { @@ -635,14 +738,14 @@ try_again_reset: if (l > stat_config.unit_width) stat_config.unit_width = l; - if (perf_evsel__should_store_id(counter) && - perf_evsel__store_ids(counter, evsel_list)) + if (evsel__should_store_id(counter) && + evsel__store_ids(counter, evsel_list)) return -1; } if (perf_evlist__apply_filters(evsel_list, &counter)) { pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", - counter->filter, perf_evsel__name(counter), errno, + counter->filter, evsel__name(counter), errno, str_error_r(errno, msg, sizeof(msg))); return -1; } @@ -686,8 +789,11 @@ try_again_reset: break; } } - if (child_pid != -1) + if (child_pid != -1) { + if (timeout) + kill(child_pid, SIGTERM); wait4(child_pid, &status, 0, &stat_config.ru_data); + } if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); @@ -720,7 +826,21 @@ try_again_reset: if (stat_config.walltime_run_table) stat_config.walltime_run[run_idx] = t1 - t0; - update_stats(&walltime_nsecs_stats, t1 - t0); + if (interval) { + stat_config.interval = 0; + stat_config.summary = true; + init_stats(&walltime_nsecs_stats); + update_stats(&walltime_nsecs_stats, t1 - t0); + + if (stat_config.aggr_mode == AGGR_GLOBAL) + perf_evlist__save_aggr_prev_raw_counts(evsel_list); + + perf_evlist__copy_prev_raw_counts(evsel_list); + perf_evlist__reset_prev_raw_counts(evsel_list); + runtime_stat_reset(&stat_config); + perf_stat__reset_shadow_per_stat(&rt_stat); + } else + update_stats(&walltime_nsecs_stats, t1 - t0); /* * Closing a group leader splits the group, and as we only disable @@ -819,10 +939,16 @@ static void sig_atexit(void) kill(getpid(), signr); } +void perf_stat__set_big_num(int set) +{ + stat_config.big_num = (set != 0); +} + static int stat__set_big_num(const struct option *opt __maybe_unused, const char *s __maybe_unused, int unset) { big_num_opt = unset ? 0 : 1; + perf_stat__set_big_num(!unset); return 0; } @@ -838,7 +964,10 @@ static int parse_metric_groups(const struct option *opt, const char *str, int unset __maybe_unused) { - return metricgroup__parse_groups(opt, str, &stat_config.metric_events); + return metricgroup__parse_groups(opt, str, + stat_config.metric_no_group, + stat_config.metric_no_merge, + &stat_config.metric_events); } static struct option stat_options[] = { @@ -916,6 +1045,10 @@ static struct option stat_options[] = { "ms to wait before starting measurement after program start"), OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL, "Only print computed metrics. No raw values", enable_metric_only), + OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group, + "don't group metric events, impacts multiplexing"), + OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge, + "don't try to share events between metrics in a group"), OPT_BOOLEAN(0, "topdown", &topdown_run, "measure topdown level 1 statistics"), OPT_BOOLEAN(0, "smi-cost", &smi_cost, @@ -933,6 +1066,11 @@ static struct option stat_options[] = { "Use with 'percore' event qualifier to show the event " "counts of one hardware thread by sum up total hardware " "threads of same physical core"), +#ifdef HAVE_LIBPFM + OPT_CALLBACK(0, "pfm-events", &evsel_list, "event", + "libpfm4 event selector. use 'perf list' to list available events", + parse_libpfm_events_option), +#endif OPT_END() }; @@ -1440,6 +1578,8 @@ static int add_default_attributes(void) struct option opt = { .value = &evsel_list }; return metricgroup__parse_groups(&opt, "transaction", + stat_config.metric_no_group, + stat_config.metric_no_merge, &stat_config.metric_events); } @@ -1735,35 +1875,6 @@ int process_cpu_map_event(struct perf_session *session, return set_maps(st); } -static int runtime_stat_new(struct perf_stat_config *config, int nthreads) -{ - int i; - - config->stats = calloc(nthreads, sizeof(struct runtime_stat)); - if (!config->stats) - return -1; - - config->stats_num = nthreads; - - for (i = 0; i < nthreads; i++) - runtime_stat__init(&config->stats[i]); - - return 0; -} - -static void runtime_stat_delete(struct perf_stat_config *config) -{ - int i; - - if (!config->stats) - return; - - for (i = 0; i < config->stats_num; i++) - runtime_stat__exit(&config->stats[i]); - - zfree(&config->stats); -} - static const char * const stat_report_usage[] = { "perf stat report [<options>]", NULL, @@ -2055,6 +2166,8 @@ int cmd_stat(int argc, const char **argv) goto out; } + evlist__check_cpu_maps(evsel_list); + /* * Initialize thread_map with comm names, * so we could print it out on output. @@ -2145,7 +2258,7 @@ int cmd_stat(int argc, const char **argv) } } - if (!forever && status != -1 && !interval) + if (!forever && status != -1 && (!interval || stat_config.summary)) print_counters(NULL, argc, argv); if (STAT_RECORD) { diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 9e84fae9b096..4e380e7b5230 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -128,7 +128,7 @@ struct sample_wrapper { struct sample_wrapper *next; u64 timestamp; - unsigned char data[0]; + unsigned char data[]; }; #define TYPE_NONE 0 @@ -579,8 +579,8 @@ process_sample_cpu_idle(struct timechart *tchart __maybe_unused, struct perf_sample *sample, const char *backtrace __maybe_unused) { - u32 state = perf_evsel__intval(evsel, sample, "state"); - u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id"); + u32 state = evsel__intval(evsel, sample, "state"); + u32 cpu_id = evsel__intval(evsel, sample, "cpu_id"); if (state == (u32)PWR_EVENT_EXIT) c_state_end(tchart, cpu_id, sample->time); @@ -595,8 +595,8 @@ process_sample_cpu_frequency(struct timechart *tchart, struct perf_sample *sample, const char *backtrace __maybe_unused) { - u32 state = perf_evsel__intval(evsel, sample, "state"); - u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id"); + u32 state = evsel__intval(evsel, sample, "state"); + u32 cpu_id = evsel__intval(evsel, sample, "cpu_id"); p_state_change(tchart, cpu_id, sample->time, state); return 0; @@ -608,9 +608,9 @@ process_sample_sched_wakeup(struct timechart *tchart, struct perf_sample *sample, const char *backtrace) { - u8 flags = perf_evsel__intval(evsel, sample, "common_flags"); - int waker = perf_evsel__intval(evsel, sample, "common_pid"); - int wakee = perf_evsel__intval(evsel, sample, "pid"); + u8 flags = evsel__intval(evsel, sample, "common_flags"); + int waker = evsel__intval(evsel, sample, "common_pid"); + int wakee = evsel__intval(evsel, sample, "pid"); sched_wakeup(tchart, sample->cpu, sample->time, waker, wakee, flags, backtrace); return 0; @@ -622,9 +622,9 @@ process_sample_sched_switch(struct timechart *tchart, struct perf_sample *sample, const char *backtrace) { - int prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"); - int next_pid = perf_evsel__intval(evsel, sample, "next_pid"); - u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); + int prev_pid = evsel__intval(evsel, sample, "prev_pid"); + int next_pid = evsel__intval(evsel, sample, "next_pid"); + u64 prev_state = evsel__intval(evsel, sample, "prev_state"); sched_switch(tchart, sample->cpu, sample->time, prev_pid, next_pid, prev_state, backtrace); @@ -638,8 +638,8 @@ process_sample_power_start(struct timechart *tchart __maybe_unused, struct perf_sample *sample, const char *backtrace __maybe_unused) { - u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id"); - u64 value = perf_evsel__intval(evsel, sample, "value"); + u64 cpu_id = evsel__intval(evsel, sample, "cpu_id"); + u64 value = evsel__intval(evsel, sample, "value"); c_state_start(cpu_id, sample->time, value); return 0; @@ -661,8 +661,8 @@ process_sample_power_frequency(struct timechart *tchart, struct perf_sample *sample, const char *backtrace __maybe_unused) { - u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id"); - u64 value = perf_evsel__intval(evsel, sample, "value"); + u64 cpu_id = evsel__intval(evsel, sample, "cpu_id"); + u64 value = evsel__intval(evsel, sample, "value"); p_state_change(tchart, cpu_id, sample->time, value); return 0; @@ -843,7 +843,7 @@ process_enter_read(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long fd = perf_evsel__intval(evsel, sample, "fd"); + long fd = evsel__intval(evsel, sample, "fd"); return pid_begin_io_sample(tchart, sample->tid, IOTYPE_READ, sample->time, fd); } @@ -853,7 +853,7 @@ process_exit_read(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long ret = perf_evsel__intval(evsel, sample, "ret"); + long ret = evsel__intval(evsel, sample, "ret"); return pid_end_io_sample(tchart, sample->tid, IOTYPE_READ, sample->time, ret); } @@ -863,7 +863,7 @@ process_enter_write(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long fd = perf_evsel__intval(evsel, sample, "fd"); + long fd = evsel__intval(evsel, sample, "fd"); return pid_begin_io_sample(tchart, sample->tid, IOTYPE_WRITE, sample->time, fd); } @@ -873,7 +873,7 @@ process_exit_write(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long ret = perf_evsel__intval(evsel, sample, "ret"); + long ret = evsel__intval(evsel, sample, "ret"); return pid_end_io_sample(tchart, sample->tid, IOTYPE_WRITE, sample->time, ret); } @@ -883,7 +883,7 @@ process_enter_sync(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long fd = perf_evsel__intval(evsel, sample, "fd"); + long fd = evsel__intval(evsel, sample, "fd"); return pid_begin_io_sample(tchart, sample->tid, IOTYPE_SYNC, sample->time, fd); } @@ -893,7 +893,7 @@ process_exit_sync(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long ret = perf_evsel__intval(evsel, sample, "ret"); + long ret = evsel__intval(evsel, sample, "ret"); return pid_end_io_sample(tchart, sample->tid, IOTYPE_SYNC, sample->time, ret); } @@ -903,7 +903,7 @@ process_enter_tx(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long fd = perf_evsel__intval(evsel, sample, "fd"); + long fd = evsel__intval(evsel, sample, "fd"); return pid_begin_io_sample(tchart, sample->tid, IOTYPE_TX, sample->time, fd); } @@ -913,7 +913,7 @@ process_exit_tx(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long ret = perf_evsel__intval(evsel, sample, "ret"); + long ret = evsel__intval(evsel, sample, "ret"); return pid_end_io_sample(tchart, sample->tid, IOTYPE_TX, sample->time, ret); } @@ -923,7 +923,7 @@ process_enter_rx(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long fd = perf_evsel__intval(evsel, sample, "fd"); + long fd = evsel__intval(evsel, sample, "fd"); return pid_begin_io_sample(tchart, sample->tid, IOTYPE_RX, sample->time, fd); } @@ -933,7 +933,7 @@ process_exit_rx(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long ret = perf_evsel__intval(evsel, sample, "ret"); + long ret = evsel__intval(evsel, sample, "ret"); return pid_end_io_sample(tchart, sample->tid, IOTYPE_RX, sample->time, ret); } @@ -943,7 +943,7 @@ process_enter_poll(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long fd = perf_evsel__intval(evsel, sample, "fd"); + long fd = evsel__intval(evsel, sample, "fd"); return pid_begin_io_sample(tchart, sample->tid, IOTYPE_POLL, sample->time, fd); } @@ -953,7 +953,7 @@ process_exit_poll(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long ret = perf_evsel__intval(evsel, sample, "ret"); + long ret = evsel__intval(evsel, sample, "ret"); return pid_end_io_sample(tchart, sample->tid, IOTYPE_POLL, sample->time, ret); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 289cf83e658a..13889d73f8dd 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -33,6 +33,7 @@ #include "util/map.h" #include "util/mmap.h" #include "util/session.h" +#include "util/thread.h" #include "util/symbol.h" #include "util/synthetic-events.h" #include "util/top.h" @@ -52,6 +53,7 @@ #include "util/debug.h" #include "util/ordered-events.h" +#include "util/pfm.h" #include <assert.h> #include <elf.h> @@ -254,7 +256,7 @@ static void perf_top__show_details(struct perf_top *top) if (notes->src == NULL) goto out_unlock; - printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name); + printf("Showing %s for %s\n", evsel__name(top->sym_evsel), symbol->name); printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt); more = symbol__annotate_printf(&he->ms, top->sym_evsel, &top->annotation_opts); @@ -297,8 +299,7 @@ static void perf_top__resort_hists(struct perf_top *t) hists__collapse_resort(hists, NULL); /* Non-group events are considered as leader */ - if (symbol_conf.event_group && - !perf_evsel__is_group_leader(pos)) { + if (symbol_conf.event_group && !evsel__is_group_leader(pos)) { struct hists *leader_hists = evsel__hists(pos->leader); hists__match(leader_hists, hists); @@ -307,7 +308,7 @@ static void perf_top__resort_hists(struct perf_top *t) } evlist__for_each_entry(evlist, pos) { - perf_evsel__output_resort(pos, NULL); + evsel__output_resort(pos, NULL); } } @@ -441,7 +442,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top) fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top->print_entries); if (top->evlist->core.nr_entries > 1) - fprintf(stdout, "\t[E] active event counter. \t(%s)\n", perf_evsel__name(top->sym_evsel)); + fprintf(stdout, "\t[E] active event counter. \t(%s)\n", evsel__name(top->sym_evsel)); fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter); @@ -528,13 +529,13 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) fprintf(stderr, "\nAvailable events:"); evlist__for_each_entry(top->evlist, top->sym_evsel) - fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel)); + fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, evsel__name(top->sym_evsel)); prompt_integer(&counter, "Enter details event counter"); if (counter >= top->evlist->core.nr_entries) { top->sym_evsel = evlist__first(top->evlist); - fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel)); + fprintf(stderr, "Sorry, no such event, using %s.\n", evsel__name(top->sym_evsel)); sleep(1); break; } @@ -775,6 +776,9 @@ static void perf_event__process_sample(struct perf_tool *tool, if (machine__resolve(machine, &al, sample) < 0) return; + if (top->stitch_lbr) + al.thread->lbr_stitch_enable = true; + if (!machine->kptr_restrict_warned && symbol_conf.kptr_restrict && al.cpumode == PERF_RECORD_MISC_KERNEL) { @@ -946,7 +950,7 @@ static int perf_top__overwrite_check(struct perf_top *top) { struct record_opts *opts = &top->record_opts; struct evlist *evlist = top->evlist; - struct perf_evsel_config_term *term; + struct evsel_config_term *term; struct list_head *config_terms; struct evsel *evsel; int set, overwrite = -1; @@ -955,7 +959,7 @@ static int perf_top__overwrite_check(struct perf_top *top) set = -1; config_terms = &evsel->config_terms; list_for_each_entry(term, config_terms, list) { - if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE) + if (term->type == EVSEL__CONFIG_TERM_OVERWRITE) set = term->val.overwrite ? 1 : 0; } @@ -1042,14 +1046,13 @@ try_again: perf_top_overwrite_fallback(top, counter)) goto try_again; - if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { + if (evsel__fallback(counter, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); goto try_again; } - perf_evsel__open_strerror(counter, &opts->target, - errno, msg, sizeof(msg)); + evsel__open_strerror(counter, &opts->target, errno, msg, sizeof(msg)); ui__error("%s\n", msg); goto out_err; } @@ -1571,10 +1574,16 @@ int cmd_top(int argc, const char **argv) "Sort the output by the event at the index n in group. " "If n is invalid, sort by the first event. " "WARNING: should be used on grouped events."), + OPT_BOOLEAN(0, "stitch-lbr", &top.stitch_lbr, + "Enable LBR callgraph stitching approach"), +#ifdef HAVE_LIBPFM + OPT_CALLBACK(0, "pfm-events", &top.evlist, "event", + "libpfm4 event selector. use 'perf list' to list available events", + parse_libpfm_events_option), +#endif OPTS_EVSWITCH(&top.evswitch), OPT_END() }; - struct evlist *sb_evlist = NULL; const char * const top_usage[] = { "perf top [<options>]", NULL @@ -1640,6 +1649,11 @@ int cmd_top(int argc, const char **argv) } } + if (top.stitch_lbr && !(callchain_param.record_mode == CALLCHAIN_LBR)) { + pr_err("Error: --stitch-lbr must be used with --call-graph lbr\n"); + goto out_delete_evlist; + } + if (opts->branch_stack && callchain_param.enabled) symbol_conf.show_branchflag_count = true; @@ -1732,10 +1746,21 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } - if (!top.record_opts.no_bpf_event) - bpf_event__add_sb_event(&sb_evlist, &perf_env); + if (!top.record_opts.no_bpf_event) { + top.sb_evlist = evlist__new(); + + if (top.sb_evlist == NULL) { + pr_err("Couldn't create side band evlist.\n."); + goto out_delete_evlist; + } + + if (evlist__add_bpf_sb_event(top.sb_evlist, &perf_env)) { + pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); + goto out_delete_evlist; + } + } - if (perf_evlist__start_sb_thread(sb_evlist, target)) { + if (perf_evlist__start_sb_thread(top.sb_evlist, target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); opts->no_bpf_event = true; } @@ -1743,7 +1768,7 @@ int cmd_top(int argc, const char **argv) status = __cmd_top(&top); if (!opts->no_bpf_event) - perf_evlist__stop_sb_thread(sb_evlist); + perf_evlist__stop_sb_thread(top.sb_evlist); out_delete_evlist: evlist__delete(top.evlist); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 01d542007c8b..4cbb64edc998 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -366,11 +366,9 @@ out_delete: return NULL; } -static int perf_evsel__init_tp_uint_field(struct evsel *evsel, - struct tp_field *field, - const char *name) +static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name) { - struct tep_format_field *format_field = perf_evsel__field(evsel, name); + struct tep_format_field *format_field = evsel__field(evsel, name); if (format_field == NULL) return -1; @@ -380,13 +378,11 @@ static int perf_evsel__init_tp_uint_field(struct evsel *evsel, #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ ({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\ - perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) + evsel__init_tp_uint_field(evsel, &sc->name, #name); }) -static int perf_evsel__init_tp_ptr_field(struct evsel *evsel, - struct tp_field *field, - const char *name) +static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name) { - struct tep_format_field *format_field = perf_evsel__field(evsel, name); + struct tep_format_field *format_field = evsel__field(evsel, name); if (format_field == NULL) return -1; @@ -396,7 +392,7 @@ static int perf_evsel__init_tp_ptr_field(struct evsel *evsel, #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ ({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\ - perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) + evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) static void evsel__delete_priv(struct evsel *evsel) { @@ -404,13 +400,13 @@ static void evsel__delete_priv(struct evsel *evsel) evsel__delete(evsel); } -static int perf_evsel__init_syscall_tp(struct evsel *evsel) +static int evsel__init_syscall_tp(struct evsel *evsel) { struct syscall_tp *sc = evsel__syscall_tp(evsel); if (sc != NULL) { - if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") && - perf_evsel__init_tp_uint_field(evsel, &sc->id, "nr")) + if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") && + evsel__init_tp_uint_field(evsel, &sc->id, "nr")) return -ENOENT; return 0; } @@ -418,14 +414,14 @@ static int perf_evsel__init_syscall_tp(struct evsel *evsel) return -ENOMEM; } -static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp) +static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp) { struct syscall_tp *sc = evsel__syscall_tp(evsel); if (sc != NULL) { - struct tep_format_field *syscall_id = perf_evsel__field(tp, "id"); + struct tep_format_field *syscall_id = evsel__field(tp, "id"); if (syscall_id == NULL) - syscall_id = perf_evsel__field(tp, "__syscall_nr"); + syscall_id = evsel__field(tp, "__syscall_nr"); if (syscall_id == NULL || __tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap)) return -EINVAL; @@ -436,21 +432,21 @@ static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evs return -ENOMEM; } -static int perf_evsel__init_augmented_syscall_tp_args(struct evsel *evsel) +static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel) { struct syscall_tp *sc = __evsel__syscall_tp(evsel); return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)); } -static int perf_evsel__init_augmented_syscall_tp_ret(struct evsel *evsel) +static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel) { struct syscall_tp *sc = __evsel__syscall_tp(evsel); return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap); } -static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler) +static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler) { if (evsel__syscall_tp(evsel) != NULL) { if (perf_evsel__init_sc_tp_uint_field(evsel, id)) @@ -465,16 +461,16 @@ static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler) static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler) { - struct evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); + struct evsel *evsel = evsel__newtp("raw_syscalls", direction); /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ if (IS_ERR(evsel)) - evsel = perf_evsel__newtp("syscalls", direction); + evsel = evsel__newtp("syscalls", direction); if (IS_ERR(evsel)) return NULL; - if (perf_evsel__init_raw_syscall_tp(evsel, handler)) + if (evsel__init_raw_syscall_tp(evsel, handler)) goto out_delete; return evsel; @@ -1752,12 +1748,26 @@ static int trace__read_syscall_info(struct trace *trace, int id) struct syscall *sc; const char *name = syscalltbl__name(trace->sctbl, id); +#ifdef HAVE_SYSCALL_TABLE_SUPPORT if (trace->syscalls.table == NULL) { trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc)); if (trace->syscalls.table == NULL) return -ENOMEM; } +#else + if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) { + // When using libaudit we don't know beforehand what is the max syscall id + struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); + + if (table == NULL) + return -ENOMEM; + + memset(table + trace->sctbl->syscalls.max_id, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc)); + trace->syscalls.table = table; + trace->sctbl->syscalls.max_id = id; + } +#endif sc = trace->syscalls.table + id; if (sc->nonexistent) return 0; @@ -1801,7 +1811,7 @@ static int trace__read_syscall_info(struct trace *trace, int id) return syscall__set_arg_fmts(sc); } -static int perf_evsel__init_tp_arg_scnprintf(struct evsel *evsel) +static int evsel__init_tp_arg_scnprintf(struct evsel *evsel) { struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel); @@ -2074,15 +2084,27 @@ static struct syscall *trace__syscall_info(struct trace *trace, if (verbose > 1) { static u64 n; fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", - id, perf_evsel__name(evsel), ++n); + id, evsel__name(evsel), ++n); } return NULL; } err = -EINVAL; - if (id > trace->sctbl->syscalls.max_id) +#ifdef HAVE_SYSCALL_TABLE_SUPPORT + if (id > trace->sctbl->syscalls.max_id) { +#else + if (id >= trace->sctbl->syscalls.max_id) { + /* + * With libaudit we don't know beforehand what is the max_id, + * so we let trace__read_syscall_info() figure that out as we + * go on reading syscalls. + */ + err = trace__read_syscall_info(trace, id); + if (err) +#endif goto out_cant_read; + } if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) && (err = trace__read_syscall_info(trace, id)) != 0) @@ -2206,7 +2228,7 @@ static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel, double ts = (double)sample->time / NSEC_PER_MSEC; printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n", - perf_evsel__name(evsel), ts, + evsel__name(evsel), ts, thread__comm_str(thread), sample->pid, sample->tid, sample->cpu); } @@ -2382,7 +2404,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam static const char *errno_to_name(struct evsel *evsel, int err) { - struct perf_env *env = perf_evsel__env(evsel); + struct perf_env *env = evsel__env(evsel); const char *arch_name = perf_env__arch(env); return arch_syscalls__strerrno(arch_name, err); @@ -2513,7 +2535,7 @@ errno_print: { if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) - pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); + pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel)); out: ttrace->entry_pending = false; err = 0; @@ -2531,7 +2553,7 @@ static int trace__vfs_getname(struct trace *trace, struct evsel *evsel, size_t filename_len, entry_str_len, to_move; ssize_t remaining_space; char *pos; - const char *filename = perf_evsel__rawptr(evsel, sample, "pathname"); + const char *filename = evsel__rawptr(evsel, sample, "pathname"); if (!thread) goto out; @@ -2587,7 +2609,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) { - u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); + u64 runtime = evsel__intval(evsel, sample, "runtime"); double runtime_ms = (double)runtime / NSEC_PER_MSEC; struct thread *thread = machine__findnew_thread(trace->host, sample->pid, @@ -2606,10 +2628,10 @@ out_put: out_dump: fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", evsel->name, - perf_evsel__strval(evsel, sample, "comm"), - (pid_t)perf_evsel__intval(evsel, sample, "pid"), + evsel__strval(evsel, sample, "comm"), + (pid_t)evsel__intval(evsel, sample, "pid"), runtime, - perf_evsel__intval(evsel, sample, "vruntime")); + evsel__intval(evsel, sample, "vruntime")); goto out_put; } @@ -2774,7 +2796,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, fprintf(trace->output, "%s(", evsel->name); - if (perf_evsel__is_bpf_output(evsel)) { + if (evsel__is_bpf_output(evsel)) { bpf_output__fprintf(trace, sample); } else if (evsel->tp_format) { if (strncmp(evsel->tp_format->name, "sys_enter_", 10) || @@ -2795,7 +2817,7 @@ newline: if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) - pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); + pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel)); ++trace->nr_events_printed; @@ -2890,7 +2912,7 @@ static int trace__pgfault(struct trace *trace, if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) - pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); + pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel)); ++trace->nr_events_printed; out: @@ -3032,10 +3054,10 @@ static bool evlist__add_vfs_getname(struct evlist *evlist) } evlist__for_each_entry_safe(evlist, evsel, tmp) { - if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname")) + if (!strstarts(evsel__name(evsel), "probe:vfs_getname")) continue; - if (perf_evsel__field(evsel, "pathname")) { + if (evsel__field(evsel, "pathname")) { evsel->handler = trace__vfs_getname; found = true; continue; @@ -3049,7 +3071,7 @@ static bool evlist__add_vfs_getname(struct evlist *evlist) return found; } -static struct evsel *perf_evsel__new_pgfault(u64 config) +static struct evsel *evsel__new_pgfault(u64 config) { struct evsel *evsel; struct perf_event_attr attr = { @@ -3093,7 +3115,7 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT && sample->raw_data == NULL) { fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", - perf_evsel__name(evsel), sample->tid, + evsel__name(evsel), sample->tid, sample->cpu, sample->raw_size); } else { tracepoint_handler handler = evsel->handler; @@ -3124,8 +3146,8 @@ static int trace__add_syscall_newtp(struct trace *trace) if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) goto out_delete_sys_exit; - perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param); - perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param); + evsel__config_callchain(sys_enter, &trace->opts, &callchain_param); + evsel__config_callchain(sys_exit, &trace->opts, &callchain_param); evlist__add(evlist, sys_enter); evlist__add(evlist, sys_exit); @@ -3164,10 +3186,9 @@ static int trace__set_ev_qualifier_tp_filter(struct trace *trace) if (filter == NULL) goto out_enomem; - if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter, - filter)) { + if (!evsel__append_tp_filter(trace->syscalls.events.sys_enter, filter)) { sys_exit = trace->syscalls.events.sys_exit; - err = perf_evsel__append_tp_filter(sys_exit, filter); + err = evsel__append_tp_filter(sys_exit, filter); } free(filter); @@ -3179,6 +3200,26 @@ out_enomem: } #ifdef HAVE_LIBBPF_SUPPORT +static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name) +{ + if (trace->bpf_obj == NULL) + return NULL; + + return bpf_object__find_map_by_name(trace->bpf_obj, name); +} + +static void trace__set_bpf_map_filtered_pids(struct trace *trace) +{ + trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered"); +} + +static void trace__set_bpf_map_syscalls(struct trace *trace) +{ + trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls"); + trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter"); + trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit"); +} + static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) { if (trace->bpf_obj == NULL) @@ -3517,6 +3558,20 @@ static void trace__delete_augmented_syscalls(struct trace *trace) trace->bpf_obj = NULL; } #else // HAVE_LIBBPF_SUPPORT +static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused, + const char *name __maybe_unused) +{ + return NULL; +} + +static void trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused) +{ +} + +static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused) +{ +} + static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused) { return 0; @@ -3695,7 +3750,7 @@ static int ordered_events__deliver_event(struct ordered_events *oe, return __trace__deliver_event(trace, event->event); } -static struct syscall_arg_fmt *perf_evsel__syscall_arg_fmt(struct evsel *evsel, char *arg) +static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg) { struct tep_format_field *field; struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel); @@ -3750,7 +3805,7 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel scnprintf(arg, sizeof(arg), "%.*s", left_size, left); - fmt = perf_evsel__syscall_arg_fmt(evsel, arg); + fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg); if (fmt == NULL) { pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n", arg, evsel->name, evsel->filter); @@ -3801,7 +3856,7 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel if (new_filter != evsel->filter) { pr_debug("New filter for %s: %s\n", evsel->name, new_filter); - perf_evsel__set_filter(evsel, new_filter); + evsel__set_filter(evsel, new_filter); free(new_filter); } @@ -3846,18 +3901,18 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } if ((trace->trace_pgfaults & TRACE_PFMAJ)) { - pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); + pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); if (pgfault_maj == NULL) goto out_error_mem; - perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); + evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); evlist__add(evlist, pgfault_maj); } if ((trace->trace_pgfaults & TRACE_PFMIN)) { - pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); + pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); if (pgfault_min == NULL) goto out_error_mem; - perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); + evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); evlist__add(evlist, pgfault_min); } @@ -4108,7 +4163,7 @@ out_error: out_error_apply_filters: fprintf(trace->output, "Failed to set filter \"%s\" on event %s with %d (%s)\n", - evsel->filter, perf_evsel__name(evsel), errno, + evsel->filter, evsel__name(evsel), errno, str_error_r(errno, errbuf, sizeof(errbuf))); goto out_delete_evlist; } @@ -4179,7 +4234,7 @@ static int trace__replay(struct trace *trace) "syscalls:sys_enter"); if (evsel && - (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 || + (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 || perf_evsel__init_sc_tp_ptr_field(evsel, args))) { pr_err("Error during initialize raw_syscalls:sys_enter event\n"); goto out; @@ -4191,7 +4246,7 @@ static int trace__replay(struct trace *trace) evsel = perf_evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit"); if (evsel && - (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 || + (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 || perf_evsel__init_sc_tp_uint_field(evsel, ret))) { pr_err("Error during initialize raw_syscalls:sys_exit event\n"); goto out; @@ -4471,11 +4526,11 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist) continue; if (strcmp(evsel->tp_format->system, "syscalls")) { - perf_evsel__init_tp_arg_scnprintf(evsel); + evsel__init_tp_arg_scnprintf(evsel); continue; } - if (perf_evsel__init_syscall_tp(evsel)) + if (evsel__init_syscall_tp(evsel)) return -1; if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) { @@ -4605,26 +4660,6 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u return 0; } -static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name) -{ - if (trace->bpf_obj == NULL) - return NULL; - - return bpf_object__find_map_by_name(trace->bpf_obj, name); -} - -static void trace__set_bpf_map_filtered_pids(struct trace *trace) -{ - trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered"); -} - -static void trace__set_bpf_map_syscalls(struct trace *trace) -{ - trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls"); - trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter"); - trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit"); -} - static int trace__config(const char *var, const char *value, void *arg) { struct trace *trace = arg; @@ -4989,7 +5024,7 @@ int cmd_trace(int argc, const char **argv) */ if (trace.syscalls.events.augmented) { evlist__for_each_entry(trace.evlist, evsel) { - bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0; + bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0; if (raw_syscalls_sys_exit) { trace.raw_augmented_syscalls = true; @@ -4997,10 +5032,10 @@ int cmd_trace(int argc, const char **argv) } if (trace.syscalls.events.augmented->priv == NULL && - strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) { + strstr(evsel__name(evsel), "syscalls:sys_enter")) { struct evsel *augmented = trace.syscalls.events.augmented; - if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) || - perf_evsel__init_augmented_syscall_tp_args(augmented)) + if (evsel__init_augmented_syscall_tp(augmented, evsel) || + evsel__init_augmented_syscall_tp_args(augmented)) goto out; /* * Augmented is __augmented_syscalls__ BPF_OUTPUT event @@ -5014,16 +5049,16 @@ int cmd_trace(int argc, const char **argv) * as not to filter it, then we'll handle it just like we would * for the BPF_OUTPUT one: */ - if (perf_evsel__init_augmented_syscall_tp(evsel, evsel) || - perf_evsel__init_augmented_syscall_tp_args(evsel)) + if (evsel__init_augmented_syscall_tp(evsel, evsel) || + evsel__init_augmented_syscall_tp_args(evsel)) goto out; evsel->handler = trace__sys_enter; } - if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) { + if (strstarts(evsel__name(evsel), "syscalls:sys_exit_")) { struct syscall_tp *sc; init_augmented_syscall_tp: - if (perf_evsel__init_augmented_syscall_tp(evsel, evsel)) + if (evsel__init_augmented_syscall_tp(evsel, evsel)) goto out; sc = __evsel__syscall_tp(evsel); /* @@ -5047,7 +5082,7 @@ init_augmented_syscall_tp: */ if (trace.raw_augmented_syscalls) trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset; - perf_evsel__init_augmented_syscall_tp_ret(evsel); + evsel__init_augmented_syscall_tp_ret(evsel); evsel->handler = trace__sys_exit; } } diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index cf147db4e5ca..94c2bc22c2bb 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -128,4 +128,8 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl +# check duplicated library files +check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h +check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c + cd tools/perf diff --git a/tools/perf/design.txt b/tools/perf/design.txt index 0453ba26cdbd..a42fab308ff6 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -258,7 +258,8 @@ gets schedule to. Per task counters can be created by any user, for their own tasks. A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts -all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. +all events on CPU-x. Per CPU counters need CAP_PERFMON or CAP_SYS_ADMIN +privilege. The 'flags' parameter is currently unused and must be zero. diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c index c441a34cb1c0..fcca275e5bf9 100644 --- a/tools/perf/jvmti/libjvmti.c +++ b/tools/perf/jvmti/libjvmti.c @@ -32,34 +32,41 @@ static void print_error(jvmtiEnv *jvmti, const char *msg, jvmtiError ret) #ifdef HAVE_JVMTI_CMLR static jvmtiError -do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci, - jvmti_line_info_t *tab, jint *nr) +do_get_line_number(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci, + jvmti_line_info_t *tab) { - jint i, lines = 0; - jint nr_lines = 0; + jint i, nr_lines = 0; jvmtiLineNumberEntry *loc_tab = NULL; jvmtiError ret; + jint src_line = -1; ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab); - if (ret != JVMTI_ERROR_NONE) { + if (ret == JVMTI_ERROR_ABSENT_INFORMATION || ret == JVMTI_ERROR_NATIVE_METHOD) { + /* No debug information for this method */ + return ret; + } else if (ret != JVMTI_ERROR_NONE) { print_error(jvmti, "GetLineNumberTable", ret); return ret; } - for (i = 0; i < nr_lines; i++) { - if (loc_tab[i].start_location < bci) { - tab[lines].pc = (unsigned long)pc; - tab[lines].line_number = loc_tab[i].line_number; - tab[lines].discrim = 0; /* not yet used */ - tab[lines].methodID = m; - lines++; - } else { - break; - } + for (i = 0; i < nr_lines && loc_tab[i].start_location <= bci; i++) { + src_line = i; + } + + if (src_line != -1) { + tab->pc = (unsigned long)pc; + tab->line_number = loc_tab[src_line].line_number; + tab->discrim = 0; /* not yet used */ + tab->methodID = m; + + ret = JVMTI_ERROR_NONE; + } else { + ret = JVMTI_ERROR_ABSENT_INFORMATION; } + (*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab); - *nr = lines; - return JVMTI_ERROR_NONE; + + return ret; } static jvmtiError @@ -67,9 +74,8 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t ** { const jvmtiCompiledMethodLoadRecordHeader *hdr; jvmtiCompiledMethodLoadInlineRecord *rec; - jvmtiLineNumberEntry *lne = NULL; PCStackInfo *c; - jint nr, ret; + jint ret; int nr_total = 0; int i, lines_total = 0; @@ -82,21 +88,7 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t ** for (hdr = compile_info; hdr != NULL; hdr = hdr->next) { if (hdr->kind == JVMTI_CMLR_INLINE_INFO) { rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr; - for (i = 0; i < rec->numpcs; i++) { - c = rec->pcinfo + i; - nr = 0; - /* - * unfortunately, need a tab to get the number of lines! - */ - ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne); - if (ret == JVMTI_ERROR_NONE) { - /* free what was allocated for nothing */ - (*jvmti)->Deallocate(jvmti, (unsigned char *)lne); - nr_total += (int)nr; - } else { - print_error(jvmti, "GetLineNumberTable", ret); - } - } + nr_total += rec->numpcs; } } @@ -115,14 +107,17 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t ** rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr; for (i = 0; i < rec->numpcs; i++) { c = rec->pcinfo + i; - nr = 0; - ret = do_get_line_numbers(jvmti, c->pc, - c->methods[0], - c->bcis[0], - *tab + lines_total, - &nr); + /* + * c->methods is the stack of inlined method calls + * at c->pc. [0] is the leaf method. Caller frames + * are ignored at the moment. + */ + ret = do_get_line_number(jvmti, c->pc, + c->methods[0], + c->bcis[0], + *tab + lines_total); if (ret == JVMTI_ERROR_NONE) - lines_total += nr; + lines_total++; } } } @@ -246,8 +241,6 @@ compiled_method_load_cb(jvmtiEnv *jvmti, char *class_sign = NULL; char *func_name = NULL; char *func_sign = NULL; - char *file_name = NULL; - char fn[PATH_MAX]; uint64_t addr = (uint64_t)(uintptr_t)code_addr; jvmtiError ret; int nr_lines = 0; /* in line_tab[] */ @@ -264,7 +257,9 @@ compiled_method_load_cb(jvmtiEnv *jvmti, if (has_line_numbers && map && map_length) { ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines); if (ret != JVMTI_ERROR_NONE) { - warnx("jvmti: cannot get line table for method"); + if (ret != JVMTI_ERROR_NOT_FOUND) { + warnx("jvmti: cannot get line table for method"); + } nr_lines = 0; } else if (nr_lines > 0) { line_file_names = malloc(sizeof(char*) * nr_lines); @@ -282,12 +277,6 @@ compiled_method_load_cb(jvmtiEnv *jvmti, } } - ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name); - if (ret != JVMTI_ERROR_NONE) { - print_error(jvmti, "GetSourceFileName", ret); - goto error; - } - ret = (*jvmti)->GetClassSignature(jvmti, decl_class, &class_sign, NULL); if (ret != JVMTI_ERROR_NONE) { @@ -302,8 +291,6 @@ compiled_method_load_cb(jvmtiEnv *jvmti, goto error; } - copy_class_filename(class_sign, file_name, fn, PATH_MAX); - /* * write source line info record if we have it */ @@ -323,7 +310,6 @@ error: (*jvmti)->Deallocate(jvmti, (unsigned char *)func_name); (*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign); (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); - (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); free(line_tab); while (line_file_names && (nr_lines > 0)) { if (line_file_names[nr_lines - 1]) { diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json index bffb2d4a6420..fc4aa6c2ddc9 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json @@ -169,7 +169,7 @@ }, { "BriefDescription": "Cycles GCT empty where dispatch was held", - "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL)", + "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL", "MetricGroup": "cpi_breakdown", "MetricName": "gct_empty_disp_held_cpi" }, diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json index 811c2a8c1c9e..80816d6402e9 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json @@ -208,6 +208,84 @@ "MetricName": "fxu_stall_cpi" }, { + "BriefDescription": "Instruction Completion Table empty for this thread due to branch mispred", + "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_br_mpred_cpi" + }, + { + "BriefDescription": "Instruction Completion Table empty for this thread due to Icache Miss and branch mispred", + "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_br_mpred_icmiss_cpi" + }, + { + "BriefDescription": "Instruction Completion Table other stalls", + "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_cyc_other_cpi" + }, + { + "BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_disp_held_cpi" + }, + { + "BriefDescription": "Instruction Completion Table empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_disp_held_hb_full_cpi" + }, + { + "BriefDescription": "Instruction Completion Table empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_disp_held_issq_cpi" + }, + { + "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI", + "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_disp_held_other_cpi" + }, + { + "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_disp_held_sync_cpi" + }, + { + "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_disp_held_tbegin_cpi" + }, + { + "BriefDescription": "ICT_NOSLOT_IC_L2_CPI", + "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_ic_l2_cpi" + }, + { + "BriefDescription": "Instruction Completion Table empty for this thread due to icache misses that were sourced from the local L3", + "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_ic_l3_cpi" + }, + { + "BriefDescription": "Instruction Completion Table empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache", + "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_ic_l3miss_cpi" + }, + { + "BriefDescription": "Instruction Completion Table empty for this thread due to Icache Miss", + "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ict_noslot_ic_miss_cpi" + }, + { "MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL", "MetricGroup": "cpi_breakdown", "MetricName": "issue_hold_cpi" @@ -313,7 +391,7 @@ "MetricName": "nested_tend_stall_cpi" }, { - "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread", + "BriefDescription": "Number of cycles the Instruction Completion Table has no itags assigned to this thread", "MetricExpr": "PM_ICT_NOSLOT_CYC/PM_RUN_INST_CMPL", "MetricGroup": "cpi_breakdown", "MetricName": "nothing_dispatched_cpi" @@ -362,7 +440,7 @@ }, { "BriefDescription": "Completion stall for other reasons", - "MetricExpr": "PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL", + "MetricExpr": "(PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL", "MetricGroup": "cpi_breakdown", "MetricName": "other_stall_cpi" }, @@ -425,7 +503,7 @@ "MetricName": "st_fwd_stall_cpi" }, { - "BriefDescription": "Nothing completed and ICT not empty", + "BriefDescription": "Nothing completed and Instruction Completion Table not empty", "MetricExpr": "PM_CMPLU_STALL/PM_RUN_INST_CMPL", "MetricGroup": "cpi_breakdown", "MetricName": "stall_cpi" @@ -1820,71 +1898,6 @@ "MetricName": "fxu_all_idle" }, { - "BriefDescription": "Ict empty for this thread due to branch mispred", - "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_br_mpred_cpi" - }, - { - "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred", - "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_br_mpred_icmiss_cpi" - }, - { - "BriefDescription": "ICT other stalls", - "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_cyc_other_cpi" - }, - { - "BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason", - "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_disp_held_cpi" - }, - { - "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF", - "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_disp_held_hb_full_cpi" - }, - { - "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full", - "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_disp_held_issq_cpi" - }, - { - "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI", - "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_disp_held_other_cpi" - }, - { - "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch", - "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_disp_held_sync_cpi" - }, - { - "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch", - "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_disp_held_tbegin_cpi" - }, - { - "BriefDescription": "ICT_NOSLOT_IC_L2_CPI", - "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_ic_l2_cpi" - }, - { - "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3", - "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_ic_l3_cpi" - }, - { - "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache", - "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_ic_l3miss_cpi" - }, - { - "BriefDescription": "Ict empty for this thread due to Icache Miss", - "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL", - "MetricName": "ict_noslot_ic_miss_cpi" - }, - { "BriefDescription": "Rate of IERAT reloads from L2", "MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", "MetricName": "ipteg_from_l2_rate_percent" diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json new file mode 100644 index 000000000000..c121e526442a --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json @@ -0,0 +1,19 @@ +[ + { + "MetricExpr": "(hv_24x7@PM_MCS01_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_RD_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT23\\,chip\\=?@)", + "MetricName": "Memory_RD_BW_Chip", + "MetricGroup": "Memory_BW", + "ScaleUnit": "1.6e-2MB" + }, + { + "MetricExpr": "(hv_24x7@PM_MCS01_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_WR_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT23\\,chip\\=?@ )", + "MetricName": "Memory_WR_BW_Chip", + "MetricGroup": "Memory_BW", + "ScaleUnit": "1.6e-2MB" + }, + { + "MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )", + "MetricName": "PowerBUS_Frequency", + "ScaleUnit": "2.5e-7GHz" + } +] diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index 7fde0d2943cd..d25eebce34c9 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -328,31 +328,31 @@ }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", "MetricGroup": "Memory_Lat", "MetricName": "DRAM_Read_Latency" }, { "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", + "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@", "MetricGroup": "Memory_BW", "MetricName": "DRAM_Parallel_Reads" }, { "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches", - "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\\\\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ ) if 1 if 0 == 1 else 0 else 0", + "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ )", "MetricGroup": "Memory_Lat", "MetricName": "MEM_PMM_Read_Latency" }, { "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]", - "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0", + "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time )", "MetricGroup": "Memory_BW", "MetricName": "PMM_Read_BW" }, { "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]", - "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0", + "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time )", "MetricGroup": "Memory_BW", "MetricName": "PMM_Write_BW" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index b4f91137f40c..390bdab1be9d 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -328,13 +328,13 @@ }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", "MetricGroup": "Memory_Lat", "MetricName": "DRAM_Read_Latency" }, { "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", + "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@", "MetricGroup": "Memory_BW", "MetricName": "DRAM_Parallel_Reads" }, diff --git a/tools/perf/pmu-events/jsmn.h b/tools/perf/pmu-events/jsmn.h index c7b0f6ea2a31..1bdfd55fff30 100644 --- a/tools/perf/pmu-events/jsmn.h +++ b/tools/perf/pmu-events/jsmn.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ #ifndef __JSMN_H_ #define __JSMN_H_ diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 53e76d5d5b37..c8f306b572f4 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -26,7 +26,7 @@ struct pmu_event { * Map a CPU to its table of PMU events. The CPU is identified by the * cpuid field, which is an arch-specific identifier for the CPU. * The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile - * must match the get_cpustr() in tools/perf/arch/xxx/util/header.c) + * must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c) * * The cpuid can contain any character other than the comma. */ diff --git a/tools/perf/scripts/python/bin/flamegraph-record b/tools/perf/scripts/python/bin/flamegraph-record new file mode 100755 index 000000000000..7df5a19c0163 --- /dev/null +++ b/tools/perf/scripts/python/bin/flamegraph-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -g "$@" diff --git a/tools/perf/scripts/python/bin/flamegraph-report b/tools/perf/scripts/python/bin/flamegraph-report new file mode 100755 index 000000000000..53c5dc90c87e --- /dev/null +++ b/tools/perf/scripts/python/bin/flamegraph-report @@ -0,0 +1,3 @@ +#!/bin/bash +# description: create flame graphs +perf script -s "$PERF_EXEC_PATH"/scripts/python/flamegraph.py -- "$@" diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py new file mode 100755 index 000000000000..61f3be9add6b --- /dev/null +++ b/tools/perf/scripts/python/flamegraph.py @@ -0,0 +1,124 @@ +# flamegraph.py - create flame graphs from perf samples +# SPDX-License-Identifier: GPL-2.0 +# +# Usage: +# +# perf record -a -g -F 99 sleep 60 +# perf script report flamegraph +# +# Combined: +# +# perf script flamegraph -a -F 99 sleep 60 +# +# Written by Andreas Gerstmayr <agerstmayr@redhat.com> +# Flame Graphs invented by Brendan Gregg <bgregg@netflix.com> +# Works in tandem with d3-flame-graph by Martin Spier <mspier@netflix.com> + +from __future__ import print_function +import sys +import os +import argparse +import json + + +class Node: + def __init__(self, name, libtype=""): + self.name = name + self.libtype = libtype + self.value = 0 + self.children = [] + + def toJSON(self): + return { + "n": self.name, + "l": self.libtype, + "v": self.value, + "c": self.children + } + + +class FlameGraphCLI: + def __init__(self, args): + self.args = args + self.stack = Node("root") + + if self.args.format == "html" and \ + not os.path.isfile(self.args.template): + print("Flame Graph template {} does not exist. Please install " + "the js-d3-flame-graph (RPM) or libjs-d3-flame-graph (deb) " + "package, specify an existing flame graph template " + "(--template PATH) or another output format " + "(--format FORMAT).".format(self.args.template), + file=sys.stderr) + sys.exit(1) + + def find_or_create_node(self, node, name, dso): + libtype = "kernel" if dso == "[kernel.kallsyms]" else "" + if name is None: + name = "[unknown]" + + for child in node.children: + if child.name == name and child.libtype == libtype: + return child + + child = Node(name, libtype) + node.children.append(child) + return child + + def process_event(self, event): + node = self.find_or_create_node(self.stack, event["comm"], None) + if "callchain" in event: + for entry in reversed(event['callchain']): + node = self.find_or_create_node( + node, entry.get("sym", {}).get("name"), event.get("dso")) + else: + node = self.find_or_create_node( + node, entry.get("symbol"), event.get("dso")) + node.value += 1 + + def trace_end(self): + json_str = json.dumps(self.stack, default=lambda x: x.toJSON()) + + if self.args.format == "html": + try: + with open(self.args.template) as f: + output_str = f.read().replace("/** @flamegraph_json **/", + json_str) + except IOError as e: + print("Error reading template file: {}".format(e), file=sys.stderr) + sys.exit(1) + output_fn = self.args.output or "flamegraph.html" + else: + output_str = json_str + output_fn = self.args.output or "stacks.json" + + if output_fn == "-": + sys.stdout.write(output_str) + else: + print("dumping data to {}".format(output_fn)) + try: + with open(output_fn, "w") as out: + out.write(output_str) + except IOError as e: + print("Error writing output file: {}".format(e), file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Create flame graphs.") + parser.add_argument("-f", "--format", + default="html", choices=["json", "html"], + help="output file format") + parser.add_argument("-o", "--output", + help="output file name") + parser.add_argument("--template", + default="/usr/share/d3-flame-graph/d3-flamegraph-base.html", + help="path to flamegraph HTML template") + parser.add_argument("-i", "--input", + help=argparse.SUPPRESS) + + args = parser.parse_args() + cli = FlameGraphCLI(args) + + process_event = cli.process_event + trace_end = cli.trace_end diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index b3d1bf13ca07..cd00498a5dce 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -56,6 +56,9 @@ perf-y += mem2node.o perf-y += maps.o perf-y += time-utils-test.o perf-y += genelf.o +perf-y += api-io.o +perf-y += demangle-java-test.o +perf-y += pfm.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/api-io.c b/tools/perf/tests/api-io.c new file mode 100644 index 000000000000..2ada86ad6084 --- /dev/null +++ b/tools/perf/tests/api-io.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "debug.h" +#include "tests.h" +#include <api/io.h> +#include <linux/kernel.h> + +#define TEMPL "/tmp/perf-test-XXXXXX" + +#define EXPECT_EQUAL(val, expected) \ +do { \ + if (val != expected) { \ + pr_debug("%s:%d: %d != %d\n", \ + __FILE__, __LINE__, val, expected); \ + ret = -1; \ + } \ +} while (0) + +#define EXPECT_EQUAL64(val, expected) \ +do { \ + if (val != expected) { \ + pr_debug("%s:%d: %lld != %lld\n", \ + __FILE__, __LINE__, val, expected); \ + ret = -1; \ + } \ +} while (0) + +static int make_test_file(char path[PATH_MAX], const char *contents) +{ + ssize_t contents_len = strlen(contents); + int fd; + + strcpy(path, TEMPL); + fd = mkstemp(path); + if (fd < 0) { + pr_debug("mkstemp failed"); + return -1; + } + if (write(fd, contents, contents_len) < contents_len) { + pr_debug("short write"); + close(fd); + unlink(path); + return -1; + } + close(fd); + return 0; +} + +static int setup_test(char path[PATH_MAX], const char *contents, + size_t buf_size, struct io *io) +{ + if (make_test_file(path, contents)) + return -1; + + io->fd = open(path, O_RDONLY); + if (io->fd < 0) { + pr_debug("Failed to open '%s'\n", path); + unlink(path); + return -1; + } + io->buf = malloc(buf_size); + if (io->buf == NULL) { + pr_debug("Failed to allocate memory"); + close(io->fd); + unlink(path); + return -1; + } + io__init(io, io->fd, io->buf, buf_size); + return 0; +} + +static void cleanup_test(char path[PATH_MAX], struct io *io) +{ + free(io->buf); + close(io->fd); + unlink(path); +} + +static int do_test_get_char(const char *test_string, size_t buf_size) +{ + char path[PATH_MAX]; + struct io io; + int ch, ret = 0; + size_t i; + + if (setup_test(path, test_string, buf_size, &io)) + return -1; + + for (i = 0; i < strlen(test_string); i++) { + ch = io__get_char(&io); + + EXPECT_EQUAL(ch, test_string[i]); + EXPECT_EQUAL(io.eof, false); + } + ch = io__get_char(&io); + EXPECT_EQUAL(ch, -1); + EXPECT_EQUAL(io.eof, true); + + cleanup_test(path, &io); + return ret; +} + +static int test_get_char(void) +{ + int i, ret = 0; + size_t j; + + static const char *const test_strings[] = { + "12345678abcdef90", + "a\nb\nc\nd\n", + "\a\b\t\v\f\r", + }; + for (i = 0; i <= 10; i++) { + for (j = 0; j < ARRAY_SIZE(test_strings); j++) { + if (do_test_get_char(test_strings[j], 1 << i)) + ret = -1; + } + } + return ret; +} + +static int do_test_get_hex(const char *test_string, + __u64 val1, int ch1, + __u64 val2, int ch2, + __u64 val3, int ch3, + bool end_eof) +{ + char path[PATH_MAX]; + struct io io; + int ch, ret = 0; + __u64 hex; + + if (setup_test(path, test_string, 4, &io)) + return -1; + + ch = io__get_hex(&io, &hex); + EXPECT_EQUAL64(hex, val1); + EXPECT_EQUAL(ch, ch1); + + ch = io__get_hex(&io, &hex); + EXPECT_EQUAL64(hex, val2); + EXPECT_EQUAL(ch, ch2); + + ch = io__get_hex(&io, &hex); + EXPECT_EQUAL64(hex, val3); + EXPECT_EQUAL(ch, ch3); + + EXPECT_EQUAL(io.eof, end_eof); + + cleanup_test(path, &io); + return ret; +} + +static int test_get_hex(void) +{ + int ret = 0; + + if (do_test_get_hex("12345678abcdef90", + 0x12345678abcdef90, -1, + 0, -1, + 0, -1, + true)) + ret = -1; + + if (do_test_get_hex("1\n2\n3\n", + 1, '\n', + 2, '\n', + 3, '\n', + false)) + ret = -1; + + if (do_test_get_hex("12345678ABCDEF90;a;b", + 0x12345678abcdef90, ';', + 0xa, ';', + 0xb, -1, + true)) + ret = -1; + + if (do_test_get_hex("0x1x2x", + 0, 'x', + 1, 'x', + 2, 'x', + false)) + ret = -1; + + if (do_test_get_hex("x1x", + 0, -2, + 1, 'x', + 0, -1, + true)) + ret = -1; + + if (do_test_get_hex("10000000000000000000000000000abcdefgh99i", + 0xabcdef, 'g', + 0, -2, + 0x99, 'i', + false)) + ret = -1; + + return ret; +} + +static int do_test_get_dec(const char *test_string, + __u64 val1, int ch1, + __u64 val2, int ch2, + __u64 val3, int ch3, + bool end_eof) +{ + char path[PATH_MAX]; + struct io io; + int ch, ret = 0; + __u64 dec; + + if (setup_test(path, test_string, 4, &io)) + return -1; + + ch = io__get_dec(&io, &dec); + EXPECT_EQUAL64(dec, val1); + EXPECT_EQUAL(ch, ch1); + + ch = io__get_dec(&io, &dec); + EXPECT_EQUAL64(dec, val2); + EXPECT_EQUAL(ch, ch2); + + ch = io__get_dec(&io, &dec); + EXPECT_EQUAL64(dec, val3); + EXPECT_EQUAL(ch, ch3); + + EXPECT_EQUAL(io.eof, end_eof); + + cleanup_test(path, &io); + return ret; +} + +static int test_get_dec(void) +{ + int ret = 0; + + if (do_test_get_dec("12345678abcdef90", + 12345678, 'a', + 0, -2, + 0, -2, + false)) + ret = -1; + + if (do_test_get_dec("1\n2\n3\n", + 1, '\n', + 2, '\n', + 3, '\n', + false)) + ret = -1; + + if (do_test_get_dec("12345678;1;2", + 12345678, ';', + 1, ';', + 2, -1, + true)) + ret = -1; + + if (do_test_get_dec("0x1x2x", + 0, 'x', + 1, 'x', + 2, 'x', + false)) + ret = -1; + + if (do_test_get_dec("x1x", + 0, -2, + 1, 'x', + 0, -1, + true)) + ret = -1; + + if (do_test_get_dec("10000000000000000000000000000000000000000000000000000000000123456789ab99c", + 123456789, 'a', + 0, -2, + 99, 'c', + false)) + ret = -1; + + return ret; +} + +int test__api_io(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + int ret = 0; + + if (test_get_char()) + ret = TEST_FAIL; + if (test_get_hex()) + ret = TEST_FAIL; + if (test_get_dec()) + ret = TEST_FAIL; + return ret; +} diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy new file mode 100644 index 000000000000..eba723cc0d38 --- /dev/null +++ b/tools/perf/tests/attr/system-wide-dummy @@ -0,0 +1,50 @@ +# Event added by system-wide or CPU perf-record to handle the race of +# processes starting while /proc is processed. +[event] +fd=1 +group_fd=-1 +cpu=* +pid=-1 +flags=8 +type=1 +size=120 +config=9 +sample_period=4000 +sample_type=455 +read_format=4 +# Event will be enabled right away. +disabled=0 +inherit=1 +pinned=0 +exclusive=0 +exclude_user=0 +exclude_kernel=0 +exclude_hv=0 +exclude_idle=0 +mmap=1 +comm=1 +freq=1 +inherit_stat=0 +enable_on_exec=0 +task=1 +watermark=0 +precise_ip=0 +mmap_data=0 +sample_id_all=1 +exclude_host=0 +exclude_guest=0 +exclude_callchain_kernel=0 +exclude_callchain_user=0 +mmap2=1 +comm_exec=1 +context_switch=0 +write_backward=0 +namespaces=0 +use_clockid=0 +wakeup_events=0 +bp_type=0 +config1=0 +config2=0 +branch_sample_type=0 +sample_regs_user=0 +sample_stack_user=0 diff --git a/tools/perf/tests/attr/test-record-C0 b/tools/perf/tests/attr/test-record-C0 index 93818054ae20..317730b906dd 100644 --- a/tools/perf/tests/attr/test-record-C0 +++ b/tools/perf/tests/attr/test-record-C0 @@ -9,6 +9,14 @@ cpu=0 # no enable on exec for CPU attached enable_on_exec=0 -# PERF_SAMPLE_IP | PERF_SAMPLE_TID PERF_SAMPLE_TIME | # PERF_SAMPLE_PERIOD +# PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | +# PERF_SAMPLE_ID | PERF_SAMPLE_PERIOD # + PERF_SAMPLE_CPU added by -C 0 -sample_type=391 +sample_type=455 + +# Dummy event handles mmaps, comm and task. +mmap=0 +comm=0 +task=0 + +[event:system-wide-dummy] diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index b6322eb0f423..da5b6cc23f25 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -75,6 +75,13 @@ static struct test generic_tests[] = { { .desc = "PMU events", .func = test__pmu_events, + .subtest = { + .skip_if_fail = false, + .get_nr = test__pmu_events_subtest_get_nr, + .get_desc = test__pmu_events_subtest_get_desc, + .skip_reason = test__pmu_events_subtest_skip_reason, + }, + }, { .desc = "DSO data read", @@ -310,10 +317,27 @@ static struct test generic_tests[] = { .func = test__jit_write_elf, }, { + .desc = "Test libpfm4 support", + .func = test__pfm, + .subtest = { + .skip_if_fail = true, + .get_nr = test__pfm_subtest_get_nr, + .get_desc = test__pfm_subtest_get_desc, + } + }, + { + .desc = "Test api io", + .func = test__api_io, + }, + { .desc = "maps__merge_in", .func = test__maps__merge_in, }, { + .desc = "Demangle Java", + .func = test__demangle_java, + }, + { .func = NULL, }, }; @@ -323,7 +347,7 @@ static struct test *tests[] = { arch_tests, }; -static bool perf_test__matches(struct test *test, int curr, int argc, const char *argv[]) +static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[]) { int i; @@ -340,7 +364,7 @@ static bool perf_test__matches(struct test *test, int curr, int argc, const char continue; } - if (strcasestr(test->desc, argv[i])) + if (strcasestr(desc, argv[i])) return true; } @@ -425,8 +449,15 @@ static int test_and_print(struct test *t, bool force_skip, int subtest) case TEST_OK: pr_info(" Ok\n"); break; - case TEST_SKIP: - color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n"); + case TEST_SKIP: { + const char *skip_reason = NULL; + if (t->subtest.skip_reason) + skip_reason = t->subtest.skip_reason(subtest); + if (skip_reason) + color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", skip_reason); + else + color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n"); + } break; case TEST_FAIL: default: @@ -562,7 +593,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width) .priv = &st, }; - if (!perf_test__matches(&test, curr, argc, argv)) + if (!perf_test__matches(test.desc, curr, argc, argv)) continue; st.file = ent->d_name; @@ -590,9 +621,25 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) for_each_test(j, t) { int curr = i++, err; + int subi; - if (!perf_test__matches(t, curr, argc, argv)) - continue; + if (!perf_test__matches(t->desc, curr, argc, argv)) { + bool skip = true; + int subn; + + if (!t->subtest.get_nr) + continue; + + subn = t->subtest.get_nr(); + + for (subi = 0; subi < subn; subi++) { + if (perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv)) + skip = false; + } + + if (skip) + continue; + } if (t->is_supported && !t->is_supported()) { pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc); @@ -620,7 +667,6 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) */ int subw = width > 2 ? width - 2 : width; bool skip = false; - int subi; if (subn <= 0) { color_fprintf(stderr, PERF_COLOR_YELLOW, @@ -637,6 +683,9 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) } for (subi = 0; subi < subn; subi++) { + if (!perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv)) + continue; + pr_info("%2d.%1d: %-*s:", i, subi + 1, subw, t->subtest.get_desc(subi)); err = test_and_print(t, skip, subi); @@ -670,7 +719,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i) .desc = shell_test__description(bf, sizeof(bf), path, ent->d_name), }; - if (!perf_test__matches(&t, curr, argc, argv)) + if (!perf_test__matches(t.desc, curr, argc, argv)) continue; pr_info("%2d: %s\n", i, t.desc); @@ -689,7 +738,7 @@ static int perf_test__list(int argc, const char **argv) for_each_test(j, t) { int curr = i++; - if (!perf_test__matches(t, curr, argc, argv) || + if (!perf_test__matches(t->desc, curr, argc, argv) || (t->is_supported && !t->is_supported())) continue; diff --git a/tools/perf/tests/demangle-java-test.c b/tools/perf/tests/demangle-java-test.c new file mode 100644 index 000000000000..8f3b90832fb0 --- /dev/null +++ b/tools/perf/tests/demangle-java-test.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include "tests.h" +#include "session.h" +#include "debug.h" +#include "demangle-java.h" + +int test__demangle_java(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + int ret = TEST_OK; + char *buf = NULL; + size_t i; + + struct { + const char *mangled, *demangled; + } test_cases[] = { + { "Ljava/lang/StringLatin1;equals([B[B)Z", + "boolean java.lang.StringLatin1.equals(byte[], byte[])" }, + { "Ljava/util/zip/ZipUtils;CENSIZ([BI)J", + "long java.util.zip.ZipUtils.CENSIZ(byte[], int)" }, + { "Ljava/util/regex/Pattern$BmpCharProperty;match(Ljava/util/regex/Matcher;ILjava/lang/CharSequence;)Z", + "boolean java.util.regex.Pattern$BmpCharProperty.match(java.util.regex.Matcher, int, java.lang.CharSequence)" }, + { "Ljava/lang/AbstractStringBuilder;appendChars(Ljava/lang/String;II)V", + "void java.lang.AbstractStringBuilder.appendChars(java.lang.String, int, int)" }, + { "Ljava/lang/Object;<init>()V", + "void java.lang.Object<init>()" }, + }; + + for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { + buf = java_demangle_sym(test_cases[i].mangled, 0); + if (strcmp(buf, test_cases[i].demangled)) { + pr_debug("FAILED: %s: %s != %s\n", test_cases[i].mangled, + buf, test_cases[i].demangled); + ret = TEST_FAIL; + } + free(buf); + } + + return ret; +} diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 779ce280a0e9..2491d167bf76 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -37,6 +37,7 @@ static int init_live_machine(struct machine *machine) union perf_event event; pid_t pid = getpid(); + memset(&event, 0, sizeof(event)); return perf_event__synthesize_mmap_events(NULL, &event, pid, pid, mmap_handler, machine, true); } @@ -94,7 +95,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) return strcmp((const char *) symbol, funcs[idx]); } -noinline int test_dwarf_unwind__thread(struct thread *thread) +__no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread) { struct perf_sample sample; unsigned long cnt = 0; @@ -125,7 +126,7 @@ noinline int test_dwarf_unwind__thread(struct thread *thread) static int global_unwind_retval = -INT_MAX; -noinline int test_dwarf_unwind__compare(void *p1, void *p2) +__no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2) { /* Any possible value should be 'thread' */ struct thread *thread = *(struct thread **)p1; @@ -144,7 +145,7 @@ noinline int test_dwarf_unwind__compare(void *p1, void *p2) return p1 - p2; } -noinline int test_dwarf_unwind__krava_3(struct thread *thread) +__no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread) { struct thread *array[2] = {thread, thread}; void *fp = &bsearch; @@ -163,12 +164,12 @@ noinline int test_dwarf_unwind__krava_3(struct thread *thread) return global_unwind_retval; } -noinline int test_dwarf_unwind__krava_2(struct thread *thread) +__no_tail_call noinline int test_dwarf_unwind__krava_2(struct thread *thread) { return test_dwarf_unwind__krava_3(thread); } -noinline int test_dwarf_unwind__krava_1(struct thread *thread) +__no_tail_call noinline int test_dwarf_unwind__krava_1(struct thread *thread) { return test_dwarf_unwind__krava_2(thread); } diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 1e8a9f5c356d..db68894a6f40 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -72,7 +72,7 @@ static int attach__current_disabled(struct evlist *evlist) evsel->core.attr.disabled = 1; - err = perf_evsel__open_per_thread(evsel, threads); + err = evsel__open_per_thread(evsel, threads); if (err) { pr_debug("Failed to open event cpu-clock:u\n"); return err; @@ -96,7 +96,7 @@ static int attach__current_enabled(struct evlist *evlist) return -1; } - err = perf_evsel__open_per_thread(evsel, threads); + err = evsel__open_per_thread(evsel, threads); perf_thread_map__put(threads); return err == 0 ? TEST_OK : TEST_FAIL; @@ -125,7 +125,7 @@ static int attach__cpu_disabled(struct evlist *evlist) evsel->core.attr.disabled = 1; - err = perf_evsel__open_per_cpu(evsel, cpus, -1); + err = evsel__open_per_cpu(evsel, cpus, -1); if (err) { if (err == -EACCES) return TEST_SKIP; @@ -152,7 +152,7 @@ static int attach__cpu_enabled(struct evlist *evlist) return -1; } - err = perf_evsel__open_per_cpu(evsel, cpus, -1); + err = evsel__open_per_cpu(evsel, cpus, -1); if (err == -EACCES) return TEST_SKIP; diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index c727379cf20e..bdcf032f8516 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -109,7 +109,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu TEST_ASSERT_VAL("failed to synthesize attr update scale", !perf_event__synthesize_event_update_scale(NULL, evsel, process_event_scale)); - tmp.name = perf_evsel__name(evsel); + tmp.name = evsel__name(evsel); TEST_ASSERT_VAL("failed to synthesize attr update name", !perf_event__synthesize_event_update_name(&tmp.tool, evsel, process_event_name)); diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index 956205bf9326..f7f3e5b4c180 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -20,12 +20,11 @@ static int perf_evsel__roundtrip_cache_name_test(void) for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ - if (!perf_evsel__is_cache_op_valid(type, op)) + if (!evsel__is_cache_op_valid(type, op)) continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - __perf_evsel__hw_cache_type_op_res_name(type, op, i, - name, sizeof(name)); + __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); err = parse_events(evlist, name, NULL); if (err) ret = err; @@ -39,23 +38,22 @@ static int perf_evsel__roundtrip_cache_name_test(void) for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ - if (!perf_evsel__is_cache_op_valid(type, op)) + if (!evsel__is_cache_op_valid(type, op)) continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - __perf_evsel__hw_cache_type_op_res_name(type, op, i, - name, sizeof(name)); + __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); if (evsel->idx != idx) continue; ++idx; - if (strcmp(perf_evsel__name(evsel), name)) { - pr_debug("%s != %s\n", perf_evsel__name(evsel), name); + if (strcmp(evsel__name(evsel), name)) { + pr_debug("%s != %s\n", evsel__name(evsel), name); ret = -1; } - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); } } } @@ -84,9 +82,9 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names) err = 0; evlist__for_each_entry(evlist, evsel) { - if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) { + if (strcmp(evsel__name(evsel), names[evsel->idx])) { --err; - pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]); + pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->idx]); } } @@ -102,12 +100,11 @@ int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int { int err = 0, ret = 0; - err = perf_evsel__name_array_test(perf_evsel__hw_names); + err = perf_evsel__name_array_test(evsel__hw_names); if (err) ret = err; - err = __perf_evsel__name_array_test(perf_evsel__sw_names, - PERF_COUNT_SW_DUMMY + 1); + err = __perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1); if (err) ret = err; diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 261e6eaaee99..0e224a0a55d9 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -8,7 +8,7 @@ static int perf_evsel__test_field(struct evsel *evsel, const char *name, int size, bool should_be_signed) { - struct tep_format_field *field = perf_evsel__field(evsel, name); + struct tep_format_field *field = evsel__field(evsel, name); int is_signed; int ret = 0; @@ -35,11 +35,11 @@ static int perf_evsel__test_field(struct evsel *evsel, const char *name, int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused) { - struct evsel *evsel = perf_evsel__newtp("sched", "sched_switch"); + struct evsel *evsel = evsel__newtp("sched", "sched_switch"); int ret = 0; if (IS_ERR(evsel)) { - pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel)); + pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel)); return -1; } @@ -66,10 +66,10 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes evsel__delete(evsel); - evsel = perf_evsel__newtp("sched", "sched_wakeup"); + evsel = evsel__newtp("sched", "sched_wakeup"); if (IS_ERR(evsel)) { - pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel)); + pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel)); return -1; } diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 28313e59d6f6..1cb02ca2b15f 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -6,11 +6,11 @@ #include <string.h> #include <linux/zalloc.h> -static int test(struct parse_ctx *ctx, const char *e, double val2) +static int test(struct expr_parse_ctx *ctx, const char *e, double val2) { double val; - if (expr__parse(&val, ctx, e)) + if (expr__parse(&val, ctx, e, 1)) TEST_ASSERT_VAL("parse test failed", 0); TEST_ASSERT_VAL("unexpected value", val == val2); return 0; @@ -19,15 +19,13 @@ static int test(struct parse_ctx *ctx, const char *e, double val2) int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) { const char *p; - const char **other; - double val; - int i, ret; - struct parse_ctx ctx; - int num_other; + double val, *val_ptr; + int ret; + struct expr_parse_ctx ctx; expr__ctx_init(&ctx); - expr__add_id(&ctx, "FOO", 1); - expr__add_id(&ctx, "BAR", 2); + expr__add_id(&ctx, strdup("FOO"), 1); + expr__add_id(&ctx, strdup("BAR"), 2); ret = test(&ctx, "1+1", 2); ret |= test(&ctx, "FOO+BAR", 3); @@ -39,29 +37,43 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) ret |= test(&ctx, "min(1,2) + 1", 2); ret |= test(&ctx, "max(1,2) + 1", 3); ret |= test(&ctx, "1+1 if 3*4 else 0", 2); + ret |= test(&ctx, "1.1 + 2.1", 3.2); + ret |= test(&ctx, ".1 + 2.", 2.1); if (ret) return ret; p = "FOO/0"; - ret = expr__parse(&val, &ctx, p); + ret = expr__parse(&val, &ctx, p, 1); TEST_ASSERT_VAL("division by zero", ret == -1); p = "BAR/"; - ret = expr__parse(&val, &ctx, p); + ret = expr__parse(&val, &ctx, p, 1); TEST_ASSERT_VAL("missing operand", ret == -1); + expr__ctx_clear(&ctx); + TEST_ASSERT_VAL("find other", + expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", + &ctx, 1) == 0); + TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 3); + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAR", + (void **)&val_ptr)); + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAZ", + (void **)&val_ptr)); + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BOZO", + (void **)&val_ptr)); + + expr__ctx_clear(&ctx); TEST_ASSERT_VAL("find other", - expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0); - TEST_ASSERT_VAL("find other", num_other == 3); - TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR")); - TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ")); - TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO")); - TEST_ASSERT_VAL("find other", other[3] == NULL); + expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", + NULL, &ctx, 3) == 0); + TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 2); + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT1,param=3/", + (void **)&val_ptr)); + TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT2,param=3/", + (void **)&val_ptr)); - for (i = 0; i < num_other; i++) - zfree(&other[i]); - free((void *)other); + expr__ctx_clear(&ctx); return 0; } diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 6367c8f6ca22..3f2e1a581247 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -190,7 +190,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec * function since TEST_ASSERT_VAL() returns in case of failure. */ hists__collapse_resort(hists, NULL); - perf_evsel__output_resort(hists_to_evsel(hists), NULL); + evsel__output_resort(hists_to_evsel(hists), NULL); if (verbose > 2) { pr_info("use callchain: %d, cumulate callchain: %d\n", @@ -280,7 +280,7 @@ static int test1(struct evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = false; - perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); callchain_register_param(&callchain_param); @@ -427,7 +427,7 @@ static int test2(struct evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = false; - perf_evsel__set_sample_bit(evsel, CALLCHAIN); + evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); callchain_register_param(&callchain_param); @@ -485,7 +485,7 @@ static int test3(struct evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = true; - perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); callchain_register_param(&callchain_param); @@ -669,7 +669,7 @@ static int test4(struct evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = true; - perf_evsel__set_sample_bit(evsel, CALLCHAIN); + evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 618b51ffcc01..123e07d35b55 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -142,7 +142,7 @@ int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unu struct hists *hists = evsel__hists(evsel); hists__collapse_resort(hists, NULL); - perf_evsel__output_resort(evsel, NULL); + evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("Normal histogram\n"); diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 38f804ff6452..8973f35df604 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -155,7 +155,7 @@ static int test1(struct evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - perf_evsel__output_resort(evsel, NULL); + evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -255,7 +255,7 @@ static int test2(struct evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - perf_evsel__output_resort(evsel, NULL); + evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -309,7 +309,7 @@ static int test3(struct evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - perf_evsel__output_resort(evsel, NULL); + evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -387,7 +387,7 @@ static int test4(struct evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - perf_evsel__output_resort(evsel, NULL); + evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -490,7 +490,7 @@ static int test5(struct evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - perf_evsel__output_resort(evsel, NULL); + evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 5d0c3a9c47a1..9b651dfe0a6b 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -84,10 +84,13 @@ make_no_libaudit := NO_LIBAUDIT=1 make_no_libbionic := NO_LIBBIONIC=1 make_no_auxtrace := NO_AUXTRACE=1 make_no_libbpf := NO_LIBBPF=1 +make_no_libbpf_DEBUG := NO_LIBBPF=1 DEBUG=1 make_no_libcrypto := NO_LIBCRYPTO=1 make_with_babeltrace:= LIBBABELTRACE=1 make_no_sdt := NO_SDT=1 +make_no_syscall_tbl := NO_SYSCALL_TABLE=1 make_with_clangllvm := LIBCLANGLLVM=1 +make_with_libpfm4 := LIBPFM4=1 make_tags := tags make_cscope := cscope make_help := help @@ -112,7 +115,7 @@ make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1 -make_minimal += NO_LIBCAP=1 +make_minimal += NO_LIBCAP=1 NO_SYSCALL_TABLE=1 # $(run) contains all available tests run := make_pure @@ -144,8 +147,13 @@ run += make_no_libaudit run += make_no_libbionic run += make_no_auxtrace run += make_no_libbpf +run += make_no_libbpf_DEBUG +run += make_no_libcrypto +run += make_no_sdt +run += make_no_syscall_tbl run += make_with_babeltrace run += make_with_clangllvm +run += make_with_libpfm4 run += make_help run += make_doc run += make_perf_o diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 5f4c0dbb4715..7b0dbfc0e17d 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -79,14 +79,14 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse char name[64]; snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]); - evsels[i] = perf_evsel__newtp("syscalls", name); + evsels[i] = evsel__newtp("syscalls", name); if (IS_ERR(evsels[i])) { - pr_debug("perf_evsel__new(%s)\n", name); + pr_debug("evsel__new(%s)\n", name); goto out_delete_evlist; } evsels[i]->core.attr.wakeup_events = 1; - perf_evsel__set_sample_id(evsels[i], false); + evsel__set_sample_id(evsels[i], false); evlist__add(evlist, evsels[i]); @@ -150,7 +150,7 @@ out_init: if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) { pr_debug("expected %d %s events, got %d\n", expected_nr_events[evsel->idx], - perf_evsel__name(evsel), nr_events[evsel->idx]); + evsel__name(evsel), nr_events[evsel->idx]); err = -1; goto out_delete_evlist; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 93c176523e38..71f85e2cc127 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -44,7 +44,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int CPU_ZERO(&cpu_set); - evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); + evsel = evsel__newtp("syscalls", "sys_enter_openat"); if (IS_ERR(evsel)) { tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); pr_debug("%s\n", errbuf); @@ -90,8 +90,8 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int * we use the auto allocation it will allocate just for 1 cpu, * as we start by cpu 0. */ - if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { - pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr); + if (evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { + pr_debug("evsel__alloc_counts(ncpus=%d)\n", cpus->nr); goto out_close_fd; } @@ -103,21 +103,21 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int if (cpus->map[cpu] >= CPU_SETSIZE) continue; - if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) { - pr_debug("perf_evsel__read_on_cpu\n"); + if (evsel__read_on_cpu(evsel, cpu, 0) < 0) { + pr_debug("evsel__read_on_cpu\n"); err = -1; break; } expected = nr_openat_calls + cpu; if (perf_counts(evsel->counts, cpu, 0)->val != expected) { - pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", + pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val); err = -1; } } - perf_evsel__free_counts(evsel); + evsel__free_counts(evsel); out_close_fd: perf_evsel__close_fd(&evsel->core); out_evsel_delete: diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index c6b2d7aab608..1f5f5e79ae25 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -46,9 +46,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest goto out; } - evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); + evsel = evsel__newtp("syscalls", "sys_enter_openat"); if (IS_ERR(evsel)) { - pr_debug("%s: perf_evsel__newtp\n", __func__); + pr_debug("%s: evsel__newtp\n", __func__); goto out_delete_evlist; } @@ -60,7 +60,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest goto out_delete_evlist; } - perf_evsel__config(evsel, &opts, NULL); + evsel__config(evsel, &opts, NULL); perf_thread_map__set_pid(evlist->core.threads, 0, getpid()); @@ -108,13 +108,13 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest continue; } - err = perf_evsel__parse_sample(evsel, event, &sample); + err = evsel__parse_sample(evsel, event, &sample); if (err) { pr_debug("Can't parse sample, err = %d\n", err); goto out_delete_evlist; } - tp_flags = perf_evsel__intval(evsel, &sample, "flags"); + tp_flags = evsel__intval(evsel, &sample, "flags"); if (flags != tp_flags) { pr_debug("%s: Expected flags=%#x, got %#x\n", diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 5ebffae18605..85a8f0fe7aea 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -27,14 +27,14 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m return -1; } - evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); + evsel = evsel__newtp("syscalls", "sys_enter_openat"); if (IS_ERR(evsel)) { tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); pr_debug("%s\n", errbuf); goto out_thread_map_delete; } - if (perf_evsel__open_per_thread(evsel, threads) < 0) { + if (evsel__open_per_thread(evsel, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", str_error_r(errno, sbuf, sizeof(sbuf))); @@ -46,13 +46,13 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m close(fd); } - if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) { - pr_debug("perf_evsel__read_on_cpu\n"); + if (evsel__read_on_cpu(evsel, 0, 0) < 0) { + pr_debug("evsel__read_on_cpu\n"); goto out_close_fd; } if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) { - pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", + pr_debug("evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val); goto out_close_fd; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 091c3aeccc27..895188b63f96 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -371,7 +371,7 @@ static int test__checkevent_breakpoint_modifier(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0:u")); + !strcmp(evsel__name(evsel), "mem:0:u")); return test__checkevent_breakpoint(evlist); } @@ -385,7 +385,7 @@ static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0:x:k")); + !strcmp(evsel__name(evsel), "mem:0:x:k")); return test__checkevent_breakpoint_x(evlist); } @@ -399,7 +399,7 @@ static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0:r:hp")); + !strcmp(evsel__name(evsel), "mem:0:r:hp")); return test__checkevent_breakpoint_r(evlist); } @@ -413,7 +413,7 @@ static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0:w:up")); + !strcmp(evsel__name(evsel), "mem:0:w:up")); return test__checkevent_breakpoint_w(evlist); } @@ -427,7 +427,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0:rw:kp")); + !strcmp(evsel__name(evsel), "mem:0:rw:kp")); return test__checkevent_breakpoint_rw(evlist); } @@ -468,7 +468,7 @@ static int test__checkevent_list(struct evlist *evlist) TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); /* syscalls:sys_enter_openat:k */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type); TEST_ASSERT_VAL("wrong sample_type", PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type); @@ -479,7 +479,7 @@ static int test__checkevent_list(struct evlist *evlist) TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); /* 1:1:hp */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); @@ -498,15 +498,15 @@ static int test__checkevent_pmu_name(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava")); + TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "krava")); /* cpu/config=2/u" */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "cpu/config=2/u")); + !strcmp(evsel__name(evsel), "cpu/config=2/u")); return 0; } @@ -529,7 +529,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->core.attr.sample_type)); /* cpu/config=2,call-graph=no,time=0,period=2000/ */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config); /* @@ -577,7 +577,7 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist) TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); /* cpu/pmu-event/u*/ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong exclude_user", @@ -652,13 +652,13 @@ static int test__group1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* cycles:upp */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -670,7 +670,7 @@ static int test__group1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); return 0; @@ -694,13 +694,13 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* cache-references + :u modifier */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_REFERENCES == evsel->core.attr.config); @@ -711,11 +711,11 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* cycles:k */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -725,7 +725,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); return 0; @@ -750,15 +750,15 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong group name", !strcmp(leader->group_name, "group1")); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* group1 cycles:kppp */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -771,11 +771,11 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 3); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* group2 cycles + G modifier */ - evsel = leader = perf_evsel__next(evsel); + evsel = leader = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -785,15 +785,15 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong group name", !strcmp(leader->group_name, "group2")); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* group2 1:3 + G modifier */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 3 == evsel->core.attr.config); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); @@ -803,11 +803,11 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* instructions:u */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); @@ -817,7 +817,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); return 0; @@ -843,13 +843,13 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 1); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* instructions:kp + p */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); @@ -861,7 +861,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); return 0; @@ -886,13 +886,13 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* instructions + G */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); @@ -903,11 +903,11 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* cycles:G */ - evsel = leader = perf_evsel__next(evsel); + evsel = leader = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -918,13 +918,13 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* instructions:G */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); @@ -935,10 +935,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); /* cycles */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -948,7 +948,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); return 0; } @@ -972,12 +972,12 @@ static int test__group_gh1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); /* cache-misses:G + :H group modifier */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); @@ -988,7 +988,7 @@ static int test__group_gh1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); return 0; } @@ -1012,12 +1012,12 @@ static int test__group_gh2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); /* cache-misses:H + :G group modifier */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); @@ -1028,7 +1028,7 @@ static int test__group_gh2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); return 0; } @@ -1052,12 +1052,12 @@ static int test__group_gh3(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); /* cache-misses:H + :u group modifier */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); @@ -1068,7 +1068,7 @@ static int test__group_gh3(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); return 0; } @@ -1092,12 +1092,12 @@ static int test__group_gh4(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); /* cache-misses:H + :uG group modifier */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); @@ -1108,7 +1108,7 @@ static int test__group_gh4(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); return 0; } @@ -1135,7 +1135,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); /* cache-misses - not sampling */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); @@ -1149,7 +1149,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); /* branch-misses - not sampling */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config); @@ -1188,7 +1188,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); /* branch-misses - not sampling */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config); @@ -1234,14 +1234,14 @@ static int test__pinned_group(struct evlist *evlist) TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned); /* cache-misses - can not be pinned, but will go on with the leader */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); /* branch-misses - ditto */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config); TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); @@ -1356,6 +1356,16 @@ static int test__checkevent_complex_name(struct evlist *evlist) return 0; } +static int test__checkevent_raw_pmu(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); + return 0; +} + static int test__sym_event_slash(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); @@ -1750,7 +1760,12 @@ static struct evlist_test test__events_pmu[] = { .name = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp", .check = test__checkevent_complex_name, .id = 3, - } + }, + { + .name = "software/r1a/", + .check = test__checkevent_raw_pmu, + .id = 4, + }, }; struct terms_test { diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 2195fc205e72..83adfd846ccd 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -106,9 +106,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus * Config the evsels, setting attr->comm on the first one, etc. */ evsel = evlist__first(evlist); - perf_evsel__set_sample_bit(evsel, CPU); - perf_evsel__set_sample_bit(evsel, TID); - perf_evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, TID); + evsel__set_sample_bit(evsel, TIME); perf_evlist__config(evlist, &opts, NULL); err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask); diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c new file mode 100644 index 000000000000..76a53126efdf --- /dev/null +++ b/tools/perf/tests/pfm.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test support for libpfm4 event encodings. + * + * Copyright 2020 Google LLC. + */ +#include "tests.h" +#include "util/debug.h" +#include "util/evlist.h" +#include "util/pfm.h" + +#include <linux/kernel.h> + +#ifdef HAVE_LIBPFM +static int test__pfm_events(void); +static int test__pfm_group(void); +#endif + +static const struct { + int (*func)(void); + const char *desc; +} pfm_testcase_table[] = { +#ifdef HAVE_LIBPFM + { + .func = test__pfm_events, + .desc = "test of individual --pfm-events", + }, + { + .func = test__pfm_group, + .desc = "test groups of --pfm-events", + }, +#endif +}; + +#ifdef HAVE_LIBPFM +static int count_pfm_events(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + int count = 0; + + perf_evlist__for_each_entry(evlist, evsel) { + count++; + } + return count; +} + +static int test__pfm_events(void) +{ + struct evlist *evlist; + struct option opt; + size_t i; + const struct { + const char *events; + int nr_events; + } table[] = { + { + .events = "", + .nr_events = 0, + }, + { + .events = "instructions", + .nr_events = 1, + }, + { + .events = "instructions,cycles", + .nr_events = 2, + }, + { + .events = "stereolab", + .nr_events = 0, + }, + { + .events = "instructions,instructions", + .nr_events = 2, + }, + { + .events = "stereolab,instructions", + .nr_events = 0, + }, + { + .events = "instructions,stereolab", + .nr_events = 1, + }, + }; + + for (i = 0; i < ARRAY_SIZE(table); i++) { + evlist = evlist__new(); + if (evlist == NULL) + return -ENOMEM; + + opt.value = evlist; + parse_libpfm_events_option(&opt, + table[i].events, + 0); + TEST_ASSERT_EQUAL(table[i].events, + count_pfm_events(&evlist->core), + table[i].nr_events); + TEST_ASSERT_EQUAL(table[i].events, + evlist->nr_groups, + 0); + + evlist__delete(evlist); + } + return 0; +} + +static int test__pfm_group(void) +{ + struct evlist *evlist; + struct option opt; + size_t i; + const struct { + const char *events; + int nr_events; + int nr_groups; + } table[] = { + { + .events = "{},", + .nr_events = 0, + .nr_groups = 0, + }, + { + .events = "{instructions}", + .nr_events = 1, + .nr_groups = 1, + }, + { + .events = "{instructions},{}", + .nr_events = 1, + .nr_groups = 1, + }, + { + .events = "{},{instructions}", + .nr_events = 0, + .nr_groups = 0, + }, + { + .events = "{instructions},{instructions}", + .nr_events = 2, + .nr_groups = 2, + }, + { + .events = "{instructions,cycles},{instructions,cycles}", + .nr_events = 4, + .nr_groups = 2, + }, + { + .events = "{stereolab}", + .nr_events = 0, + .nr_groups = 0, + }, + { + .events = + "{instructions,cycles},{instructions,stereolab}", + .nr_events = 3, + .nr_groups = 1, + }, + }; + + for (i = 0; i < ARRAY_SIZE(table); i++) { + evlist = evlist__new(); + if (evlist == NULL) + return -ENOMEM; + + opt.value = evlist; + parse_libpfm_events_option(&opt, + table[i].events, + 0); + TEST_ASSERT_EQUAL(table[i].events, + count_pfm_events(&evlist->core), + table[i].nr_events); + TEST_ASSERT_EQUAL(table[i].events, + evlist->nr_groups, + table[i].nr_groups); + + evlist__delete(evlist); + } + return 0; +} +#endif + +const char *test__pfm_subtest_get_desc(int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table)) + return NULL; + return pfm_testcase_table[i].desc; +} + +int test__pfm_subtest_get_nr(void) +{ + return (int)ARRAY_SIZE(pfm_testcase_table); +} + +int test__pfm(struct test *test __maybe_unused, int i __maybe_unused) +{ +#ifdef HAVE_LIBPFM + if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table)) + return TEST_FAIL; + return pfm_testcase_table[i].func(); +#else + return TEST_SKIP; +#endif +} diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index d64261da8bf7..ab64b4a4e284 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include "math.h" #include "parse-events.h" #include "pmu.h" #include "tests.h" @@ -8,6 +9,9 @@ #include <linux/zalloc.h> #include "debug.h" #include "../pmu-events/pmu-events.h" +#include "util/evlist.h" +#include "util/expr.h" +#include "util/parse-events.h" struct perf_pmu_test_event { struct pmu_event event; @@ -144,7 +148,7 @@ static struct pmu_events_map *__test_pmu_get_events_map(void) } /* Verify generated events from pmu-events.c is as expected */ -static int __test_pmu_event_table(void) +static int test_pmu_event_table(void) { struct pmu_events_map *map = __test_pmu_get_events_map(); struct pmu_event *table; @@ -347,14 +351,11 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count) return res; } -int test__pmu_events(struct test *test __maybe_unused, - int subtest __maybe_unused) + +static int test_aliases(void) { struct perf_pmu *pmu = NULL; - if (__test_pmu_event_table()) - return -1; - while ((pmu = perf_pmu__scan(pmu)) != NULL) { int count = 0; @@ -377,3 +378,163 @@ int test__pmu_events(struct test *test __maybe_unused, return 0; } + +static bool is_number(const char *str) +{ + char *end_ptr; + double v; + + errno = 0; + v = strtod(str, &end_ptr); + (void)v; // We're not interested in this value, only if it is valid + return errno == 0 && end_ptr != str; +} + +static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe) +{ + struct parse_events_error error; + struct evlist *evlist; + int ret; + + /* Numbers are always valid. */ + if (is_number(id)) + return 0; + + evlist = evlist__new(); + memset(&error, 0, sizeof(error)); + ret = parse_events(evlist, id, &error); + if (ret && same_cpu) { + pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n", + pe->metric_name, id, pe->metric_expr); + pr_warning("Error string '%s' help '%s'\n", error.str, + error.help); + } else if (ret) { + pr_debug3("Parse event failed, but for an event that may not be supported by this CPU.\nid '%s' metric '%s' expr '%s'\n", + id, pe->metric_name, pe->metric_expr); + ret = 0; + } + evlist__delete(evlist); + free(error.str); + free(error.help); + free(error.first_str); + free(error.first_help); + return ret; +} + +static void expr_failure(const char *msg, + const struct pmu_events_map *map, + const struct pmu_event *pe) +{ + pr_debug("%s for map %s %s %s\n", + msg, map->cpuid, map->version, map->type); + pr_debug("On metric %s\n", pe->metric_name); + pr_debug("On expression %s\n", pe->metric_expr); +} + +static int test_parsing(void) +{ + struct pmu_events_map *cpus_map = perf_pmu__find_map(NULL); + struct pmu_events_map *map; + struct pmu_event *pe; + int i, j, k; + int ret = 0; + struct expr_parse_ctx ctx; + double result; + + i = 0; + for (;;) { + map = &pmu_events_map[i++]; + if (!map->table) + break; + j = 0; + for (;;) { + struct hashmap_entry *cur; + size_t bkt; + + pe = &map->table[j++]; + if (!pe->name && !pe->metric_group && !pe->metric_name) + break; + if (!pe->metric_expr) + continue; + expr__ctx_init(&ctx); + if (expr__find_other(pe->metric_expr, NULL, &ctx, 0) + < 0) { + expr_failure("Parse other failed", map, pe); + ret++; + continue; + } + + /* + * Add all ids with a made up value. The value may + * trigger divide by zero when subtracted and so try to + * make them unique. + */ + k = 1; + hashmap__for_each_entry((&ctx.ids), cur, bkt) + expr__add_id(&ctx, strdup(cur->key), k++); + + hashmap__for_each_entry((&ctx.ids), cur, bkt) { + if (check_parse_id(cur->key, map == cpus_map, + pe)) + ret++; + } + + if (expr__parse(&result, &ctx, pe->metric_expr, 0)) { + expr_failure("Parse failed", map, pe); + ret++; + } + expr__ctx_clear(&ctx); + } + } + /* TODO: fail when not ok */ + return ret == 0 ? TEST_OK : TEST_SKIP; +} + +static const struct { + int (*func)(void); + const char *desc; +} pmu_events_testcase_table[] = { + { + .func = test_pmu_event_table, + .desc = "PMU event table sanity", + }, + { + .func = test_aliases, + .desc = "PMU event map aliases", + }, + { + .func = test_parsing, + .desc = "Parsing of PMU event table metrics", + }, +}; + +const char *test__pmu_events_subtest_get_desc(int subtest) +{ + if (subtest < 0 || + subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table)) + return NULL; + return pmu_events_testcase_table[subtest].desc; +} + +const char *test__pmu_events_subtest_skip_reason(int subtest) +{ + if (subtest < 0 || + subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table)) + return NULL; + if (pmu_events_testcase_table[subtest].func != test_parsing) + return NULL; + return "some metrics failed"; +} + +int test__pmu_events_subtest_get_nr(void) +{ + return (int)ARRAY_SIZE(pmu_events_testcase_table); +} + +int test__pmu_events(struct test *test __maybe_unused, int subtest) +{ + if (subtest < 0 || + subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table)) + return TEST_FAIL; + return pmu_events_testcase_table[subtest].func(); +} diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 74379ff1f7fa..5c11fe2b3040 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -156,8 +156,8 @@ int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused) if (ret) break; - ret = perf_pmu__config_terms(&formats, &attr, terms, - false, NULL); + ret = perf_pmu__config_terms("perf-pmu-test", &formats, &attr, + terms, false, NULL); if (ret) break; diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 61865699c3f4..a0bdaf390ac8 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -296,12 +296,12 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) goto out_free; } - evsel.sample_size = __perf_evsel__sample_size(sample_type); + evsel.sample_size = __evsel__sample_size(sample_type); - err = perf_evsel__parse_sample(&evsel, event, &sample_out); + err = evsel__parse_sample(&evsel, event, &sample_out); if (err) { pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", - "perf_evsel__parse_sample", sample_type, err); + "evsel__parse_sample", sample_type, err); goto out_free; } diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index bfb9986093d8..4b9b731977c8 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -56,7 +56,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) evsel = evsel__new(&attr); if (evsel == NULL) { - pr_debug("perf_evsel__new\n"); + pr_debug("evsel__new\n"); goto out_delete_evlist; } evlist__add(evlist, evsel); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index fcb0d03dba4e..db5e1f70053a 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -135,8 +135,8 @@ static int process_sample_event(struct evlist *evlist, evsel = perf_evlist__id2evsel(evlist, sample.id); if (evsel == switch_tracking->switch_evsel) { - next_tid = perf_evsel__intval(evsel, &sample, "next_pid"); - prev_tid = perf_evsel__intval(evsel, &sample, "prev_pid"); + next_tid = evsel__intval(evsel, &sample, "next_pid"); + prev_tid = evsel__intval(evsel, &sample, "prev_pid"); cpu = sample.cpu; pr_debug3("sched_switch: cpu: %d prev_tid %d next_tid %d\n", cpu, prev_tid, next_tid); @@ -394,8 +394,8 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ switch_evsel = evlist__last(evlist); - perf_evsel__set_sample_bit(switch_evsel, CPU); - perf_evsel__set_sample_bit(switch_evsel, TIME); + evsel__set_sample_bit(switch_evsel, CPU); + evsel__set_sample_bit(switch_evsel, TIME); switch_evsel->core.system_wide = true; switch_evsel->no_aux_samples = true; @@ -412,8 +412,8 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out_err; } - perf_evsel__set_sample_bit(cycles_evsel, CPU); - perf_evsel__set_sample_bit(cycles_evsel, TIME); + evsel__set_sample_bit(cycles_evsel, CPU); + evsel__set_sample_bit(cycles_evsel, TIME); /* Fourth event */ err = parse_events(evlist, "dummy:u", NULL); @@ -429,7 +429,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; - perf_evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, TIME); /* Config events */ perf_evlist__config(evlist, &opts, NULL); diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 61a1ab032080..76a4e352eaaf 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -34,6 +34,7 @@ struct test { bool skip_if_fail; int (*get_nr)(void); const char *(*get_desc)(int subtest); + const char *(*skip_reason)(int subtest); } subtest; bool (*is_supported)(void); void *priv; @@ -50,6 +51,9 @@ int test__perf_evsel__tp_sched_test(struct test *test, int subtest); int test__syscall_openat_tp_fields(struct test *test, int subtest); int test__pmu(struct test *test, int subtest); int test__pmu_events(struct test *test, int subtest); +const char *test__pmu_events_subtest_get_desc(int subtest); +const char *test__pmu_events_subtest_skip_reason(int subtest); +int test__pmu_events_subtest_get_nr(void); int test__attr(struct test *test, int subtest); int test__dso_data(struct test *test, int subtest); int test__dso_data_cache(struct test *test, int subtest); @@ -112,6 +116,11 @@ int test__mem2node(struct test *t, int subtest); int test__maps__merge_in(struct test *t, int subtest); int test__time_utils(struct test *t, int subtest); int test__jit_write_elf(struct test *test, int subtest); +int test__api_io(struct test *test, int subtest); +int test__demangle_java(struct test *test, int subtest); +int test__pfm(struct test *test, int subtest); +const char *test__pfm_subtest_get_desc(int subtest); +int test__pfm_subtest_get_nr(void); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 4a800499d7c3..22daf2bdf5fa 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -33,10 +33,8 @@ static int session_write_header(char *path) { struct perf_session *session; struct perf_data data = { - .file = { - .path = path, - }, - .mode = PERF_DATA_MODE_WRITE, + .path = path, + .mode = PERF_DATA_MODE_WRITE, }; session = perf_session__new(&data, false, NULL); @@ -63,10 +61,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) { struct perf_session *session; struct perf_data data = { - .file = { - .path = path, - }, - .mode = PERF_DATA_MODE_READ, + .path = path, + .mode = PERF_DATA_MODE_READ, }; int i; diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh index 22c9fc900c84..9f9ea45cddc4 100755 --- a/tools/perf/trace/beauty/arch_errno_names.sh +++ b/tools/perf/trace/beauty/arch_errno_names.sh @@ -57,7 +57,7 @@ process_arch() local arch="$1" local asm_errno=$(asm_errno_file "$arch") - $gcc $include_path -E -dM -x c $asm_errno \ + $gcc $CFLAGS $include_path -E -dM -x c $asm_errno \ |grep -hE '^#define[[:blank:]]+(E[^[:blank:]]+)[[:blank:]]+([[:digit:]]+).*' \ |awk '{ print $2","$3; }' \ |sort -t, -k2 -nu \ @@ -91,7 +91,7 @@ EoHEADER # in tools/perf/arch archlist="" for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | grep -v x86 | sort); do - test -d arch/$arch && archlist="$archlist $arch" + test -d $toolsdir/perf/arch/$arch && archlist="$archlist $arch" done for arch in x86 $archlist generic; do diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 487e54ef56a9..f98a118dfc49 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3416,7 +3416,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser, struct hists *hists = evsel__hists(evsel); bool current_entry = ui_browser__is_current_entry(browser, row); unsigned long nr_events = hists->stats.nr_events[PERF_RECORD_SAMPLE]; - const char *ev_name = perf_evsel__name(evsel); + const char *ev_name = evsel__name(evsel); char bf[256], unit; const char *warn = " "; size_t printed; @@ -3424,10 +3424,10 @@ static void perf_evsel_menu__write(struct ui_browser *browser, ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : HE_COLORSET_NORMAL); - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { struct evsel *pos; - ev_name = perf_evsel__group_name(evsel); + ev_name = evsel__group_name(evsel); for_each_group_member(pos, evsel) { struct hists *pos_hists = evsel__hists(pos); @@ -3512,13 +3512,13 @@ browse_hists: if (pos->core.node.next == &evlist->core.entries) pos = evlist__first(evlist); else - pos = perf_evsel__next(pos); + pos = evsel__next(pos); goto browse_hists; case K_UNTAB: if (pos->core.node.prev == &evlist->core.entries) pos = evlist__last(evlist); else - pos = perf_evsel__prev(pos); + pos = evsel__prev(pos); goto browse_hists; case K_SWITCH_INPUT_DATA: case K_RELOAD: @@ -3554,7 +3554,7 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused, { struct evsel *evsel = list_entry(entry, struct evsel, core.node); - if (symbol_conf.event_group && !perf_evsel__is_group_leader(evsel)) + if (symbol_conf.event_group && !evsel__is_group_leader(evsel)) return true; return false; @@ -3587,7 +3587,7 @@ static int __perf_evlist__tui_browse_hists(struct evlist *evlist, ui_helpline__push("Press ESC to exit"); evlist__for_each_entry(evlist, pos) { - const char *ev_name = perf_evsel__name(pos); + const char *ev_name = evsel__name(pos); size_t line_len = strlen(ev_name) + 7; if (menu.b.width < line_len) @@ -3622,7 +3622,7 @@ single_entry: nr_entries = 0; evlist__for_each_entry(evlist, pos) { - if (perf_evsel__is_group_leader(pos)) + if (evsel__is_group_leader(pos)) nr_entries++; } @@ -3640,7 +3640,7 @@ static int block_hists_browser__title(struct hist_browser *browser, char *bf, size_t size) { struct hists *hists = evsel__hists(browser->block_evsel); - const char *evname = perf_evsel__name(browser->block_evsel); + const char *evname = evsel__name(browser->block_evsel); unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; int ret; diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 35f9641bf670..a7dff77f2018 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -130,7 +130,7 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct map_symbol *ms, gtk_list_store_append(store, &iter); - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { for (i = 0; i < evsel->core.nr_members; i++) { ret += perf_gtk__get_percent(s + ret, sizeof(s) - ret, diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index ed1a97b2c4b0..53ef71a1b15d 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -635,18 +635,18 @@ int perf_evlist__gtk_browse_hists(struct evlist *evlist, evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); - const char *evname = perf_evsel__name(pos); + const char *evname = evsel__name(pos); GtkWidget *scrolled_window; GtkWidget *tab_label; char buf[512]; size_t size = sizeof(buf); if (symbol_conf.event_group) { - if (!perf_evsel__is_group_leader(pos)) + if (!evsel__is_group_leader(pos)) continue; if (pos->core.nr_members > 1) { - perf_evsel__group_desc(pos, buf, size); + evsel__group_desc(pos, buf, size); evname = buf; } } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 025f4c7f96bf..c1f24d004852 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -43,12 +43,12 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, } else ret = hpp__call_print_fn(hpp, print_fn, fmt, len, get_field(he)); - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { int prev_idx, idx_delta; struct hist_entry *pair; int nr_members = evsel->core.nr_members; - prev_idx = perf_evsel__group_idx(evsel); + prev_idx = evsel__group_idx(evsel); list_for_each_entry(pair, &he->pairs.head, pairs.node) { u64 period = get_field(pair); @@ -58,7 +58,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, continue; evsel = hists_to_evsel(pair->hists); - idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1; + idx_delta = evsel__group_idx(evsel) - prev_idx - 1; while (idx_delta--) { /* @@ -82,7 +82,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, len, period); } - prev_idx = perf_evsel__group_idx(evsel); + prev_idx = evsel__group_idx(evsel); } idx_delta = nr_members - prev_idx - 1; @@ -164,12 +164,12 @@ static int hist_entry__new_pair(struct hist_entry *a, struct hist_entry *b, list_for_each_entry(pair, &a->pairs.head, pairs.node) { struct evsel *evsel = hists_to_evsel(pair->hists); - fa[perf_evsel__group_idx(evsel)] = get_field(pair); + fa[evsel__group_idx(evsel)] = get_field(pair); } list_for_each_entry(pair, &b->pairs.head, pairs.node) { struct evsel *evsel = hists_to_evsel(pair->hists); - fb[perf_evsel__group_idx(evsel)] = get_field(pair); + fb[evsel__group_idx(evsel)] = get_field(pair); } *fields_a = fa; @@ -190,7 +190,7 @@ static int __hpp__group_sort_idx(struct hist_entry *a, struct hist_entry *b, int cmp, nr_members, ret, i; cmp = field_cmp(get_field(a), get_field(b)); - if (!perf_evsel__is_group_event(evsel)) + if (!evsel__is_group_event(evsel)) return cmp; nr_members = evsel->core.nr_members; @@ -240,7 +240,7 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b, return ret; evsel = hists_to_evsel(a->hists); - if (!perf_evsel__is_group_event(evsel)) + if (!evsel__is_group_event(evsel)) return ret; nr_members = evsel->core.nr_members; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index c0cf8dff694e..8d18380ecd10 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -10,6 +10,7 @@ perf-y += db-export.o perf-y += env.o perf-y += event.o perf-y += evlist.o +perf-y += sideband_evlist.o perf-y += evsel.o perf-y += evsel_fprintf.o perf-y += perf_event_attr_fprintf.o @@ -88,6 +89,7 @@ perf-y += counts.o perf-y += stat.o perf-y += stat-shadow.o perf-y += stat-display.o +perf-y += perf_api_probe.o perf-y += record.o perf-y += srcline.o perf-y += srccode.o @@ -104,7 +106,7 @@ perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ perf-$(CONFIG_AUXTRACE) += intel-pt.o perf-$(CONFIG_AUXTRACE) += intel-bts.o perf-$(CONFIG_AUXTRACE) += arm-spe.o -perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o +perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/ perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o ifdef CONFIG_LIBOPENCSD @@ -134,6 +136,10 @@ perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o perf-$(CONFIG_LIBELF) += probe-event.o +ifndef CONFIG_LIBBPF +perf-y += hashmap.o +endif + ifndef CONFIG_LIBELF perf-y += symbol-minimal.o endif @@ -177,6 +183,8 @@ perf-$(CONFIG_LIBBPF) += bpf-event.o perf-$(CONFIG_CXX) += c++/ +perf-$(CONFIG_LIBPFM4) += pfm.o + CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index f1ea0d61eb5b..76bfb4a9d94e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -41,7 +41,6 @@ #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/string.h> -#include <bpf/libbpf.h> #include <subcmd/parse-options.h> #include <subcmd/run-command.h> @@ -1191,7 +1190,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args) struct disasm_line *dl = NULL; int nr = 1; - if (perf_evsel__is_group_event(args->evsel)) + if (evsel__is_group_event(args->evsel)) nr = args->evsel->core.nr_members; dl = zalloc(disasm_line_size(nr)); @@ -1437,7 +1436,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (queue) return -1; - if (perf_evsel__is_group_event(evsel)) + if (evsel__is_group_event(evsel)) width *= evsel->core.nr_members; if (!*al->line) @@ -1821,6 +1820,24 @@ static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, } #endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) +static int +symbol__disassemble_bpf_image(struct symbol *sym, + struct annotate_args *args) +{ + struct annotation *notes = symbol__annotation(sym); + struct disasm_line *dl; + + args->offset = -1; + args->line = strdup("to be implemented"); + args->line_nr = 0; + dl = disasm_line__new(args); + if (dl) + annotation_line__add(&dl->al, ¬es->src->source); + + free(args->line); + return 0; +} + /* * Possibly create a new version of line with tabs expanded. Returns the * existing or new line, storage is updated if a new line is allocated. If @@ -1920,6 +1937,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) { return symbol__disassemble_bpf(sym, args); + } else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) { + return symbol__disassemble_bpf_image(sym, args); } else if (dso__is_kcore(dso)) { kce.kcore_filename = symfs_filename; kce.addr = map__rip_2objdump(map, sym->start); @@ -2136,7 +2155,7 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, .evsel = evsel, .options = options, }; - struct perf_env *env = perf_evsel__env(evsel); + struct perf_env *env = evsel__env(evsel); const char *arch_name = perf_env__arch(env); struct arch *arch; int err; @@ -2324,7 +2343,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, struct dso *dso = map->dso; char *filename; const char *d_filename; - const char *evsel_name = perf_evsel__name(evsel); + const char *evsel_name = evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evsel->idx); struct annotation_line *pos, *queue = NULL; @@ -2348,9 +2367,9 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, len = symbol__size(sym); - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { width *= evsel->core.nr_members; - perf_evsel__group_desc(evsel, buf, sizeof(buf)); + evsel__group_desc(evsel, buf, sizeof(buf)); evsel_name = buf; } @@ -2485,7 +2504,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts) { - const char *ev_name = perf_evsel__name(evsel); + const char *ev_name = evsel__name(evsel); char buf[1024]; char *filename; int err = -1; @@ -2498,8 +2517,8 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, if (fp == NULL) goto out_free_filename; - if (perf_evsel__is_group_event(evsel)) { - perf_evsel__group_desc(evsel, buf, sizeof(buf)); + if (evsel__is_group_event(evsel)) { + evsel__group_desc(evsel, buf, sizeof(buf)); ev_name = buf; } @@ -3044,7 +3063,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, if (notes->offsets == NULL) return ENOMEM; - if (perf_evsel__is_group_event(evsel)) + if (evsel__is_group_event(evsel)) nr_pcnt = evsel->core.nr_members; err = symbol__annotate(ms, evsel, options, parch); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 2d88069d6428..0a0cd4f32175 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -144,7 +144,7 @@ struct annotation_line { u32 idx; int idx_asm; int data_nr; - struct annotation_data data[0]; + struct annotation_data data[]; }; struct disasm_line { @@ -227,7 +227,7 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel); struct sym_hist { u64 nr_samples; u64 period; - struct sym_hist_entry addr[0]; + struct sym_hist_entry addr[]; }; struct cyc_hist { diff --git a/tools/perf/util/arm-spe-decoder/Build b/tools/perf/util/arm-spe-decoder/Build new file mode 100644 index 000000000000..f8dae13fc876 --- /dev/null +++ b/tools/perf/util/arm-spe-decoder/Build @@ -0,0 +1 @@ +perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c new file mode 100644 index 000000000000..302a14d0aca9 --- /dev/null +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arm_spe_decoder.c: ARM SPE support + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <errno.h> +#include <inttypes.h> +#include <stdbool.h> +#include <string.h> +#include <stdint.h> +#include <stdlib.h> +#include <linux/compiler.h> +#include <linux/zalloc.h> + +#include "../auxtrace.h" +#include "../debug.h" +#include "../util.h" + +#include "arm-spe-decoder.h" + +#ifndef BIT +#define BIT(n) (1UL << (n)) +#endif + +static u64 arm_spe_calc_ip(int index, u64 payload) +{ + u8 *addr = (u8 *)&payload; + int ns, el; + + /* Instruction virtual address or Branch target address */ + if (index == SPE_ADDR_PKT_HDR_INDEX_INS || + index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) { + ns = addr[7] & SPE_ADDR_PKT_NS; + el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET; + + /* Fill highest byte for EL1 or EL2 (VHE) mode */ + if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2)) + addr[7] = 0xff; + /* Clean highest byte for other cases */ + else + addr[7] = 0x0; + + /* Data access virtual address */ + } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) { + + /* Fill highest byte if bits [48..55] is 0xff */ + if (addr[6] == 0xff) + addr[7] = 0xff; + /* Otherwise, cleanup tags */ + else + addr[7] = 0x0; + + /* Data access physical address */ + } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) { + /* Cleanup byte 7 */ + addr[7] = 0x0; + } else { + pr_err("unsupported address packet index: 0x%x\n", index); + } + + return payload; +} + +struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params) +{ + struct arm_spe_decoder *decoder; + + if (!params->get_trace) + return NULL; + + decoder = zalloc(sizeof(struct arm_spe_decoder)); + if (!decoder) + return NULL; + + decoder->get_trace = params->get_trace; + decoder->data = params->data; + + return decoder; +} + +void arm_spe_decoder_free(struct arm_spe_decoder *decoder) +{ + free(decoder); +} + +static int arm_spe_get_data(struct arm_spe_decoder *decoder) +{ + struct arm_spe_buffer buffer = { .buf = 0, }; + int ret; + + pr_debug("Getting more data\n"); + ret = decoder->get_trace(&buffer, decoder->data); + if (ret < 0) + return ret; + + decoder->buf = buffer.buf; + decoder->len = buffer.len; + + if (!decoder->len) + pr_debug("No more data\n"); + + return decoder->len; +} + +static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder) +{ + int ret; + + do { + if (!decoder->len) { + ret = arm_spe_get_data(decoder); + + /* Failed to read out trace data */ + if (ret <= 0) + return ret; + } + + ret = arm_spe_get_packet(decoder->buf, decoder->len, + &decoder->packet); + if (ret <= 0) { + /* Move forward for 1 byte */ + decoder->buf += 1; + decoder->len -= 1; + return -EBADMSG; + } + + decoder->buf += ret; + decoder->len -= ret; + } while (decoder->packet.type == ARM_SPE_PAD); + + return 1; +} + +static int arm_spe_read_record(struct arm_spe_decoder *decoder) +{ + int err; + int idx; + u64 payload, ip; + + memset(&decoder->record, 0x0, sizeof(decoder->record)); + + while (1) { + err = arm_spe_get_next_packet(decoder); + if (err <= 0) + return err; + + idx = decoder->packet.index; + payload = decoder->packet.payload; + + switch (decoder->packet.type) { + case ARM_SPE_TIMESTAMP: + decoder->record.timestamp = payload; + return 1; + case ARM_SPE_END: + return 1; + case ARM_SPE_ADDRESS: + ip = arm_spe_calc_ip(idx, payload); + if (idx == SPE_ADDR_PKT_HDR_INDEX_INS) + decoder->record.from_ip = ip; + else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH) + decoder->record.to_ip = ip; + break; + case ARM_SPE_COUNTER: + break; + case ARM_SPE_CONTEXT: + break; + case ARM_SPE_OP_TYPE: + break; + case ARM_SPE_EVENTS: + if (payload & BIT(EV_L1D_REFILL)) + decoder->record.type |= ARM_SPE_L1D_MISS; + + if (payload & BIT(EV_L1D_ACCESS)) + decoder->record.type |= ARM_SPE_L1D_ACCESS; + + if (payload & BIT(EV_TLB_WALK)) + decoder->record.type |= ARM_SPE_TLB_MISS; + + if (payload & BIT(EV_TLB_ACCESS)) + decoder->record.type |= ARM_SPE_TLB_ACCESS; + + if ((idx == 1 || idx == 2 || idx == 3) && + (payload & BIT(EV_LLC_MISS))) + decoder->record.type |= ARM_SPE_LLC_MISS; + + if ((idx == 1 || idx == 2 || idx == 3) && + (payload & BIT(EV_LLC_ACCESS))) + decoder->record.type |= ARM_SPE_LLC_ACCESS; + + if ((idx == 1 || idx == 2 || idx == 3) && + (payload & BIT(EV_REMOTE_ACCESS))) + decoder->record.type |= ARM_SPE_REMOTE_ACCESS; + + if (payload & BIT(EV_MISPRED)) + decoder->record.type |= ARM_SPE_BRANCH_MISS; + + break; + case ARM_SPE_DATA_SOURCE: + break; + case ARM_SPE_BAD: + break; + case ARM_SPE_PAD: + break; + default: + pr_err("Get packet error!\n"); + return -1; + } + } + + return 0; +} + +int arm_spe_decode(struct arm_spe_decoder *decoder) +{ + return arm_spe_read_record(decoder); +} diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h new file mode 100644 index 000000000000..a5111a8d4360 --- /dev/null +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arm_spe_decoder.h: Arm Statistical Profiling Extensions support + * Copyright (c) 2019-2020, Arm Ltd. + */ + +#ifndef INCLUDE__ARM_SPE_DECODER_H__ +#define INCLUDE__ARM_SPE_DECODER_H__ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "arm-spe-pkt-decoder.h" + +enum arm_spe_events { + EV_EXCEPTION_GEN = 0, + EV_RETIRED = 1, + EV_L1D_ACCESS = 2, + EV_L1D_REFILL = 3, + EV_TLB_ACCESS = 4, + EV_TLB_WALK = 5, + EV_NOT_TAKEN = 6, + EV_MISPRED = 7, + EV_LLC_ACCESS = 8, + EV_LLC_MISS = 9, + EV_REMOTE_ACCESS = 10, + EV_ALIGNMENT = 11, + EV_PARTIAL_PREDICATE = 17, + EV_EMPTY_PREDICATE = 18, +}; + +enum arm_spe_sample_type { + ARM_SPE_L1D_ACCESS = 1 << 0, + ARM_SPE_L1D_MISS = 1 << 1, + ARM_SPE_LLC_ACCESS = 1 << 2, + ARM_SPE_LLC_MISS = 1 << 3, + ARM_SPE_TLB_ACCESS = 1 << 4, + ARM_SPE_TLB_MISS = 1 << 5, + ARM_SPE_BRANCH_MISS = 1 << 6, + ARM_SPE_REMOTE_ACCESS = 1 << 7, +}; + +struct arm_spe_record { + enum arm_spe_sample_type type; + int err; + u64 from_ip; + u64 to_ip; + u64 timestamp; +}; + +struct arm_spe_insn; + +struct arm_spe_buffer { + const unsigned char *buf; + size_t len; + u64 offset; + u64 trace_nr; +}; + +struct arm_spe_params { + int (*get_trace)(struct arm_spe_buffer *buffer, void *data); + void *data; +}; + +struct arm_spe_decoder { + int (*get_trace)(struct arm_spe_buffer *buffer, void *data); + void *data; + struct arm_spe_record record; + + const unsigned char *buf; + size_t len; + + struct arm_spe_pkt packet; +}; + +struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params); +void arm_spe_decoder_free(struct arm_spe_decoder *decoder); + +int arm_spe_decode(struct arm_spe_decoder *decoder); + +#endif diff --git a/tools/perf/util/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index b94001b756c7..b94001b756c7 100644 --- a/tools/perf/util/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c diff --git a/tools/perf/util/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index d786ef65113f..4c870521b8eb 100644 --- a/tools/perf/util/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -15,6 +15,8 @@ #define ARM_SPE_NEED_MORE_BYTES -1 #define ARM_SPE_BAD_PACKET -2 +#define ARM_SPE_PKT_MAX_SZ 16 + enum arm_spe_pkt_type { ARM_SPE_BAD, ARM_SPE_PAD, @@ -34,6 +36,20 @@ struct arm_spe_pkt { uint64_t payload; }; +#define SPE_ADDR_PKT_HDR_INDEX_INS (0x0) +#define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1) +#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2) +#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS (0x3) + +#define SPE_ADDR_PKT_NS BIT(7) +#define SPE_ADDR_PKT_CH BIT(6) +#define SPE_ADDR_PKT_EL_OFFSET (5) +#define SPE_ADDR_PKT_EL_MASK (0x3 << SPE_ADDR_PKT_EL_OFFSET) +#define SPE_ADDR_PKT_EL0 (0) +#define SPE_ADDR_PKT_EL1 (1) +#define SPE_ADDR_PKT_EL2 (2) +#define SPE_ADDR_PKT_EL3 (3) + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 53be12b23ff4..3882a5360ada 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -4,46 +4,85 @@ * Copyright (c) 2017-2018, Arm Ltd. */ +#include <byteswap.h> #include <endian.h> #include <errno.h> -#include <byteswap.h> #include <inttypes.h> -#include <unistd.h> -#include <stdlib.h> -#include <linux/kernel.h> -#include <linux/types.h> #include <linux/bitops.h> +#include <linux/kernel.h> #include <linux/log2.h> +#include <linux/types.h> #include <linux/zalloc.h> +#include <stdlib.h> +#include <unistd.h> +#include "auxtrace.h" #include "color.h" +#include "debug.h" +#include "evlist.h" #include "evsel.h" #include "machine.h" #include "session.h" -#include "debug.h" -#include "auxtrace.h" +#include "symbol.h" +#include "thread.h" +#include "thread-stack.h" +#include "tool.h" +#include "util/synthetic-events.h" + #include "arm-spe.h" -#include "arm-spe-pkt-decoder.h" +#include "arm-spe-decoder/arm-spe-decoder.h" +#include "arm-spe-decoder/arm-spe-pkt-decoder.h" + +#define MAX_TIMESTAMP (~0ULL) struct arm_spe { struct auxtrace auxtrace; struct auxtrace_queues queues; struct auxtrace_heap heap; + struct itrace_synth_opts synth_opts; u32 auxtrace_type; struct perf_session *session; struct machine *machine; u32 pmu_type; + + u8 timeless_decoding; + u8 data_queued; + + u8 sample_flc; + u8 sample_llc; + u8 sample_tlb; + u8 sample_branch; + u8 sample_remote_access; + + u64 l1d_miss_id; + u64 l1d_access_id; + u64 llc_miss_id; + u64 llc_access_id; + u64 tlb_miss_id; + u64 tlb_access_id; + u64 branch_miss_id; + u64 remote_access_id; + + u64 kernel_start; + + unsigned long num_events; }; struct arm_spe_queue { - struct arm_spe *spe; - unsigned int queue_nr; - struct auxtrace_buffer *buffer; - bool on_heap; - bool done; - pid_t pid; - pid_t tid; - int cpu; + struct arm_spe *spe; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + struct auxtrace_buffer *old_buffer; + union perf_event *event_buf; + bool on_heap; + bool done; + pid_t pid; + pid_t tid; + int cpu; + struct arm_spe_decoder *decoder; + u64 time; + u64 timestamp; + struct thread *thread; }; static void arm_spe_dump(struct arm_spe *spe __maybe_unused, @@ -92,44 +131,520 @@ static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf, arm_spe_dump(spe, buf, len); } -static int arm_spe_process_event(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_sample *sample __maybe_unused, - struct perf_tool *tool __maybe_unused) +static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data) +{ + struct arm_spe_queue *speq = data; + struct auxtrace_buffer *buffer = speq->buffer; + struct auxtrace_buffer *old_buffer = speq->old_buffer; + struct auxtrace_queue *queue; + + queue = &speq->spe->queues.queue_array[speq->queue_nr]; + + buffer = auxtrace_buffer__next(queue, buffer); + /* If no more data, drop the previous auxtrace_buffer and return */ + if (!buffer) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + b->len = 0; + return 0; + } + + speq->buffer = buffer; + + /* If the aux_buffer doesn't have data associated, try to load it */ + if (!buffer->data) { + /* get the file desc associated with the perf data file */ + int fd = perf_data__fd(speq->spe->session->data); + + buffer->data = auxtrace_buffer__get_data(buffer, fd); + if (!buffer->data) + return -ENOMEM; + } + + b->len = buffer->size; + b->buf = buffer->data; + + if (b->len) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + speq->old_buffer = buffer; + } else { + auxtrace_buffer__drop_data(buffer); + return arm_spe_get_trace(b, data); + } + + return 0; +} + +static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, + unsigned int queue_nr) +{ + struct arm_spe_params params = { .get_trace = 0, }; + struct arm_spe_queue *speq; + + speq = zalloc(sizeof(*speq)); + if (!speq) + return NULL; + + speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); + if (!speq->event_buf) + goto out_free; + + speq->spe = spe; + speq->queue_nr = queue_nr; + speq->pid = -1; + speq->tid = -1; + speq->cpu = -1; + + /* params set */ + params.get_trace = arm_spe_get_trace; + params.data = speq; + + /* create new decoder */ + speq->decoder = arm_spe_decoder_new(¶ms); + if (!speq->decoder) + goto out_free; + + return speq; + +out_free: + zfree(&speq->event_buf); + free(speq); + + return NULL; +} + +static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) +{ + return ip >= spe->kernel_start ? + PERF_RECORD_MISC_KERNEL : + PERF_RECORD_MISC_USER; +} + +static void arm_spe_prep_sample(struct arm_spe *spe, + struct arm_spe_queue *speq, + union perf_event *event, + struct perf_sample *sample) +{ + struct arm_spe_record *record = &speq->decoder->record; + + if (!spe->timeless_decoding) + sample->time = speq->timestamp; + + sample->ip = record->from_ip; + sample->cpumode = arm_spe_cpumode(spe, sample->ip); + sample->pid = speq->pid; + sample->tid = speq->tid; + sample->addr = record->to_ip; + sample->period = 1; + sample->cpu = speq->cpu; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = sample->cpumode; + event->sample.header.size = sizeof(struct perf_event_header); +} + +static inline int +arm_spe_deliver_synth_event(struct arm_spe *spe, + struct arm_spe_queue *speq __maybe_unused, + union perf_event *event, + struct perf_sample *sample) +{ + int ret; + + ret = perf_session__deliver_synth_event(spe->session, event, sample); + if (ret) + pr_err("ARM SPE: failed to deliver event, error %d\n", ret); + + return ret; +} + +static int +arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq, + u64 spe_events_id) +{ + struct arm_spe *spe = speq->spe; + union perf_event *event = speq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + arm_spe_prep_sample(spe, speq, event, &sample); + + sample.id = spe_events_id; + sample.stream_id = spe_events_id; + + return arm_spe_deliver_synth_event(spe, speq, event, &sample); +} + +static int arm_spe_sample(struct arm_spe_queue *speq) +{ + const struct arm_spe_record *record = &speq->decoder->record; + struct arm_spe *spe = speq->spe; + int err; + + if (spe->sample_flc) { + if (record->type & ARM_SPE_L1D_MISS) { + err = arm_spe_synth_spe_events_sample( + speq, spe->l1d_miss_id); + if (err) + return err; + } + + if (record->type & ARM_SPE_L1D_ACCESS) { + err = arm_spe_synth_spe_events_sample( + speq, spe->l1d_access_id); + if (err) + return err; + } + } + + if (spe->sample_llc) { + if (record->type & ARM_SPE_LLC_MISS) { + err = arm_spe_synth_spe_events_sample( + speq, spe->llc_miss_id); + if (err) + return err; + } + + if (record->type & ARM_SPE_LLC_ACCESS) { + err = arm_spe_synth_spe_events_sample( + speq, spe->llc_access_id); + if (err) + return err; + } + } + + if (spe->sample_tlb) { + if (record->type & ARM_SPE_TLB_MISS) { + err = arm_spe_synth_spe_events_sample( + speq, spe->tlb_miss_id); + if (err) + return err; + } + + if (record->type & ARM_SPE_TLB_ACCESS) { + err = arm_spe_synth_spe_events_sample( + speq, spe->tlb_access_id); + if (err) + return err; + } + } + + if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) { + err = arm_spe_synth_spe_events_sample(speq, + spe->branch_miss_id); + if (err) + return err; + } + + if (spe->sample_remote_access && + (record->type & ARM_SPE_REMOTE_ACCESS)) { + err = arm_spe_synth_spe_events_sample(speq, + spe->remote_access_id); + if (err) + return err; + } + + return 0; +} + +static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) +{ + struct arm_spe *spe = speq->spe; + int ret; + + if (!spe->kernel_start) + spe->kernel_start = machine__kernel_start(spe->machine); + + while (1) { + ret = arm_spe_decode(speq->decoder); + if (!ret) { + pr_debug("No data or all data has been processed.\n"); + return 1; + } + + /* + * Error is detected when decode SPE trace data, continue to + * the next trace data and find out more records. + */ + if (ret < 0) + continue; + + ret = arm_spe_sample(speq); + if (ret) + return ret; + + if (!spe->timeless_decoding && speq->timestamp >= *timestamp) { + *timestamp = speq->timestamp; + return 0; + } + } + + return 0; +} + +static int arm_spe__setup_queue(struct arm_spe *spe, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct arm_spe_queue *speq = queue->priv; + struct arm_spe_record *record; + + if (list_empty(&queue->head) || speq) + return 0; + + speq = arm_spe__alloc_queue(spe, queue_nr); + + if (!speq) + return -ENOMEM; + + queue->priv = speq; + + if (queue->cpu != -1) + speq->cpu = queue->cpu; + + if (!speq->on_heap) { + int ret; + + if (spe->timeless_decoding) + return 0; + +retry: + ret = arm_spe_decode(speq->decoder); + + if (!ret) + return 0; + + if (ret < 0) + goto retry; + + record = &speq->decoder->record; + + speq->timestamp = record->timestamp; + ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp); + if (ret) + return ret; + speq->on_heap = true; + } + + return 0; +} + +static int arm_spe__setup_queues(struct arm_spe *spe) +{ + unsigned int i; + int ret; + + for (i = 0; i < spe->queues.nr_queues; i++) { + ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i); + if (ret) + return ret; + } + + return 0; +} + +static int arm_spe__update_queues(struct arm_spe *spe) +{ + if (spe->queues.new_data) { + spe->queues.new_data = false; + return arm_spe__setup_queues(spe); + } + + return 0; +} + +static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) +{ + struct evsel *evsel; + struct evlist *evlist = spe->session->evlist; + bool timeless_decoding = true; + + /* + * Circle through the list of event and complain if we find one + * with the time bit set. + */ + evlist__for_each_entry(evlist, evsel) { + if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME)) + timeless_decoding = false; + } + + return timeless_decoding; +} + +static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, + struct auxtrace_queue *queue) +{ + struct arm_spe_queue *speq = queue->priv; + pid_t tid; + + tid = machine__get_current_tid(spe->machine, speq->cpu); + if (tid != -1) { + speq->tid = tid; + thread__zput(speq->thread); + } else + speq->tid = queue->tid; + + if ((!speq->thread) && (speq->tid != -1)) { + speq->thread = machine__find_thread(spe->machine, -1, + speq->tid); + } + + if (speq->thread) { + speq->pid = speq->thread->pid_; + if (queue->cpu == -1) + speq->cpu = speq->thread->cpu; + } +} + +static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) +{ + unsigned int queue_nr; + u64 ts; + int ret; + + while (1) { + struct auxtrace_queue *queue; + struct arm_spe_queue *speq; + + if (!spe->heap.heap_cnt) + return 0; + + if (spe->heap.heap_array[0].ordinal >= timestamp) + return 0; + + queue_nr = spe->heap.heap_array[0].queue_nr; + queue = &spe->queues.queue_array[queue_nr]; + speq = queue->priv; + + auxtrace_heap__pop(&spe->heap); + + if (spe->heap.heap_cnt) { + ts = spe->heap.heap_array[0].ordinal + 1; + if (ts > timestamp) + ts = timestamp; + } else { + ts = timestamp; + } + + arm_spe_set_pid_tid_cpu(spe, queue); + + ret = arm_spe_run_decoder(speq, &ts); + if (ret < 0) { + auxtrace_heap__add(&spe->heap, queue_nr, ts); + return ret; + } + + if (!ret) { + ret = auxtrace_heap__add(&spe->heap, queue_nr, ts); + if (ret < 0) + return ret; + } else { + speq->on_heap = false; + } + } + + return 0; +} + +static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, + u64 time_) { + struct auxtrace_queues *queues = &spe->queues; + unsigned int i; + u64 ts = 0; + + for (i = 0; i < queues->nr_queues; i++) { + struct auxtrace_queue *queue = &spe->queues.queue_array[i]; + struct arm_spe_queue *speq = queue->priv; + + if (speq && (tid == -1 || speq->tid == tid)) { + speq->time = time_; + arm_spe_set_pid_tid_cpu(spe, queue); + arm_spe_run_decoder(speq, &ts); + } + } return 0; } +static int arm_spe_process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + int err = 0; + u64 timestamp; + struct arm_spe *spe = container_of(session->auxtrace, + struct arm_spe, auxtrace); + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("SPE trace requires ordered events\n"); + return -EINVAL; + } + + if (sample->time && (sample->time != (u64) -1)) + timestamp = sample->time; + else + timestamp = 0; + + if (timestamp || spe->timeless_decoding) { + err = arm_spe__update_queues(spe); + if (err) + return err; + } + + if (spe->timeless_decoding) { + if (event->header.type == PERF_RECORD_EXIT) { + err = arm_spe_process_timeless_queues(spe, + event->fork.tid, + sample->time); + } + } else if (timestamp) { + if (event->header.type == PERF_RECORD_EXIT) { + err = arm_spe_process_queues(spe, timestamp); + if (err) + return err; + } + } + + return err; +} + static int arm_spe_process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) { struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); - struct auxtrace_buffer *buffer; - off_t data_offset; - int fd = perf_data__fd(session->data); - int err; - if (perf_data__is_pipe(session->data)) { - data_offset = 0; - } else { - data_offset = lseek(fd, 0, SEEK_CUR); - if (data_offset == -1) - return -errno; - } + if (!spe->data_queued) { + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data__fd(session->data); + int err; - err = auxtrace_queues__add_event(&spe->queues, session, event, - data_offset, &buffer); - if (err) - return err; + if (perf_data__is_pipe(session->data)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } - /* Dump here now we have copied a piped trace out of the pipe */ - if (dump_trace) { - if (auxtrace_buffer__get_data(buffer, fd)) { - arm_spe_dump_event(spe, buffer->data, - buffer->size); - auxtrace_buffer__put_data(buffer); + err = auxtrace_queues__add_event(&spe->queues, session, event, + data_offset, &buffer); + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + arm_spe_dump_event(spe, buffer->data, + buffer->size); + auxtrace_buffer__put_data(buffer); + } } } @@ -139,7 +654,25 @@ static int arm_spe_process_auxtrace_event(struct perf_session *session, static int arm_spe_flush(struct perf_session *session __maybe_unused, struct perf_tool *tool __maybe_unused) { - return 0; + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, + auxtrace); + int ret; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) + return -EINVAL; + + ret = arm_spe__update_queues(spe); + if (ret < 0) + return ret; + + if (spe->timeless_decoding) + return arm_spe_process_timeless_queues(spe, -1, + MAX_TIMESTAMP - 1); + + return arm_spe_process_queues(spe, MAX_TIMESTAMP); } static void arm_spe_free_queue(void *priv) @@ -148,6 +681,9 @@ static void arm_spe_free_queue(void *priv) if (!speq) return; + thread__zput(speq->thread); + arm_spe_decoder_free(speq->decoder); + zfree(&speq->event_buf); free(speq); } @@ -176,6 +712,14 @@ static void arm_spe_free(struct perf_session *session) free(spe); } +static bool arm_spe_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); + + return evsel->core.attr.type == spe->pmu_type; +} + static const char * const arm_spe_info_fmts[] = { [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n", }; @@ -188,11 +732,189 @@ static void arm_spe_print_info(__u64 *arr) fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); } +struct arm_spe_synth { + struct perf_tool dummy_tool; + struct perf_session *session; +}; + +static int arm_spe_event_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct arm_spe_synth *arm_spe_synth = + container_of(tool, struct arm_spe_synth, dummy_tool); + + return perf_session__deliver_synth_event(arm_spe_synth->session, + event, NULL); +} + +static int arm_spe_synth_event(struct perf_session *session, + struct perf_event_attr *attr, u64 id) +{ + struct arm_spe_synth arm_spe_synth; + + memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth)); + arm_spe_synth.session = session; + + return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1, + &id, arm_spe_event_synth); +} + +static void arm_spe_set_event_name(struct evlist *evlist, u64 id, + const char *name) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.id && evsel->core.id[0] == id) { + if (evsel->name) + zfree(&evsel->name); + evsel->name = strdup(name); + break; + } + } +} + +static int +arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) +{ + struct evlist *evlist = session->evlist; + struct evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type == spe->pmu_type) { + found = true; + break; + } + } + + if (!found) { + pr_debug("No selected events with SPE trace data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = PERF_TYPE_HARDWARE; + attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_PERIOD; + if (spe->timeless_decoding) + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; + else + attr.sample_type |= PERF_SAMPLE_TIME; + + attr.exclude_user = evsel->core.attr.exclude_user; + attr.exclude_kernel = evsel->core.attr.exclude_kernel; + attr.exclude_hv = evsel->core.attr.exclude_hv; + attr.exclude_host = evsel->core.attr.exclude_host; + attr.exclude_guest = evsel->core.attr.exclude_guest; + attr.sample_id_all = evsel->core.attr.sample_id_all; + attr.read_format = evsel->core.attr.read_format; + + /* create new id val to be a fixed offset from evsel id */ + id = evsel->core.id[0] + 1000000000; + + if (!id) + id = 1; + + if (spe->synth_opts.flc) { + spe->sample_flc = true; + + /* Level 1 data cache miss */ + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->l1d_miss_id = id; + arm_spe_set_event_name(evlist, id, "l1d-miss"); + id += 1; + + /* Level 1 data cache access */ + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->l1d_access_id = id; + arm_spe_set_event_name(evlist, id, "l1d-access"); + id += 1; + } + + if (spe->synth_opts.llc) { + spe->sample_llc = true; + + /* Last level cache miss */ + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->llc_miss_id = id; + arm_spe_set_event_name(evlist, id, "llc-miss"); + id += 1; + + /* Last level cache access */ + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->llc_access_id = id; + arm_spe_set_event_name(evlist, id, "llc-access"); + id += 1; + } + + if (spe->synth_opts.tlb) { + spe->sample_tlb = true; + + /* TLB miss */ + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->tlb_miss_id = id; + arm_spe_set_event_name(evlist, id, "tlb-miss"); + id += 1; + + /* TLB access */ + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->tlb_access_id = id; + arm_spe_set_event_name(evlist, id, "tlb-access"); + id += 1; + } + + if (spe->synth_opts.branches) { + spe->sample_branch = true; + + /* Branch miss */ + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->branch_miss_id = id; + arm_spe_set_event_name(evlist, id, "branch-miss"); + id += 1; + } + + if (spe->synth_opts.remote_access) { + spe->sample_remote_access = true; + + /* Remote access */ + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->remote_access_id = id; + arm_spe_set_event_name(evlist, id, "remote-access"); + id += 1; + } + + return 0; +} + int arm_spe_process_auxtrace_info(union perf_event *event, struct perf_session *session) { struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; - size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE; + size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX; struct arm_spe *spe; int err; @@ -213,17 +935,41 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->auxtrace_type = auxtrace_info->type; spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); spe->auxtrace.process_event = arm_spe_process_event; spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; spe->auxtrace.flush_events = arm_spe_flush; spe->auxtrace.free_events = arm_spe_free_events; spe->auxtrace.free = arm_spe_free; + spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; session->auxtrace = &spe->auxtrace; arm_spe_print_info(&auxtrace_info->priv[0]); + if (dump_trace) + return 0; + + if (session->itrace_synth_opts && session->itrace_synth_opts->set) + spe->synth_opts = *session->itrace_synth_opts; + else + itrace_synth_opts__set_default(&spe->synth_opts, false); + + err = arm_spe_synth_events(spe, session); + if (err) + goto err_free_queues; + + err = auxtrace_queues__process_index(&spe->queues, session); + if (err) + goto err_free_queues; + + if (spe->queues.populated) + spe->data_queued = true; + return 0; +err_free_queues: + auxtrace_queues__free(&spe->queues); + session->auxtrace = NULL; err_free: free(spe); return err; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 3571ce72ca28..25c639ac4ad4 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -33,6 +33,7 @@ #include "evsel.h" #include "evsel_config.h" #include "symbol.h" +#include "util/perf_api_probe.h" #include "util/synthetic-events.h" #include "thread_map.h" #include "asm/bug.h" @@ -54,29 +55,9 @@ #include "util/mmap.h" #include <linux/ctype.h> -#include <linux/kernel.h> #include "symbol/kallsyms.h" #include <internal/lib.h> -static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel) -{ - struct perf_pmu *pmu = NULL; - - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - if (pmu->type == evsel->core.attr.type) - break; - } - - return pmu; -} - -static bool perf_evsel__is_aux_event(struct evsel *evsel) -{ - struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); - - return pmu && pmu->auxtrace; -} - /* * Make a group from 'leader' to 'last', requiring that the events were not * already grouped to a different leader. @@ -88,7 +69,7 @@ static int perf_evlist__regroup(struct evlist *evlist, struct evsel *evsel; bool grp; - if (!perf_evsel__is_group_leader(leader)) + if (!evsel__is_group_leader(leader)) return -EINVAL; grp = false; @@ -703,8 +684,8 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist, evlist__for_each_entry(evlist, evsel) { sz = evsel->core.attr.aux_sample_size; - if (perf_evsel__is_group_leader(evsel)) { - has_aux_leader = perf_evsel__is_aux_event(evsel); + if (evsel__is_group_leader(evsel)) { + has_aux_leader = evsel__is_aux_event(evsel); if (sz) { if (has_aux_leader) pr_err("Cannot add AUX area sampling to an AUX area event\n"); @@ -723,10 +704,10 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist, pr_err("Cannot add AUX area sampling because group leader is not an AUX area event\n"); return -EINVAL; } - perf_evsel__set_sample_bit(evsel, AUX); + evsel__set_sample_bit(evsel, AUX); opts->auxtrace_sample_mode = true; } else { - perf_evsel__reset_sample_bit(evsel, AUX); + evsel__reset_sample_bit(evsel, AUX); } } @@ -747,7 +728,7 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts, const char *str) { - struct perf_evsel_config_term *term; + struct evsel_config_term *term; struct evsel *aux_evsel; bool has_aux_sample_size = false; bool has_aux_leader = false; @@ -777,8 +758,8 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr, /* Set aux_sample_size based on --aux-sample option */ evlist__for_each_entry(evlist, evsel) { - if (perf_evsel__is_group_leader(evsel)) { - has_aux_leader = perf_evsel__is_aux_event(evsel); + if (evsel__is_group_leader(evsel)) { + has_aux_leader = evsel__is_aux_event(evsel); } else if (has_aux_leader) { evsel->core.attr.aux_sample_size = sz; } @@ -787,9 +768,9 @@ no_opt: aux_evsel = NULL; /* Override with aux_sample_size from config term */ evlist__for_each_entry(evlist, evsel) { - if (perf_evsel__is_aux_event(evsel)) + if (evsel__is_aux_event(evsel)) aux_evsel = evsel; - term = perf_evsel__get_config_term(evsel, AUX_SAMPLE_SIZE); + term = evsel__get_config_term(evsel, AUX_SAMPLE_SIZE); if (term) { has_aux_sample_size = true; evsel->core.attr.aux_sample_size = term->val.aux_sample_size; @@ -1234,29 +1215,79 @@ out_free: return err; } +static void unleader_evsel(struct evlist *evlist, struct evsel *leader) +{ + struct evsel *new_leader = NULL; + struct evsel *evsel; + + /* Find new leader for the group */ + evlist__for_each_entry(evlist, evsel) { + if (evsel->leader != leader || evsel == leader) + continue; + if (!new_leader) + new_leader = evsel; + evsel->leader = new_leader; + } + + /* Update group information */ + if (new_leader) { + zfree(&new_leader->group_name); + new_leader->group_name = leader->group_name; + leader->group_name = NULL; + + new_leader->core.nr_members = leader->core.nr_members - 1; + leader->core.nr_members = 1; + } +} + +static void unleader_auxtrace(struct perf_session *session) +{ + struct evsel *evsel; + + evlist__for_each_entry(session->evlist, evsel) { + if (auxtrace__evsel_is_auxtrace(session, evsel) && + evsel__is_group_leader(evsel)) { + unleader_evsel(session->evlist, evsel); + } + } +} + int perf_event__process_auxtrace_info(struct perf_session *session, union perf_event *event) { enum auxtrace_type type = event->auxtrace_info.type; + int err; if (dump_trace) fprintf(stdout, " type: %u\n", type); switch (type) { case PERF_AUXTRACE_INTEL_PT: - return intel_pt_process_auxtrace_info(event, session); + err = intel_pt_process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_INTEL_BTS: - return intel_bts_process_auxtrace_info(event, session); + err = intel_bts_process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_ARM_SPE: - return arm_spe_process_auxtrace_info(event, session); + err = arm_spe_process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_CS_ETM: - return cs_etm__process_auxtrace_info(event, session); + err = cs_etm__process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_S390_CPUMSF: - return s390_cpumsf_process_auxtrace_info(event, session); + err = s390_cpumsf_process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; } + + if (err) + return err; + + unleader_auxtrace(session); + + return 0; } s64 perf_event__process_auxtrace(struct perf_session *session, @@ -1299,6 +1330,11 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, synth_opts->pwr_events = true; synth_opts->other_events = true; synth_opts->errors = true; + synth_opts->flc = true; + synth_opts->llc = true; + synth_opts->tlb = true; + synth_opts->remote_access = true; + if (no_sample) { synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; synth_opts->period = 1; @@ -1412,8 +1448,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, synth_opts->branches = true; synth_opts->returns = true; break; + case 'G': case 'g': - synth_opts->callchain = true; + if (p[-1] == 'G') + synth_opts->add_callchain = true; + else + synth_opts->callchain = true; synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; while (*p == ' ' || *p == ',') @@ -1428,8 +1468,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, synth_opts->callchain_sz = val; } break; + case 'L': case 'l': - synth_opts->last_branch = true; + if (p[-1] == 'L') + synth_opts->add_last_branch = true; + else + synth_opts->last_branch = true; synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; while (*p == ' ' || *p == ',') @@ -1451,6 +1495,18 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, goto out_err; p = endptr; break; + case 'f': + synth_opts->flc = true; + break; + case 'm': + synth_opts->llc = true; + break; + case 't': + synth_opts->tlb = true; + break; + case 'a': + synth_opts->remote_access = true; + break; case ' ': case ',': break; @@ -2482,7 +2538,7 @@ static int parse_addr_filter(struct evsel *evsel, const char *filter, goto out_exit; } - if (perf_evsel__append_addr_filter(evsel, new_filter)) { + if (evsel__append_addr_filter(evsel, new_filter)) { err = -ENOMEM; goto out_exit; } @@ -2500,9 +2556,9 @@ out_exit: return err; } -static int perf_evsel__nr_addr_filter(struct evsel *evsel) +static int evsel__nr_addr_filter(struct evsel *evsel) { - struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); + struct perf_pmu *pmu = evsel__find_pmu(evsel); int nr_addr_filters = 0; if (!pmu) @@ -2521,7 +2577,7 @@ int auxtrace_parse_filters(struct evlist *evlist) evlist__for_each_entry(evlist, evsel) { filter = evsel->filter; - max_nr = perf_evsel__nr_addr_filter(evsel); + max_nr = evsel__nr_addr_filter(evsel); if (!filter || !max_nr) continue; evsel->filter = NULL; @@ -2577,3 +2633,12 @@ void auxtrace__free(struct perf_session *session) return session->auxtrace->free(session); } + +bool auxtrace__evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + if (!session->auxtrace || !session->auxtrace->evsel_is_auxtrace) + return false; + + return session->auxtrace->evsel_is_auxtrace(session, evsel); +} diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index e58ef160b599..142ccf7d34df 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -21,6 +21,7 @@ union perf_event; struct perf_session; struct evlist; +struct evsel; struct perf_tool; struct mmap; struct perf_sample; @@ -62,6 +63,7 @@ enum itrace_period_type { * because 'perf inject' will write it out * @instructions: whether to synthesize 'instructions' events * @branches: whether to synthesize 'branches' events + * (branch misses only for Arm SPE) * @transactions: whether to synthesize events for transactions * @ptwrites: whether to synthesize events for ptwrites * @pwr_events: whether to synthesize power events @@ -73,8 +75,14 @@ enum itrace_period_type { * @calls: limit branch samples to calls (can be combined with @returns) * @returns: limit branch samples to returns (can be combined with @calls) * @callchain: add callchain to 'instructions' events + * @add_callchain: add callchain to existing event records * @thread_stack: feed branches to the thread_stack * @last_branch: add branch context to 'instruction' events + * @add_last_branch: add branch context to existing event records + * @flc: whether to synthesize first level cache events + * @llc: whether to synthesize last level cache events + * @tlb: whether to synthesize TLB events + * @remote_access: whether to synthesize remote access events * @callchain_sz: maximum callchain size * @last_branch_sz: branch context size * @period: 'instructions' events period @@ -100,8 +108,14 @@ struct itrace_synth_opts { bool calls; bool returns; bool callchain; + bool add_callchain; bool thread_stack; bool last_branch; + bool add_last_branch; + bool flc; + bool llc; + bool tlb; + bool remote_access; unsigned int callchain_sz; unsigned int last_branch_sz; unsigned long long period; @@ -166,6 +180,8 @@ struct auxtrace { struct perf_tool *tool); void (*free_events)(struct perf_session *session); void (*free)(struct perf_session *session); + bool (*evsel_is_auxtrace)(struct perf_session *session, + struct evsel *evsel); }; /** @@ -584,10 +600,12 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session, int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool); void auxtrace__free_events(struct perf_session *session); void auxtrace__free(struct perf_session *session); +bool auxtrace__evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel); #define ITRACE_HELP \ " i: synthesize instructions events\n" \ -" b: synthesize branches events\n" \ +" b: synthesize branches events (branch misses for Arm SPE)\n" \ " c: synthesize branches events (calls only)\n" \ " r: synthesize branches events (returns only)\n" \ " x: synthesize transactions events\n" \ @@ -595,6 +613,10 @@ void auxtrace__free(struct perf_session *session); " p: synthesize power events\n" \ " e: synthesize error events\n" \ " d: create a debug log\n" \ +" f: synthesize first level cache events\n" \ +" m: synthesize last level cache events\n" \ +" t: synthesize TLB events\n" \ +" a: synthesize remote access events\n" \ " g[len]: synthesize a call chain (use with i or x)\n" \ " l[len]: synthesize last branch entries (use with i or x)\n" \ " sNUMBER: skip initial number of events\n" \ @@ -750,6 +772,13 @@ void auxtrace_index__free(struct list_head *head __maybe_unused) } static inline +bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused, + struct evsel *evsel __maybe_unused) +{ + return false; +} + +static inline int auxtrace_parse_filters(struct evlist *evlist __maybe_unused) { return 0; diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index a3207d900339..3742511a08d1 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -6,6 +6,9 @@ #include <bpf/libbpf.h> #include <linux/btf.h> #include <linux/err.h> +#include <linux/string.h> +#include <internal/lib.h> +#include <symbol/kallsyms.h> #include "bpf-event.h" #include "debug.h" #include "dso.h" @@ -290,11 +293,82 @@ out: return err ? -1 : 0; } +struct kallsyms_parse { + union perf_event *event; + perf_event__handler_t process; + struct machine *machine; + struct perf_tool *tool; +}; + +static int +process_bpf_image(char *name, u64 addr, struct kallsyms_parse *data) +{ + struct machine *machine = data->machine; + union perf_event *event = data->event; + struct perf_record_ksymbol *ksymbol; + int len; + + ksymbol = &event->ksymbol; + + *ksymbol = (struct perf_record_ksymbol) { + .header = { + .type = PERF_RECORD_KSYMBOL, + .size = offsetof(struct perf_record_ksymbol, name), + }, + .addr = addr, + .len = page_size, + .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF, + .flags = 0, + }; + + len = scnprintf(ksymbol->name, KSYM_NAME_LEN, "%s", name); + ksymbol->header.size += PERF_ALIGN(len + 1, sizeof(u64)); + memset((void *) event + event->header.size, 0, machine->id_hdr_size); + event->header.size += machine->id_hdr_size; + + return perf_tool__process_synth_event(data->tool, event, machine, + data->process); +} + +static int +kallsyms_process_symbol(void *data, const char *_name, + char type __maybe_unused, u64 start) +{ + char disp[KSYM_NAME_LEN]; + char *module, *name; + unsigned long id; + int err = 0; + + module = strchr(_name, '\t'); + if (!module) + return 0; + + /* We are going after [bpf] module ... */ + if (strcmp(module + 1, "[bpf]")) + return 0; + + name = memdup(_name, (module - _name) + 1); + if (!name) + return -ENOMEM; + + name[module - _name] = 0; + + /* .. and only for trampolines and dispatchers */ + if ((sscanf(name, "bpf_trampoline_%lu", &id) == 1) || + (sscanf(name, "bpf_dispatcher_%s", disp) == 1)) + err = process_bpf_image(name, start, data); + + free(name); + return err; +} + int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, struct machine *machine, struct record_opts *opts) { + const char *kallsyms_filename = "/proc/kallsyms"; + struct kallsyms_parse arg; union perf_event *event; __u32 id = 0; int err; @@ -303,6 +377,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size); if (!event) return -1; + + /* Synthesize all the bpf programs in system. */ while (true) { err = bpf_prog_get_next_id(id, &id); if (err) { @@ -335,6 +411,23 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, break; } } + + /* Synthesize all the bpf images - trampolines/dispatchers. */ + if (symbol_conf.kallsyms_name != NULL) + kallsyms_filename = symbol_conf.kallsyms_name; + + arg = (struct kallsyms_parse) { + .event = event, + .process = process, + .machine = machine, + .tool = session->tool, + }; + + if (kallsyms__parse(kallsyms_filename, &arg, kallsyms_process_symbol)) { + pr_err("%s: failed to synthesize bpf images: %s\n", + __func__, strerror(errno)); + } + free(event); return err; } @@ -416,8 +509,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data) return 0; } -int bpf_event__add_sb_event(struct evlist **evlist, - struct perf_env *env) +int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) { struct perf_event_attr attr = { .type = PERF_TYPE_SOFTWARE, diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 81fdc88e6c1a..68f315c3df5b 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -33,8 +33,7 @@ struct btf_node { #ifdef HAVE_LIBBPF_SUPPORT int machine__process_bpf(struct machine *machine, union perf_event *event, struct perf_sample *sample); -int bpf_event__add_sb_event(struct evlist **evlist, - struct perf_env *env); +int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env); void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, struct perf_env *env, FILE *fp); @@ -46,8 +45,8 @@ static inline int machine__process_bpf(struct machine *machine __maybe_unused, return 0; } -static inline int bpf_event__add_sb_event(struct evlist **evlist __maybe_unused, - struct perf_env *env __maybe_unused) +static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused, + struct perf_env *env __maybe_unused) { return 0; } diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 10c187b8b8ea..2feb751516ab 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1225,7 +1225,7 @@ bpf__obj_config_map(struct bpf_object *obj, out: free(map_name); if (!err) - key_scan_pos += strlen(map_opt); + *key_scan_pos += strlen(map_opt); return err; } @@ -1430,7 +1430,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey, return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH; } - if (perf_evsel__is_bpf_output(evsel)) + if (evsel__is_bpf_output(evsel)) check_pass = true; if (attr->type == PERF_TYPE_RAW) check_pass = true; diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 154a05cd03af..17b2ccc61094 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -15,13 +15,18 @@ #include "event.h" struct branch_flags { - u64 mispred:1; - u64 predicted:1; - u64 in_tx:1; - u64 abort:1; - u64 cycles:16; - u64 type:4; - u64 reserved:40; + union { + u64 value; + struct { + u64 mispred:1; + u64 predicted:1; + u64 in_tx:1; + u64 abort:1; + u64 cycles:16; + u64 type:4; + u64 reserved:40; + }; + }; }; struct branch_info { @@ -41,7 +46,7 @@ struct branch_entry { struct branch_stack { u64 nr; u64 hw_idx; - struct branch_entry entries[0]; + struct branch_entry entries[]; }; /* diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 818aa4efd386..2775b752f2fa 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1599,3 +1599,17 @@ void callchain_cursor_reset(struct callchain_cursor *cursor) for (node = cursor->first; node != NULL; node = node->next) map__zput(node->ms.map); } + +void callchain_param_setup(u64 sample_type) +{ + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { + if ((sample_type & PERF_SAMPLE_REGS_USER) && + (sample_type & PERF_SAMPLE_STACK_USER)) { + callchain_param.record_mode = CALLCHAIN_DWARF; + dwarf_callchain_users = true; + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + callchain_param.record_mode = CALLCHAIN_LBR; + else + callchain_param.record_mode = CALLCHAIN_FP; + } +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 706bb7bbe1e1..fe36a9e5ccd1 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -143,6 +143,9 @@ struct callchain_cursor_node { u64 ip; struct map_symbol ms; const char *srcline; + /* Indicate valid cursor node for LBR stitch */ + bool valid; + bool branch; struct branch_flags branch_flags; u64 branch_from; @@ -151,6 +154,11 @@ struct callchain_cursor_node { struct callchain_cursor_node *next; }; +struct stitch_list { + struct list_head node; + struct callchain_cursor_node cursor; +}; + struct callchain_cursor { u64 nr; struct callchain_cursor_node *first; @@ -289,4 +297,5 @@ int callchain_branch_counts(struct callchain_root *root, u64 *branch_count, u64 *predicted_count, u64 *abort_count, u64 *cycles_count); +void callchain_param_setup(u64 sample_type); #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h index 051dc590ceee..ae52878c0b2e 100644 --- a/tools/perf/util/cap.h +++ b/tools/perf/util/cap.h @@ -29,4 +29,8 @@ static inline bool perf_cap__capable(int cap __maybe_unused) #define CAP_SYSLOG 34 #endif +#ifndef CAP_PERFMON +#define CAP_PERFMON 38 +#endif + #endif /* __PERF_CAP_H */ diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index b73fb7823048..050dea9f1e88 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -107,7 +107,8 @@ found: static void cgroup__delete(struct cgroup *cgroup) { - close(cgroup->fd); + if (cgroup->fd >= 0) + close(cgroup->fd); zfree(&cgroup->name); free(cgroup); } diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index a12872f2856a..fa8248aadb59 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -28,7 +28,7 @@ int __weak sched_getcpu(void) static int perf_flag_probe(void) { - /* use 'safest' configuration as used in perf_evsel__fallback() */ + /* use 'safest' configuration as used in evsel__fallback() */ struct perf_event_attr attr = { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, @@ -65,7 +65,7 @@ static int perf_flag_probe(void) return 1; } - WARN_ONCE(err != EINVAL && err != EBUSY, + WARN_ONCE(err != EINVAL && err != EBUSY && err != EACCES, "perf_event_open(..., PERF_FLAG_FD_CLOEXEC) failed with unexpected error %d (%s)\n", err, str_error_r(err, sbuf, sizeof(sbuf))); @@ -83,7 +83,7 @@ static int perf_flag_probe(void) if (fd >= 0) close(fd); - if (WARN_ONCE(fd < 0 && err != EBUSY, + if (WARN_ONCE(fd < 0 && err != EBUSY && err != EACCES, "perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n", err, str_error_r(err, sbuf, sizeof(sbuf)))) return -1; diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index ef38eba56ed0..20be0504fb95 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -17,10 +17,10 @@ #include "util/event.h" /* proc_map_timeout */ #include "util/hist.h" /* perf_hist_config */ #include "util/llvm-utils.h" /* perf_llvm_config */ +#include "util/stat.h" /* perf_stat__set_big_num */ #include "build-id.h" #include "debug.h" #include "config.h" -#include "debug.h" #include <sys/types.h> #include <sys/stat.h> #include <stdlib.h> @@ -452,6 +452,15 @@ static int perf_ui_config(const char *var, const char *value) return 0; } +static int perf_stat_config(const char *var, const char *value) +{ + if (!strcmp(var, "stat.big-num")) + perf_stat__set_big_num(perf_config_bool(var, value)); + + /* Add other config variables here. */ + return 0; +} + int perf_default_config(const char *var, const char *value, void *dummy __maybe_unused) { @@ -473,6 +482,9 @@ int perf_default_config(const char *var, const char *value, if (strstarts(var, "buildid.")) return perf_buildid_config(var, value); + if (strstarts(var, "stat.")) + return perf_stat_config(var, value); + /* Add other config variables here. */ return 0; } diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index f94e1a23dad6..582f3aeaf5e4 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <stdlib.h> +#include <string.h> #include "evsel.h" #include "counts.h" #include <linux/zalloc.h> @@ -42,24 +43,25 @@ void perf_counts__delete(struct perf_counts *counts) } } -static void perf_counts__reset(struct perf_counts *counts) +void perf_counts__reset(struct perf_counts *counts) { xyarray__reset(counts->loaded); xyarray__reset(counts->values); + memset(&counts->aggr, 0, sizeof(struct perf_counts_values)); } -void perf_evsel__reset_counts(struct evsel *evsel) +void evsel__reset_counts(struct evsel *evsel) { perf_counts__reset(evsel->counts); } -int perf_evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads) +int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads) { evsel->counts = perf_counts__new(ncpus, nthreads); return evsel->counts != NULL ? 0 : -ENOMEM; } -void perf_evsel__free_counts(struct evsel *evsel) +void evsel__free_counts(struct evsel *evsel) { perf_counts__delete(evsel->counts); evsel->counts = NULL; diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h index 92196df4945f..7ff36bf6d644 100644 --- a/tools/perf/util/counts.h +++ b/tools/perf/util/counts.h @@ -37,9 +37,10 @@ perf_counts__set_loaded(struct perf_counts *counts, int cpu, int thread, bool lo struct perf_counts *perf_counts__new(int ncpus, int nthreads); void perf_counts__delete(struct perf_counts *counts); +void perf_counts__reset(struct perf_counts *counts); -void perf_evsel__reset_counts(struct evsel *evsel); -int perf_evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads); -void perf_evsel__free_counts(struct evsel *evsel); +void evsel__reset_counts(struct evsel *evsel); +int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads); +void evsel__free_counts(struct evsel *evsel); #endif /* __PERF_COUNTS_H */ diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h index 7bf6b811f715..6201c3790d86 100644 --- a/tools/perf/util/cputopo.h +++ b/tools/perf/util/cputopo.h @@ -22,7 +22,7 @@ struct numa_topology_node { struct numa_topology { u32 nr; - struct numa_topology_node nodes[0]; + struct numa_topology_node nodes[]; }; struct cpu_topology *cpu_topology__new(void); diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index cd92a99eb89d..cd007cc9c283 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -564,6 +564,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( resp = cs_etm_decoder__set_tid(etmq, packet_queue, elem, trace_chan_id); break; + /* Unused packet types */ + case OCSD_GEN_TRC_ELEM_I_RANGE_NOPATH: case OCSD_GEN_TRC_ELEM_ADDR_NACC: case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN: diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 62d2f9b9ce1b..c283223fb31f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -94,6 +94,9 @@ struct cs_etm_queue { struct cs_etm_traceid_queue **traceid_queues; }; +/* RB tree for quick conversion between traceID and metadata pointers */ +static struct intlist *traceid_list; + static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, @@ -631,6 +634,16 @@ static void cs_etm__free(struct perf_session *session) zfree(&aux); } +static bool cs_etm__evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct cs_etm_auxtrace *aux = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + return evsel->core.attr.type == aux->pmu_type; +} + static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) { struct machine *machine; @@ -2618,6 +2631,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, etm->auxtrace.flush_events = cs_etm__flush_events; etm->auxtrace.free_events = cs_etm__free_events; etm->auxtrace.free = cs_etm__free; + etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace; session->auxtrace = &etm->auxtrace; etm->unknown_thread = thread__new(999999999, 999999999); diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 650ecc2a6349..4ad925d6d799 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -114,9 +114,6 @@ enum cs_etm_isa { CS_ETM_ISA_T32, }; -/* RB tree for quick conversion between traceID and metadata pointers */ -struct intlist *traceid_list; - struct cs_etm_queue; struct cs_etm_packet { diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index dbc772bfb04e..5f36fc6a5578 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -835,7 +835,7 @@ static int process_sample_event(struct perf_tool *tool, return -1; } - if (perf_evsel__is_bpf_output(evsel)) { + if (evsel__is_bpf_output(evsel)) { ret = add_bpf_output_values(event_class, event, sample); if (ret) return -1; @@ -1155,7 +1155,7 @@ static int add_event(struct ctf_writer *cw, struct evsel *evsel) { struct bt_ctf_event_class *event_class; struct evsel_priv *priv; - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); int ret; pr("Adding event '%s' (type %d)\n", name, evsel->core.attr.type); @@ -1174,7 +1174,7 @@ static int add_event(struct ctf_writer *cw, struct evsel *evsel) goto err; } - if (perf_evsel__is_bpf_output(evsel)) { + if (evsel__is_bpf_output(evsel)) { ret = add_bpf_output_types(cw, event_class); if (ret) goto err; diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c index 6fb7f34c0814..39c05200ed65 100644 --- a/tools/perf/util/demangle-java.c +++ b/tools/perf/util/demangle-java.c @@ -15,7 +15,7 @@ enum { MODE_CLASS = 1, MODE_FUNC = 2, MODE_TYPE = 3, - MODE_CTYPE = 3, /* class arg */ + MODE_CTYPE = 4, /* class arg */ }; #define BASE_ENT(c, n) [c - 'A']=n @@ -27,7 +27,7 @@ static const char *base_types['Z' - 'A' + 1] = { BASE_ENT('I', "int" ), BASE_ENT('J', "long" ), BASE_ENT('S', "short" ), - BASE_ENT('Z', "bool" ), + BASE_ENT('Z', "boolean" ), }; /* @@ -59,15 +59,16 @@ __demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int switch (*q) { case 'L': - if (mode == MODE_PREFIX || mode == MODE_CTYPE) { - if (mode == MODE_CTYPE) { + if (mode == MODE_PREFIX || mode == MODE_TYPE) { + if (mode == MODE_TYPE) { if (narg) rlen += scnprintf(buf + rlen, maxlen - rlen, ", "); narg++; } - rlen += scnprintf(buf + rlen, maxlen - rlen, "class "); if (mode == MODE_PREFIX) mode = MODE_CLASS; + else + mode = MODE_CTYPE; } else buf[rlen++] = *q; break; @@ -120,7 +121,7 @@ __demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int if (mode != MODE_CLASS && mode != MODE_CTYPE) goto error; /* safe because at least one other char to process */ - if (isalpha(*(q + 1))) + if (isalpha(*(q + 1)) && mode == MODE_CLASS) rlen += scnprintf(buf + rlen, maxlen - rlen, "."); if (mode == MODE_CLASS) mode = MODE_FUNC; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 91f21239608b..99f0a39c3c59 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -47,6 +47,7 @@ char dso__symtab_origin(const struct dso *dso) [DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO] = 'D', [DSO_BINARY_TYPE__FEDORA_DEBUGINFO] = 'f', [DSO_BINARY_TYPE__UBUNTU_DEBUGINFO] = 'u', + [DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO] = 'x', [DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO] = 'o', [DSO_BINARY_TYPE__BUILDID_DEBUGINFO] = 'b', [DSO_BINARY_TYPE__SYSTEM_PATH_DSO] = 'd', @@ -129,6 +130,21 @@ int dso__read_binary_type_filename(const struct dso *dso, snprintf(filename + len, size - len, "%s", dso->long_name); break; + case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: + /* + * Ubuntu can mixup /usr/lib with /lib, putting debuginfo in + * /usr/lib/debug/lib when it is expected to be in + * /usr/lib/debug/usr/lib + */ + if (strlen(dso->long_name) < 9 || + strncmp(dso->long_name, "/usr/lib/", 9)) { + ret = -1; + break; + } + len = __symbol__join_symfs(filename, size, "/usr/lib/debug"); + snprintf(filename + len, size - len, "%s", dso->long_name + 4); + break; + case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: { const char *last_slash; @@ -191,6 +207,7 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__GUEST_KALLSYMS: case DSO_BINARY_TYPE__JAVA_JIT: case DSO_BINARY_TYPE__BPF_PROG_INFO: + case DSO_BINARY_TYPE__BPF_IMAGE: case DSO_BINARY_TYPE__NOT_FOUND: ret = -1; break; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 2db64b79617a..d3d03274b0d1 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -30,6 +30,7 @@ enum dso_binary_type { DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO, DSO_BINARY_TYPE__FEDORA_DEBUGINFO, DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, DSO_BINARY_TYPE__BUILDID_DEBUGINFO, DSO_BINARY_TYPE__SYSTEM_PATH_DSO, DSO_BINARY_TYPE__GUEST_KMODULE, @@ -40,6 +41,7 @@ enum dso_binary_type { DSO_BINARY_TYPE__GUEST_KCORE, DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, DSO_BINARY_TYPE__BPF_PROG_INFO, + DSO_BINARY_TYPE__BPF_IMAGE, DSO_BINARY_TYPE__NOT_FOUND, }; @@ -136,7 +138,7 @@ struct dso_cache { struct rb_node rb_node; u64 offset; u64 size; - char data[0]; + char data[]; }; struct auxtrace_cache; @@ -208,7 +210,7 @@ struct dso { struct nsinfo *nsinfo; struct dso_id id; refcount_t refcnt; - char name[0]; + char name[]; }; /* dso__for_each_symbol - iterate over the symbols of given type diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 7632075a8792..1ab2682d5d2b 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -48,6 +48,7 @@ struct perf_env { char *cpuid; unsigned long long total_mem; unsigned int msr_pmu_type; + unsigned int max_branches; int nr_cmdline; int nr_sibling_cores; @@ -57,12 +58,14 @@ struct perf_env { int nr_memory_nodes; int nr_pmu_mappings; int nr_groups; + int nr_cpu_pmu_caps; char *cmdline; const char **cmdline_argv; char *sibling_cores; char *sibling_dies; char *sibling_threads; char *pmu_mappings; + char *cpu_pmu_caps; struct cpu_topology_map *cpu; struct cpu_cache_level *caches; int caches_cnt; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index dc0e11214ae1..f581550a3015 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -626,7 +626,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al, ret = strlist__has_entry(symbol_conf.sym_list, al->sym->name); } - if (!(ret && al->sym)) { + if (!ret && al->sym) { snprintf(al_addr_str, sz, "0x%"PRIx64, al->map->unmap_ip(al->map, al->sym->start)); ret = strlist__has_entry(symbol_conf.sym_list, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index b8289f160f07..6ae01c3c2ffa 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -79,7 +79,7 @@ struct sample_read { struct ip_callchain { u64 nr; - u64 ips[0]; + u64 ips[]; }; struct branch_stack; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 1548237b6558..173b4f0e0e6e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -23,6 +23,7 @@ #include "asm/bug.h" #include "bpf-event.h" #include "util/string2.h" +#include "util/perf_api_probe.h" #include <signal.h> #include <unistd.h> #include <sched.h> @@ -118,7 +119,7 @@ static void perf_evlist__update_id_pos(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) - perf_evsel__calc_id_pos(evsel); + evsel__calc_id_pos(evsel); perf_evlist__set_id_pos(evlist); } @@ -232,7 +233,7 @@ void perf_evlist__set_leader(struct evlist *evlist) int __perf_evlist__add_default(struct evlist *evlist, bool precise) { - struct evsel *evsel = perf_evsel__new_cycles(precise); + struct evsel *evsel = evsel__new_cycles(precise); if (evsel == NULL) return -ENOMEM; @@ -248,7 +249,7 @@ int perf_evlist__add_dummy(struct evlist *evlist) .config = PERF_COUNT_SW_DUMMY, .size = sizeof(attr), /* to capture ABI version */ }; - struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries); + struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries); if (evsel == NULL) return -ENOMEM; @@ -265,7 +266,7 @@ static int evlist__add_attrs(struct evlist *evlist, size_t i; for (i = 0; i < nr_attrs; i++) { - evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i); + evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i); if (evsel == NULL) goto out_delete_partial_list; list_add_tail(&evsel->core.node, &head); @@ -324,7 +325,7 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist, int perf_evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler) { - struct evsel *evsel = perf_evsel__newtp(sys, name); + struct evsel *evsel = evsel__newtp(sys, name); if (IS_ERR(evsel)) return -1; @@ -379,25 +380,36 @@ void evlist__disable(struct evlist *evlist) { struct evsel *pos; struct affinity affinity; - int cpu, i; + int cpu, i, imm = 0; + bool has_imm = false; if (affinity__setup(&affinity) < 0) return; - evlist__for_each_cpu(evlist, i, cpu) { - affinity__set(&affinity, cpu); - - evlist__for_each_entry(evlist, pos) { - if (evsel__cpu_iter_skip(pos, cpu)) - continue; - if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd) - continue; - evsel__disable_cpu(pos, pos->cpu_iter - 1); + /* Disable 'immediate' events last */ + for (imm = 0; imm <= 1; imm++) { + evlist__for_each_cpu(evlist, i, cpu) { + affinity__set(&affinity, cpu); + + evlist__for_each_entry(evlist, pos) { + if (evsel__cpu_iter_skip(pos, cpu)) + continue; + if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) + continue; + if (pos->immediate) + has_imm = true; + if (pos->immediate != imm) + continue; + evsel__disable_cpu(pos, pos->cpu_iter - 1); + } } + if (!has_imm) + break; } + affinity__cleanup(&affinity); evlist__for_each_entry(evlist, pos) { - if (!perf_evsel__is_group_leader(pos) || !pos->core.fd) + if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; pos->disabled = true; } @@ -420,14 +432,14 @@ void evlist__enable(struct evlist *evlist) evlist__for_each_entry(evlist, pos) { if (evsel__cpu_iter_skip(pos, cpu)) continue; - if (!perf_evsel__is_group_leader(pos) || !pos->core.fd) + if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; evsel__enable_cpu(pos, pos->cpu_iter - 1); } } affinity__cleanup(&affinity); evlist__for_each_entry(evlist, pos) { - if (!perf_evsel__is_group_leader(pos) || !pos->core.fd) + if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; pos->disabled = false; } @@ -947,7 +959,7 @@ void __perf_evlist__set_sample_bit(struct evlist *evlist, struct evsel *evsel; evlist__for_each_entry(evlist, evsel) - __perf_evsel__set_sample_bit(evsel, bit); + __evsel__set_sample_bit(evsel, bit); } void __perf_evlist__reset_sample_bit(struct evlist *evlist, @@ -956,7 +968,7 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist, struct evsel *evsel; evlist__for_each_entry(evlist, evsel) - __perf_evsel__reset_sample_bit(evsel, bit); + __evsel__reset_sample_bit(evsel, bit); } int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) @@ -994,7 +1006,7 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) continue; - err = perf_evsel__set_filter(evsel, filter); + err = evsel__set_filter(evsel, filter); if (err) break; } @@ -1014,7 +1026,7 @@ int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter) if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) continue; - err = perf_evsel__append_tp_filter(evsel, filter); + err = evsel__append_tp_filter(evsel, filter); if (err) break; } @@ -1131,8 +1143,10 @@ bool perf_evlist__valid_read_format(struct evlist *evlist) u64 sample_type = first->core.attr.sample_type; evlist__for_each_entry(evlist, pos) { - if (read_format != pos->core.attr.read_format) - return false; + if (read_format != pos->core.attr.read_format) { + pr_debug("Read format differs %#" PRIx64 " vs %#" PRIx64 "\n", + read_format, (u64)pos->core.attr.read_format); + } } /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ @@ -1436,7 +1450,7 @@ int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event, if (!evsel) return -EFAULT; - return perf_evsel__parse_sample(evsel, event, sample); + return evsel__parse_sample(evsel, event, sample); } int perf_evlist__parse_sample_timestamp(struct evlist *evlist, @@ -1447,7 +1461,7 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist, if (!evsel) return -EFAULT; - return perf_evsel__parse_sample_timestamp(evsel, event, timestamp); + return evsel__parse_sample_timestamp(evsel, event, timestamp); } int perf_evlist__strerror_open(struct evlist *evlist, @@ -1701,133 +1715,3 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, } return leader; } - -int perf_evlist__add_sb_event(struct evlist **evlist, - struct perf_event_attr *attr, - perf_evsel__sb_cb_t cb, - void *data) -{ - struct evsel *evsel; - bool new_evlist = (*evlist) == NULL; - - if (*evlist == NULL) - *evlist = evlist__new(); - if (*evlist == NULL) - return -1; - - if (!attr->sample_id_all) { - pr_warning("enabling sample_id_all for all side band events\n"); - attr->sample_id_all = 1; - } - - evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries); - if (!evsel) - goto out_err; - - evsel->side_band.cb = cb; - evsel->side_band.data = data; - evlist__add(*evlist, evsel); - return 0; - -out_err: - if (new_evlist) { - evlist__delete(*evlist); - *evlist = NULL; - } - return -1; -} - -static void *perf_evlist__poll_thread(void *arg) -{ - struct evlist *evlist = arg; - bool draining = false; - int i, done = 0; - /* - * In order to read symbols from other namespaces perf to needs to call - * setns(2). This isn't permitted if the struct_fs has multiple users. - * unshare(2) the fs so that we may continue to setns into namespaces - * that we're observing when, for instance, reading the build-ids at - * the end of a 'perf record' session. - */ - unshare(CLONE_FS); - - while (!done) { - bool got_data = false; - - if (evlist->thread.done) - draining = true; - - if (!draining) - evlist__poll(evlist, 1000); - - for (i = 0; i < evlist->core.nr_mmaps; i++) { - struct mmap *map = &evlist->mmap[i]; - union perf_event *event; - - if (perf_mmap__read_init(&map->core)) - continue; - while ((event = perf_mmap__read_event(&map->core)) != NULL) { - struct evsel *evsel = perf_evlist__event2evsel(evlist, event); - - if (evsel && evsel->side_band.cb) - evsel->side_band.cb(event, evsel->side_band.data); - else - pr_warning("cannot locate proper evsel for the side band event\n"); - - perf_mmap__consume(&map->core); - got_data = true; - } - perf_mmap__read_done(&map->core); - } - - if (draining && !got_data) - break; - } - return NULL; -} - -int perf_evlist__start_sb_thread(struct evlist *evlist, - struct target *target) -{ - struct evsel *counter; - - if (!evlist) - return 0; - - if (perf_evlist__create_maps(evlist, target)) - goto out_delete_evlist; - - evlist__for_each_entry(evlist, counter) { - if (evsel__open(counter, evlist->core.cpus, - evlist->core.threads) < 0) - goto out_delete_evlist; - } - - if (evlist__mmap(evlist, UINT_MAX)) - goto out_delete_evlist; - - evlist__for_each_entry(evlist, counter) { - if (evsel__enable(counter)) - goto out_delete_evlist; - } - - evlist->thread.done = 0; - if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist)) - goto out_delete_evlist; - - return 0; - -out_delete_evlist: - evlist__delete(evlist); - evlist = NULL; - return -1; -} - -void perf_evlist__stop_sb_thread(struct evlist *evlist) -{ - if (!evlist) - return; - evlist->thread.done = 1; - pthread_join(evlist->thread.th, NULL); - evlist__delete(evlist); -} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index f5bd5c386df1..b6f325dfb4d2 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -107,10 +107,11 @@ int __perf_evlist__add_default_attrs(struct evlist *evlist, int perf_evlist__add_dummy(struct evlist *evlist); -int perf_evlist__add_sb_event(struct evlist **evlist, +int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, - perf_evsel__sb_cb_t cb, + evsel__sb_cb_t cb, void *data); +void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data); int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target); void perf_evlist__stop_sb_thread(struct evlist *evlist); @@ -173,10 +174,6 @@ void evlist__close(struct evlist *evlist); struct callchain_param; void perf_evlist__set_id_pos(struct evlist *evlist); -bool perf_can_sample_identifier(void); -bool perf_can_record_switch_events(void); -bool perf_can_record_cpu_wide(void); -bool perf_can_aux_sample(void); void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index eb880efbce16..96e5171dce41 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -56,14 +56,14 @@ struct perf_missing_features perf_missing_features; static clockid_t clockid; -static int perf_evsel__no_extra_init(struct evsel *evsel __maybe_unused) +static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) { return 0; } void __weak test_attr__ready(void) { } -static void perf_evsel__no_extra_fini(struct evsel *evsel __maybe_unused) +static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused) { } @@ -73,13 +73,12 @@ static struct { void (*fini)(struct evsel *evsel); } perf_evsel__object = { .size = sizeof(struct evsel), - .init = perf_evsel__no_extra_init, - .fini = perf_evsel__no_extra_fini, + .init = evsel__no_extra_init, + .fini = evsel__no_extra_fini, }; -int perf_evsel__object_config(size_t object_size, - int (*init)(struct evsel *evsel), - void (*fini)(struct evsel *evsel)) +int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel), + void (*fini)(struct evsel *evsel)) { if (object_size == 0) @@ -102,7 +101,7 @@ set_methods: #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) -int __perf_evsel__sample_size(u64 sample_type) +int __evsel__sample_size(u64 sample_type) { u64 mask = sample_type & PERF_SAMPLE_MASK; int size = 0; @@ -178,53 +177,53 @@ static int __perf_evsel__calc_is_pos(u64 sample_type) return idx; } -void perf_evsel__calc_id_pos(struct evsel *evsel) +void evsel__calc_id_pos(struct evsel *evsel) { evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type); evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type); } -void __perf_evsel__set_sample_bit(struct evsel *evsel, +void __evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit) { if (!(evsel->core.attr.sample_type & bit)) { evsel->core.attr.sample_type |= bit; evsel->sample_size += sizeof(u64); - perf_evsel__calc_id_pos(evsel); + evsel__calc_id_pos(evsel); } } -void __perf_evsel__reset_sample_bit(struct evsel *evsel, +void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit) { if (evsel->core.attr.sample_type & bit) { evsel->core.attr.sample_type &= ~bit; evsel->sample_size -= sizeof(u64); - perf_evsel__calc_id_pos(evsel); + evsel__calc_id_pos(evsel); } } -void perf_evsel__set_sample_id(struct evsel *evsel, +void evsel__set_sample_id(struct evsel *evsel, bool can_sample_identifier) { if (can_sample_identifier) { - perf_evsel__reset_sample_bit(evsel, ID); - perf_evsel__set_sample_bit(evsel, IDENTIFIER); + evsel__reset_sample_bit(evsel, ID); + evsel__set_sample_bit(evsel, IDENTIFIER); } else { - perf_evsel__set_sample_bit(evsel, ID); + evsel__set_sample_bit(evsel, ID); } evsel->core.attr.read_format |= PERF_FORMAT_ID; } /** - * perf_evsel__is_function_event - Return whether given evsel is a function + * evsel__is_function_event - Return whether given evsel is a function * trace event * * @evsel - evsel selector to be tested * * Return %true if event is function trace event */ -bool perf_evsel__is_function_event(struct evsel *evsel) +bool evsel__is_function_event(struct evsel *evsel) { #define FUNCTION_EVENT "ftrace:function" @@ -249,17 +248,18 @@ void evsel__init(struct evsel *evsel, evsel->bpf_fd = -1; INIT_LIST_HEAD(&evsel->config_terms); perf_evsel__object.init(evsel); - evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); - perf_evsel__calc_id_pos(evsel); + evsel->sample_size = __evsel__sample_size(attr->sample_type); + evsel__calc_id_pos(evsel); evsel->cmdline_group_boundary = false; evsel->metric_expr = NULL; evsel->metric_name = NULL; evsel->metric_events = NULL; + evsel->per_pkg_mask = NULL; evsel->collect_stat = false; evsel->pmu_name = NULL; } -struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) +struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) { struct evsel *evsel = zalloc(perf_evsel__object.size); @@ -267,13 +267,13 @@ struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) return NULL; evsel__init(evsel, attr, idx); - if (perf_evsel__is_bpf_output(evsel)) { + if (evsel__is_bpf_output(evsel)) { evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), evsel->core.attr.sample_period = 1; } - if (perf_evsel__is_clock(evsel)) { + if (evsel__is_clock(evsel)) { /* * The evsel->unit points to static alias->unit * so it's ok to use static string in here. @@ -292,7 +292,7 @@ static bool perf_event_can_profile_kernel(void) return perf_event_paranoid_check(1); } -struct evsel *perf_evsel__new_cycles(bool precise) +struct evsel *evsel__new_cycles(bool precise) { struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, @@ -334,7 +334,7 @@ error_free: /* * Returns pointer with encoded error via <linux/err.h> interface. */ -struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx) +struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx) { struct evsel *evsel = zalloc(perf_evsel__object.size); int err = -ENOMEM; @@ -372,7 +372,7 @@ out_err: return ERR_PTR(err); } -const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { +const char *evsel__hw_names[PERF_COUNT_HW_MAX] = { "cycles", "instructions", "cache-references", @@ -385,10 +385,10 @@ const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { "ref-cycles", }; -static const char *__perf_evsel__hw_name(u64 config) +static const char *__evsel__hw_name(u64 config) { - if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config]) - return perf_evsel__hw_names[config]; + if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config]) + return evsel__hw_names[config]; return "unknown-hardware"; } @@ -429,13 +429,13 @@ static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) return r; } -static int perf_evsel__hw_name(struct evsel *evsel, char *bf, size_t size) +static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size) { - int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->core.attr.config)); + int r = scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config)); return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); } -const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = { +const char *evsel__sw_names[PERF_COUNT_SW_MAX] = { "cpu-clock", "task-clock", "page-faults", @@ -448,20 +448,20 @@ const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = { "dummy", }; -static const char *__perf_evsel__sw_name(u64 config) +static const char *__evsel__sw_name(u64 config) { - if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config]) - return perf_evsel__sw_names[config]; + if (config < PERF_COUNT_SW_MAX && evsel__sw_names[config]) + return evsel__sw_names[config]; return "unknown-software"; } -static int perf_evsel__sw_name(struct evsel *evsel, char *bf, size_t size) +static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size) { - int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->core.attr.config)); + int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config)); return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); } -static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) +static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) { int r; @@ -479,15 +479,14 @@ static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) return r; } -static int perf_evsel__bp_name(struct evsel *evsel, char *bf, size_t size) +static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size) { struct perf_event_attr *attr = &evsel->core.attr; - int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type); + int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type); return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); } -const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] - [PERF_EVSEL__MAX_ALIASES] = { +const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = { { "L1-dcache", "l1-d", "l1d", "L1-data", }, { "L1-icache", "l1-i", "l1i", "L1-instruction", }, { "LLC", "L2", }, @@ -497,15 +496,13 @@ const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] { "node", }, }; -const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_EVSEL__MAX_ALIASES] = { +const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = { { "load", "loads", "read", }, { "store", "stores", "write", }, { "prefetch", "prefetches", "speculative-read", "speculative-load", }, }; -const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] - [PERF_EVSEL__MAX_ALIASES] = { +const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = { { "refs", "Reference", "ops", "access", }, { "misses", "miss", }, }; @@ -521,7 +518,7 @@ const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] * L1I : Read and prefetch only * ITLB and BPU : Read-only */ -static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = { +static unsigned long evsel__hw_cache_stat[C(MAX)] = { [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), [C(L1I)] = (CACHE_READ | CACHE_PREFETCH), [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), @@ -531,28 +528,27 @@ static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = { [C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), }; -bool perf_evsel__is_cache_op_valid(u8 type, u8 op) +bool evsel__is_cache_op_valid(u8 type, u8 op) { - if (perf_evsel__hw_cache_stat[type] & COP(op)) + if (evsel__hw_cache_stat[type] & COP(op)) return true; /* valid */ else return false; /* invalid */ } -int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, - char *bf, size_t size) +int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size) { if (result) { - return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0], - perf_evsel__hw_cache_op[op][0], - perf_evsel__hw_cache_result[result][0]); + return scnprintf(bf, size, "%s-%s-%s", evsel__hw_cache[type][0], + evsel__hw_cache_op[op][0], + evsel__hw_cache_result[result][0]); } - return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0], - perf_evsel__hw_cache_op[op][1]); + return scnprintf(bf, size, "%s-%s", evsel__hw_cache[type][0], + evsel__hw_cache_op[op][1]); } -static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size) +static int __evsel__hw_cache_name(u64 config, char *bf, size_t size) { u8 op, result, type = (config >> 0) & 0xff; const char *err = "unknown-ext-hardware-cache-type"; @@ -571,33 +567,33 @@ static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size) goto out_err; err = "invalid-cache"; - if (!perf_evsel__is_cache_op_valid(type, op)) + if (!evsel__is_cache_op_valid(type, op)) goto out_err; - return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size); + return __evsel__hw_cache_type_op_res_name(type, op, result, bf, size); out_err: return scnprintf(bf, size, "%s", err); } -static int perf_evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size) +static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size) { - int ret = __perf_evsel__hw_cache_name(evsel->core.attr.config, bf, size); + int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size); return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); } -static int perf_evsel__raw_name(struct evsel *evsel, char *bf, size_t size) +static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size) { int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config); return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); } -static int perf_evsel__tool_name(char *bf, size_t size) +static int evsel__tool_name(char *bf, size_t size) { int ret = scnprintf(bf, size, "duration_time"); return ret; } -const char *perf_evsel__name(struct evsel *evsel) +const char *evsel__name(struct evsel *evsel) { char bf[128]; @@ -609,22 +605,22 @@ const char *perf_evsel__name(struct evsel *evsel) switch (evsel->core.attr.type) { case PERF_TYPE_RAW: - perf_evsel__raw_name(evsel, bf, sizeof(bf)); + evsel__raw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_HARDWARE: - perf_evsel__hw_name(evsel, bf, sizeof(bf)); + evsel__hw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_HW_CACHE: - perf_evsel__hw_cache_name(evsel, bf, sizeof(bf)); + evsel__hw_cache_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_SOFTWARE: if (evsel->tool_event) - perf_evsel__tool_name(bf, sizeof(bf)); + evsel__tool_name(bf, sizeof(bf)); else - perf_evsel__sw_name(evsel, bf, sizeof(bf)); + evsel__sw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_TRACEPOINT: @@ -632,7 +628,7 @@ const char *perf_evsel__name(struct evsel *evsel) break; case PERF_TYPE_BREAKPOINT: - perf_evsel__bp_name(evsel, bf, sizeof(bf)); + evsel__bp_name(evsel, bf, sizeof(bf)); break; default: @@ -649,7 +645,7 @@ out_unknown: return "unknown"; } -const char *perf_evsel__group_name(struct evsel *evsel) +const char *evsel__group_name(struct evsel *evsel) { return evsel->group_name ?: "anon group"; } @@ -664,21 +660,19 @@ const char *perf_evsel__group_name(struct evsel *evsel) * For record -e 'cycles,instructions' and report --group * 'cycles:u, instructions:u' */ -int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size) +int evsel__group_desc(struct evsel *evsel, char *buf, size_t size) { int ret = 0; struct evsel *pos; - const char *group_name = perf_evsel__group_name(evsel); + const char *group_name = evsel__group_name(evsel); if (!evsel->forced_leader) ret = scnprintf(buf, size, "%s { ", group_name); - ret += scnprintf(buf + ret, size - ret, "%s", - perf_evsel__name(evsel)); + ret += scnprintf(buf + ret, size - ret, "%s", evsel__name(evsel)); for_each_group_member(pos, evsel) - ret += scnprintf(buf + ret, size - ret, ", %s", - perf_evsel__name(pos)); + ret += scnprintf(buf + ret, size - ret, ", %s", evsel__name(pos)); if (!evsel->forced_leader) ret += scnprintf(buf + ret, size - ret, " }"); @@ -686,14 +680,13 @@ int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size) return ret; } -static void __perf_evsel__config_callchain(struct evsel *evsel, - struct record_opts *opts, - struct callchain_param *param) +static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, + struct callchain_param *param) { - bool function = perf_evsel__is_function_event(evsel); + bool function = evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->core.attr; - perf_evsel__set_sample_bit(evsel, CALLCHAIN); + evsel__set_sample_bit(evsel, CALLCHAIN); attr->sample_max_stack = param->max_stack; @@ -708,7 +701,7 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, "to get user callchain information. " "Falling back to framepointers.\n"); } else { - perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + evsel__set_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK | PERF_SAMPLE_BRANCH_NO_CYCLES | @@ -722,8 +715,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, if (param->record_mode == CALLCHAIN_DWARF) { if (!function) { - perf_evsel__set_sample_bit(evsel, REGS_USER); - perf_evsel__set_sample_bit(evsel, STACK_USER); + evsel__set_sample_bit(evsel, REGS_USER); + evsel__set_sample_bit(evsel, STACK_USER); if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) { attr->sample_regs_user |= DWARF_MINIMAL_REGS; pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, " @@ -746,12 +739,11 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, } } -void perf_evsel__config_callchain(struct evsel *evsel, - struct record_opts *opts, - struct callchain_param *param) +void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, + struct callchain_param *param) { if (param->enabled) - return __perf_evsel__config_callchain(evsel, opts, param); + return __evsel__config_callchain(evsel, opts, param); } static void @@ -760,23 +752,23 @@ perf_evsel__reset_callgraph(struct evsel *evsel, { struct perf_event_attr *attr = &evsel->core.attr; - perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + evsel__reset_sample_bit(evsel, CALLCHAIN); if (param->record_mode == CALLCHAIN_LBR) { - perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + evsel__reset_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK | PERF_SAMPLE_BRANCH_HW_INDEX); } if (param->record_mode == CALLCHAIN_DWARF) { - perf_evsel__reset_sample_bit(evsel, REGS_USER); - perf_evsel__reset_sample_bit(evsel, STACK_USER); + evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, STACK_USER); } } -static void apply_config_terms(struct evsel *evsel, - struct record_opts *opts, bool track) +static void evsel__apply_config_terms(struct evsel *evsel, + struct record_opts *opts, bool track) { - struct perf_evsel_config_term *term; + struct evsel_config_term *term; struct list_head *config_terms = &evsel->config_terms; struct perf_event_attr *attr = &evsel->core.attr; /* callgraph default */ @@ -789,69 +781,69 @@ static void apply_config_terms(struct evsel *evsel, list_for_each_entry(term, config_terms, list) { switch (term->type) { - case PERF_EVSEL__CONFIG_TERM_PERIOD: + case EVSEL__CONFIG_TERM_PERIOD: if (!(term->weak && opts->user_interval != ULLONG_MAX)) { attr->sample_period = term->val.period; attr->freq = 0; - perf_evsel__reset_sample_bit(evsel, PERIOD); + evsel__reset_sample_bit(evsel, PERIOD); } break; - case PERF_EVSEL__CONFIG_TERM_FREQ: + case EVSEL__CONFIG_TERM_FREQ: if (!(term->weak && opts->user_freq != UINT_MAX)) { attr->sample_freq = term->val.freq; attr->freq = 1; - perf_evsel__set_sample_bit(evsel, PERIOD); + evsel__set_sample_bit(evsel, PERIOD); } break; - case PERF_EVSEL__CONFIG_TERM_TIME: + case EVSEL__CONFIG_TERM_TIME: if (term->val.time) - perf_evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, TIME); else - perf_evsel__reset_sample_bit(evsel, TIME); + evsel__reset_sample_bit(evsel, TIME); break; - case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: + case EVSEL__CONFIG_TERM_CALLGRAPH: callgraph_buf = term->val.str; break; - case PERF_EVSEL__CONFIG_TERM_BRANCH: + case EVSEL__CONFIG_TERM_BRANCH: if (term->val.str && strcmp(term->val.str, "no")) { - perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + evsel__set_sample_bit(evsel, BRANCH_STACK); parse_branch_str(term->val.str, &attr->branch_sample_type); } else - perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + evsel__reset_sample_bit(evsel, BRANCH_STACK); break; - case PERF_EVSEL__CONFIG_TERM_STACK_USER: + case EVSEL__CONFIG_TERM_STACK_USER: dump_size = term->val.stack_user; break; - case PERF_EVSEL__CONFIG_TERM_MAX_STACK: + case EVSEL__CONFIG_TERM_MAX_STACK: max_stack = term->val.max_stack; break; - case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS: + case EVSEL__CONFIG_TERM_MAX_EVENTS: evsel->max_events = term->val.max_events; break; - case PERF_EVSEL__CONFIG_TERM_INHERIT: + case EVSEL__CONFIG_TERM_INHERIT: /* * attr->inherit should has already been set by - * perf_evsel__config. If user explicitly set + * evsel__config. If user explicitly set * inherit using config terms, override global * opt->no_inherit setting. */ attr->inherit = term->val.inherit ? 1 : 0; break; - case PERF_EVSEL__CONFIG_TERM_OVERWRITE: + case EVSEL__CONFIG_TERM_OVERWRITE: attr->write_backward = term->val.overwrite ? 1 : 0; break; - case PERF_EVSEL__CONFIG_TERM_DRV_CFG: + case EVSEL__CONFIG_TERM_DRV_CFG: break; - case PERF_EVSEL__CONFIG_TERM_PERCORE: + case EVSEL__CONFIG_TERM_PERCORE: break; - case PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT: + case EVSEL__CONFIG_TERM_AUX_OUTPUT: attr->aux_output = term->val.aux_output ? 1 : 0; break; - case PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: + case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: /* Already applied by auxtrace */ break; - case PERF_EVSEL__CONFIG_TERM_CFG_CHG: + case EVSEL__CONFIG_TERM_CFG_CHG: break; default: break; @@ -897,11 +889,11 @@ static void apply_config_terms(struct evsel *evsel, /* set perf-event callgraph */ if (param.enabled) { if (sample_address) { - perf_evsel__set_sample_bit(evsel, ADDR); - perf_evsel__set_sample_bit(evsel, DATA_SRC); + evsel__set_sample_bit(evsel, ADDR); + evsel__set_sample_bit(evsel, DATA_SRC); evsel->core.attr.mmap_data = track; } - perf_evsel__config_callchain(evsel, opts, ¶m); + evsel__config_callchain(evsel, opts, ¶m); } } } @@ -912,10 +904,9 @@ static bool is_dummy_event(struct evsel *evsel) (evsel->core.attr.config == PERF_COUNT_SW_DUMMY); } -struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel, - enum evsel_term_type type) +struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type) { - struct perf_evsel_config_term *term, *found_term = NULL; + struct evsel_config_term *term, *found_term = NULL; list_for_each_entry(term, &evsel->config_terms, list) { if (term->type == type) @@ -953,8 +944,8 @@ struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel * enable/disable events specifically, as there's no * initial traced exec call. */ -void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, - struct callchain_param *callchain) +void evsel__config(struct evsel *evsel, struct record_opts *opts, + struct callchain_param *callchain) { struct evsel *leader = evsel->leader; struct perf_event_attr *attr = &evsel->core.attr; @@ -965,17 +956,17 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, attr->inherit = !opts->no_inherit; attr->write_backward = opts->overwrite ? 1 : 0; - perf_evsel__set_sample_bit(evsel, IP); - perf_evsel__set_sample_bit(evsel, TID); + evsel__set_sample_bit(evsel, IP); + evsel__set_sample_bit(evsel, TID); if (evsel->sample_read) { - perf_evsel__set_sample_bit(evsel, READ); + evsel__set_sample_bit(evsel, READ); /* * We need ID even in case of single event, because * PERF_SAMPLE_READ process ID specific data. */ - perf_evsel__set_sample_id(evsel, false); + evsel__set_sample_id(evsel, false); /* * Apply group format only if we belong to group @@ -994,7 +985,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, if (!attr->sample_period || (opts->user_freq != UINT_MAX || opts->user_interval != ULLONG_MAX)) { if (opts->freq) { - perf_evsel__set_sample_bit(evsel, PERIOD); + evsel__set_sample_bit(evsel, PERIOD); attr->freq = 1; attr->sample_freq = opts->freq; } else { @@ -1002,25 +993,6 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, } } - /* - * Disable sampling for all group members other - * than leader in case leader 'leads' the sampling. - */ - if ((leader != evsel) && leader->sample_read) { - attr->freq = 0; - attr->sample_freq = 0; - attr->sample_period = 0; - attr->write_backward = 0; - - /* - * We don't get sample for slave events, we make them - * when delivering group leader sample. Set the slave - * event to follow the master sample_type to ease up - * report. - */ - attr->sample_type = leader->core.attr.sample_type; - } - if (opts->no_samples) attr->sample_freq = 0; @@ -1033,7 +1005,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, } if (opts->sample_address) { - perf_evsel__set_sample_bit(evsel, ADDR); + evsel__set_sample_bit(evsel, ADDR); attr->mmap_data = track; } @@ -1042,24 +1014,24 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, * event, due to issues with page faults while tracing page * fault handler and its overall trickiness nature. */ - if (perf_evsel__is_function_event(evsel)) + if (evsel__is_function_event(evsel)) evsel->core.attr.exclude_callchain_user = 1; if (callchain && callchain->enabled && !evsel->no_aux_samples) - perf_evsel__config_callchain(evsel, opts, callchain); + evsel__config_callchain(evsel, opts, callchain); if (opts->sample_intr_regs) { attr->sample_regs_intr = opts->sample_intr_regs; - perf_evsel__set_sample_bit(evsel, REGS_INTR); + evsel__set_sample_bit(evsel, REGS_INTR); } if (opts->sample_user_regs) { attr->sample_regs_user |= opts->sample_user_regs; - perf_evsel__set_sample_bit(evsel, REGS_USER); + evsel__set_sample_bit(evsel, REGS_USER); } if (target__has_cpu(&opts->target) || opts->sample_cpu) - perf_evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, CPU); /* * When the user explicitly disabled time don't force it here. @@ -1068,31 +1040,31 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, (!perf_missing_features.sample_id_all && (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu || opts->sample_time_set))) - perf_evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, TIME); if (opts->raw_samples && !evsel->no_aux_samples) { - perf_evsel__set_sample_bit(evsel, TIME); - perf_evsel__set_sample_bit(evsel, RAW); - perf_evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, RAW); + evsel__set_sample_bit(evsel, CPU); } if (opts->sample_address) - perf_evsel__set_sample_bit(evsel, DATA_SRC); + evsel__set_sample_bit(evsel, DATA_SRC); if (opts->sample_phys_addr) - perf_evsel__set_sample_bit(evsel, PHYS_ADDR); + evsel__set_sample_bit(evsel, PHYS_ADDR); if (opts->no_buffering) { attr->watermark = 0; attr->wakeup_events = 1; } if (opts->branch_stack && !evsel->no_aux_samples) { - perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + evsel__set_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type = opts->branch_stack; } if (opts->sample_weight) - perf_evsel__set_sample_bit(evsel, WEIGHT); + evsel__set_sample_bit(evsel, WEIGHT); attr->task = track; attr->mmap = track; @@ -1106,14 +1078,14 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, if (opts->record_cgroup) { attr->cgroup = track && !perf_missing_features.cgroup; - perf_evsel__set_sample_bit(evsel, CGROUP); + evsel__set_sample_bit(evsel, CGROUP); } if (opts->record_switch_events) attr->context_switch = track; if (opts->sample_transaction) - perf_evsel__set_sample_bit(evsel, TRANSACTION); + evsel__set_sample_bit(evsel, TRANSACTION); if (opts->running_time) { evsel->core.attr.read_format |= @@ -1127,15 +1099,15 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, * Disabling only independent events or group leaders, * keeping group members enabled. */ - if (perf_evsel__is_group_leader(evsel)) + if (evsel__is_group_leader(evsel)) attr->disabled = 1; /* * Setting enable_on_exec for independent events and * group leaders for traced executed by perf. */ - if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) && - !opts->initial_delay) + if (target__none(&opts->target) && evsel__is_group_leader(evsel) && + !opts->initial_delay) attr->enable_on_exec = 1; if (evsel->immediate) { @@ -1169,28 +1141,31 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, * Apply event specific term settings, * it overloads any global configuration. */ - apply_config_terms(evsel, opts, track); + evsel__apply_config_terms(evsel, opts, track); evsel->ignore_missing_thread = opts->ignore_missing_thread; /* The --period option takes the precedence. */ if (opts->period_set) { if (opts->period) - perf_evsel__set_sample_bit(evsel, PERIOD); + evsel__set_sample_bit(evsel, PERIOD); else - perf_evsel__reset_sample_bit(evsel, PERIOD); + evsel__reset_sample_bit(evsel, PERIOD); } /* + * A dummy event never triggers any actual counter and therefore + * cannot be used with branch_stack. + * * For initial_delay, a dummy event is added implicitly. * The software event will trigger -EOPNOTSUPP error out, * if BRANCH_STACK bit is set. */ - if (opts->initial_delay && is_dummy_event(evsel)) - perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + if (is_dummy_event(evsel)) + evsel__reset_sample_bit(evsel, BRANCH_STACK); } -int perf_evsel__set_filter(struct evsel *evsel, const char *filter) +int evsel__set_filter(struct evsel *evsel, const char *filter) { char *new_filter = strdup(filter); @@ -1203,13 +1178,12 @@ int perf_evsel__set_filter(struct evsel *evsel, const char *filter) return -1; } -static int perf_evsel__append_filter(struct evsel *evsel, - const char *fmt, const char *filter) +static int evsel__append_filter(struct evsel *evsel, const char *fmt, const char *filter) { char *new_filter; if (evsel->filter == NULL) - return perf_evsel__set_filter(evsel, filter); + return evsel__set_filter(evsel, filter); if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) { free(evsel->filter); @@ -1220,14 +1194,14 @@ static int perf_evsel__append_filter(struct evsel *evsel, return -1; } -int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter) +int evsel__append_tp_filter(struct evsel *evsel, const char *filter) { - return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter); + return evsel__append_filter(evsel, "(%s) && (%s)", filter); } -int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter) +int evsel__append_addr_filter(struct evsel *evsel, const char *filter) { - return perf_evsel__append_filter(evsel, "%s,%s", filter); + return evsel__append_filter(evsel, "%s,%s", filter); } /* Caller has to clear disabled after going through all CPUs. */ @@ -1266,9 +1240,9 @@ int evsel__disable(struct evsel *evsel) return err; } -static void perf_evsel__free_config_terms(struct evsel *evsel) +static void evsel__free_config_terms(struct evsel *evsel) { - struct perf_evsel_config_term *term, *h; + struct evsel_config_term *term, *h; list_for_each_entry_safe(term, h, &evsel->config_terms, list) { list_del_init(&term->list); @@ -1278,14 +1252,14 @@ static void perf_evsel__free_config_terms(struct evsel *evsel) } } -void perf_evsel__exit(struct evsel *evsel) +void evsel__exit(struct evsel *evsel) { assert(list_empty(&evsel->core.node)); assert(evsel->evlist == NULL); - perf_evsel__free_counts(evsel); + evsel__free_counts(evsel); perf_evsel__free_fd(&evsel->core); perf_evsel__free_id(&evsel->core); - perf_evsel__free_config_terms(evsel); + evsel__free_config_terms(evsel); cgroup__put(evsel->cgrp); perf_cpu_map__put(evsel->core.cpus); perf_cpu_map__put(evsel->core.own_cpus); @@ -1293,17 +1267,19 @@ void perf_evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); + zfree(&evsel->per_pkg_mask); + zfree(&evsel->metric_events); perf_evsel__object.fini(evsel); } void evsel__delete(struct evsel *evsel) { - perf_evsel__exit(evsel); + evsel__exit(evsel); free(evsel); } -void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, - struct perf_counts_values *count) +void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, + struct perf_counts_values *count) { struct perf_counts_values tmp; @@ -1342,8 +1318,7 @@ void perf_counts_values__scale(struct perf_counts_values *count, *pscaled = scaled; } -static int -perf_evsel__read_one(struct evsel *evsel, int cpu, int thread) +static int evsel__read_one(struct evsel *evsel, int cpu, int thread) { struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread); @@ -1403,8 +1378,7 @@ perf_evsel__process_group_data(struct evsel *leader, return 0; } -static int -perf_evsel__read_group(struct evsel *leader, int cpu, int thread) +static int evsel__read_group(struct evsel *leader, int cpu, int thread) { struct perf_stat_evsel *ps = leader->stats; u64 read_format = leader->core.attr.read_format; @@ -1414,7 +1388,7 @@ perf_evsel__read_group(struct evsel *leader, int cpu, int thread) if (!(read_format & PERF_FORMAT_ID)) return -EINVAL; - if (!perf_evsel__is_group_leader(leader)) + if (!evsel__is_group_leader(leader)) return -EINVAL; if (!data) { @@ -1434,18 +1408,17 @@ perf_evsel__read_group(struct evsel *leader, int cpu, int thread) return perf_evsel__process_group_data(leader, cpu, thread, data); } -int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread) +int evsel__read_counter(struct evsel *evsel, int cpu, int thread) { u64 read_format = evsel->core.attr.read_format; if (read_format & PERF_FORMAT_GROUP) - return perf_evsel__read_group(evsel, cpu, thread); - else - return perf_evsel__read_one(evsel, cpu, thread); + return evsel__read_group(evsel, cpu, thread); + + return evsel__read_one(evsel, cpu, thread); } -int __perf_evsel__read_on_cpu(struct evsel *evsel, - int cpu, int thread, bool scale) +int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale) { struct perf_counts_values count; size_t nv = scale ? 3 : 1; @@ -1453,13 +1426,13 @@ int __perf_evsel__read_on_cpu(struct evsel *evsel, if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) + if (evsel->counts == NULL && evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) return -ENOMEM; if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) return -errno; - perf_evsel__compute_deltas(evsel, cpu, thread, &count); + evsel__compute_deltas(evsel, cpu, thread, &count); perf_counts_values__scale(&count, scale, NULL); *perf_counts(evsel->counts, cpu, thread) = count; return 0; @@ -1470,7 +1443,7 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread) struct evsel *leader = evsel->leader; int fd; - if (perf_evsel__is_group_leader(evsel)) + if (evsel__is_group_leader(evsel)) return -1; /* @@ -1749,8 +1722,7 @@ retry_open: /* * If we succeeded but had to kill clockid, fail and - * have perf_evsel__open_strerror() print us a nice - * error. + * have evsel__open_strerror() print us a nice error. */ if (perf_missing_features.clockid || perf_missing_features.clockid_wrong) { @@ -1854,7 +1826,7 @@ try_fallback: } else if (!perf_missing_features.group_read && evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && - perf_evsel__is_group_leader(evsel)) { + evsel__is_group_leader(evsel)) { perf_missing_features.group_read = true; pr_debug2_peo("switching off group read\n"); goto fallback_missing_features; @@ -1888,9 +1860,7 @@ void evsel__close(struct evsel *evsel) perf_evsel__free_id(&evsel->core); } -int perf_evsel__open_per_cpu(struct evsel *evsel, - struct perf_cpu_map *cpus, - int cpu) +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu) { if (cpu == -1) return evsel__open_cpu(evsel, cpus, NULL, 0, @@ -1899,8 +1869,7 @@ int perf_evsel__open_per_cpu(struct evsel *evsel, return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1); } -int perf_evsel__open_per_thread(struct evsel *evsel, - struct perf_thread_map *threads) +int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads) { return evsel__open(evsel, NULL, threads); } @@ -1995,8 +1964,8 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size) return 0; } -int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *data) +int evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data) { u64 type = evsel->core.attr.sample_type; bool swapped = evsel->needs_swap; @@ -2136,7 +2105,7 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, } } - if (evsel__has_callchain(evsel)) { + if (type & PERF_SAMPLE_CALLCHAIN) { const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); OVERFLOW_CHECK_u64(array); @@ -2190,7 +2159,7 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, return -EFAULT; sz = data->branch_stack->nr * sizeof(struct branch_entry); - if (perf_evsel__has_branch_hw_idx(evsel)) + if (evsel__has_branch_hw_idx(evsel)) sz += sizeof(u64); else data->no_hw_idx = true; @@ -2298,9 +2267,8 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, return 0; } -int perf_evsel__parse_sample_timestamp(struct evsel *evsel, - union perf_event *event, - u64 *timestamp) +int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, + u64 *timestamp) { u64 type = evsel->core.attr.sample_type; const __u64 *array; @@ -2342,15 +2310,14 @@ int perf_evsel__parse_sample_timestamp(struct evsel *evsel, return 0; } -struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name) +struct tep_format_field *evsel__field(struct evsel *evsel, const char *name) { return tep_find_field(evsel->tp_format, name); } -void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, - const char *name) +void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name) { - struct tep_format_field *field = perf_evsel__field(evsel, name); + struct tep_format_field *field = evsel__field(evsel, name); int offset; if (!field) @@ -2405,10 +2372,9 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam return 0; } -u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample, - const char *name) +u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name) { - struct tep_format_field *field = perf_evsel__field(evsel, name); + struct tep_format_field *field = evsel__field(evsel, name); if (!field) return 0; @@ -2416,8 +2382,7 @@ u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample, return field ? format_field__intval(field, sample, evsel->needs_swap) : 0; } -bool perf_evsel__fallback(struct evsel *evsel, int err, - char *msg, size_t msgsize) +bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) { int paranoid; @@ -2442,13 +2407,17 @@ bool perf_evsel__fallback(struct evsel *evsel, int err, return true; } else if (err == EACCES && !evsel->core.attr.exclude_kernel && (paranoid = perf_event_paranoid()) > 1) { - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); char *new_name; const char *sep = ":"; + /* If event has exclude user then don't exclude kernel. */ + if (evsel->core.attr.exclude_user) + return false; + /* Is there already the separator in the name. */ if (strchr(name, '/') || - strchr(name, ':')) + (strchr(name, ':') && !evsel->is_libpfm_event)) sep = ""; if (asprintf(&new_name, "%s%su", name, sep) < 0) @@ -2505,39 +2474,46 @@ static bool find_process(const char *name) return ret ? false : true; } -int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, - int err, char *msg, size_t size) +int evsel__open_strerror(struct evsel *evsel, struct target *target, + int err, char *msg, size_t size) { char sbuf[STRERR_BUFSIZE]; - int printed = 0; + int printed = 0, enforced = 0; switch (err) { case EPERM: case EACCES: + printed += scnprintf(msg + printed, size - printed, + "Access to performance monitoring and observability operations is limited.\n"); + + if (!sysfs__read_int("fs/selinux/enforce", &enforced)) { + if (enforced) { + printed += scnprintf(msg + printed, size - printed, + "Enforced MAC policy settings (SELinux) can limit access to performance\n" + "monitoring and observability operations. Inspect system audit records for\n" + "more perf_event access control information and adjusting the policy.\n"); + } + } + if (err == EPERM) - printed = scnprintf(msg, size, - "No permission to enable %s event.\n\n", - perf_evsel__name(evsel)); + printed += scnprintf(msg, size, + "No permission to enable %s event.\n\n", evsel__name(evsel)); return scnprintf(msg + printed, size - printed, - "You may not have permission to collect %sstats.\n\n" - "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n" - "which controls use of the performance events system by\n" - "unprivileged users (without CAP_SYS_ADMIN).\n\n" - "The current value is %d:\n\n" + "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n" + "access to performance monitoring and observability operations for users\n" + "without CAP_PERFMON or CAP_SYS_ADMIN Linux capability.\n" + "perf_event_paranoid setting is %d:\n" " -1: Allow use of (almost) all events by all users\n" " Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n" - ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n" - " Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n" - ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n" - ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n" - "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n" - " kernel.perf_event_paranoid = -1\n" , - target->system_wide ? "system-wide " : "", - perf_event_paranoid()); + ">= 0: Disallow raw and ftrace function tracepoint access\n" + ">= 1: Disallow CPU event access\n" + ">= 2: Disallow kernel profiling\n" + "To make the adjusted perf_event_paranoid setting permanent preserve it\n" + "in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)", + perf_event_paranoid()); case ENOENT: - return scnprintf(msg, size, "The %s event is not supported.", - perf_evsel__name(evsel)); + return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel)); case EMFILE: return scnprintf(msg, size, "%s", "Too many events are opened.\n" @@ -2561,7 +2537,7 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, if (evsel->core.attr.sample_period != 0) return scnprintf(msg, size, "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'", - perf_evsel__name(evsel)); + evsel__name(evsel)); if (evsel->core.attr.precise_ip) return scnprintf(msg, size, "%s", "\'precise\' request may not be supported. Try removing 'p' modifier."); @@ -2594,11 +2570,10 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" "/bin/dmesg | grep -i perf may provide additional information.\n", - err, str_error_r(err, sbuf, sizeof(sbuf)), - perf_evsel__name(evsel)); + err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel)); } -struct perf_env *perf_evsel__env(struct evsel *evsel) +struct perf_env *evsel__env(struct evsel *evsel) { if (evsel && evsel->evlist) return evsel->evlist->env; @@ -2623,7 +2598,7 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) return 0; } -int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist) +int evsel__store_ids(struct evsel *evsel, struct evlist *evlist) { struct perf_cpu_map *cpus = evsel->core.cpus; struct perf_thread_map *threads = evsel->core.threads; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 53187c501ee8..0f963c2a88a5 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -18,7 +18,7 @@ struct perf_counts; struct perf_stat_evsel; union perf_event; -typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data); +typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); enum perf_tool_event { PERF_TOOL_NONE = 0, @@ -76,6 +76,7 @@ struct evsel { bool ignore_missing_thread; bool forced_leader; bool use_uncore_alias; + bool is_libpfm_event; /* parse modifier helper */ int exclude_GH; int sample_read; @@ -101,9 +102,17 @@ struct evsel { int cpu_iter; const char *pmu_name; struct { - perf_evsel__sb_cb_t *cb; - void *data; + evsel__sb_cb_t *cb; + void *data; } side_band; + /* + * For reporting purposes, an evsel sample can have a callchain + * synthesized from AUX area data. Keep track of synthesized sample + * types here. Note, the recorded sample_type cannot be changed because + * it is needed to continue to parse events. + * See also evsel__has_callchain(). + */ + __u64 synth_sample_type; }; struct perf_missing_features { @@ -135,7 +144,7 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel) return perf_evsel__cpus(&evsel->core); } -static inline int perf_evsel__nr_cpus(struct evsel *evsel) +static inline int evsel__nr_cpus(struct evsel *evsel) { return evsel__cpus(evsel)->nr; } @@ -143,233 +152,207 @@ static inline int perf_evsel__nr_cpus(struct evsel *evsel) void perf_counts_values__scale(struct perf_counts_values *count, bool scale, s8 *pscaled); -void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, - struct perf_counts_values *count); +void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, + struct perf_counts_values *count); -int perf_evsel__object_config(size_t object_size, - int (*init)(struct evsel *evsel), - void (*fini)(struct evsel *evsel)); +int evsel__object_config(size_t object_size, + int (*init)(struct evsel *evsel), + void (*fini)(struct evsel *evsel)); -struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx); +struct perf_pmu *evsel__find_pmu(struct evsel *evsel); +bool evsel__is_aux_event(struct evsel *evsel); + +struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx); static inline struct evsel *evsel__new(struct perf_event_attr *attr) { - return perf_evsel__new_idx(attr, 0); + return evsel__new_idx(attr, 0); } -struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx); +struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx); /* * Returns pointer with encoded error via <linux/err.h> interface. */ -static inline struct evsel *perf_evsel__newtp(const char *sys, const char *name) +static inline struct evsel *evsel__newtp(const char *sys, const char *name) { - return perf_evsel__newtp_idx(sys, name, 0); + return evsel__newtp_idx(sys, name, 0); } -struct evsel *perf_evsel__new_cycles(bool precise); +struct evsel *evsel__new_cycles(bool precise); struct tep_event *event_format__new(const char *sys, const char *name); void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx); -void perf_evsel__exit(struct evsel *evsel); +void evsel__exit(struct evsel *evsel); void evsel__delete(struct evsel *evsel); struct callchain_param; -void perf_evsel__config(struct evsel *evsel, - struct record_opts *opts, - struct callchain_param *callchain); -void perf_evsel__config_callchain(struct evsel *evsel, - struct record_opts *opts, - struct callchain_param *callchain); - -int __perf_evsel__sample_size(u64 sample_type); -void perf_evsel__calc_id_pos(struct evsel *evsel); - -bool perf_evsel__is_cache_op_valid(u8 type, u8 op); - -#define PERF_EVSEL__MAX_ALIASES 8 - -extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] - [PERF_EVSEL__MAX_ALIASES]; -extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_EVSEL__MAX_ALIASES]; -extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] - [PERF_EVSEL__MAX_ALIASES]; -extern const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX]; -extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX]; -int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, - char *bf, size_t size); -const char *perf_evsel__name(struct evsel *evsel); - -const char *perf_evsel__group_name(struct evsel *evsel); -int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size); - -void __perf_evsel__set_sample_bit(struct evsel *evsel, - enum perf_event_sample_format bit); -void __perf_evsel__reset_sample_bit(struct evsel *evsel, - enum perf_event_sample_format bit); - -#define perf_evsel__set_sample_bit(evsel, bit) \ - __perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit) - -#define perf_evsel__reset_sample_bit(evsel, bit) \ - __perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit) - -void perf_evsel__set_sample_id(struct evsel *evsel, - bool use_sample_identifier); - -int perf_evsel__set_filter(struct evsel *evsel, const char *filter); -int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter); -int perf_evsel__append_addr_filter(struct evsel *evsel, - const char *filter); +void evsel__config(struct evsel *evsel, struct record_opts *opts, + struct callchain_param *callchain); +void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, + struct callchain_param *callchain); + +int __evsel__sample_size(u64 sample_type); +void evsel__calc_id_pos(struct evsel *evsel); + +bool evsel__is_cache_op_valid(u8 type, u8 op); + +#define EVSEL__MAX_ALIASES 8 + +extern const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES]; +extern const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES]; +extern const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES]; +extern const char *evsel__hw_names[PERF_COUNT_HW_MAX]; +extern const char *evsel__sw_names[PERF_COUNT_SW_MAX]; +int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); +const char *evsel__name(struct evsel *evsel); + +const char *evsel__group_name(struct evsel *evsel); +int evsel__group_desc(struct evsel *evsel, char *buf, size_t size); + +void __evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit); +void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit); + +#define evsel__set_sample_bit(evsel, bit) \ + __evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit) + +#define evsel__reset_sample_bit(evsel, bit) \ + __evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit) + +void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); + +int evsel__set_filter(struct evsel *evsel, const char *filter); +int evsel__append_tp_filter(struct evsel *evsel, const char *filter); +int evsel__append_addr_filter(struct evsel *evsel, const char *filter); int evsel__enable_cpu(struct evsel *evsel, int cpu); int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); int evsel__disable_cpu(struct evsel *evsel, int cpu); -int perf_evsel__open_per_cpu(struct evsel *evsel, - struct perf_cpu_map *cpus, - int cpu); -int perf_evsel__open_per_thread(struct evsel *evsel, - struct perf_thread_map *threads); +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu); +int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads); int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); void evsel__close(struct evsel *evsel); struct perf_sample; -void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, - const char *name); -u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample, - const char *name); +void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name); +u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name); -static inline char *perf_evsel__strval(struct evsel *evsel, - struct perf_sample *sample, - const char *name) +static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sample, const char *name) { - return perf_evsel__rawptr(evsel, sample, name); + return evsel__rawptr(evsel, sample, name); } struct tep_format_field; u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap); -struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name); +struct tep_format_field *evsel__field(struct evsel *evsel, const char *name); -#define perf_evsel__match(evsel, t, c) \ +#define evsel__match(evsel, t, c) \ (evsel->core.attr.type == PERF_TYPE_##t && \ evsel->core.attr.config == PERF_COUNT_##c) -static inline bool perf_evsel__match2(struct evsel *e1, - struct evsel *e2) +static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) { return (e1->core.attr.type == e2->core.attr.type) && (e1->core.attr.config == e2->core.attr.config); } -#define perf_evsel__cmp(a, b) \ - ((a) && \ - (b) && \ - (a)->core.attr.type == (b)->core.attr.type && \ - (a)->core.attr.config == (b)->core.attr.config) - -int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread); +int evsel__read_counter(struct evsel *evsel, int cpu, int thread); -int __perf_evsel__read_on_cpu(struct evsel *evsel, - int cpu, int thread, bool scale); +int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale); /** - * perf_evsel__read_on_cpu - Read out the results on a CPU and thread + * evsel__read_on_cpu - Read out the results on a CPU and thread * * @evsel - event selector to read value * @cpu - CPU of interest * @thread - thread of interest */ -static inline int perf_evsel__read_on_cpu(struct evsel *evsel, - int cpu, int thread) +static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread) { - return __perf_evsel__read_on_cpu(evsel, cpu, thread, false); + return __evsel__read_on_cpu(evsel, cpu, thread, false); } /** - * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled + * evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled * * @evsel - event selector to read value * @cpu - CPU of interest * @thread - thread of interest */ -static inline int perf_evsel__read_on_cpu_scaled(struct evsel *evsel, - int cpu, int thread) +static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread) { - return __perf_evsel__read_on_cpu(evsel, cpu, thread, true); + return __evsel__read_on_cpu(evsel, cpu, thread, true); } -int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *sample); -int perf_evsel__parse_sample_timestamp(struct evsel *evsel, - union perf_event *event, - u64 *timestamp); +int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, + u64 *timestamp); -static inline struct evsel *perf_evsel__next(struct evsel *evsel) +static inline struct evsel *evsel__next(struct evsel *evsel) { return list_entry(evsel->core.node.next, struct evsel, core.node); } -static inline struct evsel *perf_evsel__prev(struct evsel *evsel) +static inline struct evsel *evsel__prev(struct evsel *evsel) { return list_entry(evsel->core.node.prev, struct evsel, core.node); } /** - * perf_evsel__is_group_leader - Return whether given evsel is a leader event + * evsel__is_group_leader - Return whether given evsel is a leader event * * @evsel - evsel selector to be tested * * Return %true if @evsel is a group leader or a stand-alone event */ -static inline bool perf_evsel__is_group_leader(const struct evsel *evsel) +static inline bool evsel__is_group_leader(const struct evsel *evsel) { return evsel->leader == evsel; } /** - * perf_evsel__is_group_event - Return whether given evsel is a group event + * evsel__is_group_event - Return whether given evsel is a group event * * @evsel - evsel selector to be tested * * Return %true iff event group view is enabled and @evsel is a actual group * leader which has other members in the group */ -static inline bool perf_evsel__is_group_event(struct evsel *evsel) +static inline bool evsel__is_group_event(struct evsel *evsel) { if (!symbol_conf.event_group) return false; - return perf_evsel__is_group_leader(evsel) && evsel->core.nr_members > 1; + return evsel__is_group_leader(evsel) && evsel->core.nr_members > 1; } -bool perf_evsel__is_function_event(struct evsel *evsel); +bool evsel__is_function_event(struct evsel *evsel); -static inline bool perf_evsel__is_bpf_output(struct evsel *evsel) +static inline bool evsel__is_bpf_output(struct evsel *evsel) { - return perf_evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT); + return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT); } -static inline bool perf_evsel__is_clock(struct evsel *evsel) +static inline bool evsel__is_clock(struct evsel *evsel) { - return perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || - perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); + return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || + evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); } -bool perf_evsel__fallback(struct evsel *evsel, int err, - char *msg, size_t msgsize); -int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, - int err, char *msg, size_t size); +bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize); +int evsel__open_strerror(struct evsel *evsel, struct target *target, + int err, char *msg, size_t size); -static inline int perf_evsel__group_idx(struct evsel *evsel) +static inline int evsel__group_idx(struct evsel *evsel) { return evsel->idx - evsel->leader->idx; } @@ -386,22 +369,37 @@ for ((_evsel) = _leader; \ (_evsel) && (_evsel)->leader == (_leader); \ (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node)) -static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel) +static inline bool evsel__has_branch_callstack(const struct evsel *evsel) { return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; } -static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel) +static inline bool evsel__has_branch_hw_idx(const struct evsel *evsel) { return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; } static inline bool evsel__has_callchain(const struct evsel *evsel) { - return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0; + /* + * For reporting purposes, an evsel sample can have a recorded callchain + * or a callchain synthesized from AUX area data. + */ + return evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN || + evsel->synth_sample_type & PERF_SAMPLE_CALLCHAIN; +} + +static inline bool evsel__has_br_stack(const struct evsel *evsel) +{ + /* + * For reporting purposes, an evsel sample can have a recorded branch + * stack or a branch stack synthesized from AUX area data. + */ + return evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK || + evsel->synth_sample_type & PERF_SAMPLE_BRANCH_STACK; } -struct perf_env *perf_evsel__env(struct evsel *evsel); +struct perf_env *evsel__env(struct evsel *evsel); -int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist); +int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index e026ab67b008..aee6f808b512 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -6,30 +6,30 @@ #include <stdbool.h> /* - * The 'struct perf_evsel_config_term' is used to pass event - * specific configuration data to perf_evsel__config routine. + * The 'struct evsel_config_term' is used to pass event + * specific configuration data to evsel__config routine. * It is allocated within event parsing and attached to - * perf_evsel::config_terms list head. + * evsel::config_terms list head. */ enum evsel_term_type { - PERF_EVSEL__CONFIG_TERM_PERIOD, - PERF_EVSEL__CONFIG_TERM_FREQ, - PERF_EVSEL__CONFIG_TERM_TIME, - PERF_EVSEL__CONFIG_TERM_CALLGRAPH, - PERF_EVSEL__CONFIG_TERM_STACK_USER, - PERF_EVSEL__CONFIG_TERM_INHERIT, - PERF_EVSEL__CONFIG_TERM_MAX_STACK, - PERF_EVSEL__CONFIG_TERM_MAX_EVENTS, - PERF_EVSEL__CONFIG_TERM_OVERWRITE, - PERF_EVSEL__CONFIG_TERM_DRV_CFG, - PERF_EVSEL__CONFIG_TERM_BRANCH, - PERF_EVSEL__CONFIG_TERM_PERCORE, - PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT, - PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, - PERF_EVSEL__CONFIG_TERM_CFG_CHG, + EVSEL__CONFIG_TERM_PERIOD, + EVSEL__CONFIG_TERM_FREQ, + EVSEL__CONFIG_TERM_TIME, + EVSEL__CONFIG_TERM_CALLGRAPH, + EVSEL__CONFIG_TERM_STACK_USER, + EVSEL__CONFIG_TERM_INHERIT, + EVSEL__CONFIG_TERM_MAX_STACK, + EVSEL__CONFIG_TERM_MAX_EVENTS, + EVSEL__CONFIG_TERM_OVERWRITE, + EVSEL__CONFIG_TERM_DRV_CFG, + EVSEL__CONFIG_TERM_BRANCH, + EVSEL__CONFIG_TERM_PERCORE, + EVSEL__CONFIG_TERM_AUX_OUTPUT, + EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, + EVSEL__CONFIG_TERM_CFG_CHG, }; -struct perf_evsel_config_term { +struct evsel_config_term { struct list_head list; enum evsel_term_type type; bool free_str; @@ -53,10 +53,9 @@ struct perf_evsel_config_term { struct evsel; -struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel, - enum evsel_term_type type); +struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type); -#define perf_evsel__get_config_term(evsel, type) \ - __perf_evsel__get_config_term(evsel, PERF_EVSEL__CONFIG_TERM_ ## type) +#define evsel__get_config_term(evsel, type) \ + __evsel__get_config_term(evsel, EVSEL__CONFIG_TERM_ ## type) #endif // __PERF_EVSEL_CONFIG_H diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 3b4842840db0..fb498a723a00 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -35,8 +35,7 @@ static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, vo return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val); } -int perf_evsel__fprintf(struct evsel *evsel, - struct perf_attr_details *details, FILE *fp) +int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp) { bool first = true; int printed = 0; @@ -44,22 +43,22 @@ int perf_evsel__fprintf(struct evsel *evsel, if (details->event_group) { struct evsel *pos; - if (!perf_evsel__is_group_leader(evsel)) + if (!evsel__is_group_leader(evsel)) return 0; if (evsel->core.nr_members > 1) printed += fprintf(fp, "%s{", evsel->group_name ?: ""); - printed += fprintf(fp, "%s", perf_evsel__name(evsel)); + printed += fprintf(fp, "%s", evsel__name(evsel)); for_each_group_member(pos, evsel) - printed += fprintf(fp, ",%s", perf_evsel__name(pos)); + printed += fprintf(fp, ",%s", evsel__name(pos)); if (evsel->core.nr_members > 1) printed += fprintf(fp, "}"); goto out; } - printed += fprintf(fp, "%s", perf_evsel__name(evsel)); + printed += fprintf(fp, "%s", evsel__name(evsel)); if (details->verbose) { printed += perf_event_attr__fprintf(fp, &evsel->core.attr, diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h index 47e6c8456bb1..3093d096c29f 100644 --- a/tools/perf/util/evsel_fprintf.h +++ b/tools/perf/util/evsel_fprintf.h @@ -15,8 +15,7 @@ struct perf_attr_details { bool trace_fields; }; -int perf_evsel__fprintf(struct evsel *evsel, - struct perf_attr_details *details, FILE *fp); +int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp); #define EVSEL__PRINT_IP (1<<0) #define EVSEL__PRINT_SYM (1<<1) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index fd192ddf93c1..f64ab91c432b 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -3,38 +3,92 @@ #include <assert.h> #include "expr.h" #include "expr-bison.h" -#define YY_EXTRA_TYPE int #include "expr-flex.h" +#include <linux/kernel.h> #ifdef PARSER_DEBUG extern int expr_debug; #endif +static size_t key_hash(const void *key, void *ctx __maybe_unused) +{ + const char *str = (const char *)key; + size_t hash = 0; + + while (*str != '\0') { + hash *= 31; + hash += *str; + str++; + } + return hash; +} + +static bool key_equal(const void *key1, const void *key2, + void *ctx __maybe_unused) +{ + return !strcmp((const char *)key1, (const char *)key2); +} + /* Caller must make sure id is allocated */ -void expr__add_id(struct parse_ctx *ctx, const char *name, double val) +int expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val) { - int idx; + double *val_ptr = NULL, *old_val = NULL; + char *old_key = NULL; + int ret; + + if (val != 0.0) { + val_ptr = malloc(sizeof(double)); + if (!val_ptr) + return -ENOMEM; + *val_ptr = val; + } + ret = hashmap__set(&ctx->ids, name, val_ptr, + (const void **)&old_key, (void **)&old_val); + free(old_key); + free(old_val); + return ret; +} + +int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr) +{ + double *data; + + if (!hashmap__find(&ctx->ids, id, (void **)&data)) + return -1; + *val_ptr = (data == NULL) ? 0.0 : *data; + return 0; +} - assert(ctx->num_ids < MAX_PARSE_ID); - idx = ctx->num_ids++; - ctx->ids[idx].name = name; - ctx->ids[idx].val = val; +void expr__ctx_init(struct expr_parse_ctx *ctx) +{ + hashmap__init(&ctx->ids, key_hash, key_equal, NULL); } -void expr__ctx_init(struct parse_ctx *ctx) +void expr__ctx_clear(struct expr_parse_ctx *ctx) { - ctx->num_ids = 0; + struct hashmap_entry *cur; + size_t bkt; + + hashmap__for_each_entry((&ctx->ids), cur, bkt) { + free((char *)cur->key); + free(cur->value); + } + hashmap__clear(&ctx->ids); } static int -__expr__parse(double *val, struct parse_ctx *ctx, const char *expr, - int start) +__expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, + int start, int runtime) { + struct expr_scanner_ctx scanner_ctx = { + .start_token = start, + .runtime = runtime, + }; YY_BUFFER_STATE buffer; void *scanner; int ret; - ret = expr_lex_init_extra(start, &scanner); + ret = expr_lex_init_extra(&scanner_ctx, &scanner); if (ret) return ret; @@ -42,6 +96,7 @@ __expr__parse(double *val, struct parse_ctx *ctx, const char *expr, #ifdef PARSER_DEBUG expr_debug = 1; + expr_set_debug(1, scanner); #endif ret = expr_parse(val, ctx, scanner); @@ -52,61 +107,25 @@ __expr__parse(double *val, struct parse_ctx *ctx, const char *expr, return ret; } -int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr) -{ - return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0; -} - -static bool -already_seen(const char *val, const char *one, const char **other, - int num_other) +int expr__parse(double *final_val, struct expr_parse_ctx *ctx, + const char *expr, int runtime) { - int i; - - if (one && !strcasecmp(one, val)) - return true; - for (i = 0; i < num_other; i++) - if (!strcasecmp(other[i], val)) - return true; - return false; + return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0; } -int expr__find_other(const char *expr, const char *one, const char ***other, - int *num_other) +int expr__find_other(const char *expr, const char *one, + struct expr_parse_ctx *ctx, int runtime) { - int err, i = 0, j = 0; - struct parse_ctx ctx; - - expr__ctx_init(&ctx); - err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER); - if (err) - return -1; - - *other = malloc((ctx.num_ids + 1) * sizeof(char *)); - if (!*other) - return -ENOMEM; - - for (i = 0, j = 0; i < ctx.num_ids; i++) { - const char *str = ctx.ids[i].name; - - if (already_seen(str, one, *other, j)) - continue; - - str = strdup(str); - if (!str) - goto out; - (*other)[j++] = str; - } - (*other)[j] = NULL; - -out: - if (i != ctx.num_ids) { - while (--j) - free((char *) (*other)[i]); - free(*other); - err = -1; + double *old_val = NULL; + char *old_key = NULL; + int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime); + + if (one) { + hashmap__delete(&ctx->ids, one, + (const void **)&old_key, (void **)&old_val); + free(old_key); + free(old_val); } - *num_other = j; - return err; + return ret; } diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 9377538f4097..8a2c1074f90f 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -2,23 +2,31 @@ #ifndef PARSE_CTX_H #define PARSE_CTX_H 1 -#define EXPR_MAX_OTHER 20 -#define MAX_PARSE_ID EXPR_MAX_OTHER +// There are fixes that need to land upstream before we can use libbpf's headers, +// for now use our copy uncoditionally, since the data structures at this point +// are exactly the same, no problem. +//#ifdef HAVE_LIBBPF_SUPPORT +//#include <bpf/hashmap.h> +//#else +#include "util/hashmap.h" +//#endif -struct parse_id { - const char *name; - double val; +struct expr_parse_ctx { + struct hashmap ids; }; -struct parse_ctx { - int num_ids; - struct parse_id ids[MAX_PARSE_ID]; +struct expr_scanner_ctx { + int start_token; + int runtime; }; -void expr__ctx_init(struct parse_ctx *ctx); -void expr__add_id(struct parse_ctx *ctx, const char *id, double val); -int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr); -int expr__find_other(const char *expr, const char *one, const char ***other, - int *num_other); +void expr__ctx_init(struct expr_parse_ctx *ctx); +void expr__ctx_clear(struct expr_parse_ctx *ctx); +int expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val); +int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr); +int expr__parse(double *final_val, struct expr_parse_ctx *ctx, + const char *expr, int runtime); +int expr__find_other(const char *expr, const char *one, + struct expr_parse_ctx *ids, int runtime); #endif diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index eaad29243c23..f397bf8b1a48 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -10,12 +10,12 @@ char *expr_get_text(yyscan_t yyscanner); YYSTYPE *expr_get_lval(yyscan_t yyscanner); -static int __value(YYSTYPE *yylval, char *str, int base, int token) +static double __value(YYSTYPE *yylval, char *str, int token) { - u64 num; + double num; errno = 0; - num = strtoull(str, NULL, base); + num = strtod(str, NULL); if (errno) return EXPR_ERROR; @@ -23,19 +23,19 @@ static int __value(YYSTYPE *yylval, char *str, int base, int token) return token; } -static int value(yyscan_t scanner, int base) +static int value(yyscan_t scanner) { YYSTYPE *yylval = expr_get_lval(scanner); char *text = expr_get_text(scanner); - return __value(yylval, text, base, NUMBER); + return __value(yylval, text, NUMBER); } /* * Allow @ instead of / to be able to specify pmu/event/ without * conflicts with normal division. */ -static char *normalize(char *str) +static char *normalize(char *str, int runtime) { char *ret = str; char *dst = str; @@ -45,6 +45,19 @@ static char *normalize(char *str) *dst++ = '/'; else if (*str == '\\') *dst++ = *++str; + else if (*str == '?') { + char *paramval; + int i = 0; + int size = asprintf(¶mval, "%d", runtime); + + if (size < 0) + *dst++ = '0'; + else { + while (i < size) + *dst++ = paramval[i++]; + free(paramval); + } + } else *dst++ = *str; str++; @@ -54,35 +67,35 @@ static char *normalize(char *str) return ret; } -static int str(yyscan_t scanner, int token) +static int str(yyscan_t scanner, int token, int runtime) { YYSTYPE *yylval = expr_get_lval(scanner); char *text = expr_get_text(scanner); - yylval->str = normalize(strdup(text)); + yylval->str = normalize(strdup(text), runtime); if (!yylval->str) return EXPR_ERROR; - yylval->str = normalize(yylval->str); + yylval->str = normalize(yylval->str, runtime); return token; } %} -number [0-9]+ +number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+) sch [-,=] spec \\{sch} -sym [0-9a-zA-Z_\.:@]+ -symbol {spec}*{sym}*{spec}*{sym}* +sym [0-9a-zA-Z_\.:@?]+ +symbol ({spec}|{sym})+ %% - { - int start_token; + struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner); - start_token = expr_get_extra(yyscanner); + { + int start_token = sctx->start_token; - if (start_token) { - expr_set_extra(NULL, yyscanner); + if (sctx->start_token) { + sctx->start_token = 0; return start_token; } } @@ -92,8 +105,8 @@ min { return MIN; } if { return IF; } else { return ELSE; } #smt_on { return SMT_ON; } -{number} { return value(yyscanner, 10); } -{symbol} { return str(yyscanner, ID); } +{number} { return value(yyscanner); } +{symbol} { return str(yyscanner, ID, sctx->runtime); } "|" { return '|'; } "^" { return '^'; } "&" { return '&'; } diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 4720cbe79357..bf3e898e3055 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -15,7 +15,7 @@ %define api.pure full %parse-param { double *final_val } -%parse-param { struct parse_ctx *ctx } +%parse-param { struct expr_parse_ctx *ctx } %parse-param {void *scanner} %lex-param {void* scanner} @@ -27,6 +27,7 @@ %token EXPR_PARSE EXPR_OTHER EXPR_ERROR %token <num> NUMBER %token <str> ID +%destructor { free ($$); } <str> %token MIN MAX IF ELSE SMT_ON %left MIN MAX IF %left '|' @@ -39,26 +40,13 @@ %{ static void expr_error(double *final_val __maybe_unused, - struct parse_ctx *ctx __maybe_unused, + struct expr_parse_ctx *ctx __maybe_unused, void *scanner, const char *s) { pr_debug("%s\n", s); } -static int lookup_id(struct parse_ctx *ctx, char *id, double *val) -{ - int i; - - for (i = 0; i < ctx->num_ids; i++) { - if (!strcasecmp(ctx->ids[i].name, id)) { - *val = ctx->ids[i].val; - return 0; - } - } - return -1; -} - %} %% @@ -72,15 +60,10 @@ all_other: all_other other other: ID { - if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) { - pr_err("failed: way too many variables"); - YYABORT; - } - - ctx->ids[ctx->num_ids++].name = $1; + expr__add_id(ctx, $1, 0.0); } | -MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' +MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ',' all_expr: if_expr { *final_val = $1; } @@ -92,10 +75,12 @@ if_expr: ; expr: NUMBER - | ID { if (lookup_id(ctx, $1, &$$) < 0) { + | ID { if (expr__get_id(ctx, $1, &$$)) { pr_debug("%s not found\n", $1); + free($1); YYABORT; } + free($1); } | expr '|' expr { $$ = (long)$1 | (long)$3; } | expr '&' expr { $$ = (long)$1 & (long)$3; } @@ -103,8 +88,18 @@ expr: NUMBER | expr '+' expr { $$ = $1 + $3; } | expr '-' expr { $$ = $1 - $3; } | expr '*' expr { $$ = $1 * $3; } - | expr '/' expr { if ($3 == 0) YYABORT; $$ = $1 / $3; } - | expr '%' expr { if ((long)$3 == 0) YYABORT; $$ = (long)$1 % (long)$3; } + | expr '/' expr { if ($3 == 0) { + pr_debug("division by zero\n"); + YYABORT; + } + $$ = $1 / $3; + } + | expr '%' expr { if ((long)$3 == 0) { + pr_debug("division by zero\n"); + YYABORT; + } + $$ = (long)$1 % (long)$3; + } | '-' expr %prec NEG { $$ = -$2; } | '(' if_expr ')' { $$ = $2; } | MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; } diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c index 30e9f618f6cd..dd40683bd4c0 100644 --- a/tools/perf/util/genelf_debug.c +++ b/tools/perf/util/genelf_debug.c @@ -342,7 +342,7 @@ static void emit_lineno_info(struct buffer_ext *be, */ /* start state of the state machine we take care of */ - unsigned long last_vma = code_addr; + unsigned long last_vma = 0; char const *cur_filename = NULL; unsigned long cur_file_idx = 0; int last_line = 1; @@ -473,7 +473,7 @@ jit_process_debug_info(uint64_t code_addr, ent = debug_entry_next(ent); } add_compilation_unit(di, buffer_ext_size(dl)); - add_debug_line(dl, debug, nr_debug_entries, 0); + add_debug_line(dl, debug, nr_debug_entries, GEN_ELF_TEXT_OFFSET); add_debug_abbrev(da); if (0) buffer_ext_dump(da, "abbrev"); diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c new file mode 100644 index 000000000000..a405dad068f5 --- /dev/null +++ b/tools/perf/util/hashmap.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * Generic non-thread safe hash map implementation. + * + * Copyright (c) 2019 Facebook + */ +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <linux/err.h> +#include "hashmap.h" + +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + +/* start with 4 buckets */ +#define HASHMAP_MIN_CAP_BITS 2 + +static void hashmap_add_entry(struct hashmap_entry **pprev, + struct hashmap_entry *entry) +{ + entry->next = *pprev; + *pprev = entry; +} + +static void hashmap_del_entry(struct hashmap_entry **pprev, + struct hashmap_entry *entry) +{ + *pprev = entry->next; + entry->next = NULL; +} + +void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, + hashmap_equal_fn equal_fn, void *ctx) +{ + map->hash_fn = hash_fn; + map->equal_fn = equal_fn; + map->ctx = ctx; + + map->buckets = NULL; + map->cap = 0; + map->cap_bits = 0; + map->sz = 0; +} + +struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, + hashmap_equal_fn equal_fn, + void *ctx) +{ + struct hashmap *map = malloc(sizeof(struct hashmap)); + + if (!map) + return ERR_PTR(-ENOMEM); + hashmap__init(map, hash_fn, equal_fn, ctx); + return map; +} + +void hashmap__clear(struct hashmap *map) +{ + struct hashmap_entry *cur, *tmp; + size_t bkt; + + hashmap__for_each_entry_safe(map, cur, tmp, bkt) { + free(cur); + } + free(map->buckets); + map->buckets = NULL; + map->cap = map->cap_bits = map->sz = 0; +} + +void hashmap__free(struct hashmap *map) +{ + if (!map) + return; + + hashmap__clear(map); + free(map); +} + +size_t hashmap__size(const struct hashmap *map) +{ + return map->sz; +} + +size_t hashmap__capacity(const struct hashmap *map) +{ + return map->cap; +} + +static bool hashmap_needs_to_grow(struct hashmap *map) +{ + /* grow if empty or more than 75% filled */ + return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap); +} + +static int hashmap_grow(struct hashmap *map) +{ + struct hashmap_entry **new_buckets; + struct hashmap_entry *cur, *tmp; + size_t new_cap_bits, new_cap; + size_t h, bkt; + + new_cap_bits = map->cap_bits + 1; + if (new_cap_bits < HASHMAP_MIN_CAP_BITS) + new_cap_bits = HASHMAP_MIN_CAP_BITS; + + new_cap = 1UL << new_cap_bits; + new_buckets = calloc(new_cap, sizeof(new_buckets[0])); + if (!new_buckets) + return -ENOMEM; + + hashmap__for_each_entry_safe(map, cur, tmp, bkt) { + h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits); + hashmap_add_entry(&new_buckets[h], cur); + } + + map->cap = new_cap; + map->cap_bits = new_cap_bits; + free(map->buckets); + map->buckets = new_buckets; + + return 0; +} + +static bool hashmap_find_entry(const struct hashmap *map, + const void *key, size_t hash, + struct hashmap_entry ***pprev, + struct hashmap_entry **entry) +{ + struct hashmap_entry *cur, **prev_ptr; + + if (!map->buckets) + return false; + + for (prev_ptr = &map->buckets[hash], cur = *prev_ptr; + cur; + prev_ptr = &cur->next, cur = cur->next) { + if (map->equal_fn(cur->key, key, map->ctx)) { + if (pprev) + *pprev = prev_ptr; + *entry = cur; + return true; + } + } + + return false; +} + +int hashmap__insert(struct hashmap *map, const void *key, void *value, + enum hashmap_insert_strategy strategy, + const void **old_key, void **old_value) +{ + struct hashmap_entry *entry; + size_t h; + int err; + + if (old_key) + *old_key = NULL; + if (old_value) + *old_value = NULL; + + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); + if (strategy != HASHMAP_APPEND && + hashmap_find_entry(map, key, h, NULL, &entry)) { + if (old_key) + *old_key = entry->key; + if (old_value) + *old_value = entry->value; + + if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) { + entry->key = key; + entry->value = value; + return 0; + } else if (strategy == HASHMAP_ADD) { + return -EEXIST; + } + } + + if (strategy == HASHMAP_UPDATE) + return -ENOENT; + + if (hashmap_needs_to_grow(map)) { + err = hashmap_grow(map); + if (err) + return err; + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); + } + + entry = malloc(sizeof(struct hashmap_entry)); + if (!entry) + return -ENOMEM; + + entry->key = key; + entry->value = value; + hashmap_add_entry(&map->buckets[h], entry); + map->sz++; + + return 0; +} + +bool hashmap__find(const struct hashmap *map, const void *key, void **value) +{ + struct hashmap_entry *entry; + size_t h; + + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); + if (!hashmap_find_entry(map, key, h, NULL, &entry)) + return false; + + if (value) + *value = entry->value; + return true; +} + +bool hashmap__delete(struct hashmap *map, const void *key, + const void **old_key, void **old_value) +{ + struct hashmap_entry **pprev, *entry; + size_t h; + + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); + if (!hashmap_find_entry(map, key, h, &pprev, &entry)) + return false; + + if (old_key) + *old_key = entry->key; + if (old_value) + *old_value = entry->value; + + hashmap_del_entry(pprev, entry); + free(entry); + map->sz--; + + return true; +} + diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h new file mode 100644 index 000000000000..df59fd4fc95b --- /dev/null +++ b/tools/perf/util/hashmap.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * Generic non-thread safe hash map implementation. + * + * Copyright (c) 2019 Facebook + */ +#ifndef __LIBBPF_HASHMAP_H +#define __LIBBPF_HASHMAP_H + +#include <stdbool.h> +#include <stddef.h> +#include <limits.h> +#ifndef __WORDSIZE +#define __WORDSIZE (__SIZEOF_LONG__ * 8) +#endif + +static inline size_t hash_bits(size_t h, int bits) +{ + /* shuffle bits and return requested number of upper bits */ + return (h * 11400714819323198485llu) >> (__WORDSIZE - bits); +} + +typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); +typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); + +struct hashmap_entry { + const void *key; + void *value; + struct hashmap_entry *next; +}; + +struct hashmap { + hashmap_hash_fn hash_fn; + hashmap_equal_fn equal_fn; + void *ctx; + + struct hashmap_entry **buckets; + size_t cap; + size_t cap_bits; + size_t sz; +}; + +#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \ + .hash_fn = (hash_fn), \ + .equal_fn = (equal_fn), \ + .ctx = (ctx), \ + .buckets = NULL, \ + .cap = 0, \ + .cap_bits = 0, \ + .sz = 0, \ +} + +void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, + hashmap_equal_fn equal_fn, void *ctx); +struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, + hashmap_equal_fn equal_fn, + void *ctx); +void hashmap__clear(struct hashmap *map); +void hashmap__free(struct hashmap *map); + +size_t hashmap__size(const struct hashmap *map); +size_t hashmap__capacity(const struct hashmap *map); + +/* + * Hashmap insertion strategy: + * - HASHMAP_ADD - only add key/value if key doesn't exist yet; + * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise, + * update value; + * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do + * nothing and return -ENOENT; + * - HASHMAP_APPEND - always add key/value pair, even if key already exists. + * This turns hashmap into a multimap by allowing multiple values to be + * associated with the same key. Most useful read API for such hashmap is + * hashmap__for_each_key_entry() iteration. If hashmap__find() is still + * used, it will return last inserted key/value entry (first in a bucket + * chain). + */ +enum hashmap_insert_strategy { + HASHMAP_ADD, + HASHMAP_SET, + HASHMAP_UPDATE, + HASHMAP_APPEND, +}; + +/* + * hashmap__insert() adds key/value entry w/ various semantics, depending on + * provided strategy value. If a given key/value pair replaced already + * existing key/value pair, both old key and old value will be returned + * through old_key and old_value to allow calling code do proper memory + * management. + */ +int hashmap__insert(struct hashmap *map, const void *key, void *value, + enum hashmap_insert_strategy strategy, + const void **old_key, void **old_value); + +static inline int hashmap__add(struct hashmap *map, + const void *key, void *value) +{ + return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL); +} + +static inline int hashmap__set(struct hashmap *map, + const void *key, void *value, + const void **old_key, void **old_value) +{ + return hashmap__insert(map, key, value, HASHMAP_SET, + old_key, old_value); +} + +static inline int hashmap__update(struct hashmap *map, + const void *key, void *value, + const void **old_key, void **old_value) +{ + return hashmap__insert(map, key, value, HASHMAP_UPDATE, + old_key, old_value); +} + +static inline int hashmap__append(struct hashmap *map, + const void *key, void *value) +{ + return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL); +} + +bool hashmap__delete(struct hashmap *map, const void *key, + const void **old_key, void **old_value); + +bool hashmap__find(const struct hashmap *map, const void *key, void **value); + +/* + * hashmap__for_each_entry - iterate over all entries in hashmap + * @map: hashmap to iterate + * @cur: struct hashmap_entry * used as a loop cursor + * @bkt: integer used as a bucket loop cursor + */ +#define hashmap__for_each_entry(map, cur, bkt) \ + for (bkt = 0; bkt < map->cap; bkt++) \ + for (cur = map->buckets[bkt]; cur; cur = cur->next) + +/* + * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe + * against removals + * @map: hashmap to iterate + * @cur: struct hashmap_entry * used as a loop cursor + * @tmp: struct hashmap_entry * used as a temporary next cursor storage + * @bkt: integer used as a bucket loop cursor + */ +#define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ + for (bkt = 0; bkt < map->cap; bkt++) \ + for (cur = map->buckets[bkt]; \ + cur && ({tmp = cur->next; true; }); \ + cur = tmp) + +/* + * hashmap__for_each_key_entry - iterate over entries associated with given key + * @map: hashmap to iterate + * @cur: struct hashmap_entry * used as a loop cursor + * @key: key to iterate entries for + */ +#define hashmap__for_each_key_entry(map, cur, _key) \ + for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ + map->cap_bits); \ + map->buckets ? map->buckets[bkt] : NULL; }); \ + cur; \ + cur = cur->next) \ + if (map->equal_fn(cur->key, (_key), map->ctx)) + +#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ + for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ + map->cap_bits); \ + cur = map->buckets ? map->buckets[bkt] : NULL; }); \ + cur && ({ tmp = cur->next; true; }); \ + cur = tmp) \ + if (map->equal_fn(cur->key, (_key), map->ctx)) + +#endif /* __LIBBPF_HASHMAP_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index acbd046bf95c..7a67d017d72c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -525,7 +525,7 @@ static int write_event_desc(struct feat_fd *ff, /* * write event string as passed on cmdline */ - ret = do_write_string(ff, perf_evsel__name(evsel)); + ret = do_write_string(ff, evsel__name(evsel)); if (ret < 0) return ret; /* @@ -783,8 +783,7 @@ static int write_group_desc(struct feat_fd *ff, return ret; evlist__for_each_entry(evlist, evsel) { - if (perf_evsel__is_group_leader(evsel) && - evsel->core.nr_members > 1) { + if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) { const char *name = evsel->group_name ?: "{anon_group}"; u32 leader_idx = evsel->idx; u32 nr_members = evsel->core.nr_members; @@ -1395,6 +1394,38 @@ static int write_compressed(struct feat_fd *ff __maybe_unused, return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len)); } +static int write_cpu_pmu_caps(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) +{ + struct perf_pmu *cpu_pmu = perf_pmu__find("cpu"); + struct perf_pmu_caps *caps = NULL; + int nr_caps; + int ret; + + if (!cpu_pmu) + return -ENOENT; + + nr_caps = perf_pmu__caps_parse(cpu_pmu); + if (nr_caps < 0) + return nr_caps; + + ret = do_write(ff, &nr_caps, sizeof(nr_caps)); + if (ret < 0) + return ret; + + list_for_each_entry(caps, &cpu_pmu->caps, list) { + ret = do_write_string(ff, caps->name); + if (ret < 0) + return ret; + + ret = do_write_string(ff, caps->value); + if (ret < 0) + return ret; + } + + return ret; +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1809,6 +1840,27 @@ static void print_compressed(struct feat_fd *ff, FILE *fp) ff->ph->env.comp_level, ff->ph->env.comp_ratio); } +static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) +{ + const char *delimiter = "# cpu pmu capabilities: "; + u32 nr_caps = ff->ph->env.nr_cpu_pmu_caps; + char *str; + + if (!nr_caps) { + fprintf(fp, "# cpu pmu capabilities: not available\n"); + return; + } + + str = ff->ph->env.cpu_pmu_caps; + while (nr_caps--) { + fprintf(fp, "%s%s", delimiter, str); + delimiter = ", "; + str += strlen(str) + 1; + } + + fprintf(fp, "\n"); +} + static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) { const char *delimiter = "# pmu mappings: "; @@ -1854,14 +1906,12 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp) session = container_of(ff->ph, struct perf_session, header); evlist__for_each_entry(session->evlist, evsel) { - if (perf_evsel__is_group_leader(evsel) && - evsel->core.nr_members > 1) { - fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", - perf_evsel__name(evsel)); + if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) { + fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", evsel__name(evsel)); nr = evsel->core.nr_members - 1; } else if (nr) { - fprintf(fp, ",%s", perf_evsel__name(evsel)); + fprintf(fp, ",%s", evsel__name(evsel)); if (--nr == 0) fprintf(fp, "}\n"); @@ -2846,6 +2896,60 @@ static int process_compressed(struct feat_fd *ff, return 0; } +static int process_cpu_pmu_caps(struct feat_fd *ff, + void *data __maybe_unused) +{ + char *name, *value; + struct strbuf sb; + u32 nr_caps; + + if (do_read_u32(ff, &nr_caps)) + return -1; + + if (!nr_caps) { + pr_debug("cpu pmu capabilities not available\n"); + return 0; + } + + ff->ph->env.nr_cpu_pmu_caps = nr_caps; + + if (strbuf_init(&sb, 128) < 0) + return -1; + + while (nr_caps--) { + name = do_read_string(ff); + if (!name) + goto error; + + value = do_read_string(ff); + if (!value) + goto free_name; + + if (strbuf_addf(&sb, "%s=%s", name, value) < 0) + goto free_value; + + /* include a NULL character at the end */ + if (strbuf_add(&sb, "", 1) < 0) + goto free_value; + + if (!strcmp(name, "branches")) + ff->ph->env.max_branches = atoi(value); + + free(value); + free(name); + } + ff->ph->env.cpu_pmu_caps = strbuf_detach(&sb, NULL); + return 0; + +free_value: + free(value); +free_name: + free(name); +error: + strbuf_release(&sb); + return -1; +} + #define FEAT_OPR(n, func, __full_only) \ [HEADER_##n] = { \ .name = __stringify(n), \ @@ -2903,6 +3007,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false), FEAT_OPR(BPF_BTF, bpf_btf, false), FEAT_OPR(COMPRESSED, compressed, false), + FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false), }; struct header_print_data { @@ -3469,7 +3574,7 @@ static int perf_header__read_pipe(struct perf_session *session) return -EINVAL; } - return 0; + return f_header.size == sizeof(f_header) ? 0 : -1; } static int read_attr(int fd, struct perf_header *ph, @@ -3571,7 +3676,7 @@ int perf_session__read_header(struct perf_session *session) struct perf_file_header f_header; struct perf_file_attr f_attr; u64 f_id; - int nr_attrs, nr_ids, i, j; + int nr_attrs, nr_ids, i, j, err; int fd = perf_data__fd(data); session->evlist = evlist__new(); @@ -3580,8 +3685,16 @@ int perf_session__read_header(struct perf_session *session) session->evlist->env = &header->env; session->machines.host.env = &header->env; - if (perf_data__is_pipe(data)) - return perf_header__read_pipe(session); + + /* + * We can read 'pipe' data event from regular file, + * check for the pipe header regardless of source. + */ + err = perf_header__read_pipe(session); + if (!err || (err && perf_data__is_pipe(data))) { + data->is_pipe = true; + return err; + } if (perf_file_header__read(&f_header, header, fd) < 0) return -EINVAL; @@ -3842,12 +3955,22 @@ int perf_event__process_tracing_data(struct perf_session *session, { ssize_t size_read, padding, size = event->tracing_data.size; int fd = perf_data__fd(session->data); - off_t offset = lseek(fd, 0, SEEK_CUR); char buf[BUFSIZ]; - /* setup for reading amidst mmap */ - lseek(fd, offset + sizeof(struct perf_record_header_tracing_data), - SEEK_SET); + /* + * The pipe fd is already in proper place and in any case + * we can't move it, and we'd screw the case where we read + * 'pipe' data from regular file. The trace_report reads + * data from 'fd' so we need to set it directly behind the + * event, where the tracing data starts. + */ + if (!perf_data__is_pipe(session->data)) { + off_t offset = lseek(fd, 0, SEEK_CUR); + + /* setup for reading amidst mmap */ + lseek(fd, offset + sizeof(struct perf_record_header_tracing_data), + SEEK_SET); + } size_read = trace_report(fd, &session->tevent, session->repipe); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 840f95cee349..650bd1c7a99b 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -43,6 +43,7 @@ enum { HEADER_BPF_PROG_INFO, HEADER_BPF_BTF, HEADER_COMPRESSED, + HEADER_CPU_PMU_CAPS, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 283a69ff6a3d..8a793e4c9400 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1070,6 +1070,20 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter, return fill_callchain_info(al, node, iter->hide_unresolved); } +static bool +hist_entry__fast__sym_diff(struct hist_entry *left, + struct hist_entry *right) +{ + struct symbol *sym_l = left->ms.sym; + struct symbol *sym_r = right->ms.sym; + + if (!sym_l && !sym_r) + return left->ip != right->ip; + + return !!_sort__sym_cmp(sym_l, sym_r); +} + + static int iter_add_next_cumulative_entry(struct hist_entry_iter *iter, struct addr_location *al) @@ -1096,6 +1110,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, }; int i; struct callchain_cursor cursor; + bool fast = hists__has(he_tmp.hists, sym); callchain_cursor_snapshot(&cursor, &callchain_cursor); @@ -1106,6 +1121,14 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, * It's possible that it has cycles or recursive calls. */ for (i = 0; i < iter->curr; i++) { + /* + * For most cases, there are no duplicate entries in callchain. + * The symbols are usually different. Do a quick check for + * symbols first. + */ + if (fast && hist_entry__fast__sym_diff(he_cache[i], &he_tmp)) + continue; + if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) { /* to avoid calling callback function */ iter->he = NULL; @@ -1907,8 +1930,8 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, } } -void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog, - hists__resort_cb_t cb, void *cb_arg) +void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog, + hists__resort_cb_t cb, void *cb_arg) { bool use_callchain; @@ -1922,9 +1945,9 @@ void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog, output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg); } -void perf_evsel__output_resort(struct evsel *evsel, struct ui_progress *prog) +void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog) { - return perf_evsel__output_resort_cb(evsel, prog, NULL, NULL); + return evsel__output_resort_cb(evsel, prog, NULL, NULL); } void hists__output_resort(struct hists *hists, struct ui_progress *prog) @@ -2637,7 +2660,7 @@ size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) size_t ret = 0; evlist__for_each_entry(evlist, pos) { - ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos)); + ret += fprintf(fp, "%s stats:\n", evsel__name(pos)); ret += events_stats__fprintf(&evsel__hists(pos)->stats, fp); } @@ -2661,7 +2684,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; u64 nr_events = hists->stats.total_period; struct evsel *evsel = hists_to_evsel(hists); - const char *ev_name = perf_evsel__name(evsel); + const char *ev_name = evsel__name(evsel); char buf[512], sample_freq_str[64] = ""; size_t buflen = sizeof(buf); char ref[30] = " show reference callgraph, "; @@ -2672,10 +2695,10 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh nr_events = hists->stats.total_non_filtered_period; } - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { struct evsel *pos; - perf_evsel__group_desc(evsel, buf, buflen); + evsel__group_desc(evsel, buf, buflen); ev_name = buf; for_each_group_member(pos, evsel) { @@ -2822,9 +2845,8 @@ static int hists_evsel__init(struct evsel *evsel) int hists__init(void) { - int err = perf_evsel__object_config(sizeof(struct hists_evsel), - hists_evsel__init, - hists_evsel__exit); + int err = evsel__object_config(sizeof(struct hists_evsel), + hists_evsel__init, hists_evsel__exit); if (err) fputs("FATAL ERROR: Couldn't setup hists class\n", stderr); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 4141295a66fa..96b1c13bbccc 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -173,9 +173,9 @@ void hist_entry__delete(struct hist_entry *he); typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg); -void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog, - hists__resort_cb_t cb, void *cb_arg); -void perf_evsel__output_resort(struct evsel *evsel, struct ui_progress *prog); +void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog, + hists__resort_cb_t cb, void *cb_arg); +void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog); void hists__output_resort(struct hists *hists, struct ui_progress *prog); void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, hists__resort_cb_t cb); diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 34cb380d19a3..af1e78d76228 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -432,7 +432,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq, le64_to_cpu(branch->from), le64_to_cpu(branch->to), btsq->intel_pt_insn.length, - buffer->buffer_nr + 1); + buffer->buffer_nr + 1, true, 0, 0); if (filter && !(filter & btsq->sample_flags)) continue; err = intel_bts_synth_branch_sample(btsq, branch); @@ -728,6 +728,15 @@ static void intel_bts_free(struct perf_session *session) free(bts); } +static bool intel_bts_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + + return evsel->core.attr.type == bts->pmu_type; +} + struct intel_bts_synth { struct perf_tool dummy_tool; struct perf_session *session; @@ -816,10 +825,10 @@ static int intel_bts_synth_events(struct intel_bts *bts, bts->branches_id = id; /* * We only use sample types from PERF_SAMPLE_MASK so we can use - * __perf_evsel__sample_size() here. + * __evsel__sample_size() here. */ bts->branches_event_size = sizeof(struct perf_record_sample) + - __perf_evsel__sample_size(attr.sample_type); + __evsel__sample_size(attr.sample_type); } return 0; @@ -883,6 +892,7 @@ int intel_bts_process_auxtrace_info(union perf_event *event, bts->auxtrace.flush_events = intel_bts_flush; bts->auxtrace.free_events = intel_bts_free_events; bts->auxtrace.free = intel_bts_free; + bts->auxtrace.evsel_is_auxtrace = intel_bts_evsel_is_auxtrace; session->auxtrace = &bts->auxtrace; intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE, diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 0ccf10a0bf44..4ce109993e74 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -552,7 +552,7 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, break; default: break; - }; + } if (!(byte & BIT(0))) { if (byte == 0) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 23c8289c2472..e4dd8bf610ce 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -33,6 +33,7 @@ #include "tsc.h" #include "intel-pt.h" #include "config.h" +#include "util/perf_api_probe.h" #include "util/synthetic-events.h" #include "time-utils.h" @@ -68,6 +69,10 @@ struct intel_pt { bool est_tsc; bool sync_switch; bool mispred_all; + bool use_thread_stack; + bool callstack; + unsigned int br_stack_sz; + unsigned int br_stack_sz_plus; int have_sched_switch; u32 pmu_type; u64 kernel_start; @@ -124,6 +129,9 @@ struct intel_pt { struct range *time_ranges; unsigned int range_cnt; + + struct ip_callchain *chain; + struct branch_stack *br_stack; }; enum switch_state { @@ -143,8 +151,6 @@ struct intel_pt_queue { const struct intel_pt_state *state; struct ip_callchain *chain; struct branch_stack *last_branch; - struct branch_stack *last_branch_rb; - size_t last_branch_pos; union perf_event *event_buf; bool on_heap; bool stop; @@ -868,6 +874,86 @@ static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) pt->tc.time_mult; } +static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt) +{ + size_t sz = sizeof(struct ip_callchain); + + /* Add 1 to callchain_sz for callchain context */ + sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); + return zalloc(sz); +} + +static int intel_pt_callchain_init(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN)) + evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN; + } + + pt->chain = intel_pt_alloc_chain(pt); + if (!pt->chain) + return -ENOMEM; + + return 0; +} + +static void intel_pt_add_callchain(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct thread *thread = machine__findnew_thread(pt->machine, + sample->pid, + sample->tid); + + thread_stack__sample_late(thread, sample->cpu, pt->chain, + pt->synth_opts.callchain_sz + 1, sample->ip, + pt->kernel_start); + + sample->callchain = pt->chain; +} + +static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt) +{ + size_t sz = sizeof(struct branch_stack); + + sz += entry_cnt * sizeof(struct branch_entry); + return zalloc(sz); +} + +static int intel_pt_br_stack_init(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)) + evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK; + } + + pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz); + if (!pt->br_stack) + return -ENOMEM; + + return 0; +} + +static void intel_pt_add_br_stack(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct thread *thread = machine__findnew_thread(pt->machine, + sample->pid, + sample->tid); + + thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack, + pt->br_stack_sz, sample->ip, + pt->kernel_start); + + sample->branch_stack = pt->br_stack; +} + +/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ +#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U) + static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, unsigned int queue_nr) { @@ -880,26 +966,17 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, return NULL; if (pt->synth_opts.callchain) { - size_t sz = sizeof(struct ip_callchain); - - /* Add 1 to callchain_sz for callchain context */ - sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); - ptq->chain = zalloc(sz); + ptq->chain = intel_pt_alloc_chain(pt); if (!ptq->chain) goto out_free; } - if (pt->synth_opts.last_branch) { - size_t sz = sizeof(struct branch_stack); + if (pt->synth_opts.last_branch || pt->synth_opts.other_events) { + unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz); - sz += pt->synth_opts.last_branch_sz * - sizeof(struct branch_entry); - ptq->last_branch = zalloc(sz); + ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt); if (!ptq->last_branch) goto out_free; - ptq->last_branch_rb = zalloc(sz); - if (!ptq->last_branch_rb) - goto out_free; } ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); @@ -968,7 +1045,6 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, out_free: zfree(&ptq->event_buf); zfree(&ptq->last_branch); - zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); return NULL; @@ -984,7 +1060,6 @@ static void intel_pt_free_queue(void *priv) intel_pt_decoder_free(ptq->decoder); zfree(&ptq->event_buf); zfree(&ptq->last_branch); - zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); } @@ -1152,58 +1227,6 @@ static int intel_pt_setup_queues(struct intel_pt *pt) return 0; } -static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) -{ - struct branch_stack *bs_src = ptq->last_branch_rb; - struct branch_stack *bs_dst = ptq->last_branch; - size_t nr = 0; - - bs_dst->nr = bs_src->nr; - - if (!bs_src->nr) - return; - - nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos; - memcpy(&bs_dst->entries[0], - &bs_src->entries[ptq->last_branch_pos], - sizeof(struct branch_entry) * nr); - - if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { - memcpy(&bs_dst->entries[nr], - &bs_src->entries[0], - sizeof(struct branch_entry) * ptq->last_branch_pos); - } -} - -static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) -{ - ptq->last_branch_pos = 0; - ptq->last_branch_rb->nr = 0; -} - -static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) -{ - const struct intel_pt_state *state = ptq->state; - struct branch_stack *bs = ptq->last_branch_rb; - struct branch_entry *be; - - if (!ptq->last_branch_pos) - ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; - - ptq->last_branch_pos -= 1; - - be = &bs->entries[ptq->last_branch_pos]; - be->from = state->from_ip; - be->to = state->to_ip; - be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); - be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); - /* No support for mispredict */ - be->flags.mispred = ptq->pt->mispred_all; - - if (bs->nr < ptq->pt->synth_opts.last_branch_sz) - bs->nr += 1; -} - static inline bool intel_pt_skip_event(struct intel_pt *pt) { return pt->synth_opts.initial_skip && @@ -1271,9 +1294,9 @@ static inline int intel_pt_opt_inject(struct intel_pt *pt, return intel_pt_inject_event(event, sample, type); } -static int intel_pt_deliver_synth_b_event(struct intel_pt *pt, - union perf_event *event, - struct perf_sample *sample, u64 type) +static int intel_pt_deliver_synth_event(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample, u64 type) { int ret; @@ -1333,8 +1356,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; } - return intel_pt_deliver_synth_b_event(pt, event, &sample, - pt->branches_sample_type); + return intel_pt_deliver_synth_event(pt, event, &sample, + pt->branches_sample_type); } static void intel_pt_prep_sample(struct intel_pt *pt, @@ -1352,27 +1375,12 @@ static void intel_pt_prep_sample(struct intel_pt *pt, } if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); + thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch, + pt->br_stack_sz); sample->branch_stack = ptq->last_branch; } } -static inline int intel_pt_deliver_synth_event(struct intel_pt *pt, - struct intel_pt_queue *ptq, - union perf_event *event, - struct perf_sample *sample, - u64 type) -{ - int ret; - - ret = intel_pt_deliver_synth_b_event(pt, event, sample, type); - - if (pt->synth_opts.last_branch) - intel_pt_reset_last_branch_rb(ptq); - - return ret; -} - static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; @@ -1397,7 +1405,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) ptq->last_insn_cnt = ptq->state->tot_insn_cnt; - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->instructions_sample_type); } @@ -1415,7 +1423,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) sample.id = ptq->pt->transactions_id; sample.stream_id = ptq->pt->transactions_id; - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->transactions_sample_type); } @@ -1456,7 +1464,7 @@ static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->ptwrites_sample_type); } @@ -1486,7 +1494,7 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1511,7 +1519,7 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1536,7 +1544,7 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1561,7 +1569,7 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1586,7 +1594,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1680,15 +1688,14 @@ static u64 intel_pt_lbr_flags(u64 info) union { struct branch_flags flags; u64 result; - } u = { - .flags = { - .mispred = !!(info & LBR_INFO_MISPRED), - .predicted = !(info & LBR_INFO_MISPRED), - .in_tx = !!(info & LBR_INFO_IN_TX), - .abort = !!(info & LBR_INFO_ABORT), - .cycles = info & LBR_INFO_CYCLES, - } - }; + } u; + + u.result = 0; + u.flags.mispred = !!(info & LBR_INFO_MISPRED); + u.flags.predicted = !(info & LBR_INFO_MISPRED); + u.flags.in_tx = !!(info & LBR_INFO_IN_TX); + u.flags.abort = !!(info & LBR_INFO_ABORT); + u.flags.cycles = info & LBR_INFO_CYCLES; return u.result; } @@ -1718,9 +1725,6 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ -#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3) - static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) { const struct intel_pt_blk_items *items = &ptq->state->items; @@ -1796,23 +1800,18 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) } if (sample_type & PERF_SAMPLE_BRANCH_STACK) { - struct { - struct branch_stack br_stack; - struct branch_entry entries[LBRS_MAX]; - } br; - if (items->mask[INTEL_PT_LBR_0_POS] || items->mask[INTEL_PT_LBR_1_POS] || items->mask[INTEL_PT_LBR_2_POS]) { - intel_pt_add_lbrs(&br.br_stack, items); - sample.branch_stack = &br.br_stack; + intel_pt_add_lbrs(ptq->last_branch, items); } else if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); - sample.branch_stack = ptq->last_branch; + thread_stack__br_sample(ptq->thread, ptq->cpu, + ptq->last_branch, + pt->br_stack_sz); } else { - br.br_stack.nr = 0; - sample.branch_stack = &br.br_stack; + ptq->last_branch->nr = 0; } + sample.branch_stack = ptq->last_branch; } if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address) @@ -1842,7 +1841,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) sample.transaction = txn; } - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type); + return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, @@ -1992,12 +1991,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) if (!(state->type & INTEL_PT_BRANCH)) return 0; - if (pt->synth_opts.callchain || pt->synth_opts.thread_stack) - thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip, - state->to_ip, ptq->insn_len, - state->trace_nr); - else + if (pt->use_thread_stack) { + thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, + state->from_ip, state->to_ip, ptq->insn_len, + state->trace_nr, pt->callstack, + pt->br_stack_sz_plus, + pt->mispred_all); + } else { thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr); + } if (pt->sample_branches) { err = intel_pt_synth_branch_sample(ptq); @@ -2005,9 +2007,6 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) return err; } - if (pt->synth_opts.last_branch) - intel_pt_update_last_branch_rb(ptq); - if (!ptq->sync_switch) return 0; @@ -2484,7 +2483,7 @@ static int intel_pt_process_switch(struct intel_pt *pt, if (evsel != pt->switch_evsel) return 0; - tid = perf_evsel__intval(evsel, sample, "next_pid"); + tid = evsel__intval(evsel, sample, "next_pid"); cpu = sample->cpu; intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", @@ -2639,6 +2638,13 @@ static int intel_pt_process_event(struct perf_session *session, if (err) return err; + if (event->header.type == PERF_RECORD_SAMPLE) { + if (pt->synth_opts.add_callchain && !sample->callchain) + intel_pt_add_callchain(pt, sample); + if (pt->synth_opts.add_last_branch && !sample->branch_stack) + intel_pt_add_br_stack(pt, sample); + } + if (event->header.type == PERF_RECORD_AUX && (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && pt->synth_opts.errors) { @@ -2710,11 +2716,21 @@ static void intel_pt_free(struct perf_session *session) session->auxtrace = NULL; thread__put(pt->unknown_thread); addr_filters__exit(&pt->filts); + zfree(&pt->chain); zfree(&pt->filter); zfree(&pt->time_ranges); free(pt); } +static bool intel_pt_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + return evsel->core.attr.type == pt->pmu_type; +} + static int intel_pt_process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) @@ -3016,7 +3032,7 @@ static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry_reverse(evlist, evsel) { - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); if (!strcmp(name, "sched:sched_switch")) return evsel; @@ -3310,6 +3326,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->auxtrace.flush_events = intel_pt_flush; pt->auxtrace.free_events = intel_pt_free_events; pt->auxtrace.free = intel_pt_free; + pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace; session->auxtrace = &pt->auxtrace; if (dump_trace) @@ -3338,6 +3355,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, !session->itrace_synth_opts->inject) { pt->synth_opts.branches = false; pt->synth_opts.callchain = true; + pt->synth_opts.add_callchain = true; } pt->synth_opts.thread_stack = session->itrace_synth_opts->thread_stack; @@ -3370,14 +3388,54 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->branches_filter |= PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_BEGIN; - if (pt->synth_opts.callchain && !symbol_conf.use_callchain) { + if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) && + !symbol_conf.use_callchain) { symbol_conf.use_callchain = true; if (callchain_register_param(&callchain_param) < 0) { symbol_conf.use_callchain = false; pt->synth_opts.callchain = false; + pt->synth_opts.add_callchain = false; } } + if (pt->synth_opts.add_callchain) { + err = intel_pt_callchain_init(pt); + if (err) + goto err_delete_thread; + } + + if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) { + pt->br_stack_sz = pt->synth_opts.last_branch_sz; + pt->br_stack_sz_plus = pt->br_stack_sz; + } + + if (pt->synth_opts.add_last_branch) { + err = intel_pt_br_stack_init(pt); + if (err) + goto err_delete_thread; + /* + * Additional branch stack size to cater for tracing from the + * actual sample ip to where the sample time is recorded. + * Measured at about 200 branches, but generously set to 1024. + * If kernel space is not being traced, then add just 1 for the + * branch to kernel space. + */ + if (intel_pt_tracing_kernel(pt)) + pt->br_stack_sz_plus += 1024; + else + pt->br_stack_sz_plus += 1; + } + + pt->use_thread_stack = pt->synth_opts.callchain || + pt->synth_opts.add_callchain || + pt->synth_opts.thread_stack || + pt->synth_opts.last_branch || + pt->synth_opts.add_last_branch; + + pt->callstack = pt->synth_opts.callchain || + pt->synth_opts.add_callchain || + pt->synth_opts.thread_stack; + err = intel_pt_synth_events(pt, session); if (err) goto err_delete_thread; @@ -3400,6 +3458,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, return 0; err_delete_thread: + zfree(&pt->chain); thread__zput(pt->unknown_thread); err_free_queues: intel_pt_log_disable(); diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index e3ccb0ce1938..32bb05e03fb2 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -57,7 +57,7 @@ struct debug_line_info { unsigned long vma; unsigned int lineno; /* The filename format is unspecified, absolute path, relative etc. */ - char const filename[0]; + char const filename[]; }; struct jit_tool { diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h index f2c3823cc81a..ab2842def83d 100644 --- a/tools/perf/util/jitdump.h +++ b/tools/perf/util/jitdump.h @@ -93,7 +93,7 @@ struct debug_entry { uint64_t addr; int lineno; /* source line number starting at 1 */ int discrim; /* column discriminator, 0 is default */ - const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */ + const char name[]; /* null terminated filename, \xff\0 if same as previous entry */ }; struct jr_code_debug_info { @@ -101,7 +101,7 @@ struct jr_code_debug_info { uint64_t code_addr; uint64_t nr_entry; - struct debug_entry entries[0]; + struct debug_entry entries[]; }; struct jr_code_unwinding_info { @@ -110,7 +110,7 @@ struct jr_code_unwinding_info { uint64_t unwinding_size; uint64_t eh_frame_hdr_size; uint64_t mapped_size; - const char unwinding_data[0]; + const char unwinding_data[]; }; union jr_entry { diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 97142e9671be..d5384807372b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -736,6 +736,12 @@ int machine__process_switch_event(struct machine *machine __maybe_unused, return 0; } +static int is_bpf_image(const char *name) +{ + return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 || + strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0; +} + static int machine__process_ksymbol_register(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) @@ -759,6 +765,12 @@ static int machine__process_ksymbol_register(struct machine *machine, map->start = event->ksymbol.addr; map->end = map->start + event->ksymbol.len; maps__insert(&machine->kmaps, map); + dso__set_loaded(dso); + + if (is_bpf_image(event->ksymbol.name)) { + dso->binary_type = DSO_BINARY_TYPE__BPF_IMAGE; + dso__set_long_name(dso, "", false); + } } sym = symbol__new(map->map_ip(map, map->start), @@ -2178,6 +2190,303 @@ static int remove_loops(struct branch_entry *l, int nr, return nr; } +static int lbr_callchain_add_kernel_ip(struct thread *thread, + struct callchain_cursor *cursor, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + u64 branch_from, + bool callee, int end) +{ + struct ip_callchain *chain = sample->callchain; + u8 cpumode = PERF_RECORD_MISC_USER; + int err, i; + + if (callee) { + for (i = 0; i < end + 1; i++) { + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, chain->ips[i], + false, NULL, NULL, branch_from); + if (err) + return err; + } + return 0; + } + + for (i = end; i >= 0; i--) { + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, chain->ips[i], + false, NULL, NULL, branch_from); + if (err) + return err; + } + + return 0; +} + +static void save_lbr_cursor_node(struct thread *thread, + struct callchain_cursor *cursor, + int idx) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + + if (!lbr_stitch) + return; + + if (cursor->pos == cursor->nr) { + lbr_stitch->prev_lbr_cursor[idx].valid = false; + return; + } + + if (!cursor->curr) + cursor->curr = cursor->first; + else + cursor->curr = cursor->curr->next; + memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr, + sizeof(struct callchain_cursor_node)); + + lbr_stitch->prev_lbr_cursor[idx].valid = true; + cursor->pos++; +} + +static int lbr_callchain_add_lbr_ip(struct thread *thread, + struct callchain_cursor *cursor, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + u64 *branch_from, + bool callee) +{ + struct branch_stack *lbr_stack = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); + u8 cpumode = PERF_RECORD_MISC_USER; + int lbr_nr = lbr_stack->nr; + struct branch_flags *flags; + int err, i; + u64 ip; + + /* + * The curr and pos are not used in writing session. They are cleared + * in callchain_cursor_commit() when the writing session is closed. + * Using curr and pos to track the current cursor node. + */ + if (thread->lbr_stitch) { + cursor->curr = NULL; + cursor->pos = cursor->nr; + if (cursor->nr) { + cursor->curr = cursor->first; + for (i = 0; i < (int)(cursor->nr - 1); i++) + cursor->curr = cursor->curr->next; + } + } + + if (callee) { + /* Add LBR ip from first entries.to */ + ip = entries[0].to; + flags = &entries[0].flags; + *branch_from = entries[0].from; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + + /* + * The number of cursor node increases. + * Move the current cursor node. + * But does not need to save current cursor node for entry 0. + * It's impossible to stitch the whole LBRs of previous sample. + */ + if (thread->lbr_stitch && (cursor->pos != cursor->nr)) { + if (!cursor->curr) + cursor->curr = cursor->first; + else + cursor->curr = cursor->curr->next; + cursor->pos++; + } + + /* Add LBR ip from entries.from one by one. */ + for (i = 0; i < lbr_nr; i++) { + ip = entries[i].from; + flags = &entries[i].flags; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + save_lbr_cursor_node(thread, cursor, i); + } + return 0; + } + + /* Add LBR ip from entries.from one by one. */ + for (i = lbr_nr - 1; i >= 0; i--) { + ip = entries[i].from; + flags = &entries[i].flags; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + save_lbr_cursor_node(thread, cursor, i); + } + + /* Add LBR ip from first entries.to */ + ip = entries[0].to; + flags = &entries[0].flags; + *branch_from = entries[0].from; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + + return 0; +} + +static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread, + struct callchain_cursor *cursor) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct callchain_cursor_node *cnode; + struct stitch_list *stitch_node; + int err; + + list_for_each_entry(stitch_node, &lbr_stitch->lists, node) { + cnode = &stitch_node->cursor; + + err = callchain_cursor_append(cursor, cnode->ip, + &cnode->ms, + cnode->branch, + &cnode->branch_flags, + cnode->nr_loop_iter, + cnode->iter_cycles, + cnode->branch_from, + cnode->srcline); + if (err) + return err; + } + return 0; +} + +static struct stitch_list *get_stitch_node(struct thread *thread) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct stitch_list *stitch_node; + + if (!list_empty(&lbr_stitch->free_lists)) { + stitch_node = list_first_entry(&lbr_stitch->free_lists, + struct stitch_list, node); + list_del(&stitch_node->node); + + return stitch_node; + } + + return malloc(sizeof(struct stitch_list)); +} + +static bool has_stitched_lbr(struct thread *thread, + struct perf_sample *cur, + struct perf_sample *prev, + unsigned int max_lbr, + bool callee) +{ + struct branch_stack *cur_stack = cur->branch_stack; + struct branch_entry *cur_entries = perf_sample__branch_entries(cur); + struct branch_stack *prev_stack = prev->branch_stack; + struct branch_entry *prev_entries = perf_sample__branch_entries(prev); + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + int i, j, nr_identical_branches = 0; + struct stitch_list *stitch_node; + u64 cur_base, distance; + + if (!cur_stack || !prev_stack) + return false; + + /* Find the physical index of the base-of-stack for current sample. */ + cur_base = max_lbr - cur_stack->nr + cur_stack->hw_idx + 1; + + distance = (prev_stack->hw_idx > cur_base) ? (prev_stack->hw_idx - cur_base) : + (max_lbr + prev_stack->hw_idx - cur_base); + /* Previous sample has shorter stack. Nothing can be stitched. */ + if (distance + 1 > prev_stack->nr) + return false; + + /* + * Check if there are identical LBRs between two samples. + * Identicall LBRs must have same from, to and flags values. Also, + * they have to be saved in the same LBR registers (same physical + * index). + * + * Starts from the base-of-stack of current sample. + */ + for (i = distance, j = cur_stack->nr - 1; (i >= 0) && (j >= 0); i--, j--) { + if ((prev_entries[i].from != cur_entries[j].from) || + (prev_entries[i].to != cur_entries[j].to) || + (prev_entries[i].flags.value != cur_entries[j].flags.value)) + break; + nr_identical_branches++; + } + + if (!nr_identical_branches) + return false; + + /* + * Save the LBRs between the base-of-stack of previous sample + * and the base-of-stack of current sample into lbr_stitch->lists. + * These LBRs will be stitched later. + */ + for (i = prev_stack->nr - 1; i > (int)distance; i--) { + + if (!lbr_stitch->prev_lbr_cursor[i].valid) + continue; + + stitch_node = get_stitch_node(thread); + if (!stitch_node) + return false; + + memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i], + sizeof(struct callchain_cursor_node)); + + if (callee) + list_add(&stitch_node->node, &lbr_stitch->lists); + else + list_add_tail(&stitch_node->node, &lbr_stitch->lists); + } + + return true; +} + +static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr) +{ + if (thread->lbr_stitch) + return true; + + thread->lbr_stitch = zalloc(sizeof(*thread->lbr_stitch)); + if (!thread->lbr_stitch) + goto err; + + thread->lbr_stitch->prev_lbr_cursor = calloc(max_lbr + 1, sizeof(struct callchain_cursor_node)); + if (!thread->lbr_stitch->prev_lbr_cursor) + goto free_lbr_stitch; + + INIT_LIST_HEAD(&thread->lbr_stitch->lists); + INIT_LIST_HEAD(&thread->lbr_stitch->free_lists); + + return true; + +free_lbr_stitch: + zfree(&thread->lbr_stitch); +err: + pr_warning("Failed to allocate space for stitched LBRs. Disable LBR stitch\n"); + thread->lbr_stitch_enable = false; + return false; +} + /* * Recolve LBR callstack chain sample * Return: @@ -2190,12 +2499,16 @@ static int resolve_lbr_callchain_sample(struct thread *thread, struct perf_sample *sample, struct symbol **parent, struct addr_location *root_al, - int max_stack) + int max_stack, + unsigned int max_lbr) { + bool callee = (callchain_param.order == ORDER_CALLEE); struct ip_callchain *chain = sample->callchain; int chain_nr = min(max_stack, (int)chain->nr), i; - u8 cpumode = PERF_RECORD_MISC_USER; - u64 ip, branch_from = 0; + struct lbr_stitch *lbr_stitch; + bool stitched_lbr = false; + u64 branch_from = 0; + int err; for (i = 0; i < chain_nr; i++) { if (chain->ips[i] == PERF_CONTEXT_USER) @@ -2203,71 +2516,65 @@ static int resolve_lbr_callchain_sample(struct thread *thread, } /* LBR only affects the user callchain */ - if (i != chain_nr) { - struct branch_stack *lbr_stack = sample->branch_stack; - struct branch_entry *entries = perf_sample__branch_entries(sample); - int lbr_nr = lbr_stack->nr, j, k; - bool branch; - struct branch_flags *flags; - /* - * LBR callstack can only get user call chain. - * The mix_chain_nr is kernel call chain - * number plus LBR user call chain number. - * i is kernel call chain number, - * 1 is PERF_CONTEXT_USER, - * lbr_nr + 1 is the user call chain number. - * For details, please refer to the comments - * in callchain__printf - */ - int mix_chain_nr = i + 1 + lbr_nr + 1; + if (i == chain_nr) + return 0; - for (j = 0; j < mix_chain_nr; j++) { - int err; - branch = false; - flags = NULL; + if (thread->lbr_stitch_enable && !sample->no_hw_idx && + (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) { + lbr_stitch = thread->lbr_stitch; - if (callchain_param.order == ORDER_CALLEE) { - if (j < i + 1) - ip = chain->ips[j]; - else if (j > i + 1) { - k = j - i - 2; - ip = entries[k].from; - branch = true; - flags = &entries[k].flags; - } else { - ip = entries[0].to; - branch = true; - flags = &entries[0].flags; - branch_from = entries[0].from; - } - } else { - if (j < lbr_nr) { - k = lbr_nr - j - 1; - ip = entries[k].from; - branch = true; - flags = &entries[k].flags; - } - else if (j > lbr_nr) - ip = chain->ips[i + 1 - (j - lbr_nr)]; - else { - ip = entries[0].to; - branch = true; - flags = &entries[0].flags; - branch_from = entries[0].from; - } - } + stitched_lbr = has_stitched_lbr(thread, sample, + &lbr_stitch->prev_sample, + max_lbr, callee); - err = add_callchain_ip(thread, cursor, parent, - root_al, &cpumode, ip, - branch, flags, NULL, - branch_from); + if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) { + list_replace_init(&lbr_stitch->lists, + &lbr_stitch->free_lists); + } + memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample)); + } + + if (callee) { + /* Add kernel ip */ + err = lbr_callchain_add_kernel_ip(thread, cursor, sample, + parent, root_al, branch_from, + true, i); + if (err) + goto error; + + err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, + root_al, &branch_from, true); + if (err) + goto error; + + if (stitched_lbr) { + err = lbr_callchain_add_stitched_lbr_ip(thread, cursor); if (err) - return (err < 0) ? err : 0; + goto error; } - return 1; + + } else { + if (stitched_lbr) { + err = lbr_callchain_add_stitched_lbr_ip(thread, cursor); + if (err) + goto error; + } + err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, + root_al, &branch_from, false); + if (err) + goto error; + + /* Add kernel ip */ + err = lbr_callchain_add_kernel_ip(thread, cursor, sample, + parent, root_al, branch_from, + false, i); + if (err) + goto error; } + return 1; - return 0; +error: + return (err < 0) ? err : 0; } static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, @@ -2311,9 +2618,12 @@ static int thread__resolve_callchain_sample(struct thread *thread, if (chain) chain_nr = chain->nr; - if (perf_evsel__has_branch_callstack(evsel)) { + if (evsel__has_branch_callstack(evsel)) { + struct perf_env *env = evsel__env(evsel); + err = resolve_lbr_callchain_sample(thread, cursor, sample, parent, - root_al, max_stack); + root_al, max_stack, + !env ? 0 : env->max_branches); if (err) return (err < 0) ? err : 0; } diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index aa29589f6904..ea0af0bc4314 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -103,6 +103,21 @@ int perf_mem_events__init(void) return found ? 0 : -ENOENT; } +void perf_mem_events__list(void) +{ + int j; + + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + struct perf_mem_event *e = &perf_mem_events[j]; + + fprintf(stderr, "%-13s%-*s%s\n", + e->tag, + verbose > 0 ? 25 : 0, + verbose > 0 ? perf_mem_events__name(j) : "", + e->supported ? ": available" : ""); + } +} + static const char * const tlb_access[] = { "N/A", "HIT", diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index f1389bdae7bf..904dad34f7f7 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -39,6 +39,8 @@ int perf_mem_events__init(void); char *perf_mem_events__name(int i); +void perf_mem_events__list(void); + struct mem_info; int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c index 797d86a1ab09..c84f5841c7ab 100644 --- a/tools/perf/util/mem2node.c +++ b/tools/perf/util/mem2node.c @@ -1,5 +1,6 @@ #include <errno.h> #include <inttypes.h> +#include <asm/bug.h> #include <linux/bitmap.h> #include <linux/kernel.h> #include <linux/zalloc.h> @@ -95,7 +96,7 @@ int mem2node__init(struct mem2node *map, struct perf_env *env) /* Cut unused entries, due to merging. */ tmp_entries = realloc(entries, sizeof(*entries) * j); - if (tmp_entries) + if (tmp_entries || WARN_ON_ONCE(j == 0)) entries = tmp_entries; for (i = 0; i < j; i++) { diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 926449a7cdbf..9e21aa767e41 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -85,49 +85,103 @@ static void metricgroup__rblist_init(struct rblist *metric_events) struct egroup { struct list_head nd; - int idnum; - const char **ids; + struct expr_parse_ctx pctx; const char *metric_name; const char *metric_expr; const char *metric_unit; + int runtime; + bool has_constraint; }; +/** + * Find a group of events in perf_evlist that correpond to those from a parsed + * metric expression. Note, as find_evsel_group is called in the same order as + * perf_evlist was constructed, metric_no_merge doesn't need to test for + * underfilling a group. + * @perf_evlist: a list of events something like: {metric1 leader, metric1 + * sibling, metric1 sibling}:W,duration_time,{metric2 leader, metric2 sibling, + * metric2 sibling}:W,duration_time + * @pctx: the parse context for the metric expression. + * @metric_no_merge: don't attempt to share events for the metric with other + * metrics. + * @has_constraint: is there a contraint on the group of events? In which case + * the events won't be grouped. + * @metric_events: out argument, null terminated array of evsel's associated + * with the metric. + * @evlist_used: in/out argument, bitmap tracking which evlist events are used. + * @return the first metric event or NULL on failure. + */ static struct evsel *find_evsel_group(struct evlist *perf_evlist, - const char **ids, - int idnum, + struct expr_parse_ctx *pctx, + bool metric_no_merge, + bool has_constraint, struct evsel **metric_events, - bool *evlist_used) + unsigned long *evlist_used) { - struct evsel *ev; - int i = 0, j = 0; - bool leader_found; + struct evsel *ev, *current_leader = NULL; + double *val_ptr; + int i = 0, matched_events = 0, events_to_match; + const int idnum = (int)hashmap__size(&pctx->ids); + + /* duration_time is grouped separately. */ + if (!has_constraint && + hashmap__find(&pctx->ids, "duration_time", (void **)&val_ptr)) + events_to_match = idnum - 1; + else + events_to_match = idnum; evlist__for_each_entry (perf_evlist, ev) { - if (evlist_used[j++]) + /* + * Events with a constraint aren't grouped and match the first + * events available. + */ + if (has_constraint && ev->weak_group) continue; - if (!strcmp(ev->name, ids[i])) { - if (!metric_events[i]) - metric_events[i] = ev; - i++; - if (i == idnum) - break; - } else { - /* Discard the whole match and start again */ - i = 0; + /* Ignore event if already used and merging is disabled. */ + if (metric_no_merge && test_bit(ev->idx, evlist_used)) + continue; + if (!has_constraint && ev->leader != current_leader) { + /* + * Start of a new group, discard the whole match and + * start again. + */ + matched_events = 0; memset(metric_events, 0, sizeof(struct evsel *) * idnum); + current_leader = ev->leader; + } + if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) { + if (has_constraint) { + /* + * Events aren't grouped, ensure the same event + * isn't matched from two groups. + */ + for (i = 0; i < matched_events; i++) { + if (!strcmp(ev->name, + metric_events[i]->name)) { + break; + } + } + if (i != matched_events) + continue; + } + metric_events[matched_events++] = ev; + } + if (matched_events == events_to_match) + break; + } - if (!strcmp(ev->name, ids[i])) { - if (!metric_events[i]) - metric_events[i] = ev; - i++; - if (i == idnum) - break; + if (events_to_match != idnum) { + /* Add the first duration_time. */ + evlist__for_each_entry(perf_evlist, ev) { + if (!strcmp(ev->name, "duration_time")) { + metric_events[matched_events++] = ev; + break; } } } - if (i != idnum) { + if (matched_events != idnum) { /* Not whole match */ return NULL; } @@ -135,25 +189,16 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, metric_events[idnum] = NULL; for (i = 0; i < idnum; i++) { - leader_found = false; - evlist__for_each_entry(perf_evlist, ev) { - if (!leader_found && (ev == metric_events[i])) - leader_found = true; - - if (leader_found && - !strcmp(ev->name, metric_events[i]->name)) { - ev->metric_leader = metric_events[i]; - } - j++; - } ev = metric_events[i]; - evlist_used[ev->idx] = true; + ev->metric_leader = ev; + set_bit(ev->idx, evlist_used); } return metric_events[0]; } static int metricgroup__setup_events(struct list_head *groups, + bool metric_no_merge, struct evlist *perf_evlist, struct rblist *metric_events_list) { @@ -162,50 +207,61 @@ static int metricgroup__setup_events(struct list_head *groups, int i = 0; int ret = 0; struct egroup *eg; - struct evsel *evsel; - bool *evlist_used; + struct evsel *evsel, *tmp; + unsigned long *evlist_used; - evlist_used = calloc(perf_evlist->core.nr_entries, sizeof(bool)); - if (!evlist_used) { - ret = -ENOMEM; - return ret; - } + evlist_used = bitmap_alloc(perf_evlist->core.nr_entries); + if (!evlist_used) + return -ENOMEM; list_for_each_entry (eg, groups, nd) { struct evsel **metric_events; - metric_events = calloc(sizeof(void *), eg->idnum + 1); + metric_events = calloc(sizeof(void *), + hashmap__size(&eg->pctx.ids) + 1); if (!metric_events) { ret = -ENOMEM; break; } - evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum, - metric_events, evlist_used); + evsel = find_evsel_group(perf_evlist, &eg->pctx, + metric_no_merge, + eg->has_constraint, metric_events, + evlist_used); if (!evsel) { pr_debug("Cannot resolve %s: %s\n", eg->metric_name, eg->metric_expr); + free(metric_events); continue; } - for (i = 0; i < eg->idnum; i++) + for (i = 0; metric_events[i]; i++) metric_events[i]->collect_stat = true; me = metricgroup__lookup(metric_events_list, evsel, true); if (!me) { ret = -ENOMEM; + free(metric_events); break; } expr = malloc(sizeof(struct metric_expr)); if (!expr) { ret = -ENOMEM; + free(metric_events); break; } expr->metric_expr = eg->metric_expr; expr->metric_name = eg->metric_name; expr->metric_unit = eg->metric_unit; expr->metric_events = metric_events; + expr->runtime = eg->runtime; list_add(&expr->nd, &me->head); } - free(evlist_used); + evlist__for_each_entry_safe(perf_evlist, tmp, evsel) { + if (!test_bit(evsel->idx, evlist_used)) { + evlist__remove(perf_evlist, evsel); + evsel__delete(evsel); + } + } + bitmap_free(evlist_used); return ret; } @@ -413,43 +469,49 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, } static void metricgroup__add_metric_weak_group(struct strbuf *events, - const char **ids, - int idnum) + struct expr_parse_ctx *ctx) { - bool no_group = false; - int i; + struct hashmap_entry *cur; + size_t bkt; + bool no_group = true, has_duration = false; - for (i = 0; i < idnum; i++) { - pr_debug("found event %s\n", ids[i]); + hashmap__for_each_entry((&ctx->ids), cur, bkt) { + pr_debug("found event %s\n", (const char *)cur->key); /* * Duration time maps to a software event and can make * groups not count. Always use it outside a * group. */ - if (!strcmp(ids[i], "duration_time")) { - if (i > 0) - strbuf_addf(events, "}:W,"); - strbuf_addf(events, "duration_time"); - no_group = true; + if (!strcmp(cur->key, "duration_time")) { + has_duration = true; continue; } strbuf_addf(events, "%s%s", - i == 0 || no_group ? "{" : ",", - ids[i]); + no_group ? "{" : ",", + (const char *)cur->key); no_group = false; } - if (!no_group) + if (!no_group) { strbuf_addf(events, "}:W"); + if (has_duration) + strbuf_addf(events, ",duration_time"); + } else if (has_duration) + strbuf_addf(events, "duration_time"); } static void metricgroup__add_metric_non_group(struct strbuf *events, - const char **ids, - int idnum) + struct expr_parse_ctx *ctx) { - int i; - - for (i = 0; i < idnum; i++) - strbuf_addf(events, ",%s", ids[i]); + struct hashmap_entry *cur; + size_t bkt; + bool first = true; + + hashmap__for_each_entry((&ctx->ids), cur, bkt) { + if (!first) + strbuf_addf(events, ","); + strbuf_addf(events, "%s", (const char *)cur->key); + first = false; + } } static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) @@ -485,12 +547,63 @@ static bool metricgroup__has_constraint(struct pmu_event *pe) return false; } -static int metricgroup__add_metric(const char *metric, struct strbuf *events, +int __weak arch_get_runtimeparam(void) +{ + return 1; +} + +static int __metricgroup__add_metric(struct list_head *group_list, + struct pmu_event *pe, + bool metric_no_group, + int runtime) +{ + struct egroup *eg; + + eg = malloc(sizeof(*eg)); + if (!eg) + return -ENOMEM; + + expr__ctx_init(&eg->pctx); + eg->metric_name = pe->metric_name; + eg->metric_expr = pe->metric_expr; + eg->metric_unit = pe->unit; + eg->runtime = runtime; + eg->has_constraint = metric_no_group || metricgroup__has_constraint(pe); + + if (expr__find_other(pe->metric_expr, NULL, &eg->pctx, runtime) < 0) { + expr__ctx_clear(&eg->pctx); + free(eg); + return -EINVAL; + } + + if (list_empty(group_list)) + list_add(&eg->nd, group_list); + else { + struct list_head *pos; + + /* Place the largest groups at the front. */ + list_for_each_prev(pos, group_list) { + struct egroup *old = list_entry(pos, struct egroup, nd); + + if (hashmap__size(&eg->pctx.ids) <= + hashmap__size(&old->pctx.ids)) + break; + } + list_add(&eg->nd, pos); + } + + return 0; +} + +static int metricgroup__add_metric(const char *metric, bool metric_no_group, + struct strbuf *events, struct list_head *group_list) { struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; - int i, ret = -EINVAL; + struct egroup *eg; + int i, ret; + bool has_match = false; if (!map) return 0; @@ -498,47 +611,63 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, for (i = 0; ; i++) { pe = &map->table[i]; - if (!pe->name && !pe->metric_group && !pe->metric_name) + if (!pe->name && !pe->metric_group && !pe->metric_name) { + /* End of pmu events. */ + if (!has_match) + return -EINVAL; break; + } if (!pe->metric_expr) continue; if (match_metric(pe->metric_group, metric) || match_metric(pe->metric_name, metric)) { - const char **ids; - int idnum; - struct egroup *eg; - + has_match = true; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); - if (expr__find_other(pe->metric_expr, - NULL, &ids, &idnum) < 0) - continue; - if (events->len > 0) - strbuf_addf(events, ","); - - if (metricgroup__has_constraint(pe)) - metricgroup__add_metric_non_group(events, ids, idnum); - else - metricgroup__add_metric_weak_group(events, ids, idnum); - - eg = malloc(sizeof(struct egroup)); - if (!eg) { - ret = -ENOMEM; - break; + if (!strstr(pe->metric_expr, "?")) { + ret = __metricgroup__add_metric(group_list, + pe, + metric_no_group, + 1); + if (ret) + return ret; + } else { + int j, count; + + count = arch_get_runtimeparam(); + + /* This loop is added to create multiple + * events depend on count value and add + * those events to group_list. + */ + + for (j = 0; j < count; j++) { + ret = __metricgroup__add_metric( + group_list, pe, + metric_no_group, j); + if (ret) + return ret; + } } - eg->ids = ids; - eg->idnum = idnum; - eg->metric_name = pe->metric_name; - eg->metric_expr = pe->metric_expr; - eg->metric_unit = pe->unit; - list_add_tail(&eg->nd, group_list); - ret = 0; } } - return ret; + list_for_each_entry(eg, group_list, nd) { + if (events->len > 0) + strbuf_addf(events, ","); + + if (eg->has_constraint) { + metricgroup__add_metric_non_group(events, + &eg->pctx); + } else { + metricgroup__add_metric_weak_group(events, + &eg->pctx); + } + } + return 0; } -static int metricgroup__add_metric_list(const char *list, struct strbuf *events, +static int metricgroup__add_metric_list(const char *list, bool metric_no_group, + struct strbuf *events, struct list_head *group_list) { char *llist, *nlist, *p; @@ -553,7 +682,8 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events, strbuf_addf(events, "%s", ""); while ((p = strsep(&llist, ",")) != NULL) { - ret = metricgroup__add_metric(p, events, group_list); + ret = metricgroup__add_metric(p, metric_no_group, events, + group_list); if (ret == -EINVAL) { fprintf(stderr, "Cannot find metric or group `%s'\n", p); @@ -571,20 +701,19 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events, static void metricgroup__free_egroups(struct list_head *group_list) { struct egroup *eg, *egtmp; - int i; list_for_each_entry_safe (eg, egtmp, group_list, nd) { - for (i = 0; i < eg->idnum; i++) - zfree(&eg->ids[i]); - zfree(&eg->ids); + expr__ctx_clear(&eg->pctx); list_del_init(&eg->nd); free(eg); } } int metricgroup__parse_groups(const struct option *opt, - const char *str, - struct rblist *metric_events) + const char *str, + bool metric_no_group, + bool metric_no_merge, + struct rblist *metric_events) { struct parse_events_error parse_error; struct evlist *perf_evlist = *(struct evlist **)opt->value; @@ -594,7 +723,8 @@ int metricgroup__parse_groups(const struct option *opt, if (metric_events->nr_entries == 0) metricgroup__rblist_init(metric_events); - ret = metricgroup__add_metric_list(str, &extra_events, &group_list); + ret = metricgroup__add_metric_list(str, metric_no_group, + &extra_events, &group_list); if (ret) return ret; pr_debug("adding %s\n", extra_events.buf); @@ -605,8 +735,8 @@ int metricgroup__parse_groups(const struct option *opt, goto out; } strbuf_release(&extra_events); - ret = metricgroup__setup_events(&group_list, perf_evlist, - metric_events); + ret = metricgroup__setup_events(&group_list, metric_no_merge, + perf_evlist, metric_events); out: metricgroup__free_egroups(&group_list); return ret; diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 475c7f912864..287850bcdeca 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -22,16 +22,20 @@ struct metric_expr { const char *metric_name; const char *metric_unit; struct evsel **metric_events; + int runtime; }; struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, bool create); int metricgroup__parse_groups(const struct option *opt, - const char *str, - struct rblist *metric_events); + const char *str, + bool metric_no_group, + bool metric_no_merge, + struct rblist *metric_events); void metricgroup__print(bool metrics, bool groups, char *filter, bool raw, bool details); bool metricgroup__has_metric(const char *metric); +int arch_get_runtimeparam(void); #endif diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 359db2b1fcef..48c8f609441b 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -314,7 +314,7 @@ static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how, case OE_FLUSH__NONE: default: break; - }; + } pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush PRE %s, nr_events %u\n", str[how], oe->nr_events); diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index 0920fb0ec6cc..75345946c4b9 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -29,7 +29,7 @@ typedef int (*ordered_events__deliver_t)(struct ordered_events *oe, struct ordered_events_buffer { struct list_head list; - struct ordered_event event[0]; + struct ordered_event event[]; }; struct ordered_events { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 10107747b361..3decbb203846 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -26,7 +26,7 @@ #include <api/fs/tracing_path.h> #include <perf/cpumap.h> #include "parse-events-bison.h" -#define YY_EXTRA_TYPE int +#define YY_EXTRA_TYPE void* #include "parse-events-flex.h" #include "pmu.h" #include "thread_map.h" @@ -36,6 +36,7 @@ #include "metricgroup.h" #include "util/evsel_config.h" #include "util/event.h" +#include "util/pfm.h" #define MAX_NAME_LEN 100 @@ -204,7 +205,8 @@ void parse_events__handle_error(struct parse_events_error *err, int idx, err->help = help; break; default: - WARN_ONCE(1, "WARNING: multiple event parsing errors\n"); + pr_debug("Multiple errors dropping message: %s (%s)\n", + err->str, err->help); free(err->str); err->str = str; free(err->help); @@ -344,6 +346,7 @@ static char *get_config_name(struct list_head *head_terms) static struct evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, + bool init_attr, char *name, struct perf_pmu *pmu, struct list_head *config_terms, bool auto_merge_stats, const char *cpu_list) @@ -352,9 +355,10 @@ __add_event(struct list_head *list, int *idx, struct perf_cpu_map *cpus = pmu ? pmu->cpus : cpu_list ? perf_cpu_map__new(cpu_list) : NULL; - event_attr_init(attr); + if (init_attr) + event_attr_init(attr); - evsel = perf_evsel__new_idx(attr, *idx); + evsel = evsel__new_idx(attr, *idx); if (!evsel) return NULL; @@ -370,15 +374,25 @@ __add_event(struct list_head *list, int *idx, if (config_terms) list_splice(config_terms, &evsel->config_terms); - list_add_tail(&evsel->core.node, list); + if (list) + list_add_tail(&evsel->core.node, list); + return evsel; } +struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, + char *name, struct perf_pmu *pmu) +{ + return __add_event(NULL, &idx, attr, false, name, pmu, NULL, false, + NULL); +} + static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name, struct list_head *config_terms) { - return __add_event(list, idx, attr, name, NULL, config_terms, false, NULL) ? 0 : -ENOMEM; + return __add_event(list, idx, attr, true, name, NULL, config_terms, + false, NULL) ? 0 : -ENOMEM; } static int add_event_tool(struct list_head *list, int *idx, @@ -390,7 +404,8 @@ static int add_event_tool(struct list_head *list, int *idx, .config = PERF_COUNT_SW_DUMMY, }; - evsel = __add_event(list, idx, &attr, NULL, NULL, NULL, false, "0"); + evsel = __add_event(list, idx, &attr, true, NULL, NULL, NULL, false, + "0"); if (!evsel) return -ENOMEM; evsel->tool_event = tool_event; @@ -399,13 +414,13 @@ static int add_event_tool(struct list_head *list, int *idx, return 0; } -static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) +static int parse_aliases(char *str, const char *names[][EVSEL__MAX_ALIASES], int size) { int i, j; int n, longest = -1; for (i = 0; i < size; i++) { - for (j = 0; j < PERF_EVSEL__MAX_ALIASES && names[i][j]; j++) { + for (j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) { n = strlen(names[i][j]); if (n > longest && !strncasecmp(str, names[i][j], n)) longest = n; @@ -444,8 +459,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, * No fallback - if we cannot get a clear cache type * then bail out: */ - cache_type = parse_aliases(type, perf_evsel__hw_cache, - PERF_COUNT_HW_CACHE_MAX); + cache_type = parse_aliases(type, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX); if (cache_type == -1) return -EINVAL; @@ -458,17 +472,17 @@ int parse_events_add_cache(struct list_head *list, int *idx, n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str); if (cache_op == -1) { - cache_op = parse_aliases(str, perf_evsel__hw_cache_op, + cache_op = parse_aliases(str, evsel__hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX); if (cache_op >= 0) { - if (!perf_evsel__is_cache_op_valid(cache_type, cache_op)) + if (!evsel__is_cache_op_valid(cache_type, cache_op)) return -EINVAL; continue; } } if (cache_result == -1) { - cache_result = parse_aliases(str, perf_evsel__hw_cache_result, + cache_result = parse_aliases(str, evsel__hw_cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); if (cache_result >= 0) continue; @@ -538,9 +552,8 @@ static int add_tracepoint(struct list_head *list, int *idx, struct parse_events_error *err, struct list_head *head_config) { - struct evsel *evsel; + struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, (*idx)++); - evsel = perf_evsel__newtp_idx(sys_name, evt_name, (*idx)++); if (IS_ERR(evsel)) { tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name); return PTR_ERR(evsel); @@ -1214,14 +1227,14 @@ static int get_config_terms(struct list_head *head_config, struct list_head *head_terms __maybe_unused) { #define ADD_CONFIG_TERM(__type, __weak) \ - struct perf_evsel_config_term *__t; \ + struct evsel_config_term *__t; \ \ __t = zalloc(sizeof(*__t)); \ if (!__t) \ return -ENOMEM; \ \ INIT_LIST_HEAD(&__t->list); \ - __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ + __t->type = EVSEL__CONFIG_TERM_ ## __type; \ __t->weak = __weak; \ list_add_tail(&__t->list, head_terms) @@ -1312,7 +1325,7 @@ do { \ } /* - * Add PERF_EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for + * Add EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for * each bit of attr->config that the user has changed. */ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, @@ -1400,10 +1413,10 @@ int parse_events_add_tool(struct parse_events_state *parse_state, static bool config_term_percore(struct list_head *config_terms) { - struct perf_evsel_config_term *term; + struct evsel_config_term *term; list_for_each_entry(term, config_terms, list) { - if (term->type == PERF_EVSEL__CONFIG_TERM_PERCORE) + if (term->type == EVSEL__CONFIG_TERM_PERCORE) return term->val.percore; } @@ -1424,6 +1437,19 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, bool use_uncore_alias; LIST_HEAD(config_terms); + if (verbose > 1) { + fprintf(stderr, "Attempting to add event pmu '%s' with '", + name); + if (head_config) { + struct parse_events_term *term; + + list_for_each_entry(term, head_config, list) { + fprintf(stderr, "%s,", term->config); + } + } + fprintf(stderr, "' that may result in non-fatal errors\n"); + } + pmu = perf_pmu__find(name); if (!pmu) { char *err_str; @@ -1446,8 +1472,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, - auto_merge_stats, NULL); + evsel = __add_event(list, &parse_state->idx, &attr, true, NULL, + pmu, NULL, auto_merge_stats, NULL); if (evsel) { evsel->pmu_name = name ? strdup(name) : NULL; evsel->use_uncore_alias = use_uncore_alias; @@ -1460,6 +1486,19 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (perf_pmu__check_alias(pmu, head_config, &info)) return -EINVAL; + if (verbose > 1) { + fprintf(stderr, "After aliases, add event pmu '%s' with '", + name); + if (head_config) { + struct parse_events_term *term; + + list_for_each_entry(term, head_config, list) { + fprintf(stderr, "%s,", term->config); + } + } + fprintf(stderr, "' that may result in non-fatal errors\n"); + } + /* * Configure hardcoded terms first, no need to check * return value when called with fail == 0 ;) @@ -1478,16 +1517,18 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, return -ENOMEM; if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { - struct perf_evsel_config_term *pos, *tmp; + struct evsel_config_term *pos, *tmp; list_for_each_entry_safe(pos, tmp, &config_terms, list) { list_del_init(&pos->list); + if (pos->free_str) + zfree(&pos->val.str); free(pos); } return -EINVAL; } - evsel = __add_event(list, &parse_state->idx, &attr, + evsel = __add_event(list, &parse_state->idx, &attr, true, get_config_name(head_config), pmu, &config_terms, auto_merge_stats, NULL); if (evsel) { @@ -1629,12 +1670,11 @@ parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list, * event. That can be used to distinguish the leader from * other members, even they have the same event name. */ - if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) { + if ((leader != evsel) && + !strcmp(leader->pmu_name, evsel->pmu_name)) { is_leader = false; continue; } - /* The name is always alias name */ - WARN_ON(strcmp(leader->name, evsel->name)); /* Store the leader event for each PMU */ leaders[nr_pmu++] = (uintptr_t) evsel; @@ -1870,7 +1910,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->precise_max = mod.precise_max; evsel->weak_group = mod.weak; - if (perf_evsel__is_group_leader(evsel)) + if (evsel__is_group_leader(evsel)) evsel->core.attr.pinned = mod.pinned; } @@ -2001,13 +2041,14 @@ perf_pmu__parse_check(const char *name) return r ? r->type : PMU_EVENT_SYMBOL_ERR; } -static int parse_events__scanner(const char *str, void *parse_state, int start_token) +static int parse_events__scanner(const char *str, + struct parse_events_state *parse_state) { YY_BUFFER_STATE buffer; void *scanner; int ret; - ret = parse_events_lex_init_extra(start_token, &scanner); + ret = parse_events_lex_init_extra(parse_state, &scanner); if (ret) return ret; @@ -2015,6 +2056,7 @@ static int parse_events__scanner(const char *str, void *parse_state, int start_t #ifdef PARSER_DEBUG parse_events_debug = 1; + parse_events_set_debug(1, scanner); #endif ret = parse_events_parse(parse_state, scanner); @@ -2030,11 +2072,12 @@ static int parse_events__scanner(const char *str, void *parse_state, int start_t int parse_events_terms(struct list_head *terms, const char *str) { struct parse_events_state parse_state = { - .terms = NULL, + .terms = NULL, + .stoken = PE_START_TERMS, }; int ret; - ret = parse_events__scanner(str, &parse_state, PE_START_TERMS); + ret = parse_events__scanner(str, &parse_state); if (!ret) { list_splice(parse_state.terms, terms); zfree(&parse_state.terms); @@ -2053,10 +2096,11 @@ int parse_events(struct evlist *evlist, const char *str, .idx = evlist->core.nr_entries, .error = err, .evlist = evlist, + .stoken = PE_START_EVENTS, }; int ret; - ret = parse_events__scanner(str, &parse_state, PE_START_EVENTS); + ret = parse_events__scanner(str, &parse_state); perf_pmu__parse_cleanup(); if (!ret && list_empty(&parse_state.list)) { @@ -2190,6 +2234,29 @@ int parse_events_option(const struct option *opt, const char *str, return ret; } +int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset) +{ + struct evlist **evlistp = opt->value; + int ret; + + if (*evlistp == NULL) { + *evlistp = evlist__new(); + + if (*evlistp == NULL) { + fprintf(stderr, "Not enough memory to create evlist\n"); + return -1; + } + } + + ret = parse_events_option(opt, str, unset); + if (ret) { + evlist__delete(*evlistp); + *evlistp = NULL; + } + + return ret; +} + static int foreach_evsel_in_last_glob(struct evlist *evlist, int (*func)(struct evsel *evsel, @@ -2237,7 +2304,7 @@ static int set_filter(struct evsel *evsel, const void *arg) } if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) { - if (perf_evsel__append_tp_filter(evsel, str) < 0) { + if (evsel__append_tp_filter(evsel, str) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; @@ -2262,7 +2329,7 @@ static int set_filter(struct evsel *evsel, const void *arg) return -1; } - if (perf_evsel__append_addr_filter(evsel, str) < 0) { + if (evsel__append_addr_filter(evsel, str) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; @@ -2293,7 +2360,7 @@ static int add_exclude_perf_filter(struct evsel *evsel, snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid()); - if (perf_evsel__append_tp_filter(evsel, new_filter) < 0) { + if (evsel__append_tp_filter(evsel, new_filter) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; @@ -2603,12 +2670,11 @@ restart: for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ - if (!perf_evsel__is_cache_op_valid(type, op)) + if (!evsel__is_cache_op_valid(type, op)) continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - __perf_evsel__hw_cache_type_op_res_name(type, op, i, - name, sizeof(name)); + __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; @@ -2794,6 +2860,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_sdt_events(NULL, NULL, name_only); metricgroup__print(true, true, NULL, name_only, details_flag); + + print_libpfm_events(name_only, long_desc); } int parse_events__is_hardcoded_term(struct parse_events_term *term) diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 27596cbd0ba0..1fe23a2f9b36 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -17,6 +17,7 @@ struct evlist; struct parse_events_error; struct option; +struct perf_pmu; struct tracepoint_path { char *system; @@ -31,6 +32,7 @@ bool have_tracepoints(struct list_head *evlist); const char *event_type(int type); int parse_events_option(const struct option *opt, const char *str, int unset); +int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset); int parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error); int parse_events_terms(struct list_head *terms, const char *str); @@ -127,6 +129,7 @@ struct parse_events_state { struct parse_events_error *error; struct evlist *evlist; struct list_head *terms; + int stoken; }; void parse_events__handle_error(struct parse_events_error *err, int idx, @@ -186,6 +189,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, bool auto_merge_stats, bool use_alias); +struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, + char *name, struct perf_pmu *pmu); + int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, struct list_head **listp); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index baa48f28d57d..002802e17059 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -209,10 +209,10 @@ modifier_bp [rwx]{1,3} %% %{ - { - int start_token; + struct parse_events_state *_parse_state = parse_events_get_extra(yyscanner); - start_token = parse_events_get_extra(yyscanner); + { + int start_token = _parse_state->stoken; if (start_token == PE_START_TERMS) BEGIN(config); @@ -220,7 +220,7 @@ modifier_bp [rwx]{1,3} BEGIN(event); if (start_token) { - parse_events_set_extra(NULL, yyscanner); + _parse_state->stoken = 0; /* * The flex parser does not init locations variable * via the scan_string interface, so we need do the @@ -252,7 +252,9 @@ modifier_bp [rwx]{1,3} BEGIN(INITIAL); REWIND(0); } - +, { + return ','; + } } <array>{ @@ -286,6 +288,7 @@ no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } +r{num_raw_hex} { return raw(yyscanner); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 94f8bcd83582..c4ca932d092d 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -44,7 +44,7 @@ static void free_list_evsel(struct list_head* list_evsel) list_for_each_entry_safe(evsel, tmp, list_evsel, core.node) { list_del_init(&evsel->core.node); - perf_evsel__delete(evsel); + evsel__delete(evsel); } free(list_evsel); } @@ -326,6 +326,7 @@ PE_NAME opt_pmu_config } parse_events_terms__delete($2); parse_events_terms__delete(orig_terms); + free(pattern); free($1); $$ = list; #undef CLEANUP_YYABORT @@ -706,6 +707,15 @@ event_term } event_term: +PE_RAW +{ + struct parse_events_term *term; + + ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_CONFIG, + NULL, $1, false, &@1, NULL)); + $$ = term; +} +| PE_NAME '=' PE_NAME { struct parse_events_term *term; diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c new file mode 100644 index 000000000000..1337965673d7 --- /dev/null +++ b/tools/perf/util/perf_api_probe.c @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "perf-sys.h" +#include "util/cloexec.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/parse-events.h" +#include "util/perf_api_probe.h" +#include <perf/cpumap.h> +#include <errno.h> + +typedef void (*setup_probe_fn_t)(struct evsel *evsel); + +static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) +{ + struct evlist *evlist; + struct evsel *evsel; + unsigned long flags = perf_event_open_cloexec_flag(); + int err = -EAGAIN, fd; + static pid_t pid = -1; + + evlist = evlist__new(); + if (!evlist) + return -ENOMEM; + + if (parse_events(evlist, str, NULL)) + goto out_delete; + + evsel = evlist__first(evlist); + + while (1) { + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + if (fd < 0) { + if (pid == -1 && errno == EACCES) { + pid = 0; + continue; + } + goto out_delete; + } + break; + } + close(fd); + + fn(evsel); + + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + if (fd < 0) { + if (errno == EINVAL) + err = -EINVAL; + goto out_delete; + } + close(fd); + err = 0; + +out_delete: + evlist__delete(evlist); + return err; +} + +static bool perf_probe_api(setup_probe_fn_t fn) +{ + const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; + struct perf_cpu_map *cpus; + int cpu, ret, i = 0; + + cpus = perf_cpu_map__new(NULL); + if (!cpus) + return false; + cpu = cpus->map[0]; + perf_cpu_map__put(cpus); + + do { + ret = perf_do_probe_api(fn, cpu, try[i++]); + if (!ret) + return true; + } while (ret == -EAGAIN && try[i]); + + return false; +} + +static void perf_probe_sample_identifier(struct evsel *evsel) +{ + evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER; +} + +static void perf_probe_comm_exec(struct evsel *evsel) +{ + evsel->core.attr.comm_exec = 1; +} + +static void perf_probe_context_switch(struct evsel *evsel) +{ + evsel->core.attr.context_switch = 1; +} + +bool perf_can_sample_identifier(void) +{ + return perf_probe_api(perf_probe_sample_identifier); +} + +bool perf_can_comm_exec(void) +{ + return perf_probe_api(perf_probe_comm_exec); +} + +bool perf_can_record_switch_events(void) +{ + return perf_probe_api(perf_probe_context_switch); +} + +bool perf_can_record_cpu_wide(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .exclude_kernel = 1, + }; + struct perf_cpu_map *cpus; + int cpu, fd; + + cpus = perf_cpu_map__new(NULL); + if (!cpus) + return false; + cpu = cpus->map[0]; + perf_cpu_map__put(cpus); + + fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); + if (fd < 0) + return false; + close(fd); + + return true; +} + +/* + * Architectures are expected to know if AUX area sampling is supported by the + * hardware. Here we check for kernel support. + */ +bool perf_can_aux_sample(void) +{ + struct perf_event_attr attr = { + .size = sizeof(struct perf_event_attr), + .exclude_kernel = 1, + /* + * Non-zero value causes the kernel to calculate the effective + * attribute size up to that byte. + */ + .aux_sample_size = 1, + }; + int fd; + + fd = sys_perf_event_open(&attr, -1, 0, -1, 0); + /* + * If the kernel attribute is big enough to contain aux_sample_size + * then we assume that it is supported. We are relying on the kernel to + * validate the attribute size before anything else that could be wrong. + */ + if (fd < 0 && errno == E2BIG) + return false; + if (fd >= 0) + close(fd); + + return true; +} diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h new file mode 100644 index 000000000000..706c3c6426e2 --- /dev/null +++ b/tools/perf/util/perf_api_probe.h @@ -0,0 +1,14 @@ + +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_API_PROBE_H +#define __PERF_API_PROBE_H + +#include <stdbool.h> + +bool perf_can_aux_sample(void); +bool perf_can_comm_exec(void); +bool perf_can_record_cpu_wide(void); +bool perf_can_record_switch_events(void); +bool perf_can_sample_identifier(void); + +#endif // __PERF_API_PROBE_H diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c new file mode 100644 index 000000000000..d735acb6c29c --- /dev/null +++ b/tools/perf/util/pfm.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Support for libpfm4 event encoding. + * + * Copyright 2020 Google LLC. + */ +#include "util/cpumap.h" +#include "util/debug.h" +#include "util/event.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/parse-events.h" +#include "util/pmu.h" +#include "util/pfm.h" + +#include <string.h> +#include <linux/kernel.h> +#include <perfmon/pfmlib_perf_event.h> + +static void libpfm_initialize(void) +{ + int ret; + + ret = pfm_initialize(); + if (ret != PFM_SUCCESS) { + ui__warning("libpfm failed to initialize: %s\n", + pfm_strerror(ret)); + } +} + +int parse_libpfm_events_option(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct evlist *evlist = *(struct evlist **)opt->value; + struct perf_event_attr attr; + struct perf_pmu *pmu; + struct evsel *evsel, *grp_leader = NULL; + char *p, *q, *p_orig; + const char *sep; + int grp_evt = -1; + int ret; + + libpfm_initialize(); + + p_orig = p = strdup(str); + if (!p) + return -1; + /* + * force loading of the PMU list + */ + perf_pmu__scan(NULL); + + for (q = p; strsep(&p, ",{}"); q = p) { + sep = p ? str + (p - p_orig - 1) : ""; + if (*sep == '{') { + if (grp_evt > -1) { + ui__error( + "nested event groups not supported\n"); + goto error; + } + grp_evt++; + } + + /* no event */ + if (*q == '\0') + continue; + + memset(&attr, 0, sizeof(attr)); + event_attr_init(&attr); + + ret = pfm_get_perf_event_encoding(q, PFM_PLM0|PFM_PLM3, + &attr, NULL, NULL); + + if (ret != PFM_SUCCESS) { + ui__error("failed to parse event %s : %s\n", str, + pfm_strerror(ret)); + goto error; + } + + pmu = perf_pmu__find_by_type((unsigned int)attr.type); + evsel = parse_events__add_event(evlist->core.nr_entries, + &attr, q, pmu); + if (evsel == NULL) + goto error; + + evsel->is_libpfm_event = true; + + evlist__add(evlist, evsel); + + if (grp_evt == 0) + grp_leader = evsel; + + if (grp_evt > -1) { + evsel->leader = grp_leader; + grp_leader->core.nr_members++; + grp_evt++; + } + + if (*sep == '}') { + if (grp_evt < 0) { + ui__error( + "cannot close a non-existing event group\n"); + goto error; + } + evlist->nr_groups++; + grp_leader = NULL; + grp_evt = -1; + } + } + return 0; +error: + free(p_orig); + return -1; +} + +static const char *srcs[PFM_ATTR_CTRL_MAX] = { + [PFM_ATTR_CTRL_UNKNOWN] = "???", + [PFM_ATTR_CTRL_PMU] = "PMU", + [PFM_ATTR_CTRL_PERF_EVENT] = "perf_event", +}; + +static void +print_attr_flags(pfm_event_attr_info_t *info) +{ + int n = 0; + + if (info->is_dfl) { + printf("[default] "); + n++; + } + + if (info->is_precise) { + printf("[precise] "); + n++; + } + + if (!n) + printf("- "); +} + +static void +print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc) +{ + pfm_event_attr_info_t ainfo; + const char *src; + int j, ret; + + ainfo.size = sizeof(ainfo); + + printf(" %s\n", info->name); + printf(" [%s]\n", info->desc); + if (long_desc) { + if (info->equiv) + printf(" Equiv: %s\n", info->equiv); + + printf(" Code : 0x%"PRIx64"\n", info->code); + } + pfm_for_each_event_attr(j, info) { + ret = pfm_get_event_attr_info(info->idx, j, + PFM_OS_PERF_EVENT_EXT, &ainfo); + if (ret != PFM_SUCCESS) + continue; + + if (ainfo.type == PFM_ATTR_UMASK) { + printf(" %s:%s\n", info->name, ainfo.name); + printf(" [%s]\n", ainfo.desc); + } + + if (!long_desc) + continue; + + if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX) + ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN; + + src = srcs[ainfo.ctrl]; + switch (ainfo.type) { + case PFM_ATTR_UMASK: + printf(" Umask : 0x%02"PRIx64" : %s: ", + ainfo.code, src); + print_attr_flags(&ainfo); + putchar('\n'); + break; + case PFM_ATTR_MOD_BOOL: + printf(" Modif : %s: [%s] : %s (boolean)\n", src, + ainfo.name, ainfo.desc); + break; + case PFM_ATTR_MOD_INTEGER: + printf(" Modif : %s: [%s] : %s (integer)\n", src, + ainfo.name, ainfo.desc); + break; + case PFM_ATTR_NONE: + case PFM_ATTR_RAW_UMASK: + case PFM_ATTR_MAX: + default: + printf(" Attr : %s: [%s] : %s\n", src, + ainfo.name, ainfo.desc); + } + } +} + +/* + * list all pmu::event:umask, pmu::event + * printed events may not be all valid combinations of umask for an event + */ +static void +print_libpfm_events_raw(pfm_pmu_info_t *pinfo, pfm_event_info_t *info) +{ + pfm_event_attr_info_t ainfo; + int j, ret; + bool has_umask = false; + + ainfo.size = sizeof(ainfo); + + pfm_for_each_event_attr(j, info) { + ret = pfm_get_event_attr_info(info->idx, j, + PFM_OS_PERF_EVENT_EXT, &ainfo); + if (ret != PFM_SUCCESS) + continue; + + if (ainfo.type != PFM_ATTR_UMASK) + continue; + + printf("%s::%s:%s\n", pinfo->name, info->name, ainfo.name); + has_umask = true; + } + if (!has_umask) + printf("%s::%s\n", pinfo->name, info->name); +} + +void print_libpfm_events(bool name_only, bool long_desc) +{ + pfm_event_info_t info; + pfm_pmu_info_t pinfo; + int i, p, ret; + + libpfm_initialize(); + + /* initialize to zero to indicate ABI version */ + info.size = sizeof(info); + pinfo.size = sizeof(pinfo); + + if (!name_only) + puts("\nList of pre-defined events (to be used in --pfm-events):\n"); + + pfm_for_all_pmus(p) { + bool printed_pmu = false; + + ret = pfm_get_pmu_info(p, &pinfo); + if (ret != PFM_SUCCESS) + continue; + + /* only print events that are supported by host HW */ + if (!pinfo.is_present) + continue; + + /* handled by perf directly */ + if (pinfo.pmu == PFM_PMU_PERF_EVENT) + continue; + + for (i = pinfo.first_event; i != -1; + i = pfm_get_event_next(i)) { + + ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT, + &info); + if (ret != PFM_SUCCESS) + continue; + + if (!name_only && !printed_pmu) { + printf("%s:\n", pinfo.name); + printed_pmu = true; + } + + if (!name_only) + print_libpfm_events_detailed(&info, long_desc); + else + print_libpfm_events_raw(&pinfo, &info); + } + if (!name_only && printed_pmu) + putchar('\n'); + } +} diff --git a/tools/perf/util/pfm.h b/tools/perf/util/pfm.h new file mode 100644 index 000000000000..7d70dda87012 --- /dev/null +++ b/tools/perf/util/pfm.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for libpfm4 event encoding. + * + * Copyright 2020 Google LLC. + */ +#ifndef __PERF_PFM_H +#define __PERF_PFM_H + +#include <subcmd/parse-options.h> + +#ifdef HAVE_LIBPFM +int parse_libpfm_events_option(const struct option *opt, const char *str, + int unset); + +void print_libpfm_events(bool name_only, bool long_desc); + +#else +#include <linux/compiler.h> + +static inline int parse_libpfm_events_option( + const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + return 0; +} + +static inline void print_libpfm_events(bool name_only __maybe_unused, + bool long_desc __maybe_unused) +{ +} + +#endif + + +#endif /* __PERF_PFM_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ef6a63f3d386..93fe72a9dc0b 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -18,6 +18,7 @@ #include <regex.h> #include <perf/cpumap.h> #include "debug.h" +#include "evsel.h" #include "pmu.h" #include "parse-events.h" #include "header.h" @@ -849,6 +850,7 @@ static struct perf_pmu *pmu_lookup(const char *name) INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); + INIT_LIST_HEAD(&pmu->caps); list_splice(&format, &pmu->format); list_splice(&aliases, &pmu->aliases); list_add_tail(&pmu->list, &pmus); @@ -869,6 +871,17 @@ static struct perf_pmu *pmu_find(const char *name) return NULL; } +struct perf_pmu *perf_pmu__find_by_type(unsigned int type) +{ + struct perf_pmu *pmu; + + list_for_each_entry(pmu, &pmus, list) + if (pmu->type == type) + return pmu; + + return NULL; +} + struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu) { /* @@ -884,6 +897,25 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu) return NULL; } +struct perf_pmu *evsel__find_pmu(struct evsel *evsel) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + if (pmu->type == evsel->core.attr.type) + break; + } + + return pmu; +} + +bool evsel__is_aux_event(struct evsel *evsel) +{ + struct perf_pmu *pmu = evsel__find_pmu(evsel); + + return pmu && pmu->auxtrace; +} + struct perf_pmu *perf_pmu__find(const char *name) { struct perf_pmu *pmu; @@ -1024,7 +1056,8 @@ error: * Setup one of config[12] attr members based on the * user input data - term parameter. */ -static int pmu_config_term(struct list_head *formats, +static int pmu_config_term(const char *pmu_name, + struct list_head *formats, struct perf_event_attr *attr, struct parse_events_term *term, struct list_head *head_terms, @@ -1050,16 +1083,24 @@ static int pmu_config_term(struct list_head *formats, format = pmu_find_format(formats, term->config); if (!format) { - if (verbose > 0) - printf("Invalid event/parameter '%s'\n", term->config); + char *pmu_term = pmu_formats_string(formats); + char *unknown_term; + char *help_msg; + + if (asprintf(&unknown_term, + "unknown term '%s' for pmu '%s'", + term->config, pmu_name) < 0) + unknown_term = NULL; + help_msg = parse_events_formats_error_string(pmu_term); if (err) { - char *pmu_term = pmu_formats_string(formats); - parse_events__handle_error(err, term->err_term, - strdup("unknown term"), - parse_events_formats_error_string(pmu_term)); - free(pmu_term); + unknown_term, + help_msg); + } else { + pr_debug("%s (%s)\n", unknown_term, help_msg); + free(unknown_term); } + free(pmu_term); return -EINVAL; } @@ -1136,7 +1177,7 @@ static int pmu_config_term(struct list_head *formats, return 0; } -int perf_pmu__config_terms(struct list_head *formats, +int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats, struct perf_event_attr *attr, struct list_head *head_terms, bool zero, struct parse_events_error *err) @@ -1144,7 +1185,7 @@ int perf_pmu__config_terms(struct list_head *formats, struct parse_events_term *term; list_for_each_entry(term, head_terms, list) { - if (pmu_config_term(formats, attr, term, head_terms, + if (pmu_config_term(pmu_name, formats, attr, term, head_terms, zero, err)) return -EINVAL; } @@ -1164,8 +1205,8 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, bool zero = !!pmu->default_config; attr->type = pmu->type; - return perf_pmu__config_terms(&pmu->format, attr, head_terms, - zero, err); + return perf_pmu__config_terms(pmu->name, &pmu->format, attr, + head_terms, zero, err); } static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, @@ -1574,3 +1615,84 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, va_end(args); return ret; } + +static int perf_pmu__new_caps(struct list_head *list, char *name, char *value) +{ + struct perf_pmu_caps *caps = zalloc(sizeof(*caps)); + + if (!caps) + return -ENOMEM; + + caps->name = strdup(name); + if (!caps->name) + goto free_caps; + caps->value = strndup(value, strlen(value) - 1); + if (!caps->value) + goto free_name; + list_add_tail(&caps->list, list); + return 0; + +free_name: + zfree(caps->name); +free_caps: + free(caps); + + return -ENOMEM; +} + +/* + * Reading/parsing the given pmu capabilities, which should be located at: + * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes. + * Return the number of capabilities + */ +int perf_pmu__caps_parse(struct perf_pmu *pmu) +{ + struct stat st; + char caps_path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + DIR *caps_dir; + struct dirent *evt_ent; + int nr_caps = 0; + + if (!sysfs) + return -1; + + snprintf(caps_path, PATH_MAX, + "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name); + + if (stat(caps_path, &st) < 0) + return 0; /* no error if caps does not exist */ + + caps_dir = opendir(caps_path); + if (!caps_dir) + return -EINVAL; + + while ((evt_ent = readdir(caps_dir)) != NULL) { + char path[PATH_MAX + NAME_MAX + 1]; + char *name = evt_ent->d_name; + char value[128]; + FILE *file; + + if (!strcmp(name, ".") || !strcmp(name, "..")) + continue; + + snprintf(path, sizeof(path), "%s/%s", caps_path, name); + + file = fopen(path, "r"); + if (!file) + continue; + + if (!fgets(value, sizeof(value), file) || + (perf_pmu__new_caps(&pmu->caps, name, value) < 0)) { + fclose(file); + continue; + } + + nr_caps++; + fclose(file); + } + + closedir(caps_dir); + + return nr_caps; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 5fb3f16828df..85e0c7f2515c 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -9,7 +9,7 @@ #include "parse-events.h" #include "pmu-events/pmu-events.h" -struct perf_evsel_config_term; +struct evsel_config_term; enum { PERF_PMU_FORMAT_VALUE_CONFIG, @@ -22,6 +22,12 @@ enum { struct perf_event_attr; +struct perf_pmu_caps { + char *name; + char *value; + struct list_head list; +}; + struct perf_pmu { char *name; __u32 type; @@ -33,6 +39,7 @@ struct perf_pmu { struct perf_cpu_map *cpus; struct list_head format; /* HEAD struct perf_pmu_format -> list */ struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ + struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ struct list_head list; /* ELEM */ }; @@ -65,10 +72,11 @@ struct perf_pmu_alias { }; struct perf_pmu *perf_pmu__find(const char *name); +struct perf_pmu *perf_pmu__find_by_type(unsigned int type); int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct list_head *head_terms, struct parse_events_error *error); -int perf_pmu__config_terms(struct list_head *formats, +int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats, struct perf_event_attr *attr, struct list_head *head_terms, bool zero, struct parse_events_error *error); @@ -107,4 +115,6 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); +int perf_pmu__caps_parse(struct perf_pmu *pmu); + #endif /* __PMU_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index eea132f512b0..a08f373d3305 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -102,7 +102,7 @@ void exit_probe_symbol_maps(void) symbol__exit(); } -static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) +static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap) { /* kmap->ref_reloc_sym should be set if host_machine is initialized */ struct kmap *kmap; @@ -114,6 +114,10 @@ static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) kmap = map__kmap(map); if (!kmap) return NULL; + + if (pmap) + *pmap = map; + return kmap->ref_reloc_sym; } @@ -125,7 +129,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, struct map *map; /* ref_reloc_sym is just a label. Need a special fix*/ - reloc_sym = kernel_get_ref_reloc_sym(); + reloc_sym = kernel_get_ref_reloc_sym(NULL); if (reloc_sym && strcmp(name, reloc_sym->name) == 0) *addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr; else { @@ -232,21 +236,22 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) static bool kprobe_blacklist__listed(unsigned long address); static bool kprobe_warn_out_range(const char *symbol, unsigned long address) { - u64 etext_addr = 0; - int ret; - - /* Get the address of _etext for checking non-probable text symbol */ - ret = kernel_get_symbol_address_by_name("_etext", &etext_addr, - false, false); + struct map *map; + bool ret = false; - if (ret == 0 && etext_addr < address) - pr_warning("%s is out of .text, skip it.\n", symbol); - else if (kprobe_blacklist__listed(address)) + map = kernel_get_module_map(NULL); + if (map) { + ret = address <= map->start || map->end < address; + if (ret) + pr_warning("%s is out of .text, skip it.\n", symbol); + map__put(map); + } + if (!ret && kprobe_blacklist__listed(address)) { pr_warning("%s is blacklisted function, skip it.\n", symbol); - else - return false; + ret = true; + } - return true; + return ret; } /* @@ -745,6 +750,7 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, int ntevs) { struct ref_reloc_sym *reloc_sym; + struct map *map; char *tmp; int i, skipped = 0; @@ -753,7 +759,7 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, return post_process_offline_probe_trace_events(tevs, ntevs, symbol_conf.vmlinux_name); - reloc_sym = kernel_get_ref_reloc_sym(); + reloc_sym = kernel_get_ref_reloc_sym(&map); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); return -EINVAL; @@ -764,9 +770,13 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, continue; if (tevs[i].point.retprobe && !kretprobe_offset_is_supported()) continue; - /* If we found a wrong one, mark it by NULL symbol */ + /* + * If we found a wrong one, mark it by NULL symbol. + * Since addresses in debuginfo is same as objdump, we need + * to convert it to addresses on memory. + */ if (kprobe_warn_out_range(tevs[i].point.symbol, - tevs[i].point.address)) { + map__objdump_2mem(map, tevs[i].point.address))) { tmp = NULL; skipped++; } else { @@ -1765,8 +1775,7 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev) fmt1_str = strtok_r(argv0_str, ":", &fmt); fmt2_str = strtok_r(NULL, "/", &fmt); fmt3_str = strtok_r(NULL, " \t", &fmt); - if (fmt1_str == NULL || strlen(fmt1_str) != 1 || fmt2_str == NULL - || fmt3_str == NULL) { + if (fmt1_str == NULL || fmt2_str == NULL || fmt3_str == NULL) { semantic_error("Failed to parse event name: %s\n", argv[0]); ret = -EINVAL; goto out; @@ -2936,7 +2945,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, /* Note that the symbols in the kmodule are not relocated */ if (!pev->uprobes && !pev->target && (!pp->retprobe || kretprobe_offset_is_supported())) { - reloc_sym = kernel_get_ref_reloc_sym(); + reloc_sym = kernel_get_ref_reloc_sym(NULL); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); ret = -EINVAL; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index e4cff49384f4..55924255c535 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -101,6 +101,7 @@ enum dso_binary_type distro_dwarf_types[] = { DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, DSO_BINARY_TYPE__NOT_FOUND, }; diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c index 80ff41fc45be..a1d1e4ef6257 100644 --- a/tools/perf/util/pstack.c +++ b/tools/perf/util/pstack.c @@ -15,7 +15,7 @@ struct pstack { unsigned short top; unsigned short max_nr_entries; - void *entries[0]; + void *entries[]; }; struct pstack *pstack__new(unsigned short max_nr_entries) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 83212c65848b..75a9b1d62bba 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -801,7 +801,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel, static void pyrf_evsel__delete(struct pyrf_evsel *pevsel) { - perf_evsel__exit(&pevsel->evsel); + evsel__exit(&pevsel->evsel); Py_TYPE(pevsel)->tp_free((PyObject*)pevsel); } @@ -1044,7 +1044,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, pevent->evsel = evsel; - err = perf_evsel__parse_sample(evsel, event, &pevent->sample); + err = evsel__parse_sample(evsel, event, &pevent->sample); /* Consume the even only after we parsed it out. */ perf_mmap__consume(&md->core); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 7def66168503..a4cc11592f6b 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -10,161 +10,64 @@ #include <subcmd/parse-options.h> #include <perf/cpumap.h> #include "cloexec.h" +#include "util/perf_api_probe.h" #include "record.h" #include "../perf-sys.h" -typedef void (*setup_probe_fn_t)(struct evsel *evsel); - -static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) +/* + * evsel__config_leader_sampling() uses special rules for leader sampling. + * However, if the leader is an AUX area event, then assume the event to sample + * is the next event. + */ +static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist) { - struct evlist *evlist; - struct evsel *evsel; - unsigned long flags = perf_event_open_cloexec_flag(); - int err = -EAGAIN, fd; - static pid_t pid = -1; - - evlist = evlist__new(); - if (!evlist) - return -ENOMEM; - - if (parse_events(evlist, str, NULL)) - goto out_delete; - - evsel = evlist__first(evlist); + struct evsel *leader = evsel->leader; - while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); - if (fd < 0) { - if (pid == -1 && errno == EACCES) { - pid = 0; - continue; - } - goto out_delete; + if (evsel__is_aux_event(leader)) { + evlist__for_each_entry(evlist, evsel) { + if (evsel->leader == leader && evsel != evsel->leader) + return evsel; } - break; - } - close(fd); - - fn(evsel); - - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); - if (fd < 0) { - if (errno == EINVAL) - err = -EINVAL; - goto out_delete; } - close(fd); - err = 0; - -out_delete: - evlist__delete(evlist); - return err; -} - -static bool perf_probe_api(setup_probe_fn_t fn) -{ - const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; - struct perf_cpu_map *cpus; - int cpu, ret, i = 0; - - cpus = perf_cpu_map__new(NULL); - if (!cpus) - return false; - cpu = cpus->map[0]; - perf_cpu_map__put(cpus); - - do { - ret = perf_do_probe_api(fn, cpu, try[i++]); - if (!ret) - return true; - } while (ret == -EAGAIN && try[i]); - return false; + return leader; } -static void perf_probe_sample_identifier(struct evsel *evsel) +static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist) { - evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER; -} - -static void perf_probe_comm_exec(struct evsel *evsel) -{ - evsel->core.attr.comm_exec = 1; -} - -static void perf_probe_context_switch(struct evsel *evsel) -{ - evsel->core.attr.context_switch = 1; -} - -bool perf_can_sample_identifier(void) -{ - return perf_probe_api(perf_probe_sample_identifier); -} + struct perf_event_attr *attr = &evsel->core.attr; + struct evsel *leader = evsel->leader; + struct evsel *read_sampler; -static bool perf_can_comm_exec(void) -{ - return perf_probe_api(perf_probe_comm_exec); -} + if (!leader->sample_read) + return; -bool perf_can_record_switch_events(void) -{ - return perf_probe_api(perf_probe_context_switch); -} + read_sampler = evsel__read_sampler(evsel, evlist); -bool perf_can_record_cpu_wide(void) -{ - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_CPU_CLOCK, - .exclude_kernel = 1, - }; - struct perf_cpu_map *cpus; - int cpu, fd; - - cpus = perf_cpu_map__new(NULL); - if (!cpus) - return false; - cpu = cpus->map[0]; - perf_cpu_map__put(cpus); + if (evsel == read_sampler) + return; - fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); - if (fd < 0) - return false; - close(fd); - - return true; -} - -/* - * Architectures are expected to know if AUX area sampling is supported by the - * hardware. Here we check for kernel support. - */ -bool perf_can_aux_sample(void) -{ - struct perf_event_attr attr = { - .size = sizeof(struct perf_event_attr), - .exclude_kernel = 1, - /* - * Non-zero value causes the kernel to calculate the effective - * attribute size up to that byte. - */ - .aux_sample_size = 1, - }; - int fd; - - fd = sys_perf_event_open(&attr, -1, 0, -1, 0); /* - * If the kernel attribute is big enough to contain aux_sample_size - * then we assume that it is supported. We are relying on the kernel to - * validate the attribute size before anything else that could be wrong. + * Disable sampling for all group members other than the leader in + * case the leader 'leads' the sampling, except when the leader is an + * AUX area event, in which case the 2nd event in the group is the one + * that 'leads' the sampling. */ - if (fd < 0 && errno == E2BIG) - return false; - if (fd >= 0) - close(fd); + attr->freq = 0; + attr->sample_freq = 0; + attr->sample_period = 0; + attr->write_backward = 0; - return true; + /* + * We don't get a sample for slave events, we make them when delivering + * the group leader sample. Set the slave event to follow the master + * sample_type to ease up reporting. + * An AUX area event also has sample_type requirements, so also include + * the sample type bits from the leader's sample_type to cover that + * case. + */ + attr->sample_type = read_sampler->core.attr.sample_type | + leader->core.attr.sample_type; } void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, @@ -188,11 +91,15 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, use_comm_exec = perf_can_comm_exec(); evlist__for_each_entry(evlist, evsel) { - perf_evsel__config(evsel, opts, callchain); + evsel__config(evsel, opts, callchain); if (evsel->tracking && use_comm_exec) evsel->core.attr.comm_exec = 1; } + /* Configure leader sampling here now that the sample type is known */ + evlist__for_each_entry(evlist, evsel) + evsel__config_leader_sampling(evsel, evlist); + if (opts->full_auxtrace) { /* * Need to be able to synthesize and parse selected events with @@ -215,7 +122,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, if (sample_id) { evlist__for_each_entry(evlist, evsel) - perf_evsel__set_sample_id(evsel, use_sample_identifier); + evsel__set_sample_id(evsel, use_sample_identifier); } perf_evlist__set_id_pos(evlist); diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 24316458be20..39d1de4b2a36 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -36,6 +36,7 @@ struct record_opts { bool record_namespaces; bool record_cgroup; bool record_switch_events; + bool record_switch_events_set; bool all_kernel; bool all_user; bool kernel_callchains; @@ -68,6 +69,7 @@ struct record_opts { int affinity; int mmap_flush; unsigned int comp_level; + unsigned int nr_threads_synthesize; }; extern const char * const *record_usage; @@ -75,4 +77,9 @@ extern struct option *record_options; int record__parse_freq(const struct option *opt, const char *str, int unset); +static inline bool record_opts__no_switch_events(const struct record_opts *opts) +{ + return opts->record_switch_events_set && !opts->record_switch_events; +} + #endif // _PERF_RECORD_H diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h index d4356030b504..f55ca07f3ca1 100644 --- a/tools/perf/util/s390-cpumcf-kernel.h +++ b/tools/perf/util/s390-cpumcf-kernel.h @@ -11,6 +11,7 @@ #define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */ #define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */ +#define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */ struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ unsigned int def:16; /* 0-15 Data Entry Format */ diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 6785cd87aa4d..f8861998e5bd 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -1047,6 +1047,14 @@ static void s390_cpumsf_free(struct perf_session *session) free(sf); } +static bool +s390_cpumsf_evsel_is_auxtrace(struct perf_session *session __maybe_unused, + struct evsel *evsel) +{ + return evsel->core.attr.type == PERF_TYPE_RAW && + evsel->core.attr.config == PERF_EVENT_CPUM_SF_DIAG; +} + static int s390_cpumsf_get_type(const char *cpuid) { int ret, family = 0; @@ -1071,7 +1079,8 @@ static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) itops->pwr_events || itops->errors || itops->dont_decode || itops->calls || itops->returns || itops->callchain || itops->thread_stack || - itops->last_branch; + itops->last_branch || itops->add_callchain || + itops->add_last_branch; if (!ison) return true; pr_err("Unsupported --itrace options specified\n"); @@ -1142,6 +1151,7 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event, sf->auxtrace.flush_events = s390_cpumsf_flush; sf->auxtrace.free_events = s390_cpumsf_free_events; sf->auxtrace.free = s390_cpumsf_free; + sf->auxtrace.evsel_is_auxtrace = s390_cpumsf_evsel_is_auxtrace; session->auxtrace = &sf->auxtrace; if (dump_trace) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 2c372cf5495e..739516fdf6e3 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -741,7 +741,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, if (!dict_sample) Py_FatalError("couldn't create Python dictionary"); - pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel))); + pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(evsel__name(evsel))); pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->core.attr, sizeof(evsel->core.attr))); pydict_set_item_string_decref(dict_sample, "pid", @@ -968,7 +968,7 @@ static int python_export_evsel(struct db_export *dbe, struct evsel *evsel) t = tuple_new(2); tuple_set_u64(t, 0, evsel->db_id); - tuple_set_string(t, 1, perf_evsel__name(evsel)); + tuple_set_string(t, 1, evsel__name(evsel)); call_object(tables->evsel_handler, t, "evsel_table"); @@ -1349,7 +1349,7 @@ static void get_handler_name(char *str, size_t size, { char *p = str; - scnprintf(str, size, "stat__%s", perf_evsel__name(evsel)); + scnprintf(str, size, "stat__%s", evsel__name(evsel)); while ((p = strchr(p, ':'))) { *p = '_'; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0b0bfe5bef17..1a157e84a04a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -33,7 +33,6 @@ #include "../perf.h" #include "arch/common.h" #include <internal/lib.h> -#include <linux/err.h> #ifdef HAVE_ZSTD_SUPPORT static int perf_session__process_compressed_event(struct perf_session *session, @@ -1059,7 +1058,7 @@ static void callchain__printf(struct evsel *evsel, unsigned int i; struct ip_callchain *callchain = sample->callchain; - if (perf_evsel__has_branch_callstack(evsel)) + if (evsel__has_branch_callstack(evsel)) callchain__lbr_callstack_printf(sample); printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr); @@ -1104,7 +1103,7 @@ static void regs_dump__printf(u64 mask, u64 *regs) for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) { u64 val = regs[i++]; - printf(".... %-5s 0x%" PRIx64 "\n", + printf(".... %-5s 0x%016" PRIx64 "\n", perf_reg_name(rid), val); } } @@ -1243,8 +1242,8 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (evsel__has_callchain(evsel)) callchain__printf(evsel, sample); - if (sample_type & PERF_SAMPLE_BRANCH_STACK) - branch_stack__printf(sample, perf_evsel__has_branch_callstack(evsel)); + if (evsel__has_br_stack(evsel)) + branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); if (sample_type & PERF_SAMPLE_REGS_USER) regs_user__printf(sample); @@ -1280,8 +1279,7 @@ static void dump_read(struct evsel *evsel, union perf_event *event) return; printf(": %d %d %s %" PRI_lu64 "\n", event->read.pid, event->read.tid, - perf_evsel__name(evsel), - event->read.value); + evsel__name(evsel), event->read.value); if (!evsel) return; @@ -1543,8 +1541,13 @@ static s64 perf_session__process_user_event(struct perf_session *session, */ return 0; case PERF_RECORD_HEADER_TRACING_DATA: - /* setup for reading amidst mmap */ - lseek(fd, file_offset, SEEK_SET); + /* + * Setup for reading amidst mmap, but only when we + * are in 'file' mode. The 'pipe' fd is in proper + * place already. + */ + if (!perf_data__is_pipe(session->data)) + lseek(fd, file_offset, SEEK_SET); return tool->tracing_data(session, event); case PERF_RECORD_HEADER_BUILD_ID: return tool->build_id(session, event); diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c new file mode 100644 index 000000000000..ded9ced02797 --- /dev/null +++ b/tools/perf/util/sideband_evlist.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "util/debug.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/mmap.h" +#include "util/perf_api_probe.h" +#include <perf/mmap.h> +#include <linux/perf_event.h> +#include <limits.h> +#include <pthread.h> +#include <sched.h> +#include <stdbool.h> + +int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, + evsel__sb_cb_t cb, void *data) +{ + struct evsel *evsel; + + if (!attr->sample_id_all) { + pr_warning("enabling sample_id_all for all side band events\n"); + attr->sample_id_all = 1; + } + + evsel = evsel__new_idx(attr, evlist->core.nr_entries); + if (!evsel) + return -1; + + evsel->side_band.cb = cb; + evsel->side_band.data = data; + evlist__add(evlist, evsel); + return 0; +} + +static void *perf_evlist__poll_thread(void *arg) +{ + struct evlist *evlist = arg; + bool draining = false; + int i, done = 0; + /* + * In order to read symbols from other namespaces perf to needs to call + * setns(2). This isn't permitted if the struct_fs has multiple users. + * unshare(2) the fs so that we may continue to setns into namespaces + * that we're observing when, for instance, reading the build-ids at + * the end of a 'perf record' session. + */ + unshare(CLONE_FS); + + while (!done) { + bool got_data = false; + + if (evlist->thread.done) + draining = true; + + if (!draining) + evlist__poll(evlist, 1000); + + for (i = 0; i < evlist->core.nr_mmaps; i++) { + struct mmap *map = &evlist->mmap[i]; + union perf_event *event; + + if (perf_mmap__read_init(&map->core)) + continue; + while ((event = perf_mmap__read_event(&map->core)) != NULL) { + struct evsel *evsel = perf_evlist__event2evsel(evlist, event); + + if (evsel && evsel->side_band.cb) + evsel->side_band.cb(event, evsel->side_band.data); + else + pr_warning("cannot locate proper evsel for the side band event\n"); + + perf_mmap__consume(&map->core); + got_data = true; + } + perf_mmap__read_done(&map->core); + } + + if (draining && !got_data) + break; + } + return NULL; +} + +void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_id_all = 1; + evsel->core.attr.watermark = 1; + evsel->core.attr.wakeup_watermark = 1; + evsel->side_band.cb = cb; + evsel->side_band.data = data; + } +} + +int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target) +{ + struct evsel *counter; + + if (!evlist) + return 0; + + if (perf_evlist__create_maps(evlist, target)) + goto out_delete_evlist; + + if (evlist->core.nr_entries > 1) { + bool can_sample_identifier = perf_can_sample_identifier(); + + evlist__for_each_entry(evlist, counter) + evsel__set_sample_id(counter, can_sample_identifier); + + perf_evlist__set_id_pos(evlist); + } + + evlist__for_each_entry(evlist, counter) { + if (evsel__open(counter, evlist->core.cpus, evlist->core.threads) < 0) + goto out_delete_evlist; + } + + if (evlist__mmap(evlist, UINT_MAX)) + goto out_delete_evlist; + + evlist__for_each_entry(evlist, counter) { + if (evsel__enable(counter)) + goto out_delete_evlist; + } + + evlist->thread.done = 0; + if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist)) + goto out_delete_evlist; + + return 0; + +out_delete_evlist: + evlist__delete(evlist); + evlist = NULL; + return -1; +} + +void perf_evlist__stop_sb_thread(struct evlist *evlist) +{ + if (!evlist) + return; + evlist->thread.done = 1; + pthread_join(evlist->thread.th, NULL); + evlist__delete(evlist); +} diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 3b791ef2cd50..20bacd5972ad 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -15,6 +15,9 @@ int smt_on(void) if (cached) return cached_result; + if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) > 0) + goto done; + ncpu = sysconf(_SC_NPROCESSORS_CONF); for (cpu = 0; cpu < ncpu; cpu++) { unsigned long long siblings; @@ -24,13 +27,13 @@ int smt_on(void) snprintf(fn, sizeof fn, "devices/system/cpu/cpu%d/topology/core_cpus", cpu); - if (access(fn, F_OK) == -1) { + if (sysfs__read_str(fn, &str, &strlen) < 0) { snprintf(fn, sizeof fn, "devices/system/cpu/cpu%d/topology/thread_siblings", cpu); + if (sysfs__read_str(fn, &str, &strlen) < 0) + continue; } - if (sysfs__read_str(fn, &str, &strlen) < 0) - continue; /* Entry is hex, but does not have 0x, so need custom parser */ siblings = strtoull(str, NULL, 16); free(str); @@ -42,6 +45,7 @@ int smt_on(void) } if (!cached) { cached_result = 0; +done: cached = true; } return cached_result; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index f14cc728c358..d42339df20f8 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -237,7 +237,7 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip) return (int64_t)(right_ip - left_ip); } -static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) +int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) { if (!sym_l || !sym_r) return cmp_null(sym_l, sym_r); @@ -300,8 +300,14 @@ static int _hist_entry__sym_snprintf(struct map_symbol *ms, if (verbose > 0) { char o = map ? dso__symtab_origin(map->dso) : '!'; + u64 rip = ip; + + if (map && map->dso && map->dso->kernel + && map->dso->adjust_symbols) + rip = map->unmap_ip(map, ip); + ret += repsep_snprintf(bf, size, "%-#*llx %c ", - BITS_PER_LONG / 4 + 2, ip, o); + BITS_PER_LONG / 4 + 2, rip, o); } ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); @@ -2354,7 +2360,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name) evsel = evlist__first(evlist); while (--nr > 0) - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); return evsel; } @@ -2811,7 +2817,7 @@ static char *prefix_if_not_in(const char *pre, char *str) return str; if (asprintf(&n, "%s,%s", pre, str) < 0) - return NULL; + n = NULL; free(str); return n; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index cfa6ac6f7d06..66d39c4cfe2b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -311,5 +311,7 @@ int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); +int64_t +_sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r); char *hist_entry__srcline(struct hist_entry *he); #endif /* __PERF_SORT_H */ diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 9e757d18d713..3c6976f7574c 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -237,8 +237,6 @@ static bool valid_only_metric(const char *unit) if (!unit) return false; if (strstr(unit, "/sec") || - strstr(unit, "hz") || - strstr(unit, "Hz") || strstr(unit, "CPUs utilized")) return false; return true; @@ -248,7 +246,7 @@ static const char *fixunit(char *buf, struct evsel *evsel, const char *unit) { if (!strncmp(unit, "of all", 6)) { - snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), + snprintf(buf, 1024, "%s %s", evsel__name(evsel), unit); return buf; } @@ -335,7 +333,7 @@ static int first_shadow_cpu(struct perf_stat_config *config, if (config->aggr_mode == AGGR_GLOBAL) return 0; - for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + for (i = 0; i < evsel__nr_cpus(evsel); i++) { int cpu2 = evsel__cpus(evsel)->map[i]; if (config->aggr_get_id(config, evlist->core.cpus, cpu2) == id) @@ -369,7 +367,7 @@ static void abs_printout(struct perf_stat_config *config, config->csv_output ? 0 : config->unit_width, evsel->unit, config->csv_sep); - fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel)); + fprintf(output, "%-*s", config->csv_output ? 0 : 25, evsel__name(evsel)); print_cgroup(config, evsel); } @@ -463,8 +461,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, counter->unit, config->csv_sep); fprintf(config->output, "%*s", - config->csv_output ? 0 : -25, - perf_evsel__name(counter)); + config->csv_output ? 0 : -25, evsel__name(counter)); print_cgroup(config, counter); @@ -510,7 +507,7 @@ static void aggr_update_shadow(struct perf_stat_config *config, id = config->aggr_map->map[s]; evlist__for_each_entry(evlist, counter) { val = 0; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { s2 = config->aggr_get_id(config, evlist->core.cpus, cpu); if (s2 != id) continue; @@ -561,11 +558,11 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c alias = list_prepare_entry(counter, &(evlist->core.entries), core.node); list_for_each_entry_continue (alias, &evlist->core.entries, core.node) { - if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || + if (strcmp(evsel__name(alias), evsel__name(counter)) || alias->scale != counter->scale || alias->cgrp != counter->cgrp || strcmp(alias->unit, counter->unit) || - perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter) || + evsel__is_clock(alias) != evsel__is_clock(counter) || !strcmp(alias->pmu_name, counter->pmu_name)) break; alias->merged_stat = true; @@ -601,7 +598,7 @@ static void aggr_cb(struct perf_stat_config *config, struct aggr_data *ad = data; int cpu, s2; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct perf_counts_values *counts; s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu); @@ -849,7 +846,7 @@ static void print_counter(struct perf_stat_config *config, double uval; int cpu; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct aggr_data ad = { .cpu = cpu }; if (!collect_data(config, counter, counter_cb, &ad)) @@ -1150,7 +1147,7 @@ static void print_percore_thread(struct perf_stat_config *config, int s, s2, id; bool first = true; - for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) { + for (int i = 0; i < evsel__nr_cpus(counter); i++) { s2 = config->aggr_get_id(config, evsel__cpus(counter), i); for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 03ecb8cd0eec..a7c13a88ecb9 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -216,9 +216,9 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, count *= counter->scale; - if (perf_evsel__is_clock(counter)) + if (evsel__is_clock(counter)) update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); - else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) + else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); @@ -241,25 +241,25 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu, count); - else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) + else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, ctx, cpu, count); - else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) + else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, ctx, cpu, count); - else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) + else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); - else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) + else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); else if (perf_stat_evsel__is(counter, SMI_NUM)) update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); @@ -323,35 +323,46 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) { struct evsel *counter, *leader, **metric_events, *oc; bool found; - const char **metric_names; + struct expr_parse_ctx ctx; + struct hashmap_entry *cur; + size_t bkt; int i; - int num_metric_names; + expr__ctx_init(&ctx); evlist__for_each_entry(evsel_list, counter) { bool invalid = false; leader = counter->leader; if (!counter->metric_expr) continue; + + expr__ctx_clear(&ctx); metric_events = counter->metric_events; if (!metric_events) { - if (expr__find_other(counter->metric_expr, counter->name, - &metric_names, &num_metric_names) < 0) + if (expr__find_other(counter->metric_expr, + counter->name, + &ctx, 1) < 0) continue; metric_events = calloc(sizeof(struct evsel *), - num_metric_names + 1); - if (!metric_events) + hashmap__size(&ctx.ids) + 1); + if (!metric_events) { + expr__ctx_clear(&ctx); return; + } counter->metric_events = metric_events; } - for (i = 0; i < num_metric_names; i++) { + i = 0; + hashmap__for_each_entry((&ctx.ids), cur, bkt) { + const char *metric_name = (const char *)cur->key; + found = false; if (leader) { /* Search in group */ for_each_group_member (oc, leader) { - if (!strcasecmp(oc->name, metric_names[i]) && + if (!strcasecmp(oc->name, + metric_name) && !oc->collect_stat) { found = true; break; @@ -360,7 +371,8 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } if (!found) { /* Search ignoring groups */ - oc = perf_stat__find_event(evsel_list, metric_names[i]); + oc = perf_stat__find_event(evsel_list, + metric_name); } if (!oc) { /* Deduping one is good enough to handle duplicated PMUs. */ @@ -373,27 +385,28 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) * of events. So we ask the user instead to add the missing * events. */ - if (!printed || strcasecmp(printed, metric_names[i])) { + if (!printed || + strcasecmp(printed, metric_name)) { fprintf(stderr, "Add %s event to groups to get metric expression for %s\n", - metric_names[i], + metric_name, counter->name); - printed = strdup(metric_names[i]); + printed = strdup(metric_name); } invalid = true; continue; } - metric_events[i] = oc; + metric_events[i++] = oc; oc->collect_stat = true; } metric_events[i] = NULL; - free(metric_names); if (invalid) { free(metric_events); counter->metric_events = NULL; counter->metric_expr = NULL; } } + expr__ctx_clear(&ctx); } static double runtime_stat_avg(struct runtime_stat *st, @@ -723,21 +736,19 @@ static void generic_metric(struct perf_stat_config *config, char *name, const char *metric_name, const char *metric_unit, - double avg, + int runtime, int cpu, struct perf_stat_output_ctx *out, struct runtime_stat *st) { print_metric_t print_metric = out->print_metric; - struct parse_ctx pctx; + struct expr_parse_ctx pctx; double ratio, scale; int i; void *ctxp = out->ctx; char *n, *pn; expr__ctx_init(&pctx); - /* Must be first id entry */ - expr__add_id(&pctx, name, avg); for (i = 0; metric_events[i]; i++) { struct saved_value *v; struct stats *stats; @@ -777,7 +788,7 @@ static void generic_metric(struct perf_stat_config *config, } if (!metric_events[i]) { - if (expr__parse(&ratio, &pctx, metric_expr) == 0) { + if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) { char *unit; char metric_bf[64]; @@ -786,13 +797,17 @@ static void generic_metric(struct perf_stat_config *config, &unit, &scale) >= 0) { ratio *= scale; } - - scnprintf(metric_bf, sizeof(metric_bf), + if (strstr(metric_expr, "?")) + scnprintf(metric_bf, sizeof(metric_bf), + "%s %s_%d", unit, metric_name, runtime); + else + scnprintf(metric_bf, sizeof(metric_bf), "%s %s", unit, metric_name); + print_metric(config, ctxp, NULL, "%8.1f", metric_bf, ratio); } else { - print_metric(config, ctxp, NULL, "%8.1f", + print_metric(config, ctxp, NULL, "%8.2f", metric_name ? metric_name : out->force_header ? name : "", @@ -809,8 +824,7 @@ static void generic_metric(struct perf_stat_config *config, (metric_name ? metric_name : name) : "", 0); } - for (i = 1; i < pctx.num_ids; i++) - zfree(&pctx.ids[i].name); + expr__ctx_clear(&pctx); } void perf_stat__print_shadow_stats(struct perf_stat_config *config, @@ -828,7 +842,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct metric_event *me; int num = 1; - if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { + if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) { @@ -853,7 +867,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, "stalled cycles per insn", ratio); } - } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { + } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) print_branch_misses(config, cpu, evsel, avg, out, st); else @@ -908,7 +922,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_ll_cache_misses(config, cpu, evsel, avg, out, st); else print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { + } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); if (total) @@ -919,11 +933,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, "of all cache refs", ratio); else print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); - } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { + } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); - } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { + } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { + } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) { @@ -974,7 +988,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ratio = total / avg; print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio); - } else if (perf_evsel__is_clock(evsel)) { + } else if (evsel__is_clock(evsel)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", avg / (ratio * evsel->scale)); @@ -1022,7 +1036,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, - evsel->metric_name, NULL, avg, cpu, out, st); + evsel->metric_name, NULL, 1, cpu, out, st); } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; char unit_buf[10]; @@ -1051,7 +1065,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, out->new_line(config, ctxp); generic_metric(config, mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, - mexp->metric_unit, avg, cpu, out, st); + mexp->metric_unit, mexp->runtime, cpu, out, st); } } if (num == 0) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5f26137b8d60..cdb154381a87 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -108,14 +108,14 @@ static void perf_stat_evsel_id_init(struct evsel *evsel) /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) { - if (!strcmp(perf_evsel__name(evsel), id_str[i])) { + if (!strcmp(evsel__name(evsel), id_str[i])) { ps->id = i; break; } } } -static void perf_evsel__reset_stat_priv(struct evsel *evsel) +static void evsel__reset_stat_priv(struct evsel *evsel) { int i; struct perf_stat_evsel *ps = evsel->stats; @@ -126,16 +126,16 @@ static void perf_evsel__reset_stat_priv(struct evsel *evsel) perf_stat_evsel_id_init(evsel); } -static int perf_evsel__alloc_stat_priv(struct evsel *evsel) +static int evsel__alloc_stat_priv(struct evsel *evsel) { evsel->stats = zalloc(sizeof(struct perf_stat_evsel)); if (evsel->stats == NULL) return -ENOMEM; - perf_evsel__reset_stat_priv(evsel); + evsel__reset_stat_priv(evsel); return 0; } -static void perf_evsel__free_stat_priv(struct evsel *evsel) +static void evsel__free_stat_priv(struct evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; @@ -144,8 +144,7 @@ static void perf_evsel__free_stat_priv(struct evsel *evsel) zfree(&evsel->stats); } -static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel, - int ncpus, int nthreads) +static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads) { struct perf_counts *counts; @@ -156,29 +155,26 @@ static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel, return counts ? 0 : -ENOMEM; } -static void perf_evsel__free_prev_raw_counts(struct evsel *evsel) +static void evsel__free_prev_raw_counts(struct evsel *evsel) { perf_counts__delete(evsel->prev_raw_counts); evsel->prev_raw_counts = NULL; } -static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel) +static void evsel__reset_prev_raw_counts(struct evsel *evsel) { - if (evsel->prev_raw_counts) { - evsel->prev_raw_counts->aggr.val = 0; - evsel->prev_raw_counts->aggr.ena = 0; - evsel->prev_raw_counts->aggr.run = 0; - } + if (evsel->prev_raw_counts) + perf_counts__reset(evsel->prev_raw_counts); } -static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) +static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) { - int ncpus = perf_evsel__nr_cpus(evsel); + int ncpus = evsel__nr_cpus(evsel); int nthreads = perf_thread_map__nr(evsel->core.threads); - if (perf_evsel__alloc_stat_priv(evsel) < 0 || - perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || - (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) + if (evsel__alloc_stat_priv(evsel) < 0 || + evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || + (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) return -ENOMEM; return 0; @@ -189,7 +185,7 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (perf_evsel__alloc_stats(evsel, alloc_raw)) + if (evsel__alloc_stats(evsel, alloc_raw)) goto out_free; } @@ -205,9 +201,9 @@ void perf_evlist__free_stats(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - perf_evsel__free_stat_priv(evsel); - perf_evsel__free_counts(evsel); - perf_evsel__free_prev_raw_counts(evsel); + evsel__free_stat_priv(evsel); + evsel__free_counts(evsel); + evsel__free_prev_raw_counts(evsel); } } @@ -216,8 +212,8 @@ void perf_evlist__reset_stats(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - perf_evsel__reset_stat_priv(evsel); - perf_evsel__reset_counts(evsel); + evsel__reset_stat_priv(evsel); + evsel__reset_counts(evsel); } } @@ -226,7 +222,51 @@ void perf_evlist__reset_prev_raw_counts(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) - perf_evsel__reset_prev_raw_counts(evsel); + evsel__reset_prev_raw_counts(evsel); +} + +static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel) +{ + int ncpus = evsel__nr_cpus(evsel); + int nthreads = perf_thread_map__nr(evsel->core.threads); + + for (int thread = 0; thread < nthreads; thread++) { + for (int cpu = 0; cpu < ncpus; cpu++) { + *perf_counts(evsel->counts, cpu, thread) = + *perf_counts(evsel->prev_raw_counts, cpu, + thread); + } + } + + evsel->counts->aggr = evsel->prev_raw_counts->aggr; +} + +void perf_evlist__copy_prev_raw_counts(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + perf_evsel__copy_prev_raw_counts(evsel); +} + +void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist) +{ + struct evsel *evsel; + + /* + * To collect the overall statistics for interval mode, + * we copy the counts from evsel->prev_raw_counts to + * evsel->counts. The perf_stat_process_counter creates + * aggr values from per cpu values, but the per cpu values + * are 0 for AGGR_GLOBAL. So we use a trick that saves the + * previous aggr value to the first member of perf_counts, + * then aggr calculation in process_counter_values can work + * correctly. + */ + evlist__for_each_entry(evlist, evsel) { + *perf_counts(evsel->prev_raw_counts, 0, 0) = + evsel->prev_raw_counts->aggr; + } } static void zero_per_pkg(struct evsel *counter) @@ -302,7 +342,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, case AGGR_NODE: case AGGR_NONE: if (!evsel->snapshot) - perf_evsel__compute_deltas(evsel, cpu, thread, count); + evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, config->scale, NULL); if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { perf_stat__update_shadow_stats(evsel, count->val, @@ -334,7 +374,7 @@ static int process_counter_maps(struct perf_stat_config *config, struct evsel *counter) { int nthreads = perf_thread_map__nr(counter->core.threads); - int ncpus = perf_evsel__nr_cpus(counter); + int ncpus = evsel__nr_cpus(counter); int cpu, thread; if (counter->core.system_wide) @@ -368,8 +408,10 @@ int perf_stat_process_counter(struct perf_stat_config *config, * interval mode, otherwise overall avg running * averages will be shown for each interval. */ - if (config->interval) - init_stats(ps->res_stats); + if (config->interval || config->summary) { + for (i = 0; i < 3; i++) + init_stats(&ps->res_stats[i]); + } if (counter->per_pkg) zero_per_pkg(counter); @@ -382,7 +424,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, return 0; if (!counter->snapshot) - perf_evsel__compute_deltas(counter, -1, -1, aggr); + evsel__compute_deltas(counter, -1, -1, aggr); perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); for (i = 0; i < 3; i++) @@ -390,7 +432,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, if (verbose > 0) { fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - perf_evsel__name(counter), count[0], count[1], count[2]); + evsel__name(counter), count[0], count[1], count[2]); } /* @@ -507,7 +549,7 @@ int create_perf_stat_counter(struct evsel *evsel, * either manually by us or by kernel via enable_on_exec * set later. */ - if (perf_evsel__is_group_leader(evsel)) { + if (evsel__is_group_leader(evsel)) { attr->disabled = 1; /* @@ -519,7 +561,7 @@ int create_perf_stat_counter(struct evsel *evsel, } if (target__has_cpu(target) && !target__has_per_thread(target)) - return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu); + return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu); - return perf_evsel__open_per_thread(evsel, evsel->core.threads); + return evsel__open_per_thread(evsel, evsel->core.threads); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index b4fdfaa7f2c0..f75ae679eb28 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -110,6 +110,9 @@ struct perf_stat_config { bool all_kernel; bool all_user; bool percore_show_thread; + bool summary; + bool metric_no_group; + bool metric_no_merge; FILE *output; unsigned int interval; unsigned int timeout; @@ -132,6 +135,8 @@ struct perf_stat_config { struct rblist metric_events; }; +void perf_stat__set_big_num(int set); + void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); double stddev_stats(struct stats *stats); @@ -198,6 +203,8 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct evlist *evlist); void perf_evlist__reset_stats(struct evlist *evlist); void perf_evlist__reset_prev_raw_counts(struct evlist *evlist); +void perf_evlist__copy_prev_raw_counts(struct evlist *evlist); +void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index be5b493f8284..5e43054bffea 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1458,6 +1458,7 @@ struct kcore_copy_info { u64 first_symbol; u64 last_symbol; u64 first_module; + u64 first_module_symbol; u64 last_module_symbol; size_t phnum; struct list_head phdrs; @@ -1534,6 +1535,8 @@ static int kcore_copy__process_kallsyms(void *arg, const char *name, char type, return 0; if (strchr(name, '[')) { + if (!kci->first_module_symbol || start < kci->first_module_symbol) + kci->first_module_symbol = start; if (start > kci->last_module_symbol) kci->last_module_symbol = start; return 0; @@ -1731,6 +1734,10 @@ static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir, kci->etext += page_size; } + if (kci->first_module_symbol && + (!kci->first_module || kci->first_module_symbol < kci->first_module)) + kci->first_module = kci->first_module_symbol; + kci->first_module = round_down(kci->first_module, page_size); if (kci->last_module_symbol) { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 26bc6a0096ce..5ddf84dcbae7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -79,6 +79,7 @@ static enum dso_binary_type binary_type_symtab[] = { DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP, DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, + DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, DSO_BINARY_TYPE__NOT_FOUND, }; @@ -566,6 +567,20 @@ void dso__sort_by_name(struct dso *dso) return symbols__sort_by_name(&dso->symbol_names, &dso->symbols); } +/* + * While we find nice hex chars, build a long_val. + * Return number of chars processed. + */ +static int hex2u64(const char *ptr, u64 *long_val) +{ + char *p; + + *long_val = strtoull(ptr, &p, 16); + + return p - ptr; +} + + int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, u64 start, u64 size)) @@ -1209,6 +1224,7 @@ int maps__merge_in(struct maps *kmaps, struct map *new_map) m->end = old_map->start; list_add_tail(&m->node, &merged); + new_map->pgoff += old_map->end - new_map->start; new_map->start = old_map->end; } } else { @@ -1229,6 +1245,7 @@ int maps__merge_in(struct maps *kmaps, struct map *new_map) * |new......| -> |new...| * |old....| -> |old....| */ + new_map->pgoff += old_map->end - new_map->start; new_map->start = old_map->end; } } @@ -1515,6 +1532,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: case DSO_BINARY_TYPE__FEDORA_DEBUGINFO: case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: + case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: return !kmod && dso->kernel == DSO_TYPE_USER; @@ -1544,6 +1562,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, return true; case DSO_BINARY_TYPE__BPF_PROG_INFO: + case DSO_BINARY_TYPE__BPF_IMAGE: case DSO_BINARY_TYPE__NOT_FOUND: default: return false; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 93fc43db1be3..ff4f4c47e148 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -55,7 +55,7 @@ struct symbol { u8 inlined:1; u8 arch_sym; bool annotate2; - char name[0]; + char name[]; }; void symbol__delete(struct symbol *sym); diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index a661b122d9d8..89b390623b63 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -37,6 +37,7 @@ #include <string.h> #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ #include <api/fs/fs.h> +#include <api/io.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -71,7 +72,6 @@ int perf_tool__process_synth_event(struct perf_tool *tool, static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, pid_t *tgid, pid_t *ppid) { - char filename[PATH_MAX]; char bf[4096]; int fd; size_t size = 0; @@ -81,11 +81,11 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, *tgid = -1; *ppid = -1; - snprintf(filename, sizeof(filename), "/proc/%d/status", pid); + snprintf(bf, sizeof(bf), "/proc/%d/status", pid); - fd = open(filename, O_RDONLY); + fd = open(bf, O_RDONLY); if (fd < 0) { - pr_debug("couldn't open %s\n", filename); + pr_debug("couldn't open %s\n", bf); return -1; } @@ -274,6 +274,79 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, return 0; } +static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end, + u32 *prot, u32 *flags, __u64 *offset, + u32 *maj, u32 *min, + __u64 *inode, + ssize_t pathname_size, char *pathname) +{ + __u64 temp; + int ch; + char *start_pathname = pathname; + + if (io__get_hex(io, start) != '-') + return false; + if (io__get_hex(io, end) != ' ') + return false; + + /* map protection and flags bits */ + *prot = 0; + ch = io__get_char(io); + if (ch == 'r') + *prot |= PROT_READ; + else if (ch != '-') + return false; + ch = io__get_char(io); + if (ch == 'w') + *prot |= PROT_WRITE; + else if (ch != '-') + return false; + ch = io__get_char(io); + if (ch == 'x') + *prot |= PROT_EXEC; + else if (ch != '-') + return false; + ch = io__get_char(io); + if (ch == 's') + *flags = MAP_SHARED; + else if (ch == 'p') + *flags = MAP_PRIVATE; + else + return false; + if (io__get_char(io) != ' ') + return false; + + if (io__get_hex(io, offset) != ' ') + return false; + + if (io__get_hex(io, &temp) != ':') + return false; + *maj = temp; + if (io__get_hex(io, &temp) != ' ') + return false; + *min = temp; + + ch = io__get_dec(io, inode); + if (ch != ' ') { + *pathname = '\0'; + return ch == '\n'; + } + do { + ch = io__get_char(io); + } while (ch == ' '); + while (true) { + if (ch < 0) + return false; + if (ch == '\0' || ch == '\n' || + (pathname + 1 - start_pathname) >= pathname_size) { + *pathname = '\0'; + return true; + } + *pathname++ = ch; + ch = io__get_char(io); + } +} + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, @@ -281,9 +354,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, struct machine *machine, bool mmap_data) { - char filename[PATH_MAX]; - FILE *fp; unsigned long long t; + char bf[BUFSIZ]; + struct io io; bool truncation = false; unsigned long long timeout = proc_map_timeout * 1000000ULL; int rc = 0; @@ -293,59 +366,52 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, if (machine__is_default_guest(machine)) return 0; - snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps", - machine->root_dir, pid, pid); + snprintf(bf, sizeof(bf), "%s/proc/%d/task/%d/maps", + machine->root_dir, pid, pid); - fp = fopen(filename, "r"); - if (fp == NULL) { + io.fd = open(bf, O_RDONLY, 0); + if (io.fd < 0) { /* * We raced with a task exiting - just return: */ - pr_debug("couldn't open %s\n", filename); + pr_debug("couldn't open %s\n", bf); return -1; } + io__init(&io, io.fd, bf, sizeof(bf)); event->header.type = PERF_RECORD_MMAP2; t = rdclock(); - while (1) { - char bf[BUFSIZ]; - char prot[5]; - char execname[PATH_MAX]; - char anonstr[] = "//anon"; - unsigned int ino; + while (!io.eof) { + static const char anonstr[] = "//anon"; size_t size; - ssize_t n; - if (fgets(bf, sizeof(bf), fp) == NULL) - break; + /* ensure null termination since stack will be reused. */ + event->mmap2.filename[0] = '\0'; + + /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ + if (!read_proc_maps_line(&io, + &event->mmap2.start, + &event->mmap2.len, + &event->mmap2.prot, + &event->mmap2.flags, + &event->mmap2.pgoff, + &event->mmap2.maj, + &event->mmap2.min, + &event->mmap2.ino, + sizeof(event->mmap2.filename), + event->mmap2.filename)) + continue; if ((rdclock() - t) > timeout) { - pr_warning("Reading %s time out. " + pr_warning("Reading %s/proc/%d/task/%d/maps time out. " "You may want to increase " "the time limit by --proc-map-timeout\n", - filename); + machine->root_dir, pid, pid); truncation = true; goto out; } - /* ensure null termination since stack will be reused. */ - strcpy(execname, ""); - - /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n", - &event->mmap2.start, &event->mmap2.len, prot, - &event->mmap2.pgoff, &event->mmap2.maj, - &event->mmap2.min, - &ino, execname); - - /* - * Anon maps don't have the execname. - */ - if (n < 7) - continue; - - event->mmap2.ino = (u64)ino; event->mmap2.ino_generation = 0; /* @@ -356,23 +422,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, else event->header.misc = PERF_RECORD_MISC_GUEST_USER; - /* map protection and flags bits */ - event->mmap2.prot = 0; - event->mmap2.flags = 0; - if (prot[0] == 'r') - event->mmap2.prot |= PROT_READ; - if (prot[1] == 'w') - event->mmap2.prot |= PROT_WRITE; - if (prot[2] == 'x') - event->mmap2.prot |= PROT_EXEC; - - if (prot[3] == 's') - event->mmap2.flags |= MAP_SHARED; - else - event->mmap2.flags |= MAP_PRIVATE; - - if (prot[2] != 'x') { - if (!mmap_data || prot[0] != 'r') + if ((event->mmap2.prot & PROT_EXEC) == 0) { + if (!mmap_data || (event->mmap2.prot & PROT_READ) == 0) continue; event->header.misc |= PERF_RECORD_MISC_MMAP_DATA; @@ -382,17 +433,17 @@ out: if (truncation) event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT; - if (!strcmp(execname, "")) - strcpy(execname, anonstr); + if (!strcmp(event->mmap2.filename, "")) + strcpy(event->mmap2.filename, anonstr); if (hugetlbfs_mnt_len && - !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) { - strcpy(execname, anonstr); + !strncmp(event->mmap2.filename, hugetlbfs_mnt, + hugetlbfs_mnt_len)) { + strcpy(event->mmap2.filename, anonstr); event->mmap2.flags |= MAP_HUGETLB; } - size = strlen(execname) + 1; - memcpy(event->mmap2.filename, execname, size); + size = strlen(event->mmap2.filename) + 1; size = PERF_ALIGN(size, sizeof(u64)); event->mmap2.len -= event->mmap.start; event->mmap2.header.size = (sizeof(event->mmap2) - @@ -411,7 +462,7 @@ out: break; } - fclose(fp); + close(io.fd); return rc; } @@ -1130,7 +1181,7 @@ void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max); default: break; - }; + } } static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map) diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 820fceeb19a9..03bd99d3be16 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -8,9 +8,9 @@ #include "syscalltbl.h" #include <stdlib.h> #include <linux/compiler.h> +#include <linux/zalloc.h> #ifdef HAVE_SYSCALL_TABLE_SUPPORT -#include <linux/zalloc.h> #include <string.h> #include "string2.h" @@ -142,7 +142,7 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g struct syscalltbl *syscalltbl__new(void) { - struct syscalltbl *tbl = malloc(sizeof(*tbl)); + struct syscalltbl *tbl = zalloc(sizeof(*tbl)); if (tbl) tbl->audit_machine = audit_detect_machine(); return tbl; diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h index 9172613028d0..a41d2ca9e4ae 100644 --- a/tools/perf/util/syscalltbl.h +++ b/tools/perf/util/syscalltbl.h @@ -3,14 +3,12 @@ #define __PERF_SYSCALLTBL_H struct syscalltbl { - union { - int audit_machine; - struct { - int max_id; - int nr_entries; - void *entries; - } syscalls; - }; + int audit_machine; + struct { + int max_id; + int nr_entries; + void *entries; + } syscalls; }; struct syscalltbl *syscalltbl__new(void); diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 0885967d5bc3..1b992bbba4e8 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -80,6 +80,10 @@ struct thread_stack_entry { * @comm: current comm * @arr_sz: size of array if this is the first element of an array * @rstate: used to detect retpolines + * @br_stack_rb: branch stack (ring buffer) + * @br_stack_sz: maximum branch stack size + * @br_stack_pos: current position in @br_stack_rb + * @mispred_all: mark all branches as mispredicted */ struct thread_stack { struct thread_stack_entry *stack; @@ -95,6 +99,10 @@ struct thread_stack { struct comm *comm; unsigned int arr_sz; enum retpoline_state_t rstate; + struct branch_stack *br_stack_rb; + unsigned int br_stack_sz; + unsigned int br_stack_pos; + bool mispred_all; }; /* @@ -126,13 +134,26 @@ static int thread_stack__grow(struct thread_stack *ts) } static int thread_stack__init(struct thread_stack *ts, struct thread *thread, - struct call_return_processor *crp) + struct call_return_processor *crp, + bool callstack, unsigned int br_stack_sz) { int err; - err = thread_stack__grow(ts); - if (err) - return err; + if (callstack) { + err = thread_stack__grow(ts); + if (err) + return err; + } + + if (br_stack_sz) { + size_t sz = sizeof(struct branch_stack); + + sz += br_stack_sz * sizeof(struct branch_entry); + ts->br_stack_rb = zalloc(sz); + if (!ts->br_stack_rb) + return -ENOMEM; + ts->br_stack_sz = br_stack_sz; + } if (thread->maps && thread->maps->machine) { struct machine *machine = thread->maps->machine; @@ -150,7 +171,9 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread, } static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, - struct call_return_processor *crp) + struct call_return_processor *crp, + bool callstack, + unsigned int br_stack_sz) { struct thread_stack *ts = thread->ts, *new_ts; unsigned int old_sz = ts ? ts->arr_sz : 0; @@ -176,7 +199,7 @@ static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, ts += cpu; if (!ts->stack && - thread_stack__init(ts, thread, crp)) + thread_stack__init(ts, thread, crp, callstack, br_stack_sz)) return NULL; return ts; @@ -319,6 +342,9 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) if (!crp) { ts->cnt = 0; + ts->br_stack_pos = 0; + if (ts->br_stack_rb) + ts->br_stack_rb->nr = 0; return 0; } @@ -353,8 +379,33 @@ int thread_stack__flush(struct thread *thread) return err; } +static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags, + u64 from_ip, u64 to_ip) +{ + struct branch_stack *bs = ts->br_stack_rb; + struct branch_entry *be; + + if (!ts->br_stack_pos) + ts->br_stack_pos = ts->br_stack_sz; + + ts->br_stack_pos -= 1; + + be = &bs->entries[ts->br_stack_pos]; + be->from = from_ip; + be->to = to_ip; + be->flags.value = 0; + be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT); + be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX); + /* No support for mispredict */ + be->flags.mispred = ts->mispred_all; + + if (bs->nr < ts->br_stack_sz) + bs->nr += 1; +} + int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, - u64 to_ip, u16 insn_len, u64 trace_nr) + u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack, + unsigned int br_stack_sz, bool mispred_all) { struct thread_stack *ts = thread__stack(thread, cpu); @@ -362,12 +413,13 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, return -EINVAL; if (!ts) { - ts = thread_stack__new(thread, cpu, NULL); + ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz); if (!ts) { pr_warning("Out of memory: no thread stack\n"); return -ENOMEM; } ts->trace_nr = trace_nr; + ts->mispred_all = mispred_all; } /* @@ -381,8 +433,14 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, ts->trace_nr = trace_nr; } - /* Stop here if thread_stack__process() is in use */ - if (ts->crp) + if (br_stack_sz) + thread_stack__update_br_stack(ts, flags, from_ip, to_ip); + + /* + * Stop here if thread_stack__process() is in use, or not recording call + * stack. + */ + if (ts->crp || !callstack) return 0; if (flags & PERF_IP_FLAG_CALL) { @@ -430,6 +488,7 @@ static void __thread_stack__free(struct thread *thread, struct thread_stack *ts) { __thread_stack__flush(thread, ts); zfree(&ts->stack); + zfree(&ts->br_stack_rb); } static void thread_stack__reset(struct thread *thread, struct thread_stack *ts) @@ -497,6 +556,199 @@ void thread_stack__sample(struct thread *thread, int cpu, chain->nr = i; } +/* + * Hardware sample records, created some time after the event occurred, need to + * have subsequent addresses removed from the call chain. + */ +void thread_stack__sample_late(struct thread *thread, int cpu, + struct ip_callchain *chain, size_t sz, + u64 sample_ip, u64 kernel_start) +{ + struct thread_stack *ts = thread__stack(thread, cpu); + u64 sample_context = callchain_context(sample_ip, kernel_start); + u64 last_context, context, ip; + size_t nr = 0, j; + + if (sz < 2) { + chain->nr = 0; + return; + } + + if (!ts) + goto out; + + /* + * When tracing kernel space, kernel addresses occur at the top of the + * call chain after the event occurred but before tracing stopped. + * Skip them. + */ + for (j = 1; j <= ts->cnt; j++) { + ip = ts->stack[ts->cnt - j].ret_addr; + context = callchain_context(ip, kernel_start); + if (context == PERF_CONTEXT_USER || + (context == sample_context && ip == sample_ip)) + break; + } + + last_context = sample_ip; /* Use sample_ip as an invalid context */ + + for (; nr < sz && j <= ts->cnt; nr++, j++) { + ip = ts->stack[ts->cnt - j].ret_addr; + context = callchain_context(ip, kernel_start); + if (context != last_context) { + if (nr >= sz - 1) + break; + chain->ips[nr++] = context; + last_context = context; + } + chain->ips[nr] = ip; + } +out: + if (nr) { + chain->nr = nr; + } else { + chain->ips[0] = sample_context; + chain->ips[1] = sample_ip; + chain->nr = 2; + } +} + +void thread_stack__br_sample(struct thread *thread, int cpu, + struct branch_stack *dst, unsigned int sz) +{ + struct thread_stack *ts = thread__stack(thread, cpu); + const size_t bsz = sizeof(struct branch_entry); + struct branch_stack *src; + struct branch_entry *be; + unsigned int nr; + + dst->nr = 0; + + if (!ts) + return; + + src = ts->br_stack_rb; + if (!src->nr) + return; + + dst->nr = min((unsigned int)src->nr, sz); + + be = &dst->entries[0]; + nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr); + memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr); + + if (src->nr >= ts->br_stack_sz) { + sz -= nr; + be = &dst->entries[nr]; + nr = min(ts->br_stack_pos, sz); + memcpy(be, &src->entries[0], bsz * ts->br_stack_pos); + } +} + +/* Start of user space branch entries */ +static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start) +{ + if (!*start) + *start = be->to && be->to < kernel_start; + + return *start; +} + +/* + * Start of branch entries after the ip fell in between 2 branches, or user + * space branch entries. + */ +static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start, + bool *start, struct branch_entry *nb) +{ + if (!*start) { + *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) || + be->from < kernel_start || + (be->to && be->to < kernel_start); + } + + return *start; +} + +/* + * Hardware sample records, created some time after the event occurred, need to + * have subsequent addresses removed from the branch stack. + */ +void thread_stack__br_sample_late(struct thread *thread, int cpu, + struct branch_stack *dst, unsigned int sz, + u64 ip, u64 kernel_start) +{ + struct thread_stack *ts = thread__stack(thread, cpu); + struct branch_entry *d, *s, *spos, *ssz; + struct branch_stack *src; + unsigned int nr = 0; + bool start = false; + + dst->nr = 0; + + if (!ts) + return; + + src = ts->br_stack_rb; + if (!src->nr) + return; + + spos = &src->entries[ts->br_stack_pos]; + ssz = &src->entries[ts->br_stack_sz]; + + d = &dst->entries[0]; + s = spos; + + if (ip < kernel_start) { + /* + * User space sample: start copying branch entries when the + * branch is in user space. + */ + for (s = spos; s < ssz && nr < sz; s++) { + if (us_start(s, kernel_start, &start)) { + *d++ = *s; + nr += 1; + } + } + + if (src->nr >= ts->br_stack_sz) { + for (s = &src->entries[0]; s < spos && nr < sz; s++) { + if (us_start(s, kernel_start, &start)) { + *d++ = *s; + nr += 1; + } + } + } + } else { + struct branch_entry *nb = NULL; + + /* + * Kernel space sample: start copying branch entries when the ip + * falls in between 2 branches (or the branch is in user space + * because then the start must have been missed). + */ + for (s = spos; s < ssz && nr < sz; s++) { + if (ks_start(s, ip, kernel_start, &start, nb)) { + *d++ = *s; + nr += 1; + } + nb = s; + } + + if (src->nr >= ts->br_stack_sz) { + for (s = &src->entries[0]; s < spos && nr < sz; s++) { + if (ks_start(s, ip, kernel_start, &start, nb)) { + *d++ = *s; + nr += 1; + } + nb = s; + } + } + } + + dst->nr = nr; +} + struct call_return_processor * call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data), void *data) @@ -864,7 +1116,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, } if (!ts) { - ts = thread_stack__new(thread, sample->cpu, crp); + ts = thread_stack__new(thread, sample->cpu, crp, true, 0); if (!ts) return -ENOMEM; ts->comm = comm; diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index e1ec5a58f1b2..3bc47a42af8e 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -81,10 +81,19 @@ struct call_return_processor { }; int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, - u64 to_ip, u16 insn_len, u64 trace_nr); + u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack, + unsigned int br_stack_sz, bool mispred_all); void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr); void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain, size_t sz, u64 ip, u64 kernel_start); +void thread_stack__sample_late(struct thread *thread, int cpu, + struct ip_callchain *chain, size_t sz, u64 ip, + u64 kernel_start); +void thread_stack__br_sample(struct thread *thread, int cpu, + struct branch_stack *dst, unsigned int sz); +void thread_stack__br_sample_late(struct thread *thread, int cpu, + struct branch_stack *dst, unsigned int sz, + u64 sample_ip, u64 kernel_start); int thread_stack__flush(struct thread *thread); void thread_stack__free(struct thread *thread); size_t thread_stack__depth(struct thread *thread, int cpu); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 28b719388028..665e5c0618ed 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -47,6 +47,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread->tid = tid; thread->ppid = -1; thread->cpu = -1; + thread->lbr_stitch_enable = false; INIT_LIST_HEAD(&thread->namespaces_list); INIT_LIST_HEAD(&thread->comm_list); init_rwsem(&thread->namespaces_lock); @@ -110,6 +111,7 @@ void thread__delete(struct thread *thread) exit_rwsem(&thread->namespaces_lock); exit_rwsem(&thread->comm_lock); + thread__free_stitch_list(thread); free(thread); } @@ -452,3 +454,25 @@ int thread__memcpy(struct thread *thread, struct machine *machine, return dso__data_read_offset(al.map->dso, machine, offset, buf, len); } + +void thread__free_stitch_list(struct thread *thread) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct stitch_list *pos, *tmp; + + if (!lbr_stitch) + return; + + list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) { + list_del_init(&pos->node); + free(pos); + } + + list_for_each_entry_safe(pos, tmp, &lbr_stitch->free_lists, node) { + list_del_init(&pos->node); + free(pos); + } + + zfree(&lbr_stitch->prev_lbr_cursor); + zfree(&thread->lbr_stitch); +} diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 20b96b5d1f15..b066fb30d203 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -13,6 +13,8 @@ #include <strlist.h> #include <intlist.h> #include "rwsem.h" +#include "event.h" +#include "callchain.h" struct addr_location; struct map; @@ -20,6 +22,13 @@ struct perf_record_namespaces; struct thread_stack; struct unwind_libunwind_ops; +struct lbr_stitch { + struct list_head lists; + struct list_head free_lists; + struct perf_sample prev_sample; + struct callchain_cursor_node *prev_lbr_cursor; +}; + struct thread { union { struct rb_node rb_node; @@ -46,6 +55,10 @@ struct thread { struct srccode_state srccode_state; bool filter; int filter_entry_depth; + + /* LBR call stack stitch */ + bool lbr_stitch_enable; + struct lbr_stitch *lbr_stitch; }; struct machine; @@ -142,4 +155,6 @@ static inline bool thread__is_filtered(struct thread *thread) return false; } +void thread__free_stitch_list(struct thread *thread); + #endif /* __PERF_THREAD_H */ diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 3dce2de9d005..27945eeb0cb5 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -77,7 +77,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) opts->freq ? "Hz" : ""); } - ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel)); + ret += SNPRINTF(bf + ret, size - ret, "%s", evsel__name(top->sym_evsel)); ret += SNPRINTF(bf + ret, size - ret, "], "); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index f117d4f4821e..ff8391208ecd 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -18,7 +18,7 @@ struct perf_session; struct perf_top { struct perf_tool tool; - struct evlist *evlist; + struct evlist *evlist, *sb_evlist; struct record_opts record_opts; struct annotation_options annotation_opts; struct evswitch evswitch; @@ -36,6 +36,7 @@ struct perf_top { bool use_tui, use_stdio; bool vmlinux_warned; bool dump_symtab; + bool stitch_lbr; struct hist_entry *sym_filter_entry; struct evsel *sym_evsel; struct perf_session *session; diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 086e98ff42a3..0e5c4786f296 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -428,7 +428,7 @@ try_id: if (!ppath->next) { error: pr_debug("No memory to alloc tracepoints list\n"); - put_tracepoints_path(&path); + put_tracepoints_path(path.next); return NULL; } next: diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 8593d3c200c6..f507dff713c9 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -75,7 +75,7 @@ static void skip(int size) r = size > BUFSIZ ? BUFSIZ : size; do_read(buf, r); size -= r; - }; + } } static unsigned int read4(struct tep_handle *pevent) diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index b4649f5a0c2f..9aededc0bc06 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -243,7 +243,7 @@ struct eh_frame_hdr { * encoded_t fde_addr; * } binary_search_table[fde_count]; */ - char data[0]; + char data[]; } __packed; static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index d707c9624dd9..37a9492edb3e 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -290,6 +290,7 @@ int perf_event_paranoid(void) bool perf_event_paranoid_check(int max_level) { return perf_cap__capable(CAP_SYS_ADMIN) || + perf_cap__capable(CAP_PERFMON) || perf_event_paranoid() <= max_level; } |