aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2019-10-15 07:19:55 +0200
committerIngo Molnar <mingo@kernel.org>2019-10-15 07:19:55 +0200
commit39b656ee9f2ce41eb969c86525f9a2a63fefac5b (patch)
tree4d5013e83a8ae89f5b70ca176f6b2d8ae973c96a /tools/perf
parentLinux 5.4-rc3 (diff)
parentperf diff: Report noisy for cycles diff (diff)
downloadlinux-dev-39b656ee9f2ce41eb969c86525f9a2a63fefac5b.tar.xz
linux-dev-39b656ee9f2ce41eb969c86525f9a2a63fefac5b.zip
Merge tag 'perf-core-for-mingo-5.5-20191011' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: perf trace: Arnaldo Carvalho de Melo: - Reuse the strace-like syscall_arg_fmt->scnprintf() beautification routines (convert integer arguments into strings, like open flags, etc) in tracepoint arguments. For now the type based scnprintf routines (pid_t, umode_t, etc) and the ones based in well known arg name based ("fd", etc) gets associated with tracepoint args of that type. A tracepoint only arg, "msr", for the msr:{write,read}_msr gets added as an initial step. - Introduce syscall_arg_fmt->strtoul() methods to be the reverse operation of ->scnprintf(), i.e. to go from a string to an integer. - Implement --filter, just like in 'perf record', that affects the tracepoint events specied thus far in the command line, use the ->strtoul() methods to allow strings in tables associated with beautifiers to the integers the in-kernel tracepoint (eBPF later) filters expect, e.g.: # perf trace --max-events 1 -e sched:*ipi --filter="cpu==1 || cpu==2" 0.000 as/24630 sched:sched_wake_idle_without_ipi(cpu: 1) # # perf trace --max-events 1 --max-stack=32 -e msr:* --filter="msr==IA32_TSC_DEADLINE" 207.000 cc1/19963 msr:write_msr(msr: IA32_TSC_DEADLINE, val: 5442316760822) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) lapic_next_deadline ([kernel.kallsyms]) clockevents_program_event ([kernel.kallsyms]) hrtimer_interrupt ([kernel.kallsyms]) smp_apic_timer_interrupt ([kernel.kallsyms]) apic_timer_interrupt ([kernel.kallsyms]) [0x6ff66c] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) [0x7047c3] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) [0x707708] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) execute_one_pass (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) [0x4f3d37] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) [0x4f3d49] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) execute_pass_list (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) cgraph_node::expand (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) [0x2625b4] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) symbol_table::finalize_compilation_unit (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) [0x5ae8b9] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) toplev::main (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) main (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1) [0x26b6a] (/usr/lib/x86_64-linux-gnu/libc-2.29.so) # # perf trace --max-events 8 -e msr:* --filter="msr==IA32_SPEC_CTRL" 0.000 :13281/13281 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) 0.063 migration/3/25 msr:write_msr(msr: IA32_SPEC_CTRL) 0.217 kworker/u16:1-/4826 msr:write_msr(msr: IA32_SPEC_CTRL) 0.687 rcu_sched/11 msr:write_msr(msr: IA32_SPEC_CTRL) 0.696 :13280/13280 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) 0.305 :13281/13281 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) 0.355 :13274/13274 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) 2.743 kworker/u16:0-/6711 msr:write_msr(msr: IA32_SPEC_CTRL) # # perf trace --max-events 8 --cpu 1 -e msr:* --filter="msr!=IA32_SPEC_CTRL && msr!=IA32_TSC_DEADLINE && msr != FS_BASE" 0.000 mtr-packet/30819 msr:write_msr(msr: 0x830, val: 68719479037) 0.096 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST) 238.925 mtr-packet/30819 msr:write_msr(msr: 0x830, val: 8589936893) 511.010 :0/0 msr:write_msr(msr: 0x830, val: 68719479037) 1005.052 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST) 1235.131 CPU 0/KVM/3750 msr:write_msr(msr: 0x830, val: 4294969595) 1235.195 CPU 0/KVM/3750 msr:read_msr(msr: IA32_SYSENTER_ESP, val: -2199023037952) 1235.201 CPU 0/KVM/3750 msr:read_msr(msr: IA32_APICBASE, val: 4276096000) # - Default to not using libtraceevent and its plugins for beautifying tracepoint arguments, since now we're reusing the strace-like beatufiers. Use --libtraceevent_print (using just --libtrace is unambiguous and can be used as a short hand) to go back to those beautifiers. This will help in the transition, as can be seen in some of the sched tracepoints that still need some work in the libbeauty based mode: # trace --no-inherit -e msr:*,*sleep,sched:* sleep 1 0.000 ( ): sched:sched_waking(comm: "trace", pid: 3319 (trace), prio: 120, success: 1) 0.006 ( ): sched:sched_wakeup(comm: "trace", pid: 3319 (trace), prio: 120, success: 1) 0.348 ( ): sched:sched_process_exec(filename: 140212596720100, pid: 3319 (sleep), old_pid: 3319 (sleep)) 0.490 ( ): msr:write_msr(msr: FS_BASE, val: 139631189321088) 0.670 ( ): nanosleep(rqtp: 0x7ffc52c23bc0) ... 0.674 ( ): sched:sched_stat_runtime(comm: "sleep", pid: 3319 (sleep), runtime: 659259, vruntime: 78942418342) 0.675 ( ): sched:sched_switch(prev_comm: "sleep", prev_pid: 3319 (sleep), prev_prio: 120, prev_state: 1, next_comm: "swapper/0", next_prio: 120) 1001.059 ( ): sched:sched_waking(comm: "sleep", pid: 3319 (sleep), prio: 120, success: 1) 1001.098 ( ): sched:sched_wakeup(comm: "sleep", pid: 3319 (sleep), prio: 120, success: 1) 0.670 (1000.504 ms): ... [continued]: nanosleep()) = 0 1001.456 ( ): sched:sched_process_exit(comm: "sleep", pid: 3319 (sleep), prio: 120) # trace --libtrace --no-inherit -e msr:*,*sleep,sched:* sleep 1 # trace --libtrace --no-inherit -e msr:*,*sleep,sched:* sleep 1 0.000 ( ): sched:sched_waking(comm=trace pid=3323 prio=120 target_cpu=000) 0.007 ( ): sched:sched_wakeup(comm=trace pid=3323 prio=120 target_cpu=000) 0.382 ( ): sched:sched_process_exec(filename=/usr/bin/sleep pid=3323 old_pid=3323) 0.525 ( ): msr:write_msr(c0000100, value 7f5d508a0580) 0.713 ( ): nanosleep(rqtp: 0x7fff487fb4a0) ... 0.717 ( ): sched:sched_stat_runtime(comm=sleep pid=3323 runtime=617722 [ns] vruntime=78957731636 [ns]) 0.719 ( ): sched:sched_switch(prev_comm=sleep prev_pid=3323 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120) 1001.117 ( ): sched:sched_waking(comm=sleep pid=3323 prio=120 target_cpu=000) 1001.157 ( ): sched:sched_wakeup(comm=sleep pid=3323 prio=120 target_cpu=000) 0.713 (1000.522 ms): ... [continued]: nanosleep()) = 0 1001.538 ( ): sched:sched_process_exit(comm=sleep pid=3323 prio=120) # - Make -v (verbose) mode be honoured for .perfconfig based trace.add_events, to help in diagnosing problems with building eBPF events (-e source.c). - When using eBPF syscall payload augmentation do not show strace-like syscalls when all the user specified was some tracepoint event, bringing the behaviour in line with that of when not using eBPF augmentation. Intel PT: exported-sql-viewer GUI: Adrian Hunter: - Add LookupModel, HBoxLayout, VBoxLayout, global time range calculations so as to add a time chart by CPU. perf script: Andi Kleen: - Allow --time (to specify a time span of interest) with --reltime perf diff: Jin Yao: - Report noise for cycles diff, i.e. a histogram + stddev. (timestamps relative to start). perf annotate: Arnaldo Carvalho de Melo: - Initialize env->cpuid when running in live mode (perf top), as it is used in some of the per arch annotation init routines. samples bpf: Björn Töpel: - Fixup fallout of using tools/perf/perf-sys. from outside tools/perf. Core: Ian Rogers: - Avoid 'sample_reg_masks' being const + weak, as this breaks with some compilers that constant-propagate from the weak symbol. libperf: - First part of moving the perf_mmap class from tools/perf to libperf. - Propagate CFLAGS to libperf from the tools/perf Makefile. Vendor events: John Garry: - Add entry in MAINTAINERS with reviewers for the for perf tool arm64 pmu-events files. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/perf-config.txt5
-rw-r--r--tools/perf/Documentation/perf-diff.txt5
-rw-r--r--tools/perf/Documentation/perf-trace.txt10
-rw-r--r--tools/perf/Makefile.config28
-rw-r--r--tools/perf/Makefile.perf11
-rw-r--r--tools/perf/arch/arm/util/Build2
-rw-r--r--tools/perf/arch/arm/util/perf_regs.c6
-rw-r--r--tools/perf/arch/arm64/util/Build1
-rw-r--r--tools/perf/arch/arm64/util/perf_regs.c6
-rw-r--r--tools/perf/arch/csky/util/Build2
-rw-r--r--tools/perf/arch/csky/util/perf_regs.c6
-rw-r--r--tools/perf/arch/riscv/util/Build2
-rw-r--r--tools/perf/arch/riscv/util/perf_regs.c6
-rw-r--r--tools/perf/arch/s390/util/Build1
-rw-r--r--tools/perf/arch/s390/util/perf_regs.c6
-rw-r--r--tools/perf/arch/x86/tests/perf-time-to-tsc.c9
-rw-r--r--tools/perf/builtin-diff.c143
-rw-r--r--tools/perf/builtin-kvm.c11
-rw-r--r--tools/perf/builtin-record.c10
-rw-r--r--tools/perf/builtin-script.c5
-rw-r--r--tools/perf/builtin-top.c20
-rw-r--r--tools/perf/builtin-trace.c593
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/lib/Build1
-rw-r--r--tools/perf/lib/Makefile5
-rw-r--r--tools/perf/lib/core.c3
-rw-r--r--tools/perf/lib/evlist.c324
-rw-r--r--tools/perf/lib/include/internal/evlist.h40
-rw-r--r--tools/perf/lib/include/internal/mmap.h44
-rw-r--r--tools/perf/lib/include/perf/core.h2
-rw-r--r--tools/perf/lib/include/perf/evlist.h5
-rw-r--r--tools/perf/lib/include/perf/mmap.h15
-rw-r--r--tools/perf/lib/internal.h2
-rw-r--r--tools/perf/lib/libperf.map7
-rw-r--r--tools/perf/lib/mmap.c273
-rw-r--r--tools/perf/perf-sys.h6
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py1555
-rw-r--r--tools/perf/tests/backward-ring-buffer.c7
-rw-r--r--tools/perf/tests/bpf.c7
-rw-r--r--tools/perf/tests/code-reading.c9
-rw-r--r--tools/perf/tests/keep-tracking.c9
-rw-r--r--tools/perf/tests/mmap-basic.c9
-rw-r--r--tools/perf/tests/openat-syscall-tp-fields.c9
-rw-r--r--tools/perf/tests/perf-record.c9
-rw-r--r--tools/perf/tests/sw-clock.c9
-rw-r--r--tools/perf/tests/switch-tracking.c9
-rw-r--r--tools/perf/tests/task-exit.c9
-rw-r--r--tools/perf/trace/beauty/Build1
-rw-r--r--tools/perf/trace/beauty/beauty.h16
-rw-r--r--tools/perf/trace/beauty/tracepoints/Build1
-rw-r--r--tools/perf/trace/beauty/tracepoints/x86_msr.c39
-rwxr-xr-xtools/perf/trace/beauty/tracepoints/x86_msr.sh40
-rw-r--r--tools/perf/util/Build1
-rw-r--r--tools/perf/util/annotate.c4
-rw-r--r--tools/perf/util/annotate.h2
-rw-r--r--tools/perf/util/env.c16
-rw-r--r--tools/perf/util/env.h1
-rw-r--r--tools/perf/util/evlist.c322
-rw-r--r--tools/perf/util/evlist.h12
-rw-r--r--tools/perf/util/mmap.c260
-rw-r--r--tools/perf/util/mmap.h28
-rw-r--r--tools/perf/util/parse-regs-options.c8
-rw-r--r--tools/perf/util/perf_regs.c4
-rw-r--r--tools/perf/util/perf_regs.h4
-rw-r--r--tools/perf/util/python.c7
-rw-r--r--tools/perf/util/session.c29
-rw-r--r--tools/perf/util/session.h6
-rw-r--r--tools/perf/util/sort.h4
-rw-r--r--tools/perf/util/spark.c34
-rw-r--r--tools/perf/util/spark.h8
-rw-r--r--tools/perf/util/symbol.h2
71 files changed, 3401 insertions, 705 deletions
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index c599623a1f3d..c4dd23c4b478 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -561,6 +561,11 @@ trace.*::
trace.show_zeros::
Do not suppress syscall arguments that are equal to zero.
+ trace.tracepoint_beautifiers::
+ Use "libtraceevent" to use that library to augment the tracepoint arguments,
+ "libbeauty", the default, to use the same argument beautifiers used in the
+ strace-like sys_enter+sys_exit lines.
+
llvm.*::
llvm.clang-path::
Path to clang. If omit, search it from $PATH.
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index d5cc15e651cf..f50ca0fef0a4 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -95,6 +95,11 @@ OPTIONS
diff.compute config option. See COMPARISON METHODS section for
more info.
+--cycles-hist::
+ Report a histogram and the standard deviation for cycles data.
+ It can help us to judge if the reported cycles data is noisy or
+ not. This option should be used with '-c cycles'.
+
-p::
--period::
Show period values for both compared hist entries.
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 25b74fdb36fa..3bb89c2e9020 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -42,6 +42,11 @@ OPTIONS
Prefixing with ! shows all syscalls but the ones specified. You may
need to escape it.
+--filter=<filter>::
+ Event filter. This option should follow an event selector (-e) which
+ selects tracepoint event(s).
+
+
-D msecs::
--delay msecs::
After starting the program, wait msecs before measuring. This is useful to
@@ -219,6 +224,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
may happen, for instance, when a thread gets migrated to a different CPU
while processing a syscall.
+--libtraceevent_print::
+ Use libtraceevent to print tracepoint arguments. By default 'perf trace' uses
+ the same beautifiers used in the strace-like enter+exit lines to augment the
+ tracepoint arguments.
+
--map-dump::
Dump BPF maps setup by events passed via -e, for instance the augmented_raw_syscalls
living in tools/perf/examples/bpf/augmented_raw_syscalls.c. For now this
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 46f7fba2306c..063202c53b64 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -188,7 +188,7 @@ endif
# Treat warnings as errors unless directed not to
ifneq ($(WERROR),0)
- CFLAGS += -Werror
+ CORE_CFLAGS += -Werror
CXXFLAGS += -Werror
endif
@@ -198,9 +198,9 @@ endif
ifeq ($(DEBUG),0)
ifeq ($(CC_NO_CLANG), 0)
- CFLAGS += -O3
+ CORE_CFLAGS += -O3
else
- CFLAGS += -O6
+ CORE_CFLAGS += -O6
endif
endif
@@ -245,12 +245,12 @@ FEATURE_CHECK_LDFLAGS-libaio = -lrt
FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
-CFLAGS += -fno-omit-frame-pointer
-CFLAGS += -ggdb3
-CFLAGS += -funwind-tables
-CFLAGS += -Wall
-CFLAGS += -Wextra
-CFLAGS += -std=gnu99
+CORE_CFLAGS += -fno-omit-frame-pointer
+CORE_CFLAGS += -ggdb3
+CORE_CFLAGS += -funwind-tables
+CORE_CFLAGS += -Wall
+CORE_CFLAGS += -Wextra
+CORE_CFLAGS += -std=gnu99
CXXFLAGS += -std=gnu++11 -fno-exceptions -fno-rtti
CXXFLAGS += -Wall
@@ -272,12 +272,12 @@ include $(FEATURES_DUMP)
endif
ifeq ($(feature-stackprotector-all), 1)
- CFLAGS += -fstack-protector-all
+ CORE_CFLAGS += -fstack-protector-all
endif
ifeq ($(DEBUG),0)
ifeq ($(feature-fortify-source), 1)
- CFLAGS += -D_FORTIFY_SOURCE=2
+ CORE_CFLAGS += -D_FORTIFY_SOURCE=2
endif
endif
@@ -301,10 +301,12 @@ INC_FLAGS += -I$(src-perf)/util
INC_FLAGS += -I$(src-perf)
INC_FLAGS += -I$(srctree)/tools/lib/
-CFLAGS += $(INC_FLAGS)
+CORE_CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+
+CFLAGS += $(CORE_CFLAGS) $(INC_FLAGS)
CXXFLAGS += $(INC_FLAGS)
-CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+LIBPERF_CFLAGS := $(CORE_CFLAGS) $(EXTRA_CFLAGS)
ifeq ($(feature-sync-compare-and-swap), 1)
CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 902c792f326a..a099a8a89447 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -407,6 +407,7 @@ linux_uapi_dir := $(srctree)/tools/include/uapi/linux
asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
x86_arch_asm_uapi_dir := $(srctree)/tools/arch/x86/include/uapi/asm/
+x86_arch_asm_dir := $(srctree)/tools/arch/x86/include/asm/
beauty_outdir := $(OUTPUT)trace/beauty/generated
beauty_ioctl_outdir := $(beauty_outdir)/ioctl
@@ -543,6 +544,12 @@ x86_arch_prctl_code_tbl := $(srctree)/tools/perf/trace/beauty/x86_arch_prctl.sh
$(x86_arch_prctl_code_array): $(x86_arch_asm_uapi_dir)/prctl.h $(x86_arch_prctl_code_tbl)
$(Q)$(SHELL) '$(x86_arch_prctl_code_tbl)' $(x86_arch_asm_uapi_dir) > $@
+x86_arch_MSRs_array := $(beauty_outdir)/x86_arch_MSRs_array.c
+x86_arch_MSRs_tbl := $(srctree)/tools/perf/trace/beauty/tracepoints/x86_msr.sh
+
+$(x86_arch_MSRs_array): $(x86_arch_asm_dir)/msr-index.h $(x86_arch_MSRs_tbl)
+ $(Q)$(SHELL) '$(x86_arch_MSRs_tbl)' $(x86_arch_asm_dir) > $@
+
rename_flags_array := $(beauty_outdir)/rename_flags_array.c
rename_flags_tbl := $(srctree)/tools/perf/trace/beauty/rename_flags.sh
@@ -677,6 +684,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(perf_ioctl_array) \
$(prctl_option_array) \
$(usbdevfs_ioctl_array) \
+ $(x86_arch_MSRs_array) \
$(x86_arch_prctl_code_array) \
$(rename_flags_array) \
$(arch_errno_name_array) \
@@ -761,7 +769,7 @@ $(LIBBPF)-clean:
$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null
$(LIBPERF): FORCE
- $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) $(OUTPUT)libperf.a
+ $(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a
$(LIBPERF)-clean:
$(call QUIET_CLEAN, libperf)
@@ -981,6 +989,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(perf_ioctl_array) \
$(OUTPUT)$(prctl_option_array) \
$(OUTPUT)$(usbdevfs_ioctl_array) \
+ $(OUTPUT)$(x86_arch_MSRs_array) \
$(OUTPUT)$(x86_arch_prctl_code_array) \
$(OUTPUT)$(rename_flags_array) \
$(OUTPUT)$(arch_errno_name_array) \
diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build
index 296f0eac5e18..37fc63708966 100644
--- a/tools/perf/arch/arm/util/Build
+++ b/tools/perf/arch/arm/util/Build
@@ -1,3 +1,5 @@
+perf-y += perf_regs.o
+
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
diff --git a/tools/perf/arch/arm/util/perf_regs.c b/tools/perf/arch/arm/util/perf_regs.c
new file mode 100644
index 000000000000..2864e2e3776d
--- /dev/null
+++ b/tools/perf/arch/arm/util/perf_regs.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+ SMPL_REG_END
+};
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 3cde540d2fcf..0a7782c61209 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,4 +1,5 @@
perf-y += header.o
+perf-y += perf_regs.o
perf-y += sym-handling.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
new file mode 100644
index 000000000000..2864e2e3776d
--- /dev/null
+++ b/tools/perf/arch/arm64/util/perf_regs.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+ SMPL_REG_END
+};
diff --git a/tools/perf/arch/csky/util/Build b/tools/perf/arch/csky/util/Build
index 1160bb2332ba..7d3050134ae0 100644
--- a/tools/perf/arch/csky/util/Build
+++ b/tools/perf/arch/csky/util/Build
@@ -1,2 +1,4 @@
+perf-y += perf_regs.o
+
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/csky/util/perf_regs.c b/tools/perf/arch/csky/util/perf_regs.c
new file mode 100644
index 000000000000..2864e2e3776d
--- /dev/null
+++ b/tools/perf/arch/csky/util/perf_regs.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+ SMPL_REG_END
+};
diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build
index 1160bb2332ba..7d3050134ae0 100644
--- a/tools/perf/arch/riscv/util/Build
+++ b/tools/perf/arch/riscv/util/Build
@@ -1,2 +1,4 @@
+perf-y += perf_regs.o
+
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/riscv/util/perf_regs.c b/tools/perf/arch/riscv/util/perf_regs.c
new file mode 100644
index 000000000000..2864e2e3776d
--- /dev/null
+++ b/tools/perf/arch/riscv/util/perf_regs.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+ SMPL_REG_END
+};
diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build
index 22797f043b84..3d9d0f4f72ca 100644
--- a/tools/perf/arch/s390/util/Build
+++ b/tools/perf/arch/s390/util/Build
@@ -1,5 +1,6 @@
perf-y += header.o
perf-y += kvm-stat.o
+perf-y += perf_regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/s390/util/perf_regs.c b/tools/perf/arch/s390/util/perf_regs.c
new file mode 100644
index 000000000000..2864e2e3776d
--- /dev/null
+++ b/tools/perf/arch/s390/util/perf_regs.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+ SMPL_REG_END
+};
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index fa947952c16a..909ead08a6f6 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -9,6 +9,7 @@
#include <sys/prctl.h>
#include <perf/cpumap.h>
#include <perf/evlist.h>
+#include <perf/mmap.h>
#include "debug.h"
#include "parse-events.h"
@@ -117,10 +118,10 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
for (i = 0; i < evlist->core.nr_mmaps; i++) {
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
continue;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
struct perf_sample sample;
if (event->header.type != PERF_RECORD_COMM ||
@@ -139,9 +140,9 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
comm2_time = sample.time;
}
next_event:
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
}
if (!comm1_time || !comm2_time)
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index c37a78677955..5281629c27b1 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -23,6 +23,7 @@
#include "util/time-utils.h"
#include "util/annotate.h"
#include "util/map.h"
+#include "util/spark.h"
#include <linux/err.h>
#include <linux/zalloc.h>
#include <subcmd/pager.h>
@@ -53,6 +54,7 @@ enum {
PERF_HPP_DIFF__FORMULA,
PERF_HPP_DIFF__DELTA_ABS,
PERF_HPP_DIFF__CYCLES,
+ PERF_HPP_DIFF__CYCLES_HIST,
PERF_HPP_DIFF__MAX_INDEX
};
@@ -87,6 +89,7 @@ static bool force;
static bool show_period;
static bool show_formula;
static bool show_baseline_only;
+static bool cycles_hist;
static unsigned int sort_compute = 1;
static s64 compute_wdiff_w1;
@@ -164,6 +167,10 @@ static struct header_column {
[PERF_HPP_DIFF__CYCLES] = {
.name = "[Program Block Range] Cycles Diff",
.width = 70,
+ },
+ [PERF_HPP_DIFF__CYCLES_HIST] = {
+ .name = "stddev/Hist",
+ .width = NUM_SPARKS + 9,
}
};
@@ -610,6 +617,9 @@ static void init_block_info(struct block_info *bi, struct symbol *sym,
bi->cycles_aggr = ch->cycles_aggr;
bi->num = ch->num;
bi->num_aggr = ch->num_aggr;
+
+ memcpy(bi->cycles_spark, ch->cycles_spark,
+ NUM_SPARKS * sizeof(u64));
}
static int process_block_per_sym(struct hist_entry *he)
@@ -689,6 +699,21 @@ static struct hist_entry *get_block_pair(struct hist_entry *he,
return NULL;
}
+static void init_spark_values(unsigned long *svals, int num)
+{
+ for (int i = 0; i < num; i++)
+ svals[i] = 0;
+}
+
+static void update_spark_value(unsigned long *svals, int num,
+ struct stats *stats, u64 val)
+{
+ int n = stats->n;
+
+ if (n < num)
+ svals[n] = val;
+}
+
static void compute_cycles_diff(struct hist_entry *he,
struct hist_entry *pair)
{
@@ -697,6 +722,26 @@ static void compute_cycles_diff(struct hist_entry *he,
pair->diff.cycles =
pair->block_info->cycles_aggr / pair->block_info->num_aggr -
he->block_info->cycles_aggr / he->block_info->num_aggr;
+
+ if (!cycles_hist)
+ return;
+
+ init_stats(&pair->diff.stats);
+ init_spark_values(pair->diff.svals, NUM_SPARKS);
+
+ for (int i = 0; i < pair->block_info->num; i++) {
+ u64 val;
+
+ if (i >= he->block_info->num || i >= NUM_SPARKS)
+ break;
+
+ val = labs(pair->block_info->cycles_spark[i] -
+ he->block_info->cycles_spark[i]);
+
+ update_spark_value(pair->diff.svals, NUM_SPARKS,
+ &pair->diff.stats, val);
+ update_stats(&pair->diff.stats, val);
+ }
}
}
@@ -1255,6 +1300,9 @@ static const struct option options[] = {
"Show period values."),
OPT_BOOLEAN('F', "formula", &show_formula,
"Show formula."),
+ OPT_BOOLEAN(0, "cycles-hist", &cycles_hist,
+ "Show cycles histogram and standard deviation "
+ "- WARNING: use only with -c cycles."),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -1462,6 +1510,90 @@ static int hpp__color_cycles(struct perf_hpp_fmt *fmt,
return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES);
}
+static int all_zero(unsigned long *vals, int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ if (vals[i] != 0)
+ return 0;
+ return 1;
+}
+
+static int print_cycles_spark(char *bf, int size, unsigned long *svals, u64 n)
+{
+ int printed;
+
+ if (n <= 1)
+ return 0;
+
+ if (n > NUM_SPARKS)
+ n = NUM_SPARKS;
+ if (all_zero(svals, n))
+ return 0;
+
+ printed = print_spark(bf, size, svals, n);
+ printed += scnprintf(bf + printed, size - printed, " ");
+ return printed;
+}
+
+static int hpp__color_cycles_hist(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ struct diff_hpp_fmt *dfmt =
+ container_of(fmt, struct diff_hpp_fmt, fmt);
+ struct hist_entry *pair = get_pair_fmt(he, dfmt);
+ struct block_hist *bh = container_of(he, struct block_hist, he);
+ struct block_hist *bh_pair;
+ struct hist_entry *block_he;
+ char spark[32], buf[128];
+ double r;
+ int ret, pad;
+
+ if (!pair) {
+ if (bh->block_idx)
+ hpp->skip = true;
+
+ goto no_print;
+ }
+
+ bh_pair = container_of(pair, struct block_hist, he);
+
+ block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx);
+ if (!block_he) {
+ hpp->skip = true;
+ goto no_print;
+ }
+
+ ret = print_cycles_spark(spark, sizeof(spark), block_he->diff.svals,
+ block_he->diff.stats.n);
+
+ r = rel_stddev_stats(stddev_stats(&block_he->diff.stats),
+ avg_stats(&block_he->diff.stats));
+
+ if (ret) {
+ /*
+ * Padding spaces if number of sparks less than NUM_SPARKS
+ * otherwise the output is not aligned.
+ */
+ pad = NUM_SPARKS - ((ret - 1) / 3);
+ scnprintf(buf, sizeof(buf), "%s%5.1f%% %s", "\u00B1", r, spark);
+ ret = scnprintf(hpp->buf, hpp->size, "%*s",
+ dfmt->header_width, buf);
+
+ if (pad) {
+ ret += scnprintf(hpp->buf + ret, hpp->size - ret,
+ "%-*s", pad, " ");
+ }
+
+ return ret;
+ }
+
+no_print:
+ return scnprintf(hpp->buf, hpp->size, "%*s",
+ dfmt->header_width, " ");
+}
+
static void
hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size)
{
@@ -1667,6 +1799,10 @@ static void data__hpp_register(struct data__file *d, int idx)
fmt->color = hpp__color_cycles;
fmt->sort = hist_entry__cmp_nop;
break;
+ case PERF_HPP_DIFF__CYCLES_HIST:
+ fmt->color = hpp__color_cycles_hist;
+ fmt->sort = hist_entry__cmp_nop;
+ break;
default:
fmt->sort = hist_entry__cmp_nop;
break;
@@ -1692,10 +1828,14 @@ static int ui_init(void)
* PERF_HPP_DIFF__DELTA
* PERF_HPP_DIFF__RATIO
* PERF_HPP_DIFF__WEIGHTED_DIFF
+ * PERF_HPP_DIFF__CYCLES
*/
data__hpp_register(d, i ? compute_2_hpp[compute] :
PERF_HPP_DIFF__BASELINE);
+ if (cycles_hist && i)
+ data__hpp_register(d, PERF_HPP_DIFF__CYCLES_HIST);
+
/*
* And the rest:
*
@@ -1850,6 +1990,9 @@ int cmd_diff(int argc, const char **argv)
if (quiet)
perf_quiet_option();
+ if (cycles_hist && (compute != COMPUTE_CYCLES))
+ usage_with_options(diff_usage, options);
+
symbol__annotation_init();
if (symbol__init(NULL) < 0)
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 58a9e0989491..858da896b518 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -46,6 +46,7 @@
#include <semaphore.h>
#include <signal.h>
#include <math.h>
+#include <perf/mmap.h>
static const char *get_filename_for_perf_kvm(void)
{
@@ -759,14 +760,14 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
*mmap_time = ULLONG_MAX;
md = &evlist->mmap[idx];
- err = perf_mmap__read_init(md);
+ err = perf_mmap__read_init(&md->core);
if (err < 0)
return (err == -EAGAIN) ? 0 : -1;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
if (err) {
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
pr_err("Failed to parse sample\n");
return -1;
}
@@ -776,7 +777,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
* FIXME: Here we can't consume the event, as perf_session__queue_event will
* point to it, and it'll get possibly overwritten by the kernel.
*/
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
if (err) {
pr_err("Failed to enqueue sample: %d\n", err);
@@ -793,7 +794,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
break;
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
return n;
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 23332861de6e..2fb83aabbef5 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -197,7 +197,7 @@ static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
* every aio write request started in record__aio_push() so
* decrement it because the request is now complete.
*/
- perf_mmap__put(md);
+ perf_mmap__put(&md->core);
rc = 1;
} else {
/*
@@ -276,7 +276,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size
if (record__comp_enabled(aio->rec)) {
size = zstd_compress(aio->rec->session, aio->data + aio->size,
- perf_mmap__mmap_len(map) - aio->size,
+ mmap__mmap_len(map) - aio->size,
buf, size);
} else {
memcpy(aio->data + aio->size, buf, size);
@@ -293,7 +293,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size
* after started aio request completion or at record__aio_push()
* if the request failed to start.
*/
- perf_mmap__get(map);
+ perf_mmap__get(&map->core);
}
aio->size += size;
@@ -332,7 +332,7 @@ static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
* map->refcount is decremented in record__aio_complete() after
* aio write operation finishes successfully.
*/
- perf_mmap__put(map);
+ perf_mmap__put(&map->core);
}
return ret;
@@ -488,7 +488,7 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
struct record *rec = to;
if (record__comp_enabled(rec)) {
- size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size);
+ size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
bf = map->data;
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 67be8d31afab..1c797a948ada 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -3605,11 +3605,6 @@ int cmd_script(int argc, const char **argv)
}
}
- if (script.time_str && reltime) {
- fprintf(stderr, "Don't combine --reltime with --time\n");
- return -1;
- }
-
if (itrace_synth_opts.callchain &&
itrace_synth_opts.callchain_sz > scripting_max_stack)
scripting_max_stack = itrace_synth_opts.callchain_sz;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1f60124eb19b..d96f24c8770d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -82,6 +82,7 @@
#include <linux/err.h>
#include <linux/ctype.h>
+#include <perf/mmap.h>
static volatile int done;
static volatile int resize;
@@ -869,10 +870,10 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
union perf_event *event;
md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
return;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
int ret;
ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp);
@@ -883,7 +884,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
if (ret)
break;
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
if (top->qe.rotate) {
pthread_mutex_lock(&top->qe.mutex);
@@ -893,7 +894,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
}
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
}
static void perf_top__mmap_read(struct perf_top *top)
@@ -1560,6 +1561,17 @@ int cmd_top(int argc, const char **argv)
status = perf_config(perf_top_config, &top);
if (status)
return status;
+ /*
+ * Since the per arch annotation init routine may need the cpuid, read
+ * it here, since we are not getting this from the perf.data header.
+ */
+ status = perf_env__read_cpuid(&perf_env);
+ if (status) {
+ pr_err("Couldn't read the cpuid for this machine: %s\n",
+ str_error_r(errno, errbuf, sizeof(errbuf)));
+ goto out_delete_evlist;
+ }
+ top.evlist->env = &perf_env;
argc = parse_options(argc, argv, options, top_usage, 0);
if (argc)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index bb5130d02155..144d417ddb22 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -77,6 +77,7 @@
#include <sys/sysmacros.h>
#include <linux/ctype.h>
+#include <perf/mmap.h>
#ifndef O_CLOEXEC
# define O_CLOEXEC 02000000
@@ -86,6 +87,33 @@
# define F_LINUX_SPECIFIC_BASE 1024
#endif
+/*
+ * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100
+ */
+struct syscall_arg_fmt {
+ size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
+ bool (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val);
+ unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
+ void *parm;
+ const char *name;
+ u16 nr_entries; // for arrays
+ bool show_zero;
+};
+
+struct syscall_fmt {
+ const char *name;
+ const char *alias;
+ struct {
+ const char *sys_enter,
+ *sys_exit;
+ } bpf_prog_name;
+ struct syscall_arg_fmt arg[6];
+ u8 nr_args;
+ bool errpid;
+ bool timeout;
+ bool hexret;
+};
+
struct trace {
struct perf_tool tool;
struct syscalltbl *sctbl;
@@ -152,6 +180,7 @@ struct trace {
bool print_sample;
bool show_tool_stats;
bool trace_syscalls;
+ bool libtraceevent_print;
bool kernel_syscallchains;
s16 args_alignment;
bool show_tstamp;
@@ -162,6 +191,7 @@ struct trace {
bool force;
bool vfs_getname;
int trace_pgfaults;
+ char *perfconfig_events;
struct {
struct ordered_events data;
u64 last;
@@ -448,6 +478,34 @@ size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const
return printed;
}
+bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret)
+{
+ int i;
+
+ for (i = 0; i < sa->nr_entries; ++i) {
+ if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') {
+ *ret = sa->offset + i;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret)
+{
+ int i;
+
+ for (i = 0; i < sas->nr_entries; ++i) {
+ struct strarray *sa = sas->entries[i];
+
+ if (strarray__strtoul(sa, bf, size, ret))
+ return true;
+ }
+
+ return false;
+}
+
size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -499,6 +557,16 @@ size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *ar
return scnprintf(bf, size, "%ld", arg->val);
}
+static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg)
+{
+ // XXX Hey, maybe for sched:sched_switch prev/next comm fields we can
+ // fill missing comms using thread__set_comm()...
+ // here or in a special syscall_arg__scnprintf_pid_sched_tp...
+ return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries, arg->val);
+}
+
+#define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array
+
static const char *bpf_cmd[] = {
"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
"MAP_GET_NEXT_KEY", "PROG_LOAD",
@@ -694,27 +762,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
#include "trace/beauty/socket_type.c"
#include "trace/beauty/waitid_options.c"
-struct syscall_arg_fmt {
- size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
- unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
- void *parm;
- const char *name;
- bool show_zero;
-};
-
-static struct syscall_fmt {
- const char *name;
- const char *alias;
- struct {
- const char *sys_enter,
- *sys_exit;
- } bpf_prog_name;
- struct syscall_arg_fmt arg[6];
- u8 nr_args;
- bool errpid;
- bool timeout;
- bool hexret;
-} syscall_fmts[] = {
+static struct syscall_fmt syscall_fmts[] = {
{ .name = "access",
.arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
{ .name = "arch_prctl",
@@ -964,24 +1012,35 @@ static int syscall_fmt__cmp(const void *name, const void *fmtp)
return strcmp(name, fmt->name);
}
+static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name)
+{
+ return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
+}
+
static struct syscall_fmt *syscall_fmt__find(const char *name)
{
const int nmemb = ARRAY_SIZE(syscall_fmts);
- return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
+ return __syscall_fmt__find(syscall_fmts, nmemb, name);
}
-static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
+static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias)
{
- int i, nmemb = ARRAY_SIZE(syscall_fmts);
+ int i;
for (i = 0; i < nmemb; ++i) {
- if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0)
- return &syscall_fmts[i];
+ if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0)
+ return &fmts[i];
}
return NULL;
}
+static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
+{
+ const int nmemb = ARRAY_SIZE(syscall_fmts);
+ return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias);
+}
+
/*
* is_exit: is this "exit" or "exit_group"?
* is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
@@ -1453,15 +1512,38 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
return 0;
}
-static int syscall__set_arg_fmts(struct syscall *sc)
+static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = {
+ { .name = "msr", .scnprintf = SCA_X86_MSR, .strtoul = STUL_X86_MSR, }
+};
+
+static int syscall_arg_fmt__cmp(const void *name, const void *fmtp)
+{
+ const struct syscall_arg_fmt *fmt = fmtp;
+ return strcmp(name, fmt->name);
+}
+
+static struct syscall_arg_fmt *
+__syscall_arg_fmt__find_by_name(struct syscall_arg_fmt *fmts, const int nmemb, const char *name)
{
- struct tep_format_field *field, *last_field = NULL;
- int idx = 0, len;
+ return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp);
+}
- for (field = sc->args; field; field = field->next, ++idx) {
+static struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name)
+{
+ const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name);
+ return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
+}
+
+static struct tep_format_field *
+syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field)
+{
+ struct tep_format_field *last_field = NULL;
+ int len;
+
+ for (; field; field = field->next, ++arg) {
last_field = field;
- if (sc->fmt && sc->fmt->arg[idx].scnprintf)
+ if (arg->scnprintf)
continue;
len = strlen(field->name);
@@ -1469,14 +1551,17 @@ static int syscall__set_arg_fmts(struct syscall *sc)
if (strcmp(field->type, "const char *") == 0 &&
((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
strstr(field->name, "path") != NULL))
- sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
+ arg->scnprintf = SCA_FILENAME;
else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
- sc->arg_fmt[idx].scnprintf = SCA_PTR;
+ arg->scnprintf = SCA_PTR;
else if (strcmp(field->type, "pid_t") == 0)
- sc->arg_fmt[idx].scnprintf = SCA_PID;
+ arg->scnprintf = SCA_PID;
else if (strcmp(field->type, "umode_t") == 0)
- sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
- else if ((strcmp(field->type, "int") == 0 ||
+ arg->scnprintf = SCA_MODE_T;
+ else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstarts(field->type, "char")) {
+ arg->scnprintf = SCA_CHAR_ARRAY;
+ arg->nr_entries = field->arraylen;
+ } else if ((strcmp(field->type, "int") == 0 ||
strcmp(field->type, "unsigned int") == 0 ||
strcmp(field->type, "long") == 0) &&
len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
@@ -1487,10 +1572,24 @@ static int syscall__set_arg_fmts(struct syscall *sc)
* 23 unsigned int
* 7 unsigned long
*/
- sc->arg_fmt[idx].scnprintf = SCA_FD;
+ arg->scnprintf = SCA_FD;
+ } else {
+ struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name);
+
+ if (fmt) {
+ arg->scnprintf = fmt->scnprintf;
+ arg->strtoul = fmt->strtoul;
+ }
}
}
+ return last_field;
+}
+
+static int syscall__set_arg_fmts(struct syscall *sc)
+{
+ struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args);
+
if (last_field)
sc->args_size = last_field->offset + last_field->size;
@@ -1552,6 +1651,19 @@ static int trace__read_syscall_info(struct trace *trace, int id)
return syscall__set_arg_fmts(sc);
}
+static int perf_evsel__init_tp_arg_scnprintf(struct evsel *evsel)
+{
+ int nr_args = evsel->tp_format->format.nr_fields;
+
+ evsel->priv = calloc(nr_args, sizeof(struct syscall_arg_fmt));
+ if (evsel->priv != NULL) {
+ syscall_arg_fmt__init_array(evsel->priv, evsel->tp_format->format.fields);
+ return 0;
+ }
+
+ return -ENOMEM;
+}
+
static int intcmp(const void *a, const void *b)
{
const int *one = a, *another = b;
@@ -1680,22 +1792,22 @@ static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
* as mount 'flags' argument that needs ignoring some magic flag, see comment
* in tools/perf/trace/beauty/mount_flags.c
*/
-static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val)
+static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val)
{
- if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val)
- return sc->arg_fmt[arg->idx].mask_val(arg, val);
+ if (fmt && fmt->mask_val)
+ return fmt->mask_val(arg, val);
return val;
}
-static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
- struct syscall_arg *arg, unsigned long val)
+static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size,
+ struct syscall_arg *arg, unsigned long val)
{
- if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
+ if (fmt && fmt->scnprintf) {
arg->val = val;
- if (sc->arg_fmt[arg->idx].parm)
- arg->parm = sc->arg_fmt[arg->idx].parm;
- return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
+ if (fmt->parm)
+ arg->parm = fmt->parm;
+ return fmt->scnprintf(bf, size, arg);
}
return scnprintf(bf, size, "%ld", val);
}
@@ -1736,12 +1848,13 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
if (arg.mask & bit)
continue;
+ arg.fmt = &sc->arg_fmt[arg.idx];
val = syscall_arg__val(&arg, arg.idx);
/*
* Some syscall args need some mask, most don't and
* return val untouched.
*/
- val = syscall__mask_val(sc, &arg, val);
+ val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val);
/*
* Suppress this argument if its value is zero and
@@ -1762,7 +1875,8 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
if (trace->show_arg_names)
printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
- printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
+ printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx],
+ bf + printed, size - printed, &arg, val);
}
} else if (IS_ERR(sc->tp_format)) {
/*
@@ -1777,7 +1891,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
if (printed)
printed += scnprintf(bf + printed, size - printed, ", ");
printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
- printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
+ printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val);
next_arg:
++arg.idx;
bit <<= 1;
@@ -2346,6 +2460,71 @@ static void bpf_output__fprintf(struct trace *trace,
++trace->nr_events_printed;
}
+static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample,
+ struct thread *thread, void *augmented_args, int augmented_args_size)
+{
+ char bf[2048];
+ size_t size = sizeof(bf);
+ struct tep_format_field *field = evsel->tp_format->format.fields;
+ struct syscall_arg_fmt *arg = evsel->priv;
+ size_t printed = 0;
+ unsigned long val;
+ u8 bit = 1;
+ struct syscall_arg syscall_arg = {
+ .augmented = {
+ .size = augmented_args_size,
+ .args = augmented_args,
+ },
+ .idx = 0,
+ .mask = 0,
+ .trace = trace,
+ .thread = thread,
+ .show_string_prefix = trace->show_string_prefix,
+ };
+
+ for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) {
+ if (syscall_arg.mask & bit)
+ continue;
+
+ syscall_arg.fmt = arg;
+ if (field->flags & TEP_FIELD_IS_ARRAY)
+ val = (uintptr_t)(sample->raw_data + field->offset);
+ else
+ val = format_field__intval(field, sample, evsel->needs_swap);
+ /*
+ * Some syscall args need some mask, most don't and
+ * return val untouched.
+ */
+ val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val);
+
+ /*
+ * Suppress this argument if its value is zero and
+ * and we don't have a string associated in an
+ * strarray for it.
+ */
+ if (val == 0 &&
+ !trace->show_zeros &&
+ !((arg->show_zero ||
+ arg->scnprintf == SCA_STRARRAY ||
+ arg->scnprintf == SCA_STRARRAYS) &&
+ arg->parm))
+ continue;
+
+ printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
+
+ /*
+ * XXX Perhaps we should have a show_tp_arg_names,
+ * leaving show_arg_names just for syscalls?
+ */
+ if (1 || trace->show_arg_names)
+ printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
+
+ printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val);
+ }
+
+ return printed + fprintf(trace->output, "%s", bf);
+}
+
static int trace__event_handler(struct trace *trace, struct evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
@@ -2399,16 +2578,20 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
*/
}
- fprintf(trace->output, "%s:", evsel->name);
+ fprintf(trace->output, "%s(", evsel->name);
if (perf_evsel__is_bpf_output(evsel)) {
bpf_output__fprintf(trace, sample);
} else if (evsel->tp_format) {
if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
trace__fprintf_sys_enter(trace, evsel, sample)) {
- event_format__fprintf(evsel->tp_format, sample->cpu,
- sample->raw_data, sample->raw_size,
- trace->output);
+ if (trace->libtraceevent_print) {
+ event_format__fprintf(evsel->tp_format, sample->cpu,
+ sample->raw_data, sample->raw_size,
+ trace->output);
+ } else {
+ trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, 0);
+ }
++trace->nr_events_printed;
if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
@@ -2419,7 +2602,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
}
newline:
- fprintf(trace->output, "\n");
+ fprintf(trace->output, ")\n");
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
@@ -3103,7 +3286,27 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
return err;
}
-#else
+
+static void trace__delete_augmented_syscalls(struct trace *trace)
+{
+ struct evsel *evsel, *tmp;
+
+ evlist__remove(trace->evlist, trace->syscalls.events.augmented);
+ evsel__delete(trace->syscalls.events.augmented);
+ trace->syscalls.events.augmented = NULL;
+
+ evlist__for_each_entry_safe(trace->evlist, tmp, evsel) {
+ if (evsel->bpf_obj == trace->bpf_obj) {
+ evlist__remove(trace->evlist, evsel);
+ evsel__delete(evsel);
+ }
+
+ }
+
+ bpf_object__close(trace->bpf_obj);
+ trace->bpf_obj = NULL;
+}
+#else // HAVE_LIBBPF_SUPPORT
static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
{
return 0;
@@ -3124,8 +3327,27 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_
{
return 0;
}
+
+static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused)
+{
+}
#endif // HAVE_LIBBPF_SUPPORT
+static bool trace__only_augmented_syscalls_evsels(struct trace *trace)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(trace->evlist, evsel) {
+ if (evsel == trace->syscalls.events.augmented ||
+ evsel->bpf_obj == trace->bpf_obj)
+ continue;
+
+ return false;
+ }
+
+ return true;
+}
+
static int trace__set_ev_qualifier_filter(struct trace *trace)
{
if (trace->syscalls.map)
@@ -3175,7 +3397,7 @@ static int trace__set_filter_loop_pids(struct trace *trace)
thread = parent;
}
- err = perf_evlist__set_tp_filter_pids(trace->evlist, nr, pids);
+ err = perf_evlist__append_tp_filter_pids(trace->evlist, nr, pids);
if (!err && trace->filter_pids.map)
err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
@@ -3192,8 +3414,8 @@ static int trace__set_filter_pids(struct trace *trace)
* we fork the workload in perf_evlist__prepare_workload.
*/
if (trace->filter_pids.nr > 0) {
- err = perf_evlist__set_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
- trace->filter_pids.entries);
+ err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
+ trace->filter_pids.entries);
if (!err && trace->filter_pids.map) {
err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
trace->filter_pids.entries);
@@ -3263,6 +3485,133 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
return __trace__deliver_event(trace, event->event);
}
+static struct syscall_arg_fmt *perf_evsel__syscall_arg_fmt(struct evsel *evsel, char *arg)
+{
+ struct tep_format_field *field;
+ struct syscall_arg_fmt *fmt = evsel->priv;
+
+ if (evsel->tp_format == NULL || fmt == NULL)
+ return NULL;
+
+ for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
+ if (strcmp(field->name, arg) == 0)
+ return fmt;
+
+ return NULL;
+}
+
+static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel)
+{
+ char *tok, *left = evsel->filter, *new_filter = evsel->filter;
+
+ while ((tok = strpbrk(left, "=<>!")) != NULL) {
+ char *right = tok + 1, *right_end;
+
+ if (*right == '=')
+ ++right;
+
+ while (isspace(*right))
+ ++right;
+
+ if (*right == '\0')
+ break;
+
+ while (!isalpha(*left))
+ if (++left == tok) {
+ /*
+ * Bail out, can't find the name of the argument that is being
+ * used in the filter, let it try to set this filter, will fail later.
+ */
+ return 0;
+ }
+
+ right_end = right + 1;
+ while (isalnum(*right_end) || *right_end == '_')
+ ++right_end;
+
+ if (isalpha(*right)) {
+ struct syscall_arg_fmt *fmt;
+ int left_size = tok - left,
+ right_size = right_end - right;
+ char arg[128];
+
+ while (isspace(left[left_size - 1]))
+ --left_size;
+
+ scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
+
+ fmt = perf_evsel__syscall_arg_fmt(evsel, arg);
+ if (fmt == NULL) {
+ pr_debug("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
+ arg, evsel->name, evsel->filter);
+ return -1;
+ }
+
+ pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> ",
+ arg, (int)(right - tok), tok, right_size, right);
+
+ if (fmt->strtoul) {
+ u64 val;
+ if (fmt->strtoul(right, right_size, NULL, &val)) {
+ char *n, expansion[19];
+ int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val);
+ int expansion_offset = right - new_filter;
+
+ pr_debug("%s", expansion);
+
+ if (asprintf(&n, "%.*s%s%s", expansion_offset, new_filter, expansion, right_end) < 0) {
+ pr_debug(" out of memory!\n");
+ free(new_filter);
+ return -1;
+ }
+ if (new_filter != evsel->filter)
+ free(new_filter);
+ left = n + expansion_offset + expansion_lenght;
+ new_filter = n;
+ } else {
+ pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n",
+ right_size, right, arg, evsel->name, evsel->filter);
+ return -1;
+ }
+ } else {
+ pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n",
+ arg, evsel->name, evsel->filter);
+ return -1;
+ }
+
+ pr_debug("\n");
+ } else {
+ left = right_end;
+ }
+ }
+
+ if (new_filter != evsel->filter) {
+ pr_debug("New filter for %s: %s\n", evsel->name, new_filter);
+ perf_evsel__set_filter(evsel, new_filter);
+ free(new_filter);
+ }
+
+ return 0;
+}
+
+static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel)
+{
+ struct evlist *evlist = trace->evlist;
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->filter == NULL)
+ continue;
+
+ if (trace__expand_filter(trace, evsel)) {
+ *err_evsel = evsel;
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static int trace__run(struct trace *trace, int argc, const char **argv)
{
struct evlist *evlist = trace->evlist;
@@ -3302,7 +3651,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
trace__sched_stat_runtime))
goto out_error_sched_stat_runtime;
-
/*
* If a global cgroup was set, apply it to all the events without an
* explicit cgroup. I.e.:
@@ -3405,6 +3753,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
*/
trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));
+ err = trace__expand_filters(trace, &evsel);
+ if (err)
+ goto out_delete_evlist;
err = perf_evlist__apply_filters(evlist, &evsel);
if (err < 0)
goto out_error_apply_filters;
@@ -3450,17 +3801,17 @@ again:
struct mmap *md;
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
continue;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
++trace->nr_events;
err = trace__deliver_event(trace, event);
if (err)
goto out_disable;
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
if (interrupted)
goto out_disable;
@@ -3470,7 +3821,7 @@ again:
draining = true;
}
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
}
if (trace->nr_events == before) {
@@ -3858,12 +4209,14 @@ static int parse_pagefaults(const struct option *opt, const char *str,
return 0;
}
-static void evlist__set_evsel_handler(struct evlist *evlist, void *handler)
+static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler)
{
struct evsel *evsel;
- evlist__for_each_entry(evlist, evsel)
- evsel->handler = handler;
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->handler == NULL)
+ evsel->handler = handler;
+ }
}
static int evlist__set_syscall_tp_fields(struct evlist *evlist)
@@ -3874,8 +4227,10 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist)
if (evsel->priv || !evsel->tp_format)
continue;
- if (strcmp(evsel->tp_format->system, "syscalls"))
+ if (strcmp(evsel->tp_format->system, "syscalls")) {
+ perf_evsel__init_tp_arg_scnprintf(evsel);
continue;
+ }
if (perf_evsel__init_syscall_tp(evsel))
return -1;
@@ -4029,15 +4384,11 @@ static int trace__config(const char *var, const char *value, void *arg)
int err = 0;
if (!strcmp(var, "trace.add_events")) {
- struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
- "event selector. use 'perf list' to list available events",
- parse_events_option);
- /*
- * We can't propagate parse_event_option() return, as it is 1
- * for failure while perf_config() expects -1.
- */
- if (parse_events_option(&o, value, 0))
- err = -1;
+ trace->perfconfig_events = strdup(value);
+ if (trace->perfconfig_events == NULL) {
+ pr_err("Not enough memory for %s\n", "trace.add_events");
+ return -1;
+ }
} else if (!strcmp(var, "trace.show_timestamp")) {
trace->show_tstamp = perf_config_bool(var, value);
} else if (!strcmp(var, "trace.show_duration")) {
@@ -4061,6 +4412,11 @@ static int trace__config(const char *var, const char *value, void *arg)
int args_alignment = 0;
if (perf_config_int(&args_alignment, var, value) == 0)
trace->args_alignment = args_alignment;
+ } else if (!strcmp(var, "trace.tracepoint_beautifiers")) {
+ if (strcasecmp(value, "libtraceevent") == 0)
+ trace->libtraceevent_print = true;
+ else if (strcasecmp(value, "libbeauty") == 0)
+ trace->libtraceevent_print = false;
}
out:
return err;
@@ -4103,6 +4459,8 @@ int cmd_trace(int argc, const char **argv)
OPT_CALLBACK('e', "event", &trace, "event",
"event/syscall selector. use 'perf list' to list available events",
trace__parse_events_option),
+ OPT_CALLBACK(0, "filter", &trace.evlist, "filter",
+ "event filter", parse_filter),
OPT_BOOLEAN(0, "comm", &trace.show_comm,
"show the thread COMM next to its id"),
OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
@@ -4150,6 +4508,8 @@ int cmd_trace(int argc, const char **argv)
OPT_CALLBACK(0, "call-graph", &trace.opts,
"record_mode[,record_size]", record_callchain_help,
&record_parse_callchain_opt),
+ OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
+ "Use libtraceevent to print the tracepoint arguments."),
OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
"Show the kernel callchains on the syscall exit path"),
OPT_ULONG(0, "max-events", &trace.max_events,
@@ -4210,6 +4570,37 @@ int cmd_trace(int argc, const char **argv)
argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+ /*
+ * Here we already passed thru trace__parse_events_option() and it has
+ * already figured out if -e syscall_name, if not but if --event
+ * foo:bar was used, the user is interested _just_ in those, say,
+ * tracepoint events, not in the strace-like syscall-name-based mode.
+ *
+ * This is important because we need to check if strace-like mode is
+ * needed to decided if we should filter out the eBPF
+ * __augmented_syscalls__ code, if it is in the mix, say, via
+ * .perfconfig trace.add_events, and filter those out.
+ */
+ if (!trace.trace_syscalls && !trace.trace_pgfaults &&
+ trace.evlist->core.nr_entries == 0 /* Was --events used? */) {
+ trace.trace_syscalls = true;
+ }
+ /*
+ * Now that we have --verbose figured out, lets see if we need to parse
+ * events from .perfconfig, so that if those events fail parsing, say some
+ * BPF program fails, then we'll be able to use --verbose to see what went
+ * wrong in more detail.
+ */
+ if (trace.perfconfig_events != NULL) {
+ struct parse_events_error parse_err = { .idx = 0, };
+
+ err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err);
+ if (err) {
+ parse_events_print_error(&parse_err, trace.perfconfig_events);
+ goto out;
+ }
+ }
+
if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
usage_with_options_msg(trace_usage, trace_options,
"cgroup monitoring only available in system-wide mode");
@@ -4238,9 +4629,45 @@ int cmd_trace(int argc, const char **argv)
trace.bpf_obj = evsel->bpf_obj;
- trace__set_bpf_map_filtered_pids(&trace);
- trace__set_bpf_map_syscalls(&trace);
- trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
+ /*
+ * If we have _just_ the augmenter event but don't have a
+ * explicit --syscalls, then assume we want all strace-like
+ * syscalls:
+ */
+ if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace))
+ trace.trace_syscalls = true;
+ /*
+ * So, if we have a syscall augmenter, but trace_syscalls, aka
+ * strace-like syscall tracing is not set, then we need to trow
+ * away the augmenter, i.e. all the events that were created
+ * from that BPF object file.
+ *
+ * This is more to fix the current .perfconfig trace.add_events
+ * style of setting up the strace-like eBPF based syscall point
+ * payload augmenter.
+ *
+ * All this complexity will be avoided by adding an alternative
+ * to trace.add_events in the form of
+ * trace.bpf_augmented_syscalls, that will be only parsed if we
+ * need it.
+ *
+ * .perfconfig trace.add_events is still useful if we want, for
+ * instance, have msr_write.msr in some .perfconfig profile based
+ * 'perf trace --config determinism.profile' mode, where for some
+ * particular goal/workload type we want a set of events and
+ * output mode (with timings, etc) instead of having to add
+ * all via the command line.
+ *
+ * Also --config to specify an alternate .perfconfig file needs
+ * to be implemented.
+ */
+ if (!trace.trace_syscalls) {
+ trace__delete_augmented_syscalls(&trace);
+ } else {
+ trace__set_bpf_map_filtered_pids(&trace);
+ trace__set_bpf_map_syscalls(&trace);
+ trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
+ }
}
err = bpf__setup_stdout(trace.evlist);
@@ -4287,7 +4714,7 @@ int cmd_trace(int argc, const char **argv)
}
if (trace.evlist->core.nr_entries > 0) {
- evlist__set_evsel_handler(trace.evlist, trace__event_handler);
+ evlist__set_default_evsel_handler(trace.evlist, trace__event_handler);
if (evlist__set_syscall_tp_fields(trace.evlist)) {
perror("failed to set syscalls:* tracepoint fields");
goto out;
@@ -4383,11 +4810,6 @@ init_augmented_syscall_tp:
if (trace.summary_only)
trace.summary = trace.summary_only;
- if (!trace.trace_syscalls && !trace.trace_pgfaults &&
- trace.evlist->core.nr_entries == 0 /* Was --events used? */) {
- trace.trace_syscalls = true;
- }
-
if (output_name != NULL) {
err = trace__open_output(&trace, output_name);
if (err < 0) {
@@ -4426,5 +4848,6 @@ out_close:
if (output_name != NULL)
fclose(trace.output);
out:
+ zfree(&trace.perfconfig_events);
return err;
}
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index cea13cb987d0..93c46d38024e 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -28,6 +28,7 @@ arch/x86/include/asm/disabled-features.h
arch/x86/include/asm/required-features.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/inat_types.h
+arch/x86/include/asm/msr-index.h
arch/x86/include/uapi/asm/prctl.h
arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
diff --git a/tools/perf/lib/Build b/tools/perf/lib/Build
index c31f1c111f8f..2ef9a4ec6d99 100644
--- a/tools/perf/lib/Build
+++ b/tools/perf/lib/Build
@@ -3,6 +3,7 @@ libperf-y += cpumap.o
libperf-y += threadmap.o
libperf-y += evsel.o
libperf-y += evlist.o
+libperf-y += mmap.o
libperf-y += zalloc.o
libperf-y += xyarray.o
libperf-y += lib.o
diff --git a/tools/perf/lib/Makefile b/tools/perf/lib/Makefile
index 85ccb8c439a4..0889c9c3ec19 100644
--- a/tools/perf/lib/Makefile
+++ b/tools/perf/lib/Makefile
@@ -172,8 +172,9 @@ install_headers:
$(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644);
- $(call do_install,include/perf/event.h,$(prefix)/include/perf,644);
+ $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \
+ $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \
+ $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644);
install_pkgconfig: $(LIBPERF_PC)
$(call QUIET_INSTALL, $(LIBPERF_PC)) \
diff --git a/tools/perf/lib/core.c b/tools/perf/lib/core.c
index d0b9ae422b9f..58fc894b76c5 100644
--- a/tools/perf/lib/core.c
+++ b/tools/perf/lib/core.c
@@ -5,11 +5,12 @@
#include <stdio.h>
#include <stdarg.h>
#include <unistd.h>
+#include <linux/compiler.h>
#include <perf/core.h>
#include <internal/lib.h>
#include "internal.h"
-static int __base_pr(enum libperf_print_level level, const char *format,
+static int __base_pr(enum libperf_print_level level __maybe_unused, const char *format,
va_list args)
{
return vfprintf(stderr, format, args);
diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c
index d1496fee810c..65045614c938 100644
--- a/tools/perf/lib/evlist.c
+++ b/tools/perf/lib/evlist.c
@@ -8,13 +8,20 @@
#include <internal/evlist.h>
#include <internal/evsel.h>
#include <internal/xyarray.h>
+#include <internal/mmap.h>
+#include <internal/cpumap.h>
+#include <internal/threadmap.h>
+#include <internal/xyarray.h>
+#include <internal/lib.h>
#include <linux/zalloc.h>
+#include <sys/ioctl.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <signal.h>
#include <poll.h>
+#include <sys/mman.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <api/fd/array.h>
@@ -27,6 +34,7 @@ void perf_evlist__init(struct perf_evlist *evlist)
INIT_HLIST_HEAD(&evlist->heads[i]);
INIT_LIST_HEAD(&evlist->entries);
evlist->nr_entries = 0;
+ fdarray__init(&evlist->pollfd, 64);
}
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
@@ -101,8 +109,36 @@ perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev)
return next;
}
+static void perf_evlist__purge(struct perf_evlist *evlist)
+{
+ struct perf_evsel *pos, *n;
+
+ perf_evlist__for_each_entry_safe(evlist, n, pos) {
+ list_del_init(&pos->node);
+ perf_evsel__delete(pos);
+ }
+
+ evlist->nr_entries = 0;
+}
+
+void perf_evlist__exit(struct perf_evlist *evlist)
+{
+ perf_cpu_map__put(evlist->cpus);
+ perf_thread_map__put(evlist->threads);
+ evlist->cpus = NULL;
+ evlist->threads = NULL;
+ fdarray__exit(&evlist->pollfd);
+}
+
void perf_evlist__delete(struct perf_evlist *evlist)
{
+ if (evlist == NULL)
+ return;
+
+ perf_evlist__munmap(evlist);
+ perf_evlist__close(evlist);
+ perf_evlist__purge(evlist);
+ perf_evlist__exit(evlist);
free(evlist);
}
@@ -277,7 +313,295 @@ int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
return pos;
}
+static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
+ void *arg __maybe_unused)
+{
+ struct perf_mmap *map = fda->priv[fd].ptr;
+
+ if (map)
+ perf_mmap__put(map);
+}
+
+int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
+{
+ return fdarray__filter(&evlist->pollfd, revents_and_mask,
+ perf_evlist__munmap_filtered, NULL);
+}
+
int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
return fdarray__poll(&evlist->pollfd, timeout);
}
+
+static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite)
+{
+ int i;
+ struct perf_mmap *map;
+
+ evlist->nr_mmaps = perf_cpu_map__nr(evlist->cpus);
+ if (perf_cpu_map__empty(evlist->cpus))
+ evlist->nr_mmaps = perf_thread_map__nr(evlist->threads);
+
+ map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+ if (!map)
+ return NULL;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ /*
+ * When the perf_mmap() call is made we grab one refcount, plus
+ * one extra to let perf_mmap__consume() get the last
+ * events after all real references (perf_mmap__get()) are
+ * dropped.
+ *
+ * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
+ * thus does perf_mmap__get() on it.
+ */
+ perf_mmap__init(&map[i], overwrite, NULL);
+ }
+
+ return map;
+}
+
+static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
+ struct perf_evsel *evsel, int idx, int cpu,
+ int thread)
+{
+ struct perf_sample_id *sid = SID(evsel, cpu, thread);
+
+ sid->idx = idx;
+ if (evlist->cpus && cpu >= 0)
+ sid->cpu = evlist->cpus->map[cpu];
+ else
+ sid->cpu = -1;
+ if (!evsel->system_wide && evlist->threads && thread >= 0)
+ sid->tid = perf_thread_map__pid(evlist->threads, thread);
+ else
+ sid->tid = -1;
+}
+
+static struct perf_mmap*
+perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
+{
+ struct perf_mmap *map = &evlist->mmap[idx];
+
+ if (overwrite) {
+ if (!evlist->mmap_ovw) {
+ evlist->mmap_ovw = perf_evlist__alloc_mmap(evlist, true);
+ if (!evlist->mmap_ovw)
+ return NULL;
+ }
+ map = &evlist->mmap_ovw[idx];
+ }
+
+ return map;
+}
+
+#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))
+
+static int
+perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
+ int output, int cpu)
+{
+ return perf_mmap__mmap(map, mp, output, cpu);
+}
+
+static int
+mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
+ int idx, struct perf_mmap_param *mp, int cpu_idx,
+ int thread, int *_output, int *_output_overwrite)
+{
+ int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
+ struct perf_evsel *evsel;
+ int revent;
+
+ perf_evlist__for_each_entry(evlist, evsel) {
+ bool overwrite = evsel->attr.write_backward;
+ struct perf_mmap *map;
+ int *output, fd, cpu;
+
+ if (evsel->system_wide && thread)
+ continue;
+
+ cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu);
+ if (cpu == -1)
+ continue;
+
+ map = ops->get(evlist, overwrite, idx);
+ if (map == NULL)
+ return -ENOMEM;
+
+ if (overwrite) {
+ mp->prot = PROT_READ;
+ output = _output_overwrite;
+ } else {
+ mp->prot = PROT_READ | PROT_WRITE;
+ output = _output;
+ }
+
+ fd = FD(evsel, cpu, thread);
+
+ if (*output == -1) {
+ *output = fd;
+
+ /*
+ * The last one will be done at perf_mmap__consume(), so that we
+ * make sure we don't prevent tools from consuming every last event in
+ * the ring buffer.
+ *
+ * I.e. we can get the POLLHUP meaning that the fd doesn't exist
+ * anymore, but the last events for it are still in the ring buffer,
+ * waiting to be consumed.
+ *
+ * Tools can chose to ignore this at their own discretion, but the
+ * evlist layer can't just drop it when filtering events in
+ * perf_evlist__filter_pollfd().
+ */
+ refcount_set(&map->refcnt, 2);
+
+ if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
+ return -1;
+ } else {
+ if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
+ return -1;
+
+ perf_mmap__get(map);
+ }
+
+ revent = !overwrite ? POLLIN : 0;
+
+ if (!evsel->system_wide &&
+ perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) {
+ perf_mmap__put(map);
+ return -1;
+ }
+
+ if (evsel->attr.read_format & PERF_FORMAT_ID) {
+ if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
+ fd) < 0)
+ return -1;
+ perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
+ thread);
+ }
+ }
+
+ return 0;
+}
+
+static int
+mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
+ struct perf_mmap_param *mp)
+{
+ int thread;
+ int nr_threads = perf_thread_map__nr(evlist->threads);
+
+ for (thread = 0; thread < nr_threads; thread++) {
+ int output = -1;
+ int output_overwrite = -1;
+
+ if (ops->idx)
+ ops->idx(evlist, mp, thread, false);
+
+ if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
+ &output, &output_overwrite))
+ goto out_unmap;
+ }
+
+ return 0;
+
+out_unmap:
+ perf_evlist__munmap(evlist);
+ return -1;
+}
+
+static int
+mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
+ struct perf_mmap_param *mp)
+{
+ int nr_threads = perf_thread_map__nr(evlist->threads);
+ int nr_cpus = perf_cpu_map__nr(evlist->cpus);
+ int cpu, thread;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ int output = -1;
+ int output_overwrite = -1;
+
+ if (ops->idx)
+ ops->idx(evlist, mp, cpu, true);
+
+ for (thread = 0; thread < nr_threads; thread++) {
+ if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
+ thread, &output, &output_overwrite))
+ goto out_unmap;
+ }
+ }
+
+ return 0;
+
+out_unmap:
+ perf_evlist__munmap(evlist);
+ return -1;
+}
+
+int perf_evlist__mmap_ops(struct perf_evlist *evlist,
+ struct perf_evlist_mmap_ops *ops,
+ struct perf_mmap_param *mp)
+{
+ struct perf_evsel *evsel;
+ const struct perf_cpu_map *cpus = evlist->cpus;
+ const struct perf_thread_map *threads = evlist->threads;
+
+ if (!ops || !ops->get || !ops->mmap)
+ return -EINVAL;
+
+ if (!evlist->mmap)
+ evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
+ if (!evlist->mmap)
+ return -ENOMEM;
+
+ perf_evlist__for_each_entry(evlist, evsel) {
+ if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+ evsel->sample_id == NULL &&
+ perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
+ return -ENOMEM;
+ }
+
+ if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
+ return -ENOMEM;
+
+ if (perf_cpu_map__empty(cpus))
+ return mmap_per_thread(evlist, ops, mp);
+
+ return mmap_per_cpu(evlist, ops, mp);
+}
+
+int perf_evlist__mmap(struct perf_evlist *evlist, int pages)
+{
+ struct perf_mmap_param mp;
+ struct perf_evlist_mmap_ops ops = {
+ .get = perf_evlist__mmap_cb_get,
+ .mmap = perf_evlist__mmap_cb_mmap,
+ };
+
+ evlist->mmap_len = (pages + 1) * page_size;
+ mp.mask = evlist->mmap_len - page_size - 1;
+
+ return perf_evlist__mmap_ops(evlist, &ops, &mp);
+}
+
+void perf_evlist__munmap(struct perf_evlist *evlist)
+{
+ int i;
+
+ if (evlist->mmap) {
+ for (i = 0; i < evlist->nr_mmaps; i++)
+ perf_mmap__munmap(&evlist->mmap[i]);
+ }
+
+ if (evlist->mmap_ovw) {
+ for (i = 0; i < evlist->nr_mmaps; i++)
+ perf_mmap__munmap(&evlist->mmap_ovw[i]);
+ }
+
+ zfree(&evlist->mmap);
+ zfree(&evlist->mmap_ovw);
+}
diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h
index 9f440ab12b76..be0b25a70730 100644
--- a/tools/perf/lib/include/internal/evlist.h
+++ b/tools/perf/lib/include/internal/evlist.h
@@ -11,6 +11,7 @@
struct perf_cpu_map;
struct perf_thread_map;
+struct perf_mmap_param;
struct perf_evlist {
struct list_head entries;
@@ -22,12 +23,33 @@ struct perf_evlist {
size_t mmap_len;
struct fdarray pollfd;
struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
+ struct perf_mmap *mmap;
+ struct perf_mmap *mmap_ovw;
+};
+
+typedef void
+(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool);
+typedef struct perf_mmap*
+(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);
+typedef int
+(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int);
+
+struct perf_evlist_mmap_ops {
+ perf_evlist_mmap__cb_idx_t idx;
+ perf_evlist_mmap__cb_get_t get;
+ perf_evlist_mmap__cb_mmap_t mmap;
};
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
void *ptr, short revent);
+int perf_evlist__mmap_ops(struct perf_evlist *evlist,
+ struct perf_evlist_mmap_ops *ops,
+ struct perf_mmap_param *mp);
+
+void perf_evlist__exit(struct perf_evlist *evlist);
+
/**
* __perf_evlist__for_each_entry - iterate thru all the evsels
* @list: list_head instance to iterate
@@ -60,6 +82,24 @@ int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
#define perf_evlist__for_each_entry_reverse(evlist, evsel) \
__perf_evlist__for_each_entry_reverse(&(evlist)->entries, evsel)
+/**
+ * __perf_evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @list: list_head instance to iterate
+ * @tmp: struct evsel temp iterator
+ * @evsel: struct evsel iterator
+ */
+#define __perf_evlist__for_each_entry_safe(list, tmp, evsel) \
+ list_for_each_entry_safe(evsel, tmp, list, node)
+
+/**
+ * perf_evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ * @tmp: struct evsel temp iterator
+ */
+#define perf_evlist__for_each_entry_safe(evlist, tmp, evsel) \
+ __perf_evlist__for_each_entry_safe(&(evlist)->entries, tmp, evsel)
+
static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
{
return list_entry(evlist->entries.next, struct perf_evsel, node);
diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h
index ba1e519c15b9..ee536c4441bb 100644
--- a/tools/perf/lib/include/internal/mmap.h
+++ b/tools/perf/lib/include/internal/mmap.h
@@ -10,23 +10,45 @@
/* perf sample has 16 bits size limit */
#define PERF_SAMPLE_MAX_SIZE (1 << 16)
+struct perf_mmap;
+
+typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map);
+
/**
* struct perf_mmap - perf's ring buffer mmap details
*
* @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
*/
struct perf_mmap {
- void *base;
- int mask;
- int fd;
- int cpu;
- refcount_t refcnt;
- u64 prev;
- u64 start;
- u64 end;
- bool overwrite;
- u64 flush;
- char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+ void *base;
+ int mask;
+ int fd;
+ int cpu;
+ refcount_t refcnt;
+ u64 prev;
+ u64 start;
+ u64 end;
+ bool overwrite;
+ u64 flush;
+ libperf_unmap_cb_t unmap_cb;
+ char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+};
+
+struct perf_mmap_param {
+ int prot;
+ int mask;
};
+size_t perf_mmap__mmap_len(struct perf_mmap *map);
+
+void perf_mmap__init(struct perf_mmap *map, bool overwrite,
+ libperf_unmap_cb_t unmap_cb);
+int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
+ int fd, int cpu);
+void perf_mmap__munmap(struct perf_mmap *map);
+void perf_mmap__get(struct perf_mmap *map);
+void perf_mmap__put(struct perf_mmap *map);
+
+u64 perf_mmap__read_head(struct perf_mmap *map);
+
#endif /* __LIBPERF_INTERNAL_MMAP_H */
diff --git a/tools/perf/lib/include/perf/core.h b/tools/perf/lib/include/perf/core.h
index cfd70e720c1c..2a80e4b6f819 100644
--- a/tools/perf/lib/include/perf/core.h
+++ b/tools/perf/lib/include/perf/core.h
@@ -12,6 +12,8 @@ enum libperf_print_level {
LIBPERF_WARN,
LIBPERF_INFO,
LIBPERF_DEBUG,
+ LIBPERF_DEBUG2,
+ LIBPERF_DEBUG3,
};
typedef int (*libperf_print_fn_t)(enum libperf_print_level level,
diff --git a/tools/perf/lib/include/perf/evlist.h b/tools/perf/lib/include/perf/evlist.h
index 8a2ce0757ab2..16f526e74d13 100644
--- a/tools/perf/lib/include/perf/evlist.h
+++ b/tools/perf/lib/include/perf/evlist.h
@@ -32,5 +32,10 @@ LIBPERF_API void perf_evlist__set_maps(struct perf_evlist *evlist,
struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
LIBPERF_API int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
+LIBPERF_API int perf_evlist__filter_pollfd(struct perf_evlist *evlist,
+ short revents_and_mask);
+
+LIBPERF_API int perf_evlist__mmap(struct perf_evlist *evlist, int pages);
+LIBPERF_API void perf_evlist__munmap(struct perf_evlist *evlist);
#endif /* __LIBPERF_EVLIST_H */
diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/perf/lib/include/perf/mmap.h
new file mode 100644
index 000000000000..9508ad90d8b9
--- /dev/null
+++ b/tools/perf/lib/include/perf/mmap.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_MMAP_H
+#define __LIBPERF_MMAP_H
+
+#include <perf/core.h>
+
+struct perf_mmap;
+union perf_event;
+
+LIBPERF_API void perf_mmap__consume(struct perf_mmap *map);
+LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map);
+LIBPERF_API void perf_mmap__read_done(struct perf_mmap *map);
+LIBPERF_API union perf_event *perf_mmap__read_event(struct perf_mmap *map);
+
+#endif /* __LIBPERF_MMAP_H */
diff --git a/tools/perf/lib/internal.h b/tools/perf/lib/internal.h
index dc92f241732e..37db745e1502 100644
--- a/tools/perf/lib/internal.h
+++ b/tools/perf/lib/internal.h
@@ -14,5 +14,7 @@ do { \
#define pr_warning(fmt, ...) __pr(LIBPERF_WARN, fmt, ##__VA_ARGS__)
#define pr_info(fmt, ...) __pr(LIBPERF_INFO, fmt, ##__VA_ARGS__)
#define pr_debug(fmt, ...) __pr(LIBPERF_DEBUG, fmt, ##__VA_ARGS__)
+#define pr_debug2(fmt, ...) __pr(LIBPERF_DEBUG2, fmt, ##__VA_ARGS__)
+#define pr_debug3(fmt, ...) __pr(LIBPERF_DEBUG3, fmt, ##__VA_ARGS__)
#endif /* __LIBPERF_INTERNAL_H */
diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map
index ab8dbde1136c..2184aba36c3f 100644
--- a/tools/perf/lib/libperf.map
+++ b/tools/perf/lib/libperf.map
@@ -40,6 +40,13 @@ LIBPERF_0.0.1 {
perf_evlist__next;
perf_evlist__set_maps;
perf_evlist__poll;
+ perf_evlist__mmap;
+ perf_evlist__munmap;
+ perf_evlist__filter_pollfd;
+ perf_mmap__consume;
+ perf_mmap__read_init;
+ perf_mmap__read_done;
+ perf_mmap__read_event;
local:
*;
};
diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c
new file mode 100644
index 000000000000..0752c193b0fb
--- /dev/null
+++ b/tools/perf/lib/mmap.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/mman.h>
+#include <inttypes.h>
+#include <asm/bug.h>
+#include <errno.h>
+#include <string.h>
+#include <linux/ring_buffer.h>
+#include <linux/perf_event.h>
+#include <perf/mmap.h>
+#include <perf/event.h>
+#include <internal/mmap.h>
+#include <internal/lib.h>
+#include <linux/kernel.h>
+#include "internal.h"
+
+void perf_mmap__init(struct perf_mmap *map, bool overwrite,
+ libperf_unmap_cb_t unmap_cb)
+{
+ map->fd = -1;
+ map->overwrite = overwrite;
+ map->unmap_cb = unmap_cb;
+ refcount_set(&map->refcnt, 0);
+}
+
+size_t perf_mmap__mmap_len(struct perf_mmap *map)
+{
+ return map->mask + 1 + page_size;
+}
+
+int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
+ int fd, int cpu)
+{
+ map->prev = 0;
+ map->mask = mp->mask;
+ map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
+ MAP_SHARED, fd, 0);
+ if (map->base == MAP_FAILED) {
+ map->base = NULL;
+ return -1;
+ }
+
+ map->fd = fd;
+ map->cpu = cpu;
+ return 0;
+}
+
+void perf_mmap__munmap(struct perf_mmap *map)
+{
+ if (map && map->base != NULL) {
+ munmap(map->base, perf_mmap__mmap_len(map));
+ map->base = NULL;
+ map->fd = -1;
+ refcount_set(&map->refcnt, 0);
+ }
+ if (map && map->unmap_cb)
+ map->unmap_cb(map);
+}
+
+void perf_mmap__get(struct perf_mmap *map)
+{
+ refcount_inc(&map->refcnt);
+}
+
+void perf_mmap__put(struct perf_mmap *map)
+{
+ BUG_ON(map->base && refcount_read(&map->refcnt) == 0);
+
+ if (refcount_dec_and_test(&map->refcnt))
+ perf_mmap__munmap(map);
+}
+
+static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
+{
+ ring_buffer_write_tail(md->base, tail);
+}
+
+u64 perf_mmap__read_head(struct perf_mmap *map)
+{
+ return ring_buffer_read_head(map->base);
+}
+
+static bool perf_mmap__empty(struct perf_mmap *map)
+{
+ struct perf_event_mmap_page *pc = map->base;
+
+ return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
+}
+
+void perf_mmap__consume(struct perf_mmap *map)
+{
+ if (!map->overwrite) {
+ u64 old = map->prev;
+
+ perf_mmap__write_tail(map, old);
+ }
+
+ if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
+ perf_mmap__put(map);
+}
+
+static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
+{
+ struct perf_event_header *pheader;
+ u64 evt_head = *start;
+ int size = mask + 1;
+
+ pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
+ pheader = (struct perf_event_header *)(buf + (*start & mask));
+ while (true) {
+ if (evt_head - *start >= (unsigned int)size) {
+ pr_debug("Finished reading overwrite ring buffer: rewind\n");
+ if (evt_head - *start > (unsigned int)size)
+ evt_head -= pheader->size;
+ *end = evt_head;
+ return 0;
+ }
+
+ pheader = (struct perf_event_header *)(buf + (evt_head & mask));
+
+ if (pheader->size == 0) {
+ pr_debug("Finished reading overwrite ring buffer: get start\n");
+ *end = evt_head;
+ return 0;
+ }
+
+ evt_head += pheader->size;
+ pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
+ }
+ WARN_ONCE(1, "Shouldn't get here\n");
+ return -1;
+}
+
+/*
+ * Report the start and end of the available data in ringbuffer
+ */
+static int __perf_mmap__read_init(struct perf_mmap *md)
+{
+ u64 head = perf_mmap__read_head(md);
+ u64 old = md->prev;
+ unsigned char *data = md->base + page_size;
+ unsigned long size;
+
+ md->start = md->overwrite ? head : old;
+ md->end = md->overwrite ? old : head;
+
+ if ((md->end - md->start) < md->flush)
+ return -EAGAIN;
+
+ size = md->end - md->start;
+ if (size > (unsigned long)(md->mask) + 1) {
+ if (!md->overwrite) {
+ WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
+
+ md->prev = head;
+ perf_mmap__consume(md);
+ return -EAGAIN;
+ }
+
+ /*
+ * Backward ring buffer is full. We still have a chance to read
+ * most of data from it.
+ */
+ if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int perf_mmap__read_init(struct perf_mmap *map)
+{
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!refcount_read(&map->refcnt))
+ return -ENOENT;
+
+ return __perf_mmap__read_init(map);
+}
+
+/*
+ * Mandatory for overwrite mode
+ * The direction of overwrite mode is backward.
+ * The last perf_mmap__read() will set tail to map->core.prev.
+ * Need to correct the map->core.prev to head which is the end of next read.
+ */
+void perf_mmap__read_done(struct perf_mmap *map)
+{
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!refcount_read(&map->refcnt))
+ return;
+
+ map->prev = perf_mmap__read_head(map);
+}
+
+/* When check_messup is true, 'end' must points to a good entry */
+static union perf_event *perf_mmap__read(struct perf_mmap *map,
+ u64 *startp, u64 end)
+{
+ unsigned char *data = map->base + page_size;
+ union perf_event *event = NULL;
+ int diff = end - *startp;
+
+ if (diff >= (int)sizeof(event->header)) {
+ size_t size;
+
+ event = (union perf_event *)&data[*startp & map->mask];
+ size = event->header.size;
+
+ if (size < sizeof(event->header) || diff < (int)size)
+ return NULL;
+
+ /*
+ * Event straddles the mmap boundary -- header should always
+ * be inside due to u64 alignment of output.
+ */
+ if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
+ unsigned int offset = *startp;
+ unsigned int len = min(sizeof(*event), size), cpy;
+ void *dst = map->event_copy;
+
+ do {
+ cpy = min(map->mask + 1 - (offset & map->mask), len);
+ memcpy(dst, &data[offset & map->mask], cpy);
+ offset += cpy;
+ dst += cpy;
+ len -= cpy;
+ } while (len);
+
+ event = (union perf_event *)map->event_copy;
+ }
+
+ *startp += size;
+ }
+
+ return event;
+}
+
+/*
+ * Read event from ring buffer one by one.
+ * Return one event for each call.
+ *
+ * Usage:
+ * perf_mmap__read_init()
+ * while(event = perf_mmap__read_event()) {
+ * //process the event
+ * perf_mmap__consume()
+ * }
+ * perf_mmap__read_done()
+ */
+union perf_event *perf_mmap__read_event(struct perf_mmap *map)
+{
+ union perf_event *event;
+
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!refcount_read(&map->refcnt))
+ return NULL;
+
+ /* non-overwirte doesn't pause the ringbuffer */
+ if (!map->overwrite)
+ map->end = perf_mmap__read_head(map);
+
+ event = perf_mmap__read(map, &map->start, map->end);
+
+ if (!map->overwrite)
+ map->prev = map->start;
+
+ return event;
+}
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 63e4349a772a..15e458e150bd 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -15,7 +15,9 @@ void test_attr__init(void);
void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
int fd, int group_fd, unsigned long flags);
-#define HAVE_ATTR_TEST
+#ifndef HAVE_ATTR_TEST
+#define HAVE_ATTR_TEST 1
+#endif
static inline int
sys_perf_event_open(struct perf_event_attr *attr,
@@ -27,7 +29,7 @@ sys_perf_event_open(struct perf_event_attr *attr,
fd = syscall(__NR_perf_event_open, attr, pid, cpu,
group_fd, flags);
-#ifdef HAVE_ATTR_TEST
+#if HAVE_ATTR_TEST
if (unlikely(test_attr__enabled))
test_attr__open(attr, pid, cpu, fd, group_fd, flags);
#endif
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 61b3911d91e6..ebc6a2e5eae9 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -105,6 +105,9 @@ except ImportError:
glb_nsz = 16
import re
import os
+import random
+import copy
+import math
pyside_version_1 = True
if not "--pyside-version-1" in sys.argv:
@@ -341,6 +344,15 @@ def LookupCreateModel(model_name, create_fn):
model_cache_lock.release()
return model
+def LookupModel(model_name):
+ model_cache_lock.acquire()
+ try:
+ model = model_cache[model_name]
+ except:
+ model = None
+ model_cache_lock.release()
+ return model
+
# Find bar
class FindBar():
@@ -785,15 +797,16 @@ class CallGraphModel(CallGraphModelBase):
class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
- def __init__(self, glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item):
+ def __init__(self, glb, params, row, comm_id, thread_id, calls_id, call_time, time, insn_cnt, cyc_cnt, branch_count, parent_item):
super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item)
self.comm_id = comm_id
self.thread_id = thread_id
self.calls_id = calls_id
+ self.call_time = call_time
+ self.time = time
self.insn_cnt = insn_cnt
self.cyc_cnt = cyc_cnt
self.branch_count = branch_count
- self.time = time
def Select(self):
self.query_done = True
@@ -830,17 +843,17 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase):
- def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item):
- super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item)
+ def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, call_time, time, insn_cnt, cyc_cnt, branch_count, parent_item):
+ super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, call_time, time, insn_cnt, cyc_cnt, branch_count, parent_item)
dso = dsoname(dso)
if self.params.have_ipc:
insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt)
cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt)
br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count)
ipc = CalcIPC(cyc_cnt, insn_cnt)
- self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ]
+ self.data = [ name, dso, str(call_time), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ]
else:
- self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
+ self.data = [ name, dso, str(call_time), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
self.dbid = calls_id
# Call tree data model level two item
@@ -848,7 +861,7 @@ class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase):
class CallTreeLevelTwoItem(CallTreeLevelTwoPlusItemBase):
def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item):
- super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, parent_item)
+ super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, 0, parent_item)
if self.params.have_ipc:
self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""]
else:
@@ -971,20 +984,41 @@ class CallTreeModel(CallGraphModelBase):
ids.insert(0, query.value(1))
return ids
-# Vertical widget layout
+# Vertical layout
-class VBox():
+class HBoxLayout(QHBoxLayout):
- def __init__(self, w1, w2, w3=None):
- self.vbox = QWidget()
- self.vbox.setLayout(QVBoxLayout())
+ def __init__(self, *children):
+ super(HBoxLayout, self).__init__()
+
+ self.layout().setContentsMargins(0, 0, 0, 0)
+ for child in children:
+ if child.isWidgetType():
+ self.layout().addWidget(child)
+ else:
+ self.layout().addLayout(child)
+
+# Horizontal layout
+
+class VBoxLayout(QVBoxLayout):
- self.vbox.layout().setContentsMargins(0, 0, 0, 0)
+ def __init__(self, *children):
+ super(VBoxLayout, self).__init__()
- self.vbox.layout().addWidget(w1)
- self.vbox.layout().addWidget(w2)
- if w3:
- self.vbox.layout().addWidget(w3)
+ self.layout().setContentsMargins(0, 0, 0, 0)
+ for child in children:
+ if child.isWidgetType():
+ self.layout().addWidget(child)
+ else:
+ self.layout().addLayout(child)
+
+# Vertical layout widget
+
+class VBox():
+
+ def __init__(self, *children):
+ self.vbox = QWidget()
+ self.vbox.setLayout(VBoxLayout(*children))
def Widget(self):
return self.vbox
@@ -1063,7 +1097,7 @@ class CallGraphWindow(TreeWindowBase):
class CallTreeWindow(TreeWindowBase):
- def __init__(self, glb, parent=None):
+ def __init__(self, glb, parent=None, thread_at_time=None):
super(CallTreeWindow, self).__init__(parent)
self.model = LookupCreateModel("Call Tree", lambda x=glb: CallTreeModel(x))
@@ -1081,6 +1115,1343 @@ class CallTreeWindow(TreeWindowBase):
AddSubWindow(glb.mainwindow.mdi_area, self, "Call Tree")
+ if thread_at_time:
+ self.DisplayThreadAtTime(*thread_at_time)
+
+ def DisplayThreadAtTime(self, comm_id, thread_id, time):
+ parent = QModelIndex()
+ for dbid in (comm_id, thread_id):
+ found = False
+ n = self.model.rowCount(parent)
+ for row in xrange(n):
+ child = self.model.index(row, 0, parent)
+ if child.internalPointer().dbid == dbid:
+ found = True
+ self.view.setCurrentIndex(child)
+ parent = child
+ break
+ if not found:
+ return
+ found = False
+ while True:
+ n = self.model.rowCount(parent)
+ if not n:
+ return
+ last_child = None
+ for row in xrange(n):
+ child = self.model.index(row, 0, parent)
+ child_call_time = child.internalPointer().call_time
+ if child_call_time < time:
+ last_child = child
+ elif child_call_time == time:
+ self.view.setCurrentIndex(child)
+ return
+ elif child_call_time > time:
+ break
+ if not last_child:
+ if not found:
+ child = self.model.index(0, 0, parent)
+ self.view.setCurrentIndex(child)
+ return
+ found = True
+ self.view.setCurrentIndex(last_child)
+ parent = last_child
+
+# ExecComm() gets the comm_id of the command string that was set when the process exec'd i.e. the program name
+
+def ExecComm(db, thread_id, time):
+ query = QSqlQuery(db)
+ QueryExec(query, "SELECT comm_threads.comm_id, comms.c_time, comms.exec_flag"
+ " FROM comm_threads"
+ " INNER JOIN comms ON comms.id = comm_threads.comm_id"
+ " WHERE comm_threads.thread_id = " + str(thread_id) +
+ " ORDER BY comms.c_time, comms.id")
+ first = None
+ last = None
+ while query.next():
+ if first is None:
+ first = query.value(0)
+ if query.value(2) and Decimal(query.value(1)) <= Decimal(time):
+ last = query.value(0)
+ if not(last is None):
+ return last
+ return first
+
+# Container for (x, y) data
+
+class XY():
+ def __init__(self, x=0, y=0):
+ self.x = x
+ self.y = y
+
+ def __str__(self):
+ return "XY({}, {})".format(str(self.x), str(self.y))
+
+# Container for sub-range data
+
+class Subrange():
+ def __init__(self, lo=0, hi=0):
+ self.lo = lo
+ self.hi = hi
+
+ def __str__(self):
+ return "Subrange({}, {})".format(str(self.lo), str(self.hi))
+
+# Graph data region base class
+
+class GraphDataRegion(object):
+
+ def __init__(self, key, title = "", ordinal = ""):
+ self.key = key
+ self.title = title
+ self.ordinal = ordinal
+
+# Function to sort GraphDataRegion
+
+def GraphDataRegionOrdinal(data_region):
+ return data_region.ordinal
+
+# Attributes for a graph region
+
+class GraphRegionAttribute():
+
+ def __init__(self, colour):
+ self.colour = colour
+
+# Switch graph data region represents a task
+
+class SwitchGraphDataRegion(GraphDataRegion):
+
+ def __init__(self, key, exec_comm_id, pid, tid, comm, thread_id, comm_id):
+ super(SwitchGraphDataRegion, self).__init__(key)
+
+ self.title = str(pid) + " / " + str(tid) + " " + comm
+ # Order graph legend within exec comm by pid / tid / time
+ self.ordinal = str(pid).rjust(16) + str(exec_comm_id).rjust(8) + str(tid).rjust(16)
+ self.exec_comm_id = exec_comm_id
+ self.pid = pid
+ self.tid = tid
+ self.comm = comm
+ self.thread_id = thread_id
+ self.comm_id = comm_id
+
+# Graph data point
+
+class GraphDataPoint():
+
+ def __init__(self, data, index, x, y, altx=None, alty=None, hregion=None, vregion=None):
+ self.data = data
+ self.index = index
+ self.x = x
+ self.y = y
+ self.altx = altx
+ self.alty = alty
+ self.hregion = hregion
+ self.vregion = vregion
+
+# Graph data (single graph) base class
+
+class GraphData(object):
+
+ def __init__(self, collection, xbase=Decimal(0), ybase=Decimal(0)):
+ self.collection = collection
+ self.points = []
+ self.xbase = xbase
+ self.ybase = ybase
+ self.title = ""
+
+ def AddPoint(self, x, y, altx=None, alty=None, hregion=None, vregion=None):
+ index = len(self.points)
+
+ x = float(Decimal(x) - self.xbase)
+ y = float(Decimal(y) - self.ybase)
+
+ self.points.append(GraphDataPoint(self, index, x, y, altx, alty, hregion, vregion))
+
+ def XToData(self, x):
+ return Decimal(x) + self.xbase
+
+ def YToData(self, y):
+ return Decimal(y) + self.ybase
+
+# Switch graph data (for one CPU)
+
+class SwitchGraphData(GraphData):
+
+ def __init__(self, db, collection, cpu, xbase):
+ super(SwitchGraphData, self).__init__(collection, xbase)
+
+ self.cpu = cpu
+ self.title = "CPU " + str(cpu)
+ self.SelectSwitches(db)
+
+ def SelectComms(self, db, thread_id, last_comm_id, start_time, end_time):
+ query = QSqlQuery(db)
+ QueryExec(query, "SELECT id, c_time"
+ " FROM comms"
+ " WHERE c_thread_id = " + str(thread_id) +
+ " AND exec_flag = TRUE"
+ " AND c_time >= " + str(start_time) +
+ " AND c_time <= " + str(end_time) +
+ " ORDER BY c_time, id")
+ while query.next():
+ comm_id = query.value(0)
+ if comm_id == last_comm_id:
+ continue
+ time = query.value(1)
+ hregion = self.HRegion(db, thread_id, comm_id, time)
+ self.AddPoint(time, 1000, None, None, hregion)
+
+ def SelectSwitches(self, db):
+ last_time = None
+ last_comm_id = None
+ last_thread_id = None
+ query = QSqlQuery(db)
+ QueryExec(query, "SELECT time, thread_out_id, thread_in_id, comm_out_id, comm_in_id, flags"
+ " FROM context_switches"
+ " WHERE machine_id = " + str(self.collection.machine_id) +
+ " AND cpu = " + str(self.cpu) +
+ " ORDER BY time, id")
+ while query.next():
+ flags = int(query.value(5))
+ if flags & 1:
+ # Schedule-out: detect and add exec's
+ if last_thread_id == query.value(1) and last_comm_id is not None and last_comm_id != query.value(3):
+ self.SelectComms(db, last_thread_id, last_comm_id, last_time, query.value(0))
+ continue
+ # Schedule-in: add data point
+ if len(self.points) == 0:
+ start_time = self.collection.glb.StartTime(self.collection.machine_id)
+ hregion = self.HRegion(db, query.value(1), query.value(3), start_time)
+ self.AddPoint(start_time, 1000, None, None, hregion)
+ time = query.value(0)
+ comm_id = query.value(4)
+ thread_id = query.value(2)
+ hregion = self.HRegion(db, thread_id, comm_id, time)
+ self.AddPoint(time, 1000, None, None, hregion)
+ last_time = time
+ last_comm_id = comm_id
+ last_thread_id = thread_id
+
+ def NewHRegion(self, db, key, thread_id, comm_id, time):
+ exec_comm_id = ExecComm(db, thread_id, time)
+ query = QSqlQuery(db)
+ QueryExec(query, "SELECT pid, tid FROM threads WHERE id = " + str(thread_id))
+ if query.next():
+ pid = query.value(0)
+ tid = query.value(1)
+ else:
+ pid = -1
+ tid = -1
+ query = QSqlQuery(db)
+ QueryExec(query, "SELECT comm FROM comms WHERE id = " + str(comm_id))
+ if query.next():
+ comm = query.value(0)
+ else:
+ comm = ""
+ return SwitchGraphDataRegion(key, exec_comm_id, pid, tid, comm, thread_id, comm_id)
+
+ def HRegion(self, db, thread_id, comm_id, time):
+ key = str(thread_id) + ":" + str(comm_id)
+ hregion = self.collection.LookupHRegion(key)
+ if hregion is None:
+ hregion = self.NewHRegion(db, key, thread_id, comm_id, time)
+ self.collection.AddHRegion(key, hregion)
+ return hregion
+
+# Graph data collection (multiple related graphs) base class
+
+class GraphDataCollection(object):
+
+ def __init__(self, glb):
+ self.glb = glb
+ self.data = []
+ self.hregions = {}
+ self.xrangelo = None
+ self.xrangehi = None
+ self.yrangelo = None
+ self.yrangehi = None
+ self.dp = XY(0, 0)
+
+ def AddGraphData(self, data):
+ self.data.append(data)
+
+ def LookupHRegion(self, key):
+ if key in self.hregions:
+ return self.hregions[key]
+ return None
+
+ def AddHRegion(self, key, hregion):
+ self.hregions[key] = hregion
+
+# Switch graph data collection (SwitchGraphData for each CPU)
+
+class SwitchGraphDataCollection(GraphDataCollection):
+
+ def __init__(self, glb, db, machine_id):
+ super(SwitchGraphDataCollection, self).__init__(glb)
+
+ self.machine_id = machine_id
+ self.cpus = self.SelectCPUs(db)
+
+ self.xrangelo = glb.StartTime(machine_id)
+ self.xrangehi = glb.FinishTime(machine_id)
+
+ self.yrangelo = Decimal(0)
+ self.yrangehi = Decimal(1000)
+
+ for cpu in self.cpus:
+ self.AddGraphData(SwitchGraphData(db, self, cpu, self.xrangelo))
+
+ def SelectCPUs(self, db):
+ cpus = []
+ query = QSqlQuery(db)
+ QueryExec(query, "SELECT DISTINCT cpu"
+ " FROM context_switches"
+ " WHERE machine_id = " + str(self.machine_id))
+ while query.next():
+ cpus.append(int(query.value(0)))
+ return sorted(cpus)
+
+# Switch graph data graphics item displays the graphed data
+
+class SwitchGraphDataGraphicsItem(QGraphicsItem):
+
+ def __init__(self, data, graph_width, graph_height, attrs, event_handler, parent=None):
+ super(SwitchGraphDataGraphicsItem, self).__init__(parent)
+
+ self.data = data
+ self.graph_width = graph_width
+ self.graph_height = graph_height
+ self.attrs = attrs
+ self.event_handler = event_handler
+ self.setAcceptHoverEvents(True)
+
+ def boundingRect(self):
+ return QRectF(0, 0, self.graph_width, self.graph_height)
+
+ def PaintPoint(self, painter, last, x):
+ if not(last is None or last.hregion.pid == 0 or x < self.attrs.subrange.x.lo):
+ if last.x < self.attrs.subrange.x.lo:
+ x0 = self.attrs.subrange.x.lo
+ else:
+ x0 = last.x
+ if x > self.attrs.subrange.x.hi:
+ x1 = self.attrs.subrange.x.hi
+ else:
+ x1 = x - 1
+ x0 = self.attrs.XToPixel(x0)
+ x1 = self.attrs.XToPixel(x1)
+
+ y0 = self.attrs.YToPixel(last.y)
+
+ colour = self.attrs.region_attributes[last.hregion.key].colour
+
+ width = x1 - x0 + 1
+ if width < 2:
+ painter.setPen(colour)
+ painter.drawLine(x0, self.graph_height - y0, x0, self.graph_height)
+ else:
+ painter.fillRect(x0, self.graph_height - y0, width, self.graph_height - 1, colour)
+
+ def paint(self, painter, option, widget):
+ last = None
+ for point in self.data.points:
+ self.PaintPoint(painter, last, point.x)
+ if point.x > self.attrs.subrange.x.hi:
+ break;
+ last = point
+ self.PaintPoint(painter, last, self.attrs.subrange.x.hi + 1)
+
+ def BinarySearchPoint(self, target):
+ lower_pos = 0
+ higher_pos = len(self.data.points)
+ while True:
+ pos = int((lower_pos + higher_pos) / 2)
+ val = self.data.points[pos].x
+ if target >= val:
+ lower_pos = pos
+ else:
+ higher_pos = pos
+ if higher_pos <= lower_pos + 1:
+ return lower_pos
+
+ def XPixelToData(self, x):
+ x = self.attrs.PixelToX(x)
+ if x < self.data.points[0].x:
+ x = 0
+ pos = 0
+ low = True
+ else:
+ pos = self.BinarySearchPoint(x)
+ low = False
+ return (low, pos, self.data.XToData(x))
+
+ def EventToData(self, event):
+ no_data = (None,) * 4
+ if len(self.data.points) < 1:
+ return no_data
+ x = event.pos().x()
+ if x < 0:
+ return no_data
+ low0, pos0, time_from = self.XPixelToData(x)
+ low1, pos1, time_to = self.XPixelToData(x + 1)
+ hregions = set()
+ hregion_times = []
+ if not low1:
+ for i in xrange(pos0, pos1 + 1):
+ hregion = self.data.points[i].hregion
+ hregions.add(hregion)
+ if i == pos0:
+ time = time_from
+ else:
+ time = self.data.XToData(self.data.points[i].x)
+ hregion_times.append((hregion, time))
+ return (time_from, time_to, hregions, hregion_times)
+
+ def hoverMoveEvent(self, event):
+ time_from, time_to, hregions, hregion_times = self.EventToData(event)
+ if time_from is not None:
+ self.event_handler.PointEvent(self.data.cpu, time_from, time_to, hregions)
+
+ def hoverLeaveEvent(self, event):
+ self.event_handler.NoPointEvent()
+
+ def mousePressEvent(self, event):
+ if event.button() != Qt.RightButton:
+ super(SwitchGraphDataGraphicsItem, self).mousePressEvent(event)
+ return
+ time_from, time_to, hregions, hregion_times = self.EventToData(event)
+ if hregion_times:
+ self.event_handler.RightClickEvent(self.data.cpu, hregion_times, event.screenPos())
+
+# X-axis graphics item
+
+class XAxisGraphicsItem(QGraphicsItem):
+
+ def __init__(self, width, parent=None):
+ super(XAxisGraphicsItem, self).__init__(parent)
+
+ self.width = width
+ self.max_mark_sz = 4
+ self.height = self.max_mark_sz + 1
+
+ def boundingRect(self):
+ return QRectF(0, 0, self.width, self.height)
+
+ def Step(self):
+ attrs = self.parentItem().attrs
+ subrange = attrs.subrange.x
+ t = subrange.hi - subrange.lo
+ s = (3.0 * t) / self.width
+ n = 1.0
+ while s > n:
+ n = n * 10.0
+ return n
+
+ def PaintMarks(self, painter, at_y, lo, hi, step, i):
+ attrs = self.parentItem().attrs
+ x = lo
+ while x <= hi:
+ xp = attrs.XToPixel(x)
+ if i % 10:
+ if i % 5:
+ sz = 1
+ else:
+ sz = 2
+ else:
+ sz = self.max_mark_sz
+ i = 0
+ painter.drawLine(xp, at_y, xp, at_y + sz)
+ x += step
+ i += 1
+
+ def paint(self, painter, option, widget):
+ # Using QPainter::drawLine(int x1, int y1, int x2, int y2) so x2 = width -1
+ painter.drawLine(0, 0, self.width - 1, 0)
+ n = self.Step()
+ attrs = self.parentItem().attrs
+ subrange = attrs.subrange.x
+ if subrange.lo:
+ x_offset = n - (subrange.lo % n)
+ else:
+ x_offset = 0.0
+ x = subrange.lo + x_offset
+ i = (x / n) % 10
+ self.PaintMarks(painter, 0, x, subrange.hi, n, i)
+
+ def ScaleDimensions(self):
+ n = self.Step()
+ attrs = self.parentItem().attrs
+ lo = attrs.subrange.x.lo
+ hi = (n * 10.0) + lo
+ width = attrs.XToPixel(hi)
+ if width > 500:
+ width = 0
+ return (n, lo, hi, width)
+
+ def PaintScale(self, painter, at_x, at_y):
+ n, lo, hi, width = self.ScaleDimensions()
+ if not width:
+ return
+ painter.drawLine(at_x, at_y, at_x + width, at_y)
+ self.PaintMarks(painter, at_y, lo, hi, n, 0)
+
+ def ScaleWidth(self):
+ n, lo, hi, width = self.ScaleDimensions()
+ return width
+
+ def ScaleHeight(self):
+ return self.height
+
+ def ScaleUnit(self):
+ return self.Step() * 10
+
+# Scale graphics item base class
+
+class ScaleGraphicsItem(QGraphicsItem):
+
+ def __init__(self, axis, parent=None):
+ super(ScaleGraphicsItem, self).__init__(parent)
+ self.axis = axis
+
+ def boundingRect(self):
+ scale_width = self.axis.ScaleWidth()
+ if not scale_width:
+ return QRectF()
+ return QRectF(0, 0, self.axis.ScaleWidth() + 100, self.axis.ScaleHeight())
+
+ def paint(self, painter, option, widget):
+ scale_width = self.axis.ScaleWidth()
+ if not scale_width:
+ return
+ self.axis.PaintScale(painter, 0, 5)
+ x = scale_width + 4
+ painter.drawText(QPointF(x, 10), self.Text())
+
+ def Unit(self):
+ return self.axis.ScaleUnit()
+
+ def Text(self):
+ return ""
+
+# Switch graph scale graphics item
+
+class SwitchScaleGraphicsItem(ScaleGraphicsItem):
+
+ def __init__(self, axis, parent=None):
+ super(SwitchScaleGraphicsItem, self).__init__(axis, parent)
+
+ def Text(self):
+ unit = self.Unit()
+ if unit >= 1000000000:
+ unit = int(unit / 1000000000)
+ us = "s"
+ elif unit >= 1000000:
+ unit = int(unit / 1000000)
+ us = "ms"
+ elif unit >= 1000:
+ unit = int(unit / 1000)
+ us = "us"
+ else:
+ unit = int(unit)
+ us = "ns"
+ return " = " + str(unit) + " " + us
+
+# Switch graph graphics item contains graph title, scale, x/y-axis, and the graphed data
+
+class SwitchGraphGraphicsItem(QGraphicsItem):
+
+ def __init__(self, collection, data, attrs, event_handler, first, parent=None):
+ super(SwitchGraphGraphicsItem, self).__init__(parent)
+ self.collection = collection
+ self.data = data
+ self.attrs = attrs
+ self.event_handler = event_handler
+
+ margin = 20
+ title_width = 50
+
+ self.title_graphics = QGraphicsSimpleTextItem(data.title, self)
+
+ self.title_graphics.setPos(margin, margin)
+ graph_width = attrs.XToPixel(attrs.subrange.x.hi) + 1
+ graph_height = attrs.YToPixel(attrs.subrange.y.hi) + 1
+
+ self.graph_origin_x = margin + title_width + margin
+ self.graph_origin_y = graph_height + margin
+
+ x_axis_size = 1
+ y_axis_size = 1
+ self.yline = QGraphicsLineItem(0, 0, 0, graph_height, self)
+
+ self.x_axis = XAxisGraphicsItem(graph_width, self)
+ self.x_axis.setPos(self.graph_origin_x, self.graph_origin_y + 1)
+
+ if first:
+ self.scale_item = SwitchScaleGraphicsItem(self.x_axis, self)
+ self.scale_item.setPos(self.graph_origin_x, self.graph_origin_y + 10)
+
+ self.yline.setPos(self.graph_origin_x - y_axis_size, self.graph_origin_y - graph_height)
+
+ self.axis_point = QGraphicsLineItem(0, 0, 0, 0, self)
+ self.axis_point.setPos(self.graph_origin_x - 1, self.graph_origin_y +1)
+
+ self.width = self.graph_origin_x + graph_width + margin
+ self.height = self.graph_origin_y + margin
+
+ self.graph = SwitchGraphDataGraphicsItem(data, graph_width, graph_height, attrs, event_handler, self)
+ self.graph.setPos(self.graph_origin_x, self.graph_origin_y - graph_height)
+
+ if parent and 'EnableRubberBand' in dir(parent):
+ parent.EnableRubberBand(self.graph_origin_x, self.graph_origin_x + graph_width - 1, self)
+
+ def boundingRect(self):
+ return QRectF(0, 0, self.width, self.height)
+
+ def paint(self, painter, option, widget):
+ pass
+
+ def RBXToPixel(self, x):
+ return self.attrs.PixelToX(x - self.graph_origin_x)
+
+ def RBXRangeToPixel(self, x0, x1):
+ return (self.RBXToPixel(x0), self.RBXToPixel(x1 + 1))
+
+ def RBPixelToTime(self, x):
+ if x < self.data.points[0].x:
+ return self.data.XToData(0)
+ return self.data.XToData(x)
+
+ def RBEventTimes(self, x0, x1):
+ x0, x1 = self.RBXRangeToPixel(x0, x1)
+ time_from = self.RBPixelToTime(x0)
+ time_to = self.RBPixelToTime(x1)
+ return (time_from, time_to)
+
+ def RBEvent(self, x0, x1):
+ time_from, time_to = self.RBEventTimes(x0, x1)
+ self.event_handler.RangeEvent(time_from, time_to)
+
+ def RBMoveEvent(self, x0, x1):
+ if x1 < x0:
+ x0, x1 = x1, x0
+ self.RBEvent(x0, x1)
+
+ def RBReleaseEvent(self, x0, x1, selection_state):
+ if x1 < x0:
+ x0, x1 = x1, x0
+ x0, x1 = self.RBXRangeToPixel(x0, x1)
+ self.event_handler.SelectEvent(x0, x1, selection_state)
+
+# Graphics item to draw a vertical bracket (used to highlight "forward" sub-range)
+
+class VerticalBracketGraphicsItem(QGraphicsItem):
+
+ def __init__(self, parent=None):
+ super(VerticalBracketGraphicsItem, self).__init__(parent)
+
+ self.width = 0
+ self.height = 0
+ self.hide()
+
+ def SetSize(self, width, height):
+ self.width = width + 1
+ self.height = height + 1
+
+ def boundingRect(self):
+ return QRectF(0, 0, self.width, self.height)
+
+ def paint(self, painter, option, widget):
+ colour = QColor(255, 255, 0, 32)
+ painter.fillRect(0, 0, self.width, self.height, colour)
+ x1 = self.width - 1
+ y1 = self.height - 1
+ painter.drawLine(0, 0, x1, 0)
+ painter.drawLine(0, 0, 0, 3)
+ painter.drawLine(x1, 0, x1, 3)
+ painter.drawLine(0, y1, x1, y1)
+ painter.drawLine(0, y1, 0, y1 - 3)
+ painter.drawLine(x1, y1, x1, y1 - 3)
+
+# Graphics item to contain graphs arranged vertically
+
+class VertcalGraphSetGraphicsItem(QGraphicsItem):
+
+ def __init__(self, collection, attrs, event_handler, child_class, parent=None):
+ super(VertcalGraphSetGraphicsItem, self).__init__(parent)
+
+ self.collection = collection
+
+ self.top = 10
+
+ self.width = 0
+ self.height = self.top
+
+ self.rubber_band = None
+ self.rb_enabled = False
+
+ first = True
+ for data in collection.data:
+ child = child_class(collection, data, attrs, event_handler, first, self)
+ child.setPos(0, self.height + 1)
+ rect = child.boundingRect()
+ if rect.right() > self.width:
+ self.width = rect.right()
+ self.height = self.height + rect.bottom() + 1
+ first = False
+
+ self.bracket = VerticalBracketGraphicsItem(self)
+
+ def EnableRubberBand(self, xlo, xhi, rb_event_handler):
+ if self.rb_enabled:
+ return
+ self.rb_enabled = True
+ self.rb_in_view = False
+ self.setAcceptedMouseButtons(Qt.LeftButton)
+ self.rb_xlo = xlo
+ self.rb_xhi = xhi
+ self.rb_event_handler = rb_event_handler
+ self.mousePressEvent = self.MousePressEvent
+ self.mouseMoveEvent = self.MouseMoveEvent
+ self.mouseReleaseEvent = self.MouseReleaseEvent
+
+ def boundingRect(self):
+ return QRectF(0, 0, self.width, self.height)
+
+ def paint(self, painter, option, widget):
+ pass
+
+ def RubberBandParent(self):
+ scene = self.scene()
+ view = scene.views()[0]
+ viewport = view.viewport()
+ return viewport
+
+ def RubberBandSetGeometry(self, rect):
+ scene_rectf = self.mapRectToScene(QRectF(rect))
+ scene = self.scene()
+ view = scene.views()[0]
+ poly = view.mapFromScene(scene_rectf)
+ self.rubber_band.setGeometry(poly.boundingRect())
+
+ def SetSelection(self, selection_state):
+ if self.rubber_band:
+ if selection_state:
+ self.RubberBandSetGeometry(selection_state)
+ self.rubber_band.show()
+ else:
+ self.rubber_band.hide()
+
+ def SetBracket(self, rect):
+ if rect:
+ x, y, width, height = rect.x(), rect.y(), rect.width(), rect.height()
+ self.bracket.setPos(x, y)
+ self.bracket.SetSize(width, height)
+ self.bracket.show()
+ else:
+ self.bracket.hide()
+
+ def RubberBandX(self, event):
+ x = event.pos().toPoint().x()
+ if x < self.rb_xlo:
+ x = self.rb_xlo
+ elif x > self.rb_xhi:
+ x = self.rb_xhi
+ else:
+ self.rb_in_view = True
+ return x
+
+ def RubberBandRect(self, x):
+ if self.rb_origin.x() <= x:
+ width = x - self.rb_origin.x()
+ rect = QRect(self.rb_origin, QSize(width, self.height))
+ else:
+ width = self.rb_origin.x() - x
+ top_left = QPoint(self.rb_origin.x() - width, self.rb_origin.y())
+ rect = QRect(top_left, QSize(width, self.height))
+ return rect
+
+ def MousePressEvent(self, event):
+ self.rb_in_view = False
+ x = self.RubberBandX(event)
+ self.rb_origin = QPoint(x, self.top)
+ if self.rubber_band is None:
+ self.rubber_band = QRubberBand(QRubberBand.Rectangle, self.RubberBandParent())
+ self.RubberBandSetGeometry(QRect(self.rb_origin, QSize(0, self.height)))
+ if self.rb_in_view:
+ self.rubber_band.show()
+ self.rb_event_handler.RBMoveEvent(x, x)
+ else:
+ self.rubber_band.hide()
+
+ def MouseMoveEvent(self, event):
+ x = self.RubberBandX(event)
+ rect = self.RubberBandRect(x)
+ self.RubberBandSetGeometry(rect)
+ if self.rb_in_view:
+ self.rubber_band.show()
+ self.rb_event_handler.RBMoveEvent(self.rb_origin.x(), x)
+
+ def MouseReleaseEvent(self, event):
+ x = self.RubberBandX(event)
+ if self.rb_in_view:
+ selection_state = self.RubberBandRect(x)
+ else:
+ selection_state = None
+ self.rb_event_handler.RBReleaseEvent(self.rb_origin.x(), x, selection_state)
+
+# Switch graph legend data model
+
+class SwitchGraphLegendModel(QAbstractTableModel):
+
+ def __init__(self, collection, region_attributes, parent=None):
+ super(SwitchGraphLegendModel, self).__init__(parent)
+
+ self.region_attributes = region_attributes
+
+ self.child_items = sorted(collection.hregions.values(), key=GraphDataRegionOrdinal)
+ self.child_count = len(self.child_items)
+
+ self.highlight_set = set()
+
+ self.column_headers = ("pid", "tid", "comm")
+
+ def rowCount(self, parent):
+ return self.child_count
+
+ def headerData(self, section, orientation, role):
+ if role != Qt.DisplayRole:
+ return None
+ if orientation != Qt.Horizontal:
+ return None
+ return self.columnHeader(section)
+
+ def index(self, row, column, parent):
+ return self.createIndex(row, column, self.child_items[row])
+
+ def columnCount(self, parent=None):
+ return len(self.column_headers)
+
+ def columnHeader(self, column):
+ return self.column_headers[column]
+
+ def data(self, index, role):
+ if role == Qt.BackgroundRole:
+ child = self.child_items[index.row()]
+ if child in self.highlight_set:
+ return self.region_attributes[child.key].colour
+ return None
+ if role == Qt.ForegroundRole:
+ child = self.child_items[index.row()]
+ if child in self.highlight_set:
+ return QColor(255, 255, 255)
+ return self.region_attributes[child.key].colour
+ if role != Qt.DisplayRole:
+ return None
+ hregion = self.child_items[index.row()]
+ col = index.column()
+ if col == 0:
+ return hregion.pid
+ if col == 1:
+ return hregion.tid
+ if col == 2:
+ return hregion.comm
+ return None
+
+ def SetHighlight(self, row, set_highlight):
+ child = self.child_items[row]
+ top_left = self.createIndex(row, 0, child)
+ bottom_right = self.createIndex(row, len(self.column_headers) - 1, child)
+ self.dataChanged.emit(top_left, bottom_right)
+
+ def Highlight(self, highlight_set):
+ for row in xrange(self.child_count):
+ child = self.child_items[row]
+ if child in self.highlight_set:
+ if child not in highlight_set:
+ self.SetHighlight(row, False)
+ elif child in highlight_set:
+ self.SetHighlight(row, True)
+ self.highlight_set = highlight_set
+
+# Switch graph legend is a table
+
+class SwitchGraphLegend(QWidget):
+
+ def __init__(self, collection, region_attributes, parent=None):
+ super(SwitchGraphLegend, self).__init__(parent)
+
+ self.data_model = SwitchGraphLegendModel(collection, region_attributes)
+
+ self.model = QSortFilterProxyModel()
+ self.model.setSourceModel(self.data_model)
+
+ self.view = QTableView()
+ self.view.setModel(self.model)
+ self.view.setEditTriggers(QAbstractItemView.NoEditTriggers)
+ self.view.verticalHeader().setVisible(False)
+ self.view.sortByColumn(-1, Qt.AscendingOrder)
+ self.view.setSortingEnabled(True)
+ self.view.resizeColumnsToContents()
+ self.view.resizeRowsToContents()
+
+ self.vbox = VBoxLayout(self.view)
+ self.setLayout(self.vbox)
+
+ sz1 = self.view.columnWidth(0) + self.view.columnWidth(1) + self.view.columnWidth(2) + 2
+ sz1 = sz1 + self.view.verticalScrollBar().sizeHint().width()
+ self.saved_size = sz1
+
+ def resizeEvent(self, event):
+ self.saved_size = self.size().width()
+ super(SwitchGraphLegend, self).resizeEvent(event)
+
+ def Highlight(self, highlight_set):
+ self.data_model.Highlight(highlight_set)
+ self.update()
+
+ def changeEvent(self, event):
+ if event.type() == QEvent.FontChange:
+ self.view.resizeRowsToContents()
+ self.view.resizeColumnsToContents()
+ # Need to resize rows again after column resize
+ self.view.resizeRowsToContents()
+ super(SwitchGraphLegend, self).changeEvent(event)
+
+# Random colour generation
+
+def RGBColourTooLight(r, g, b):
+ if g > 230:
+ return True
+ if g <= 160:
+ return False
+ if r <= 180 and g <= 180:
+ return False
+ if r < 60:
+ return False
+ return True
+
+def GenerateColours(x):
+ cs = [0]
+ for i in xrange(1, x):
+ cs.append(int((255.0 / i) + 0.5))
+ colours = []
+ for r in cs:
+ for g in cs:
+ for b in cs:
+ # Exclude black and colours that look too light against a white background
+ if (r, g, b) == (0, 0, 0) or RGBColourTooLight(r, g, b):
+ continue
+ colours.append(QColor(r, g, b))
+ return colours
+
+def GenerateNColours(n):
+ for x in xrange(2, n + 2):
+ colours = GenerateColours(x)
+ if len(colours) >= n:
+ return colours
+ return []
+
+def GenerateNRandomColours(n, seed):
+ colours = GenerateNColours(n)
+ random.seed(seed)
+ random.shuffle(colours)
+ return colours
+
+# Graph attributes, in particular the scale and subrange that change when zooming
+
+class GraphAttributes():
+
+ def __init__(self, scale, subrange, region_attributes, dp):
+ self.scale = scale
+ self.subrange = subrange
+ self.region_attributes = region_attributes
+ # Rounding avoids errors due to finite floating point precision
+ self.dp = dp # data decimal places
+ self.Update()
+
+ def XToPixel(self, x):
+ return int(round((x - self.subrange.x.lo) * self.scale.x, self.pdp.x))
+
+ def YToPixel(self, y):
+ return int(round((y - self.subrange.y.lo) * self.scale.y, self.pdp.y))
+
+ def PixelToXRounded(self, px):
+ return round((round(px, 0) / self.scale.x), self.dp.x) + self.subrange.x.lo
+
+ def PixelToYRounded(self, py):
+ return round((round(py, 0) / self.scale.y), self.dp.y) + self.subrange.y.lo
+
+ def PixelToX(self, px):
+ x = self.PixelToXRounded(px)
+ if self.pdp.x == 0:
+ rt = self.XToPixel(x)
+ if rt > px:
+ return x - 1
+ return x
+
+ def PixelToY(self, py):
+ y = self.PixelToYRounded(py)
+ if self.pdp.y == 0:
+ rt = self.YToPixel(y)
+ if rt > py:
+ return y - 1
+ return y
+
+ def ToPDP(self, dp, scale):
+ # Calculate pixel decimal places:
+ # (10 ** dp) is the minimum delta in the data
+ # scale it to get the minimum delta in pixels
+ # log10 gives the number of decimals places negatively
+ # subtrace 1 to divide by 10
+ # round to the lower negative number
+ # change the sign to get the number of decimals positively
+ x = math.log10((10 ** dp) * scale)
+ if x < 0:
+ x -= 1
+ x = -int(math.floor(x) - 0.1)
+ else:
+ x = 0
+ return x
+
+ def Update(self):
+ x = self.ToPDP(self.dp.x, self.scale.x)
+ y = self.ToPDP(self.dp.y, self.scale.y)
+ self.pdp = XY(x, y) # pixel decimal places
+
+# Switch graph splitter which divides the CPU graphs from the legend
+
+class SwitchGraphSplitter(QSplitter):
+
+ def __init__(self, parent=None):
+ super(SwitchGraphSplitter, self).__init__(parent)
+
+ self.first_time = False
+
+ def resizeEvent(self, ev):
+ if self.first_time:
+ self.first_time = False
+ sz1 = self.widget(1).view.columnWidth(0) + self.widget(1).view.columnWidth(1) + self.widget(1).view.columnWidth(2) + 2
+ sz1 = sz1 + self.widget(1).view.verticalScrollBar().sizeHint().width()
+ sz0 = self.size().width() - self.handleWidth() - sz1
+ self.setSizes([sz0, sz1])
+ elif not(self.widget(1).saved_size is None):
+ sz1 = self.widget(1).saved_size
+ sz0 = self.size().width() - self.handleWidth() - sz1
+ self.setSizes([sz0, sz1])
+ super(SwitchGraphSplitter, self).resizeEvent(ev)
+
+# Graph widget base class
+
+class GraphWidget(QWidget):
+
+ graph_title_changed = Signal(object)
+
+ def __init__(self, parent=None):
+ super(GraphWidget, self).__init__(parent)
+
+ def GraphTitleChanged(self, title):
+ self.graph_title_changed.emit(title)
+
+ def Title(self):
+ return ""
+
+# Display time in s, ms, us or ns
+
+def ToTimeStr(val):
+ val = Decimal(val)
+ if val >= 1000000000:
+ return "{} s".format((val / 1000000000).quantize(Decimal("0.000000001")))
+ if val >= 1000000:
+ return "{} ms".format((val / 1000000).quantize(Decimal("0.000001")))
+ if val >= 1000:
+ return "{} us".format((val / 1000).quantize(Decimal("0.001")))
+ return "{} ns".format(val.quantize(Decimal("1")))
+
+# Switch (i.e. context switch i.e. Time Chart by CPU) graph widget which contains the CPU graphs and the legend and control buttons
+
+class SwitchGraphWidget(GraphWidget):
+
+ def __init__(self, glb, collection, parent=None):
+ super(SwitchGraphWidget, self).__init__(parent)
+
+ self.glb = glb
+ self.collection = collection
+
+ self.back_state = []
+ self.forward_state = []
+ self.selection_state = (None, None)
+ self.fwd_rect = None
+ self.start_time = self.glb.StartTime(collection.machine_id)
+
+ i = 0
+ hregions = collection.hregions.values()
+ colours = GenerateNRandomColours(len(hregions), 1013)
+ region_attributes = {}
+ for hregion in hregions:
+ if hregion.pid == 0 and hregion.tid == 0:
+ region_attributes[hregion.key] = GraphRegionAttribute(QColor(0, 0, 0))
+ else:
+ region_attributes[hregion.key] = GraphRegionAttribute(colours[i])
+ i = i + 1
+
+ # Default to entire range
+ xsubrange = Subrange(0.0, float(collection.xrangehi - collection.xrangelo) + 1.0)
+ ysubrange = Subrange(0.0, float(collection.yrangehi - collection.yrangelo) + 1.0)
+ subrange = XY(xsubrange, ysubrange)
+
+ scale = self.GetScaleForRange(subrange)
+
+ self.attrs = GraphAttributes(scale, subrange, region_attributes, collection.dp)
+
+ self.item = VertcalGraphSetGraphicsItem(collection, self.attrs, self, SwitchGraphGraphicsItem)
+
+ self.scene = QGraphicsScene()
+ self.scene.addItem(self.item)
+
+ self.view = QGraphicsView(self.scene)
+ self.view.centerOn(0, 0)
+ self.view.setAlignment(Qt.AlignLeft | Qt.AlignTop)
+
+ self.legend = SwitchGraphLegend(collection, region_attributes)
+
+ self.splitter = SwitchGraphSplitter()
+ self.splitter.addWidget(self.view)
+ self.splitter.addWidget(self.legend)
+
+ self.point_label = QLabel("")
+ self.point_label.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Fixed)
+
+ self.back_button = QToolButton()
+ self.back_button.setIcon(self.style().standardIcon(QStyle.SP_ArrowLeft))
+ self.back_button.setDisabled(True)
+ self.back_button.released.connect(lambda: self.Back())
+
+ self.forward_button = QToolButton()
+ self.forward_button.setIcon(self.style().standardIcon(QStyle.SP_ArrowRight))
+ self.forward_button.setDisabled(True)
+ self.forward_button.released.connect(lambda: self.Forward())
+
+ self.zoom_button = QToolButton()
+ self.zoom_button.setText("Zoom")
+ self.zoom_button.setDisabled(True)
+ self.zoom_button.released.connect(lambda: self.Zoom())
+
+ self.hbox = HBoxLayout(self.back_button, self.forward_button, self.zoom_button, self.point_label)
+
+ self.vbox = VBoxLayout(self.splitter, self.hbox)
+
+ self.setLayout(self.vbox)
+
+ def GetScaleForRangeX(self, xsubrange):
+ # Default graph 1000 pixels wide
+ dflt = 1000.0
+ r = xsubrange.hi - xsubrange.lo
+ return dflt / r
+
+ def GetScaleForRangeY(self, ysubrange):
+ # Default graph 50 pixels high
+ dflt = 50.0
+ r = ysubrange.hi - ysubrange.lo
+ return dflt / r
+
+ def GetScaleForRange(self, subrange):
+ # Default graph 1000 pixels wide, 50 pixels high
+ xscale = self.GetScaleForRangeX(subrange.x)
+ yscale = self.GetScaleForRangeY(subrange.y)
+ return XY(xscale, yscale)
+
+ def PointEvent(self, cpu, time_from, time_to, hregions):
+ text = "CPU: " + str(cpu)
+ time_from = time_from.quantize(Decimal(1))
+ rel_time_from = time_from - self.glb.StartTime(self.collection.machine_id)
+ text = text + " Time: " + str(time_from) + " (+" + ToTimeStr(rel_time_from) + ")"
+ self.point_label.setText(text)
+ self.legend.Highlight(hregions)
+
+ def RightClickEvent(self, cpu, hregion_times, pos):
+ if not IsSelectable(self.glb.db, "calls", "WHERE parent_id >= 0"):
+ return
+ menu = QMenu(self.view)
+ for hregion, time in hregion_times:
+ thread_at_time = (hregion.exec_comm_id, hregion.thread_id, time)
+ menu_text = "Show Call Tree for {} {}:{} at {}".format(hregion.comm, hregion.pid, hregion.tid, time)
+ menu.addAction(CreateAction(menu_text, "Show Call Tree", lambda a=None, args=thread_at_time: self.RightClickSelect(args), self.view))
+ menu.exec_(pos)
+
+ def RightClickSelect(self, args):
+ CallTreeWindow(self.glb, self.glb.mainwindow, thread_at_time=args)
+
+ def NoPointEvent(self):
+ self.point_label.setText("")
+ self.legend.Highlight({})
+
+ def RangeEvent(self, time_from, time_to):
+ time_from = time_from.quantize(Decimal(1))
+ time_to = time_to.quantize(Decimal(1))
+ if time_to <= time_from:
+ self.point_label.setText("")
+ return
+ rel_time_from = time_from - self.start_time
+ rel_time_to = time_to - self.start_time
+ text = " Time: " + str(time_from) + " (+" + ToTimeStr(rel_time_from) + ") to: " + str(time_to) + " (+" + ToTimeStr(rel_time_to) + ")"
+ text = text + " duration: " + ToTimeStr(time_to - time_from)
+ self.point_label.setText(text)
+
+ def BackState(self):
+ return (self.attrs.subrange, self.attrs.scale, self.selection_state, self.fwd_rect)
+
+ def PushBackState(self):
+ state = copy.deepcopy(self.BackState())
+ self.back_state.append(state)
+ self.back_button.setEnabled(True)
+
+ def PopBackState(self):
+ self.attrs.subrange, self.attrs.scale, self.selection_state, self.fwd_rect = self.back_state.pop()
+ self.attrs.Update()
+ if not self.back_state:
+ self.back_button.setDisabled(True)
+
+ def PushForwardState(self):
+ state = copy.deepcopy(self.BackState())
+ self.forward_state.append(state)
+ self.forward_button.setEnabled(True)
+
+ def PopForwardState(self):
+ self.attrs.subrange, self.attrs.scale, self.selection_state, self.fwd_rect = self.forward_state.pop()
+ self.attrs.Update()
+ if not self.forward_state:
+ self.forward_button.setDisabled(True)
+
+ def Title(self):
+ time_from = self.collection.xrangelo + Decimal(self.attrs.subrange.x.lo)
+ time_to = self.collection.xrangelo + Decimal(self.attrs.subrange.x.hi)
+ rel_time_from = time_from - self.start_time
+ rel_time_to = time_to - self.start_time
+ title = "+" + ToTimeStr(rel_time_from) + " to +" + ToTimeStr(rel_time_to)
+ title = title + " (" + ToTimeStr(time_to - time_from) + ")"
+ return title
+
+ def Update(self):
+ selected_subrange, selection_state = self.selection_state
+ self.item.SetSelection(selection_state)
+ self.item.SetBracket(self.fwd_rect)
+ self.zoom_button.setDisabled(selected_subrange is None)
+ self.GraphTitleChanged(self.Title())
+ self.item.update(self.item.boundingRect())
+
+ def Back(self):
+ if not self.back_state:
+ return
+ self.PushForwardState()
+ self.PopBackState()
+ self.Update()
+
+ def Forward(self):
+ if not self.forward_state:
+ return
+ self.PushBackState()
+ self.PopForwardState()
+ self.Update()
+
+ def SelectEvent(self, x0, x1, selection_state):
+ if selection_state is None:
+ selected_subrange = None
+ else:
+ if x1 - x0 < 1.0:
+ x1 += 1.0
+ selected_subrange = Subrange(x0, x1)
+ self.selection_state = (selected_subrange, selection_state)
+ self.zoom_button.setDisabled(selected_subrange is None)
+
+ def Zoom(self):
+ selected_subrange, selection_state = self.selection_state
+ if selected_subrange is None:
+ return
+ self.fwd_rect = selection_state
+ self.item.SetSelection(None)
+ self.PushBackState()
+ self.attrs.subrange.x = selected_subrange
+ self.forward_state = []
+ self.forward_button.setDisabled(True)
+ self.selection_state = (None, None)
+ self.fwd_rect = None
+ self.attrs.scale.x = self.GetScaleForRangeX(self.attrs.subrange.x)
+ self.attrs.Update()
+ self.Update()
+
+# Slow initialization - perform non-GUI initialization in a separate thread and put up a modal message box while waiting
+
+class SlowInitClass():
+
+ def __init__(self, glb, title, init_fn):
+ self.init_fn = init_fn
+ self.done = False
+ self.result = None
+
+ self.msg_box = QMessageBox(glb.mainwindow)
+ self.msg_box.setText("Initializing " + title + ". Please wait.")
+ self.msg_box.setWindowTitle("Initializing " + title)
+ self.msg_box.setWindowIcon(glb.mainwindow.style().standardIcon(QStyle.SP_MessageBoxInformation))
+
+ self.init_thread = Thread(self.ThreadFn, glb)
+ self.init_thread.done.connect(lambda: self.Done(), Qt.QueuedConnection)
+
+ self.init_thread.start()
+
+ def Done(self):
+ self.msg_box.done(0)
+
+ def ThreadFn(self, glb):
+ conn_name = "SlowInitClass" + str(os.getpid())
+ db, dbname = glb.dbref.Open(conn_name)
+ self.result = self.init_fn(db)
+ self.done = True
+ return (True, 0)
+
+ def Result(self):
+ while not self.done:
+ self.msg_box.exec_()
+ self.init_thread.wait()
+ return self.result
+
+def SlowInit(glb, title, init_fn):
+ init = SlowInitClass(glb, title, init_fn)
+ return init.Result()
+
+# Time chart by CPU window
+
+class TimeChartByCPUWindow(QMdiSubWindow):
+
+ def __init__(self, glb, parent=None):
+ super(TimeChartByCPUWindow, self).__init__(parent)
+
+ self.glb = glb
+ self.machine_id = glb.HostMachineId()
+ self.collection_name = "SwitchGraphDataCollection " + str(self.machine_id)
+
+ collection = LookupModel(self.collection_name)
+ if collection is None:
+ collection = SlowInit(glb, "Time Chart", self.Init)
+
+ self.widget = SwitchGraphWidget(glb, collection, self)
+ self.view = self.widget
+
+ self.base_title = "Time Chart by CPU"
+ self.setWindowTitle(self.base_title + self.widget.Title())
+ self.widget.graph_title_changed.connect(self.GraphTitleChanged)
+
+ self.setWidget(self.widget)
+
+ AddSubWindow(glb.mainwindow.mdi_area, self, self.windowTitle())
+
+ def Init(self, db):
+ return LookupCreateModel(self.collection_name, lambda : SwitchGraphDataCollection(self.glb, db, self.machine_id))
+
+ def GraphTitleChanged(self, title):
+ self.setWindowTitle(self.base_title + " : " + title)
+
# Child data item finder
class ChildDataItemFinder():
@@ -2058,10 +3429,8 @@ class SampleTimeRangesDataItem(LineEditDataItem):
QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1")
if query.next():
self.last_id = int(query.value(0))
- self.last_time = int(query.value(1))
- QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1")
- if query.next():
- self.first_time = int(query.value(0))
+ self.first_time = int(glb.HostStartTime())
+ self.last_time = int(glb.HostFinishTime())
if placeholder_text:
placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time)
@@ -2954,7 +4323,9 @@ p.c2 {
<p class=c2><a href=#allbranches>1.3 All branches</a></p>
<p class=c2><a href=#selectedbranches>1.4 Selected branches</a></p>
<p class=c2><a href=#topcallsbyelapsedtime>1.5 Top calls by elapsed time</a></p>
-<p class=c1><a href=#tables>2. Tables</a></p>
+<p class=c1><a href=#charts>2. Charts</a></p>
+<p class=c2><a href=#timechartbycpu>2.1 Time chart by CPU</a></p>
+<p class=c1><a href=#tables>3. Tables</a></p>
<h1 id=reports>1. Reports</h1>
<h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2>
The result is a GUI window with a tree representing a context-sensitive
@@ -3042,7 +4413,29 @@ N.B. Due to the granularity of timestamps, there could be no branches in any giv
The Top calls by elapsed time report displays calls in descending order of time elapsed between when the function was called and when it returned.
The data is reduced by various selection criteria. A dialog box displays available criteria which are AND'ed together.
If not all data is fetched, a Fetch bar is provided. Ctrl-F displays a Find bar.
-<h1 id=tables>2. Tables</h1>
+<h1 id=charts>2. Charts</h1>
+<h2 id=timechartbycpu>2.1 Time chart by CPU</h2>
+This chart displays context switch information when that data is available. Refer to context_switches_view on the Tables menu.
+<h3>Features</h3>
+<ol>
+<li>Mouse over to highight the task and show the time</li>
+<li>Drag the mouse to select a region and zoom by pushing the Zoom button</li>
+<li>Go back and forward by pressing the arrow buttons</li>
+<li>If call information is available, right-click to show a call tree opened to that task and time.
+Note, the call tree may take some time to appear, and there may not be call information for the task or time selected.
+</li>
+</ol>
+<h3>Important</h3>
+The graph can be misleading in the following respects:
+<ol>
+<li>The graph shows the first task on each CPU as running from the beginning of the time range.
+Because tracing might start on different CPUs at different times, that is not necessarily the case.
+Refer to context_switches_view on the Tables menu to understand what data the graph is based upon.</li>
+<li>Similarly, the last task on each CPU can be showing running longer than it really was.
+Again, refer to context_switches_view on the Tables menu to understand what data the graph is based upon.</li>
+<li>When the mouse is over a task, the highlighted task might not be visible on the legend without scrolling if the legend does not fit fully in the window</li>
+</ol>
+<h1 id=tables>3. Tables</h1>
The Tables menu shows all tables and views in the database. Most tables have an associated view
which displays the information in a more friendly way. Not all data for large tables is fetched
immediately. More records can be fetched using the Fetch bar provided. Columns can be sorted,
@@ -3238,6 +4631,10 @@ class MainWindow(QMainWindow):
if IsSelectable(glb.db, "calls"):
reports_menu.addAction(CreateAction("&Top calls by elapsed time", "Create a new window displaying top calls by elapsed time", self.NewTopCalls, self))
+ if IsSelectable(glb.db, "context_switches"):
+ charts_menu = menu.addMenu("&Charts")
+ charts_menu.addAction(CreateAction("&Time chart by CPU", "Create a new window displaying time charts by CPU", self.TimeChartByCPU, self))
+
self.TableMenu(GetTableList(glb), menu)
self.window_menu = WindowMenu(self.mdi_area, menu)
@@ -3298,6 +4695,9 @@ class MainWindow(QMainWindow):
label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")"
reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewSelectedBranchView(x), self))
+ def TimeChartByCPU(self):
+ TimeChartByCPUWindow(self.glb, self)
+
def TableMenu(self, tables, menu):
table_menu = menu.addMenu("&Tables")
for table in tables:
@@ -3470,6 +4870,9 @@ class Glb():
self.have_disassembler = True
except:
self.have_disassembler = False
+ self.host_machine_id = 0
+ self.host_start_time = 0
+ self.host_finish_time = 0
def FileFromBuildId(self, build_id):
file_name = self.buildid_dir + build_id[0:2] + "/" + build_id[2:] + "/elf"
@@ -3502,6 +4905,110 @@ class Glb():
except:
pass
+ def GetHostMachineId(self):
+ query = QSqlQuery(self.db)
+ QueryExec(query, "SELECT id FROM machines WHERE pid = -1")
+ if query.next():
+ self.host_machine_id = query.value(0)
+ else:
+ self.host_machine_id = 0
+ return self.host_machine_id
+
+ def HostMachineId(self):
+ if self.host_machine_id:
+ return self.host_machine_id
+ return self.GetHostMachineId()
+
+ def SelectValue(self, sql):
+ query = QSqlQuery(self.db)
+ try:
+ QueryExec(query, sql)
+ except:
+ return None
+ if query.next():
+ return Decimal(query.value(0))
+ return None
+
+ def SwitchesMinTime(self, machine_id):
+ return self.SelectValue("SELECT time"
+ " FROM context_switches"
+ " WHERE time != 0 AND machine_id = " + str(machine_id) +
+ " ORDER BY id LIMIT 1")
+
+ def SwitchesMaxTime(self, machine_id):
+ return self.SelectValue("SELECT time"
+ " FROM context_switches"
+ " WHERE time != 0 AND machine_id = " + str(machine_id) +
+ " ORDER BY id DESC LIMIT 1")
+
+ def SamplesMinTime(self, machine_id):
+ return self.SelectValue("SELECT time"
+ " FROM samples"
+ " WHERE time != 0 AND machine_id = " + str(machine_id) +
+ " ORDER BY id LIMIT 1")
+
+ def SamplesMaxTime(self, machine_id):
+ return self.SelectValue("SELECT time"
+ " FROM samples"
+ " WHERE time != 0 AND machine_id = " + str(machine_id) +
+ " ORDER BY id DESC LIMIT 1")
+
+ def CallsMinTime(self, machine_id):
+ return self.SelectValue("SELECT calls.call_time"
+ " FROM calls"
+ " INNER JOIN threads ON threads.thread_id = calls.thread_id"
+ " WHERE calls.call_time != 0 AND threads.machine_id = " + str(machine_id) +
+ " ORDER BY calls.id LIMIT 1")
+
+ def CallsMaxTime(self, machine_id):
+ return self.SelectValue("SELECT calls.return_time"
+ " FROM calls"
+ " INNER JOIN threads ON threads.thread_id = calls.thread_id"
+ " WHERE calls.return_time != 0 AND threads.machine_id = " + str(machine_id) +
+ " ORDER BY calls.return_time DESC LIMIT 1")
+
+ def GetStartTime(self, machine_id):
+ t0 = self.SwitchesMinTime(machine_id)
+ t1 = self.SamplesMinTime(machine_id)
+ t2 = self.CallsMinTime(machine_id)
+ if t0 is None or (not(t1 is None) and t1 < t0):
+ t0 = t1
+ if t0 is None or (not(t2 is None) and t2 < t0):
+ t0 = t2
+ return t0
+
+ def GetFinishTime(self, machine_id):
+ t0 = self.SwitchesMaxTime(machine_id)
+ t1 = self.SamplesMaxTime(machine_id)
+ t2 = self.CallsMaxTime(machine_id)
+ if t0 is None or (not(t1 is None) and t1 > t0):
+ t0 = t1
+ if t0 is None or (not(t2 is None) and t2 > t0):
+ t0 = t2
+ return t0
+
+ def HostStartTime(self):
+ if self.host_start_time:
+ return self.host_start_time
+ self.host_start_time = self.GetStartTime(self.HostMachineId())
+ return self.host_start_time
+
+ def HostFinishTime(self):
+ if self.host_finish_time:
+ return self.host_finish_time
+ self.host_finish_time = self.GetFinishTime(self.HostMachineId())
+ return self.host_finish_time
+
+ def StartTime(self, machine_id):
+ if machine_id == self.HostMachineId():
+ return self.HostStartTime()
+ return self.GetStartTime(machine_id)
+
+ def FinishTime(self, machine_id):
+ if machine_id == self.HostMachineId():
+ return self.HostFinishTime()
+ return self.GetFinishTime(machine_id)
+
# Database reference
class DBRef():
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index 338cd9faa835..a4cd30c0beb3 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -13,6 +13,7 @@
#include "util/mmap.h"
#include <errno.h>
#include <linux/string.h>
+#include <perf/mmap.h>
#define NR_ITERS 111
@@ -37,8 +38,8 @@ static int count_samples(struct evlist *evlist, int *sample_count,
struct mmap *map = &evlist->overwrite_mmap[i];
union perf_event *event;
- perf_mmap__read_init(map);
- while ((event = perf_mmap__read_event(map)) != NULL) {
+ perf_mmap__read_init(&map->core);
+ while ((event = perf_mmap__read_event(&map->core)) != NULL) {
const u32 type = event->header.type;
switch (type) {
@@ -53,7 +54,7 @@ static int count_samples(struct evlist *evlist, int *sample_count,
return TEST_FAIL;
}
}
- perf_mmap__read_done(map);
+ perf_mmap__read_done(&map->core);
}
return TEST_OK;
}
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 1eb0bffaed6c..5d20bf8397f0 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -15,6 +15,7 @@
#include <linux/string.h>
#include <api/fs/fs.h>
#include <bpf/bpf.h>
+#include <perf/mmap.h>
#include "tests.h"
#include "llvm.h"
#include "debug.h"
@@ -184,16 +185,16 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
struct mmap *md;
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
continue;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
const u32 type = event->header.type;
if (type == PERF_RECORD_SAMPLE)
count ++;
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
}
if (count != expect) {
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index f5764a3890b9..1f017e1b2a55 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -10,6 +10,7 @@
#include <sys/param.h>
#include <perf/cpumap.h>
#include <perf/evlist.h>
+#include <perf/mmap.h>
#include "debug.h"
#include "dso.h"
@@ -425,16 +426,16 @@ static int process_events(struct machine *machine, struct evlist *evlist,
for (i = 0; i < evlist->core.nr_mmaps; i++) {
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
continue;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
ret = process_event(machine, evlist, event, state);
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
if (ret < 0)
return ret;
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
}
return 0;
}
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 92c7d591bcac..50a0c9fcde7d 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -5,6 +5,7 @@
#include <sys/prctl.h>
#include <perf/cpumap.h>
#include <perf/evlist.h>
+#include <perf/mmap.h>
#include "debug.h"
#include "parse-events.h"
@@ -38,17 +39,17 @@ static int find_comm(struct evlist *evlist, const char *comm)
found = 0;
for (i = 0; i < evlist->core.nr_mmaps; i++) {
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
continue;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
if (event->header.type == PERF_RECORD_COMM &&
(pid_t)event->comm.pid == getpid() &&
(pid_t)event->comm.tid == getpid() &&
strcmp(event->comm.comm, comm) == 0)
found += 1;
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
}
return found;
}
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 3a22dce991ba..5f4c0dbb4715 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -16,6 +16,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <perf/evlist.h>
+#include <perf/mmap.h>
/*
* This test will generate random numbers of calls to some getpid syscalls,
@@ -113,10 +114,10 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
}
md = &evlist->mmap[0];
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
goto out_init;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
struct perf_sample sample;
if (event->header.type != PERF_RECORD_SAMPLE) {
@@ -139,9 +140,9 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
goto out_delete_evlist;
}
nr_events[evsel->idx]++;
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
out_init:
err = 0;
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 2b5c46813053..c6b2d7aab608 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -13,6 +13,7 @@
#include "debug.h"
#include "util/mmap.h"
#include <errno.h>
+#include <perf/mmap.h>
#ifndef O_DIRECTORY
#define O_DIRECTORY 00200000
@@ -92,10 +93,10 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
struct mmap *md;
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
continue;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
const u32 type = event->header.type;
int tp_flags;
struct perf_sample sample;
@@ -103,7 +104,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
++nr_events;
if (type != PERF_RECORD_SAMPLE) {
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
continue;
}
@@ -123,7 +124,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
goto out_ok;
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
}
if (nr_events == before)
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 437426be29e9..2195fc205e72 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -6,6 +6,7 @@
#include <pthread.h>
#include <sched.h>
+#include <perf/mmap.h>
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
@@ -170,10 +171,10 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
struct mmap *md;
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
continue;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
const u32 type = event->header.type;
const char *name = perf_event__name(type);
@@ -276,9 +277,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
++errs;
}
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
}
/*
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 84519df87f30..bfb9986093d8 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -15,6 +15,7 @@
#include "util/mmap.h"
#include "util/thread_map.h"
#include <perf/evlist.h>
+#include <perf/mmap.h>
#define NR_LOOPS 10000000
@@ -99,10 +100,10 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
evlist__disable(evlist);
md = &evlist->mmap[0];
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
goto out_init;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
struct perf_sample sample;
if (event->header.type != PERF_RECORD_SAMPLE)
@@ -117,9 +118,9 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
total_periods += sample.period;
nr_samples++;
next_event:
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
out_init:
if ((u64) nr_samples == total_periods) {
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index ffa592e0020e..fcb0d03dba4e 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -8,6 +8,7 @@
#include <linux/zalloc.h>
#include <perf/cpumap.h>
#include <perf/evlist.h>
+#include <perf/mmap.h>
#include "debug.h"
#include "parse-events.h"
@@ -269,17 +270,17 @@ static int process_events(struct evlist *evlist,
for (i = 0; i < evlist->core.nr_mmaps; i++) {
md = &evlist->mmap[i];
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
continue;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
cnt += 1;
ret = add_event(evlist, &events, event);
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
if (ret < 0)
goto out_free_nodes;
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
}
events_array = calloc(cnt, sizeof(struct event_node));
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index bce3a4cb4c89..4965f8b9055b 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <perf/cpumap.h>
#include <perf/evlist.h>
+#include <perf/mmap.h>
static int exited;
static int nr_exit;
@@ -117,16 +118,16 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
retry:
md = &evlist->mmap[0];
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
goto out_init;
- while ((event = perf_mmap__read_event(md)) != NULL) {
+ while ((event = perf_mmap__read_event(&md->core)) != NULL) {
if (event->header.type == PERF_RECORD_EXIT)
nr_exit++;
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
}
- perf_mmap__read_done(md);
+ perf_mmap__read_done(&md->core);
out_init:
if (!exited || !nr_exit) {
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index afa75a76f6b8..433dc39053a7 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -17,3 +17,4 @@ perf-y += sockaddr.o
perf-y += socket.o
perf-y += statx.o
perf-y += sync_file_range.o
+perf-y += tracepoints/
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 7e06605f7c76..77ad80a399fd 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -5,9 +5,10 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <sys/types.h>
+#include <stdbool.h>
struct strarray {
- int offset;
+ u64 offset;
int nr_entries;
const char *prefix;
const char **entries;
@@ -29,6 +30,8 @@ struct strarray {
size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val);
size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, bool show_prefix, unsigned long flags);
+bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret);
+
struct trace;
struct thread;
@@ -51,6 +54,8 @@ struct strarrays {
size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val);
+bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret);
+
size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size);
extern struct strarray strarray__socket_families;
@@ -78,6 +83,8 @@ struct augmented_arg {
u64 value[];
};
+struct syscall_arg_fmt;
+
/**
* @val: value of syscall argument being formatted
* @args: All the args, use syscall_args__val(arg, nth) to access one
@@ -94,6 +101,7 @@ struct augmented_arg {
struct syscall_arg {
unsigned long val;
unsigned char *args;
+ struct syscall_arg_fmt *fmt;
struct {
struct augmented_arg *args;
int size;
@@ -111,6 +119,12 @@ unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx);
size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_STRARRAY_FLAGS syscall_arg__scnprintf_strarray_flags
+size_t syscall_arg__scnprintf_x86_MSR(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_X86_MSR syscall_arg__scnprintf_x86_MSR
+
+bool syscall_arg__strtoul_x86_MSR(char *bf, size_t size, struct syscall_arg *arg, u64 *ret);
+#define STUL_X86_MSR syscall_arg__strtoul_x86_MSR
+
size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays
diff --git a/tools/perf/trace/beauty/tracepoints/Build b/tools/perf/trace/beauty/tracepoints/Build
new file mode 100644
index 000000000000..625a67663de3
--- /dev/null
+++ b/tools/perf/trace/beauty/tracepoints/Build
@@ -0,0 +1 @@
+perf-y += x86_msr.o
diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.c b/tools/perf/trace/beauty/tracepoints/x86_msr.c
new file mode 100644
index 000000000000..6b8f129579d6
--- /dev/null
+++ b/tools/perf/trace/beauty/tracepoints/x86_msr.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace/beauty/x86_msr.c
+ *
+ * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+
+#include "trace/beauty/generated/x86_arch_MSRs_array.c"
+
+static DEFINE_STRARRAY(x86_MSRs, "MSR_");
+static DEFINE_STRARRAY_OFFSET(x86_64_specific_MSRs, "MSR_", x86_64_specific_MSRs_offset);
+static DEFINE_STRARRAY_OFFSET(x86_AMD_V_KVM_MSRs, "MSR_", x86_AMD_V_KVM_MSRs_offset);
+
+static struct strarray *x86_MSRs_tables[] = {
+ &strarray__x86_MSRs,
+ &strarray__x86_64_specific_MSRs,
+ &strarray__x86_AMD_V_KVM_MSRs,
+};
+
+static DEFINE_STRARRAYS(x86_MSRs_tables);
+
+static size_t x86_MSR__scnprintf(unsigned long msr, char *bf, size_t size, bool show_prefix)
+{
+ return strarrays__scnprintf(&strarrays__x86_MSRs_tables, bf, size, "%#x", show_prefix, msr);
+}
+
+size_t syscall_arg__scnprintf_x86_MSR(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long flags = arg->val;
+
+ return x86_MSR__scnprintf(flags, bf, size, arg->show_string_prefix);
+}
+
+bool syscall_arg__strtoul_x86_MSR(char *bf, size_t size, struct syscall_arg *arg __maybe_unused, u64 *ret)
+{
+ return strarrays__strtoul(&strarrays__x86_MSRs_tables, bf, size, ret);
+}
diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh
new file mode 100755
index 000000000000..831c02cf0586
--- /dev/null
+++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 1 ] ; then
+ arch_x86_header_dir=tools/arch/x86/include/asm/
+else
+ arch_x86_header_dir=$1
+fi
+
+x86_msr_index=${arch_x86_header_dir}/msr-index.h
+
+# Support all later, with some hash table, for now chop off
+# Just the ones starting with 0x00000 so as to have a simple
+# array.
+
+printf "static const char *x86_MSRs[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*'
+egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|AMD64|IA32_TSCDEADLINE|IDT_FCR4)' | \
+ sed -r "s/$regex/\2 \1/g" | sort -n | \
+ xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n\n"
+
+# Remove MSR_K6_WHCR, clashes with MSR_LSTAR
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0xc0000[[:xdigit:]]+)[[:space:]]*.*'
+printf "#define x86_64_specific_MSRs_offset "
+egrep $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1
+printf "static const char *x86_64_specific_MSRs[] = {\n"
+egrep $regex ${x86_msr_index} | \
+ sed -r "s/$regex/\2 \1/g" | egrep -vw 'K6_WHCR' | sort -n | \
+ xargs printf "\t[%s - x86_64_specific_MSRs_offset] = \"%s\",\n"
+printf "};\n\n"
+
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0xc0010[[:xdigit:]]+)[[:space:]]*.*'
+printf "#define x86_AMD_V_KVM_MSRs_offset "
+egrep $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1
+printf "static const char *x86_AMD_V_KVM_MSRs[] = {\n"
+egrep $regex ${x86_msr_index} | \
+ sed -r "s/$regex/\2 \1/g" | sort -n | \
+ xargs printf "\t[%s - x86_AMD_V_KVM_MSRs_offset] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8dcfca1a882f..39814b1806a6 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -95,6 +95,7 @@ perf-y += cloexec.o
perf-y += call-path.o
perf-y += rwsem.o
perf-y += thread-stack.o
+perf-y += spark.o
perf-$(CONFIG_AUXTRACE) += auxtrace.o
perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
perf-$(CONFIG_AUXTRACE) += intel-pt.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 4036c7f7b0fb..2b856b6b46f6 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -853,6 +853,10 @@ static int __symbol__account_cycles(struct cyc_hist *ch,
ch[offset].start < start)
return 0;
}
+
+ if (ch[offset].num < NUM_SPARKS)
+ ch[offset].cycles_spark[ch[offset].num] = cycles;
+
ch[offset].have_start = have_start;
ch[offset].start = start;
ch[offset].cycles += cycles;
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index d76fd0e81f46..3528bd4f8f21 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -11,6 +11,7 @@
#include <pthread.h>
#include <asm/bug.h>
#include "symbol_conf.h"
+#include "spark.h"
struct hist_browser_timer;
struct hist_entry;
@@ -235,6 +236,7 @@ struct cyc_hist {
u64 cycles_aggr;
u64 cycles_max;
u64 cycles_min;
+ s64 cycles_spark[NUM_SPARKS];
u32 num;
u32 num_aggr;
u8 have_start;
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 3baca06786fb..2a91a10ccfcc 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -2,6 +2,7 @@
#include "cpumap.h"
#include "debug.h"
#include "env.h"
+#include "util/header.h"
#include <linux/ctype.h>
#include <linux/zalloc.h>
#include "bpf-event.h"
@@ -256,6 +257,21 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
return 0;
}
+int perf_env__read_cpuid(struct perf_env *env)
+{
+ char cpuid[128];
+ int err = get_cpuid(cpuid, sizeof(cpuid));
+
+ if (err)
+ return err;
+
+ free(env->cpuid);
+ env->cpuid = strdup(cpuid);
+ if (env->cpuid == NULL)
+ return ENOMEM;
+ return 0;
+}
+
static int perf_env__read_arch(struct perf_env *env)
{
struct utsname uts;
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index db40906e2937..a3059dc1abe5 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -104,6 +104,7 @@ void perf_env__exit(struct perf_env *env);
int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
+int perf_env__read_cpuid(struct perf_env *env);
int perf_env__read_cpu_topology_map(struct perf_env *env);
void cpu_cache_level__free(struct cpu_cache_level *cache);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d277a98e62df..21b77efa802c 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -42,6 +42,7 @@
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>
+#include <perf/mmap.h>
#include <internal/xyarray.h>
@@ -57,7 +58,6 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
{
perf_evlist__init(&evlist->core);
perf_evlist__set_maps(&evlist->core, cpus, threads);
- fdarray__init(&evlist->core.pollfd, 64);
evlist->workload.pid = -1;
evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}
@@ -138,7 +138,7 @@ void evlist__exit(struct evlist *evlist)
{
zfree(&evlist->mmap);
zfree(&evlist->overwrite_mmap);
- fdarray__exit(&evlist->core.pollfd);
+ perf_evlist__exit(&evlist->core);
}
void evlist__delete(struct evlist *evlist)
@@ -148,10 +148,6 @@ void evlist__delete(struct evlist *evlist)
evlist__munmap(evlist);
evlist__close(evlist);
- perf_cpu_map__put(evlist->core.cpus);
- perf_thread_map__put(evlist->core.threads);
- evlist->core.cpus = NULL;
- evlist->core.threads = NULL;
evlist__purge(evlist);
evlist__exit(evlist);
free(evlist);
@@ -186,6 +182,30 @@ void perf_evlist__splice_list_tail(struct evlist *evlist,
}
}
+int __evlist__set_tracepoints_handlers(struct evlist *evlist,
+ const struct evsel_str_handler *assocs, size_t nr_assocs)
+{
+ struct evsel *evsel;
+ size_t i;
+ int err;
+
+ for (i = 0; i < nr_assocs; i++) {
+ // Adding a handler for an event not in this evlist, just ignore it.
+ evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name);
+ if (evsel == NULL)
+ continue;
+
+ err = -EEXIST;
+ if (evsel->handler != NULL)
+ goto out;
+ evsel->handler = assocs[i].handler;
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
void __perf_evlist__set_leader(struct list_head *list)
{
struct evsel *evsel, *leader;
@@ -403,19 +423,9 @@ int evlist__add_pollfd(struct evlist *evlist, int fd)
return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
}
-static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
- void *arg __maybe_unused)
-{
- struct mmap *map = fda->priv[fd].ptr;
-
- if (map)
- perf_mmap__put(map);
-}
-
int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
- return fdarray__filter(&evlist->core.pollfd, revents_and_mask,
- perf_evlist__munmap_filtered, NULL);
+ return perf_evlist__filter_pollfd(&evlist->core, revents_and_mask);
}
int evlist__poll(struct evlist *evlist, int timeout)
@@ -423,22 +433,6 @@ int evlist__poll(struct evlist *evlist, int timeout)
return perf_evlist__poll(&evlist->core, timeout);
}
-static void perf_evlist__set_sid_idx(struct evlist *evlist,
- struct evsel *evsel, int idx, int cpu,
- int thread)
-{
- struct perf_sample_id *sid = SID(evsel, cpu, thread);
- sid->idx = idx;
- if (evlist->core.cpus && cpu >= 0)
- sid->cpu = evlist->core.cpus->map[cpu];
- else
- sid->cpu = -1;
- if (!evsel->core.system_wide && evlist->core.threads && thread >= 0)
- sid->tid = perf_thread_map__pid(evlist->core.threads, thread);
- else
- sid->tid = -1;
-}
-
struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
struct hlist_head *head;
@@ -577,11 +571,11 @@ static void evlist__munmap_nofree(struct evlist *evlist)
if (evlist->mmap)
for (i = 0; i < evlist->core.nr_mmaps; i++)
- perf_mmap__munmap(&evlist->mmap[i]);
+ perf_mmap__munmap(&evlist->mmap[i].core);
if (evlist->overwrite_mmap)
for (i = 0; i < evlist->core.nr_mmaps; i++)
- perf_mmap__munmap(&evlist->overwrite_mmap[i]);
+ perf_mmap__munmap(&evlist->overwrite_mmap[i].core);
}
void evlist__munmap(struct evlist *evlist)
@@ -591,6 +585,13 @@ void evlist__munmap(struct evlist *evlist)
zfree(&evlist->overwrite_mmap);
}
+static void perf_mmap__unmap_cb(struct perf_mmap *map)
+{
+ struct mmap *m = container_of(map, struct mmap, core);
+
+ mmap__munmap(m);
+}
+
static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
bool overwrite)
{
@@ -605,8 +606,6 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
return NULL;
for (i = 0; i < evlist->core.nr_mmaps; i++) {
- map[i].core.fd = -1;
- map[i].core.overwrite = overwrite;
/*
* When the perf_mmap() call is made we grab one refcount, plus
* one extra to let perf_mmap__consume() get the last
@@ -616,151 +615,54 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
* Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
* thus does perf_mmap__get() on it.
*/
- refcount_set(&map[i].core.refcnt, 0);
+ perf_mmap__init(&map[i].core, overwrite, perf_mmap__unmap_cb);
}
- return map;
-}
-static bool
-perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
- struct evsel *evsel)
-{
- if (evsel->core.attr.write_backward)
- return false;
- return true;
+ return map;
}
-static int evlist__mmap_per_evsel(struct evlist *evlist, int idx,
- struct mmap_params *mp, int cpu_idx,
- int thread, int *_output, int *_output_overwrite)
+static void
+perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
+ struct perf_mmap_param *_mp,
+ int idx, bool per_cpu)
{
- struct evsel *evsel;
- int revent;
- int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx);
-
- evlist__for_each_entry(evlist, evsel) {
- struct mmap *maps = evlist->mmap;
- int *output = _output;
- int fd;
- int cpu;
-
- mp->prot = PROT_READ | PROT_WRITE;
- if (evsel->core.attr.write_backward) {
- output = _output_overwrite;
- maps = evlist->overwrite_mmap;
-
- if (!maps) {
- maps = evlist__alloc_mmap(evlist, true);
- if (!maps)
- return -1;
- evlist->overwrite_mmap = maps;
- if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
- perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
- }
- mp->prot &= ~PROT_WRITE;
- }
-
- if (evsel->core.system_wide && thread)
- continue;
-
- cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
- if (cpu == -1)
- continue;
-
- fd = FD(evsel, cpu, thread);
-
- if (*output == -1) {
- *output = fd;
-
- if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
- return -1;
- } else {
- if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
- return -1;
-
- perf_mmap__get(&maps[idx]);
- }
-
- revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;
+ struct evlist *evlist = container_of(_evlist, struct evlist, core);
+ struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
- /*
- * The system_wide flag causes a selected event to be opened
- * always without a pid. Consequently it will never get a
- * POLLHUP, but it is used for tracking in combination with
- * other events, so it should not need to be polled anyway.
- * Therefore don't add it for polling.
- */
- if (!evsel->core.system_wide &&
- perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) {
- perf_mmap__put(&maps[idx]);
- return -1;
- }
-
- if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
- if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread,
- fd) < 0)
- return -1;
- perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
- thread);
- }
- }
-
- return 0;
+ auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu);
}
-static int evlist__mmap_per_cpu(struct evlist *evlist,
- struct mmap_params *mp)
+static struct perf_mmap*
+perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
{
- int cpu, thread;
- int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
- int nr_threads = perf_thread_map__nr(evlist->core.threads);
+ struct evlist *evlist = container_of(_evlist, struct evlist, core);
+ struct mmap *maps = evlist->mmap;
- pr_debug2("perf event ring buffer mmapped per cpu\n");
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- int output = -1;
- int output_overwrite = -1;
+ if (overwrite) {
+ maps = evlist->overwrite_mmap;
- auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
- true);
+ if (!maps) {
+ maps = evlist__alloc_mmap(evlist, true);
+ if (!maps)
+ return NULL;
- for (thread = 0; thread < nr_threads; thread++) {
- if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
- thread, &output, &output_overwrite))
- goto out_unmap;
+ evlist->overwrite_mmap = maps;
+ if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
}
}
- return 0;
-
-out_unmap:
- evlist__munmap_nofree(evlist);
- return -1;
+ return &maps[idx].core;
}
-static int evlist__mmap_per_thread(struct evlist *evlist,
- struct mmap_params *mp)
+static int
+perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp,
+ int output, int cpu)
{
- int thread;
- int nr_threads = perf_thread_map__nr(evlist->core.threads);
-
- pr_debug2("perf event ring buffer mmapped per thread\n");
- for (thread = 0; thread < nr_threads; thread++) {
- int output = -1;
- int output_overwrite = -1;
-
- auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
- false);
+ struct mmap *map = container_of(_map, struct mmap, core);
+ struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
- if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
- &output, &output_overwrite))
- goto out_unmap;
- }
-
- return 0;
-
-out_unmap:
- evlist__munmap_nofree(evlist);
- return -1;
+ return mmap__mmap(map, mp, output, cpu);
}
unsigned long perf_event_mlock_kb_in_pages(void)
@@ -890,43 +792,36 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
int comp_level)
{
- struct evsel *evsel;
- const struct perf_cpu_map *cpus = evlist->core.cpus;
- const struct perf_thread_map *threads = evlist->core.threads;
/*
* Delay setting mp.prot: set it before calling perf_mmap__mmap.
* Its value is decided by evsel's write_backward.
* So &mp should not be passed through const pointer.
*/
- struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
- .comp_level = comp_level };
+ struct mmap_params mp = {
+ .nr_cblocks = nr_cblocks,
+ .affinity = affinity,
+ .flush = flush,
+ .comp_level = comp_level
+ };
+ struct perf_evlist_mmap_ops ops = {
+ .idx = perf_evlist__mmap_cb_idx,
+ .get = perf_evlist__mmap_cb_get,
+ .mmap = perf_evlist__mmap_cb_mmap,
+ };
if (!evlist->mmap)
evlist->mmap = evlist__alloc_mmap(evlist, false);
if (!evlist->mmap)
return -ENOMEM;
- if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(&evlist->core) < 0)
- return -ENOMEM;
-
evlist->core.mmap_len = evlist__mmap_size(pages);
pr_debug("mmap size %zuB\n", evlist->core.mmap_len);
- mp.mask = evlist->core.mmap_len - page_size - 1;
+ mp.core.mask = evlist->core.mmap_len - page_size - 1;
auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
auxtrace_pages, auxtrace_overwrite);
- evlist__for_each_entry(evlist, evsel) {
- if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
- evsel->core.sample_id == NULL &&
- perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr) < 0)
- return -ENOMEM;
- }
-
- if (perf_cpu_map__empty(cpus))
- return evlist__mmap_per_thread(evlist, &mp);
-
- return evlist__mmap_per_cpu(evlist, &mp);
+ return perf_evlist__mmap_ops(&evlist->core, &ops, &mp.core);
}
int evlist__mmap(struct evlist *evlist, unsigned int pages)
@@ -1029,6 +924,9 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
struct evsel *evsel;
int err = 0;
+ if (filter == NULL)
+ return -1;
+
evlist__for_each_entry(evlist, evsel) {
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
continue;
@@ -1041,16 +939,35 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
return err;
}
-int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
+int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter)
+{
+ struct evsel *evsel;
+ int err = 0;
+
+ if (filter == NULL)
+ return -1;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+
+ err = perf_evsel__append_tp_filter(evsel, filter);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids)
{
char *filter;
- int ret = -1;
size_t i;
for (i = 0; i < npids; ++i) {
if (i == 0) {
if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
- return -1;
+ return NULL;
} else {
char *tmp;
@@ -1062,9 +979,18 @@ int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *
}
}
- ret = perf_evlist__set_tp_filter(evlist, filter);
+ return filter;
out_free:
free(filter);
+ return NULL;
+}
+
+int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
+{
+ char *filter = asprintf__tp_filter_pids(npids, pids);
+ int ret = perf_evlist__set_tp_filter(evlist, filter);
+
+ free(filter);
return ret;
}
@@ -1073,6 +999,20 @@ int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}
+int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
+{
+ char *filter = asprintf__tp_filter_pids(npids, pids);
+ int ret = perf_evlist__append_tp_filter(evlist, filter);
+
+ free(filter);
+ return ret;
+}
+
+int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
+{
+ return perf_evlist__append_tp_filter_pids(evlist, 1, &pid);
+}
+
bool perf_evlist__valid_sample_type(struct evlist *evlist)
{
struct evsel *pos;
@@ -1729,9 +1669,9 @@ static void *perf_evlist__poll_thread(void *arg)
struct mmap *map = &evlist->mmap[i];
union perf_event *event;
- if (perf_mmap__read_init(map))
+ if (perf_mmap__read_init(&map->core))
continue;
- while ((event = perf_mmap__read_event(map)) != NULL) {
+ while ((event = perf_mmap__read_event(&map->core)) != NULL) {
struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
if (evsel && evsel->side_band.cb)
@@ -1739,10 +1679,10 @@ static void *perf_evlist__poll_thread(void *arg)
else
pr_warning("cannot locate proper evsel for the side band event\n");
- perf_mmap__consume(map);
+ perf_mmap__consume(&map->core);
got_data = true;
}
- perf_mmap__read_done(map);
+ perf_mmap__read_done(&map->core);
}
if (draining && !got_data)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 7cfe75522ba5..13051409fd22 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -118,6 +118,13 @@ void perf_evlist__stop_sb_thread(struct evlist *evlist);
int perf_evlist__add_newtp(struct evlist *evlist,
const char *sys, const char *name, void *handler);
+int __evlist__set_tracepoints_handlers(struct evlist *evlist,
+ const struct evsel_str_handler *assocs,
+ size_t nr_assocs);
+
+#define evlist__set_tracepoints_handlers(evlist, array) \
+ __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array))
+
void __perf_evlist__set_sample_bit(struct evlist *evlist,
enum perf_event_sample_format bit);
void __perf_evlist__reset_sample_bit(struct evlist *evlist,
@@ -133,6 +140,11 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter);
int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid);
int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
+int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter);
+
+int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid);
+int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
+
struct evsel *
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index a35dc57d5995..063d1b93c53d 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -13,6 +13,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h> // sysconf()
+#include <perf/mmap.h>
#ifdef HAVE_LIBNUMA_SUPPORT
#include <numaif.h>
#endif
@@ -23,116 +24,9 @@
#include "../perf.h"
#include <internal/lib.h> /* page_size */
-size_t perf_mmap__mmap_len(struct mmap *map)
+size_t mmap__mmap_len(struct mmap *map)
{
- return map->core.mask + 1 + page_size;
-}
-
-/* When check_messup is true, 'end' must points to a good entry */
-static union perf_event *perf_mmap__read(struct mmap *map,
- u64 *startp, u64 end)
-{
- unsigned char *data = map->core.base + page_size;
- union perf_event *event = NULL;
- int diff = end - *startp;
-
- if (diff >= (int)sizeof(event->header)) {
- size_t size;
-
- event = (union perf_event *)&data[*startp & map->core.mask];
- size = event->header.size;
-
- if (size < sizeof(event->header) || diff < (int)size)
- return NULL;
-
- /*
- * Event straddles the mmap boundary -- header should always
- * be inside due to u64 alignment of output.
- */
- if ((*startp & map->core.mask) + size != ((*startp + size) & map->core.mask)) {
- unsigned int offset = *startp;
- unsigned int len = min(sizeof(*event), size), cpy;
- void *dst = map->core.event_copy;
-
- do {
- cpy = min(map->core.mask + 1 - (offset & map->core.mask), len);
- memcpy(dst, &data[offset & map->core.mask], cpy);
- offset += cpy;
- dst += cpy;
- len -= cpy;
- } while (len);
-
- event = (union perf_event *)map->core.event_copy;
- }
-
- *startp += size;
- }
-
- return event;
-}
-
-/*
- * Read event from ring buffer one by one.
- * Return one event for each call.
- *
- * Usage:
- * perf_mmap__read_init()
- * while(event = perf_mmap__read_event()) {
- * //process the event
- * perf_mmap__consume()
- * }
- * perf_mmap__read_done()
- */
-union perf_event *perf_mmap__read_event(struct mmap *map)
-{
- union perf_event *event;
-
- /*
- * Check if event was unmapped due to a POLLHUP/POLLERR.
- */
- if (!refcount_read(&map->core.refcnt))
- return NULL;
-
- /* non-overwirte doesn't pause the ringbuffer */
- if (!map->core.overwrite)
- map->core.end = perf_mmap__read_head(map);
-
- event = perf_mmap__read(map, &map->core.start, map->core.end);
-
- if (!map->core.overwrite)
- map->core.prev = map->core.start;
-
- return event;
-}
-
-static bool perf_mmap__empty(struct mmap *map)
-{
- return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base;
-}
-
-void perf_mmap__get(struct mmap *map)
-{
- refcount_inc(&map->core.refcnt);
-}
-
-void perf_mmap__put(struct mmap *map)
-{
- BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0);
-
- if (refcount_dec_and_test(&map->core.refcnt))
- perf_mmap__munmap(map);
-}
-
-void perf_mmap__consume(struct mmap *map)
-{
- if (!map->core.overwrite) {
- u64 old = map->core.prev;
-
- perf_mmap__write_tail(map, old);
- }
-
- if (refcount_read(&map->core.refcnt) == 1 && perf_mmap__empty(map))
- perf_mmap__put(map);
+ return perf_mmap__mmap_len(&map->core);
}
int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
@@ -170,7 +64,7 @@ static int perf_mmap__aio_enabled(struct mmap *map)
#ifdef HAVE_LIBNUMA_SUPPORT
static int perf_mmap__aio_alloc(struct mmap *map, int idx)
{
- map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE,
+ map->aio.data[idx] = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
if (map->aio.data[idx] == MAP_FAILED) {
map->aio.data[idx] = NULL;
@@ -183,7 +77,7 @@ static int perf_mmap__aio_alloc(struct mmap *map, int idx)
static void perf_mmap__aio_free(struct mmap *map, int idx)
{
if (map->aio.data[idx]) {
- munmap(map->aio.data[idx], perf_mmap__mmap_len(map));
+ munmap(map->aio.data[idx], mmap__mmap_len(map));
map->aio.data[idx] = NULL;
}
}
@@ -196,7 +90,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
data = map->aio.data[idx];
- mmap_len = perf_mmap__mmap_len(map);
+ mmap_len = mmap__mmap_len(map);
node_mask = 1UL << cpu__get_node(cpu);
if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) {
pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
@@ -210,7 +104,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
#else /* !HAVE_LIBNUMA_SUPPORT */
static int perf_mmap__aio_alloc(struct mmap *map, int idx)
{
- map->aio.data[idx] = malloc(perf_mmap__mmap_len(map));
+ map->aio.data[idx] = malloc(mmap__mmap_len(map));
if (map->aio.data[idx] == NULL)
return -1;
@@ -311,19 +205,13 @@ static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused)
}
#endif
-void perf_mmap__munmap(struct mmap *map)
+void mmap__munmap(struct mmap *map)
{
perf_mmap__aio_munmap(map);
if (map->data != NULL) {
- munmap(map->data, perf_mmap__mmap_len(map));
+ munmap(map->data, mmap__mmap_len(map));
map->data = NULL;
}
- if (map->core.base != NULL) {
- munmap(map->core.base, perf_mmap__mmap_len(map));
- map->core.base = NULL;
- map->core.fd = -1;
- refcount_set(&map->core.refcnt, 0);
- }
auxtrace_mmap__munmap(&map->auxtrace_mmap);
}
@@ -353,34 +241,13 @@ static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params
CPU_SET(map->core.cpu, &map->affinity_mask);
}
-int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
+int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
{
- /*
- * The last one will be done at perf_mmap__consume(), so that we
- * make sure we don't prevent tools from consuming every last event in
- * the ring buffer.
- *
- * I.e. we can get the POLLHUP meaning that the fd doesn't exist
- * anymore, but the last events for it are still in the ring buffer,
- * waiting to be consumed.
- *
- * Tools can chose to ignore this at their own discretion, but the
- * evlist layer can't just drop it when filtering events in
- * perf_evlist__filter_pollfd().
- */
- refcount_set(&map->core.refcnt, 2);
- map->core.prev = 0;
- map->core.mask = mp->mask;
- map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
- MAP_SHARED, fd, 0);
- if (map->core.base == MAP_FAILED) {
+ if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) {
pr_debug2("failed to mmap perf event ring buffer, error %d\n",
errno);
- map->core.base = NULL;
return -1;
}
- map->core.fd = fd;
- map->core.cpu = cpu;
perf_mmap__setup_affinity_mask(map, mp);
@@ -389,7 +256,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
map->comp_level = mp->comp_level;
if (map->comp_level && !perf_mmap__aio_enabled(map)) {
- map->data = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE,
+ map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
if (map->data == MAP_FAILED) {
pr_debug2("failed to mmap data buffer, error %d\n",
@@ -406,96 +273,16 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu)
return perf_mmap__aio_mmap(map, mp);
}
-static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
-{
- struct perf_event_header *pheader;
- u64 evt_head = *start;
- int size = mask + 1;
-
- pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
- pheader = (struct perf_event_header *)(buf + (*start & mask));
- while (true) {
- if (evt_head - *start >= (unsigned int)size) {
- pr_debug("Finished reading overwrite ring buffer: rewind\n");
- if (evt_head - *start > (unsigned int)size)
- evt_head -= pheader->size;
- *end = evt_head;
- return 0;
- }
-
- pheader = (struct perf_event_header *)(buf + (evt_head & mask));
-
- if (pheader->size == 0) {
- pr_debug("Finished reading overwrite ring buffer: get start\n");
- *end = evt_head;
- return 0;
- }
-
- evt_head += pheader->size;
- pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
- }
- WARN_ONCE(1, "Shouldn't get here\n");
- return -1;
-}
-
-/*
- * Report the start and end of the available data in ringbuffer
- */
-static int __perf_mmap__read_init(struct mmap *md)
-{
- u64 head = perf_mmap__read_head(md);
- u64 old = md->core.prev;
- unsigned char *data = md->core.base + page_size;
- unsigned long size;
-
- md->core.start = md->core.overwrite ? head : old;
- md->core.end = md->core.overwrite ? old : head;
-
- if ((md->core.end - md->core.start) < md->core.flush)
- return -EAGAIN;
-
- size = md->core.end - md->core.start;
- if (size > (unsigned long)(md->core.mask) + 1) {
- if (!md->core.overwrite) {
- WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
-
- md->core.prev = head;
- perf_mmap__consume(md);
- return -EAGAIN;
- }
-
- /*
- * Backward ring buffer is full. We still have a chance to read
- * most of data from it.
- */
- if (overwrite_rb_find_range(data, md->core.mask, &md->core.start, &md->core.end))
- return -EINVAL;
- }
-
- return 0;
-}
-
-int perf_mmap__read_init(struct mmap *map)
-{
- /*
- * Check if event was unmapped due to a POLLHUP/POLLERR.
- */
- if (!refcount_read(&map->core.refcnt))
- return -ENOENT;
-
- return __perf_mmap__read_init(map);
-}
-
int perf_mmap__push(struct mmap *md, void *to,
int push(struct mmap *map, void *to, void *buf, size_t size))
{
- u64 head = perf_mmap__read_head(md);
+ u64 head = perf_mmap__read_head(&md->core);
unsigned char *data = md->core.base + page_size;
unsigned long size;
void *buf;
int rc = 0;
- rc = perf_mmap__read_init(md);
+ rc = perf_mmap__read_init(&md->core);
if (rc < 0)
return (rc == -EAGAIN) ? 1 : -1;
@@ -522,24 +309,7 @@ int perf_mmap__push(struct mmap *md, void *to,
}
md->core.prev = head;
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
out:
return rc;
}
-
-/*
- * Mandatory for overwrite mode
- * The direction of overwrite mode is backward.
- * The last perf_mmap__read() will set tail to map->core.prev.
- * Need to correct the map->core.prev to head which is the end of next read.
- */
-void perf_mmap__read_done(struct mmap *map)
-{
- /*
- * Check if event was unmapped due to a POLLHUP/POLLERR.
- */
- if (!refcount_read(&map->core.refcnt))
- return;
-
- map->core.prev = perf_mmap__read_head(map);
-}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index e567c1c875bd..bee4e83f7109 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -37,37 +37,19 @@ struct mmap {
};
struct mmap_params {
- int prot, mask, nr_cblocks, affinity, flush, comp_level;
+ struct perf_mmap_param core;
+ int nr_cblocks, affinity, flush, comp_level;
struct auxtrace_mmap_params auxtrace_mp;
};
-int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu);
-void perf_mmap__munmap(struct mmap *map);
-
-void perf_mmap__get(struct mmap *map);
-void perf_mmap__put(struct mmap *map);
-
-void perf_mmap__consume(struct mmap *map);
-
-static inline u64 perf_mmap__read_head(struct mmap *mm)
-{
- return ring_buffer_read_head(mm->core.base);
-}
-
-static inline void perf_mmap__write_tail(struct mmap *md, u64 tail)
-{
- ring_buffer_write_tail(md->core.base, tail);
-}
+int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu);
+void mmap__munmap(struct mmap *map);
union perf_event *perf_mmap__read_forward(struct mmap *map);
-union perf_event *perf_mmap__read_event(struct mmap *map);
-
int perf_mmap__push(struct mmap *md, void *to,
int push(struct mmap *map, void *to, void *buf, size_t size));
-size_t perf_mmap__mmap_len(struct mmap *map);
+size_t mmap__mmap_len(struct mmap *map);
-int perf_mmap__read_init(struct mmap *md);
-void perf_mmap__read_done(struct mmap *map);
#endif /*__PERF_MMAP_H */
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
index ef46c2848808..e687497b3aac 100644
--- a/tools/perf/util/parse-regs-options.c
+++ b/tools/perf/util/parse-regs-options.c
@@ -13,7 +13,7 @@ static int
__parse_regs(const struct option *opt, const char *str, int unset, bool intr)
{
uint64_t *mode = (uint64_t *)opt->value;
- const struct sample_reg *r;
+ const struct sample_reg *r = NULL;
char *s, *os = NULL, *p;
int ret = -1;
uint64_t mask;
@@ -46,19 +46,23 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
if (!strcmp(s, "?")) {
fprintf(stderr, "available registers: ");
+#ifdef HAVE_PERF_REGS_SUPPORT
for (r = sample_reg_masks; r->name; r++) {
if (r->mask & mask)
fprintf(stderr, "%s ", r->name);
}
+#endif
fputc('\n', stderr);
/* just printing available regs */
return -1;
}
+#ifdef HAVE_PERF_REGS_SUPPORT
for (r = sample_reg_masks; r->name; r++) {
if ((r->mask & mask) && !strcasecmp(s, r->name))
break;
}
- if (!r->name) {
+#endif
+ if (!r || !r->name) {
ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n",
s, intr ? "-I" : "--user-regs=");
goto error;
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 2774cec1f15f..5ee47ae1509c 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -3,10 +3,6 @@
#include "perf_regs.h"
#include "event.h"
-const struct sample_reg __weak sample_reg_masks[] = {
- SMPL_REG_END
-};
-
int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
char **new_op __maybe_unused)
{
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index 47fe34e5f7d5..e014c2c038f4 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -15,8 +15,6 @@ struct sample_reg {
#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) }
#define SMPL_REG_END { .name = NULL }
-extern const struct sample_reg sample_reg_masks[];
-
enum {
SDT_ARG_VALID = 0,
SDT_ARG_SKIP,
@@ -27,6 +25,8 @@ uint64_t arch__intr_reg_mask(void);
uint64_t arch__user_reg_mask(void);
#ifdef HAVE_PERF_REGS_SUPPORT
+extern const struct sample_reg sample_reg_masks[];
+
#include <perf_regs.h>
#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP))
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 02460362256d..25118605f3f8 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -6,6 +6,7 @@
#include <linux/err.h>
#include <perf/cpumap.h>
#include <traceevent/event-parse.h>
+#include <perf/mmap.h>
#include "evlist.h"
#include "callchain.h"
#include "evsel.h"
@@ -1022,10 +1023,10 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
if (!md)
return NULL;
- if (perf_mmap__read_init(md) < 0)
+ if (perf_mmap__read_init(&md->core) < 0)
goto end;
- event = perf_mmap__read_event(md);
+ event = perf_mmap__read_event(&md->core);
if (event != NULL) {
PyObject *pyevent = pyrf_event__new(event);
struct pyrf_event *pevent = (struct pyrf_event *)pyevent;
@@ -1045,7 +1046,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
err = perf_evsel__parse_sample(evsel, event, &pevent->sample);
/* Consume the even only after we parsed it out. */
- perf_mmap__consume(md);
+ perf_mmap__consume(&md->core);
if (err)
return PyErr_Format(PyExc_OSError,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 061bb4d6a3f5..6cc32f5ec043 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2355,35 +2355,6 @@ void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
fprintf(fp, "# ========\n#\n");
}
-
-int __perf_session__set_tracepoints_handlers(struct perf_session *session,
- const struct evsel_str_handler *assocs,
- size_t nr_assocs)
-{
- struct evsel *evsel;
- size_t i;
- int err;
-
- for (i = 0; i < nr_assocs; i++) {
- /*
- * Adding a handler for an event not in the session,
- * just ignore it.
- */
- evsel = perf_evlist__find_tracepoint_by_name(session->evlist, assocs[i].name);
- if (evsel == NULL)
- continue;
-
- err = -EEXIST;
- if (evsel->handler != NULL)
- goto out;
- evsel->handler = assocs[i].handler;
- }
-
- err = 0;
-out:
- return err;
-}
-
int perf_event__process_id_index(struct perf_session *session,
union perf_event *event)
{
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index b4c9428c18f0..8456e1d868fd 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -120,12 +120,8 @@ void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full);
struct evsel_str_handler;
-int __perf_session__set_tracepoints_handlers(struct perf_session *session,
- const struct evsel_str_handler *assocs,
- size_t nr_assocs);
-
#define perf_session__set_tracepoints_handlers(session, array) \
- __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array))
+ __evlist__set_tracepoints_handlers(session->evlist, array, ARRAY_SIZE(array))
extern volatile int session_done;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 7b93f34ac1f4..5aff9542d9b7 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -10,6 +10,8 @@
#include "callchain.h"
#include "values.h"
#include "hist.h"
+#include "stat.h"
+#include "spark.h"
struct option;
struct thread;
@@ -71,6 +73,8 @@ struct hist_entry_diff {
/* PERF_HPP_DIFF__CYCLES */
s64 cycles;
};
+ struct stats stats;
+ unsigned long svals[NUM_SPARKS];
};
struct hist_entry_ops {
diff --git a/tools/perf/util/spark.c b/tools/perf/util/spark.c
new file mode 100644
index 000000000000..70272a8b81a6
--- /dev/null
+++ b/tools/perf/util/spark.c
@@ -0,0 +1,34 @@
+#include <stdio.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include "spark.h"
+#include "stat.h"
+
+#define SPARK_SHIFT 8
+
+/* Print spark lines on outf for numval values in val. */
+int print_spark(char *bf, int size, unsigned long *val, int numval)
+{
+ static const char *ticks[NUM_SPARKS] = {
+ "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"
+ };
+ int i, printed = 0;
+ unsigned long min = ULONG_MAX, max = 0, f;
+
+ for (i = 0; i < numval; i++) {
+ if (val[i] < min)
+ min = val[i];
+ if (val[i] > max)
+ max = val[i];
+ }
+ f = ((max - min) << SPARK_SHIFT) / (NUM_SPARKS - 1);
+ if (f < 1)
+ f = 1;
+ for (i = 0; i < numval; i++) {
+ printed += scnprintf(bf + printed, size - printed, "%s",
+ ticks[((val[i] - min) << SPARK_SHIFT) / f]);
+ }
+
+ return printed;
+}
diff --git a/tools/perf/util/spark.h b/tools/perf/util/spark.h
new file mode 100644
index 000000000000..25402d7d7a64
--- /dev/null
+++ b/tools/perf/util/spark.h
@@ -0,0 +1,8 @@
+#ifndef SPARK_H
+#define SPARK_H 1
+
+#define NUM_SPARKS 8
+
+int print_spark(char *bf, int size, unsigned long *val, int numval);
+
+#endif
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 0b0c6b5b1899..cc2a89b99d3d 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -11,6 +11,7 @@
#include <stdio.h>
#include "path.h"
#include "symbol_conf.h"
+#include "spark.h"
#ifdef HAVE_LIBELF_SUPPORT
#include <libelf.h>
@@ -111,6 +112,7 @@ struct block_info {
u64 end;
u64 cycles;
u64 cycles_aggr;
+ s64 cycles_spark[NUM_SPARKS];
int num;
int num_aggr;
refcount_t refcnt;