aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util/tsc.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-17 11:47:46 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-17 11:47:46 -0700
commit9d9af1007bc08971953ae915d88dc9bb21344b53 (patch)
tree02090da0b271c31f19d58d80f4cff19c8ef89971 /tools/perf/util/tsc.c
parentMerge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma (diff)
parentperf c2c: Update documentation for metrics reorganization (diff)
downloadlinux-dev-9d9af1007bc08971953ae915d88dc9bb21344b53.tar.xz
linux-dev-9d9af1007bc08971953ae915d88dc9bb21344b53.zip
Merge tag 'perf-tools-for-v5.10-2020-10-15' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo: - cgroup improvements for 'perf stat', allowing for compact specification of events and cgroups in the command line. - Support per thread topdown metrics in 'perf stat'. - Support sample-read topdown metric group in 'perf record' - Show start of latency in addition to its end in 'perf sched latency'. - Add min, max to 'perf script' futex-contention output, in addition to avg. - Allow usage of 'perf_event_attr->exclusive' attribute via the new ':e' event modifier. - Add 'snapshot' command to 'perf record --control', using it with Intel PT. - Support FIFO file names as alternative options to 'perf record --control'. - Introduce branch history "streams", to compare 'perf record' runs with 'perf diff' based on branch records and report hot streams. - Support PE executable symbol tables using libbfd, to profile, for instance, wine binaries. - Add filter support for option 'perf ftrace -F/--funcs'. - Allow configuring the 'disassembler_style' 'perf annotate' knob via 'perf config' - Update CascadelakeX and SkylakeX JSON vendor events files. - Add support for parsing perchip/percore JSON vendor events. - Add power9 hv_24x7 core level metric events. - Add L2 prefetch, ITLB instruction fetch hits JSON events for AMD zen1. - Enable Family 19h users by matching Zen2 AMD vendor events. - Use debuginfod in 'perf probe' when required debug files are not found locally. - Display negative tid in non-sample events in 'perf script'. - Make GTK2 support opt-in - Add build test with GTK+ - Add missing -lzstd to the fast path feature detection - Add scripts to auto generate 'mmap', 'mremap' string<->id tables for use in 'perf trace'. - Show python test script in verbose mode. - Fix uncore metric expressions - Msan uninitialized use fixes. - Use condition variables in 'perf bench numa' - Autodetect python3 binary in systems without python2. - Support md5 build ids in addition to sha1. - Add build id 'perf test' regression test. 
- Fix printable strings in python3 scripts. - Fix off by ones in 'perf trace' in arches using libaudit. - Fix JSON event code for events referencing std arch events. - Introduce 'perf test' shell script for Arm CoreSight testing. - Add rdtsc() for Arm64 for use in the PERF_RECORD_TIME_CONV metadata event and in 'perf test tsc'. - 'perf c2c' improvements: Add "RMT Load Hit" metric, "Total Stores", fixes and documentation update. - Fix usage of reloc_sym in 'perf probe' when using both kallsyms and debuginfo files. - Do not print 'Metric Groups:' unnecessarily in 'perf list' - Refcounting fixes in the event parsing code. - Add expand cgroup event 'perf test' entry. - Fix out of bounds CPU map access when handling armv8_pmu events in 'perf stat'. - Add build-id injection 'perf bench' benchmark. - Enter namespace when reading build-id in 'perf inject'. - Do not load map/dso when injecting build-id, speeding up the 'perf inject' process. - Add --buildid-all option to avoid processing all samples, just the mmap metadata events. - Add feature test to check if libbfd has buildid support - Add 'perf test' entry for PE binary format support. - Fix typos in power8 PMU vendor events JSON files. - Hide libtraceevent non API functions. * tag 'perf-tools-for-v5.10-2020-10-15' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (113 commits) perf c2c: Update documentation for metrics reorganization perf c2c: Add metrics "RMT Load Hit" perf c2c: Correct LLC load hit metrics perf c2c: Change header for LLC local hit perf c2c: Use more explicit headers for HITM perf c2c: Change header from "LLC Load Hitm" to "Load Hitm" perf c2c: Organize metrics based on memory hierarchy perf c2c: Display "Total Stores" as a standalone metrics perf c2c: Display the total numbers continuously perf bench: Use condition variables in numa. 
perf jevents: Fix event code for events referencing std arch events perf diff: Support hot streams comparison perf streams: Report hot streams perf streams: Calculate the sum of total streams hits perf streams: Link stream pair perf streams: Compare two streams perf streams: Get the evsel_streams by evsel_idx perf streams: Introduce branch history "streams" perf intel-pt: Improve PT documentation slightly perf tools: Add support for exclusive groups/events ...
Diffstat (limited to 'tools/perf/util/tsc.c')
-rw-r--r--tools/perf/util/tsc.c81
1 files changed, 81 insertions, 0 deletions
diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c
index bfa782421cbd..62b4c75c966c 100644
--- a/tools/perf/util/tsc.c
+++ b/tools/perf/util/tsc.c
@@ -1,7 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+
#include <linux/compiler.h>
+#include <linux/perf_event.h>
+#include <linux/stddef.h>
#include <linux/types.h>
+#include <asm/barrier.h>
+
+#include "event.h"
+#include "synthetic-events.h"
+#include "debug.h"
#include "tsc.h"
u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
@@ -19,12 +28,84 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
{
u64 quot, rem;
+ if (tc->cap_user_time_short) /* re-express cyc as time_cycles plus the time_mask-limited distance from it */
+ cyc = tc->time_cycles +
+ ((cyc - tc->time_cycles) & tc->time_mask);
+/*
+ * Convert the cycle count to perf time in two parts: the whole multiples
+ * of 2^time_shift cycles (quot) and the remaining low bits (rem) are each
+ * multiplied by time_mult, only the remainder product is shifted back
+ * down by time_shift, and the sum is offset by time_zero.
+ */
quot = cyc >> tc->time_shift;
rem = cyc & (((u64)1 << tc->time_shift) - 1);
return tc->time_zero + quot * tc->time_mult +
((rem * tc->time_mult) >> tc->time_shift);
}
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc)
+{
+ u32 seq;
+ int i = 0; /* number of failed snapshot attempts so far */
+/*
+ * Copy the time conversion fields of *pc into *tc.
+ *
+ * Each pass reads pc->lock, copies the fields, then reads pc->lock again.
+ * The copy is accepted only if the lock value did not change and is even;
+ * otherwise the pass is repeated, giving up once more than 10000 passes
+ * have failed.
+ * NOTE(review): an odd lock value presumably means an update of the page
+ * is in progress - confirm against the perf_event_mmap_page documentation.
+ *
+ * Returns 0 on success, -EINVAL if no stable copy was obtained (*tc then
+ * holds whatever the last pass copied), or -EOPNOTSUPP if the copied
+ * cap_user_time_zero is not set (*tc is still filled in).
+ */
+ while (1) {
+ seq = pc->lock;
+ rmb(); /* read the lock value before reading the fields */
+ tc->time_mult = pc->time_mult;
+ tc->time_shift = pc->time_shift;
+ tc->time_zero = pc->time_zero;
+ tc->time_cycles = pc->time_cycles;
+ tc->time_mask = pc->time_mask;
+ tc->cap_user_time_zero = pc->cap_user_time_zero;
+ tc->cap_user_time_short = pc->cap_user_time_short;
+ rmb(); /* finish reading the fields before re-checking the lock */
+ if (pc->lock == seq && !(seq & 1))
+ break;
+ if (++i > 10000) {
+ pr_debug("failed to get perf_event_mmap_page lock\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!tc->cap_user_time_zero)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+/*
+ * Build a PERF_RECORD_TIME_CONV event from the time conversion data read
+ * out of *pc and pass it to process().
+ *
+ * Returns 0 without calling process() when pc is NULL or when
+ * perf_read_tsc_conversion() returns -EOPNOTSUPP, returns any other
+ * perf_read_tsc_conversion() error as-is, and otherwise returns the
+ * result of process().
+ */
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+ struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event event = {
+ .time_conv = {
+ .header = {
+ .type = PERF_RECORD_TIME_CONV,
+ .size = sizeof(struct perf_record_time_conv),
+ },
+ },
+ };
+ struct perf_tsc_conversion tc;
+ int err;
+
+ if (!pc)
+ return 0;
+ err = perf_read_tsc_conversion(pc, &tc);
+ if (err == -EOPNOTSUPP) /* not treated as a failure: no event is emitted */
+ return 0;
+ if (err)
+ return err;
+
+ pr_debug2("Synthesizing TSC conversion information\n");
+ /* Fill the event payload from the values copied into tc above. */
+ event.time_conv.time_mult = tc.time_mult;
+ event.time_conv.time_shift = tc.time_shift;
+ event.time_conv.time_zero = tc.time_zero;
+ event.time_conv.time_cycles = tc.time_cycles;
+ event.time_conv.time_mask = tc.time_mask;
+ event.time_conv.cap_user_time_zero = tc.cap_user_time_zero;
+ event.time_conv.cap_user_time_short = tc.cap_user_time_short;
+
+ return process(tool, &event, NULL, machine);
+}
+
u64 __weak rdtsc(void)
{
return 0;