aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Makefile.perf4
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl4
-rwxr-xr-xtools/perf/check-headers.sh2
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py77
-rwxr-xr-xtools/perf/trace/beauty/mmap_flags.sh14
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c1
-rw-r--r--tools/perf/util/evlist.c29
-rw-r--r--tools/perf/util/evlist.h2
-rw-r--r--tools/perf/util/evsel.c72
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c20
-rw-r--r--tools/perf/util/machine.c32
-rw-r--r--tools/perf/util/pmu.c10
12 files changed, 179 insertions, 88 deletions
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 01f7555fd933..e8c9f77e9010 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -481,8 +481,8 @@ $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_t
mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c
mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh
-$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl)
- $(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
+$(mmap_flags_array): $(linux_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl)
+ $(Q)$(SHELL) '$(mmap_flags_tbl)' $(linux_uapi_dir) $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
mount_flags_array := $(beauty_outdir)/mount_flags_array.c
mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 2ae92fddb6d5..92ee0b4378d4 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -345,6 +345,10 @@
334 common rseq __x64_sys_rseq
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
+424 common pidfd_send_signal __x64_sys_pidfd_send_signal
+425 common io_uring_setup __x64_sys_io_uring_setup
+426 common io_uring_enter __x64_sys_io_uring_enter
+427 common io_uring_register __x64_sys_io_uring_register
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 7b55613924de..c68ee06cae63 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -103,7 +103,7 @@ done
# diff with extra ignore lines
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
-check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common.h>"'
+check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
# diff non-symmetric files
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index e38518cdcbc3..74ef92f1d19a 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -107,6 +107,7 @@ import os
from PySide.QtCore import *
from PySide.QtGui import *
from PySide.QtSql import *
+pyside_version_1 = True
from decimal import *
from ctypes import *
from multiprocessing import Process, Array, Value, Event
@@ -1526,6 +1527,19 @@ def BranchDataPrep(query):
" (" + dsoname(query.value(15)) + ")")
return data
+def BranchDataPrepWA(query):
+ data = []
+ data.append(query.value(0))
+ # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string
+ data.append("{:>19}".format(query.value(1)))
+ for i in xrange(2, 8):
+ data.append(query.value(i))
+ data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) +
+ " (" + dsoname(query.value(11)) + ")" + " -> " +
+ tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) +
+ " (" + dsoname(query.value(15)) + ")")
+ return data
+
# Branch data model
class BranchModel(TreeModel):
@@ -1553,7 +1567,11 @@ class BranchModel(TreeModel):
" AND evsel_id = " + str(self.event_id) +
" ORDER BY samples.id"
" LIMIT " + str(glb_chunk_sz))
- self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample)
+ if pyside_version_1 and sys.version_info[0] == 3:
+ prep = BranchDataPrepWA
+ else:
+ prep = BranchDataPrep
+ self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample)
self.fetcher.done.connect(self.Update)
self.fetcher.Fetch(glb_chunk_sz)
@@ -2079,14 +2097,6 @@ def IsSelectable(db, table, sql = ""):
return False
return True
-# SQL data preparation
-
-def SQLTableDataPrep(query, count):
- data = []
- for i in xrange(count):
- data.append(query.value(i))
- return data
-
# SQL table data model item
class SQLTableItem():
@@ -2110,7 +2120,7 @@ class SQLTableModel(TableModel):
self.more = True
self.populated = 0
self.column_headers = column_headers
- self.fetcher = SQLFetcher(glb, sql, lambda x, y=len(column_headers): SQLTableDataPrep(x, y), self.AddSample)
+ self.fetcher = SQLFetcher(glb, sql, lambda x, y=len(column_headers): self.SQLTableDataPrep(x, y), self.AddSample)
self.fetcher.done.connect(self.Update)
self.fetcher.Fetch(glb_chunk_sz)
@@ -2154,6 +2164,12 @@ class SQLTableModel(TableModel):
def columnHeader(self, column):
return self.column_headers[column]
+ def SQLTableDataPrep(self, query, count):
+ data = []
+ for i in xrange(count):
+ data.append(query.value(i))
+ return data
+
# SQL automatic table data model
class SQLAutoTableModel(SQLTableModel):
@@ -2182,8 +2198,32 @@ class SQLAutoTableModel(SQLTableModel):
QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'")
while query.next():
column_headers.append(query.value(0))
+ if pyside_version_1 and sys.version_info[0] == 3:
+ if table_name == "samples_view":
+ self.SQLTableDataPrep = self.samples_view_DataPrep
+ if table_name == "samples":
+ self.SQLTableDataPrep = self.samples_DataPrep
super(SQLAutoTableModel, self).__init__(glb, sql, column_headers, parent)
+ def samples_view_DataPrep(self, query, count):
+ data = []
+ data.append(query.value(0))
+ # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string
+ data.append("{:>19}".format(query.value(1)))
+ for i in xrange(2, count):
+ data.append(query.value(i))
+ return data
+
+ def samples_DataPrep(self, query, count):
+ data = []
+ for i in xrange(9):
+ data.append(query.value(i))
+ # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string
+ data.append("{:>19}".format(query.value(9)))
+ for i in xrange(10, count):
+ data.append(query.value(i))
+ return data
+
# Base class for custom ResizeColumnsToContents
class ResizeColumnsToContentsBase(QObject):
@@ -2868,9 +2908,13 @@ class LibXED():
ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0)
if not ok:
return 0, ""
+ if sys.version_info[0] == 2:
+ result = inst.buffer.value
+ else:
+ result = inst.buffer.value.decode()
# Return instruction length and the disassembled instruction text
# For now, assume the length is in byte 166
- return inst.xedd[166], inst.buffer.value
+ return inst.xedd[166], result
def TryOpen(file_name):
try:
@@ -2886,9 +2930,14 @@ def Is64Bit(f):
header = f.read(7)
f.seek(pos)
magic = header[0:4]
- eclass = ord(header[4])
- encoding = ord(header[5])
- version = ord(header[6])
+ if sys.version_info[0] == 2:
+ eclass = ord(header[4])
+ encoding = ord(header[5])
+ version = ord(header[6])
+ else:
+ eclass = header[4]
+ encoding = header[5]
+ version = header[6]
if magic == chr(127) + "ELF" and eclass > 0 and eclass < 3 and encoding > 0 and encoding < 3 and version == 1:
result = True if eclass == 2 else False
return result
diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh
index 32bac9c0d694..5f5eefcb3c74 100755
--- a/tools/perf/trace/beauty/mmap_flags.sh
+++ b/tools/perf/trace/beauty/mmap_flags.sh
@@ -1,15 +1,18 @@
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1
-if [ $# -ne 2 ] ; then
+if [ $# -ne 3 ] ; then
[ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/`
+ linux_header_dir=tools/include/uapi/linux
header_dir=tools/include/uapi/asm-generic
arch_header_dir=tools/arch/${hostarch}/include/uapi/asm
else
- header_dir=$1
- arch_header_dir=$2
+ linux_header_dir=$1
+ header_dir=$2
+ arch_header_dir=$3
fi
+linux_mman=${linux_header_dir}/mman.h
arch_mman=${arch_header_dir}/mman.h
# those in egrep -vw are flags, we want just the bits
@@ -20,6 +23,11 @@ egrep -q $regex ${arch_mman} && \
(egrep $regex ${arch_mman} | \
sed -r "s/$regex/\2 \1/g" | \
xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+egrep -q $regex ${linux_mman} && \
+(egrep $regex ${linux_mman} | \
+ egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman}) &&
(egrep $regex ${header_dir}/mman-common.h | \
egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index ba4c623cd8de..39fe21e1cf93 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -387,6 +387,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
break;
case OCSD_INSTR_ISB:
case OCSD_INSTR_DSB_DMB:
+ case OCSD_INSTR_WFI_WFE:
case OCSD_INSTR_OTHER:
default:
packet->last_instr_taken_branch = false;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ec78e93085de..6689378ee577 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -231,35 +231,6 @@ void perf_evlist__set_leader(struct perf_evlist *evlist)
}
}
-void perf_event_attr__set_max_precise_ip(struct perf_event_attr *pattr)
-{
- struct perf_event_attr attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_CPU_CYCLES,
- .exclude_kernel = 1,
- .precise_ip = 3,
- };
-
- event_attr_init(&attr);
-
- /*
- * Unnamed union member, not supported as struct member named
- * initializer in older compilers such as gcc 4.4.7
- */
- attr.sample_period = 1;
-
- while (attr.precise_ip != 0) {
- int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
- if (fd != -1) {
- close(fd);
- break;
- }
- --attr.precise_ip;
- }
-
- pattr->precise_ip = attr.precise_ip;
-}
-
int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise)
{
struct perf_evsel *evsel = perf_evsel__new_cycles(precise);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index dcb68f34d2cd..6a94785b9100 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -315,8 +315,6 @@ void perf_evlist__to_front(struct perf_evlist *evlist,
void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
struct perf_evsel *tracking_evsel);
-void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
-
struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7835e05f0c0a..66d066f18b5b 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -295,7 +295,6 @@ struct perf_evsel *perf_evsel__new_cycles(bool precise)
if (!precise)
goto new_event;
- perf_event_attr__set_max_precise_ip(&attr);
/*
* Now let the usual logic to set up the perf_event_attr defaults
* to kick in when we return and before perf_evsel__open() is called.
@@ -305,6 +304,8 @@ new_event:
if (evsel == NULL)
goto out;
+ evsel->precise_max = true;
+
/* use asprintf() because free(evsel) assumes name is allocated */
if (asprintf(&evsel->name, "cycles%s%s%.*s",
(attr.precise_ip || attr.exclude_kernel) ? ":" : "",
@@ -1083,7 +1084,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
}
if (evsel->precise_max)
- perf_event_attr__set_max_precise_ip(attr);
+ attr->precise_ip = 3;
if (opts->all_user) {
attr->exclude_kernel = 1;
@@ -1749,6 +1750,59 @@ static bool ignore_missing_thread(struct perf_evsel *evsel,
return true;
}
+static void display_attr(struct perf_event_attr *attr)
+{
+ if (verbose >= 2) {
+ fprintf(stderr, "%.60s\n", graph_dotted_line);
+ fprintf(stderr, "perf_event_attr:\n");
+ perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL);
+ fprintf(stderr, "%.60s\n", graph_dotted_line);
+ }
+}
+
+static int perf_event_open(struct perf_evsel *evsel,
+ pid_t pid, int cpu, int group_fd,
+ unsigned long flags)
+{
+ int precise_ip = evsel->attr.precise_ip;
+ int fd;
+
+ while (1) {
+ pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
+ pid, cpu, group_fd, flags);
+
+ fd = sys_perf_event_open(&evsel->attr, pid, cpu, group_fd, flags);
+ if (fd >= 0)
+ break;
+
+ /*
+ * Do quick precise_ip fallback if:
+ * - there is precise_ip set in perf_event_attr
+ * - maximum precise is requested
+ * - sys_perf_event_open failed with ENOTSUP error,
+ * which is associated with wrong precise_ip
+ */
+ if (!precise_ip || !evsel->precise_max || (errno != ENOTSUP))
+ break;
+
+ /*
+ * We tried all the precise_ip values, and it's
+ * still failing, so leave it to standard fallback.
+ */
+ if (!evsel->attr.precise_ip) {
+ evsel->attr.precise_ip = precise_ip;
+ break;
+ }
+
+ pr_debug2("\nsys_perf_event_open failed, error %d\n", -ENOTSUP);
+ evsel->attr.precise_ip--;
+ pr_debug2("decreasing precise_ip by one (%d)\n", evsel->attr.precise_ip);
+ display_attr(&evsel->attr);
+ }
+
+ return fd;
+}
+
int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
struct thread_map *threads)
{
@@ -1824,12 +1878,7 @@ retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
- if (verbose >= 2) {
- fprintf(stderr, "%.60s\n", graph_dotted_line);
- fprintf(stderr, "perf_event_attr:\n");
- perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL);
- fprintf(stderr, "%.60s\n", graph_dotted_line);
- }
+ display_attr(&evsel->attr);
for (cpu = 0; cpu < cpus->nr; cpu++) {
@@ -1841,13 +1890,10 @@ retry_sample_id:
group_fd = get_group_fd(evsel, cpu, thread);
retry_open:
- pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
- pid, cpus->map[cpu], group_fd, flags);
-
test_attr__ready();
- fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu],
- group_fd, flags);
+ fd = perf_event_open(evsel, pid, cpus->map[cpu],
+ group_fd, flags);
FD(evsel, cpu, thread) = fd;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 6e03db142091..872fab163585 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -251,19 +251,15 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
decoder->tsc_ctc_ratio_d;
-
- /*
- * Allow for timestamps appearing to backwards because a TSC
- * packet has slipped past a MTC packet, so allow 2 MTC ticks
- * or ...
- */
- decoder->tsc_slip = multdiv(2 << decoder->mtc_shift,
- decoder->tsc_ctc_ratio_n,
- decoder->tsc_ctc_ratio_d);
}
- /* ... or 0x100 paranoia */
- if (decoder->tsc_slip < 0x100)
- decoder->tsc_slip = 0x100;
+
+ /*
+ * A TSC packet can slip past MTC packets so that the timestamp appears
+ * to go backwards. One estimate is that can be up to about 40 CPU
+ * cycles, which is certainly less than 0x1000 TSC ticks, but accept
+ * slippage an order of magnitude more to be on the safe side.
+ */
+ decoder->tsc_slip = 0x10000;
intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 61959aba7e27..3c520baa198c 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1421,6 +1421,20 @@ static void machine__set_kernel_mmap(struct machine *machine,
machine->vmlinux_map->end = ~0ULL;
}
+static void machine__update_kernel_mmap(struct machine *machine,
+ u64 start, u64 end)
+{
+ struct map *map = machine__kernel_map(machine);
+
+ map__get(map);
+ map_groups__remove(&machine->kmaps, map);
+
+ machine__set_kernel_mmap(machine, start, end);
+
+ map_groups__insert(&machine->kmaps, map);
+ map__put(map);
+}
+
int machine__create_kernel_maps(struct machine *machine)
{
struct dso *kernel = machine__get_kernel(machine);
@@ -1453,17 +1467,11 @@ int machine__create_kernel_maps(struct machine *machine)
goto out_put;
}
- /* we have a real start address now, so re-order the kmaps */
- map = machine__kernel_map(machine);
-
- map__get(map);
- map_groups__remove(&machine->kmaps, map);
-
- /* assume it's the last in the kmaps */
- machine__set_kernel_mmap(machine, addr, ~0ULL);
-
- map_groups__insert(&machine->kmaps, map);
- map__put(map);
+ /*
+ * we have a real start address now, so re-order the kmaps
+ * assume it's the last in the kmaps
+ */
+ machine__update_kernel_mmap(machine, addr, ~0ULL);
}
if (machine__create_extra_kernel_maps(machine, kernel))
@@ -1599,7 +1607,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
if (strstr(kernel->long_name, "vmlinux"))
dso__set_short_name(kernel, "[kernel.vmlinux]", false);
- machine__set_kernel_mmap(machine, event->mmap.start,
+ machine__update_kernel_mmap(machine, event->mmap.start,
event->mmap.start + event->mmap.len);
/*
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 6199a3174ab9..e0429f4ef335 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -732,10 +732,20 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
if (!is_arm_pmu_core(name)) {
pname = pe->pmu ? pe->pmu : "cpu";
+
+ /*
+ * uncore alias may be from different PMU
+ * with common prefix
+ */
+ if (pmu_is_uncore(name) &&
+ !strncmp(pname, name, strlen(pname)))
+ goto new_alias;
+
if (strcmp(pname, name))
continue;
}
+new_alias:
/* need type casts to override 'const' */
__perf_pmu__new_alias(head, NULL, (char *)pe->name,
(char *)pe->desc, (char *)pe->event,