path: root/tools/perf/arch/x86/util
Diffstat (limited to 'tools/perf/arch/x86/util')
-rw-r--r--  tools/perf/arch/x86/util/Build             35
-rw-r--r--  tools/perf/arch/x86/util/archinsn.c        11
-rw-r--r--  tools/perf/arch/x86/util/auxtrace.c        12
-rw-r--r--  tools/perf/arch/x86/util/cpuid.h           34
-rw-r--r--  tools/perf/arch/x86/util/dwarf-regs.c     115
-rw-r--r--  tools/perf/arch/x86/util/event.c          150
-rw-r--r--  tools/perf/arch/x86/util/evlist.c          83
-rw-r--r--  tools/perf/arch/x86/util/evsel.c          145
-rw-r--r--  tools/perf/arch/x86/util/evsel.h            7
-rw-r--r--  tools/perf/arch/x86/util/group.c           28
-rw-r--r--  tools/perf/arch/x86/util/header.c          32
-rw-r--r--  tools/perf/arch/x86/util/intel-bts.c       18
-rw-r--r--  tools/perf/arch/x86/util/intel-pt.c       267
-rw-r--r--  tools/perf/arch/x86/util/iostat.c         475
-rw-r--r--  tools/perf/arch/x86/util/kvm-stat.c        57
-rw-r--r--  tools/perf/arch/x86/util/mem-events.c      34
-rw-r--r--  tools/perf/arch/x86/util/mem-events.h      11
-rw-r--r--  tools/perf/arch/x86/util/perf_regs.c       34
-rw-r--r--  tools/perf/arch/x86/util/pmu.c            308
-rw-r--r--  tools/perf/arch/x86/util/topdown.c         90
-rw-r--r--  tools/perf/arch/x86/util/topdown.h          9
-rw-r--r--  tools/perf/arch/x86/util/tsc.c            133
-rw-r--r--  tools/perf/arch/x86/util/unwind-libdw.c     5
23 files changed, 1644 insertions(+), 449 deletions(-)
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 47f9c56e744f..06d7c0205b3d 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -1,19 +1,20 @@
-perf-y += header.o
-perf-y += tsc.o
-perf-y += pmu.o
-perf-y += kvm-stat.o
-perf-y += perf_regs.o
-perf-y += group.o
-perf-y += machine.o
-perf-y += event.o
+perf-util-y += header.o
+perf-util-y += tsc.o
+perf-util-y += pmu.o
+perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
+perf-util-y += perf_regs.o
+perf-util-y += topdown.o
+perf-util-y += machine.o
+perf-util-y += event.o
+perf-util-y += evlist.o
+perf-util-y += mem-events.o
+perf-util-y += evsel.o
+perf-util-y += iostat.o
-perf-$(CONFIG_DWARF) += dwarf-regs.o
-perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
+perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
-perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-
-perf-$(CONFIG_AUXTRACE) += auxtrace.o
-perf-$(CONFIG_AUXTRACE) += archinsn.o
-perf-$(CONFIG_AUXTRACE) += intel-pt.o
-perf-$(CONFIG_AUXTRACE) += intel-bts.o
+perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
+perf-util-y += archinsn.o
+perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
+perf-util-$(CONFIG_AUXTRACE) += intel-bts.o
diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c
index 3e6791531ca5..546feda08428 100644
--- a/tools/perf/arch/x86/util/archinsn.c
+++ b/tools/perf/arch/x86/util/archinsn.c
@@ -1,17 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
-#include "../../../../arch/x86/include/asm/insn.h"
#include "archinsn.h"
#include "event.h"
#include "machine.h"
#include "thread.h"
#include "symbol.h"
+#include "../../../../arch/x86/include/asm/insn.h"
void arch_fetch_insn(struct perf_sample *sample,
struct thread *thread,
struct machine *machine)
{
struct insn insn;
- int len;
+ int len, ret;
bool is64bit = false;
if (!sample->ip)
@@ -19,8 +19,9 @@ void arch_fetch_insn(struct perf_sample *sample,
len = thread__memcpy(thread, machine, sample->insn, sample->ip, sizeof(sample->insn), &is64bit);
if (len <= 0)
return;
- insn_init(&insn, sample->insn, len, is64bit);
- insn_get_length(&insn);
- if (insn_complete(&insn) && insn.length <= len)
+
+ ret = insn_decode(&insn, sample->insn, len,
+ is64bit ? INSN_MODE_64 : INSN_MODE_32);
+ if (ret >= 0 && insn.length <= len)
sample->insn_len = insn.length;
}
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index 3da506e13f49..ecbf61a7eb3a 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -10,6 +10,7 @@
#include "../../../util/header.h"
#include "../../../util/debug.h"
#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
#include "../../../util/auxtrace.h"
#include "../../../util/intel-pt.h"
#include "../../../util/intel-bts.h"
@@ -25,12 +26,8 @@ struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist,
bool found_pt = false;
bool found_bts = false;
- intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
- if (intel_pt_pmu)
- intel_pt_pmu->auxtrace = true;
- intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
- if (intel_bts_pmu)
- intel_bts_pmu->auxtrace = true;
+ intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME);
+ intel_bts_pmu = perf_pmus__find(INTEL_BTS_PMU_NAME);
evlist__for_each_entry(evlist, evsel) {
if (intel_pt_pmu && evsel->core.attr.type == intel_pt_pmu->type)
@@ -58,11 +55,12 @@ struct auxtrace_record *auxtrace_record__init(struct evlist *evlist,
int *err)
{
char buffer[64];
+ struct perf_cpu cpu = perf_cpu_map__min(evlist->core.all_cpus);
int ret;
*err = 0;
- ret = get_cpuid(buffer, sizeof(buffer));
+ ret = get_cpuid(buffer, sizeof(buffer), cpu);
if (ret) {
*err = ret;
return NULL;
diff --git a/tools/perf/arch/x86/util/cpuid.h b/tools/perf/arch/x86/util/cpuid.h
new file mode 100644
index 000000000000..0a3ae0ace7e9
--- /dev/null
+++ b/tools/perf/arch/x86/util/cpuid.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_CPUID_H
+#define PERF_CPUID_H 1
+
+
+static inline void
+cpuid(unsigned int op, unsigned int op2, unsigned int *a, unsigned int *b,
+ unsigned int *c, unsigned int *d)
+{
+ /*
+ * Preserve %ebx/%rbx register by either placing it in %rdi or saving it
+ * on the stack - x86-64 needs to avoid the stack red zone. In PIC
+ * compilations %ebx contains the address of the global offset
+ * table. %rbx is occasionally used to address stack variables in
+ * presence of dynamic allocas.
+ */
+ asm(
+#if defined(__x86_64__)
+ "mov %%rbx, %%rdi\n"
+ "cpuid\n"
+ "xchg %%rdi, %%rbx\n"
+#else
+ "pushl %%ebx\n"
+ "cpuid\n"
+ "movl %%ebx, %%edi\n"
+ "popl %%ebx\n"
+#endif
+ : "=a"(*a), "=D"(*b), "=c"(*c), "=d"(*d)
+ : "a"(op), "2"(op2));
+}
+
+void get_cpuid_0(char *vendor, unsigned int *lvl);
+
+#endif
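As an aside (not part of the patch): the new cpuid() helper is what get_cpuid_0() in header.c further down is built on. A minimal sketch of using it to read the CPUID leaf-0 vendor string, mirroring that implementation:

	/* Sketch only: assemble the 12-byte vendor string ("GenuineIntel", "AuthenticAMD", ...). */
	char vendor[13];
	unsigned int lvl, b, c, d;

	cpuid(0, 0, &lvl, &b, &c, &d);	/* leaf 0: max level in EAX, vendor in EBX, EDX, ECX */
	memcpy(&vendor[0], &b, 4);
	memcpy(&vendor[4], &d, 4);
	memcpy(&vendor[8], &c, 4);
	vendor[12] = '\0';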
diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c
deleted file mode 100644
index 530934805710..000000000000
--- a/tools/perf/arch/x86/util/dwarf-regs.c
+++ /dev/null
@@ -1,115 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
- * Extracted from probe-finder.c
- *
- * Written by Masami Hiramatsu <mhiramat@redhat.com>
- */
-
-#include <stddef.h>
-#include <errno.h> /* for EINVAL */
-#include <string.h> /* for strcmp */
-#include <linux/ptrace.h> /* for struct pt_regs */
-#include <linux/kernel.h> /* for offsetof */
-#include <dwarf-regs.h>
-
-/*
- * See arch/x86/kernel/ptrace.c.
- * Different from it:
- *
- * - Since struct pt_regs is defined differently for user and kernel,
- * but we want to use 'ax, bx' instead of 'rax, rbx' (which is struct
- * field name of user's pt_regs), we make REG_OFFSET_NAME to accept
- * both string name and reg field name.
- *
- * - Since accessing x86_32's pt_regs from x86_64 building is difficult
- * and vise versa, we simply fill offset with -1, so
- * get_arch_regstr() still works but regs_query_register_offset()
- * returns error.
- * The only inconvenience caused by it now is that we are not allowed
- * to generate BPF prologue for a x86_64 kernel if perf is built for
- * x86_32. This is really a rare usecase.
- *
- * - Order is different from kernel's ptrace.c for get_arch_regstr(). Use
- * the order defined by dwarf.
- */
-
-struct pt_regs_offset {
- const char *name;
- int offset;
-};
-
-#define REG_OFFSET_END {.name = NULL, .offset = 0}
-
-#ifdef __x86_64__
-# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
-# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = -1}
-#else
-# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = -1}
-# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
-#endif
-
-/* TODO: switching by dwarf address size */
-#ifndef __x86_64__
-static const struct pt_regs_offset x86_32_regoffset_table[] = {
- REG_OFFSET_NAME_32("%ax", eax),
- REG_OFFSET_NAME_32("%cx", ecx),
- REG_OFFSET_NAME_32("%dx", edx),
- REG_OFFSET_NAME_32("%bx", ebx),
- REG_OFFSET_NAME_32("$stack", esp), /* Stack address instead of %sp */
- REG_OFFSET_NAME_32("%bp", ebp),
- REG_OFFSET_NAME_32("%si", esi),
- REG_OFFSET_NAME_32("%di", edi),
- REG_OFFSET_END,
-};
-
-#define regoffset_table x86_32_regoffset_table
-#else
-static const struct pt_regs_offset x86_64_regoffset_table[] = {
- REG_OFFSET_NAME_64("%ax", rax),
- REG_OFFSET_NAME_64("%dx", rdx),
- REG_OFFSET_NAME_64("%cx", rcx),
- REG_OFFSET_NAME_64("%bx", rbx),
- REG_OFFSET_NAME_64("%si", rsi),
- REG_OFFSET_NAME_64("%di", rdi),
- REG_OFFSET_NAME_64("%bp", rbp),
- REG_OFFSET_NAME_64("%sp", rsp),
- REG_OFFSET_NAME_64("%r8", r8),
- REG_OFFSET_NAME_64("%r9", r9),
- REG_OFFSET_NAME_64("%r10", r10),
- REG_OFFSET_NAME_64("%r11", r11),
- REG_OFFSET_NAME_64("%r12", r12),
- REG_OFFSET_NAME_64("%r13", r13),
- REG_OFFSET_NAME_64("%r14", r14),
- REG_OFFSET_NAME_64("%r15", r15),
- REG_OFFSET_END,
-};
-
-#define regoffset_table x86_64_regoffset_table
-#endif
-
-/* Minus 1 for the ending REG_OFFSET_END */
-#define ARCH_MAX_REGS ((sizeof(regoffset_table) / sizeof(regoffset_table[0])) - 1)
-
-/* Return architecture dependent register string (for kprobe-tracer) */
-const char *get_arch_regstr(unsigned int n)
-{
- return (n < ARCH_MAX_REGS) ? regoffset_table[n].name : NULL;
-}
-
-/* Reuse code from arch/x86/kernel/ptrace.c */
-/**
- * regs_query_register_offset() - query register offset from its name
- * @name: the name of a register
- *
- * regs_query_register_offset() returns the offset of a register in struct
- * pt_regs from its name. If the name is invalid, this returns -EINVAL;
- */
-int regs_query_register_offset(const char *name)
-{
- const struct pt_regs_offset *roff;
- for (roff = regoffset_table; roff->name != NULL; roff++)
- if (!strcmp(roff->name, name))
- return roff->offset;
- return -EINVAL;
-}
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index 047dc00eafa6..a0400707180c 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -2,6 +2,7 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/zalloc.h>
+#include <stdlib.h>
#include "../../../util/event.h"
#include "../../../util/synthetic-events.h"
@@ -9,69 +10,130 @@
#include "../../../util/tool.h"
#include "../../../util/map.h"
#include "../../../util/debug.h"
+#include "util/sample.h"
#if defined(__x86_64__)
-int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
- perf_event__handler_t process,
- struct machine *machine)
+struct perf_event__synthesize_extra_kmaps_cb_args {
+ const struct perf_tool *tool;
+ perf_event__handler_t process;
+ struct machine *machine;
+ union perf_event *event;
+};
+
+static int perf_event__synthesize_extra_kmaps_cb(struct map *map, void *data)
{
- int rc = 0;
- struct map *pos;
- struct maps *kmaps = &machine->kmaps;
- union perf_event *event = zalloc(sizeof(event->mmap) +
- machine->id_hdr_size);
+ struct perf_event__synthesize_extra_kmaps_cb_args *args = data;
+ union perf_event *event = args->event;
+ struct kmap *kmap;
+ size_t size;
- if (!event) {
- pr_debug("Not enough memory synthesizing mmap event "
- "for extra kernel maps\n");
- return -1;
- }
+ if (!__map__is_extra_kernel_map(map))
+ return 0;
- maps__for_each_entry(kmaps, pos) {
- struct kmap *kmap;
- size_t size;
+ kmap = map__kmap(map);
- if (!__map__is_extra_kernel_map(pos))
- continue;
+ size = sizeof(event->mmap) - sizeof(event->mmap.filename) +
+ PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) +
+ args->machine->id_hdr_size;
- kmap = map__kmap(pos);
+ memset(event, 0, size);
- size = sizeof(event->mmap) - sizeof(event->mmap.filename) +
- PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) +
- machine->id_hdr_size;
+ event->mmap.header.type = PERF_RECORD_MMAP;
- memset(event, 0, size);
+ /*
+ * kernel uses 0 for user space maps, see kernel/perf_event.c
+ * __perf_event_mmap
+ */
+ if (machine__is_host(args->machine))
+ event->header.misc = PERF_RECORD_MISC_KERNEL;
+ else
+ event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
- event->mmap.header.type = PERF_RECORD_MMAP;
+ event->mmap.header.size = size;
- /*
- * kernel uses 0 for user space maps, see kernel/perf_event.c
- * __perf_event_mmap
- */
- if (machine__is_host(machine))
- event->header.misc = PERF_RECORD_MISC_KERNEL;
- else
- event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+ event->mmap.start = map__start(map);
+ event->mmap.len = map__size(map);
+ event->mmap.pgoff = map__pgoff(map);
+ event->mmap.pid = args->machine->pid;
- event->mmap.header.size = size;
+ strlcpy(event->mmap.filename, kmap->name, PATH_MAX);
- event->mmap.start = pos->start;
- event->mmap.len = pos->end - pos->start;
- event->mmap.pgoff = pos->pgoff;
- event->mmap.pid = machine->pid;
+ if (perf_tool__process_synth_event(args->tool, event, args->machine, args->process) != 0)
+ return -1;
- strlcpy(event->mmap.filename, kmap->name, PATH_MAX);
+ return 0;
+}
- if (perf_tool__process_synth_event(tool, event, machine,
- process) != 0) {
- rc = -1;
- break;
- }
+int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ int rc;
+ struct maps *kmaps = machine__kernel_maps(machine);
+ struct perf_event__synthesize_extra_kmaps_cb_args args = {
+ .tool = tool,
+ .process = process,
+ .machine = machine,
+ .event = zalloc(sizeof(args.event->mmap) + machine->id_hdr_size),
+ };
+
+ if (!args.event) {
+ pr_debug("Not enough memory synthesizing mmap event "
+ "for extra kernel maps\n");
+ return -1;
}
- free(event);
+ rc = maps__for_each_map(kmaps, perf_event__synthesize_extra_kmaps_cb, &args);
+
+ free(args.event);
return rc;
}
#endif
+
+void arch_perf_parse_sample_weight(struct perf_sample *data,
+ const __u64 *array, u64 type)
+{
+ union perf_sample_weight weight;
+
+ weight.full = *array;
+ if (type & PERF_SAMPLE_WEIGHT)
+ data->weight = weight.full;
+ else {
+ data->weight = weight.var1_dw;
+ data->ins_lat = weight.var2_w;
+ data->retire_lat = weight.var3_w;
+ }
+}
+
+void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
+ __u64 *array, u64 type)
+{
+ *array = data->weight;
+
+ if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
+ *array &= 0xffffffff;
+ *array |= ((u64)data->ins_lat << 32);
+ *array |= ((u64)data->retire_lat << 48);
+ }
+}
+
+const char *arch_perf_header_entry(const char *se_header)
+{
+ if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
+ return "Local Retire Latency";
+ else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+ return "Retire Latency";
+
+ return se_header;
+}
+
+int arch_support_sort_key(const char *sort_key)
+{
+ if (!strcmp(sort_key, "p_stage_cyc"))
+ return 1;
+ if (!strcmp(sort_key, "local_p_stage_cyc"))
+ return 1;
+ return 0;
+}
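A small sketch (not part of the patch) of the 64-bit weight word that arch_perf_parse_sample_weight() and arch_perf_synthesize_sample_weight() above convert: with PERF_SAMPLE_WEIGHT_STRUCT, the low 32 bits carry the weight (var1_dw), bits 32-47 the instruction latency (var2_w) and bits 48-63 the retire latency (var3_w):

	/* Sketch only: given u32 weight, u16 ins_lat, u16 retire_lat. */
	u64 packed = (u64)weight
		   | ((u64)ins_lat    << 32)
		   | ((u64)retire_lat << 48);

	u32 weight_out     = packed & 0xffffffff;	/* var1_dw */
	u16 ins_lat_out    = (packed >> 32) & 0xffff;	/* var2_w  */
	u16 retire_lat_out = (packed >> 48) & 0xffff;	/* var3_w  */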
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
new file mode 100644
index 000000000000..1969758cc8c1
--- /dev/null
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "../../../util/evlist.h"
+#include "../../../util/evsel.h"
+#include "topdown.h"
+#include "evsel.h"
+
+int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
+{
+ /*
+ * Currently the following topdown events sequence are supported to
+ * move and regroup correctly.
+ *
+ * a. all events in a group
+ * perf stat -e "{instructions,topdown-retiring,slots}" -C0 sleep 1
+ * WARNING: events were regrouped to match PMUs
+ * Performance counter stats for 'CPU(s) 0':
+ * 15,066,240 slots
+ * 1,899,760 instructions
+ * 2,126,998 topdown-retiring
+ * b. all events not in a group
+ * perf stat -e "instructions,topdown-retiring,slots" -C0 sleep 1
+ * WARNING: events were regrouped to match PMUs
+ * Performance counter stats for 'CPU(s) 0':
+ * 2,045,561 instructions
+ * 17,108,370 slots
+ * 2,281,116 topdown-retiring
+ * c. slots event in a group but topdown metrics events outside the group
+ * perf stat -e "{instructions,slots},topdown-retiring" -C0 sleep 1
+ * WARNING: events were regrouped to match PMUs
+ * Performance counter stats for 'CPU(s) 0':
+ * 20,323,878 slots
+ * 2,634,884 instructions
+ * 3,028,656 topdown-retiring
+ * d. slots event and topdown metrics events in two groups
+ * perf stat -e "{instructions,slots},{topdown-retiring}" -C0 sleep 1
+ * WARNING: events were regrouped to match PMUs
+ * Performance counter stats for 'CPU(s) 0':
+ * 26,319,024 slots
+ * 2,427,791 instructions
+ * 2,683,508 topdown-retiring
+ * e. slots event and metrics event are not in a group and not adjacent
+ * perf stat -e "{instructions,slots},cycles,topdown-retiring" -C0 sleep 1
+ * WARNING: events were regrouped to match PMUs
+ * 68,433,522 slots
+ * 8,856,102 topdown-retiring
+ * 7,791,494 instructions
+ * 11,469,513 cycles
+ */
+ if (topdown_sys_has_perf_metrics() &&
+ (arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) {
+ /* Ensure the topdown slots comes first. */
+ if (arch_is_topdown_slots(lhs))
+ return -1;
+ if (arch_is_topdown_slots(rhs))
+ return 1;
+
+ /*
+ * Move topdown metrics events forward only when topdown metrics
+ * events are not in same group with previous slots event. If
+ * topdown metrics events are already in same group with slots
+ * event, do nothing.
+ */
+ if (lhs->core.leader != rhs->core.leader) {
+ bool lhs_topdown = arch_is_topdown_metrics(lhs);
+ bool rhs_topdown = arch_is_topdown_metrics(rhs);
+
+ if (lhs_topdown && !rhs_topdown)
+ return -1;
+ if (!lhs_topdown && rhs_topdown)
+ return 1;
+ }
+ }
+
+ /* Retire latency event should not be group leader*/
+ if (lhs->retire_lat && !rhs->retire_lat)
+ return 1;
+ if (!lhs->retire_lat && rhs->retire_lat)
+ return -1;
+
+ /* Default ordering by insertion index. */
+ return lhs->core.idx - rhs->core.idx;
+}
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
new file mode 100644
index 000000000000..3dd29ba2c23b
--- /dev/null
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include "util/evsel.h"
+#include "util/env.h"
+#include "util/pmu.h"
+#include "util/pmus.h"
+#include "linux/string.h"
+#include "topdown.h"
+#include "evsel.h"
+#include "util/debug.h"
+#include "env.h"
+
+#define IBS_FETCH_L3MISSONLY (1ULL << 59)
+#define IBS_OP_L3MISSONLY (1ULL << 16)
+
+void arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+ evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
+}
+
+/* Check whether the evsel's PMU supports the perf metrics */
+bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
+{
+ struct perf_pmu *pmu;
+ u32 type = evsel->core.attr.type;
+
+ /*
+ * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU
+ * on a non-hybrid machine, "cpu_core" PMU on a hybrid machine.
+ * The slots event is only available for the core PMU, which
+ * supports the perf metrics feature.
+ * Checking both the PERF_TYPE_RAW type and the slots event
+ * should be good enough to detect the perf metrics feature.
+ */
+again:
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT;
+ if (type)
+ goto again;
+ break;
+ case PERF_TYPE_RAW:
+ break;
+ default:
+ return false;
+ }
+
+ pmu = evsel->pmu;
+ if (pmu && perf_pmu__is_fake(pmu))
+ pmu = NULL;
+
+ if (!pmu) {
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ if (pmu->type == PERF_TYPE_RAW)
+ break;
+ }
+ }
+ return pmu && perf_pmu__have_event(pmu, "slots");
+}
+
+bool arch_evsel__must_be_in_group(const struct evsel *evsel)
+{
+ if (!evsel__sys_has_perf_metrics(evsel) || !evsel->name ||
+ strcasestr(evsel->name, "uops_retired.slots"))
+ return false;
+
+ return arch_is_topdown_metrics(evsel) || arch_is_topdown_slots(evsel);
+}
+
+int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
+{
+ u64 event = evsel->core.attr.config & PERF_HW_EVENT_MASK;
+ u64 pmu = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT;
+ const char *event_name;
+
+ if (event < PERF_COUNT_HW_MAX && evsel__hw_names[event])
+ event_name = evsel__hw_names[event];
+ else
+ event_name = "unknown-hardware";
+
+ /* The PMU type is not required for the non-hybrid platform. */
+ if (!pmu)
+ return scnprintf(bf, size, "%s", event_name);
+
+ return scnprintf(bf, size, "%s/%s/",
+ evsel->pmu ? evsel->pmu->name : "cpu",
+ event_name);
+}
+
+static void ibs_l3miss_warn(void)
+{
+ pr_warning(
+"WARNING: Hw internally resets sampling period when L3 Miss Filtering is enabled\n"
+"and tagged operation does not cause L3 Miss. This causes sampling period skew.\n");
+}
+
+void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr)
+{
+ struct perf_pmu *evsel_pmu, *ibs_fetch_pmu, *ibs_op_pmu;
+ static int warned_once;
+
+ if (warned_once || !x86__is_amd_cpu())
+ return;
+
+ evsel_pmu = evsel__find_pmu(evsel);
+ if (!evsel_pmu)
+ return;
+
+ ibs_fetch_pmu = perf_pmus__find("ibs_fetch");
+ ibs_op_pmu = perf_pmus__find("ibs_op");
+
+ if (ibs_fetch_pmu && ibs_fetch_pmu->type == evsel_pmu->type) {
+ if (attr->config & IBS_FETCH_L3MISSONLY) {
+ ibs_l3miss_warn();
+ warned_once = 1;
+ }
+ } else if (ibs_op_pmu && ibs_op_pmu->type == evsel_pmu->type) {
+ if (attr->config & IBS_OP_L3MISSONLY) {
+ ibs_l3miss_warn();
+ warned_once = 1;
+ }
+ }
+}
+
+int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
+{
+ if (!x86__is_amd_cpu())
+ return 0;
+
+ if (!evsel->core.attr.precise_ip &&
+ !(evsel->pmu && !strncmp(evsel->pmu->name, "ibs", 3)))
+ return 0;
+
+ /* More verbose IBS errors. */
+ if (evsel->core.attr.exclude_kernel || evsel->core.attr.exclude_user ||
+ evsel->core.attr.exclude_hv || evsel->core.attr.exclude_idle ||
+ evsel->core.attr.exclude_host || evsel->core.attr.exclude_guest) {
+ return scnprintf(msg, size, "AMD IBS doesn't support privilege filtering. Try "
+ "again without the privilege modifiers (like 'k') at the end.");
+ }
+
+ return 0;
+}
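One detail worth noting (not in the patch): the PERF_TYPE_HARDWARE / PERF_TYPE_HW_CACHE handling in evsel__sys_has_perf_metrics() above relies on the extended-type encoding of attr.config, where the high half names the PMU on hybrid systems. A rough sketch, assuming the usual 32-bit PERF_PMU_TYPE_SHIFT:

	/* Sketch only: split an extended hardware event config. */
	u64 config   = evsel->core.attr.config;
	u64 hw_event = config & PERF_HW_EVENT_MASK;	/* low bits: generic event id                  */
	u64 pmu_type = config >> PERF_PMU_TYPE_SHIFT;	/* high bits: PMU type, 0 on legacy/non-hybrid */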
diff --git a/tools/perf/arch/x86/util/evsel.h b/tools/perf/arch/x86/util/evsel.h
new file mode 100644
index 000000000000..19ad1691374d
--- /dev/null
+++ b/tools/perf/arch/x86/util/evsel.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _EVSEL_H
+#define _EVSEL_H 1
+
+bool evsel__sys_has_perf_metrics(const struct evsel *evsel);
+
+#endif
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c
deleted file mode 100644
index e2f8034b8973..000000000000
--- a/tools/perf/arch/x86/util/group.c
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include "api/fs/fs.h"
-#include "util/group.h"
-
-/*
- * Check whether we can use a group for top down.
- * Without a group may get bad results due to multiplexing.
- */
-bool arch_topdown_check_group(bool *warn)
-{
- int n;
-
- if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
- return false;
- if (n > 0) {
- *warn = true;
- return false;
- }
- return true;
-}
-
-void arch_topdown_group_warn(void)
-{
- fprintf(stderr,
- "nmi_watchdog enabled with topdown. May give wrong results.\n"
- "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
-}
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index 578c8c568ffd..412977f8aa83 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -9,18 +9,17 @@
#include "../../../util/debug.h"
#include "../../../util/header.h"
+#include "cpuid.h"
-static inline void
-cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
- unsigned int *d)
+void get_cpuid_0(char *vendor, unsigned int *lvl)
{
- __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t"
- "movl %%ebx, %%esi\n\t.byte 0x5b"
- : "=a" (*a),
- "=S" (*b),
- "=c" (*c),
- "=d" (*d)
- : "a" (op));
+ unsigned int b, c, d;
+
+ cpuid(0, 0, lvl, &b, &c, &d);
+ strncpy(&vendor[0], (char *)(&b), 4);
+ strncpy(&vendor[4], (char *)(&d), 4);
+ strncpy(&vendor[8], (char *)(&c), 4);
+ vendor[12] = '\0';
}
static int
@@ -31,14 +30,10 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt)
int nb;
char vendor[16];
- cpuid(0, &lvl, &b, &c, &d);
- strncpy(&vendor[0], (char *)(&b), 4);
- strncpy(&vendor[4], (char *)(&d), 4);
- strncpy(&vendor[8], (char *)(&c), 4);
- vendor[12] = '\0';
+ get_cpuid_0(vendor, &lvl);
if (lvl >= 1) {
- cpuid(1, &a, &b, &c, &d);
+ cpuid(1, 0, &a, &b, &c, &d);
family = (a >> 8) & 0xf; /* bits 11 - 8 */
model = (a >> 4) & 0xf; /* Bits 7 - 4 */
@@ -63,13 +58,12 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt)
}
int
-get_cpuid(char *buffer, size_t sz)
+get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused)
{
return __get_cpuid(buffer, sz, "%s,%u,%u,%u$");
}
-char *
-get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+char *get_cpuid_str(struct perf_cpu cpu __maybe_unused)
{
char *buf = malloc(128);
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 0dc09b5809c1..85c8186300c8 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -17,7 +17,7 @@
#include "../../../util/evlist.h"
#include "../../../util/mmap.h"
#include "../../../util/session.h"
-#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
#include "../../../util/debug.h"
#include "../../../util/record.h"
#include "../../../util/tsc.h"
@@ -110,7 +110,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
container_of(itr, struct intel_bts_recording, itr);
struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
struct evsel *evsel, *intel_bts_evsel = NULL;
- const struct perf_cpu_map *cpus = evlist->core.cpus;
+ const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
bool privileged = perf_event_paranoid_check(-1);
if (opts->auxtrace_sample_mode) {
@@ -129,6 +129,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
}
evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1;
+ evsel->needs_auxtrace_mmap = true;
intel_bts_evsel = evsel;
opts->full_auxtrace = true;
}
@@ -142,7 +143,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
if (!opts->full_auxtrace)
return 0;
- if (opts->full_auxtrace && !perf_cpu_map__empty(cpus)) {
+ if (opts->full_auxtrace && !perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
return -EINVAL;
}
@@ -218,12 +219,12 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
* To obtain the auxtrace buffer file descriptor, the auxtrace event
* must come first.
*/
- perf_evlist__to_front(evlist, intel_bts_evsel);
+ evlist__to_front(evlist, intel_bts_evsel);
/*
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
- if (!perf_cpu_map__empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(intel_bts_evsel, CPU);
}
@@ -232,13 +233,13 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
struct evsel *tracking_evsel;
int err;
- err = parse_events(evlist, "dummy:u", NULL);
+ err = parse_event(evlist, "dummy:u");
if (err)
return err;
tracking_evsel = evlist__last(evlist);
- perf_evlist__set_tracking_event(evlist, tracking_evsel);
+ evlist__set_tracking_event(evlist, tracking_evsel);
tracking_evsel->core.attr.freq = 0;
tracking_evsel->core.attr.sample_period = 1;
@@ -415,7 +416,7 @@ out_err:
struct auxtrace_record *intel_bts_recording_init(int *err)
{
- struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
+ struct perf_pmu *intel_bts_pmu = perf_pmus__find(INTEL_BTS_PMU_NAME);
struct intel_bts_recording *btsr;
if (!intel_bts_pmu)
@@ -433,7 +434,6 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
}
btsr->intel_bts_pmu = intel_bts_pmu;
- btsr->itr.pmu = intel_bts_pmu;
btsr->itr.recording_options = intel_bts_recording_options;
btsr->itr.info_priv_size = intel_bts_info_priv_size;
btsr->itr.info_fill = intel_bts_info_fill;
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 839ef52c1ac2..add33cb5d1da 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -11,6 +11,7 @@
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
+#include <linux/err.h>
#include <cpuid.h>
#include "../../../util/session.h"
@@ -18,11 +19,12 @@
#include "../../../util/evlist.h"
#include "../../../util/evsel.h"
#include "../../../util/evsel_config.h"
+#include "../../../util/config.h"
#include "../../../util/cpumap.h"
#include "../../../util/mmap.h"
#include <subcmd/parse-options.h>
#include "../../../util/parse-events.h"
-#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
#include "../../../util/debug.h"
#include "../../../util/auxtrace.h"
#include "../../../util/perf_api_probe.h"
@@ -31,6 +33,7 @@
#include "../../../util/tsc.h"
#include <internal/lib.h> // page_size
#include "../../../util/intel-pt.h"
+#include <api/fs/fs.h>
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
@@ -50,6 +53,7 @@ struct intel_pt_recording {
struct perf_pmu *intel_pt_pmu;
int have_sched_switch;
struct evlist *evlist;
+ bool all_switch_events;
bool snapshot_mode;
bool snapshot_init_done;
size_t snapshot_size;
@@ -59,43 +63,35 @@ struct intel_pt_recording {
size_t priv_size;
};
-static int intel_pt_parse_terms_with_default(const char *pmu_name,
- struct list_head *formats,
+static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu,
const char *str,
u64 *config)
{
- struct list_head *terms;
+ struct parse_events_terms terms;
struct perf_event_attr attr = { .size = 0, };
int err;
- terms = malloc(sizeof(struct list_head));
- if (!terms)
- return -ENOMEM;
-
- INIT_LIST_HEAD(terms);
-
- err = parse_events_terms(terms, str);
+ parse_events_terms__init(&terms);
+ err = parse_events_terms(&terms, str, /*input=*/ NULL);
if (err)
goto out_free;
attr.config = *config;
- err = perf_pmu__config_terms(pmu_name, formats, &attr, terms, true,
- NULL);
+ err = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/true, /*apply_hardcoded=*/false,
+ /*err=*/NULL);
if (err)
goto out_free;
*config = attr.config;
out_free:
- parse_events_terms__delete(terms);
+ parse_events_terms__exit(&terms);
return err;
}
-static int intel_pt_parse_terms(const char *pmu_name, struct list_head *formats,
- const char *str, u64 *config)
+static int intel_pt_parse_terms(const struct perf_pmu *pmu, const char *str, u64 *config)
{
*config = 0;
- return intel_pt_parse_terms_with_default(pmu_name, formats, str,
- config);
+ return intel_pt_parse_terms_with_default(pmu, str, config);
}
static u64 intel_pt_masked_bits(u64 mask, u64 bits)
@@ -125,7 +121,7 @@ static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
*res = 0;
- mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
+ mask = perf_pmu__format_bits(intel_pt_pmu, str);
if (!mask)
return -EINVAL;
@@ -185,7 +181,7 @@ static int intel_pt_pick_bit(int bits, int target)
return pick;
}
-static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
+static u64 intel_pt_default_config(const struct perf_pmu *intel_pt_pmu)
{
char buf[256];
int mtc, mtc_periods = 0, mtc_period;
@@ -193,16 +189,19 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
int pos = 0;
u64 config;
char c;
+ int dirfd;
+
+ dirfd = perf_pmu__event_source_devices_fd();
pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");
- if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d",
- &mtc) != 1)
+ if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "caps/mtc", "%d",
+ &mtc) != 1)
mtc = 1;
if (mtc) {
- if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x",
- &mtc_periods) != 1)
+ if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "caps/mtc_periods", "%x",
+ &mtc_periods) != 1)
mtc_periods = 0;
if (mtc_periods) {
mtc_period = intel_pt_pick_bit(mtc_periods, 3);
@@ -211,13 +210,13 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
}
}
- if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d",
- &psb_cyc) != 1)
+ if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "caps/psb_cyc", "%d",
+ &psb_cyc) != 1)
psb_cyc = 1;
if (psb_cyc && mtc_periods) {
- if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x",
- &psb_periods) != 1)
+ if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "caps/psb_periods", "%x",
+ &psb_periods) != 1)
psb_periods = 0;
if (psb_periods) {
psb_period = intel_pt_pick_bit(psb_periods, 3);
@@ -226,15 +225,15 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
}
}
- if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
- perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
+ if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "format/pt", "%c", &c) == 1 &&
+ perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "format/branch", "%c", &c) == 1)
pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");
pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
- intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, buf,
- &config);
+ intel_pt_parse_terms(intel_pt_pmu, buf, &config);
+ close(dirfd);
return config;
}
@@ -261,20 +260,17 @@ static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
return 0;
}
-struct perf_event_attr *
-intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
+void intel_pt_pmu_default_config(const struct perf_pmu *intel_pt_pmu,
+ struct perf_event_attr *attr)
{
- struct perf_event_attr *attr;
-
- attr = zalloc(sizeof(struct perf_event_attr));
- if (!attr)
- return NULL;
+ static u64 config;
+ static bool initialized;
- attr->config = intel_pt_default_config(intel_pt_pmu);
-
- intel_pt_pmu->selectable = true;
-
- return attr;
+ if (!initialized) {
+ config = intel_pt_default_config(intel_pt_pmu);
+ initialized = true;
+ }
+ attr->config = config;
}
static const char *intel_pt_find_filter(struct evlist *evlist,
@@ -306,6 +302,7 @@ intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist)
ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
intel_pt_filter_bytes(filter);
+ ptr->priv_size += sizeof(u64); /* Cap Event Trace */
return ptr->priv_size;
}
@@ -335,28 +332,27 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
unsigned long max_non_turbo_ratio;
size_t filter_str_len;
const char *filter;
+ int event_trace;
__u64 *info;
int err;
if (priv_size != ptr->priv_size)
return -EINVAL;
- intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
- "tsc", &tsc_bit);
- intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
- "noretcomp", &noretcomp_bit);
- intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
- "mtc", &mtc_bit);
- mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
- "mtc_period");
- intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
- "cyc", &cyc_bit);
+ intel_pt_parse_terms(intel_pt_pmu, "tsc", &tsc_bit);
+ intel_pt_parse_terms(intel_pt_pmu, "noretcomp", &noretcomp_bit);
+ intel_pt_parse_terms(intel_pt_pmu, "mtc", &mtc_bit);
+ mtc_freq_bits = perf_pmu__format_bits(intel_pt_pmu, "mtc_period");
+ intel_pt_parse_terms(intel_pt_pmu, "cyc", &cyc_bit);
intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
"%lu", &max_non_turbo_ratio) != 1)
max_non_turbo_ratio = 0;
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/event_trace",
+ "%d", &event_trace) != 1)
+ event_trace = 0;
filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
filter_str_len = filter ? strlen(filter) : 0;
@@ -377,7 +373,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
ui__warning("Intel Processor Trace: TSC not available\n");
}
- per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.cpus);
+ per_cpu_mmaps = !perf_cpu_map__is_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus);
auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
@@ -407,36 +403,44 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
info += len >> 3;
}
+ *info++ = event_trace;
+
return 0;
}
+#ifdef HAVE_LIBTRACEEVENT
static int intel_pt_track_switches(struct evlist *evlist)
{
const char *sched_switch = "sched:sched_switch";
struct evsel *evsel;
int err;
- if (!perf_evlist__can_select_event(evlist, sched_switch))
+ if (!evlist__can_select_event(evlist, sched_switch))
return -EPERM;
- err = parse_events(evlist, sched_switch, NULL);
- if (err) {
- pr_debug2("%s: failed to parse %s, error %d\n",
+ evsel = evlist__add_sched_switch(evlist, true);
+ if (IS_ERR(evsel)) {
+ err = PTR_ERR(evsel);
+ pr_debug2("%s: failed to create %s, error = %d\n",
__func__, sched_switch, err);
return err;
}
- evsel = evlist__last(evlist);
-
- evsel__set_sample_bit(evsel, CPU);
- evsel__set_sample_bit(evsel, TIME);
-
- evsel->core.system_wide = true;
- evsel->no_aux_samples = true;
evsel->immediate = true;
return 0;
}
+#endif
+
+static bool intel_pt_exclude_guest(void)
+{
+ int pt_mode;
+
+ if (sysfs__read_int("module/kvm_intel/parameters/pt_mode", &pt_mode))
+ pt_mode = 0;
+
+ return pt_mode == 1;
+}
static void intel_pt_valid_str(char *str, size_t len, u64 valid)
{
@@ -484,7 +488,7 @@ static void intel_pt_valid_str(char *str, size_t len, u64 valid)
}
}
-static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
+static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu, int dirfd,
const char *caps, const char *name,
const char *supported, u64 config)
{
@@ -494,16 +498,16 @@ static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
u64 bits;
int ok;
- if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1)
+ if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, caps, "%llx", &valid) != 1)
valid = 0;
if (supported &&
- perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok)
+ perf_pmu__scan_file_at(intel_pt_pmu, dirfd, supported, "%d", &ok) == 1 && !ok)
valid = 0;
valid |= 1;
- bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);
+ bits = perf_pmu__format_bits(intel_pt_pmu, name);
config &= bits;
@@ -527,56 +531,45 @@ out_err:
static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
struct evsel *evsel)
{
- int err;
+ int err, dirfd;
char c;
if (!evsel)
return 0;
+ dirfd = perf_pmu__event_source_devices_fd();
+ if (dirfd < 0)
+ return dirfd;
+
/*
* If supported, force pass-through config term (pt=1) even if user
* sets pt=0, which avoids senseless kernel errors.
*/
- if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
+ if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "format/pt", "%c", &c) == 1 &&
!(evsel->core.attr.config & 1)) {
pr_warning("pt=0 doesn't make sense, forcing pt=1\n");
evsel->core.attr.config |= 1;
}
- err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
+ err = intel_pt_val_config_term(intel_pt_pmu, dirfd, "caps/cycle_thresholds",
"cyc_thresh", "caps/psb_cyc",
evsel->core.attr.config);
if (err)
- return err;
+ goto out;
- err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods",
+ err = intel_pt_val_config_term(intel_pt_pmu, dirfd, "caps/mtc_periods",
"mtc_period", "caps/mtc",
evsel->core.attr.config);
if (err)
- return err;
+ goto out;
- return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods",
+ err = intel_pt_val_config_term(intel_pt_pmu, dirfd, "caps/psb_periods",
"psb_period", "caps/psb_cyc",
evsel->core.attr.config);
-}
-static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu,
- struct evsel *evsel)
-{
- u64 user_bits = 0, bits;
- struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG);
-
- if (term)
- user_bits = term->val.cfg_chg;
-
- bits = perf_pmu__format_bits(&intel_pt_pmu->format, "psb_period");
-
- /* Did user change psb_period */
- if (bits & user_bits)
- return;
-
- /* Set psb_period to 0 */
- evsel->core.attr.config &= ~bits;
+out:
+ close(dirfd);
+ return err;
}
static void intel_pt_min_max_sample_sz(struct evlist *evlist,
@@ -625,7 +618,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
bool have_timing_info, need_immediate = false;
struct evsel *evsel, *intel_pt_evsel = NULL;
- const struct perf_cpu_map *cpus = evlist->core.cpus;
+ const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
bool privileged = perf_event_paranoid_check(-1);
u64 tsc_bit;
int err;
@@ -641,6 +634,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
}
evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1;
+ evsel->core.attr.exclude_guest = intel_pt_exclude_guest();
+ evsel->no_aux_samples = true;
+ evsel->needs_auxtrace_mmap = true;
intel_pt_evsel = evsel;
opts->full_auxtrace = true;
}
@@ -668,7 +664,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
return 0;
if (opts->auxtrace_sample_mode)
- intel_pt_config_sample_mode(intel_pt_pmu, intel_pt_evsel);
+ evsel__set_config_if_unset(intel_pt_pmu, intel_pt_evsel,
+ "psb_period", 0);
err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
if (err)
@@ -775,8 +772,14 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
}
}
- intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
- "tsc", &tsc_bit);
+ if (!opts->auxtrace_snapshot_mode && !opts->auxtrace_sample_mode) {
+ size_t aw = opts->auxtrace_mmap_pages * (size_t)page_size / 4;
+ u32 aux_watermark = aw > UINT_MAX ? UINT_MAX : aw;
+
+ intel_pt_evsel->core.attr.aux_watermark = aux_watermark;
+ }
+
+ intel_pt_parse_terms(intel_pt_pmu, "tsc", &tsc_bit);
if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit))
have_timing_info = true;
@@ -787,27 +790,20 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Per-cpu recording needs sched_switch events to distinguish different
* threads.
*/
- if (have_timing_info && !perf_cpu_map__empty(cpus) &&
+ if (have_timing_info && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) &&
!record_opts__no_switch_events(opts)) {
if (perf_can_record_switch_events()) {
bool cpu_wide = !target__none(&opts->target) &&
!target__has_task(&opts->target);
- if (!cpu_wide && perf_can_record_cpu_wide()) {
+ if (ptr->all_switch_events && !cpu_wide && perf_can_record_cpu_wide()) {
struct evsel *switch_evsel;
- err = parse_events(evlist, "dummy:u", NULL);
- if (err)
- return err;
-
- switch_evsel = evlist__last(evlist);
+ switch_evsel = evlist__add_dummy_on_all_cpus(evlist);
+ if (!switch_evsel)
+ return -ENOMEM;
- switch_evsel->core.attr.freq = 0;
- switch_evsel->core.attr.sample_period = 1;
switch_evsel->core.attr.context_switch = 1;
-
- switch_evsel->core.system_wide = true;
- switch_evsel->no_aux_samples = true;
switch_evsel->immediate = true;
evsel__set_sample_bit(switch_evsel, TID);
@@ -826,6 +822,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
ptr->have_sched_switch = 2;
}
} else {
+#ifdef HAVE_LIBTRACEEVENT
err = intel_pt_track_switches(evlist);
if (err == -EPERM)
pr_debug2("Unable to select sched:sched_switch\n");
@@ -833,44 +830,51 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
return err;
else
ptr->have_sched_switch = 1;
+#endif
}
}
+ if (have_timing_info && !intel_pt_evsel->core.attr.exclude_kernel &&
+ perf_can_record_text_poke_events() && perf_can_record_cpu_wide())
+ opts->text_poke = true;
+
if (intel_pt_evsel) {
/*
* To obtain the auxtrace buffer file descriptor, the auxtrace
* event must come first.
*/
- perf_evlist__to_front(evlist, intel_pt_evsel);
+ evlist__to_front(evlist, intel_pt_evsel);
/*
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
- if (!perf_cpu_map__empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(intel_pt_evsel, CPU);
}
/* Add dummy event to keep tracking */
if (opts->full_auxtrace) {
+ bool need_system_wide_tracking;
struct evsel *tracking_evsel;
- err = parse_events(evlist, "dummy:u", NULL);
- if (err)
- return err;
-
- tracking_evsel = evlist__last(evlist);
+ /*
+ * User space tasks can migrate between CPUs, so when tracing
+ * selected CPUs, sideband for all CPUs is still needed.
+ */
+ need_system_wide_tracking = opts->target.cpu_list &&
+ !intel_pt_evsel->core.attr.exclude_user;
- perf_evlist__set_tracking_event(evlist, tracking_evsel);
+ tracking_evsel = evlist__add_aux_dummy(evlist, need_system_wide_tracking);
+ if (!tracking_evsel)
+ return -ENOMEM;
- tracking_evsel->core.attr.freq = 0;
- tracking_evsel->core.attr.sample_period = 1;
+ evlist__set_tracking_event(evlist, tracking_evsel);
- tracking_evsel->no_aux_samples = true;
if (need_immediate)
tracking_evsel->immediate = true;
/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(tracking_evsel, TIME);
/* And the CPU for switch events */
evsel__set_sample_bit(tracking_evsel, CPU);
@@ -882,7 +886,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Warn the user when we do not have enough information to decode i.e.
* per-cpu with no sched_switch (except workload-only).
*/
- if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
+ if (!ptr->have_sched_switch && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) &&
!target__none(&opts->target) &&
!intel_pt_evsel->core.attr.exclude_user)
ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
@@ -1176,9 +1180,19 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
return rdtsc();
}
+static int intel_pt_perf_config(const char *var, const char *value, void *data)
+{
+ struct intel_pt_recording *ptr = data;
+
+ if (!strcmp(var, "intel-pt.all-switch-events"))
+ ptr->all_switch_events = perf_config_bool(var, value);
+
+ return 0;
+}
+
struct auxtrace_record *intel_pt_recording_init(int *err)
{
- struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
+ struct perf_pmu *intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME);
struct intel_pt_recording *ptr;
if (!intel_pt_pmu)
@@ -1195,8 +1209,9 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
return NULL;
}
+ perf_config(intel_pt_perf_config, ptr);
+
ptr->intel_pt_pmu = intel_pt_pmu;
- ptr->itr.pmu = intel_pt_pmu;
ptr->itr.recording_options = intel_pt_recording_options;
ptr->itr.info_priv_size = intel_pt_info_priv_size;
ptr->itr.info_fill = intel_pt_info_fill;
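As a usage note (not in the patch itself): the new intel_pt_perf_config() callback reads the boolean intel-pt.all-switch-events key, which would normally come from the perf config file, e.g.:

	# ~/.perfconfig
	[intel-pt]
		all-switch-events = true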
diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c
new file mode 100644
index 000000000000..7442a2cd87ed
--- /dev/null
+++ b/tools/perf/arch/x86/util/iostat.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * perf iostat
+ *
+ * Copyright (C) 2020, Intel Corporation
+ *
+ * Authors: Alexander Antonov <alexander.antonov@linux.intel.com>
+ */
+
+#include <api/fs/fs.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/zalloc.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <regex.h>
+#include "util/cpumap.h"
+#include "util/debug.h"
+#include "util/iostat.h"
+#include "util/counts.h"
+#include "path.h"
+
+#ifndef MAX_PATH
+#define MAX_PATH 1024
+#endif
+
+#define UNCORE_IIO_PMU_PATH "bus/event_source/devices/uncore_iio_%d"
+#define SYSFS_UNCORE_PMU_PATH "%s/"UNCORE_IIO_PMU_PATH
+#define PLATFORM_MAPPING_PATH UNCORE_IIO_PMU_PATH"/die%d"
+
+/*
+ * Each metric requiries one IIO event which increments at every 4B transfer
+ * in corresponding direction. The formulas to compute metrics are generic:
+ * #EventCount * 4B / (1024 * 1024)
+ */
+static const char * const iostat_metrics[] = {
+ "Inbound Read(MB)",
+ "Inbound Write(MB)",
+ "Outbound Read(MB)",
+ "Outbound Write(MB)",
+};
+
+static inline int iostat_metrics_count(void)
+{
+ return sizeof(iostat_metrics) / sizeof(char *);
+}
+
+static const char *iostat_metric_by_idx(int idx)
+{
+ return *(iostat_metrics + idx % iostat_metrics_count());
+}
+
+struct iio_root_port {
+ u32 domain;
+ u8 bus;
+ u8 die;
+ u8 pmu_idx;
+ int idx;
+};
+
+struct iio_root_ports_list {
+ struct iio_root_port **rps;
+ int nr_entries;
+};
+
+static struct iio_root_ports_list *root_ports;
+
+static void iio_root_port_show(FILE *output,
+ const struct iio_root_port * const rp)
+{
+ if (output && rp)
+ fprintf(output, "S%d-uncore_iio_%d<%04x:%02x>\n",
+ rp->die, rp->pmu_idx, rp->domain, rp->bus);
+}
+
+static struct iio_root_port *iio_root_port_new(u32 domain, u8 bus,
+ u8 die, u8 pmu_idx)
+{
+ struct iio_root_port *p = calloc(1, sizeof(*p));
+
+ if (p) {
+ p->domain = domain;
+ p->bus = bus;
+ p->die = die;
+ p->pmu_idx = pmu_idx;
+ }
+ return p;
+}
+
+static void iio_root_ports_list_free(struct iio_root_ports_list *list)
+{
+ int idx;
+
+ if (list) {
+ for (idx = 0; idx < list->nr_entries; idx++)
+ zfree(&list->rps[idx]);
+ zfree(&list->rps);
+ free(list);
+ }
+}
+
+static struct iio_root_port *iio_root_port_find_by_notation(
+ const struct iio_root_ports_list * const list, u32 domain, u8 bus)
+{
+ int idx;
+ struct iio_root_port *rp;
+
+ if (list) {
+ for (idx = 0; idx < list->nr_entries; idx++) {
+ rp = list->rps[idx];
+ if (rp && rp->domain == domain && rp->bus == bus)
+ return rp;
+ }
+ }
+ return NULL;
+}
+
+static int iio_root_ports_list_insert(struct iio_root_ports_list *list,
+ struct iio_root_port * const rp)
+{
+ struct iio_root_port **tmp_buf;
+
+ if (list && rp) {
+ rp->idx = list->nr_entries++;
+ tmp_buf = realloc(list->rps,
+ list->nr_entries * sizeof(*list->rps));
+ if (!tmp_buf) {
+ pr_err("Failed to realloc memory\n");
+ return -ENOMEM;
+ }
+ tmp_buf[rp->idx] = rp;
+ list->rps = tmp_buf;
+ }
+ return 0;
+}
+
+static int iio_mapping(u8 pmu_idx, struct iio_root_ports_list * const list)
+{
+ char *buf;
+ char path[MAX_PATH];
+ u32 domain;
+ u8 bus;
+ struct iio_root_port *rp;
+ size_t size;
+ int ret;
+
+ for (int die = 0; die < cpu__max_node(); die++) {
+ scnprintf(path, MAX_PATH, PLATFORM_MAPPING_PATH, pmu_idx, die);
+ if (sysfs__read_str(path, &buf, &size) < 0) {
+ if (pmu_idx)
+ goto out;
+ pr_err("Mode iostat is not supported\n");
+ return -1;
+ }
+ ret = sscanf(buf, "%04x:%02hhx", &domain, &bus);
+ free(buf);
+ if (ret != 2) {
+ pr_err("Invalid mapping data: iio_%d; die%d\n",
+ pmu_idx, die);
+ return -1;
+ }
+ rp = iio_root_port_new(domain, bus, die, pmu_idx);
+ if (!rp || iio_root_ports_list_insert(list, rp)) {
+ free(rp);
+ return -ENOMEM;
+ }
+ }
+out:
+ return 0;
+}
+
+static u8 iio_pmu_count(void)
+{
+ u8 pmu_idx = 0;
+ char path[MAX_PATH];
+ const char *sysfs = sysfs__mountpoint();
+
+ if (sysfs) {
+ for (;; pmu_idx++) {
+ snprintf(path, sizeof(path), SYSFS_UNCORE_PMU_PATH,
+ sysfs, pmu_idx);
+ if (access(path, F_OK) != 0)
+ break;
+ }
+ }
+ return pmu_idx;
+}
+
+static int iio_root_ports_scan(struct iio_root_ports_list **list)
+{
+ int ret = -ENOMEM;
+ struct iio_root_ports_list *tmp_list;
+ u8 pmu_count = iio_pmu_count();
+
+ if (!pmu_count) {
+ pr_err("Unsupported uncore pmu configuration\n");
+ return -1;
+ }
+
+ tmp_list = calloc(1, sizeof(*tmp_list));
+ if (!tmp_list)
+ goto err;
+
+ for (u8 pmu_idx = 0; pmu_idx < pmu_count; pmu_idx++) {
+ ret = iio_mapping(pmu_idx, tmp_list);
+ if (ret)
+ break;
+ }
+err:
+ if (!ret)
+ *list = tmp_list;
+ else
+ iio_root_ports_list_free(tmp_list);
+
+ return ret;
+}
+
+static int iio_root_port_parse_str(u32 *domain, u8 *bus, char *str)
+{
+ int ret;
+ regex_t regex;
+ /*
+ * Expected format domain:bus:
+ * Valid domain range [0:ffff]
+ * Valid bus range [0:ff]
+ * Example: 0000:af, 0:3d, 01:7
+ */
+ regcomp(&regex, "^([a-f0-9A-F]{1,}):([a-f0-9A-F]{1,2})", REG_EXTENDED);
+ ret = regexec(&regex, str, 0, NULL, 0);
+ if (ret || sscanf(str, "%08x:%02hhx", domain, bus) != 2)
+ pr_warning("Unrecognized root port format: %s\n"
+ "Please use the following format:\n"
+ "\t [domain]:[bus]\n"
+ "\t for example: 0000:3d\n", str);
+
+ regfree(&regex);
+ return ret;
+}
+
+static int iio_root_ports_list_filter(struct iio_root_ports_list **list,
+ const char *filter)
+{
+ char *tok, *tmp, *filter_copy = NULL;
+ struct iio_root_port *rp;
+ u32 domain;
+ u8 bus;
+ int ret = -ENOMEM;
+ struct iio_root_ports_list *tmp_list = calloc(1, sizeof(*tmp_list));
+
+ if (!tmp_list)
+ goto err;
+
+ filter_copy = strdup(filter);
+ if (!filter_copy)
+ goto err;
+
+ for (tok = strtok_r(filter_copy, ",", &tmp); tok;
+ tok = strtok_r(NULL, ",", &tmp)) {
+ if (!iio_root_port_parse_str(&domain, &bus, tok)) {
+ rp = iio_root_port_find_by_notation(*list, domain, bus);
+ if (rp) {
+ (*list)->rps[rp->idx] = NULL;
+ ret = iio_root_ports_list_insert(tmp_list, rp);
+ if (ret) {
+ free(rp);
+ goto err;
+ }
+ } else if (!iio_root_port_find_by_notation(tmp_list,
+ domain, bus))
+ pr_warning("Root port %04x:%02x were not found\n",
+ domain, bus);
+ }
+ }
+
+ if (tmp_list->nr_entries == 0) {
+ pr_err("Requested root ports were not found\n");
+ ret = -EINVAL;
+ }
+err:
+ iio_root_ports_list_free(*list);
+ if (ret)
+ iio_root_ports_list_free(tmp_list);
+ else
+ *list = tmp_list;
+
+ free(filter_copy);
+ return ret;
+}
+
+static int iostat_event_group(struct evlist *evl,
+ struct iio_root_ports_list *list)
+{
+ int ret;
+ int idx;
+ const char *iostat_cmd_template =
+ "{uncore_iio_%x/event=0x83,umask=0x04,ch_mask=0xF,fc_mask=0x07/,\
+ uncore_iio_%x/event=0x83,umask=0x01,ch_mask=0xF,fc_mask=0x07/,\
+ uncore_iio_%x/event=0xc0,umask=0x04,ch_mask=0xF,fc_mask=0x07/,\
+ uncore_iio_%x/event=0xc0,umask=0x01,ch_mask=0xF,fc_mask=0x07/}";
+ const int len_template = strlen(iostat_cmd_template) + 1;
+ struct evsel *evsel = NULL;
+ int metrics_count = iostat_metrics_count();
+ char *iostat_cmd = calloc(len_template, 1);
+
+ if (!iostat_cmd)
+ return -ENOMEM;
+
+ for (idx = 0; idx < list->nr_entries; idx++) {
+ sprintf(iostat_cmd, iostat_cmd_template,
+ list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx,
+ list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx);
+ ret = parse_event(evl, iostat_cmd);
+ if (ret)
+ goto err;
+ }
+
+ evlist__for_each_entry(evl, evsel) {
+ evsel->priv = list->rps[evsel->core.idx / metrics_count];
+ }
+ list->nr_entries = 0;
+err:
+ iio_root_ports_list_free(list);
+ free(iostat_cmd);
+ return ret;
+}
+
+int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config)
+{
+ if (evlist->core.nr_entries > 0) {
+ pr_warning("The -e and -M options are not supported."
+ "All chosen events/metrics will be dropped\n");
+ evlist__delete(evlist);
+ evlist = evlist__new();
+ if (!evlist)
+ return -ENOMEM;
+ }
+
+ config->metric_only = true;
+ config->aggr_mode = AGGR_GLOBAL;
+
+ return iostat_event_group(evlist, root_ports);
+}
+
+int iostat_parse(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ int ret;
+ struct perf_stat_config *config = (struct perf_stat_config *)opt->data;
+
+ ret = iio_root_ports_scan(&root_ports);
+ if (!ret) {
+ config->iostat_run = true;
+ if (!str)
+ iostat_mode = IOSTAT_RUN;
+ else if (!strcmp(str, "list"))
+ iostat_mode = IOSTAT_LIST;
+ else {
+ iostat_mode = IOSTAT_RUN;
+ ret = iio_root_ports_list_filter(&root_ports, str);
+ }
+ }
+ return ret;
+}
+
+void iostat_list(struct evlist *evlist, struct perf_stat_config *config)
+{
+ struct evsel *evsel;
+ struct iio_root_port *rp = NULL;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (rp != evsel->priv) {
+ rp = evsel->priv;
+ iio_root_port_show(config->output, rp);
+ }
+ }
+}
+
+void iostat_release(struct evlist *evlist)
+{
+ struct evsel *evsel;
+ struct iio_root_port *rp = NULL;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (rp != evsel->priv) {
+ rp = evsel->priv;
+ zfree(&evsel->priv);
+ }
+ }
+}
+
+void iostat_prefix(struct evlist *evlist,
+ struct perf_stat_config *config,
+ char *prefix, struct timespec *ts)
+{
+ struct iio_root_port *rp = evlist->selected->priv;
+
+ if (rp) {
+ /*
+ * TODO: This is the incorrect format in JSON mode.
+ * See prepare_timestamp()
+ */
+ if (ts)
+ sprintf(prefix, "%6lu.%09lu%s%04x:%02x%s",
+ ts->tv_sec, ts->tv_nsec,
+ config->csv_sep, rp->domain, rp->bus,
+ config->csv_sep);
+ else
+ sprintf(prefix, "%04x:%02x%s", rp->domain, rp->bus,
+ config->csv_sep);
+ }
+}
+
+void iostat_print_header_prefix(struct perf_stat_config *config)
+{
+ if (config->csv_output)
+ fputs("port,", config->output);
+ else if (config->interval)
+ fprintf(config->output, "# time port ");
+ else
+ fprintf(config->output, " port ");
+}
+
+void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel,
+ struct perf_stat_output_ctx *out)
+{
+ double iostat_value = 0;
+ u64 prev_count_val = 0;
+ const char *iostat_metric = iostat_metric_by_idx(evsel->core.idx);
+ u8 die = ((struct iio_root_port *)evsel->priv)->die;
+ struct perf_counts_values *count = perf_counts(evsel->counts, die, 0);
+
+ if (count && count->run && count->ena) {
+ if (evsel->prev_raw_counts && !out->force_header) {
+ struct perf_counts_values *prev_count =
+ perf_counts(evsel->prev_raw_counts, die, 0);
+
+ prev_count_val = prev_count->val;
+ prev_count->val = count->val;
+ }
+ iostat_value = (count->val - prev_count_val) /
+ ((double) count->run / count->ena);
+ }
+ out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, "%8.0f", iostat_metric,
+ iostat_value / (256 * 1024));
+}
+
+void iostat_print_counters(struct evlist *evlist,
+ struct perf_stat_config *config, struct timespec *ts,
+ char *prefix, iostat_print_counter_t print_cnt_cb, void *arg)
+{
+ void *perf_device = NULL;
+ struct evsel *counter = evlist__first(evlist);
+
+ evlist__set_selected(evlist, counter);
+ iostat_prefix(evlist, config, prefix, ts);
+ fprintf(config->output, "%s", prefix);
+ evlist__for_each_entry(evlist, counter) {
+ perf_device = evlist->selected->priv;
+ if (perf_device && perf_device != counter->priv) {
+ evlist__set_selected(evlist, counter);
+ iostat_prefix(evlist, config, prefix, ts);
+ fprintf(config->output, "\n%s", prefix);
+ }
+ print_cnt_cb(config, counter, arg);
+ }
+ fputc('\n', config->output);
+}
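
As a note on iostat_print_metric() above: the printed value is the counter delta, corrected for multiplexing by the run/ena ratio and then divided by 256 * 1024. A minimal sketch of that arithmetic, assuming the underlying IIO events count 4-byte transfers so that the division converts the count to megabytes (helper name is hypothetical):

#include <stdio.h>

/* Hypothetical helper mirroring the scaling in iostat_print_metric():
 * correct for multiplexing (run/ena), then divide by 256 * 1024,
 * i.e. 4-byte units -> megabytes (delta * 4 / 2^20).
 */
static double counts_to_mb(unsigned long long delta,
			   unsigned long long run,
			   unsigned long long ena)
{
	return (delta / ((double)run / ena)) / (256 * 1024);
}

int main(void)
{
	/* e.g. 52428800 fully-counted 4-byte reads -> 200 MB */
	printf("%.0f MB\n", counts_to_mb(52428800ULL, 1, 1));
	return 0;
}
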
diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c
index 072920475b65..424716518b75 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -18,7 +18,6 @@ static struct kvm_events_ops exit_events = {
};
const char *vcpu_id_str = "vcpu_id";
-const int decode_str_len = 20;
const char *kvm_exit_reason = "exit_reason";
const char *kvm_entry_trace = "kvm:kvm_entry";
const char *kvm_exit_trace = "kvm:kvm_exit";
@@ -47,7 +46,7 @@ static bool mmio_event_begin(struct evsel *evsel,
return true;
/* MMIO write begin event in kernel. */
- if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
+ if (evsel__name_is(evsel, "kvm:kvm_mmio") &&
evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
mmio_event_get_key(evsel, sample, key);
return true;
@@ -64,7 +63,7 @@ static bool mmio_event_end(struct evsel *evsel, struct perf_sample *sample,
return true;
/* MMIO read end event in kernel.*/
- if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
+ if (evsel__name_is(evsel, "kvm:kvm_mmio") &&
evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
mmio_event_get_key(evsel, sample, key);
return true;
@@ -77,7 +76,7 @@ static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
struct event_key *key,
char *decode)
{
- scnprintf(decode, decode_str_len, "%#lx:%s",
+ scnprintf(decode, KVM_EVENT_NAME_LEN, "%#lx:%s",
(unsigned long)key->key,
key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
}
@@ -102,7 +101,7 @@ static bool ioport_event_begin(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- if (!strcmp(evsel->name, "kvm:kvm_pio")) {
+ if (evsel__name_is(evsel, "kvm:kvm_pio")) {
ioport_event_get_key(evsel, sample, key);
return true;
}
@@ -121,7 +120,7 @@ static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
struct event_key *key,
char *decode)
{
- scnprintf(decode, decode_str_len, "%#llx:%s",
+ scnprintf(decode, KVM_EVENT_NAME_LEN, "%#llx:%s",
(unsigned long long)key->key,
key->info ? "POUT" : "PIN");
}
@@ -133,11 +132,56 @@ static struct kvm_events_ops ioport_events = {
.name = "IO Port Access"
};
+ /* The time of MSR emulation is measured from kvm_msr to kvm_entry. */
+static void msr_event_get_key(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ key->key = evsel__intval(evsel, sample, "ecx");
+ key->info = evsel__intval(evsel, sample, "write");
+}
+
+static bool msr_event_begin(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ if (evsel__name_is(evsel, "kvm:kvm_msr")) {
+ msr_event_get_key(evsel, sample, key);
+ return true;
+ }
+
+ return false;
+}
+
+static bool msr_event_end(struct evsel *evsel,
+ struct perf_sample *sample __maybe_unused,
+ struct event_key *key __maybe_unused)
+{
+ return kvm_entry_event(evsel);
+}
+
+static void msr_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+ struct event_key *key,
+ char *decode)
+{
+ scnprintf(decode, KVM_EVENT_NAME_LEN, "%#llx:%s",
+ (unsigned long long)key->key,
+ key->info ? "W" : "R");
+}
+
+static struct kvm_events_ops msr_events = {
+ .is_begin_event = msr_event_begin,
+ .is_end_event = msr_event_end,
+ .decode_key = msr_event_decode_key,
+ .name = "MSR Access"
+};
+
const char *kvm_events_tp[] = {
"kvm:kvm_entry",
"kvm:kvm_exit",
"kvm:kvm_mmio",
"kvm:kvm_pio",
+ "kvm:kvm_msr",
NULL,
};
@@ -145,6 +189,7 @@ struct kvm_reg_events_ops kvm_reg_events_ops[] = {
{ .name = "vmexit", .ops = &exit_events },
{ .name = "mmio", .ops = &mmio_events },
{ .name = "ioport", .ops = &ioport_events },
+ { .name = "msr", .ops = &msr_events },
{ NULL, NULL },
};
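
With the msr ops registered above, perf kvm stat can presumably key its report on MSR accesses via --event=msr, next to the existing vmexit, mmio and ioport keys. A small sketch of the decode string built by msr_event_decode_key(), with an assumed MSR index and a local stand-in for KVM_EVENT_NAME_LEN:

#include <stdio.h>

int main(void)
{
	char decode[40];		/* stand-in for KVM_EVENT_NAME_LEN */
	unsigned long long msr = 0x6e0;	/* assumed: "ecx" field of kvm:kvm_msr */
	int write = 1;			/* "write" field of the tracepoint */

	/* Same format as msr_event_decode_key(). */
	snprintf(decode, sizeof(decode), "%#llx:%s", msr, write ? "W" : "R");
	printf("%s\n", decode);		/* prints "0x6e0:W" */
	return 0;
}
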
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
new file mode 100644
index 000000000000..b38f519020ff
--- /dev/null
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "linux/string.h"
+#include "util/map_symbol.h"
+#include "util/mem-events.h"
+#include "mem-events.h"
+
+
+#define MEM_LOADS_AUX 0x8203
+
+#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a }
+
+struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX] = {
+ E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "mem-loads", true, 0),
+ E("ldlat-stores", "%s/mem-stores/P", "mem-stores", false, 0),
+ E(NULL, NULL, NULL, false, 0),
+};
+
+struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX] = {
+ E("ldlat-loads", "{%s/mem-loads-aux/,%s/mem-loads,ldlat=%u/}:P", "mem-loads", true, MEM_LOADS_AUX),
+ E("ldlat-stores", "%s/mem-stores/P", "mem-stores", false, 0),
+ E(NULL, NULL, NULL, false, 0),
+};
+
+struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
+ E(NULL, NULL, NULL, false, 0),
+ E(NULL, NULL, NULL, false, 0),
+ E("mem-ldst", "%s//", NULL, false, 0),
+};
+
+struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX] = {
+ E(NULL, NULL, NULL, false, 0),
+ E(NULL, NULL, NULL, false, 0),
+ E("mem-ldst", "%s/ldlat=%u/", NULL, true, 0),
+};
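
For reference, the entries above are format templates that are later expanded with the PMU name and load-latency threshold. A minimal sketch of the expansion of the Intel aux ldlat-loads entry, with assumed values (PMU name "cpu", ldlat 30):

#include <stdio.h>

int main(void)
{
	char buf[128];

	/* Same format string as perf_mem_events_intel_aux's ldlat-loads entry. */
	snprintf(buf, sizeof(buf), "{%s/mem-loads-aux/,%s/mem-loads,ldlat=%u/}:P",
		 "cpu", "cpu", 30);
	printf("%s\n", buf);	/* {cpu/mem-loads-aux/,cpu/mem-loads,ldlat=30/}:P */
	return 0;
}
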
diff --git a/tools/perf/arch/x86/util/mem-events.h b/tools/perf/arch/x86/util/mem-events.h
new file mode 100644
index 000000000000..11e09a256f5b
--- /dev/null
+++ b/tools/perf/arch/x86/util/mem-events.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_MEM_EVENTS_H
+#define _X86_MEM_EVENTS_H
+
+extern struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX];
+extern struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX];
+
+extern struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX];
+extern struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX];
+
+#endif /* _X86_MEM_EVENTS_H */
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index fca81b39b09f..12fd93f04802 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -5,12 +5,15 @@
#include <linux/kernel.h>
#include <linux/zalloc.h>
+#include "perf_regs.h"
#include "../../../perf-sys.h"
#include "../../../util/perf_regs.h"
#include "../../../util/debug.h"
#include "../../../util/event.h"
+#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
-const struct sample_reg sample_reg_masks[] = {
+static const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
SMPL_REG(BX, PERF_REG_X86_BX),
SMPL_REG(CX, PERF_REG_X86_CX),
@@ -165,7 +168,7 @@ static int sdt_init_op_regex(void)
/*
* Max x86 register name length is 5(ex: %r15d). So, 6th char
* should always contain NULL. This helps to find register name
- * length using strlen, insted of maintaing one more variable.
+ * length using strlen, instead of maintaining one more variable.
*/
#define SDT_REG_NAME_SIZE 6
@@ -207,7 +210,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
* and displacement 0 (Both sign and displacement 0 are
* optional so it may be empty). Use one more character
* to hold last NULL so that strlen can be used to find
- * prefix length, instead of maintaing one more variable.
+ * prefix length, instead of maintaining one more variable.
*/
char prefix[3] = {0};
@@ -273,6 +276,11 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
return SDT_ARG_VALID;
}
+const struct sample_reg *arch__sample_reg_masks(void)
+{
+ return sample_reg_masks;
+}
+
uint64_t arch__intr_reg_mask(void)
{
struct perf_event_attr attr = {
@@ -290,6 +298,21 @@ uint64_t arch__intr_reg_mask(void)
*/
attr.sample_period = 1;
+ if (perf_pmus__num_core_pmus() > 1) {
+ struct perf_pmu *pmu = NULL;
+ __u64 type = PERF_TYPE_RAW;
+
+ /*
+ * The same register set is supported among different hybrid PMUs.
+ * Only check the first available one.
+ */
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ type = pmu->type;
+ break;
+ }
+ attr.config |= type << PERF_PMU_TYPE_SHIFT;
+ }
+
event_attr_init(&attr);
fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
@@ -300,3 +323,8 @@ uint64_t arch__intr_reg_mask(void)
return PERF_REGS_MASK;
}
+
+uint64_t arch__user_reg_mask(void)
+{
+ return PERF_REGS_MASK;
+}
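
A note on the hybrid handling above: the PMU type is folded into the upper bits of attr.config so that the register-mask probe targets one specific core PMU. A minimal sketch of that encoding, assuming PERF_PMU_TYPE_SHIFT is 32 (as in the perf_event UAPI) and a hypothetical dynamic PMU type of 10:

#include <stdio.h>

#define PERF_PMU_TYPE_SHIFT 32	/* assumed value, matching the extended-type encoding */

int main(void)
{
	unsigned long long config = 0x3c;	/* hypothetical base event encoding */
	unsigned int pmu_type = 10;		/* e.g. a hybrid core PMU's dynamic type id */

	config |= (unsigned long long)pmu_type << PERF_PMU_TYPE_SHIFT;
	printf("attr.config = %#llx\n", config);	/* 0xa0000003c */
	return 0;
}
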
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index d48d608517fd..58113482654b 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -1,20 +1,316 @@
// SPDX-License-Identifier: GPL-2.0
#include <string.h>
-
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
#include <linux/stddef.h>
#include <linux/perf_event.h>
+#include <linux/zalloc.h>
+#include <api/fs/fs.h>
+#include <api/io_dir.h>
+#include <internal/cpumap.h>
+#include <errno.h>
#include "../../../util/intel-pt.h"
#include "../../../util/intel-bts.h"
#include "../../../util/pmu.h"
+#include "../../../util/fncache.h"
+#include "../../../util/pmus.h"
+#include "mem-events.h"
+#include "util/debug.h"
+#include "util/env.h"
+#include "util/header.h"
+
+static bool x86__is_intel_graniterapids(void)
+{
+ static bool checked_if_graniterapids;
+ static bool is_graniterapids;
+
+ if (!checked_if_graniterapids) {
+ const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
+ char *cpuid = get_cpuid_str((struct perf_cpu){0});
+
+ is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
+ free(cpuid);
+ checked_if_graniterapids = true;
+ }
+ return is_graniterapids;
+}
+
+static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
+{
+ struct perf_cpu_map *cpus;
+ char *buf = NULL;
+ size_t buf_len;
+
+ if (sysfs__read_str(sysfs_path, &buf, &buf_len) < 0)
+ return NULL;
+
+ cpus = perf_cpu_map__new(buf);
+ free(buf);
+ return cpus;
+}
+
+static int snc_nodes_per_l3_cache(void)
+{
+ static bool checked_snc;
+ static int snc_nodes;
+
+ if (!checked_snc) {
+ struct perf_cpu_map *node_cpus =
+ read_sysfs_cpu_map("devices/system/node/node0/cpulist");
+ struct perf_cpu_map *cache_cpus =
+ read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
+
+ snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
+ perf_cpu_map__put(cache_cpus);
+ perf_cpu_map__put(node_cpus);
+ checked_snc = true;
+ }
+ return snc_nodes;
+}
+
+static bool starts_with(const char *str, const char *prefix)
+{
+ return !strncmp(prefix, str, strlen(prefix));
+}
+
+static int num_chas(void)
+{
+ static bool checked_chas;
+ static int num_chas;
+
+ if (!checked_chas) {
+ int fd = perf_pmu__event_source_devices_fd();
+ struct io_dir dir;
+ struct io_dirent64 *dent;
+
+ if (fd < 0)
+ return -1;
+
+ io_dir__init(&dir, fd);
+
+ while ((dent = io_dir__readdir(&dir)) != NULL) {
+ /* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */
+ if (starts_with(dent->d_name, "uncore_cha_"))
+ num_chas++;
+ }
+ close(fd);
+ checked_chas = true;
+ }
+ return num_chas;
+}
+
+#define MAX_SNCS 6
+
+static int uncore_cha_snc(struct perf_pmu *pmu)
+{
+ // CHA SNC numbers are ordered in correspondence with the CHA numbers.
+ unsigned int cha_num;
+ int num_cha, chas_per_node, cha_snc;
+ int snc_nodes = snc_nodes_per_l3_cache();
+
+ if (snc_nodes <= 1)
+ return 0;
-struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+ num_cha = num_chas();
+ if (num_cha <= 0) {
+ pr_warning("Unexpected: no CHAs found\n");
+ return 0;
+ }
+
+ /* Compute SNC for PMU. */
+ if (sscanf(pmu->name, "uncore_cha_%u", &cha_num) != 1) {
+ pr_warning("Unexpected: unable to compute CHA number '%s'\n", pmu->name);
+ return 0;
+ }
+ chas_per_node = num_cha / snc_nodes;
+ cha_snc = cha_num / chas_per_node;
+
+ /* Range check cha_snc to guard against unexpected out-of-bounds values. */
+ return cha_snc >= MAX_SNCS ? 0 : cha_snc;
+}
+
+static int uncore_imc_snc(struct perf_pmu *pmu)
+{
+ // Compute the IMC SNC using lookup tables.
+ unsigned int imc_num;
+ int snc_nodes = snc_nodes_per_l3_cache();
+ const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
+ const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
+ const u8 *snc_map;
+ size_t snc_map_len;
+
+ switch (snc_nodes) {
+ case 2:
+ snc_map = snc2_map;
+ snc_map_len = ARRAY_SIZE(snc2_map);
+ break;
+ case 3:
+ snc_map = snc3_map;
+ snc_map_len = ARRAY_SIZE(snc3_map);
+ break;
+ default:
+ /* Error or no lookup support for SNC with >3 nodes. */
+ return 0;
+ }
+
+ /* Compute SNC for PMU. */
+ if (sscanf(pmu->name, "uncore_imc_%u", &imc_num) != 1) {
+ pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
+ return 0;
+ }
+ if (imc_num >= snc_map_len) {
+ pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes);
+ return 0;
+ }
+ return snc_map[imc_num];
+}
+
+static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
+{
+ static bool checked_cpu_adjust[MAX_SNCS];
+ static int cpu_adjust[MAX_SNCS];
+ struct perf_cpu_map *node_cpus;
+ char node_path[] = "devices/system/node/node0/cpulist";
+
+ /* Was adjust already computed? */
+ if (checked_cpu_adjust[pmu_snc])
+ return cpu_adjust[pmu_snc];
+
+ /* SNC0 doesn't need an adjust. */
+ if (pmu_snc == 0) {
+ cpu_adjust[0] = 0;
+ checked_cpu_adjust[0] = true;
+ return 0;
+ }
+
+ /*
+ * Use the NUMA topology to find the first CPU of the NUMA node: CPU 0
+ * is adjusted to be this CPU, and similarly for the other CPUs when
+ * there is more than one socket.
+ */
+ assert(pmu_snc >= 0 && pmu_snc <= 9);
+ node_path[24] += pmu_snc; // Shift node0 to be node<pmu_snc>.
+ node_cpus = read_sysfs_cpu_map(node_path);
+ cpu_adjust[pmu_snc] = perf_cpu_map__cpu(node_cpus, 0).cpu;
+ if (cpu_adjust[pmu_snc] < 0) {
+ pr_debug("Failed to read valid CPU list from <sysfs>/%s\n", node_path);
+ cpu_adjust[pmu_snc] = 0;
+ } else {
+ checked_cpu_adjust[pmu_snc] = true;
+ }
+ perf_cpu_map__put(node_cpus);
+ return cpu_adjust[pmu_snc];
+}
+
+static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
{
+ // With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
+ // topology. For example, a two socket graniterapids machine may be set
+ // up with 3-way SNC meaning there are 6 NUMA nodes that should be
+ // displayed with --per-node. The cpumask of the CHA and IMC PMUs
+ // reflects per-socket information meaning, for example, uncore_cha_60
+ // on a two socket graniterapids machine with 120 cores per socket will
+ // have a cpumask of "0,120". This cpumask needs adjusting to "40,160"
+ // to reflect that uncore_cha_60 is used for the 2nd SNC of each
+ // socket. Without the adjustment events on uncore_cha_60 will appear in
+ // node 0 and node 3 (in our example 2 socket 3-way set up), but with
+ // the adjustment they will appear in node 1 and node 4. The number of
+ // CHAs is typically larger than the number of cores. The CHA numbers
+ // are assumed to split evenly and in order with respect to core numbers. There are
+ // fewer memory IMC PMUs than cores and mapping is handled using lookup
+ // tables.
+ static struct perf_cpu_map *cha_adjusted[MAX_SNCS];
+ static struct perf_cpu_map *imc_adjusted[MAX_SNCS];
+ struct perf_cpu_map **adjusted = cha ? cha_adjusted : imc_adjusted;
+ int idx, pmu_snc, cpu_adjust;
+ struct perf_cpu cpu;
+ bool alloc;
+
+ // The kernel-provided cpumask holds the first CPU of each socket, e.g. 0,120.
+ if (perf_cpu_map__cpu(pmu->cpus, 0).cpu != 0) {
+ pr_debug("Ignoring cpumask adjust for %s as unexpected first CPU\n", pmu->name);
+ return;
+ }
+
+ pmu_snc = cha ? uncore_cha_snc(pmu) : uncore_imc_snc(pmu);
+ if (pmu_snc == 0) {
+ // No adjustment necessary for the first SNC.
+ return;
+ }
+
+ alloc = adjusted[pmu_snc] == NULL;
+ if (alloc) {
+ // Hold onto the perf_cpu_map globally to avoid recomputation.
+ cpu_adjust = uncore_cha_imc_compute_cpu_adjust(pmu_snc);
+ adjusted[pmu_snc] = perf_cpu_map__empty_new(perf_cpu_map__nr(pmu->cpus));
+ if (!adjusted[pmu_snc])
+ return;
+ }
+
+ perf_cpu_map__for_each_cpu(cpu, idx, pmu->cpus) {
+ // Compute the new cpu map values or, if not allocating, assert
+ // that they match expectations. The asserts are compiled out in
+ // NDEBUG builds to avoid overhead.
+ if (alloc) {
+ RC_CHK_ACCESS(adjusted[pmu_snc])->map[idx].cpu = cpu.cpu + cpu_adjust;
+ } else if (idx == 0) {
+ cpu_adjust = perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu - cpu.cpu;
+ assert(uncore_cha_imc_compute_cpu_adjust(pmu_snc) == cpu_adjust);
+ } else {
+ assert(perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu ==
+ cpu.cpu + cpu_adjust);
+ }
+ }
+
+ perf_cpu_map__put(pmu->cpus);
+ pmu->cpus = perf_cpu_map__get(adjusted[pmu_snc]);
+}
+
+void perf_pmu__arch_init(struct perf_pmu *pmu)
+{
+ struct perf_pmu_caps *ldlat_cap;
+
#ifdef HAVE_AUXTRACE_SUPPORT
- if (!strcmp(pmu->name, INTEL_PT_PMU_NAME))
- return intel_pt_pmu_default_config(pmu);
- if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME))
+ if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
+ pmu->auxtrace = true;
+ pmu->selectable = true;
+ pmu->perf_event_attr_init_default = intel_pt_pmu_default_config;
+ }
+ if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) {
+ pmu->auxtrace = true;
pmu->selectable = true;
+ }
#endif
- return NULL;
+
+ if (x86__is_amd_cpu()) {
+ if (strcmp(pmu->name, "ibs_op"))
+ return;
+
+ pmu->mem_events = perf_mem_events_amd;
+
+ if (!perf_pmu__caps_parse(pmu))
+ return;
+
+ ldlat_cap = perf_pmu__get_cap(pmu, "ldlat");
+ if (!ldlat_cap || strcmp(ldlat_cap->value, "1"))
+ return;
+
+ perf_mem_events__loads_ldlat = 0;
+ pmu->mem_events = perf_mem_events_amd_ldlat;
+ } else {
+ if (pmu->is_core) {
+ if (perf_pmu__have_event(pmu, "mem-loads-aux"))
+ pmu->mem_events = perf_mem_events_intel_aux;
+ else
+ pmu->mem_events = perf_mem_events_intel;
+ } else if (x86__is_intel_graniterapids()) {
+ if (starts_with(pmu->name, "uncore_cha_"))
+ gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
+ else if (starts_with(pmu->name, "uncore_imc_"))
+ gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
+ }
+ }
}
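
To make the SNC adjustment above concrete, here is a worked sketch of the CHA-to-SNC arithmetic with assumed values (two-socket system, 120 CHA PMUs, 3-way SNC, NUMA node 1 starting at CPU 40); all numbers are illustrative:

#include <stdio.h>

int main(void)
{
	/* Hypothetical two-socket graniterapids: 120 uncore_cha_* PMUs,
	 * 3-way SNC, node 1's first CPU is 40 (read from node1/cpulist).
	 */
	int num_cha = 120, snc_nodes = 3, cha_num = 60;
	int chas_per_node = num_cha / snc_nodes;	/* 40 */
	int cha_snc = cha_num / chas_per_node;		/* 1 */
	int node1_first_cpu = 40;

	printf("uncore_cha_%d -> SNC %d, cpumask 0,120 -> %d,%d\n",
	       cha_num, cha_snc, node1_first_cpu, 120 + node1_first_cpu);
	return 0;
}
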
diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
new file mode 100644
index 000000000000..d1c654839049
--- /dev/null
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "api/fs/fs.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/pmu.h"
+#include "util/pmus.h"
+#include "util/topdown.h"
+#include "topdown.h"
+#include "evsel.h"
+
+/* Check whether there is a PMU which supports the perf metrics. */
+bool topdown_sys_has_perf_metrics(void)
+{
+ static bool has_perf_metrics;
+ static bool cached;
+ struct perf_pmu *pmu;
+
+ if (cached)
+ return has_perf_metrics;
+
+ /*
+ * The perf metrics feature is a core PMU feature.
+ * The PERF_TYPE_RAW type is the type of a core PMU.
+ * The slots event is only available when the core PMU
+ * supports the perf metrics feature.
+ */
+ pmu = perf_pmus__find_by_type(PERF_TYPE_RAW);
+ if (pmu && perf_pmu__have_event(pmu, "slots"))
+ has_perf_metrics = true;
+
+ cached = true;
+ return has_perf_metrics;
+}
+
+#define TOPDOWN_SLOTS 0x0400
+bool arch_is_topdown_slots(const struct evsel *evsel)
+{
+ if (evsel->core.attr.config == TOPDOWN_SLOTS)
+ return true;
+
+ return false;
+}
+
+bool arch_is_topdown_metrics(const struct evsel *evsel)
+{
+ int config = evsel->core.attr.config;
+ const char *name_from_config;
+ struct perf_pmu *pmu;
+
+ /* All topdown events have an event code of 0. */
+ if ((config & 0xFF) != 0)
+ return false;
+
+ pmu = evsel__find_pmu(evsel);
+ if (!pmu || !pmu->is_core)
+ return false;
+
+ name_from_config = perf_pmu__name_from_config(pmu, config);
+ return name_from_config && strcasestr(name_from_config, "topdown");
+}
+
+/*
+ * Check whether a topdown group supports sample-read.
+ *
+ * Only Topdown metric supports sample-read. The slots
+ * event must be the leader of the topdown group.
+ */
+bool arch_topdown_sample_read(struct evsel *leader)
+{
+ struct evsel *evsel;
+
+ if (!evsel__sys_has_perf_metrics(leader))
+ return false;
+
+ if (!arch_is_topdown_slots(leader))
+ return false;
+
+ /*
+ * If the slots event is the leader but there are no topdown metric
+ * events in the group, the slots event should still sample as a
+ * regular leader (no sample-read).
+ */
+ evlist__for_each_entry(leader->evlist, evsel) {
+ if (evsel->core.leader != leader->core.leader)
+ continue;
+ if (evsel != leader && arch_is_topdown_metrics(evsel))
+ return true;
+ }
+
+ return false;
+}
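
A note on the TOPDOWN_SLOTS constant above: assuming the usual Intel RAW encoding config = (umask << 8) | event, 0x0400 is event 0x00 with umask 0x04, and the (config & 0xFF) == 0 test in arch_is_topdown_metrics() relies on all topdown events sharing event code 0. A minimal sketch of that decoding:

#include <stdio.h>

int main(void)
{
	unsigned long long config = 0x0400;	/* TOPDOWN_SLOTS */
	unsigned int event = config & 0xff;	/* 0x00 */
	unsigned int umask = (config >> 8) & 0xff;	/* 0x04 */

	printf("event=%#x umask=%#x -> %s\n", event, umask,
	       config == 0x0400 ? "slots" : "other");
	return 0;
}
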
diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h
new file mode 100644
index 000000000000..1bae9b1822d7
--- /dev/null
+++ b/tools/perf/arch/x86/util/topdown.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOPDOWN_H
+#define _TOPDOWN_H 1
+
+bool topdown_sys_has_perf_metrics(void);
+bool arch_is_topdown_slots(const struct evsel *evsel);
+bool arch_is_topdown_metrics(const struct evsel *evsel);
+
+#endif
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index 2f55afb14e1f..3a439e4b12d2 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -1,45 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
-#include <stdbool.h>
-#include <errno.h>
-
-#include <linux/stddef.h>
-#include <linux/perf_event.h>
-
#include <linux/types.h>
-#include <asm/barrier.h>
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+
#include "../../../util/debug.h"
-#include "../../../util/event.h"
-#include "../../../util/synthetic-events.h"
#include "../../../util/tsc.h"
-
-int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
- struct perf_tsc_conversion *tc)
-{
- bool cap_user_time_zero;
- u32 seq;
- int i = 0;
-
- while (1) {
- seq = pc->lock;
- rmb();
- tc->time_mult = pc->time_mult;
- tc->time_shift = pc->time_shift;
- tc->time_zero = pc->time_zero;
- cap_user_time_zero = pc->cap_user_time_zero;
- rmb();
- if (pc->lock == seq && !(seq & 1))
- break;
- if (++i > 10000) {
- pr_debug("failed to get perf_event_mmap_page lock\n");
- return -EINVAL;
- }
- }
-
- if (!cap_user_time_zero)
- return -EOPNOTSUPP;
-
- return 0;
-}
+#include "cpuid.h"
u64 rdtsc(void)
{
@@ -50,35 +17,77 @@ u64 rdtsc(void)
return low | ((u64)high) << 32;
}
-int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
- struct perf_tool *tool,
- perf_event__handler_t process,
- struct machine *machine)
+/*
+ * Derive the TSC frequency in Hz from /proc/cpuinfo, for example:
+ * ...
+ * model name : Intel(R) Xeon(R) Gold 6154 CPU @ 3.00GHz
+ * ...
+ * will return 3000000000.
+ */
+static u64 cpuinfo_tsc_freq(void)
{
- union perf_event event = {
- .time_conv = {
- .header = {
- .type = PERF_RECORD_TIME_CONV,
- .size = sizeof(struct perf_record_time_conv),
- },
- },
- };
- struct perf_tsc_conversion tc;
- int err;
+ u64 result = 0;
+ FILE *cpuinfo;
+ char *line = NULL;
+ size_t len = 0;
- if (!pc)
+ cpuinfo = fopen("/proc/cpuinfo", "r");
+ if (!cpuinfo) {
+ pr_err("Failed to read /proc/cpuinfo for TSC frequency\n");
return 0;
- err = perf_read_tsc_conversion(pc, &tc);
- if (err == -EOPNOTSUPP)
+ }
+ while (getline(&line, &len, cpuinfo) > 0) {
+ if (!strncmp(line, "model name", 10)) {
+ char *pos = strstr(line + 11, " @ ");
+ double float_result;
+
+ if (pos && sscanf(pos, " @ %lfGHz", &float_result) == 1) {
+ float_result *= 1000000000;
+ result = (u64)float_result;
+ goto out;
+ }
+ }
+ }
+out:
+ if (result == 0)
+ pr_err("Failed to find TSC frequency in /proc/cpuinfo\n");
+
+ free(line);
+ fclose(cpuinfo);
+ return result;
+}
+
+u64 arch_get_tsc_freq(void)
+{
+ unsigned int a, b, c, d, lvl;
+ static bool cached;
+ static double tsc;
+ char vendor[16];
+
+ if (cached)
+ return tsc;
+
+ cached = true;
+ get_cpuid_0(vendor, &lvl);
+ if (!strstr(vendor, "Intel"))
return 0;
- if (err)
- return err;
- pr_debug2("Synthesizing TSC conversion information\n");
+ /*
+ * The CPU does not support the Time Stamp Counter and
+ * Nominal Core Crystal Clock Information leaf (CPUID 0x15).
+ */
+ if (lvl < 0x15) {
+ tsc = cpuinfo_tsc_freq();
+ return tsc;
+ }
- event.time_conv.time_mult = tc.time_mult;
- event.time_conv.time_shift = tc.time_shift;
- event.time_conv.time_zero = tc.time_zero;
+ cpuid(0x15, 0, &a, &b, &c, &d);
+ /* TSC frequency is not enumerated */
+ if (!a || !b || !c) {
+ tsc = cpuinfo_tsc_freq();
+ return tsc;
+ }
- return process(tool, &event, NULL, machine);
+ tsc = (u64)c * (u64)b / (u64)a;
+ return tsc;
}
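
A worked example of the CPUID 0x15 path above: EAX and EBX give the TSC-to-crystal-clock ratio and ECX the crystal frequency in Hz, so the TSC frequency is ecx * ebx / eax. The values below are hypothetical:

#include <stdio.h>

int main(void)
{
	/* Hypothetical CPUID.15H output: 38.4 MHz crystal, ratio 143/2. */
	unsigned long long eax = 2, ebx = 143, ecx = 38400000;
	unsigned long long tsc_hz = ecx * ebx / eax;

	printf("TSC frequency: %llu Hz\n", tsc_hz);	/* 2745600000 */
	return 0;
}
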
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
index eea2bf87232b..798493e887d7 100644
--- a/tools/perf/arch/x86/util/unwind-libdw.c
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -1,13 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <elfutils/libdwfl.h>
+#include "perf_regs.h"
#include "../../../util/unwind-libdw.h"
#include "../../../util/perf_regs.h"
-#include "../../../util/event.h"
+#include "util/sample.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
struct unwind_info *ui = arg;
- struct regs_dump *user_regs = &ui->sample->user_regs;
+ struct regs_dump *user_regs = perf_sample__user_regs(ui->sample);
Dwarf_Word dwarf_regs[17];
unsigned nregs;