aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/arch/x86/util
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--tools/perf/arch/x86/util/cpuid.h34
-rw-r--r--tools/perf/arch/x86/util/event.c2
-rw-r--r--tools/perf/arch/x86/util/evlist.c92
-rw-r--r--tools/perf/arch/x86/util/evsel.c104
-rw-r--r--tools/perf/arch/x86/util/evsel.h7
-rw-r--r--tools/perf/arch/x86/util/header.c27
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c5
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c58
-rw-r--r--tools/perf/arch/x86/util/iostat.c2
-rw-r--r--tools/perf/arch/x86/util/mem-events.c31
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c12
-rw-r--r--tools/perf/arch/x86/util/topdown.c97
-rw-r--r--tools/perf/arch/x86/util/topdown.h8
-rw-r--r--tools/perf/arch/x86/util/tsc.c77
14 files changed, 478 insertions, 78 deletions
diff --git a/tools/perf/arch/x86/util/cpuid.h b/tools/perf/arch/x86/util/cpuid.h
new file mode 100644
index 000000000000..0a3ae0ace7e9
--- /dev/null
+++ b/tools/perf/arch/x86/util/cpuid.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_CPUID_H
+#define PERF_CPUID_H 1
+
+
+static inline void
+cpuid(unsigned int op, unsigned int op2, unsigned int *a, unsigned int *b,
+ unsigned int *c, unsigned int *d)
+{
+ /*
+ * Preserve %ebx/%rbx register by either placing it in %rdi or saving it
+ * on the stack - x86-64 needs to avoid the stack red zone. In PIC
+ * compilations %ebx contains the address of the global offset
+ * table. %rbx is occasionally used to address stack variables in
+ * presence of dynamic allocas.
+ */
+ asm(
+#if defined(__x86_64__)
+ "mov %%rbx, %%rdi\n"
+ "cpuid\n"
+ "xchg %%rdi, %%rbx\n"
+#else
+ "pushl %%ebx\n"
+ "cpuid\n"
+ "movl %%ebx, %%edi\n"
+ "popl %%ebx\n"
+#endif
+ : "=a"(*a), "=D"(*b), "=c"(*c), "=d"(*d)
+ : "a"(op), "2"(op2));
+}
+
+void get_cpuid_0(char *vendor, unsigned int *lvl);
+
+#endif
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index 9b31734ee968..e670f3547581 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -18,7 +18,7 @@ int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
{
int rc = 0;
struct map *pos;
- struct maps *kmaps = &machine->kmaps;
+ struct maps *kmaps = machine__kernel_maps(machine);
union perf_event *event = zalloc(sizeof(event->mmap) +
machine->id_hdr_size);
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index 0b0951030a2f..cb59ce9b9638 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -3,17 +3,91 @@
#include "util/pmu.h"
#include "util/evlist.h"
#include "util/parse-events.h"
+#include "util/event.h"
+#include "util/pmu-hybrid.h"
+#include "topdown.h"
-#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}"
-#define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}"
+static int ___evlist__add_default_attrs(struct evlist *evlist,
+ struct perf_event_attr *attrs,
+ size_t nr_attrs)
+{
+ struct perf_cpu_map *cpus;
+ struct evsel *evsel, *n;
+ struct perf_pmu *pmu;
+ LIST_HEAD(head);
+ size_t i = 0;
+
+ for (i = 0; i < nr_attrs; i++)
+ event_attr_init(attrs + i);
+
+ if (!perf_pmu__has_hybrid())
+ return evlist__add_attrs(evlist, attrs, nr_attrs);
+
+ for (i = 0; i < nr_attrs; i++) {
+ if (attrs[i].type == PERF_TYPE_SOFTWARE) {
+ evsel = evsel__new(attrs + i);
+ if (evsel == NULL)
+ goto out_delete_partial_list;
+ list_add_tail(&evsel->core.node, &head);
+ continue;
+ }
-int arch_evlist__add_default_attrs(struct evlist *evlist)
+ perf_pmu__for_each_hybrid_pmu(pmu) {
+ evsel = evsel__new(attrs + i);
+ if (evsel == NULL)
+ goto out_delete_partial_list;
+ evsel->core.attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+ cpus = perf_cpu_map__get(pmu->cpus);
+ evsel->core.cpus = cpus;
+ evsel->core.own_cpus = perf_cpu_map__get(cpus);
+ evsel->pmu_name = strdup(pmu->name);
+ list_add_tail(&evsel->core.node, &head);
+ }
+ }
+
+ evlist__splice_list_tail(evlist, &head);
+
+ return 0;
+
+out_delete_partial_list:
+ __evlist__for_each_entry_safe(&head, n, evsel)
+ evsel__delete(evsel);
+ return -1;
+}
+
+int arch_evlist__add_default_attrs(struct evlist *evlist,
+ struct perf_event_attr *attrs,
+ size_t nr_attrs)
{
- if (!pmu_have_event("cpu", "slots"))
- return 0;
+ if (nr_attrs)
+ return ___evlist__add_default_attrs(evlist, attrs, nr_attrs);
+
+ return topdown_parse_events(evlist);
+}
+
+struct evsel *arch_evlist__leader(struct list_head *list)
+{
+ struct evsel *evsel, *first, *slots = NULL;
+ bool has_topdown = false;
+
+ first = list_first_entry(list, struct evsel, core.node);
+
+ if (!topdown_sys_has_perf_metrics())
+ return first;
- if (pmu_have_event("cpu", "topdown-heavy-ops"))
- return parse_events(evlist, TOPDOWN_L2_EVENTS, NULL);
- else
- return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL);
+ /* If there is a slots event and a topdown event then the slots event comes first. */
+ __evlist__for_each_entry(list, evsel) {
+ if (evsel->pmu_name && !strncmp(evsel->pmu_name, "cpu", 3) && evsel->name) {
+ if (strcasestr(evsel->name, "slots")) {
+ slots = evsel;
+ if (slots == first)
+ return first;
+ }
+ if (strcasestr(evsel->name, "topdown"))
+ has_topdown = true;
+ if (slots && has_topdown)
+ return slots;
+ }
+ }
+ return first;
}
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
index ac2899a25b7a..ea3972d785d1 100644
--- a/tools/perf/arch/x86/util/evsel.c
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -3,7 +3,13 @@
#include <stdlib.h>
#include "util/evsel.h"
#include "util/env.h"
+#include "util/pmu.h"
#include "linux/string.h"
+#include "evsel.h"
+#include "util/debug.h"
+
+#define IBS_FETCH_L3MISSONLY (1ULL << 59)
+#define IBS_OP_L3MISSONLY (1ULL << 16)
void arch_evsel__set_sample_weight(struct evsel *evsel)
{
@@ -29,3 +35,101 @@ void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr)
free(env.cpuid);
}
+
+/* Check whether the evsel's PMU supports the perf metrics */
+bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
+{
+ const char *pmu_name = evsel->pmu_name ? evsel->pmu_name : "cpu";
+
+ /*
+ * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU
+ * on a non-hybrid machine, "cpu_core" PMU on a hybrid machine.
+ * The slots event is only available for the core PMU, which
+ * supports the perf metrics feature.
+ * Checking both the PERF_TYPE_RAW type and the slots event
+ * should be good enough to detect the perf metrics feature.
+ */
+ if ((evsel->core.attr.type == PERF_TYPE_RAW) &&
+ pmu_have_event(pmu_name, "slots"))
+ return true;
+
+ return false;
+}
+
+bool arch_evsel__must_be_in_group(const struct evsel *evsel)
+{
+ if (!evsel__sys_has_perf_metrics(evsel))
+ return false;
+
+ return evsel->name &&
+ (strcasestr(evsel->name, "slots") ||
+ strcasestr(evsel->name, "topdown"));
+}
+
+int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
+{
+ u64 event = evsel->core.attr.config & PERF_HW_EVENT_MASK;
+ u64 pmu = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT;
+ const char *event_name;
+
+ if (event < PERF_COUNT_HW_MAX && evsel__hw_names[event])
+ event_name = evsel__hw_names[event];
+ else
+ event_name = "unknown-hardware";
+
+ /* The PMU type is not required for the non-hybrid platform. */
+ if (!pmu)
+ return scnprintf(bf, size, "%s", event_name);
+
+ return scnprintf(bf, size, "%s/%s/",
+ evsel->pmu_name ? evsel->pmu_name : "cpu",
+ event_name);
+}
+
+static void ibs_l3miss_warn(void)
+{
+ pr_warning(
+"WARNING: Hw internally resets sampling period when L3 Miss Filtering is enabled\n"
+"and tagged operation does not cause L3 Miss. This causes sampling period skew.\n");
+}
+
+void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr)
+{
+ struct perf_pmu *evsel_pmu, *ibs_fetch_pmu, *ibs_op_pmu;
+ static int warned_once;
+ /* 0: Uninitialized, 1: Yes, -1: No */
+ static int is_amd;
+
+ if (warned_once || is_amd == -1)
+ return;
+
+ if (!is_amd) {
+ struct perf_env *env = evsel__env(evsel);
+
+ if (!perf_env__cpuid(env) || !env->cpuid ||
+ !strstarts(env->cpuid, "AuthenticAMD")) {
+ is_amd = -1;
+ return;
+ }
+ is_amd = 1;
+ }
+
+ evsel_pmu = evsel__find_pmu(evsel);
+ if (!evsel_pmu)
+ return;
+
+ ibs_fetch_pmu = perf_pmu__find("ibs_fetch");
+ ibs_op_pmu = perf_pmu__find("ibs_op");
+
+ if (ibs_fetch_pmu && ibs_fetch_pmu->type == evsel_pmu->type) {
+ if (attr->config & IBS_FETCH_L3MISSONLY) {
+ ibs_l3miss_warn();
+ warned_once = 1;
+ }
+ } else if (ibs_op_pmu && ibs_op_pmu->type == evsel_pmu->type) {
+ if (attr->config & IBS_OP_L3MISSONLY) {
+ ibs_l3miss_warn();
+ warned_once = 1;
+ }
+ }
+}
diff --git a/tools/perf/arch/x86/util/evsel.h b/tools/perf/arch/x86/util/evsel.h
new file mode 100644
index 000000000000..19ad1691374d
--- /dev/null
+++ b/tools/perf/arch/x86/util/evsel.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _EVSEL_H
+#define _EVSEL_H 1
+
+bool evsel__sys_has_perf_metrics(const struct evsel *evsel);
+
+#endif
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index 578c8c568ffd..a51444a77a5f 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -9,18 +9,17 @@
#include "../../../util/debug.h"
#include "../../../util/header.h"
+#include "cpuid.h"
-static inline void
-cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
- unsigned int *d)
+void get_cpuid_0(char *vendor, unsigned int *lvl)
{
- __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t"
- "movl %%ebx, %%esi\n\t.byte 0x5b"
- : "=a" (*a),
- "=S" (*b),
- "=c" (*c),
- "=d" (*d)
- : "a" (op));
+ unsigned int b, c, d;
+
+ cpuid(0, 0, lvl, &b, &c, &d);
+ strncpy(&vendor[0], (char *)(&b), 4);
+ strncpy(&vendor[4], (char *)(&d), 4);
+ strncpy(&vendor[8], (char *)(&c), 4);
+ vendor[12] = '\0';
}
static int
@@ -31,14 +30,10 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt)
int nb;
char vendor[16];
- cpuid(0, &lvl, &b, &c, &d);
- strncpy(&vendor[0], (char *)(&b), 4);
- strncpy(&vendor[4], (char *)(&d), 4);
- strncpy(&vendor[8], (char *)(&c), 4);
- vendor[12] = '\0';
+ get_cpuid_0(vendor, &lvl);
if (lvl >= 1) {
- cpuid(1, &a, &b, &c, &d);
+ cpuid(1, 0, &a, &b, &c, &d);
family = (a >> 8) & 0xf; /* bits 11 - 8 */
model = (a >> 4) & 0xf; /* Bits 7 - 4 */
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 4a76d49d25d6..439c2956f3e7 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -110,7 +110,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
container_of(itr, struct intel_bts_recording, itr);
struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
struct evsel *evsel, *intel_bts_evsel = NULL;
- const struct perf_cpu_map *cpus = evlist->core.cpus;
+ const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
bool privileged = perf_event_paranoid_check(-1);
if (opts->auxtrace_sample_mode) {
@@ -129,6 +129,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
}
evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1;
+ evsel->needs_auxtrace_mmap = true;
intel_bts_evsel = evsel;
opts->full_auxtrace = true;
}
@@ -232,7 +233,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
struct evsel *tracking_evsel;
int err;
- err = parse_events(evlist, "dummy:u", NULL);
+ err = parse_event(evlist, "dummy:u");
if (err)
return err;
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 6df0dc00d73a..af102f471e9f 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -11,6 +11,7 @@
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
+#include <linux/err.h>
#include <cpuid.h>
#include "../../../util/session.h"
@@ -306,6 +307,7 @@ intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist)
ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
intel_pt_filter_bytes(filter);
+ ptr->priv_size += sizeof(u64); /* Cap Event Trace */
return ptr->priv_size;
}
@@ -335,6 +337,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
unsigned long max_non_turbo_ratio;
size_t filter_str_len;
const char *filter;
+ int event_trace;
__u64 *info;
int err;
@@ -357,6 +360,9 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
"%lu", &max_non_turbo_ratio) != 1)
max_non_turbo_ratio = 0;
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/event_trace",
+ "%d", &event_trace) != 1)
+ event_trace = 0;
filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
filter_str_len = filter ? strlen(filter) : 0;
@@ -377,7 +383,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
ui__warning("Intel Processor Trace: TSC not available\n");
}
- per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.cpus);
+ per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.user_requested_cpus);
auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
@@ -407,6 +413,8 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
info += len >> 3;
}
+ *info++ = event_trace;
+
return 0;
}
@@ -419,20 +427,14 @@ static int intel_pt_track_switches(struct evlist *evlist)
if (!evlist__can_select_event(evlist, sched_switch))
return -EPERM;
- err = parse_events(evlist, sched_switch, NULL);
- if (err) {
- pr_debug2("%s: failed to parse %s, error %d\n",
+ evsel = evlist__add_sched_switch(evlist, true);
+ if (IS_ERR(evsel)) {
+ err = PTR_ERR(evsel);
+ pr_debug2("%s: failed to create %s, error = %d\n",
__func__, sched_switch, err);
return err;
}
- evsel = evlist__last(evlist);
-
- evsel__set_sample_bit(evsel, CPU);
- evsel__set_sample_bit(evsel, TIME);
-
- evsel->core.system_wide = true;
- evsel->no_aux_samples = true;
evsel->immediate = true;
return 0;
@@ -625,7 +627,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
bool have_timing_info, need_immediate = false;
struct evsel *evsel, *intel_pt_evsel = NULL;
- const struct perf_cpu_map *cpus = evlist->core.cpus;
+ const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
bool privileged = perf_event_paranoid_check(-1);
u64 tsc_bit;
int err;
@@ -642,6 +644,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1;
evsel->no_aux_samples = true;
+ evsel->needs_auxtrace_mmap = true;
intel_pt_evsel = evsel;
opts->full_auxtrace = true;
}
@@ -803,18 +806,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
if (!cpu_wide && perf_can_record_cpu_wide()) {
struct evsel *switch_evsel;
- err = parse_events(evlist, "dummy:u", NULL);
- if (err)
- return err;
+ switch_evsel = evlist__add_dummy_on_all_cpus(evlist);
+ if (!switch_evsel)
+ return -ENOMEM;
- switch_evsel = evlist__last(evlist);
-
- switch_evsel->core.attr.freq = 0;
- switch_evsel->core.attr.sample_period = 1;
switch_evsel->core.attr.context_switch = 1;
-
- switch_evsel->core.system_wide = true;
- switch_evsel->no_aux_samples = true;
switch_evsel->immediate = true;
evsel__set_sample_bit(switch_evsel, TID);
@@ -863,20 +859,22 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
/* Add dummy event to keep tracking */
if (opts->full_auxtrace) {
+ bool need_system_wide_tracking;
struct evsel *tracking_evsel;
- err = parse_events(evlist, "dummy:u", NULL);
- if (err)
- return err;
+ /*
+ * User space tasks can migrate between CPUs, so when tracing
+ * selected CPUs, sideband for all CPUs is still needed.
+ */
+ need_system_wide_tracking = opts->target.cpu_list &&
+ !intel_pt_evsel->core.attr.exclude_user;
- tracking_evsel = evlist__last(evlist);
+ tracking_evsel = evlist__add_aux_dummy(evlist, need_system_wide_tracking);
+ if (!tracking_evsel)
+ return -ENOMEM;
evlist__set_tracking_event(evlist, tracking_evsel);
- tracking_evsel->core.attr.freq = 0;
- tracking_evsel->core.attr.sample_period = 1;
-
- tracking_evsel->no_aux_samples = true;
if (need_immediate)
tracking_evsel->immediate = true;
diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c
index 792cd75ade33..404de795ec0b 100644
--- a/tools/perf/arch/x86/util/iostat.c
+++ b/tools/perf/arch/x86/util/iostat.c
@@ -316,7 +316,7 @@ static int iostat_event_group(struct evlist *evl,
sprintf(iostat_cmd, iostat_cmd_template,
list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx,
list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx);
- ret = parse_events(evl, iostat_cmd, NULL);
+ ret = parse_event(evl, iostat_cmd);
if (ret)
goto err;
}
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
index 5214370ca4e4..f683ac702247 100644
--- a/tools/perf/arch/x86/util/mem-events.c
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include "util/pmu.h"
+#include "util/env.h"
#include "map_symbol.h"
#include "mem-events.h"
+#include "linux/string.h"
static char mem_loads_name[100];
static bool mem_loads_name__init;
@@ -12,18 +14,43 @@ static char mem_stores_name[100];
#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
-static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+static struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX] = {
E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "%s/events/mem-loads"),
E("ldlat-stores", "%s/mem-stores/P", "%s/events/mem-stores"),
E(NULL, NULL, NULL),
};
+static struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
+ E(NULL, NULL, NULL),
+ E(NULL, NULL, NULL),
+ E("mem-ldst", "ibs_op//", "ibs_op"),
+};
+
+static int perf_mem_is_amd_cpu(void)
+{
+ struct perf_env env = { .total_mem = 0, };
+
+ perf_env__cpuid(&env);
+ if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD"))
+ return 1;
+ return -1;
+}
+
struct perf_mem_event *perf_mem_events__ptr(int i)
{
+ /* 0: Uninitialized, 1: Yes, -1: No */
+ static int is_amd;
+
if (i >= PERF_MEM_EVENTS__MAX)
return NULL;
- return &perf_mem_events[i];
+ if (!is_amd)
+ is_amd = perf_mem_is_amd_cpu();
+
+ if (is_amd == 1)
+ return &perf_mem_events_amd[i];
+
+ return &perf_mem_events_intel[i];
}
bool is_mem_loads_aux_event(struct evsel *leader)
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 207c56805c55..0ed177991ad0 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -9,6 +9,8 @@
#include "../../../util/perf_regs.h"
#include "../../../util/debug.h"
#include "../../../util/event.h"
+#include "../../../util/pmu.h"
+#include "../../../util/pmu-hybrid.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
@@ -284,12 +286,22 @@ uint64_t arch__intr_reg_mask(void)
.disabled = 1,
.exclude_kernel = 1,
};
+ struct perf_pmu *pmu;
int fd;
/*
* In an unnamed union, init it here to build on older gcc versions
*/
attr.sample_period = 1;
+ if (perf_pmu__has_hybrid()) {
+ /*
+ * The same register set is supported among different hybrid PMUs.
+ * Only check the first available one.
+ */
+ pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list);
+ attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+ }
+
event_attr_init(&attr);
fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
index 2f3d96aa92a5..54810f9acd6f 100644
--- a/tools/perf/arch/x86/util/topdown.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -3,6 +3,40 @@
#include "api/fs/fs.h"
#include "util/pmu.h"
#include "util/topdown.h"
+#include "util/evlist.h"
+#include "util/debug.h"
+#include "util/pmu-hybrid.h"
+#include "topdown.h"
+#include "evsel.h"
+
+#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}"
+#define TOPDOWN_L1_EVENTS_CORE "{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/}"
+#define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}"
+#define TOPDOWN_L2_EVENTS_CORE "{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/,cpu_core/topdown-heavy-ops/,cpu_core/topdown-br-mispredict/,cpu_core/topdown-fetch-lat/,cpu_core/topdown-mem-bound/}"
+
+/* Check whether there is a PMU which supports the perf metrics. */
+bool topdown_sys_has_perf_metrics(void)
+{
+ static bool has_perf_metrics;
+ static bool cached;
+ struct perf_pmu *pmu;
+
+ if (cached)
+ return has_perf_metrics;
+
+ /*
+ * The perf metrics feature is a core PMU feature.
+ * The PERF_TYPE_RAW type is the type of a core PMU.
+ * The slots event is only available when the core PMU
+ * supports the perf metrics feature.
+ */
+ pmu = perf_pmu__find_by_type(PERF_TYPE_RAW);
+ if (pmu && pmu_have_event(pmu->name, "slots"))
+ has_perf_metrics = true;
+
+ cached = true;
+ return has_perf_metrics;
+}
/*
* Check whether we can use a group for top down.
@@ -30,34 +64,63 @@ void arch_topdown_group_warn(void)
#define TOPDOWN_SLOTS 0x0400
-static bool is_topdown_slots_event(struct evsel *counter)
-{
- if (!counter->pmu_name)
- return false;
-
- if (strcmp(counter->pmu_name, "cpu"))
- return false;
-
- if (counter->core.attr.config == TOPDOWN_SLOTS)
- return true;
-
- return false;
-}
-
/*
* Check whether a topdown group supports sample-read.
*
- * Only Topdown metic supports sample-read. The slots
+ * Only Topdown metric supports sample-read. The slots
* event must be the leader of the topdown group.
*/
bool arch_topdown_sample_read(struct evsel *leader)
{
- if (!pmu_have_event("cpu", "slots"))
+ if (!evsel__sys_has_perf_metrics(leader))
return false;
- if (is_topdown_slots_event(leader))
+ if (leader->core.attr.config == TOPDOWN_SLOTS)
return true;
return false;
}
+
+const char *arch_get_topdown_pmu_name(struct evlist *evlist, bool warn)
+{
+ const char *pmu_name;
+
+ if (!perf_pmu__has_hybrid())
+ return "cpu";
+
+ if (!evlist->hybrid_pmu_name) {
+ if (warn)
+ pr_warning("WARNING: default to use cpu_core topdown events\n");
+ evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu("core");
+ }
+
+ pmu_name = evlist->hybrid_pmu_name;
+
+ return pmu_name;
+}
+
+int topdown_parse_events(struct evlist *evlist)
+{
+ const char *topdown_events;
+ const char *pmu_name;
+
+ if (!topdown_sys_has_perf_metrics())
+ return 0;
+
+ pmu_name = arch_get_topdown_pmu_name(evlist, false);
+
+ if (pmu_have_event(pmu_name, "topdown-heavy-ops")) {
+ if (!strcmp(pmu_name, "cpu_core"))
+ topdown_events = TOPDOWN_L2_EVENTS_CORE;
+ else
+ topdown_events = TOPDOWN_L2_EVENTS;
+ } else {
+ if (!strcmp(pmu_name, "cpu_core"))
+ topdown_events = TOPDOWN_L1_EVENTS_CORE;
+ else
+ topdown_events = TOPDOWN_L1_EVENTS;
+ }
+
+ return parse_event(evlist, topdown_events);
+}
diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h
new file mode 100644
index 000000000000..7eb81f042838
--- /dev/null
+++ b/tools/perf/arch/x86/util/topdown.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOPDOWN_H
+#define _TOPDOWN_H 1
+
+bool topdown_sys_has_perf_metrics(void);
+int topdown_parse_events(struct evlist *evlist);
+
+#endif
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index 559365f8fe52..eb2b5195bd02 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -1,7 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
+#include <math.h>
+#include <string.h>
+#include "../../../util/debug.h"
#include "../../../util/tsc.h"
+#include "cpuid.h"
u64 rdtsc(void)
{
@@ -11,3 +15,76 @@ u64 rdtsc(void)
return low | ((u64)high) << 32;
}
+
+/*
+ * Derive the TSC frequency in Hz from the /proc/cpuinfo, for example:
+ * ...
+ * model name : Intel(R) Xeon(R) Gold 6154 CPU @ 3.00GHz
+ * ...
+ * will return 3000000000.
+ */
+static double cpuinfo_tsc_freq(void)
+{
+ double result = 0;
+ FILE *cpuinfo;
+ char *line = NULL;
+ size_t len = 0;
+
+ cpuinfo = fopen("/proc/cpuinfo", "r");
+ if (!cpuinfo) {
+ pr_err("Failed to read /proc/cpuinfo for TSC frequency");
+ return NAN;
+ }
+ while (getline(&line, &len, cpuinfo) > 0) {
+ if (!strncmp(line, "model name", 10)) {
+ char *pos = strstr(line + 11, " @ ");
+
+ if (pos && sscanf(pos, " @ %lfGHz", &result) == 1) {
+ result *= 1000000000;
+ goto out;
+ }
+ }
+ }
+out:
+ if (fpclassify(result) == FP_ZERO)
+ pr_err("Failed to find TSC frequency in /proc/cpuinfo");
+
+ free(line);
+ fclose(cpuinfo);
+ return result;
+}
+
+double arch_get_tsc_freq(void)
+{
+ unsigned int a, b, c, d, lvl;
+ static bool cached;
+ static double tsc;
+ char vendor[16];
+
+ if (cached)
+ return tsc;
+
+ cached = true;
+ get_cpuid_0(vendor, &lvl);
+ if (!strstr(vendor, "Intel"))
+ return 0;
+
+ /*
+ * Don't support Time Stamp Counter and
+ * Nominal Core Crystal Clock Information Leaf.
+ */
+ if (lvl < 0x15) {
+ tsc = cpuinfo_tsc_freq();
+ return tsc;
+ }
+
+ cpuid(0x15, 0, &a, &b, &c, &d);
+ /* TSC frequency is not enumerated */
+ if (!a || !b || !c) {
+ tsc = cpuinfo_tsc_freq();
+ return tsc;
+ }
+
+ tsc = (double)c * (double)b / (double)a;
+ return tsc;
+}