diff options
Diffstat (limited to '')
-rw-r--r-- | tools/perf/arch/x86/util/cpuid.h | 34 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/event.c | 2 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/evlist.c | 92 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/evsel.c | 104 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/evsel.h | 7 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/header.c | 27 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/intel-bts.c | 5 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/intel-pt.c | 58 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/iostat.c | 2 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/mem-events.c | 31 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/perf_regs.c | 12 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/topdown.c | 97 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/topdown.h | 8 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/tsc.c | 77 |
14 files changed, 478 insertions, 78 deletions
diff --git a/tools/perf/arch/x86/util/cpuid.h b/tools/perf/arch/x86/util/cpuid.h new file mode 100644 index 000000000000..0a3ae0ace7e9 --- /dev/null +++ b/tools/perf/arch/x86/util/cpuid.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef PERF_CPUID_H +#define PERF_CPUID_H 1 + + +static inline void +cpuid(unsigned int op, unsigned int op2, unsigned int *a, unsigned int *b, + unsigned int *c, unsigned int *d) +{ + /* + * Preserve %ebx/%rbx register by either placing it in %rdi or saving it + * on the stack - x86-64 needs to avoid the stack red zone. In PIC + * compilations %ebx contains the address of the global offset + * table. %rbx is occasionally used to address stack variables in + * presence of dynamic allocas. + */ + asm( +#if defined(__x86_64__) + "mov %%rbx, %%rdi\n" + "cpuid\n" + "xchg %%rdi, %%rbx\n" +#else + "pushl %%ebx\n" + "cpuid\n" + "movl %%ebx, %%edi\n" + "popl %%ebx\n" +#endif + : "=a"(*a), "=D"(*b), "=c"(*c), "=d"(*d) + : "a"(op), "2"(op2)); +} + +void get_cpuid_0(char *vendor, unsigned int *lvl); + +#endif diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index 9b31734ee968..e670f3547581 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -18,7 +18,7 @@ int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, { int rc = 0; struct map *pos; - struct maps *kmaps = &machine->kmaps; + struct maps *kmaps = machine__kernel_maps(machine); union perf_event *event = zalloc(sizeof(event->mmap) + machine->id_hdr_size); diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index 0b0951030a2f..cb59ce9b9638 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -3,17 +3,91 @@ #include "util/pmu.h" #include "util/evlist.h" #include "util/parse-events.h" +#include "util/event.h" +#include "util/pmu-hybrid.h" +#include "topdown.h" -#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}" -#define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}" +static int ___evlist__add_default_attrs(struct evlist *evlist, + struct perf_event_attr *attrs, + size_t nr_attrs) +{ + struct perf_cpu_map *cpus; + struct evsel *evsel, *n; + struct perf_pmu *pmu; + LIST_HEAD(head); + size_t i = 0; + + for (i = 0; i < nr_attrs; i++) + event_attr_init(attrs + i); + + if (!perf_pmu__has_hybrid()) + return evlist__add_attrs(evlist, attrs, nr_attrs); + + for (i = 0; i < nr_attrs; i++) { + if (attrs[i].type == PERF_TYPE_SOFTWARE) { + evsel = evsel__new(attrs + i); + if (evsel == NULL) + goto out_delete_partial_list; + list_add_tail(&evsel->core.node, &head); + continue; + } -int arch_evlist__add_default_attrs(struct evlist *evlist) + perf_pmu__for_each_hybrid_pmu(pmu) { + evsel = evsel__new(attrs + i); + if (evsel == NULL) + goto out_delete_partial_list; + evsel->core.attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + cpus = perf_cpu_map__get(pmu->cpus); + evsel->core.cpus = cpus; + evsel->core.own_cpus = perf_cpu_map__get(cpus); + evsel->pmu_name = strdup(pmu->name); + list_add_tail(&evsel->core.node, &head); + } + } + + evlist__splice_list_tail(evlist, &head); + + return 0; + +out_delete_partial_list: + __evlist__for_each_entry_safe(&head, n, evsel) + evsel__delete(evsel); + return -1; +} + +int arch_evlist__add_default_attrs(struct evlist *evlist, + struct perf_event_attr *attrs, + size_t nr_attrs) { - if (!pmu_have_event("cpu", "slots")) - return 0; + if (nr_attrs) + return ___evlist__add_default_attrs(evlist, attrs, nr_attrs); + + return topdown_parse_events(evlist); +} + +struct evsel *arch_evlist__leader(struct list_head *list) +{ + struct evsel *evsel, *first, *slots = NULL; + bool has_topdown = false; + + first = list_first_entry(list, struct evsel, core.node); + + if (!topdown_sys_has_perf_metrics()) + return first; - if (pmu_have_event("cpu", "topdown-heavy-ops")) - return parse_events(evlist, TOPDOWN_L2_EVENTS, NULL); - else - return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL); + /* If there is a slots event and a topdown event then the slots event comes first. */ + __evlist__for_each_entry(list, evsel) { + if (evsel->pmu_name && !strncmp(evsel->pmu_name, "cpu", 3) && evsel->name) { + if (strcasestr(evsel->name, "slots")) { + slots = evsel; + if (slots == first) + return first; + } + if (strcasestr(evsel->name, "topdown")) + has_topdown = true; + if (slots && has_topdown) + return slots; + } + } + return first; } diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index ac2899a25b7a..ea3972d785d1 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -3,7 +3,13 @@ #include <stdlib.h> #include "util/evsel.h" #include "util/env.h" +#include "util/pmu.h" #include "linux/string.h" +#include "evsel.h" +#include "util/debug.h" + +#define IBS_FETCH_L3MISSONLY (1ULL << 59) +#define IBS_OP_L3MISSONLY (1ULL << 16) void arch_evsel__set_sample_weight(struct evsel *evsel) { @@ -29,3 +35,101 @@ void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr) free(env.cpuid); } + +/* Check whether the evsel's PMU supports the perf metrics */ +bool evsel__sys_has_perf_metrics(const struct evsel *evsel) +{ + const char *pmu_name = evsel->pmu_name ? evsel->pmu_name : "cpu"; + + /* + * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU + * on a non-hybrid machine, "cpu_core" PMU on a hybrid machine. + * The slots event is only available for the core PMU, which + * supports the perf metrics feature. + * Checking both the PERF_TYPE_RAW type and the slots event + * should be good enough to detect the perf metrics feature. + */ + if ((evsel->core.attr.type == PERF_TYPE_RAW) && + pmu_have_event(pmu_name, "slots")) + return true; + + return false; +} + +bool arch_evsel__must_be_in_group(const struct evsel *evsel) +{ + if (!evsel__sys_has_perf_metrics(evsel)) + return false; + + return evsel->name && + (strcasestr(evsel->name, "slots") || + strcasestr(evsel->name, "topdown")); +} + +int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size) +{ + u64 event = evsel->core.attr.config & PERF_HW_EVENT_MASK; + u64 pmu = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; + const char *event_name; + + if (event < PERF_COUNT_HW_MAX && evsel__hw_names[event]) + event_name = evsel__hw_names[event]; + else + event_name = "unknown-hardware"; + + /* The PMU type is not required for the non-hybrid platform. */ + if (!pmu) + return scnprintf(bf, size, "%s", event_name); + + return scnprintf(bf, size, "%s/%s/", + evsel->pmu_name ? evsel->pmu_name : "cpu", + event_name); +} + +static void ibs_l3miss_warn(void) +{ + pr_warning( +"WARNING: Hw internally resets sampling period when L3 Miss Filtering is enabled\n" +"and tagged operation does not cause L3 Miss. This causes sampling period skew.\n"); +} + +void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr) +{ + struct perf_pmu *evsel_pmu, *ibs_fetch_pmu, *ibs_op_pmu; + static int warned_once; + /* 0: Uninitialized, 1: Yes, -1: No */ + static int is_amd; + + if (warned_once || is_amd == -1) + return; + + if (!is_amd) { + struct perf_env *env = evsel__env(evsel); + + if (!perf_env__cpuid(env) || !env->cpuid || + !strstarts(env->cpuid, "AuthenticAMD")) { + is_amd = -1; + return; + } + is_amd = 1; + } + + evsel_pmu = evsel__find_pmu(evsel); + if (!evsel_pmu) + return; + + ibs_fetch_pmu = perf_pmu__find("ibs_fetch"); + ibs_op_pmu = perf_pmu__find("ibs_op"); + + if (ibs_fetch_pmu && ibs_fetch_pmu->type == evsel_pmu->type) { + if (attr->config & IBS_FETCH_L3MISSONLY) { + ibs_l3miss_warn(); + warned_once = 1; + } + } else if (ibs_op_pmu && ibs_op_pmu->type == evsel_pmu->type) { + if (attr->config & IBS_OP_L3MISSONLY) { + ibs_l3miss_warn(); + warned_once = 1; + } + } +} diff --git a/tools/perf/arch/x86/util/evsel.h b/tools/perf/arch/x86/util/evsel.h new file mode 100644 index 000000000000..19ad1691374d --- /dev/null +++ b/tools/perf/arch/x86/util/evsel.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _EVSEL_H +#define _EVSEL_H 1 + +bool evsel__sys_has_perf_metrics(const struct evsel *evsel); + +#endif diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index 578c8c568ffd..a51444a77a5f 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -9,18 +9,17 @@ #include "../../../util/debug.h" #include "../../../util/header.h" +#include "cpuid.h" -static inline void -cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, - unsigned int *d) +void get_cpuid_0(char *vendor, unsigned int *lvl) { - __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t" - "movl %%ebx, %%esi\n\t.byte 0x5b" - : "=a" (*a), - "=S" (*b), - "=c" (*c), - "=d" (*d) - : "a" (op)); + unsigned int b, c, d; + + cpuid(0, 0, lvl, &b, &c, &d); + strncpy(&vendor[0], (char *)(&b), 4); + strncpy(&vendor[4], (char *)(&d), 4); + strncpy(&vendor[8], (char *)(&c), 4); + vendor[12] = '\0'; } static int @@ -31,14 +30,10 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt) int nb; char vendor[16]; - cpuid(0, &lvl, &b, &c, &d); - strncpy(&vendor[0], (char *)(&b), 4); - strncpy(&vendor[4], (char *)(&d), 4); - strncpy(&vendor[8], (char *)(&c), 4); - vendor[12] = '\0'; + get_cpuid_0(vendor, &lvl); if (lvl >= 1) { - cpuid(1, &a, &b, &c, &d); + cpuid(1, 0, &a, &b, &c, &d); family = (a >> 8) & 0xf; /* bits 11 - 8 */ model = (a >> 4) & 0xf; /* Bits 7 - 4 */ diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 4a76d49d25d6..439c2956f3e7 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -110,7 +110,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, container_of(itr, struct intel_bts_recording, itr); struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu; struct evsel *evsel, *intel_bts_evsel = NULL; - const struct perf_cpu_map *cpus = evlist->core.cpus; + const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; bool privileged = perf_event_paranoid_check(-1); if (opts->auxtrace_sample_mode) { @@ -129,6 +129,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, } evsel->core.attr.freq = 0; evsel->core.attr.sample_period = 1; + evsel->needs_auxtrace_mmap = true; intel_bts_evsel = evsel; opts->full_auxtrace = true; } @@ -232,7 +233,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, struct evsel *tracking_evsel; int err; - err = parse_events(evlist, "dummy:u", NULL); + err = parse_event(evlist, "dummy:u"); if (err) return err; diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 6df0dc00d73a..af102f471e9f 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -11,6 +11,7 @@ #include <linux/bitops.h> #include <linux/log2.h> #include <linux/zalloc.h> +#include <linux/err.h> #include <cpuid.h> #include "../../../util/session.h" @@ -306,6 +307,7 @@ intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist) ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) + intel_pt_filter_bytes(filter); + ptr->priv_size += sizeof(u64); /* Cap Event Trace */ return ptr->priv_size; } @@ -335,6 +337,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, unsigned long max_non_turbo_ratio; size_t filter_str_len; const char *filter; + int event_trace; __u64 *info; int err; @@ -357,6 +360,9 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio", "%lu", &max_non_turbo_ratio) != 1) max_non_turbo_ratio = 0; + if (perf_pmu__scan_file(intel_pt_pmu, "caps/event_trace", + "%d", &event_trace) != 1) + event_trace = 0; filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu); filter_str_len = filter ? strlen(filter) : 0; @@ -377,7 +383,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, ui__warning("Intel Processor Trace: TSC not available\n"); } - per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.cpus); + per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.user_requested_cpus); auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; @@ -407,6 +413,8 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, info += len >> 3; } + *info++ = event_trace; + return 0; } @@ -419,20 +427,14 @@ static int intel_pt_track_switches(struct evlist *evlist) if (!evlist__can_select_event(evlist, sched_switch)) return -EPERM; - err = parse_events(evlist, sched_switch, NULL); - if (err) { - pr_debug2("%s: failed to parse %s, error %d\n", + evsel = evlist__add_sched_switch(evlist, true); + if (IS_ERR(evsel)) { + err = PTR_ERR(evsel); + pr_debug2("%s: failed to create %s, error = %d\n", __func__, sched_switch, err); return err; } - evsel = evlist__last(evlist); - - evsel__set_sample_bit(evsel, CPU); - evsel__set_sample_bit(evsel, TIME); - - evsel->core.system_wide = true; - evsel->no_aux_samples = true; evsel->immediate = true; return 0; @@ -625,7 +627,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; bool have_timing_info, need_immediate = false; struct evsel *evsel, *intel_pt_evsel = NULL; - const struct perf_cpu_map *cpus = evlist->core.cpus; + const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; bool privileged = perf_event_paranoid_check(-1); u64 tsc_bit; int err; @@ -642,6 +644,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, evsel->core.attr.freq = 0; evsel->core.attr.sample_period = 1; evsel->no_aux_samples = true; + evsel->needs_auxtrace_mmap = true; intel_pt_evsel = evsel; opts->full_auxtrace = true; } @@ -803,18 +806,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, if (!cpu_wide && perf_can_record_cpu_wide()) { struct evsel *switch_evsel; - err = parse_events(evlist, "dummy:u", NULL); - if (err) - return err; + switch_evsel = evlist__add_dummy_on_all_cpus(evlist); + if (!switch_evsel) + return -ENOMEM; - switch_evsel = evlist__last(evlist); - - switch_evsel->core.attr.freq = 0; - switch_evsel->core.attr.sample_period = 1; switch_evsel->core.attr.context_switch = 1; - - switch_evsel->core.system_wide = true; - switch_evsel->no_aux_samples = true; switch_evsel->immediate = true; evsel__set_sample_bit(switch_evsel, TID); @@ -863,20 +859,22 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, /* Add dummy event to keep tracking */ if (opts->full_auxtrace) { + bool need_system_wide_tracking; struct evsel *tracking_evsel; - err = parse_events(evlist, "dummy:u", NULL); - if (err) - return err; + /* + * User space tasks can migrate between CPUs, so when tracing + * selected CPUs, sideband for all CPUs is still needed. + */ + need_system_wide_tracking = opts->target.cpu_list && + !intel_pt_evsel->core.attr.exclude_user; - tracking_evsel = evlist__last(evlist); + tracking_evsel = evlist__add_aux_dummy(evlist, need_system_wide_tracking); + if (!tracking_evsel) + return -ENOMEM; evlist__set_tracking_event(evlist, tracking_evsel); - tracking_evsel->core.attr.freq = 0; - tracking_evsel->core.attr.sample_period = 1; - - tracking_evsel->no_aux_samples = true; if (need_immediate) tracking_evsel->immediate = true; diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c index 792cd75ade33..404de795ec0b 100644 --- a/tools/perf/arch/x86/util/iostat.c +++ b/tools/perf/arch/x86/util/iostat.c @@ -316,7 +316,7 @@ static int iostat_event_group(struct evlist *evl, sprintf(iostat_cmd, iostat_cmd_template, list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx); - ret = parse_events(evl, iostat_cmd, NULL); + ret = parse_event(evl, iostat_cmd); if (ret) goto err; } diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c index 5214370ca4e4..f683ac702247 100644 --- a/tools/perf/arch/x86/util/mem-events.c +++ b/tools/perf/arch/x86/util/mem-events.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include "util/pmu.h" +#include "util/env.h" #include "map_symbol.h" #include "mem-events.h" +#include "linux/string.h" static char mem_loads_name[100]; static bool mem_loads_name__init; @@ -12,18 +14,43 @@ static char mem_stores_name[100]; #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } -static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { +static struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX] = { E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "%s/events/mem-loads"), E("ldlat-stores", "%s/mem-stores/P", "%s/events/mem-stores"), E(NULL, NULL, NULL), }; +static struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = { + E(NULL, NULL, NULL), + E(NULL, NULL, NULL), + E("mem-ldst", "ibs_op//", "ibs_op"), +}; + +static int perf_mem_is_amd_cpu(void) +{ + struct perf_env env = { .total_mem = 0, }; + + perf_env__cpuid(&env); + if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD")) + return 1; + return -1; +} + struct perf_mem_event *perf_mem_events__ptr(int i) { + /* 0: Uninitialized, 1: Yes, -1: No */ + static int is_amd; + if (i >= PERF_MEM_EVENTS__MAX) return NULL; - return &perf_mem_events[i]; + if (!is_amd) + is_amd = perf_mem_is_amd_cpu(); + + if (is_amd == 1) + return &perf_mem_events_amd[i]; + + return &perf_mem_events_intel[i]; } bool is_mem_loads_aux_event(struct evsel *leader) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 207c56805c55..0ed177991ad0 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -9,6 +9,8 @@ #include "../../../util/perf_regs.h" #include "../../../util/debug.h" #include "../../../util/event.h" +#include "../../../util/pmu.h" +#include "../../../util/pmu-hybrid.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(AX, PERF_REG_X86_AX), @@ -284,12 +286,22 @@ uint64_t arch__intr_reg_mask(void) .disabled = 1, .exclude_kernel = 1, }; + struct perf_pmu *pmu; int fd; /* * In an unnamed union, init it here to build on older gcc versions */ attr.sample_period = 1; + if (perf_pmu__has_hybrid()) { + /* + * The same register set is supported among different hybrid PMUs. + * Only check the first available one. + */ + pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list); + attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + } + event_attr_init(&attr); fd = sys_perf_event_open(&attr, 0, -1, -1, 0); diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c index 2f3d96aa92a5..54810f9acd6f 100644 --- a/tools/perf/arch/x86/util/topdown.c +++ b/tools/perf/arch/x86/util/topdown.c @@ -3,6 +3,40 @@ #include "api/fs/fs.h" #include "util/pmu.h" #include "util/topdown.h" +#include "util/evlist.h" +#include "util/debug.h" +#include "util/pmu-hybrid.h" +#include "topdown.h" +#include "evsel.h" + +#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}" +#define TOPDOWN_L1_EVENTS_CORE "{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/}" +#define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}" +#define TOPDOWN_L2_EVENTS_CORE "{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/,cpu_core/topdown-heavy-ops/,cpu_core/topdown-br-mispredict/,cpu_core/topdown-fetch-lat/,cpu_core/topdown-mem-bound/}" + +/* Check whether there is a PMU which supports the perf metrics. */ +bool topdown_sys_has_perf_metrics(void) +{ + static bool has_perf_metrics; + static bool cached; + struct perf_pmu *pmu; + + if (cached) + return has_perf_metrics; + + /* + * The perf metrics feature is a core PMU feature. + * The PERF_TYPE_RAW type is the type of a core PMU. + * The slots event is only available when the core PMU + * supports the perf metrics feature. + */ + pmu = perf_pmu__find_by_type(PERF_TYPE_RAW); + if (pmu && pmu_have_event(pmu->name, "slots")) + has_perf_metrics = true; + + cached = true; + return has_perf_metrics; +} /* * Check whether we can use a group for top down. @@ -30,34 +64,63 @@ void arch_topdown_group_warn(void) #define TOPDOWN_SLOTS 0x0400 -static bool is_topdown_slots_event(struct evsel *counter) -{ - if (!counter->pmu_name) - return false; - - if (strcmp(counter->pmu_name, "cpu")) - return false; - - if (counter->core.attr.config == TOPDOWN_SLOTS) - return true; - - return false; -} - /* * Check whether a topdown group supports sample-read. * - * Only Topdown metic supports sample-read. The slots + * Only Topdown metric supports sample-read. The slots * event must be the leader of the topdown group. */ bool arch_topdown_sample_read(struct evsel *leader) { - if (!pmu_have_event("cpu", "slots")) + if (!evsel__sys_has_perf_metrics(leader)) return false; - if (is_topdown_slots_event(leader)) + if (leader->core.attr.config == TOPDOWN_SLOTS) return true; return false; } + +const char *arch_get_topdown_pmu_name(struct evlist *evlist, bool warn) +{ + const char *pmu_name; + + if (!perf_pmu__has_hybrid()) + return "cpu"; + + if (!evlist->hybrid_pmu_name) { + if (warn) + pr_warning("WARNING: default to use cpu_core topdown events\n"); + evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu("core"); + } + + pmu_name = evlist->hybrid_pmu_name; + + return pmu_name; +} + +int topdown_parse_events(struct evlist *evlist) +{ + const char *topdown_events; + const char *pmu_name; + + if (!topdown_sys_has_perf_metrics()) + return 0; + + pmu_name = arch_get_topdown_pmu_name(evlist, false); + + if (pmu_have_event(pmu_name, "topdown-heavy-ops")) { + if (!strcmp(pmu_name, "cpu_core")) + topdown_events = TOPDOWN_L2_EVENTS_CORE; + else + topdown_events = TOPDOWN_L2_EVENTS; + } else { + if (!strcmp(pmu_name, "cpu_core")) + topdown_events = TOPDOWN_L1_EVENTS_CORE; + else + topdown_events = TOPDOWN_L1_EVENTS; + } + + return parse_event(evlist, topdown_events); +} diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h new file mode 100644 index 000000000000..7eb81f042838 --- /dev/null +++ b/tools/perf/arch/x86/util/topdown.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOPDOWN_H +#define _TOPDOWN_H 1 + +bool topdown_sys_has_perf_metrics(void); +int topdown_parse_events(struct evlist *evlist); + +#endif diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 559365f8fe52..eb2b5195bd02 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -1,7 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> +#include <math.h> +#include <string.h> +#include "../../../util/debug.h" #include "../../../util/tsc.h" +#include "cpuid.h" u64 rdtsc(void) { @@ -11,3 +15,76 @@ u64 rdtsc(void) return low | ((u64)high) << 32; } + +/* + * Derive the TSC frequency in Hz from the /proc/cpuinfo, for example: + * ... + * model name : Intel(R) Xeon(R) Gold 6154 CPU @ 3.00GHz + * ... + * will return 3000000000. + */ +static double cpuinfo_tsc_freq(void) +{ + double result = 0; + FILE *cpuinfo; + char *line = NULL; + size_t len = 0; + + cpuinfo = fopen("/proc/cpuinfo", "r"); + if (!cpuinfo) { + pr_err("Failed to read /proc/cpuinfo for TSC frequency"); + return NAN; + } + while (getline(&line, &len, cpuinfo) > 0) { + if (!strncmp(line, "model name", 10)) { + char *pos = strstr(line + 11, " @ "); + + if (pos && sscanf(pos, " @ %lfGHz", &result) == 1) { + result *= 1000000000; + goto out; + } + } + } +out: + if (fpclassify(result) == FP_ZERO) + pr_err("Failed to find TSC frequency in /proc/cpuinfo"); + + free(line); + fclose(cpuinfo); + return result; +} + +double arch_get_tsc_freq(void) +{ + unsigned int a, b, c, d, lvl; + static bool cached; + static double tsc; + char vendor[16]; + + if (cached) + return tsc; + + cached = true; + get_cpuid_0(vendor, &lvl); + if (!strstr(vendor, "Intel")) + return 0; + + /* + * Don't support Time Stamp Counter and + * Nominal Core Crystal Clock Information Leaf. + */ + if (lvl < 0x15) { + tsc = cpuinfo_tsc_freq(); + return tsc; + } + + cpuid(0x15, 0, &a, &b, &c, &d); + /* TSC frequency is not enumerated */ + if (!a || !b || !c) { + tsc = cpuinfo_tsc_freq(); + return tsc; + } + + tsc = (double)c * (double)b / (double)a; + return tsc; +} |