diff options
Diffstat (limited to 'tools/perf/arch/arm64/util')
-rw-r--r-- | tools/perf/arch/arm64/util/Build | 5 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/arm-spe.c | 375 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/arm64_exception_types.h | 92 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/header.c | 73 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/hisi-ptt.c | 188 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/kvm-stat.c | 84 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/machine.c | 27 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/mem-events.c | 12 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/mem-events.h | 7 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/perf_regs.c | 178 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/pmu.c | 45 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/tsc.c | 21 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/unwind-libdw.c | 3 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/unwind-libunwind.c | 77 |
14 files changed, 1040 insertions, 147 deletions
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 5c13438c7bd4..78ef7115be3d 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,6 +1,9 @@ perf-y += header.o perf-y += machine.o perf-y += perf_regs.o +perf-y += tsc.o +perf-y += pmu.o +perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o @@ -8,4 +11,4 @@ perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ ../../arm/util/auxtrace.o \ ../../arm/util/cs-etm.o \ - arm-spe.o + arm-spe.o mem-events.o hisi-ptt.o diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index e3593063b3d1..51ccbfd3d246 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -14,6 +14,7 @@ #include "../../../util/cpumap.h" #include "../../../util/event.h" #include "../../../util/evsel.h" +#include "../../../util/evsel_config.h" #include "../../../util/evlist.h" #include "../../../util/session.h" #include <internal/lib.h> // page_size @@ -22,6 +23,7 @@ #include "../../../util/auxtrace.h" #include "../../../util/record.h" #include "../../../util/arm-spe.h" +#include <tools/libc_compat.h> // reallocarray #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -30,6 +32,8 @@ struct arm_spe_recording { struct auxtrace_record itr; struct perf_pmu *arm_spe_pmu; struct evlist *evlist; + int wrapped_cnt; + bool *wrapped; }; static size_t @@ -60,6 +64,74 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, return 0; } +static void +arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts, + bool privileged) +{ + /* + * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor + * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for + * unprivileged users. + * + * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for + * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages + * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the + * user is likely to get an error as they exceed their mlock limmit. + */ + + /* + * No size were given to '-S' or '-m,', so go with the default + */ + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + + /* + * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the + * auxtrace mmap area. + */ + if (!opts->auxtrace_snapshot_size) + opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size; + + /* + * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big + * enough to fit the requested snapshot size. + */ + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } +} + +static __u64 arm_spe_pmu__sample_period(const struct perf_pmu *arm_spe_pmu) +{ + static __u64 sample_period; + + if (sample_period) + return sample_period; + + /* + * If kernel driver doesn't advertise a minimum, + * use max allowable by PMSIDR_EL1.INTERVAL + */ + if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu", + &sample_period) != 1) { + pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n"); + sample_period = 4096; + } + return sample_period; +} + static int arm_spe_recording_options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts) @@ -68,9 +140,11 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, container_of(itr, struct arm_spe_recording, itr); struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; struct evsel *evsel, *arm_spe_evsel = NULL; + struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; bool privileged = perf_event_paranoid_check(-1); struct evsel *tracking_evsel; int err; + u64 bit; sper->evlist = evlist; @@ -81,7 +155,8 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, return -EINVAL; } evsel->core.attr.freq = 0; - evsel->core.attr.sample_period = 1; + evsel->core.attr.sample_period = arm_spe_pmu__sample_period(arm_spe_pmu); + evsel->needs_auxtrace_mmap = true; arm_spe_evsel = evsel; opts->full_auxtrace = true; } @@ -90,8 +165,38 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, if (!opts->full_auxtrace) return 0; + /* + * we are in snapshot mode. + */ + if (opts->auxtrace_snapshot_mode) { + /* + * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with + * default values. + */ + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) + arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged); + + /* + * Snapshot size can't be bigger than the auxtrace area. + */ + if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + + /* + * Something went wrong somewhere - this shouldn't happen. + */ + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + } + /* We are in full trace mode but '-m,xyz' wasn't specified */ - if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (!opts->auxtrace_mmap_pages) { if (privileged) { opts->auxtrace_mmap_pages = MiB(4) / page_size; } else { @@ -113,30 +218,246 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, } } + if (opts->auxtrace_snapshot_mode) + pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME, + opts->auxtrace_snapshot_size); /* * To obtain the auxtrace buffer file descriptor, the auxtrace event * must come first. */ - perf_evlist__to_front(evlist, arm_spe_evsel); + evlist__to_front(evlist, arm_spe_evsel); + + /* + * In the case of per-cpu mmaps, sample CPU for AUX event; + * also enable the timestamp tracing for samples correlation. + */ + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { + evsel__set_sample_bit(arm_spe_evsel, CPU); + evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel, + "ts_enable", 1); + } + + /* + * Set this only so that perf report knows that SPE generates memory info. It has no effect + * on the opening of the event or the SPE data produced. + */ + evsel__set_sample_bit(arm_spe_evsel, DATA_SRC); - evsel__set_sample_bit(arm_spe_evsel, CPU); - evsel__set_sample_bit(arm_spe_evsel, TIME); - evsel__set_sample_bit(arm_spe_evsel, TID); + /* + * The PHYS_ADDR flag does not affect the driver behaviour, it is used to + * inform that the resulting output's SPE samples contain physical addresses + * where applicable. + */ + bit = perf_pmu__format_bits(arm_spe_pmu, "pa_enable"); + if (arm_spe_evsel->core.attr.config & bit) + evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR); /* Add dummy event to keep tracking */ - err = parse_events(evlist, "dummy:u", NULL); + err = parse_event(evlist, "dummy:u"); if (err) return err; tracking_evsel = evlist__last(evlist); - perf_evlist__set_tracking_event(evlist, tracking_evsel); + evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; - evsel__set_sample_bit(tracking_evsel, TIME); - evsel__set_sample_bit(tracking_evsel, CPU); - evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); + + /* In per-cpu case, always need the time of mmap events etc */ + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { + evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, CPU); + + /* also track task context switch */ + if (!record_opts__no_switch_events(opts)) + tracking_evsel->core.attr.context_switch = 1; + } + + return 0; +} + +static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused, + struct record_opts *opts, + const char *str) +{ + unsigned long long snapshot_size = 0; + char *endptr; + + if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + + return 0; +} + +static int arm_spe_snapshot_start(struct auxtrace_record *itr) +{ + struct arm_spe_recording *ptr = + container_of(itr, struct arm_spe_recording, itr); + struct evsel *evsel; + + evlist__for_each_entry(ptr->evlist, evsel) { + if (evsel->core.attr.type == ptr->arm_spe_pmu->type) + return evsel__disable(evsel); + } + return -EINVAL; +} + +static int arm_spe_snapshot_finish(struct auxtrace_record *itr) +{ + struct arm_spe_recording *ptr = + container_of(itr, struct arm_spe_recording, itr); + struct evsel *evsel; + + evlist__for_each_entry(ptr->evlist, evsel) { + if (evsel->core.attr.type == ptr->arm_spe_pmu->type) + return evsel__enable(evsel); + } + return -EINVAL; +} + +static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx) +{ + bool *wrapped; + int cnt = ptr->wrapped_cnt, new_cnt, i; + + /* + * No need to allocate, so return early. + */ + if (idx < cnt) + return 0; + + /* + * Make ptr->wrapped as big as idx. + */ + new_cnt = idx + 1; + + /* + * Free'ed in arm_spe_recording_free(). + */ + wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool)); + if (!wrapped) + return -ENOMEM; + + /* + * init new allocated values. + */ + for (i = cnt; i < new_cnt; i++) + wrapped[i] = false; + + ptr->wrapped_cnt = new_cnt; + ptr->wrapped = wrapped; + + return 0; +} + +static bool arm_spe_buffer_has_wrapped(unsigned char *buffer, + size_t buffer_size, u64 head) +{ + u64 i, watermark; + u64 *buf = (u64 *)buffer; + size_t buf_size = buffer_size; + + /* + * Defensively handle the case where head might be continually increasing - if its value is + * equal or greater than the size of the ring buffer, then we can safely determine it has + * wrapped around. Otherwise, continue to detect if head might have wrapped. + */ + if (head >= buffer_size) + return true; + + /* + * We want to look the very last 512 byte (chosen arbitrarily) in the ring buffer. + */ + watermark = buf_size - 512; + + /* + * The value of head is somewhere within the size of the ring buffer. This can be that there + * hasn't been enough data to fill the ring buffer yet or the trace time was so long that + * head has numerically wrapped around. To find we need to check if we have data at the + * very end of the ring buffer. We can reliably do this because mmap'ed pages are zeroed + * out and there is a fresh mapping with every new session. + */ + + /* + * head is less than 512 byte from the end of the ring buffer. + */ + if (head > watermark) + watermark = head; + + /* + * Speed things up by using 64 bit transactions (see "u64 *buf" above) + */ + watermark /= sizeof(u64); + buf_size /= sizeof(u64); + + /* + * If we find trace data at the end of the ring buffer, head has been there and has + * numerically wrapped around at least once. + */ + for (i = watermark; i < buf_size; i++) + if (buf[i]) + return true; + + return false; +} + +static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old) +{ + int err; + bool wrapped; + struct arm_spe_recording *ptr = + container_of(itr, struct arm_spe_recording, itr); + + /* + * Allocate memory to keep track of wrapping if this is the first + * time we deal with this *mm. + */ + if (idx >= ptr->wrapped_cnt) { + err = arm_spe_alloc_wrapped_array(ptr, idx); + if (err) + return err; + } + + /* + * Check to see if *head has wrapped around. If it hasn't only the + * amount of data between *head and *old is snapshot'ed to avoid + * bloating the perf.data file with zeros. But as soon as *head has + * wrapped around the entire size of the AUX ring buffer it taken. + */ + wrapped = ptr->wrapped[idx]; + if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) { + wrapped = true; + ptr->wrapped[idx] = true; + } + + pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", + __func__, idx, (size_t)*old, (size_t)*head, mm->len); + + /* + * No wrap has occurred, we can just use *head and *old. + */ + if (!wrapped) + return 0; + + /* + * *head has wrapped around - adjust *head and *old to pickup the + * entire content of the AUX buffer. + */ + if (*head >= mm->len) { + *old = *head - mm->len; + } else { + *head += mm->len; + *old = *head - mm->len; + } return 0; } @@ -155,6 +476,7 @@ static void arm_spe_recording_free(struct auxtrace_record *itr) struct arm_spe_recording *sper = container_of(itr, struct arm_spe_recording, itr); + zfree(&sper->wrapped); free(sper); } @@ -176,6 +498,10 @@ struct auxtrace_record *arm_spe_recording_init(int *err, sper->arm_spe_pmu = arm_spe_pmu; sper->itr.pmu = arm_spe_pmu; + sper->itr.snapshot_start = arm_spe_snapshot_start; + sper->itr.snapshot_finish = arm_spe_snapshot_finish; + sper->itr.find_snapshot = arm_spe_find_snapshot; + sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options; sper->itr.recording_options = arm_spe_recording_options; sper->itr.info_priv_size = arm_spe_info_priv_size; sper->itr.info_fill = arm_spe_info_fill; @@ -188,29 +514,8 @@ struct auxtrace_record *arm_spe_recording_init(int *err, return &sper->itr; } -struct perf_event_attr -*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu) +void +arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu, struct perf_event_attr *attr) { - struct perf_event_attr *attr; - - attr = zalloc(sizeof(struct perf_event_attr)); - if (!attr) { - pr_err("arm_spe default config cannot allocate a perf_event_attr\n"); - return NULL; - } - - /* - * If kernel driver doesn't advertise a minimum, - * use max allowable by PMSIDR_EL1.INTERVAL - */ - if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu", - &attr->sample_period) != 1) { - pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n"); - attr->sample_period = 4096; - } - - arm_spe_pmu->selectable = true; - arm_spe_pmu->is_uncore = false; - - return attr; + attr->sample_period = arm_spe_pmu__sample_period(arm_spe_pmu); } diff --git a/tools/perf/arch/arm64/util/arm64_exception_types.h b/tools/perf/arch/arm64/util/arm64_exception_types.h new file mode 100644 index 000000000000..27c981ebe401 --- /dev/null +++ b/tools/perf/arch/arm64/util/arm64_exception_types.h @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef ARCH_PERF_ARM64_EXCEPTION_TYPES_H +#define ARCH_PERF_ARM64_EXCEPTION_TYPES_H + +/* Per asm/virt.h */ +#define HVC_STUB_ERR 0xbadca11 + +/* Per asm/kvm_asm.h */ +#define ARM_EXCEPTION_IRQ 0 +#define ARM_EXCEPTION_EL1_SERROR 1 +#define ARM_EXCEPTION_TRAP 2 +#define ARM_EXCEPTION_IL 3 +/* The hyp-stub will return this for any kvm_call_hyp() call */ +#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR + +#define kvm_arm_exception_type \ + {ARM_EXCEPTION_IRQ, "IRQ" }, \ + {ARM_EXCEPTION_EL1_SERROR, "SERROR" }, \ + {ARM_EXCEPTION_TRAP, "TRAP" }, \ + {ARM_EXCEPTION_IL, "ILLEGAL" }, \ + {ARM_EXCEPTION_HYP_GONE, "HYP_GONE" } + +/* Per asm/esr.h */ +#define ESR_ELx_EC_UNKNOWN (0x00) +#define ESR_ELx_EC_WFx (0x01) +/* Unallocated EC: 0x02 */ +#define ESR_ELx_EC_CP15_32 (0x03) +#define ESR_ELx_EC_CP15_64 (0x04) +#define ESR_ELx_EC_CP14_MR (0x05) +#define ESR_ELx_EC_CP14_LS (0x06) +#define ESR_ELx_EC_FP_ASIMD (0x07) +#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */ +#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */ +/* Unallocated EC: 0x0A - 0x0B */ +#define ESR_ELx_EC_CP14_64 (0x0C) +/* Unallocated EC: 0x0d */ +#define ESR_ELx_EC_ILL (0x0E) +/* Unallocated EC: 0x0F - 0x10 */ +#define ESR_ELx_EC_SVC32 (0x11) +#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */ +#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */ +/* Unallocated EC: 0x14 */ +#define ESR_ELx_EC_SVC64 (0x15) +#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */ +#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */ +#define ESR_ELx_EC_SYS64 (0x18) +#define ESR_ELx_EC_SVE (0x19) +#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ +/* Unallocated EC: 0x1b - 0x1E */ +#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ +#define ESR_ELx_EC_IABT_LOW (0x20) +#define ESR_ELx_EC_IABT_CUR (0x21) +#define ESR_ELx_EC_PC_ALIGN (0x22) +/* Unallocated EC: 0x23 */ +#define ESR_ELx_EC_DABT_LOW (0x24) +#define ESR_ELx_EC_DABT_CUR (0x25) +#define ESR_ELx_EC_SP_ALIGN (0x26) +/* Unallocated EC: 0x27 */ +#define ESR_ELx_EC_FP_EXC32 (0x28) +/* Unallocated EC: 0x29 - 0x2B */ +#define ESR_ELx_EC_FP_EXC64 (0x2C) +/* Unallocated EC: 0x2D - 0x2E */ +#define ESR_ELx_EC_SERROR (0x2F) +#define ESR_ELx_EC_BREAKPT_LOW (0x30) +#define ESR_ELx_EC_BREAKPT_CUR (0x31) +#define ESR_ELx_EC_SOFTSTP_LOW (0x32) +#define ESR_ELx_EC_SOFTSTP_CUR (0x33) +#define ESR_ELx_EC_WATCHPT_LOW (0x34) +#define ESR_ELx_EC_WATCHPT_CUR (0x35) +/* Unallocated EC: 0x36 - 0x37 */ +#define ESR_ELx_EC_BKPT32 (0x38) +/* Unallocated EC: 0x39 */ +#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */ +/* Unallocated EC: 0x3B */ +#define ESR_ELx_EC_BRK64 (0x3C) +/* Unallocated EC: 0x3D - 0x3F */ +#define ESR_ELx_EC_MAX (0x3F) + +#define ECN(x) { ESR_ELx_EC_##x, #x } + +#define kvm_arm_exception_class \ + ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \ + ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \ + ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \ + ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \ + ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \ + ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \ + ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \ + ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ + ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) + +#endif /* ARCH_PERF_ARM64_EXCEPTION_TYPES_H */ diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c index d730666ab95d..97037499152e 100644 --- a/tools/perf/arch/arm64/util/header.c +++ b/tools/perf/arch/arm64/util/header.c @@ -1,3 +1,6 @@ +#include <linux/kernel.h> +#include <linux/bits.h> +#include <linux/bitfield.h> #include <stdio.h> #include <stdlib.h> #include <perf/cpumap.h> @@ -10,15 +13,14 @@ #define MIDR "/regs/identification/midr_el1" #define MIDR_SIZE 19 -#define MIDR_REVISION_MASK 0xf -#define MIDR_VARIANT_SHIFT 20 -#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT) +#define MIDR_REVISION_MASK GENMASK(3, 0) +#define MIDR_VARIANT_MASK GENMASK(23, 20) static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) { const char *sysfs = sysfs__mountpoint(); - u64 midr = 0; int cpu; + int ret = EINVAL; if (!sysfs || sz < MIDR_SIZE) return EINVAL; @@ -29,8 +31,8 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) char path[PATH_MAX]; FILE *file; - scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR, - sysfs, cpus->map[cpu]); + scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR, + sysfs, RC_CHK_ACCESS(cpus)->map[cpu].cpu); file = fopen(path, "r"); if (!file) { @@ -44,27 +46,18 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) } fclose(file); - /* Ignore/clear Variant[23:20] and - * Revision[3:0] of MIDR - */ - midr = strtoul(buf, NULL, 16); - midr &= (~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK)); - scnprintf(buf, MIDR_SIZE, "0x%016lx", midr); /* got midr break loop */ + ret = 0; break; } perf_cpu_map__put(cpus); - - if (!midr) - return EINVAL; - - return 0; + return ret; } int get_cpuid(char *buf, size_t sz) { - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); int ret; if (!cpus) @@ -99,3 +92,47 @@ char *get_cpuid_str(struct perf_pmu *pmu) return buf; } + +/* + * Return 0 if idstr is a higher or equal to version of the same part as + * mapcpuid. Therefore, if mapcpuid has 0 for revision and variant then any + * version of idstr will match as long as it's the same CPU type. + * + * Return 1 if the CPU type is different or the version of idstr is lower. + */ +int strcmp_cpuid_str(const char *mapcpuid, const char *idstr) +{ + u64 map_id = strtoull(mapcpuid, NULL, 16); + char map_id_variant = FIELD_GET(MIDR_VARIANT_MASK, map_id); + char map_id_revision = FIELD_GET(MIDR_REVISION_MASK, map_id); + u64 id = strtoull(idstr, NULL, 16); + char id_variant = FIELD_GET(MIDR_VARIANT_MASK, id); + char id_revision = FIELD_GET(MIDR_REVISION_MASK, id); + u64 id_fields = ~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK); + + /* Compare without version first */ + if ((map_id & id_fields) != (id & id_fields)) + return 1; + + /* + * ID matches, now compare version. + * + * Arm revisions (like r0p0) are compared here like two digit semver + * values eg. 1.3 < 2.0 < 2.1 < 2.2. + * + * r = high value = 'Variant' field in MIDR + * p = low value = 'Revision' field in MIDR + * + */ + if (id_variant > map_id_variant) + return 0; + + if (id_variant == map_id_variant && id_revision >= map_id_revision) + return 0; + + /* + * variant is less than mapfile variant or variants are the same but + * the revision doesn't match. Return no match. + */ + return 1; +} diff --git a/tools/perf/arch/arm64/util/hisi-ptt.c b/tools/perf/arch/arm64/util/hisi-ptt.c new file mode 100644 index 000000000000..ba97c8a562a0 --- /dev/null +++ b/tools/perf/arch/arm64/util/hisi-ptt.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * HiSilicon PCIe Trace and Tuning (PTT) support + * Copyright (c) 2022 HiSilicon Technologies Co., Ltd. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> +#include <linux/zalloc.h> +#include <time.h> + +#include <internal/lib.h> // page_size +#include "../../../util/auxtrace.h" +#include "../../../util/cpumap.h" +#include "../../../util/debug.h" +#include "../../../util/event.h" +#include "../../../util/evlist.h" +#include "../../../util/evsel.h" +#include "../../../util/hisi-ptt.h" +#include "../../../util/pmu.h" +#include "../../../util/record.h" +#include "../../../util/session.h" +#include "../../../util/tsc.h" + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) + +struct hisi_ptt_recording { + struct auxtrace_record itr; + struct perf_pmu *hisi_ptt_pmu; + struct evlist *evlist; +}; + +static size_t +hisi_ptt_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct evlist *evlist __maybe_unused) +{ + return HISI_PTT_AUXTRACE_PRIV_SIZE; +} + +static int hisi_ptt_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct perf_record_auxtrace_info *auxtrace_info, + size_t priv_size) +{ + struct hisi_ptt_recording *pttr = + container_of(itr, struct hisi_ptt_recording, itr); + struct perf_pmu *hisi_ptt_pmu = pttr->hisi_ptt_pmu; + + if (priv_size != HISI_PTT_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + if (!session->evlist->core.nr_mmaps) + return -EINVAL; + + auxtrace_info->type = PERF_AUXTRACE_HISI_PTT; + auxtrace_info->priv[0] = hisi_ptt_pmu->type; + + return 0; +} + +static int hisi_ptt_set_auxtrace_mmap_page(struct record_opts *opts) +{ + bool privileged = perf_event_paranoid_check(-1); + + if (!opts->full_auxtrace) + return 0; + + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(16) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for HISI PTT: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + return 0; +} + +static int hisi_ptt_recording_options(struct auxtrace_record *itr, + struct evlist *evlist, + struct record_opts *opts) +{ + struct hisi_ptt_recording *pttr = + container_of(itr, struct hisi_ptt_recording, itr); + struct perf_pmu *hisi_ptt_pmu = pttr->hisi_ptt_pmu; + struct evsel *evsel, *hisi_ptt_evsel = NULL; + struct evsel *tracking_evsel; + int err; + + pttr->evlist = evlist; + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type == hisi_ptt_pmu->type) { + if (hisi_ptt_evsel) { + pr_err("There may be only one " HISI_PTT_PMU_NAME "x event\n"); + return -EINVAL; + } + evsel->core.attr.freq = 0; + evsel->core.attr.sample_period = 1; + evsel->needs_auxtrace_mmap = true; + hisi_ptt_evsel = evsel; + opts->full_auxtrace = true; + } + } + + err = hisi_ptt_set_auxtrace_mmap_page(opts); + if (err) + return err; + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace event + * must come first. + */ + evlist__to_front(evlist, hisi_ptt_evsel); + evsel__set_sample_bit(hisi_ptt_evsel, TIME); + + /* Add dummy event to keep tracking */ + err = parse_event(evlist, "dummy:u"); + if (err) + return err; + + tracking_evsel = evlist__last(evlist); + evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->core.attr.freq = 0; + tracking_evsel->core.attr.sample_period = 1; + evsel__set_sample_bit(tracking_evsel, TIME); + + return 0; +} + +static u64 hisi_ptt_reference(struct auxtrace_record *itr __maybe_unused) +{ + return rdtsc(); +} + +static void hisi_ptt_recording_free(struct auxtrace_record *itr) +{ + struct hisi_ptt_recording *pttr = + container_of(itr, struct hisi_ptt_recording, itr); + + free(pttr); +} + +struct auxtrace_record *hisi_ptt_recording_init(int *err, + struct perf_pmu *hisi_ptt_pmu) +{ + struct hisi_ptt_recording *pttr; + + if (!hisi_ptt_pmu) { + *err = -ENODEV; + return NULL; + } + + pttr = zalloc(sizeof(*pttr)); + if (!pttr) { + *err = -ENOMEM; + return NULL; + } + + pttr->hisi_ptt_pmu = hisi_ptt_pmu; + pttr->itr.pmu = hisi_ptt_pmu; + pttr->itr.recording_options = hisi_ptt_recording_options; + pttr->itr.info_priv_size = hisi_ptt_info_priv_size; + pttr->itr.info_fill = hisi_ptt_info_fill; + pttr->itr.free = hisi_ptt_recording_free; + pttr->itr.reference = hisi_ptt_reference; + pttr->itr.read_finish = auxtrace_record__read_finish; + pttr->itr.alignment = 0; + + *err = 0; + return &pttr->itr; +} diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c new file mode 100644 index 000000000000..6611aa21cba9 --- /dev/null +++ b/tools/perf/arch/arm64/util/kvm-stat.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <memory.h> +#include "../../../util/evsel.h" +#include "../../../util/kvm-stat.h" +#include "arm64_exception_types.h" +#include "debug.h" + +define_exit_reasons_table(arm64_exit_reasons, kvm_arm_exception_type); +define_exit_reasons_table(arm64_trap_exit_reasons, kvm_arm_exception_class); + +const char *kvm_trap_exit_reason = "esr_ec"; +const char *vcpu_id_str = "id"; +const char *kvm_exit_reason = "ret"; +const char *kvm_entry_trace = "kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + +const char *kvm_events_tp[] = { + "kvm:kvm_entry", + "kvm:kvm_exit", + NULL, +}; + +static void event_get_key(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = evsel__intval(evsel, sample, kvm_exit_reason); + key->exit_reasons = arm64_exit_reasons; + + /* + * TRAP exceptions carry exception class info in esr_ec field + * and, hence, we need to use a different exit_reasons table to + * properly decode event's est_ec. + */ + if (key->key == ARM_EXCEPTION_TRAP) { + key->key = evsel__intval(evsel, sample, kvm_trap_exit_reason); + key->exit_reasons = arm64_trap_exit_reasons; + } +} + +static bool event_begin(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return evsel__name_is(evsel, kvm_entry_trace); +} + +static bool event_end(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + if (evsel__name_is(evsel, kvm_exit_trace)) { + event_get_key(evsel, sample, key); + return true; + } + return false; +} + +static struct kvm_events_ops exit_events = { + .is_begin_event = event_begin, + .is_end_event = event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { + .name = "vmexit", + .ops = &exit_events, + }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + kvm->exit_reasons_isa = "arm64"; + return 0; +} diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index d41b27e781d3..aab1cc2bc283 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -1,27 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> #include <stdio.h> #include <string.h> #include "debug.h" #include "symbol.h" +#include "callchain.h" +#include "perf_regs.h" +#include "record.h" +#include "util/perf_regs.h" -/* On arm64, kernel text segment start at high memory address, - * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory - * address, like 0xffff 0000 00ax xxxx. When only samll amount of - * memory is used by modules, gap between end of module's text segment - * and start of kernel text segment may be reach 2G. - * Therefore do not fill this gap and do not assign it to the kernel dso map. - */ - -#define SYMBOL_LIMIT (1 << 12) /* 4K */ - -void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) +void arch__add_leaf_frame_record_opts(struct record_opts *opts) { - if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) || - (strchr(p->name, '[') == NULL && strchr(c->name, '['))) - /* Limit range of last symbol in module and kernel */ - p->end += SYMBOL_LIMIT; - else - p->end = c->start; - pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end); + const struct sample_reg *sample_reg_masks = arch__sample_reg_masks(); + + opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask; } diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c new file mode 100644 index 000000000000..9f8da7937255 --- /dev/null +++ b/tools/perf/arch/arm64/util/mem-events.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "util/map_symbol.h" +#include "util/mem-events.h" +#include "mem-events.h" + +#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a } + +struct perf_mem_event perf_mem_events_arm[PERF_MEM_EVENTS__MAX] = { + E("spe-load", "%s/ts_enable=1,pa_enable=1,load_filter=1,store_filter=0,min_latency=%u/", NULL, true, 0), + E("spe-store", "%s/ts_enable=1,pa_enable=1,load_filter=0,store_filter=1/", NULL, false, 0), + E("spe-ldst", "%s/ts_enable=1,pa_enable=1,load_filter=1,store_filter=1,min_latency=%u/", NULL, true, 0), +}; diff --git a/tools/perf/arch/arm64/util/mem-events.h b/tools/perf/arch/arm64/util/mem-events.h new file mode 100644 index 000000000000..5fc50be4be38 --- /dev/null +++ b/tools/perf/arch/arm64/util/mem-events.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARM64_MEM_EVENTS_H +#define _ARM64_MEM_EVENTS_H + +extern struct perf_mem_event perf_mem_events_arm[PERF_MEM_EVENTS__MAX]; + +#endif /* _ARM64_MEM_EVENTS_H */ diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 2833e101a7c6..09308665e28a 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -1,6 +1,182 @@ // SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <regex.h> +#include <string.h> +#include <sys/auxv.h> +#include <linux/kernel.h> +#include <linux/zalloc.h> + +#include "perf_regs.h" +#include "../../../perf-sys.h" +#include "../../../util/debug.h" +#include "../../../util/event.h" #include "../../../util/perf_regs.h" -const struct sample_reg sample_reg_masks[] = { +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif + +static const struct sample_reg sample_reg_masks[] = { + SMPL_REG(x0, PERF_REG_ARM64_X0), + SMPL_REG(x1, PERF_REG_ARM64_X1), + SMPL_REG(x2, PERF_REG_ARM64_X2), + SMPL_REG(x3, PERF_REG_ARM64_X3), + SMPL_REG(x4, PERF_REG_ARM64_X4), + SMPL_REG(x5, PERF_REG_ARM64_X5), + SMPL_REG(x6, PERF_REG_ARM64_X6), + SMPL_REG(x7, PERF_REG_ARM64_X7), + SMPL_REG(x8, PERF_REG_ARM64_X8), + SMPL_REG(x9, PERF_REG_ARM64_X9), + SMPL_REG(x10, PERF_REG_ARM64_X10), + SMPL_REG(x11, PERF_REG_ARM64_X11), + SMPL_REG(x12, PERF_REG_ARM64_X12), + SMPL_REG(x13, PERF_REG_ARM64_X13), + SMPL_REG(x14, PERF_REG_ARM64_X14), + SMPL_REG(x15, PERF_REG_ARM64_X15), + SMPL_REG(x16, PERF_REG_ARM64_X16), + SMPL_REG(x17, PERF_REG_ARM64_X17), + SMPL_REG(x18, PERF_REG_ARM64_X18), + SMPL_REG(x19, PERF_REG_ARM64_X19), + SMPL_REG(x20, PERF_REG_ARM64_X20), + SMPL_REG(x21, PERF_REG_ARM64_X21), + SMPL_REG(x22, PERF_REG_ARM64_X22), + SMPL_REG(x23, PERF_REG_ARM64_X23), + SMPL_REG(x24, PERF_REG_ARM64_X24), + SMPL_REG(x25, PERF_REG_ARM64_X25), + SMPL_REG(x26, PERF_REG_ARM64_X26), + SMPL_REG(x27, PERF_REG_ARM64_X27), + SMPL_REG(x28, PERF_REG_ARM64_X28), + SMPL_REG(x29, PERF_REG_ARM64_X29), + SMPL_REG(lr, PERF_REG_ARM64_LR), + SMPL_REG(sp, PERF_REG_ARM64_SP), + SMPL_REG(pc, PERF_REG_ARM64_PC), + SMPL_REG(vg, PERF_REG_ARM64_VG), SMPL_REG_END }; + +/* %xNUM */ +#define SDT_OP_REGEX1 "^(x[1-2]?[0-9]|3[0-1])$" + +/* [sp], [sp, NUM] */ +#define SDT_OP_REGEX2 "^\\[sp(, )?([0-9]+)?\\]$" + +static regex_t sdt_op_regex1, sdt_op_regex2; + +static int sdt_init_op_regex(void) +{ + static int initialized; + int ret = 0; + + if (initialized) + return 0; + + ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED); + if (ret) + goto error; + + ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED); + if (ret) + goto free_regex1; + + initialized = 1; + return 0; + +free_regex1: + regfree(&sdt_op_regex1); +error: + pr_debug4("Regex compilation error.\n"); + return ret; +} + +/* + * SDT marker arguments on Arm64 uses %xREG or [sp, NUM], currently + * support these two formats. + */ +int arch_sdt_arg_parse_op(char *old_op, char **new_op) +{ + int ret, new_len; + regmatch_t rm[5]; + + ret = sdt_init_op_regex(); + if (ret < 0) + return ret; + + if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) { + /* Extract xNUM */ + new_len = 2; /* % NULL */ + new_len += (int)(rm[1].rm_eo - rm[1].rm_so); + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + scnprintf(*new_op, new_len, "%%%.*s", + (int)(rm[1].rm_eo - rm[1].rm_so), old_op + rm[1].rm_so); + } else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) { + /* [sp], [sp, NUM] or [sp,NUM] */ + new_len = 7; /* + ( % s p ) NULL */ + + /* If the argument is [sp], need to fill offset '0' */ + if (rm[2].rm_so == -1) + new_len += 1; + else + new_len += (int)(rm[2].rm_eo - rm[2].rm_so); + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + if (rm[2].rm_so == -1) + scnprintf(*new_op, new_len, "+0(%%sp)"); + else + scnprintf(*new_op, new_len, "+%.*s(%%sp)", + (int)(rm[2].rm_eo - rm[2].rm_so), + old_op + rm[2].rm_so); + } else { + pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); + return SDT_ARG_SKIP; + } + + return SDT_ARG_VALID; +} + +uint64_t arch__intr_reg_mask(void) +{ + return PERF_REGS_MASK; +} + +uint64_t arch__user_reg_mask(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .sample_type = PERF_SAMPLE_REGS_USER, + .disabled = 1, + .exclude_kernel = 1, + .sample_period = 1, + .sample_regs_user = PERF_REGS_MASK + }; + int fd; + + if (getauxval(AT_HWCAP) & HWCAP_SVE) + attr.sample_regs_user |= SMPL_REG_MASK(PERF_REG_ARM64_VG); + + /* + * Check if the pmu supports perf extended regs, before + * returning the register mask to sample. + */ + if (attr.sample_regs_user != PERF_REGS_MASK) { + event_attr_init(&attr); + fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (fd != -1) { + close(fd); + return attr.sample_regs_user; + } + } + return PERF_REGS_MASK; +} + +const struct sample_reg *arch__sample_reg_masks(void) +{ + return sample_reg_masks; +} diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c new file mode 100644 index 000000000000..2a4eab2d160e --- /dev/null +++ b/tools/perf/arch/arm64/util/pmu.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <internal/cpumap.h> +#include "../../../util/cpumap.h" +#include "../../../util/header.h" +#include "../../../util/pmu.h" +#include "../../../util/pmus.h" +#include <api/fs/fs.h> +#include <math.h> + +const struct pmu_metrics_table *pmu_metrics_table__find(void) +{ + struct perf_pmu *pmu; + + /* Metrics aren't currently supported on heterogeneous Arm systems */ + if (perf_pmus__num_core_pmus() > 1) + return NULL; + + /* Doesn't matter which one here because they'll all be the same */ + pmu = perf_pmus__find_core_pmu(); + if (pmu) + return perf_pmu__find_metrics_table(pmu); + + return NULL; +} + +double perf_pmu__cpu_slots_per_cycle(void) +{ + char path[PATH_MAX]; + unsigned long long slots = 0; + struct perf_pmu *pmu = perf_pmus__find_core_pmu(); + + if (pmu) { + perf_pmu__pathname_scnprintf(path, sizeof(path), + pmu->name, "caps/slots"); + /* + * The value of slots is not greater than 32 bits, but + * filename__read_int can't read value with 0x prefix, + * so use filename__read_ull instead. + */ + filename__read_ull(path, &slots); + } + + return slots ? (double)slots : NAN; +} diff --git a/tools/perf/arch/arm64/util/tsc.c b/tools/perf/arch/arm64/util/tsc.c new file mode 100644 index 000000000000..cc85bd9e73f1 --- /dev/null +++ b/tools/perf/arch/arm64/util/tsc.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/types.h> + +#include "../../../util/tsc.h" + +u64 rdtsc(void) +{ + u64 val; + + /* + * According to ARM DDI 0487F.c, from Armv8.0 to Armv8.5 inclusive, the + * system counter is at least 56 bits wide; from Armv8.6, the counter + * must be 64 bits wide. So the system counter could be less than 64 + * bits wide and it is attributed with the flag 'cap_user_time_short' + * is true. + */ + asm volatile("mrs %0, cntvct_el0" : "=r" (val)); + + return val; +} diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c index a50941629649..e056d50ab42e 100644 --- a/tools/perf/arch/arm64/util/unwind-libdw.c +++ b/tools/perf/arch/arm64/util/unwind-libdw.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <elfutils/libdwfl.h> +#include "perf_regs.h" #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "../../../util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c index 1495a9523a23..871af5992298 100644 --- a/tools/perf/arch/arm64/util/unwind-libunwind.c +++ b/tools/perf/arch/arm64/util/unwind-libunwind.c @@ -4,83 +4,14 @@ #ifndef REMOTE_UNWIND_LIBUNWIND #include <libunwind.h> #include "perf_regs.h" -#include "../../util/unwind.h" +#include "../../../util/unwind.h" #endif -#include "../../util/debug.h" +#include "../../../util/debug.h" int LIBUNWIND__ARCH_REG_ID(int regnum) { - switch (regnum) { - case UNW_AARCH64_X0: - return PERF_REG_ARM64_X0; - case UNW_AARCH64_X1: - return PERF_REG_ARM64_X1; - case UNW_AARCH64_X2: - return PERF_REG_ARM64_X2; - case UNW_AARCH64_X3: - return PERF_REG_ARM64_X3; - case UNW_AARCH64_X4: - return PERF_REG_ARM64_X4; - case UNW_AARCH64_X5: - return PERF_REG_ARM64_X5; - case UNW_AARCH64_X6: - return PERF_REG_ARM64_X6; - case UNW_AARCH64_X7: - return PERF_REG_ARM64_X7; - case UNW_AARCH64_X8: - return PERF_REG_ARM64_X8; - case UNW_AARCH64_X9: - return PERF_REG_ARM64_X9; - case UNW_AARCH64_X10: - return PERF_REG_ARM64_X10; - case UNW_AARCH64_X11: - return PERF_REG_ARM64_X11; - case UNW_AARCH64_X12: - return PERF_REG_ARM64_X12; - case UNW_AARCH64_X13: - return PERF_REG_ARM64_X13; - case UNW_AARCH64_X14: - return PERF_REG_ARM64_X14; - case UNW_AARCH64_X15: - return PERF_REG_ARM64_X15; - case UNW_AARCH64_X16: - return PERF_REG_ARM64_X16; - case UNW_AARCH64_X17: - return PERF_REG_ARM64_X17; - case UNW_AARCH64_X18: - return PERF_REG_ARM64_X18; - case UNW_AARCH64_X19: - return PERF_REG_ARM64_X19; - case UNW_AARCH64_X20: - return PERF_REG_ARM64_X20; - case UNW_AARCH64_X21: - return PERF_REG_ARM64_X21; - case UNW_AARCH64_X22: - return PERF_REG_ARM64_X22; - case UNW_AARCH64_X23: - return PERF_REG_ARM64_X23; - case UNW_AARCH64_X24: - return PERF_REG_ARM64_X24; - case UNW_AARCH64_X25: - return PERF_REG_ARM64_X25; - case UNW_AARCH64_X26: - return PERF_REG_ARM64_X26; - case UNW_AARCH64_X27: - return PERF_REG_ARM64_X27; - case UNW_AARCH64_X28: - return PERF_REG_ARM64_X28; - case UNW_AARCH64_X29: - return PERF_REG_ARM64_X29; - case UNW_AARCH64_X30: - return PERF_REG_ARM64_LR; - case UNW_AARCH64_SP: - return PERF_REG_ARM64_SP; - case UNW_AARCH64_PC: - return PERF_REG_ARM64_PC; - default: - pr_err("unwind: invalid reg id %d\n", regnum); + if (regnum < 0 || regnum >= PERF_REG_ARM64_EXTENDED_MAX) return -EINVAL; - } - return -EINVAL; + return regnum; } |