Diffstat (limited to 'tools/perf/arch/arm64/util')
 tools/perf/arch/arm64/util/Build                   |   5
 tools/perf/arch/arm64/util/arm-spe.c               | 375
 tools/perf/arch/arm64/util/arm64_exception_types.h |  92
 tools/perf/arch/arm64/util/header.c                |  73
 tools/perf/arch/arm64/util/hisi-ptt.c              | 188
 tools/perf/arch/arm64/util/kvm-stat.c              |  84
 tools/perf/arch/arm64/util/machine.c               |  27
 tools/perf/arch/arm64/util/mem-events.c            |  12
 tools/perf/arch/arm64/util/mem-events.h            |   7
 tools/perf/arch/arm64/util/perf_regs.c             | 178
 tools/perf/arch/arm64/util/pmu.c                   |  45
 tools/perf/arch/arm64/util/tsc.c                   |  21
 tools/perf/arch/arm64/util/unwind-libdw.c          |   3
 tools/perf/arch/arm64/util/unwind-libunwind.c      |  77
 14 files changed, 1040 insertions(+), 147 deletions(-)
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 5c13438c7bd4..78ef7115be3d 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,6 +1,9 @@
perf-y += header.o
perf-y += machine.o
perf-y += perf_regs.o
+perf-y += tsc.o
+perf-y += pmu.o
+perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
@@ -8,4 +11,4 @@ perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
../../arm/util/auxtrace.o \
../../arm/util/cs-etm.o \
- arm-spe.o
+ arm-spe.o mem-events.o hisi-ptt.o
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index e3593063b3d1..51ccbfd3d246 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -14,6 +14,7 @@
#include "../../../util/cpumap.h"
#include "../../../util/event.h"
#include "../../../util/evsel.h"
+#include "../../../util/evsel_config.h"
#include "../../../util/evlist.h"
#include "../../../util/session.h"
#include <internal/lib.h> // page_size
@@ -22,6 +23,7 @@
#include "../../../util/auxtrace.h"
#include "../../../util/record.h"
#include "../../../util/arm-spe.h"
+#include <tools/libc_compat.h> // reallocarray
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
@@ -30,6 +32,8 @@ struct arm_spe_recording {
struct auxtrace_record itr;
struct perf_pmu *arm_spe_pmu;
struct evlist *evlist;
+ int wrapped_cnt;
+ bool *wrapped;
};
static size_t
@@ -60,6 +64,74 @@ static int arm_spe_info_fill(struct auxtrace_record *itr,
return 0;
}
+static void
+arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
+ bool privileged)
+{
+ /*
+ * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor
+ * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for
+ * unprivileged users.
+ *
+ * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for
+ * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages
+ * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
+ * user is likely to get an error as they exceed their mlock limit.
+ */
+
+ /*
+ * No size was given to '-S' or '-m,', so go with the default
+ */
+ if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ } else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) {
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+
+ /*
+ * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the
+ * auxtrace mmap area.
+ */
+ if (!opts->auxtrace_snapshot_size)
+ opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size;
+
+ /*
+ * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big
+ * enough to fit the requested snapshot size.
+ */
+ if (!opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_snapshot_size;
+
+ sz = round_up(sz, page_size) / page_size;
+ opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+ }
+}
+
+static __u64 arm_spe_pmu__sample_period(const struct perf_pmu *arm_spe_pmu)
+{
+ static __u64 sample_period;
+
+ if (sample_period)
+ return sample_period;
+
+ /*
+ * If the kernel driver doesn't advertise a minimum,
+ * use the maximum allowed by PMSIDR_EL1.INTERVAL
+ */
+ if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
+ &sample_period) != 1) {
+ pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
+ sample_period = 4096;
+ }
+ return sample_period;
+}
+
static int arm_spe_recording_options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts)
@@ -68,9 +140,11 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
container_of(itr, struct arm_spe_recording, itr);
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
struct evsel *evsel, *arm_spe_evsel = NULL;
+ struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
bool privileged = perf_event_paranoid_check(-1);
struct evsel *tracking_evsel;
int err;
+ u64 bit;
sper->evlist = evlist;
@@ -81,7 +155,8 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
return -EINVAL;
}
evsel->core.attr.freq = 0;
- evsel->core.attr.sample_period = 1;
+ evsel->core.attr.sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
+ evsel->needs_auxtrace_mmap = true;
arm_spe_evsel = evsel;
opts->full_auxtrace = true;
}
@@ -90,8 +165,38 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
if (!opts->full_auxtrace)
return 0;
+ /*
+ * We are in snapshot mode.
+ */
+ if (opts->auxtrace_snapshot_mode) {
+ /*
+ * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with
+ * default values.
+ */
+ if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages)
+ arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged);
+
+ /*
+ * Snapshot size can't be bigger than the auxtrace area.
+ */
+ if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) {
+ pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+ opts->auxtrace_snapshot_size,
+ opts->auxtrace_mmap_pages * (size_t)page_size);
+ return -EINVAL;
+ }
+
+ /*
+ * Something went wrong somewhere - this shouldn't happen.
+ */
+ if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
+ pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+ return -EINVAL;
+ }
+ }
+
/* We are in full trace mode but '-m,xyz' wasn't specified */
- if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+ if (!opts->auxtrace_mmap_pages) {
if (privileged) {
opts->auxtrace_mmap_pages = MiB(4) / page_size;
} else {
@@ -113,30 +218,246 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
}
}
+ if (opts->auxtrace_snapshot_mode)
+ pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
+ opts->auxtrace_snapshot_size);
/*
* To obtain the auxtrace buffer file descriptor, the auxtrace event
* must come first.
*/
- perf_evlist__to_front(evlist, arm_spe_evsel);
+ evlist__to_front(evlist, arm_spe_evsel);
+
+ /*
+ * In the case of per-cpu mmaps, sample the CPU for the AUX event;
+ * also enable timestamp tracing for sample correlation.
+ */
+ if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ evsel__set_sample_bit(arm_spe_evsel, CPU);
+ evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
+ "ts_enable", 1);
+ }
+
+ /*
+ * Set this only so that perf report knows that SPE generates memory info. It has no effect
+ * on the opening of the event or the SPE data produced.
+ */
+ evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);
- evsel__set_sample_bit(arm_spe_evsel, CPU);
- evsel__set_sample_bit(arm_spe_evsel, TIME);
- evsel__set_sample_bit(arm_spe_evsel, TID);
+ /*
+ * The PHYS_ADDR flag does not affect the driver behaviour; it is used to
+ * indicate that the SPE samples in the resulting output contain physical
+ * addresses where applicable.
+ */
+ bit = perf_pmu__format_bits(arm_spe_pmu, "pa_enable");
+ if (arm_spe_evsel->core.attr.config & bit)
+ evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);
/* Add dummy event to keep tracking */
- err = parse_events(evlist, "dummy:u", NULL);
+ err = parse_event(evlist, "dummy:u");
if (err)
return err;
tracking_evsel = evlist__last(evlist);
- perf_evlist__set_tracking_event(evlist, tracking_evsel);
+ evlist__set_tracking_event(evlist, tracking_evsel);
tracking_evsel->core.attr.freq = 0;
tracking_evsel->core.attr.sample_period = 1;
- evsel__set_sample_bit(tracking_evsel, TIME);
- evsel__set_sample_bit(tracking_evsel, CPU);
- evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+
+ /* In per-cpu case, always need the time of mmap events etc */
+ if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, CPU);
+
+ /* also track task context switch */
+ if (!record_opts__no_switch_events(opts))
+ tracking_evsel->core.attr.context_switch = 1;
+ }
+
+ return 0;
+}
+
+static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+ struct record_opts *opts,
+ const char *str)
+{
+ unsigned long long snapshot_size = 0;
+ char *endptr;
+
+ if (str) {
+ snapshot_size = strtoull(str, &endptr, 0);
+ if (*endptr || snapshot_size > SIZE_MAX)
+ return -1;
+ }
+
+ opts->auxtrace_snapshot_mode = true;
+ opts->auxtrace_snapshot_size = snapshot_size;
+
+ return 0;
+}
+
+static int arm_spe_snapshot_start(struct auxtrace_record *itr)
+{
+ struct arm_spe_recording *ptr =
+ container_of(itr, struct arm_spe_recording, itr);
+ struct evsel *evsel;
+
+ evlist__for_each_entry(ptr->evlist, evsel) {
+ if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
+ return evsel__disable(evsel);
+ }
+ return -EINVAL;
+}
+
+static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
+{
+ struct arm_spe_recording *ptr =
+ container_of(itr, struct arm_spe_recording, itr);
+ struct evsel *evsel;
+
+ evlist__for_each_entry(ptr->evlist, evsel) {
+ if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
+ return evsel__enable(evsel);
+ }
+ return -EINVAL;
+}
+
+static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
+{
+ bool *wrapped;
+ int cnt = ptr->wrapped_cnt, new_cnt, i;
+
+ /*
+ * No need to allocate, so return early.
+ */
+ if (idx < cnt)
+ return 0;
+
+ /*
+ * Make ptr->wrapped as big as idx.
+ */
+ new_cnt = idx + 1;
+
+ /*
+ * Freed in arm_spe_recording_free().
+ */
+ wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool));
+ if (!wrapped)
+ return -ENOMEM;
+
+ /*
+ * init new allocated values.
+ */
+ for (i = cnt; i < new_cnt; i++)
+ wrapped[i] = false;
+
+ ptr->wrapped_cnt = new_cnt;
+ ptr->wrapped = wrapped;
+
+ return 0;
+}
+
+static bool arm_spe_buffer_has_wrapped(unsigned char *buffer,
+ size_t buffer_size, u64 head)
+{
+ u64 i, watermark;
+ u64 *buf = (u64 *)buffer;
+ size_t buf_size = buffer_size;
+
+ /*
+ * Defensively handle the case where head might be continually increasing: if its value is
+ * equal to or greater than the size of the ring buffer, then we can safely determine it has
+ * wrapped around. Otherwise, continue to detect if head might have wrapped.
+ */
+ if (head >= buffer_size)
+ return true;
+
+ /*
+ * We want to look at the very last 512 bytes (chosen arbitrarily) of the ring buffer.
+ */
+ watermark = buf_size - 512;
+
+ /*
+ * The value of head is somewhere within the size of the ring buffer. This could mean that
+ * there hasn't been enough data to fill the ring buffer yet, or that the trace time was so
+ * long that head has numerically wrapped around. To find out, we need to check whether we
+ * have data at the very end of the ring buffer. We can reliably do this because mmap'ed
+ * pages are zeroed out and there is a fresh mapping with every new session.
+ */
+
+ /*
+ * head is less than 512 bytes from the end of the ring buffer.
+ */
+ if (head > watermark)
+ watermark = head;
+
+ /*
+ * Speed things up by using 64 bit transactions (see "u64 *buf" above)
+ */
+ watermark /= sizeof(u64);
+ buf_size /= sizeof(u64);
+
+ /*
+ * If we find trace data at the end of the ring buffer, head has been there and has
+ * numerically wrapped around at least once.
+ */
+ for (i = watermark; i < buf_size; i++)
+ if (buf[i])
+ return true;
+
+ return false;
+}
+
+static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm, unsigned char *data,
+ u64 *head, u64 *old)
+{
+ int err;
+ bool wrapped;
+ struct arm_spe_recording *ptr =
+ container_of(itr, struct arm_spe_recording, itr);
+
+ /*
+ * Allocate memory to keep track of wrapping if this is the first
+ * time we deal with this *mm.
+ */
+ if (idx >= ptr->wrapped_cnt) {
+ err = arm_spe_alloc_wrapped_array(ptr, idx);
+ if (err)
+ return err;
+ }
+
+ /*
+ * Check to see if *head has wrapped around. If it hasn't, only the
+ * amount of data between *head and *old is snapshotted to avoid
+ * bloating the perf.data file with zeros. But as soon as *head has
+ * wrapped around, the entire size of the AUX ring buffer is taken.
+ */
+ wrapped = ptr->wrapped[idx];
+ if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) {
+ wrapped = true;
+ ptr->wrapped[idx] = true;
+ }
+
+ pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
+ __func__, idx, (size_t)*old, (size_t)*head, mm->len);
+
+ /*
+ * No wrap has occurred; we can just use *head and *old.
+ */
+ if (!wrapped)
+ return 0;
+
+ /*
+ * *head has wrapped around - adjust *head and *old to pick up the
+ * entire content of the AUX buffer.
+ */
+ if (*head >= mm->len) {
+ *old = *head - mm->len;
+ } else {
+ *head += mm->len;
+ *old = *head - mm->len;
+ }
return 0;
}
@@ -155,6 +476,7 @@ static void arm_spe_recording_free(struct auxtrace_record *itr)
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
+ zfree(&sper->wrapped);
free(sper);
}
@@ -176,6 +498,10 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
sper->arm_spe_pmu = arm_spe_pmu;
sper->itr.pmu = arm_spe_pmu;
+ sper->itr.snapshot_start = arm_spe_snapshot_start;
+ sper->itr.snapshot_finish = arm_spe_snapshot_finish;
+ sper->itr.find_snapshot = arm_spe_find_snapshot;
+ sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options;
sper->itr.recording_options = arm_spe_recording_options;
sper->itr.info_priv_size = arm_spe_info_priv_size;
sper->itr.info_fill = arm_spe_info_fill;
@@ -188,29 +514,8 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
return &sper->itr;
}
-struct perf_event_attr
-*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
+void
+arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu, struct perf_event_attr *attr)
{
- struct perf_event_attr *attr;
-
- attr = zalloc(sizeof(struct perf_event_attr));
- if (!attr) {
- pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
- return NULL;
- }
-
- /*
- * If kernel driver doesn't advertise a minimum,
- * use max allowable by PMSIDR_EL1.INTERVAL
- */
- if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
- &attr->sample_period) != 1) {
- pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
- attr->sample_period = 4096;
- }
-
- arm_spe_pmu->selectable = true;
- arm_spe_pmu->is_uncore = false;
-
- return attr;
+ attr->sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
}
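
As context for the snapshot support above: arm_spe_buffer_has_wrapped() relies on AUX mmap pages being zero-filled at the start of a session. A minimal standalone sketch of that heuristic, with illustrative names (buffer_has_wrapped() and the test buffer are not part of the patch):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified model of arm_spe_buffer_has_wrapped(): a fresh AUX ring
 * buffer is zero-filled, so any non-zero word in the last 512 bytes
 * means head has passed the end of the buffer at least once. */
static bool buffer_has_wrapped(const uint64_t *buf, size_t size, uint64_t head)
{
	size_t words = size / sizeof(uint64_t);
	size_t start = (size - 512) / sizeof(uint64_t);

	if (head >= size)		/* head kept increasing past the end */
		return true;
	if (head > size - 512)		/* head already inside the watermark:
					 * only scan the bytes beyond it */
		start = head / sizeof(uint64_t);
	for (size_t i = start; i < words; i++)
		if (buf[i])
			return true;
	return false;
}

int main(void)
{
	static uint64_t buf[1024];	/* 8 KiB, zeroed like a fresh mmap */

	printf("%d\n", buffer_has_wrapped(buf, sizeof(buf), 100));	/* 0 */
	buf[1023] = 0xdeadbeef;		/* trace data at the very end */
	printf("%d\n", buffer_has_wrapped(buf, sizeof(buf), 100));	/* 1 */
	return 0;
}
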
diff --git a/tools/perf/arch/arm64/util/arm64_exception_types.h b/tools/perf/arch/arm64/util/arm64_exception_types.h
new file mode 100644
index 000000000000..27c981ebe401
--- /dev/null
+++ b/tools/perf/arch/arm64/util/arm64_exception_types.h
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef ARCH_PERF_ARM64_EXCEPTION_TYPES_H
+#define ARCH_PERF_ARM64_EXCEPTION_TYPES_H
+
+/* Per asm/virt.h */
+#define HVC_STUB_ERR 0xbadca11
+
+/* Per asm/kvm_asm.h */
+#define ARM_EXCEPTION_IRQ 0
+#define ARM_EXCEPTION_EL1_SERROR 1
+#define ARM_EXCEPTION_TRAP 2
+#define ARM_EXCEPTION_IL 3
+/* The hyp-stub will return this for any kvm_call_hyp() call */
+#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
+
+#define kvm_arm_exception_type \
+ {ARM_EXCEPTION_IRQ, "IRQ" }, \
+ {ARM_EXCEPTION_EL1_SERROR, "SERROR" }, \
+ {ARM_EXCEPTION_TRAP, "TRAP" }, \
+ {ARM_EXCEPTION_IL, "ILLEGAL" }, \
+ {ARM_EXCEPTION_HYP_GONE, "HYP_GONE" }
+
+/* Per asm/esr.h */
+#define ESR_ELx_EC_UNKNOWN (0x00)
+#define ESR_ELx_EC_WFx (0x01)
+/* Unallocated EC: 0x02 */
+#define ESR_ELx_EC_CP15_32 (0x03)
+#define ESR_ELx_EC_CP15_64 (0x04)
+#define ESR_ELx_EC_CP14_MR (0x05)
+#define ESR_ELx_EC_CP14_LS (0x06)
+#define ESR_ELx_EC_FP_ASIMD (0x07)
+#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */
+#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
+/* Unallocated EC: 0x0A - 0x0B */
+#define ESR_ELx_EC_CP14_64 (0x0C)
+/* Unallocated EC: 0x0d */
+#define ESR_ELx_EC_ILL (0x0E)
+/* Unallocated EC: 0x0F - 0x10 */
+#define ESR_ELx_EC_SVC32 (0x11)
+#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */
+#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */
+/* Unallocated EC: 0x14 */
+#define ESR_ELx_EC_SVC64 (0x15)
+#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */
+#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */
+#define ESR_ELx_EC_SYS64 (0x18)
+#define ESR_ELx_EC_SVE (0x19)
+#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */
+/* Unallocated EC: 0x1b - 0x1E */
+#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
+#define ESR_ELx_EC_IABT_LOW (0x20)
+#define ESR_ELx_EC_IABT_CUR (0x21)
+#define ESR_ELx_EC_PC_ALIGN (0x22)
+/* Unallocated EC: 0x23 */
+#define ESR_ELx_EC_DABT_LOW (0x24)
+#define ESR_ELx_EC_DABT_CUR (0x25)
+#define ESR_ELx_EC_SP_ALIGN (0x26)
+/* Unallocated EC: 0x27 */
+#define ESR_ELx_EC_FP_EXC32 (0x28)
+/* Unallocated EC: 0x29 - 0x2B */
+#define ESR_ELx_EC_FP_EXC64 (0x2C)
+/* Unallocated EC: 0x2D - 0x2E */
+#define ESR_ELx_EC_SERROR (0x2F)
+#define ESR_ELx_EC_BREAKPT_LOW (0x30)
+#define ESR_ELx_EC_BREAKPT_CUR (0x31)
+#define ESR_ELx_EC_SOFTSTP_LOW (0x32)
+#define ESR_ELx_EC_SOFTSTP_CUR (0x33)
+#define ESR_ELx_EC_WATCHPT_LOW (0x34)
+#define ESR_ELx_EC_WATCHPT_CUR (0x35)
+/* Unallocated EC: 0x36 - 0x37 */
+#define ESR_ELx_EC_BKPT32 (0x38)
+/* Unallocated EC: 0x39 */
+#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */
+/* Unallocated EC: 0x3B */
+#define ESR_ELx_EC_BRK64 (0x3C)
+/* Unallocated EC: 0x3D - 0x3F */
+#define ESR_ELx_EC_MAX (0x3F)
+
+#define ECN(x) { ESR_ELx_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+ ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
+ ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \
+ ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \
+ ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \
+ ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
+ ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
+ ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
+ ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
+ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+
+#endif /* ARCH_PERF_ARM64_EXCEPTION_TYPES_H */
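
The ECN() pairs above expand to { code, "NAME" } initializers that the kvm-stat exit-reason tables look up by EC value. A small self-contained sketch of that lookup, with only two EC values copied in for brevity:

#include <stddef.h>
#include <stdio.h>

#define ESR_ELx_EC_SVC64	(0x15)
#define ESR_ELx_EC_DABT_LOW	(0x24)
#define ECN(x) { ESR_ELx_EC_##x, #x }

struct ec_name { int ec; const char *name; };

static const struct ec_name table[] = { ECN(SVC64), ECN(DABT_LOW) };

static const char *ec_to_name(int ec)
{
	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].ec == ec)
			return table[i].name;
	return "UNKNOWN";
}

int main(void)
{
	printf("0x24 -> %s\n", ec_to_name(0x24));	/* DABT_LOW */
	return 0;
}
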
diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index d730666ab95d..97037499152e 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -1,3 +1,6 @@
+#include <linux/kernel.h>
+#include <linux/bits.h>
+#include <linux/bitfield.h>
#include <stdio.h>
#include <stdlib.h>
#include <perf/cpumap.h>
@@ -10,15 +13,14 @@
#define MIDR "/regs/identification/midr_el1"
#define MIDR_SIZE 19
-#define MIDR_REVISION_MASK 0xf
-#define MIDR_VARIANT_SHIFT 20
-#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT)
+#define MIDR_REVISION_MASK GENMASK(3, 0)
+#define MIDR_VARIANT_MASK GENMASK(23, 20)
static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
{
const char *sysfs = sysfs__mountpoint();
- u64 midr = 0;
int cpu;
+ int ret = EINVAL;
if (!sysfs || sz < MIDR_SIZE)
return EINVAL;
@@ -29,8 +31,8 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
char path[PATH_MAX];
FILE *file;
- scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR,
- sysfs, cpus->map[cpu]);
+ scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR,
+ sysfs, RC_CHK_ACCESS(cpus)->map[cpu].cpu);
file = fopen(path, "r");
if (!file) {
@@ -44,27 +46,18 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
}
fclose(file);
- /* Ignore/clear Variant[23:20] and
- * Revision[3:0] of MIDR
- */
- midr = strtoul(buf, NULL, 16);
- midr &= (~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK));
- scnprintf(buf, MIDR_SIZE, "0x%016lx", midr);
/* got midr break loop */
+ ret = 0;
break;
}
perf_cpu_map__put(cpus);
-
- if (!midr)
- return EINVAL;
-
- return 0;
+ return ret;
}
int get_cpuid(char *buf, size_t sz)
{
- struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
+ struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();
int ret;
if (!cpus)
@@ -99,3 +92,47 @@ char *get_cpuid_str(struct perf_pmu *pmu)
return buf;
}
+
+/*
+ * Return 0 if idstr is a higher or equal version of the same part as
+ * mapcpuid. Therefore, if mapcpuid has 0 for revision and variant then any
+ * version of idstr will match as long as it's the same CPU type.
+ *
+ * Return 1 if the CPU type is different or the version of idstr is lower.
+ */
+int strcmp_cpuid_str(const char *mapcpuid, const char *idstr)
+{
+ u64 map_id = strtoull(mapcpuid, NULL, 16);
+ char map_id_variant = FIELD_GET(MIDR_VARIANT_MASK, map_id);
+ char map_id_revision = FIELD_GET(MIDR_REVISION_MASK, map_id);
+ u64 id = strtoull(idstr, NULL, 16);
+ char id_variant = FIELD_GET(MIDR_VARIANT_MASK, id);
+ char id_revision = FIELD_GET(MIDR_REVISION_MASK, id);
+ u64 id_fields = ~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK);
+
+ /* Compare without version first */
+ if ((map_id & id_fields) != (id & id_fields))
+ return 1;
+
+ /*
+ * ID matches, now compare version.
+ *
+ * Arm revisions (like r0p0) are compared here like two digit semver
+ * values, e.g. 1.3 < 2.0 < 2.1 < 2.2.
+ *
+ * r = high value = 'Variant' field in MIDR
+ * p = low value = 'Revision' field in MIDR
+ *
+ */
+ if (id_variant > map_id_variant)
+ return 0;
+
+ if (id_variant == map_id_variant && id_revision >= map_id_revision)
+ return 0;
+
+ /*
+ * variant is less than mapfile variant or variants are the same but
+ * the revision doesn't match. Return no match.
+ */
+ return 1;
+}
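
To make the rXpY comparison above concrete, here is a standalone sketch with the same semantics (cpuid_cmp() and the MIDR values are illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

#define MIDR_REVISION_MASK	0xfULL			/* MIDR_EL1[3:0]   */
#define MIDR_VARIANT_MASK	(0xfULL << 20)		/* MIDR_EL1[23:20] */

/* Returns 0 on match (same part, id version >= mapfile version),
 * 1 otherwise, mirroring strcmp_cpuid_str() above. */
static int cpuid_cmp(uint64_t map_id, uint64_t id)
{
	uint64_t fields = ~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK);
	unsigned int map_r = (map_id & MIDR_VARIANT_MASK) >> 20;
	unsigned int map_p = map_id & MIDR_REVISION_MASK;
	unsigned int id_r = (id & MIDR_VARIANT_MASK) >> 20;
	unsigned int id_p = id & MIDR_REVISION_MASK;

	if ((map_id & fields) != (id & fields))
		return 1;			/* different part */
	if (id_r > map_r || (id_r == map_r && id_p >= map_p))
		return 0;			/* same or newer rXpY */
	return 1;				/* older version */
}

int main(void)
{
	uint64_t map = 0x411fd081;	/* some part, r1p1 */

	printf("%d\n", cpuid_cmp(map, 0x412fd080));	/* r2p0 >= r1p1: 0 */
	printf("%d\n", cpuid_cmp(map, 0x411fd080));	/* r1p0 <  r1p1: 1 */
	return 0;
}
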
diff --git a/tools/perf/arch/arm64/util/hisi-ptt.c b/tools/perf/arch/arm64/util/hisi-ptt.c
new file mode 100644
index 000000000000..ba97c8a562a0
--- /dev/null
+++ b/tools/perf/arch/arm64/util/hisi-ptt.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HiSilicon PCIe Trace and Tuning (PTT) support
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <linux/zalloc.h>
+#include <time.h>
+
+#include <internal/lib.h> // page_size
+#include "../../../util/auxtrace.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/debug.h"
+#include "../../../util/event.h"
+#include "../../../util/evlist.h"
+#include "../../../util/evsel.h"
+#include "../../../util/hisi-ptt.h"
+#include "../../../util/pmu.h"
+#include "../../../util/record.h"
+#include "../../../util/session.h"
+#include "../../../util/tsc.h"
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+
+struct hisi_ptt_recording {
+ struct auxtrace_record itr;
+ struct perf_pmu *hisi_ptt_pmu;
+ struct evlist *evlist;
+};
+
+static size_t
+hisi_ptt_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+ struct evlist *evlist __maybe_unused)
+{
+ return HISI_PTT_AUXTRACE_PRIV_SIZE;
+}
+
+static int hisi_ptt_info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct perf_record_auxtrace_info *auxtrace_info,
+ size_t priv_size)
+{
+ struct hisi_ptt_recording *pttr =
+ container_of(itr, struct hisi_ptt_recording, itr);
+ struct perf_pmu *hisi_ptt_pmu = pttr->hisi_ptt_pmu;
+
+ if (priv_size != HISI_PTT_AUXTRACE_PRIV_SIZE)
+ return -EINVAL;
+
+ if (!session->evlist->core.nr_mmaps)
+ return -EINVAL;
+
+ auxtrace_info->type = PERF_AUXTRACE_HISI_PTT;
+ auxtrace_info->priv[0] = hisi_ptt_pmu->type;
+
+ return 0;
+}
+
+static int hisi_ptt_set_auxtrace_mmap_page(struct record_opts *opts)
+{
+ bool privileged = perf_event_paranoid_check(-1);
+
+ if (!opts->full_auxtrace)
+ return 0;
+
+ if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(16) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ }
+
+ /* Validate auxtrace_mmap_pages */
+ if (opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+ size_t min_sz = KiB(8);
+
+ if (sz < min_sz || !is_power_of_2(sz)) {
+ pr_err("Invalid mmap size for HISI PTT: must be at least %zuKiB and a power of 2\n",
+ min_sz / 1024);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int hisi_ptt_recording_options(struct auxtrace_record *itr,
+ struct evlist *evlist,
+ struct record_opts *opts)
+{
+ struct hisi_ptt_recording *pttr =
+ container_of(itr, struct hisi_ptt_recording, itr);
+ struct perf_pmu *hisi_ptt_pmu = pttr->hisi_ptt_pmu;
+ struct evsel *evsel, *hisi_ptt_evsel = NULL;
+ struct evsel *tracking_evsel;
+ int err;
+
+ pttr->evlist = evlist;
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type == hisi_ptt_pmu->type) {
+ if (hisi_ptt_evsel) {
+ pr_err("There may be only one " HISI_PTT_PMU_NAME "x event\n");
+ return -EINVAL;
+ }
+ evsel->core.attr.freq = 0;
+ evsel->core.attr.sample_period = 1;
+ evsel->needs_auxtrace_mmap = true;
+ hisi_ptt_evsel = evsel;
+ opts->full_auxtrace = true;
+ }
+ }
+
+ err = hisi_ptt_set_auxtrace_mmap_page(opts);
+ if (err)
+ return err;
+ /*
+ * To obtain the auxtrace buffer file descriptor, the auxtrace event
+ * must come first.
+ */
+ evlist__to_front(evlist, hisi_ptt_evsel);
+ evsel__set_sample_bit(hisi_ptt_evsel, TIME);
+
+ /* Add dummy event to keep tracking */
+ err = parse_event(evlist, "dummy:u");
+ if (err)
+ return err;
+
+ tracking_evsel = evlist__last(evlist);
+ evlist__set_tracking_event(evlist, tracking_evsel);
+
+ tracking_evsel->core.attr.freq = 0;
+ tracking_evsel->core.attr.sample_period = 1;
+ evsel__set_sample_bit(tracking_evsel, TIME);
+
+ return 0;
+}
+
+static u64 hisi_ptt_reference(struct auxtrace_record *itr __maybe_unused)
+{
+ return rdtsc();
+}
+
+static void hisi_ptt_recording_free(struct auxtrace_record *itr)
+{
+ struct hisi_ptt_recording *pttr =
+ container_of(itr, struct hisi_ptt_recording, itr);
+
+ free(pttr);
+}
+
+struct auxtrace_record *hisi_ptt_recording_init(int *err,
+ struct perf_pmu *hisi_ptt_pmu)
+{
+ struct hisi_ptt_recording *pttr;
+
+ if (!hisi_ptt_pmu) {
+ *err = -ENODEV;
+ return NULL;
+ }
+
+ pttr = zalloc(sizeof(*pttr));
+ if (!pttr) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ pttr->hisi_ptt_pmu = hisi_ptt_pmu;
+ pttr->itr.pmu = hisi_ptt_pmu;
+ pttr->itr.recording_options = hisi_ptt_recording_options;
+ pttr->itr.info_priv_size = hisi_ptt_info_priv_size;
+ pttr->itr.info_fill = hisi_ptt_info_fill;
+ pttr->itr.free = hisi_ptt_recording_free;
+ pttr->itr.reference = hisi_ptt_reference;
+ pttr->itr.read_finish = auxtrace_record__read_finish;
+ pttr->itr.alignment = 0;
+
+ *err = 0;
+ return &pttr->itr;
+}
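
The AUX buffer sizing rule enforced above (at least 8 KiB and a power of two) can be checked in isolation; a tiny sketch with an illustrative helper name:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Mirrors the HiSilicon PTT AUX buffer validation above. */
static bool ptt_aux_size_ok(size_t sz)
{
	return sz >= 8 * 1024 && (sz & (sz - 1)) == 0;
}

int main(void)
{
	size_t sizes[] = { 4096, 8192, 24576, 16 * 1024 * 1024 };

	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("%zu: %s\n", sizes[i],
		       ptt_aux_size_ok(sizes[i]) ? "ok" : "invalid");
	return 0;
}
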
diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c
new file mode 100644
index 000000000000..6611aa21cba9
--- /dev/null
+++ b/tools/perf/arch/arm64/util/kvm-stat.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <memory.h>
+#include "../../../util/evsel.h"
+#include "../../../util/kvm-stat.h"
+#include "arm64_exception_types.h"
+#include "debug.h"
+
+define_exit_reasons_table(arm64_exit_reasons, kvm_arm_exception_type);
+define_exit_reasons_table(arm64_trap_exit_reasons, kvm_arm_exception_class);
+
+const char *kvm_trap_exit_reason = "esr_ec";
+const char *vcpu_id_str = "id";
+const char *kvm_exit_reason = "ret";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
+const char *kvm_events_tp[] = {
+ "kvm:kvm_entry",
+ "kvm:kvm_exit",
+ NULL,
+};
+
+static void event_get_key(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ key->info = 0;
+ key->key = evsel__intval(evsel, sample, kvm_exit_reason);
+ key->exit_reasons = arm64_exit_reasons;
+
+ /*
+ * TRAP exceptions carry exception class info in esr_ec field
+ * and, hence, we need to use a different exit_reasons table to
+ * properly decode the event's esr_ec.
+ */
+ if (key->key == ARM_EXCEPTION_TRAP) {
+ key->key = evsel__intval(evsel, sample, kvm_trap_exit_reason);
+ key->exit_reasons = arm64_trap_exit_reasons;
+ }
+}
+
+static bool event_begin(struct evsel *evsel,
+ struct perf_sample *sample __maybe_unused,
+ struct event_key *key __maybe_unused)
+{
+ return evsel__name_is(evsel, kvm_entry_trace);
+}
+
+static bool event_end(struct evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ if (evsel__name_is(evsel, kvm_exit_trace)) {
+ event_get_key(evsel, sample, key);
+ return true;
+ }
+ return false;
+}
+
+static struct kvm_events_ops exit_events = {
+ .is_begin_event = event_begin,
+ .is_end_event = event_end,
+ .decode_key = exit_event_decode_key,
+ .name = "VM-EXIT"
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+ {
+ .name = "vmexit",
+ .ops = &exit_events,
+ },
+ { NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+ NULL,
+};
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+ kvm->exit_reasons_isa = "arm64";
+ return 0;
+}
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index d41b27e781d3..aab1cc2bc283 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -1,27 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include "debug.h"
#include "symbol.h"
+#include "callchain.h"
+#include "perf_regs.h"
+#include "record.h"
+#include "util/perf_regs.h"
-/* On arm64, kernel text segment start at high memory address,
- * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
- * address, like 0xffff 0000 00ax xxxx. When only samll amount of
- * memory is used by modules, gap between end of module's text segment
- * and start of kernel text segment may be reach 2G.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-
-#define SYMBOL_LIMIT (1 << 12) /* 4K */
-
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+void arch__add_leaf_frame_record_opts(struct record_opts *opts)
{
- if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) ||
- (strchr(p->name, '[') == NULL && strchr(c->name, '[')))
- /* Limit range of last symbol in module and kernel */
- p->end += SYMBOL_LIMIT;
- else
- p->end = c->start;
- pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
+ const struct sample_reg *sample_reg_masks = arch__sample_reg_masks();
+
+ opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;
}
diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c
new file mode 100644
index 000000000000..9f8da7937255
--- /dev/null
+++ b/tools/perf/arch/arm64/util/mem-events.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/map_symbol.h"
+#include "util/mem-events.h"
+#include "mem-events.h"
+
+#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a }
+
+struct perf_mem_event perf_mem_events_arm[PERF_MEM_EVENTS__MAX] = {
+ E("spe-load", "%s/ts_enable=1,pa_enable=1,load_filter=1,store_filter=0,min_latency=%u/", NULL, true, 0),
+ E("spe-store", "%s/ts_enable=1,pa_enable=1,load_filter=0,store_filter=1/", NULL, false, 0),
+ E("spe-ldst", "%s/ts_enable=1,pa_enable=1,load_filter=1,store_filter=1,min_latency=%u/", NULL, true, 0),
+};
diff --git a/tools/perf/arch/arm64/util/mem-events.h b/tools/perf/arch/arm64/util/mem-events.h
new file mode 100644
index 000000000000..5fc50be4be38
--- /dev/null
+++ b/tools/perf/arch/arm64/util/mem-events.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARM64_MEM_EVENTS_H
+#define _ARM64_MEM_EVENTS_H
+
+extern struct perf_mem_event perf_mem_events_arm[PERF_MEM_EVENTS__MAX];
+
+#endif /* _ARM64_MEM_EVENTS_H */
diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
index 2833e101a7c6..09308665e28a 100644
--- a/tools/perf/arch/arm64/util/perf_regs.c
+++ b/tools/perf/arch/arm64/util/perf_regs.c
@@ -1,6 +1,182 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <regex.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+
+#include "perf_regs.h"
+#include "../../../perf-sys.h"
+#include "../../../util/debug.h"
+#include "../../../util/event.h"
#include "../../../util/perf_regs.h"
-const struct sample_reg sample_reg_masks[] = {
+#ifndef HWCAP_SVE
+#define HWCAP_SVE (1 << 22)
+#endif
+
+static const struct sample_reg sample_reg_masks[] = {
+ SMPL_REG(x0, PERF_REG_ARM64_X0),
+ SMPL_REG(x1, PERF_REG_ARM64_X1),
+ SMPL_REG(x2, PERF_REG_ARM64_X2),
+ SMPL_REG(x3, PERF_REG_ARM64_X3),
+ SMPL_REG(x4, PERF_REG_ARM64_X4),
+ SMPL_REG(x5, PERF_REG_ARM64_X5),
+ SMPL_REG(x6, PERF_REG_ARM64_X6),
+ SMPL_REG(x7, PERF_REG_ARM64_X7),
+ SMPL_REG(x8, PERF_REG_ARM64_X8),
+ SMPL_REG(x9, PERF_REG_ARM64_X9),
+ SMPL_REG(x10, PERF_REG_ARM64_X10),
+ SMPL_REG(x11, PERF_REG_ARM64_X11),
+ SMPL_REG(x12, PERF_REG_ARM64_X12),
+ SMPL_REG(x13, PERF_REG_ARM64_X13),
+ SMPL_REG(x14, PERF_REG_ARM64_X14),
+ SMPL_REG(x15, PERF_REG_ARM64_X15),
+ SMPL_REG(x16, PERF_REG_ARM64_X16),
+ SMPL_REG(x17, PERF_REG_ARM64_X17),
+ SMPL_REG(x18, PERF_REG_ARM64_X18),
+ SMPL_REG(x19, PERF_REG_ARM64_X19),
+ SMPL_REG(x20, PERF_REG_ARM64_X20),
+ SMPL_REG(x21, PERF_REG_ARM64_X21),
+ SMPL_REG(x22, PERF_REG_ARM64_X22),
+ SMPL_REG(x23, PERF_REG_ARM64_X23),
+ SMPL_REG(x24, PERF_REG_ARM64_X24),
+ SMPL_REG(x25, PERF_REG_ARM64_X25),
+ SMPL_REG(x26, PERF_REG_ARM64_X26),
+ SMPL_REG(x27, PERF_REG_ARM64_X27),
+ SMPL_REG(x28, PERF_REG_ARM64_X28),
+ SMPL_REG(x29, PERF_REG_ARM64_X29),
+ SMPL_REG(lr, PERF_REG_ARM64_LR),
+ SMPL_REG(sp, PERF_REG_ARM64_SP),
+ SMPL_REG(pc, PERF_REG_ARM64_PC),
+ SMPL_REG(vg, PERF_REG_ARM64_VG),
SMPL_REG_END
};
+
+/* %xNUM */
+#define SDT_OP_REGEX1 "^(x[1-2]?[0-9]|3[0-1])$"
+
+/* [sp], [sp, NUM] */
+#define SDT_OP_REGEX2 "^\\[sp(, )?([0-9]+)?\\]$"
+
+static regex_t sdt_op_regex1, sdt_op_regex2;
+
+static int sdt_init_op_regex(void)
+{
+ static int initialized;
+ int ret = 0;
+
+ if (initialized)
+ return 0;
+
+ ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED);
+ if (ret)
+ goto error;
+
+ ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED);
+ if (ret)
+ goto free_regex1;
+
+ initialized = 1;
+ return 0;
+
+free_regex1:
+ regfree(&sdt_op_regex1);
+error:
+ pr_debug4("Regex compilation error.\n");
+ return ret;
+}
+
+/*
+ * SDT marker arguments on Arm64 use %xREG or [sp, NUM]; currently
+ * only these two formats are supported.
+ */
+int arch_sdt_arg_parse_op(char *old_op, char **new_op)
+{
+ int ret, new_len;
+ regmatch_t rm[5];
+
+ ret = sdt_init_op_regex();
+ if (ret < 0)
+ return ret;
+
+ if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) {
+ /* Extract xNUM */
+ new_len = 2; /* % NULL */
+ new_len += (int)(rm[1].rm_eo - rm[1].rm_so);
+
+ *new_op = zalloc(new_len);
+ if (!*new_op)
+ return -ENOMEM;
+
+ scnprintf(*new_op, new_len, "%%%.*s",
+ (int)(rm[1].rm_eo - rm[1].rm_so), old_op + rm[1].rm_so);
+ } else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) {
+ /* [sp], [sp, NUM] or [sp,NUM] */
+ new_len = 7; /* + ( % s p ) NULL */
+
+ /* If the argument is [sp], need to fill offset '0' */
+ if (rm[2].rm_so == -1)
+ new_len += 1;
+ else
+ new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
+
+ *new_op = zalloc(new_len);
+ if (!*new_op)
+ return -ENOMEM;
+
+ if (rm[2].rm_so == -1)
+ scnprintf(*new_op, new_len, "+0(%%sp)");
+ else
+ scnprintf(*new_op, new_len, "+%.*s(%%sp)",
+ (int)(rm[2].rm_eo - rm[2].rm_so),
+ old_op + rm[2].rm_so);
+ } else {
+ pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+ return SDT_ARG_SKIP;
+ }
+
+ return SDT_ARG_VALID;
+}
+
+uint64_t arch__intr_reg_mask(void)
+{
+ return PERF_REGS_MASK;
+}
+
+uint64_t arch__user_reg_mask(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .sample_type = PERF_SAMPLE_REGS_USER,
+ .disabled = 1,
+ .exclude_kernel = 1,
+ .sample_period = 1,
+ .sample_regs_user = PERF_REGS_MASK
+ };
+ int fd;
+
+ if (getauxval(AT_HWCAP) & HWCAP_SVE)
+ attr.sample_regs_user |= SMPL_REG_MASK(PERF_REG_ARM64_VG);
+
+ /*
+ * Check if the pmu supports perf extended regs, before
+ * returning the register mask to sample.
+ */
+ if (attr.sample_regs_user != PERF_REGS_MASK) {
+ event_attr_init(&attr);
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (fd != -1) {
+ close(fd);
+ return attr.sample_regs_user;
+ }
+ }
+ return PERF_REGS_MASK;
+}
+
+const struct sample_reg *arch__sample_reg_masks(void)
+{
+ return sample_reg_masks;
+}
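
The two SDT regexes above map operands like "x3" to "%x3" and "[sp, 16]" to "+16(%sp)". A standalone demonstration using the same patterns (the driver program itself is illustrative):

#include <regex.h>
#include <stdio.h>

int main(void)
{
	/* Same patterns as SDT_OP_REGEX1/SDT_OP_REGEX2 above */
	regex_t re1, re2;
	const char *args[] = { "x3", "[sp]", "[sp, 16]", "[x0]" };

	if (regcomp(&re1, "^(x[1-2]?[0-9]|3[0-1])$", REG_EXTENDED) ||
	    regcomp(&re2, "^\\[sp(, )?([0-9]+)?\\]$", REG_EXTENDED))
		return 1;

	for (int i = 0; i < 4; i++) {
		regmatch_t rm[5];

		if (!regexec(&re1, args[i], 3, rm, 0))
			printf("%-9s -> %%%s\n", args[i], args[i]);
		else if (!regexec(&re2, args[i], 5, rm, 0) && rm[2].rm_so == -1)
			printf("%-9s -> +0(%%sp)\n", args[i]);
		else if (!regexec(&re2, args[i], 5, rm, 0))
			printf("%-9s -> +%.*s(%%sp)\n", args[i],
			       (int)(rm[2].rm_eo - rm[2].rm_so),
			       args[i] + rm[2].rm_so);
		else
			printf("%-9s -> skipped\n", args[i]);
	}
	regfree(&re1);
	regfree(&re2);
	return 0;
}
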
diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
new file mode 100644
index 000000000000..2a4eab2d160e
--- /dev/null
+++ b/tools/perf/arch/arm64/util/pmu.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <internal/cpumap.h>
+#include "../../../util/cpumap.h"
+#include "../../../util/header.h"
+#include "../../../util/pmu.h"
+#include "../../../util/pmus.h"
+#include <api/fs/fs.h>
+#include <math.h>
+
+const struct pmu_metrics_table *pmu_metrics_table__find(void)
+{
+ struct perf_pmu *pmu;
+
+ /* Metrics aren't currently supported on heterogeneous Arm systems */
+ if (perf_pmus__num_core_pmus() > 1)
+ return NULL;
+
+ /* Doesn't matter which one here because they'll all be the same */
+ pmu = perf_pmus__find_core_pmu();
+ if (pmu)
+ return perf_pmu__find_metrics_table(pmu);
+
+ return NULL;
+}
+
+double perf_pmu__cpu_slots_per_cycle(void)
+{
+ char path[PATH_MAX];
+ unsigned long long slots = 0;
+ struct perf_pmu *pmu = perf_pmus__find_core_pmu();
+
+ if (pmu) {
+ perf_pmu__pathname_scnprintf(path, sizeof(path),
+ pmu->name, "caps/slots");
+ /*
+ * The value of slots fits in 32 bits, but
+ * filename__read_int can't read a value with a 0x prefix,
+ * so use filename__read_ull instead.
+ */
+ filename__read_ull(path, &slots);
+ }
+
+ return slots ? (double)slots : NAN;
+}
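
On the 0x-prefix caveat in the comment above: parsing with base 0 auto-detects the prefix, which is presumably why the unsigned helper works here. A one-line illustration (the literal "0x5" is made up):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* caps/slots may be exposed as hex text, e.g. "0x5" */
	unsigned long long slots = strtoull("0x5", NULL, 0);

	printf("slots = %llu\n", slots);	/* 5 */
	return 0;
}
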
diff --git a/tools/perf/arch/arm64/util/tsc.c b/tools/perf/arch/arm64/util/tsc.c
new file mode 100644
index 000000000000..cc85bd9e73f1
--- /dev/null
+++ b/tools/perf/arch/arm64/util/tsc.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+
+#include "../../../util/tsc.h"
+
+u64 rdtsc(void)
+{
+ u64 val;
+
+ /*
+ * According to ARM DDI 0487F.c, from Armv8.0 to Armv8.5 inclusive, the
+ * system counter is at least 56 bits wide; from Armv8.6, the counter
+ * must be 64 bits wide. So the system counter could be less than 64
+ * bits wide, in which case the flag 'cap_user_time_short' is set to
+ * true.
+ */
+ asm volatile("mrs %0, cntvct_el0" : "=r" (val));
+
+ return val;
+}
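
For a sense of how this counter is used outside perf's own conversion machinery (perf converts timestamps via the mmap page's cap_user_time fields), the counter frequency is readable from cntfrq_el0 and ticks can be converted to nanoseconds. A rough standalone sketch, not part of the patch:

#include <stdint.h>
#include <stdio.h>

static inline uint64_t read_cntvct(void)
{
	uint64_t val;

	asm volatile("mrs %0, cntvct_el0" : "=r" (val));
	return val;
}

static inline uint64_t read_cntfrq(void)
{
	uint64_t frq;

	asm volatile("mrs %0, cntfrq_el0" : "=r" (frq));
	return frq;
}

int main(void)
{
	uint64_t t0 = read_cntvct();
	/* ... workload under test ... */
	uint64_t t1 = read_cntvct();

	/* fine for short intervals; the multiply can overflow for long ones */
	printf("elapsed: %llu ns\n",
	       (unsigned long long)((t1 - t0) * 1000000000ull / read_cntfrq()));
	return 0;
}
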
diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c
index a50941629649..e056d50ab42e 100644
--- a/tools/perf/arch/arm64/util/unwind-libdw.c
+++ b/tools/perf/arch/arm64/util/unwind-libdw.c
@@ -1,8 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <elfutils/libdwfl.h>
+#include "perf_regs.h"
#include "../../../util/unwind-libdw.h"
#include "../../../util/perf_regs.h"
-#include "../../../util/event.h"
+#include "../../../util/sample.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c
index 1495a9523a23..871af5992298 100644
--- a/tools/perf/arch/arm64/util/unwind-libunwind.c
+++ b/tools/perf/arch/arm64/util/unwind-libunwind.c
@@ -4,83 +4,14 @@
#ifndef REMOTE_UNWIND_LIBUNWIND
#include <libunwind.h>
#include "perf_regs.h"
-#include "../../util/unwind.h"
+#include "../../../util/unwind.h"
#endif
-#include "../../util/debug.h"
+#include "../../../util/debug.h"
int LIBUNWIND__ARCH_REG_ID(int regnum)
{
- switch (regnum) {
- case UNW_AARCH64_X0:
- return PERF_REG_ARM64_X0;
- case UNW_AARCH64_X1:
- return PERF_REG_ARM64_X1;
- case UNW_AARCH64_X2:
- return PERF_REG_ARM64_X2;
- case UNW_AARCH64_X3:
- return PERF_REG_ARM64_X3;
- case UNW_AARCH64_X4:
- return PERF_REG_ARM64_X4;
- case UNW_AARCH64_X5:
- return PERF_REG_ARM64_X5;
- case UNW_AARCH64_X6:
- return PERF_REG_ARM64_X6;
- case UNW_AARCH64_X7:
- return PERF_REG_ARM64_X7;
- case UNW_AARCH64_X8:
- return PERF_REG_ARM64_X8;
- case UNW_AARCH64_X9:
- return PERF_REG_ARM64_X9;
- case UNW_AARCH64_X10:
- return PERF_REG_ARM64_X10;
- case UNW_AARCH64_X11:
- return PERF_REG_ARM64_X11;
- case UNW_AARCH64_X12:
- return PERF_REG_ARM64_X12;
- case UNW_AARCH64_X13:
- return PERF_REG_ARM64_X13;
- case UNW_AARCH64_X14:
- return PERF_REG_ARM64_X14;
- case UNW_AARCH64_X15:
- return PERF_REG_ARM64_X15;
- case UNW_AARCH64_X16:
- return PERF_REG_ARM64_X16;
- case UNW_AARCH64_X17:
- return PERF_REG_ARM64_X17;
- case UNW_AARCH64_X18:
- return PERF_REG_ARM64_X18;
- case UNW_AARCH64_X19:
- return PERF_REG_ARM64_X19;
- case UNW_AARCH64_X20:
- return PERF_REG_ARM64_X20;
- case UNW_AARCH64_X21:
- return PERF_REG_ARM64_X21;
- case UNW_AARCH64_X22:
- return PERF_REG_ARM64_X22;
- case UNW_AARCH64_X23:
- return PERF_REG_ARM64_X23;
- case UNW_AARCH64_X24:
- return PERF_REG_ARM64_X24;
- case UNW_AARCH64_X25:
- return PERF_REG_ARM64_X25;
- case UNW_AARCH64_X26:
- return PERF_REG_ARM64_X26;
- case UNW_AARCH64_X27:
- return PERF_REG_ARM64_X27;
- case UNW_AARCH64_X28:
- return PERF_REG_ARM64_X28;
- case UNW_AARCH64_X29:
- return PERF_REG_ARM64_X29;
- case UNW_AARCH64_X30:
- return PERF_REG_ARM64_LR;
- case UNW_AARCH64_SP:
- return PERF_REG_ARM64_SP;
- case UNW_AARCH64_PC:
- return PERF_REG_ARM64_PC;
- default:
- pr_err("unwind: invalid reg id %d\n", regnum);
+ if (regnum < 0 || regnum >= PERF_REG_ARM64_EXTENDED_MAX)
return -EINVAL;
- }
- return -EINVAL;
+ return regnum;
}