aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/perf
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/perf')
-rw-r--r--arch/powerpc/perf/core-book3s.c39
-rw-r--r--arch/powerpc/perf/hv-24x7.c10
-rw-r--r--arch/powerpc/perf/hv-gpci.c10
-rw-r--r--arch/powerpc/perf/imc-pmu.c25
-rw-r--r--arch/powerpc/perf/isa207-common.c58
-rw-r--r--arch/powerpc/perf/isa207-common.h9
-rw-r--r--arch/powerpc/perf/perf_regs.c13
-rw-r--r--arch/powerpc/perf/power9-events-list.h24
-rw-r--r--arch/powerpc/perf/power9-pmu.c26
9 files changed, 134 insertions, 80 deletions
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 81f8a0c838ae..b0723002a396 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -10,6 +10,7 @@
*/
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/clock.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
@@ -130,6 +131,14 @@ static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
+bool is_sier_available(void)
+{
+ if (ppmu->flags & PPMU_HAS_SIER)
+ return true;
+
+ return false;
+}
+
static bool regs_use_siar(struct pt_regs *regs)
{
/*
@@ -864,6 +873,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
int i, j;
unsigned long addf = ppmu->add_fields;
unsigned long tadd = ppmu->test_adder;
+ unsigned long grp_mask = ppmu->group_constraint_mask;
+ unsigned long grp_val = ppmu->group_constraint_val;
if (n_ev > ppmu->n_counter)
return -1;
@@ -884,15 +895,23 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
for (i = 0; i < n_ev; ++i) {
nv = (value | cpuhw->avalues[i][0]) +
(value & cpuhw->avalues[i][0] & addf);
- if ((((nv + tadd) ^ value) & mask) != 0 ||
- (((nv + tadd) ^ cpuhw->avalues[i][0]) &
- cpuhw->amasks[i][0]) != 0)
+
+ if (((((nv + tadd) ^ value) & mask) & (~grp_mask)) != 0)
+ break;
+
+ if (((((nv + tadd) ^ cpuhw->avalues[i][0]) & cpuhw->amasks[i][0])
+ & (~grp_mask)) != 0)
break;
+
value = nv;
mask |= cpuhw->amasks[i][0];
}
- if (i == n_ev)
- return 0; /* all OK */
+ if (i == n_ev) {
+ if ((value & mask & grp_mask) != (mask & grp_val))
+ return -1;
+ else
+ return 0; /* all OK */
+ }
/* doesn't work, gather alternatives... */
if (!ppmu->get_alternatives)
@@ -2148,7 +2167,7 @@ static bool pmc_overflow(unsigned long val)
/*
* Performance monitor interrupt stuff
*/
-static void perf_event_interrupt(struct pt_regs *regs)
+static void __perf_event_interrupt(struct pt_regs *regs)
{
int i, j;
struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
@@ -2232,6 +2251,14 @@ static void perf_event_interrupt(struct pt_regs *regs)
irq_exit();
}
+static void perf_event_interrupt(struct pt_regs *regs)
+{
+ u64 start_clock = sched_clock();
+
+ __perf_event_interrupt(regs);
+ perf_sample_event_took(sched_clock() - start_clock);
+}
+
static int power_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 72238eedc360..d2b8e6061933 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -1306,15 +1306,6 @@ static int h_24x7_event_init(struct perf_event *event)
return -EINVAL;
}
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
/* no branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
@@ -1577,6 +1568,7 @@ static struct pmu h_24x7_pmu = {
.start_txn = h_24x7_event_start_txn,
.commit_txn = h_24x7_event_commit_txn,
.cancel_txn = h_24x7_event_cancel_txn,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static int hv_24x7_init(void)
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 43fabb3cae0f..735e77b09cdb 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -232,15 +232,6 @@ static int h_gpci_event_init(struct perf_event *event)
return -EINVAL;
}
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
/* no branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
@@ -285,6 +276,7 @@ static struct pmu h_gpci_pmu = {
.start = h_gpci_event_start,
.stop = h_gpci_event_stop,
.read = h_gpci_event_update,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static int hv_gpci_init(void)
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 6954636b16d1..b1c37cc3fa98 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -28,13 +28,13 @@ static DEFINE_MUTEX(nest_init_lock);
static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
static struct imc_pmu **per_nest_pmu_arr;
static cpumask_t nest_imc_cpumask;
-struct imc_pmu_ref *nest_imc_refc;
+static struct imc_pmu_ref *nest_imc_refc;
static int nest_pmus;
/* Core IMC data structures and variables */
static cpumask_t core_imc_cpumask;
-struct imc_pmu_ref *core_imc_refc;
+static struct imc_pmu_ref *core_imc_refc;
static struct imc_pmu *core_imc_pmu;
/* Thread IMC data structures and variables */
@@ -43,7 +43,7 @@ static DEFINE_PER_CPU(u64 *, thread_imc_mem);
static struct imc_pmu *thread_imc_pmu;
static int thread_imc_mem_size;
-struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct imc_pmu, pmu);
}
@@ -473,15 +473,6 @@ static int nest_imc_event_init(struct perf_event *event)
if (event->hw.sample_period)
return -EINVAL;
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
if (event->cpu < 0)
return -EINVAL;
@@ -748,15 +739,6 @@ static int core_imc_event_init(struct perf_event *event)
if (event->hw.sample_period)
return -EINVAL;
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
if (event->cpu < 0)
return -EINVAL;
@@ -1069,6 +1051,7 @@ static int update_pmu_ops(struct imc_pmu *pmu)
pmu->pmu.stop = imc_event_stop;
pmu->pmu.read = imc_event_update;
pmu->pmu.attr_groups = pmu->attr_groups;
+ pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
switch (pmu->domain) {
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 177de814286f..a6c24d866b2f 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -148,6 +148,14 @@ static bool is_thresh_cmp_valid(u64 event)
return true;
}
+static unsigned int dc_ic_rld_quad_l1_sel(u64 event)
+{
+ unsigned int cache;
+
+ cache = (event >> EVENT_CACHE_SEL_SHIFT) & MMCR1_DC_IC_QUAL_MASK;
+ return cache;
+}
+
static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
{
u64 ret = PERF_MEM_NA;
@@ -226,8 +234,13 @@ void isa207_get_mem_weight(u64 *weight)
u64 mmcra = mfspr(SPRN_MMCRA);
u64 exp = MMCRA_THR_CTR_EXP(mmcra);
u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
+ u64 sier = mfspr(SPRN_SIER);
+ u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
- *weight = mantissa << (2 * exp);
+ if (val == 0 || val == 7)
+ *weight = 0;
+ else
+ *weight = mantissa << (2 * exp);
}
int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
@@ -274,19 +287,27 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
}
if (unit >= 6 && unit <= 9) {
- /*
- * L2/L3 events contain a cache selector field, which is
- * supposed to be programmed into MMCRC. However MMCRC is only
- * HV writable, and there is no API for guest kernels to modify
- * it. The solution is for the hypervisor to initialise the
- * field to zeroes, and for us to only ever allow events that
- * have a cache selector of zero. The bank selector (bit 3) is
- * irrelevant, as long as the rest of the value is 0.
- */
- if (cache & 0x7)
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ mask |= CNST_CACHE_GROUP_MASK;
+ value |= CNST_CACHE_GROUP_VAL(event & 0xff);
+
+ mask |= CNST_CACHE_PMC4_MASK;
+ if (pmc == 4)
+ value |= CNST_CACHE_PMC4_VAL;
+ } else if (cache & 0x7) {
+ /*
+ * L2/L3 events contain a cache selector field, which is
+ * supposed to be programmed into MMCRC. However MMCRC is only
+ * HV writable, and there is no API for guest kernels to modify
+ * it. The solution is for the hypervisor to initialise the
+ * field to zeroes, and for us to only ever allow events that
+ * have a cache selector of zero. The bank selector (bit 3) is
+ * irrelevant, as long as the rest of the value is 0.
+ */
return -1;
+ }
- } else if (event & EVENT_IS_L1) {
+ } else if (cpu_has_feature(CPU_FTR_ARCH_300) || (event & EVENT_IS_L1)) {
mask |= CNST_L1_QUAL_MASK;
value |= CNST_L1_QUAL_VAL(cache);
}
@@ -389,11 +410,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
/* In continuous sampling mode, update SDAR on TLB miss */
mmcra_sdar_mode(event[i], &mmcra);
- if (event[i] & EVENT_IS_L1) {
- cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
- mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT;
- cache >>= 1;
- mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ cache = dc_ic_rld_quad_l1_sel(event[i]);
+ mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
+ } else {
+ if (event[i] & EVENT_IS_L1) {
+ cache = dc_ic_rld_quad_l1_sel(event[i]);
+ mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
+ }
}
if (is_event_marked(event[i])) {
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 0028f4b9490d..91350f42a662 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -134,6 +134,11 @@
#define CNST_SAMPLE_VAL(v) (((v) & EVENT_SAMPLE_MASK) << 16)
#define CNST_SAMPLE_MASK CNST_SAMPLE_VAL(EVENT_SAMPLE_MASK)
+#define CNST_CACHE_GROUP_VAL(v) (((v) & 0xffull) << 55)
+#define CNST_CACHE_GROUP_MASK CNST_CACHE_GROUP_VAL(0xff)
+#define CNST_CACHE_PMC4_VAL (1ull << 54)
+#define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL
+
/*
* For NC we are counting up to 4 events. This requires three bits, and we need
* the fifth event to overflow and set the 4th bit. To achieve that we bias the
@@ -163,8 +168,8 @@
#define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1))
#define MMCR1_PMCSEL_SHIFT(pmc) (24 - (((pmc) - 1)) * 8)
#define MMCR1_FAB_SHIFT 36
-#define MMCR1_DC_QUAL_SHIFT 47
-#define MMCR1_IC_QUAL_SHIFT 46
+#define MMCR1_DC_IC_QUAL_MASK 0x3
+#define MMCR1_DC_IC_QUAL_SHIFT 46
/* MMCR1 Combine bits macro for power9 */
#define p9_MMCR1_COMBINE_SHIFT(pmc) (38 - ((pmc - 1) * 2))
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index 09ceea6175ba..3349f3f8fe84 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -69,6 +69,8 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
};
u64 perf_reg_value(struct pt_regs *regs, int idx)
@@ -76,6 +78,17 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
return 0;
+ if (idx == PERF_REG_POWERPC_SIER &&
+ (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
+ IS_ENABLED(CONFIG_PPC32) ||
+ !is_sier_available()))
+ return 0;
+
+ if (idx == PERF_REG_POWERPC_MMCRA &&
+ (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
+ IS_ENABLED(CONFIG_PPC32)))
+ return 0;
+
return regs_get_register(regs, pt_regs_offset[idx]);
}
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 7de344b7d9cc..063c9d9f2516 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -97,3 +97,27 @@ EVENT(PM_MRK_DTLB_MISS_64K, 0x3d156)
EVENT(PM_DTLB_MISS_16M, 0x4c056)
EVENT(PM_DTLB_MISS_1G, 0x4c05a)
EVENT(PM_MRK_DTLB_MISS_16M, 0x4c15e)
+
+/*
+ * Memory Access Events
+ *
+ * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0)
+ * To enable capturing of memory profiling, these MMCRA bits
+ * needs to be programmed and corresponding raw event format
+ * encoding.
+ *
+ * MMCRA bits encoding needed are
+ * SM (Sampling Mode)
+ * EM (Eligibility for Random Sampling)
+ * TECE (Threshold Event Counter Event)
+ * TS (Threshold Start Event)
+ * TE (Threshold End Event)
+ *
+ * Corresponding Raw Encoding bits:
+ * sample [EM,SM]
+ * thresh_sel (TECE)
+ * thresh start (TS)
+ * thresh end (TE)
+ */
+EVENT(MEM_LOADS, 0x34340401e0)
+EVENT(MEM_STORES, 0x343c0401e0)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index e012b1030a5b..030544e35959 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -63,16 +63,8 @@
* MMCRA[9:11] = thresh_cmp[0:2]
* MMCRA[12:18] = thresh_cmp[3:9]
*
- * if unit == 6 or unit == 7
- * MMCRC[53:55] = cache_sel[1:3] (L2EVENT_SEL)
- * else if unit == 8 or unit == 9:
- * if cache_sel[0] == 0: # L3 bank
- * MMCRC[47:49] = cache_sel[1:3] (L3EVENT_SEL0)
- * else if cache_sel[0] == 1:
- * MMCRC[50:51] = cache_sel[2:3] (L3EVENT_SEL1)
- * else if cache_sel[1]: # L1 event
- * MMCR1[16] = cache_sel[2]
- * MMCR1[17] = cache_sel[3]
+ * MMCR1[16] = cache_sel[2]
+ * MMCR1[17] = cache_sel[3]
*
* if mark:
* MMCRA[63] = 1 (SAMPLE_ENABLE)
@@ -168,6 +160,8 @@ GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL);
GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1_FIN);
+GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS);
+GENERIC_EVENT_ATTR(mem-stores, MEM_STORES);
CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1_FIN);
CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1);
@@ -179,8 +173,6 @@ CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE);
CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3);
CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL);
-CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
-CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST);
CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL);
CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
@@ -195,6 +187,8 @@ static struct attribute *power9_events_attr[] = {
GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
GENERIC_EVENT_PTR(PM_LD_REF_L1),
GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN),
+ GENERIC_EVENT_PTR(MEM_LOADS),
+ GENERIC_EVENT_PTR(MEM_STORES),
CACHE_EVENT_PTR(PM_LD_MISS_L1_FIN),
CACHE_EVENT_PTR(PM_LD_REF_L1),
CACHE_EVENT_PTR(PM_L1_PREF),
@@ -205,8 +199,6 @@ static struct attribute *power9_events_attr[] = {
CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
CACHE_EVENT_PTR(PM_DATA_FROM_L3),
CACHE_EVENT_PTR(PM_L3_PREF_ALL),
- CACHE_EVENT_PTR(PM_L2_ST_MISS),
- CACHE_EVENT_PTR(PM_L2_ST),
CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
CACHE_EVENT_PTR(PM_BR_CMPL),
CACHE_EVENT_PTR(PM_DTLB_MISS),
@@ -354,8 +346,8 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS,
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = PM_L2_ST,
- [ C(RESULT_MISS) ] = PM_L2_ST_MISS,
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
@@ -427,6 +419,8 @@ static struct power_pmu power9_pmu = {
.n_counter = MAX_PMU_COUNTERS,
.add_fields = ISA207_ADD_FIELDS,
.test_adder = ISA207_TEST_ADDER,
+ .group_constraint_mask = CNST_CACHE_PMC4_MASK,
+ .group_constraint_val = CNST_CACHE_PMC4_VAL,
.compute_mmcr = isa207_compute_mmcr,
.config_bhrb = power9_config_bhrb,
.bhrb_filter_map = power9_bhrb_filter_map,