aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/perf
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/perf')
-rw-r--r--arch/powerpc/perf/core-book3s.c39
-rw-r--r--arch/powerpc/perf/imc-pmu.c6
-rw-r--r--arch/powerpc/perf/isa207-common.c58
-rw-r--r--arch/powerpc/perf/isa207-common.h9
-rw-r--r--arch/powerpc/perf/perf_regs.c7
-rw-r--r--arch/powerpc/perf/power9-pmu.c22
6 files changed, 97 insertions, 44 deletions
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 81f8a0c838ae..b0723002a396 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -10,6 +10,7 @@
*/
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/clock.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
@@ -130,6 +131,14 @@ static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
+bool is_sier_available(void)
+{
+ if (ppmu->flags & PPMU_HAS_SIER)
+ return true;
+
+ return false;
+}
+
static bool regs_use_siar(struct pt_regs *regs)
{
/*
@@ -864,6 +873,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
int i, j;
unsigned long addf = ppmu->add_fields;
unsigned long tadd = ppmu->test_adder;
+ unsigned long grp_mask = ppmu->group_constraint_mask;
+ unsigned long grp_val = ppmu->group_constraint_val;
if (n_ev > ppmu->n_counter)
return -1;
@@ -884,15 +895,23 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
for (i = 0; i < n_ev; ++i) {
nv = (value | cpuhw->avalues[i][0]) +
(value & cpuhw->avalues[i][0] & addf);
- if ((((nv + tadd) ^ value) & mask) != 0 ||
- (((nv + tadd) ^ cpuhw->avalues[i][0]) &
- cpuhw->amasks[i][0]) != 0)
+
+ if (((((nv + tadd) ^ value) & mask) & (~grp_mask)) != 0)
+ break;
+
+ if (((((nv + tadd) ^ cpuhw->avalues[i][0]) & cpuhw->amasks[i][0])
+ & (~grp_mask)) != 0)
break;
+
value = nv;
mask |= cpuhw->amasks[i][0];
}
- if (i == n_ev)
- return 0; /* all OK */
+ if (i == n_ev) {
+ if ((value & mask & grp_mask) != (mask & grp_val))
+ return -1;
+ else
+ return 0; /* all OK */
+ }
/* doesn't work, gather alternatives... */
if (!ppmu->get_alternatives)
@@ -2148,7 +2167,7 @@ static bool pmc_overflow(unsigned long val)
/*
* Performance monitor interrupt stuff
*/
-static void perf_event_interrupt(struct pt_regs *regs)
+static void __perf_event_interrupt(struct pt_regs *regs)
{
int i, j;
struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
@@ -2232,6 +2251,14 @@ static void perf_event_interrupt(struct pt_regs *regs)
irq_exit();
}
+static void perf_event_interrupt(struct pt_regs *regs)
+{
+ u64 start_clock = sched_clock();
+
+ __perf_event_interrupt(regs);
+ perf_sample_event_took(sched_clock() - start_clock);
+}
+
static int power_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 6954636b16d1..f292a3f284f1 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -28,13 +28,13 @@ static DEFINE_MUTEX(nest_init_lock);
static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
static struct imc_pmu **per_nest_pmu_arr;
static cpumask_t nest_imc_cpumask;
-struct imc_pmu_ref *nest_imc_refc;
+static struct imc_pmu_ref *nest_imc_refc;
static int nest_pmus;
/* Core IMC data structures and variables */
static cpumask_t core_imc_cpumask;
-struct imc_pmu_ref *core_imc_refc;
+static struct imc_pmu_ref *core_imc_refc;
static struct imc_pmu *core_imc_pmu;
/* Thread IMC data structures and variables */
@@ -43,7 +43,7 @@ static DEFINE_PER_CPU(u64 *, thread_imc_mem);
static struct imc_pmu *thread_imc_pmu;
static int thread_imc_mem_size;
-struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct imc_pmu, pmu);
}
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 177de814286f..a6c24d866b2f 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -148,6 +148,14 @@ static bool is_thresh_cmp_valid(u64 event)
return true;
}
+static unsigned int dc_ic_rld_quad_l1_sel(u64 event)
+{
+ unsigned int cache;
+
+ cache = (event >> EVENT_CACHE_SEL_SHIFT) & MMCR1_DC_IC_QUAL_MASK;
+ return cache;
+}
+
static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
{
u64 ret = PERF_MEM_NA;
@@ -226,8 +234,13 @@ void isa207_get_mem_weight(u64 *weight)
u64 mmcra = mfspr(SPRN_MMCRA);
u64 exp = MMCRA_THR_CTR_EXP(mmcra);
u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
+ u64 sier = mfspr(SPRN_SIER);
+ u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
- *weight = mantissa << (2 * exp);
+ if (val == 0 || val == 7)
+ *weight = 0;
+ else
+ *weight = mantissa << (2 * exp);
}
int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
@@ -274,19 +287,27 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
}
if (unit >= 6 && unit <= 9) {
- /*
- * L2/L3 events contain a cache selector field, which is
- * supposed to be programmed into MMCRC. However MMCRC is only
- * HV writable, and there is no API for guest kernels to modify
- * it. The solution is for the hypervisor to initialise the
- * field to zeroes, and for us to only ever allow events that
- * have a cache selector of zero. The bank selector (bit 3) is
- * irrelevant, as long as the rest of the value is 0.
- */
- if (cache & 0x7)
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ mask |= CNST_CACHE_GROUP_MASK;
+ value |= CNST_CACHE_GROUP_VAL(event & 0xff);
+
+ mask |= CNST_CACHE_PMC4_MASK;
+ if (pmc == 4)
+ value |= CNST_CACHE_PMC4_VAL;
+ } else if (cache & 0x7) {
+ /*
+ * L2/L3 events contain a cache selector field, which is
+ * supposed to be programmed into MMCRC. However MMCRC is only
+ * HV writable, and there is no API for guest kernels to modify
+ * it. The solution is for the hypervisor to initialise the
+ * field to zeroes, and for us to only ever allow events that
+ * have a cache selector of zero. The bank selector (bit 3) is
+ * irrelevant, as long as the rest of the value is 0.
+ */
return -1;
+ }
- } else if (event & EVENT_IS_L1) {
+ } else if (cpu_has_feature(CPU_FTR_ARCH_300) || (event & EVENT_IS_L1)) {
mask |= CNST_L1_QUAL_MASK;
value |= CNST_L1_QUAL_VAL(cache);
}
@@ -389,11 +410,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
/* In continuous sampling mode, update SDAR on TLB miss */
mmcra_sdar_mode(event[i], &mmcra);
- if (event[i] & EVENT_IS_L1) {
- cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
- mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT;
- cache >>= 1;
- mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ cache = dc_ic_rld_quad_l1_sel(event[i]);
+ mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
+ } else {
+ if (event[i] & EVENT_IS_L1) {
+ cache = dc_ic_rld_quad_l1_sel(event[i]);
+ mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
+ }
}
if (is_event_marked(event[i])) {
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 0028f4b9490d..91350f42a662 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -134,6 +134,11 @@
#define CNST_SAMPLE_VAL(v) (((v) & EVENT_SAMPLE_MASK) << 16)
#define CNST_SAMPLE_MASK CNST_SAMPLE_VAL(EVENT_SAMPLE_MASK)
+#define CNST_CACHE_GROUP_VAL(v) (((v) & 0xffull) << 55)
+#define CNST_CACHE_GROUP_MASK CNST_CACHE_GROUP_VAL(0xff)
+#define CNST_CACHE_PMC4_VAL (1ull << 54)
+#define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL
+
/*
* For NC we are counting up to 4 events. This requires three bits, and we need
* the fifth event to overflow and set the 4th bit. To achieve that we bias the
@@ -163,8 +168,8 @@
#define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1))
#define MMCR1_PMCSEL_SHIFT(pmc) (24 - (((pmc) - 1)) * 8)
#define MMCR1_FAB_SHIFT 36
-#define MMCR1_DC_QUAL_SHIFT 47
-#define MMCR1_IC_QUAL_SHIFT 46
+#define MMCR1_DC_IC_QUAL_MASK 0x3
+#define MMCR1_DC_IC_QUAL_SHIFT 46
/* MMCR1 Combine bits macro for power9 */
#define p9_MMCR1_COMBINE_SHIFT(pmc) (38 - ((pmc - 1) * 2))
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index 09ceea6175ba..5c36b3a8d47a 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -69,6 +69,7 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar),
};
u64 perf_reg_value(struct pt_regs *regs, int idx)
@@ -76,6 +77,12 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
return 0;
+ if (idx == PERF_REG_POWERPC_SIER &&
+ (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
+ IS_ENABLED(CONFIG_PPC32) ||
+ !is_sier_available()))
+ return 0;
+
return regs_get_register(regs, pt_regs_offset[idx]);
}
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index e012b1030a5b..0ff9c43733e9 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -63,16 +63,8 @@
* MMCRA[9:11] = thresh_cmp[0:2]
* MMCRA[12:18] = thresh_cmp[3:9]
*
- * if unit == 6 or unit == 7
- * MMCRC[53:55] = cache_sel[1:3] (L2EVENT_SEL)
- * else if unit == 8 or unit == 9:
- * if cache_sel[0] == 0: # L3 bank
- * MMCRC[47:49] = cache_sel[1:3] (L3EVENT_SEL0)
- * else if cache_sel[0] == 1:
- * MMCRC[50:51] = cache_sel[2:3] (L3EVENT_SEL1)
- * else if cache_sel[1]: # L1 event
- * MMCR1[16] = cache_sel[2]
- * MMCR1[17] = cache_sel[3]
+ * MMCR1[16] = cache_sel[2]
+ * MMCR1[17] = cache_sel[3]
*
* if mark:
* MMCRA[63] = 1 (SAMPLE_ENABLE)
@@ -179,8 +171,6 @@ CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE);
CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3);
CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL);
-CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
-CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST);
CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL);
CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
@@ -205,8 +195,6 @@ static struct attribute *power9_events_attr[] = {
CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
CACHE_EVENT_PTR(PM_DATA_FROM_L3),
CACHE_EVENT_PTR(PM_L3_PREF_ALL),
- CACHE_EVENT_PTR(PM_L2_ST_MISS),
- CACHE_EVENT_PTR(PM_L2_ST),
CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
CACHE_EVENT_PTR(PM_BR_CMPL),
CACHE_EVENT_PTR(PM_DTLB_MISS),
@@ -354,8 +342,8 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS,
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = PM_L2_ST,
- [ C(RESULT_MISS) ] = PM_L2_ST_MISS,
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
@@ -427,6 +415,8 @@ static struct power_pmu power9_pmu = {
.n_counter = MAX_PMU_COUNTERS,
.add_fields = ISA207_ADD_FIELDS,
.test_adder = ISA207_TEST_ADDER,
+ .group_constraint_mask = CNST_CACHE_PMC4_MASK,
+ .group_constraint_val = CNST_CACHE_PMC4_VAL,
.compute_mmcr = isa207_compute_mmcr,
.config_bhrb = power9_config_bhrb,
.bhrb_filter_map = power9_bhrb_filter_map,