 arch/powerpc/perf/power8-pmu.c | 222 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 205 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index a3f7abd2f13f..fe2763b6e039 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -10,6 +10,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) "power8-pmu: " fmt
+
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <asm/firmware.h>
@@ -25,15 +27,48 @@
#define PM_BRU_FIN 0x10068
#define PM_BR_MPRED_CMPL 0x400f6
+/* All L1 D cache load references counted at finish, gated by reject */
+#define PM_LD_REF_L1 0x100ee
+/* Load Missed L1 */
+#define PM_LD_MISS_L1 0x3e054
+/* Store Missed L1 */
+#define PM_ST_MISS_L1 0x300f0
+/* L1 cache data prefetches */
+#define PM_L1_PREF 0x0d8b8
+/* Instruction fetches from L1 */
+#define PM_INST_FROM_L1 0x04080
+/* Demand iCache Miss */
+#define PM_L1_ICACHE_MISS 0x200fd
+/* Instruction Demand sectors written into IL1 */
+#define PM_L1_DEMAND_WRITE 0x0408c
+/* Instruction prefetch written into IL1 */
+#define PM_IC_PREF_WRITE 0x0408e
+/* The data cache was reloaded from local core's L3 due to a demand load */
+#define PM_DATA_FROM_L3 0x4c042
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+#define PM_DATA_FROM_L3MISS 0x300fe
+/* All successful D-side store dispatches for this thread */
+#define PM_L2_ST 0x17080
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+#define PM_L2_ST_MISS 0x17082
+/* Total HW L3 prefetches (Load+store) */
+#define PM_L3_PREF_ALL 0x4e052
+/* Data PTEG reload */
+#define PM_DTLB_MISS 0x300fc
+/* ITLB Reloaded */
+#define PM_ITLB_MISS 0x400fc
+
/*
* Raw event encoding for POWER8:
*
* 60 56 52 48 44 40 36 32
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- * | [ thresh_cmp ] [ thresh_ctl ]
- * | |
- * *- EBB (Linux) thresh start/stop OR FAB match -*
+ * | | [ ] [ thresh_cmp ] [ thresh_ctl ]
+ * | | | |
+ * | | *- IFM (Linux) thresh start/stop OR FAB match -*
+ * | *- BHRB (Linux)
+ * *- EBB (Linux)
*
* 28 24 20 16 12 8 4 0
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
@@ -83,9 +118,18 @@
* MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG)
 * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE)
*
+ * if EBB and BHRB:
+ * MMCRA[32:33] = IFM
+ *
*/
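
As a worked example of the layout above, the three Linux-defined fields can be pulled straight out of a raw event code. A minimal standalone sketch (the helper is illustrative, not kernel code; it assumes EBB sits at bit 63, i.e. PERF_EVENT_CONFIG_EBB_SHIFT, as the diagram shows):

    #include <stdint.h>
    #include <stdio.h>

    /* Decode the Linux-defined fields of a POWER8 raw event code. */
    static void decode_linux_bits(uint64_t event)
    {
        unsigned int ebb  = (event >> 63) & 0x1;  /* EVENT_EBB_SHIFT  */
        unsigned int bhrb = (event >> 62) & 0x1;  /* EVENT_BHRB_SHIFT */
        unsigned int ifm  = (event >> 60) & 0x3;  /* EVENT_IFM_SHIFT  */

        printf("ebb=%u bhrb=%u ifm=%u\n", ebb, bhrb, ifm);
    }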
#define EVENT_EBB_MASK 1ull
+#define EVENT_EBB_SHIFT PERF_EVENT_CONFIG_EBB_SHIFT
+#define EVENT_BHRB_MASK 1ull
+#define EVENT_BHRB_SHIFT 62
+#define EVENT_WANTS_BHRB (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT)
+#define EVENT_IFM_MASK 3ull
+#define EVENT_IFM_SHIFT 60
#define EVENT_THR_CMP_SHIFT 40 /* Threshold CMP value */
#define EVENT_THR_CMP_MASK 0x3ff
#define EVENT_THR_CTL_SHIFT 32 /* Threshold control value (start/stop) */
@@ -110,6 +154,12 @@
#define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)
#define EVENT_PSEL_MASK 0xff /* PMCxSEL value */
+/* Bits defined by Linux */
+#define EVENT_LINUX_MASK \
+ ((EVENT_EBB_MASK << EVENT_EBB_SHIFT) | \
+ (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT) | \
+ (EVENT_IFM_MASK << EVENT_IFM_SHIFT))
+
#define EVENT_VALID_MASK \
((EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \
(EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \
@@ -118,7 +168,7 @@
(EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \
(EVENT_COMBINE_MASK << EVENT_COMBINE_SHIFT) | \
(EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \
- (EVENT_EBB_MASK << PERF_EVENT_CONFIG_EBB_SHIFT) | \
+ EVENT_LINUX_MASK | \
EVENT_PSEL_MASK)
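
EVENT_VALID_MASK exists for a reject-unknown-bits test: an event that sets anything outside the mask is refused. A sketch of the check in the style power8_get_constraint() applies it:

    /* Any bit outside EVENT_VALID_MASK makes the raw event invalid. */
    static int event_bits_valid(u64 event)
    {
        return (event & ~EVENT_VALID_MASK) == 0;
    }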
/* MMCRA IFM bits - POWER8 */
@@ -142,10 +192,11 @@
*
* 28 24 20 16 12 8 4 0
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- * | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1]
- * EBB -* | |
- * | | Count of events for each PMC.
- * L1 I/D qualifier -* | p1, p2, p3, p4, p5, p6.
+ * [ ] | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1]
+ * | | | |
+ * BHRB IFM -* | | | Count of events for each PMC.
+ * EBB -* | | p1, p2, p3, p4, p5, p6.
+ * L1 I/D qualifier -* |
* nc - number of counters -*
*
* The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
@@ -164,6 +215,9 @@
#define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24)
#define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK)
+#define CNST_IFM_VAL(v) (((v) & EVENT_IFM_MASK) << 25)
+#define CNST_IFM_MASK CNST_IFM_VAL(EVENT_IFM_MASK)
+
#define CNST_L1_QUAL_VAL(v) (((v) & 3) << 22)
#define CNST_L1_QUAL_MASK CNST_L1_QUAL_VAL(3)
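
The CNST_*_VAL/CNST_*_MASK pairs feed the powerpc core's mask/value constraint scheme: two events may share the PMU only if every field where both masks overlap carries the same value. A simplified sketch of that compatibility test (the real core additionally accumulates the adder fields P1..P6 and NC, which this ignores):

    /* Events conflict iff overlapping masked fields disagree. */
    static int cnst_compatible(unsigned long m1, unsigned long v1,
                               unsigned long m2, unsigned long v2)
    {
        return ((v1 ^ v2) & (m1 & m2)) == 0;
    }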
@@ -210,6 +264,7 @@
#define MMCRA_THR_SEL_SHIFT 16
#define MMCRA_THR_CMP_SHIFT 32
#define MMCRA_SDAR_MODE_TLB (1ull << 42)
+#define MMCRA_IFM_SHIFT 30
static inline bool event_is_fab_match(u64 event)
@@ -234,20 +289,22 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long
pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK;
- ebb = (event >> PERF_EVENT_CONFIG_EBB_SHIFT) & EVENT_EBB_MASK;
-
- /* Clear the EBB bit in the event, so event checks work below */
- event &= ~(EVENT_EBB_MASK << PERF_EVENT_CONFIG_EBB_SHIFT);
+ ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK;
if (pmc) {
+ u64 base_event;
+
if (pmc > 6)
return -1;
- mask |= CNST_PMC_MASK(pmc);
- value |= CNST_PMC_VAL(pmc);
+ /* Ignore Linux defined bits when checking event below */
+ base_event = event & ~EVENT_LINUX_MASK;
- if (pmc >= 5 && event != 0x500fa && event != 0x600f4)
+ if (pmc >= 5 && base_event != 0x500fa && base_event != 0x600f4)
return -1;
+
+ mask |= CNST_PMC_MASK(pmc);
+ value |= CNST_PMC_VAL(pmc);
}
if (pmc <= 4) {
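
For example, a hypothetical raw config requesting EBB on one of the two events exempted on PMC5/6 still passes the check above, because masking with ~EVENT_LINUX_MASK recovers the bare event code:

    u64 config     = (1ull << 63) | 0x600f4;      /* EBB + exempted PMC6 event */
    u64 base_event = config & ~EVENT_LINUX_MASK;  /* == 0x600f4, check passes  */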
@@ -268,9 +325,10 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long
* HV writable, and there is no API for guest kernels to modify
* it. The solution is for the hypervisor to initialise the
* field to zeroes, and for us to only ever allow events that
- * have a cache selector of zero.
+ * have a cache selector of zero. The bank selector (bit 3) is
+ * irrelevant, as long as the rest of the value is 0.
*/
- if (cache)
+ if (cache & 0x7)
return -1;
} else if (event & EVENT_IS_L1) {
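
Concretely, the relaxed check accepts a cache selector of 0x0 or 0x8 (bank bit only) when running under an LPAR and rejects everything else. As a predicate (a sketch, assuming bit 3 is the bank selector per the comment above):

    /* Cache selector acceptable under an LPAR: the bank bit may float. */
    static bool lpar_cache_sel_ok(unsigned int cache)
    {
        return (cache & 0x7) == 0;
    }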
@@ -311,6 +369,15 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long
/* EBB events must specify the PMC */
return -1;
+ if (event & EVENT_WANTS_BHRB) {
+ if (!ebb)
+ /* Only EBB events can request BHRB */
+ return -1;
+
+ mask |= CNST_IFM_MASK;
+ value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT);
+ }
+
/*
* All events must agree on EBB, either all request it or none.
* EBB events are pinned & exclusive, so this should never actually
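
Putting the pieces together, a raw config requesting EBB plus BHRB with an IFM filter might look like the following (a hypothetical value for illustration; the two IFM bits follow the POWER8 MMCRA encoding and are not interpreted here):

    /* EBB + BHRB, IFM filter value 2, on the PM_BRU_FIN event (PMC1). */
    u64 config = (1ull << 63)   /* EBB  */
               | (1ull << 62)   /* BHRB */
               | (2ull << 60)   /* IFM  */
               | 0x10068;       /* PM_BRU_FIN */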
@@ -400,6 +467,11 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
mmcra |= val << MMCRA_THR_CMP_SHIFT;
}
+ if (event[i] & EVENT_WANTS_BHRB) {
+ val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK;
+ mmcra |= val << MMCRA_IFM_SHIFT;
+ }
+
hwc[i] = pmc - 1;
}
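
The shift of 30 is consistent with the MMCRA[32:33] position given in the comment at the top of the file once IBM bit numbering is converted: IBM bit b of a 64-bit register is conventional bit (63 - b), so IBM bits 32:33 are conventional bits 31:30, i.e. a 2-bit field at shift 30. As a one-line sanity check:

    /* IBM big-endian bit b of a 64-bit SPR == conventional bit (63 - b). */
    #define IBM_BIT64(b)    (63 - (b))    /* IBM_BIT64(33) == 30 */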
@@ -557,6 +629,8 @@ static int power8_generic_events[] = {
[PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_FIN,
[PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1,
};
static u64 power8_bhrb_filter_map(u64 branch_sample_type)
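
With the two new generic entries above, a plain PERF_TYPE_HARDWARE counter from user space resolves on POWER8 without raw event codes. A minimal perf_event_open() sketch (error handling omitted):

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Open a counter that this patch maps to PM_LD_REF_L1. */
    static int open_cache_refs(void)
    {
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.type   = PERF_TYPE_HARDWARE;
        attr.size   = sizeof(attr);
        attr.config = PERF_COUNT_HW_CACHE_REFERENCES;
        attr.disabled = 1;

        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }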
@@ -596,6 +670,116 @@ static void power8_config_bhrb(u64 pmu_bhrb_filter)
mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
}
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
+ [ C(RESULT_MISS) ] = PM_LD_MISS_L1,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_ST_MISS_L1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = PM_L1_PREF,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = PM_INST_FROM_L1,
+ [ C(RESULT_MISS) ] = PM_L1_ICACHE_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3,
+ [ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = PM_L2_ST,
+ [ C(RESULT_MISS) ] = PM_L2_ST_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_DTLB_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_ITLB_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = PM_BRU_FIN,
+ [ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+#undef C
+
static struct power_pmu power8_pmu = {
.name = "POWER8",
.n_counter = 6,
@@ -611,6 +795,7 @@ static struct power_pmu power8_pmu = {
.flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB,
.n_generic = ARRAY_SIZE(power8_generic_events),
.generic_events = power8_generic_events,
+ .cache_events = &power8_cache_events,
.attr_groups = power8_pmu_attr_groups,
.bhrb_nr = 32,
};
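
User space reaches the new table through the PERF_TYPE_HW_CACHE config encoding, which packs (type, op, result) into a single value as type | (op << 8) | (result << 16); the core unpacks the indices and looks up the raw event code. A sketch of that lookup (the function name is illustrative):

    /* Resolve a PERF_TYPE_HW_CACHE config against power8_cache_events. */
    static int resolve_cache_event(u64 config)
    {
        unsigned int type   = config & 0xff;
        unsigned int op     = (config >>  8) & 0xff;
        unsigned int result = (config >> 16) & 0xff;

        if (type >= PERF_COUNT_HW_CACHE_MAX ||
            op >= PERF_COUNT_HW_CACHE_OP_MAX ||
            result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
            return -1;

        return power8_cache_events[type][op][result];
        /* 0 -> not supported, -1 -> nonsensical combination */
    }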
@@ -630,6 +815,9 @@ static int __init init_power8_pmu(void)
/* Tell userspace that EBB is supported */
cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+ if (cpu_has_feature(CPU_FTR_PMAO_BUG))
+ pr_info("PMAO restore workaround active.\n");
+
return 0;
}
early_initcall(init_power8_pmu);