diff options
Diffstat (limited to 'arch/powerpc/perf/power8-pmu.c')
-rw-r--r-- | arch/powerpc/perf/power8-pmu.c | 222 |
1 files changed, 205 insertions, 17 deletions
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index a3f7abd2f13f..fe2763b6e039 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -10,6 +10,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "power8-pmu: " fmt + #include <linux/kernel.h> #include <linux/perf_event.h> #include <asm/firmware.h> @@ -25,15 +27,48 @@ #define PM_BRU_FIN 0x10068 #define PM_BR_MPRED_CMPL 0x400f6 +/* All L1 D cache load references counted at finish, gated by reject */ +#define PM_LD_REF_L1 0x100ee +/* Load Missed L1 */ +#define PM_LD_MISS_L1 0x3e054 +/* Store Missed L1 */ +#define PM_ST_MISS_L1 0x300f0 +/* L1 cache data prefetches */ +#define PM_L1_PREF 0x0d8b8 +/* Instruction fetches from L1 */ +#define PM_INST_FROM_L1 0x04080 +/* Demand iCache Miss */ +#define PM_L1_ICACHE_MISS 0x200fd +/* Instruction Demand sectors wriittent into IL1 */ +#define PM_L1_DEMAND_WRITE 0x0408c +/* Instruction prefetch written into IL1 */ +#define PM_IC_PREF_WRITE 0x0408e +/* The data cache was reloaded from local core's L3 due to a demand load */ +#define PM_DATA_FROM_L3 0x4c042 +/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ +#define PM_DATA_FROM_L3MISS 0x300fe +/* All successful D-side store dispatches for this thread */ +#define PM_L2_ST 0x17080 +/* All successful D-side store dispatches for this thread that were L2 Miss */ +#define PM_L2_ST_MISS 0x17082 +/* Total HW L3 prefetches(Load+store) */ +#define PM_L3_PREF_ALL 0x4e052 +/* Data PTEG reload */ +#define PM_DTLB_MISS 0x300fc +/* ITLB Reloaded */ +#define PM_ITLB_MISS 0x400fc + /* * Raw event encoding for POWER8: * * 60 56 52 48 44 40 36 32 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * | [ thresh_cmp ] [ thresh_ctl ] - * | | - * *- EBB (Linux) thresh start/stop OR FAB match -* + * | | [ ] [ thresh_cmp ] [ thresh_ctl ] + * | | | | + * | | *- IFM (Linux) thresh start/stop OR FAB match -* + * | *- BHRB (Linux) + * *- EBB (Linux) * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | @@ -83,9 +118,18 @@ * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG) * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE) * + * if EBB and BHRB: + * MMCRA[32:33] = IFM + * */ #define EVENT_EBB_MASK 1ull +#define EVENT_EBB_SHIFT PERF_EVENT_CONFIG_EBB_SHIFT +#define EVENT_BHRB_MASK 1ull +#define EVENT_BHRB_SHIFT 62 +#define EVENT_WANTS_BHRB (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT) +#define EVENT_IFM_MASK 3ull +#define EVENT_IFM_SHIFT 60 #define EVENT_THR_CMP_SHIFT 40 /* Threshold CMP value */ #define EVENT_THR_CMP_MASK 0x3ff #define EVENT_THR_CTL_SHIFT 32 /* Threshold control value (start/stop) */ @@ -110,6 +154,12 @@ #define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) #define EVENT_PSEL_MASK 0xff /* PMCxSEL value */ +/* Bits defined by Linux */ +#define EVENT_LINUX_MASK \ + ((EVENT_EBB_MASK << EVENT_EBB_SHIFT) | \ + (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT) | \ + (EVENT_IFM_MASK << EVENT_IFM_SHIFT)) + #define EVENT_VALID_MASK \ ((EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ @@ -118,7 +168,7 @@ (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ (EVENT_COMBINE_MASK << EVENT_COMBINE_SHIFT) | \ (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ - (EVENT_EBB_MASK << PERF_EVENT_CONFIG_EBB_SHIFT) | \ + EVENT_LINUX_MASK | \ EVENT_PSEL_MASK) /* MMCRA IFM bits - POWER8 */ @@ -142,10 +192,11 @@ * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] - * EBB -* | | - * | | Count of events for each PMC. - * L1 I/D qualifier -* | p1, p2, p3, p4, p5, p6. + * [ ] | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] + * | | | | + * BHRB IFM -* | | | Count of events for each PMC. + * EBB -* | | p1, p2, p3, p4, p5, p6. + * L1 I/D qualifier -* | * nc - number of counters -* * * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints @@ -164,6 +215,9 @@ #define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24) #define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK) +#define CNST_IFM_VAL(v) (((v) & EVENT_IFM_MASK) << 25) +#define CNST_IFM_MASK CNST_IFM_VAL(EVENT_IFM_MASK) + #define CNST_L1_QUAL_VAL(v) (((v) & 3) << 22) #define CNST_L1_QUAL_MASK CNST_L1_QUAL_VAL(3) @@ -210,6 +264,7 @@ #define MMCRA_THR_SEL_SHIFT 16 #define MMCRA_THR_CMP_SHIFT 32 #define MMCRA_SDAR_MODE_TLB (1ull << 42) +#define MMCRA_IFM_SHIFT 30 static inline bool event_is_fab_match(u64 event) @@ -234,20 +289,22 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; - ebb = (event >> PERF_EVENT_CONFIG_EBB_SHIFT) & EVENT_EBB_MASK; - - /* Clear the EBB bit in the event, so event checks work below */ - event &= ~(EVENT_EBB_MASK << PERF_EVENT_CONFIG_EBB_SHIFT); + ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK; if (pmc) { + u64 base_event; + if (pmc > 6) return -1; - mask |= CNST_PMC_MASK(pmc); - value |= CNST_PMC_VAL(pmc); + /* Ignore Linux defined bits when checking event below */ + base_event = event & ~EVENT_LINUX_MASK; - if (pmc >= 5 && event != 0x500fa && event != 0x600f4) + if (pmc >= 5 && base_event != 0x500fa && base_event != 0x600f4) return -1; + + mask |= CNST_PMC_MASK(pmc); + value |= CNST_PMC_VAL(pmc); } if (pmc <= 4) { @@ -268,9 +325,10 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long * HV writable, and there is no API for guest kernels to modify * it. The solution is for the hypervisor to initialise the * field to zeroes, and for us to only ever allow events that - * have a cache selector of zero. + * have a cache selector of zero. The bank selector (bit 3) is + * irrelevant, as long as the rest of the value is 0. */ - if (cache) + if (cache & 0x7) return -1; } else if (event & EVENT_IS_L1) { @@ -311,6 +369,15 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long /* EBB events must specify the PMC */ return -1; + if (event & EVENT_WANTS_BHRB) { + if (!ebb) + /* Only EBB events can request BHRB */ + return -1; + + mask |= CNST_IFM_MASK; + value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT); + } + /* * All events must agree on EBB, either all request it or none. * EBB events are pinned & exclusive, so this should never actually @@ -400,6 +467,11 @@ static int power8_compute_mmcr(u64 event[], int n_ev, mmcra |= val << MMCRA_THR_CMP_SHIFT; } + if (event[i] & EVENT_WANTS_BHRB) { + val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK; + mmcra |= val << MMCRA_IFM_SHIFT; + } + hwc[i] = pmc - 1; } @@ -557,6 +629,8 @@ static int power8_generic_events[] = { [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_FIN, [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, }; static u64 power8_bhrb_filter_map(u64 branch_sample_type) @@ -596,6 +670,116 @@ static void power8_config_bhrb(u64 pmu_bhrb_filter) mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter)); } +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = PM_LD_REF_L1, + [ C(RESULT_MISS) ] = PM_LD_MISS_L1, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = PM_ST_MISS_L1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = PM_L1_PREF, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = PM_INST_FROM_L1, + [ C(RESULT_MISS) ] = PM_L1_ICACHE_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3, + [ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = PM_L2_ST, + [ C(RESULT_MISS) ] = PM_L2_ST_MISS, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = PM_DTLB_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = PM_ITLB_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = PM_BRU_FIN, + [ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +#undef C + static struct power_pmu power8_pmu = { .name = "POWER8", .n_counter = 6, @@ -611,6 +795,7 @@ static struct power_pmu power8_pmu = { .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB, .n_generic = ARRAY_SIZE(power8_generic_events), .generic_events = power8_generic_events, + .cache_events = &power8_cache_events, .attr_groups = power8_pmu_attr_groups, .bhrb_nr = 32, }; @@ -630,6 +815,9 @@ static int __init init_power8_pmu(void) /* Tell userspace that EBB is supported */ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB; + if (cpu_has_feature(CPU_FTR_PMAO_BUG)) + pr_info("PMAO restore workaround active.\n"); + return 0; } early_initcall(init_power8_pmu); |