aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/perf_event_intel.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c247
1 files changed, 208 insertions, 39 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2502d0d9d246..a73947c53b65 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -220,6 +220,15 @@ static struct event_constraint intel_hsw_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_bdw_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
+ INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */
+ EVENT_CONSTRAINT_END
+};
+
static u64 intel_pmu_event_map(int hw_event)
{
return intel_perfmon_event_map[hw_event];
@@ -415,6 +424,126 @@ static __initconst const u64 snb_hw_cache_event_ids
};
+static __initconst const u64 hsw_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
+ [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
+ [ C(RESULT_ACCESS) ] = 0x1b7,
+ /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
+ L3_MISS|ANY_SNOOP */
+ [ C(RESULT_MISS) ] = 0x1b7,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE:ALL_RFO */
+ /* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
+ [ C(RESULT_MISS) ] = 0x1b7,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
+ [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
+ [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */
+ [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+static __initconst const u64 hsw_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
+ [ C(RESULT_ACCESS) ] = 0x2d5,
+ /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
+ L3_MISS|ANY_SNOOP */
+ [ C(RESULT_MISS) ] = 0x3fbc0202d5ull,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x122, /* OFFCORE_RESPONSE:ALL_RFO */
+ /* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
+ [ C(RESULT_MISS) ] = 0x3fbc020122ull,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+};
+
static __initconst const u64 westmere_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1045,7 +1174,7 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
static void intel_pmu_disable_all(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -1058,7 +1187,7 @@ static void intel_pmu_disable_all(void)
static void intel_pmu_enable_all(int added)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
intel_pmu_pebs_enable_all();
intel_pmu_lbr_enable_all();
@@ -1092,7 +1221,7 @@ static void intel_pmu_enable_all(int added)
*/
static void intel_pmu_nhm_workaround(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
static const unsigned long nhm_magic[4] = {
0x4300B5,
0x4300D2,
@@ -1191,7 +1320,7 @@ static inline bool event_is_checkpointed(struct perf_event *event)
static void intel_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
intel_pmu_disable_bts();
@@ -1255,7 +1384,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
static void intel_pmu_enable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
if (!__this_cpu_read(cpu_hw_events.enabled))
@@ -1349,7 +1478,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
u64 status;
int handled;
- cpuc = &__get_cpu_var(cpu_hw_events);
+ cpuc = this_cpu_ptr(&cpu_hw_events);
/*
* No known reason to not always do late ACK,
@@ -1781,7 +1910,7 @@ EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
@@ -1802,7 +1931,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
int idx;
@@ -1836,7 +1965,7 @@ static void core_pmu_enable_event(struct perf_event *event)
static void core_pmu_enable_all(int added)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1905,6 +2034,24 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
return c;
}
+/*
+ * Broadwell:
+ * The INST_RETIRED.ALL period always needs to have lowest
+ * 6bits cleared (BDM57). It shall not use a period smaller
+ * than 100 (BDM11). We combine the two to enforce
+ * a min-period of 128.
+ */
+static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
+{
+ if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
+ X86_CONFIG(.event=0xc0, .umask=0x01)) {
+ if (left < 128)
+ left = 128;
+ left &= ~0x3fu;
+ }
+ return left;
+}
+
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -2367,15 +2514,15 @@ __init int intel_pmu_init(void)
* Install the hw-cache-events table:
*/
switch (boot_cpu_data.x86_model) {
- case 14: /* 65 nm core solo/duo, "Yonah" */
+ case 14: /* 65nm Core "Yonah" */
pr_cont("Core events, ");
break;
- case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+ case 15: /* 65nm Core2 "Merom" */
x86_add_quirk(intel_clovertown_quirk);
- case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
- case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
- case 29: /* six-core 45 nm xeon "Dunnington" */
+ case 22: /* 65nm Core2 "Merom-L" */
+ case 23: /* 45nm Core2 "Penryn" */
+ case 29: /* 45nm Core2 "Dunnington (MP) */
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -2386,9 +2533,9 @@ __init int intel_pmu_init(void)
pr_cont("Core2 events, ");
break;
- case 26: /* 45 nm nehalem, "Bloomfield" */
- case 30: /* 45 nm nehalem, "Lynnfield" */
- case 46: /* 45 nm nehalem-ex, "Beckton" */
+ case 30: /* 45nm Nehalem */
+ case 26: /* 45nm Nehalem-EP */
+ case 46: /* 45nm Nehalem-EX */
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -2415,11 +2562,11 @@ __init int intel_pmu_init(void)
pr_cont("Nehalem events, ");
break;
- case 28: /* Atom */
- case 38: /* Lincroft */
- case 39: /* Penwell */
- case 53: /* Cloverview */
- case 54: /* Cedarview */
+ case 28: /* 45nm Atom "Pineview" */
+ case 38: /* 45nm Atom "Lincroft" */
+ case 39: /* 32nm Atom "Penwell" */
+ case 53: /* 32nm Atom "Cloverview" */
+ case 54: /* 32nm Atom "Cedarview" */
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -2430,8 +2577,8 @@ __init int intel_pmu_init(void)
pr_cont("Atom events, ");
break;
- case 55: /* Atom 22nm "Silvermont" */
- case 77: /* Avoton "Silvermont" */
+ case 55: /* 22nm Atom "Silvermont" */
+ case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -2446,9 +2593,9 @@ __init int intel_pmu_init(void)
pr_cont("Silvermont events, ");
break;
- case 37: /* 32 nm nehalem, "Clarkdale" */
- case 44: /* 32 nm nehalem, "Gulftown" */
- case 47: /* 32 nm Xeon E7 */
+ case 37: /* 32nm Westmere */
+ case 44: /* 32nm Westmere-EP */
+ case 47: /* 32nm Westmere-EX */
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -2474,8 +2621,8 @@ __init int intel_pmu_init(void)
pr_cont("Westmere events, ");
break;
- case 42: /* SandyBridge */
- case 45: /* SandyBridge, "Romely-EP" */
+ case 42: /* 32nm SandyBridge */
+ case 45: /* 32nm SandyBridge-E/EN/EP */
x86_add_quirk(intel_sandybridge_quirk);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -2506,8 +2653,9 @@ __init int intel_pmu_init(void)
pr_cont("SandyBridge events, ");
break;
- case 58: /* IvyBridge */
- case 62: /* IvyBridge EP */
+
+ case 58: /* 22nm IvyBridge */
+ case 62: /* 22nm IvyBridge-EP/EX */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
/* dTLB-load-misses on IVB is different than SNB */
@@ -2539,20 +2687,19 @@ __init int intel_pmu_init(void)
break;
- case 60: /* Haswell Client */
- case 70:
- case 71:
- case 63:
- case 69:
+ case 60: /* 22nm Haswell Core */
+ case 63: /* 22nm Haswell Server */
+ case 69: /* 22nm Haswell ULT */
+ case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
x86_pmu.late_ack = true;
- memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
- memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+ memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_snb();
x86_pmu.event_constraints = intel_hsw_event_constraints;
x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
- x86_pmu.extra_regs = intel_snb_extra_regs;
+ x86_pmu.extra_regs = intel_snbep_extra_regs;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
/* all extra regs are per-cpu when HT is on */
x86_pmu.er_flags |= ERF_HAS_RSP_1;
@@ -2565,6 +2712,28 @@ __init int intel_pmu_init(void)
pr_cont("Haswell events, ");
break;
+ case 61: /* 14nm Broadwell Core-M */
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_snb();
+
+ x86_pmu.event_constraints = intel_bdw_event_constraints;
+ x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_snbep_extra_regs;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.er_flags |= ERF_HAS_RSP_1;
+ x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+ x86_pmu.hw_config = hsw_hw_config;
+ x86_pmu.get_event_constraints = hsw_get_event_constraints;
+ x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.limit_period = bdw_limit_period;
+ pr_cont("Broadwell events, ");
+ break;
+
default:
switch (x86_pmu.version) {
case 1: