From 1a6461b12872e9622c231928e1620504d741cc79 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 24 Jan 2013 16:10:25 +0100 Subject: perf/x86: Support CPU specific sysfs events Add a way for the CPU initialization code to register additional events, and merge them into the events attribute directory. Used in the next patch. Signed-off-by: Andi Kleen Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: namhyung.kim@lge.com Link: http://lkml.kernel.org/r/1359040242-8269-2-git-send-email-eranian@google.com [ small cleanups ] Signed-off-by: Ingo Molnar [ merge_attr returns a **, not just * ] Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/cpu/perf_event.c | 34 ++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/perf_event.h | 1 + 2 files changed, 35 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index bf0f01aea994..c886dc8c63f8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1330,6 +1330,32 @@ static void __init filter_events(struct attribute **attrs) } } +/* Merge two pointer arrays */ +static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b) +{ + struct attribute **new; + int j, i; + + for (j = 0; a[j]; j++) + ; + for (i = 0; b[i]; i++) + j++; + j++; + + new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL); + if (!new) + return NULL; + + j = 0; + for (i = 0; a[i]; i++) + new[j++] = a[i]; + for (i = 0; b[i]; i++) + new[j++] = b[i]; + new[j] = NULL; + + return new; +} + static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { @@ -1469,6 +1495,14 @@ static int __init init_hw_perf_events(void) else filter_events(x86_pmu_events_group.attrs); + if (x86_pmu.cpu_events) { + struct attribute **tmp; + + tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events); + if (!WARN_ON(!tmp)) + x86_pmu_events_group.attrs = tmp; + } + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); pr_info("... generic registers: %d\n", x86_pmu.num_counters); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 7f5c75c2afdd..95152c12a8d9 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -357,6 +357,7 @@ struct x86_pmu { struct attribute **format_attrs; ssize_t (*events_sysfs_show)(char *page, u64 config); + struct attribute **cpu_events; /* * CPU Hotplug hooks -- cgit v1.2.3-59-g8ed1b From 3a54aaa0a3ddb2cf2ec1b94a94024e9a8a8af962 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 24 Jan 2013 16:10:26 +0100 Subject: perf/x86: Improve sysfs event mapping with event string This patch extends Jiri's changes to make generic events mapping visible via sysfs. The patch extends the mechanism to non-generic events by allowing the mappings to be hardcoded in strings. This mechanism will be used by the PEBS-LL patch later on. Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: namhyung.kim@lge.com Link: http://lkml.kernel.org/r/1359040242-8269-3-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar [ fixed up conflict with 2663960 "perf: Make EVENT_ATTR global" ] Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/cpu/perf_event.c | 20 ++++++++++++-------- arch/x86/kernel/cpu/perf_event.h | 17 +++++++++++++++++ include/linux/perf_event.h | 1 + 3 files changed, 30 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c886dc8c63f8..6e8ab0427041 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1316,9 +1316,16 @@ static struct attribute_group x86_pmu_format_group = { */ static void __init filter_events(struct attribute **attrs) { + struct device_attribute *d; + struct perf_pmu_events_attr *pmu_attr; int i, j; for (i = 0; attrs[i]; i++) { + d = (struct device_attribute *)attrs[i]; + pmu_attr = container_of(d, struct perf_pmu_events_attr, attr); + /* str trumps id */ + if (pmu_attr->event_str) + continue; if (x86_pmu.event_map(i)) continue; @@ -1361,17 +1368,14 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at { struct perf_pmu_events_attr *pmu_attr = \ container_of(attr, struct perf_pmu_events_attr, attr); - u64 config = x86_pmu.event_map(pmu_attr->id); - return x86_pmu.events_sysfs_show(page, config); -} -#define EVENT_VAR(_id) event_attr_##_id -#define EVENT_PTR(_id) &event_attr_##_id.attr.attr + /* string trumps id */ + if (pmu_attr->event_str) + return sprintf(page, "%s", pmu_attr->event_str); -#define EVENT_ATTR(_name, _id) \ - PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \ - events_sysfs_show) + return x86_pmu.events_sysfs_show(page, config); +} EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 95152c12a8d9..b1518eed5f99 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -422,6 +422,23 @@ do { \ #define ERF_NO_HT_SHARING 1 #define ERF_HAS_RSP_1 2 +#define EVENT_VAR(_id) event_attr_##_id +#define EVENT_PTR(_id) &event_attr_##_id.attr.attr + +#define EVENT_ATTR(_name, _id) \ +static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ + .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ + .id = PERF_COUNT_HW_##_id, \ + .event_str = NULL, \ +}; + +#define EVENT_ATTR_STR(_name, v, str) \ +static struct perf_pmu_events_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ + .id = 0, \ + .event_str = str, \ +}; + extern struct x86_pmu x86_pmu __read_mostly; DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8737e1cee8b2..1c592114c437 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -809,6 +809,7 @@ do { \ struct perf_pmu_events_attr { struct device_attribute attr; u64 id; + const char *event_str; }; #define PMU_EVENT_ATTR(_name, _var, _id, _show) \ -- cgit v1.2.3-59-g8ed1b From 9fac2cf316b070ae43d2ae2525e381ff2d1d68aa Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 24 Jan 2013 16:10:27 +0100 Subject: perf/x86: Add flags to event constraints This patch adds a flags field to each event constraint. It can be used to store event specific features which can then later be used by scheduling code or low-level x86 code. The flags are propagated into event->hw.flags during the get_event_constraint() call. They are cleared during the put_event_constraint() call. This mechanism is going to be used by the PEBS-LL patches. It avoids defining yet another table to hold event specific information. Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: jolsa@redhat.com Cc: namhyung.kim@lge.com Link: http://lkml.kernel.org/r/1359040242-8269-4-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/cpu/perf_event.h | 8 +++++--- arch/x86/kernel/cpu/perf_event_intel.c | 6 +++++- arch/x86/kernel/cpu/perf_event_intel_ds.c | 4 +++- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +- include/linux/perf_event.h | 1 + 6 files changed, 16 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 6e8ab0427041..8ba51518f689 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1489,7 +1489,7 @@ static int __init init_hw_perf_events(void) unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, - 0, x86_pmu.num_counters, 0); + 0, x86_pmu.num_counters, 0, 0); x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ x86_pmu_format_group.attrs = x86_pmu.format_attrs; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index b1518eed5f99..9686d38eb458 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -59,6 +59,7 @@ struct event_constraint { u64 cmask; int weight; int overlap; + int flags; }; struct amd_nb { @@ -170,16 +171,17 @@ struct cpu_hw_events { void *kfree_on_online; }; -#define __EVENT_CONSTRAINT(c, n, m, w, o) {\ +#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\ { .idxmsk64 = (n) }, \ .code = (c), \ .cmask = (m), \ .weight = (w), \ .overlap = (o), \ + .flags = f, \ } #define EVENT_CONSTRAINT(c, n, m) \ - __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0) /* * The overlap flag marks event constraints with overlapping counter @@ -203,7 +205,7 @@ struct cpu_hw_events { * and its counter masks must be kept at a minimum. */ #define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ - __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0) /* * Constraint on the Event code. diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index dab7580c47ae..df3beaac3397 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1392,8 +1392,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { - if ((event->hw.config & c->cmask) == c->code) + if ((event->hw.config & c->cmask) == c->code) { + /* hw.flags zeroed at initialization */ + event->hw.flags |= c->flags; return c; + } } } @@ -1438,6 +1441,7 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, static void intel_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { + event->hw.flags = 0; intel_put_shared_regs_event_constraints(cpuc, event); } diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 826054a4f2ee..f30d85bcbda9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -430,8 +430,10 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) if (x86_pmu.pebs_constraints) { for_each_event_constraint(c, x86_pmu.pebs_constraints) { - if ((event->hw.config & c->cmask) == c->code) + if ((event->hw.config & c->cmask) == c->code) { + event->hw.flags |= c->flags; return c; + } } } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index b43200dbfe7e..75da9e18b128 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2438,7 +2438,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type) type->unconstrainted = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, - 0, type->num_counters, 0); + 0, type->num_counters, 0, 0); for (i = 0; i < type->num_boxes; i++) { pmus[i].func_id = -1; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1c592114c437..cd3bb2cd9494 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -127,6 +127,7 @@ struct hw_perf_event { int event_base_rdpmc; int idx; int last_cpu; + int flags; struct hw_perf_event_extra extra_reg; struct hw_perf_event_extra branch_reg; -- cgit v1.2.3-59-g8ed1b From f20093eef5f7843a25adfc0512617d4b1ff1aa6e Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 24 Jan 2013 16:10:32 +0100 Subject: perf/x86: Add memory profiling via PEBS Load Latency This patch adds support for memory profiling using the PEBS Load Latency facility. Load accesses are sampled by HW and the instruction address, data address, load latency, data source, tlb, locked information can be saved in the sampling buffer if using the PERF_SAMPLE_COST (for latency), PERF_SAMPLE_ADDR, PERF_SAMPLE_DATA_SRC types. To enable PEBS Load Latency, users have to use the model specific event: - on NHM/WSM: MEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD - on SNB/IVB: MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD To make things easier, this patch also exports a generic alias via sysfs: mem-loads. It export the right event encoding based on the host CPU and can be used directly by the perf tool. Loosely based on Intel's Lin Ming patch posted on LKML in July 2011. Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: namhyung.kim@lge.com Link: http://lkml.kernel.org/r/1359040242-8269-9-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/include/uapi/asm/msr-index.h | 1 + arch/x86/kernel/cpu/perf_event.c | 5 +- arch/x86/kernel/cpu/perf_event.h | 25 +++++- arch/x86/kernel/cpu/perf_event_intel.c | 24 ++++++ arch/x86/kernel/cpu/perf_event_intel_ds.c | 133 ++++++++++++++++++++++++++++-- 5 files changed, 178 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 892ce40a7470..b31798d5e62e 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -71,6 +71,7 @@ #define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 #define MSR_MTRRfix64K_00000 0x00000250 #define MSR_MTRRfix16K_80000 0x00000258 diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8ba51518f689..5ed7a4c5baf7 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1363,7 +1363,7 @@ static __init struct attribute **merge_attr(struct attribute **a, struct attribu return new; } -static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { struct perf_pmu_events_attr *pmu_attr = \ @@ -1494,6 +1494,9 @@ static int __init init_hw_perf_events(void) x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ x86_pmu_format_group.attrs = x86_pmu.format_attrs; + if (x86_pmu.event_attrs) + x86_pmu_events_group.attrs = x86_pmu.event_attrs; + if (!x86_pmu.events_sysfs_show) x86_pmu_events_group.attrs = &empty_attrs; else diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 9686d38eb458..f3a9a94e4d22 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -46,6 +46,7 @@ enum extra_reg_type { EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ EXTRA_REG_LBR = 2, /* lbr_select */ + EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ EXTRA_REG_MAX /* number of entries needed */ }; @@ -61,6 +62,10 @@ struct event_constraint { int overlap; int flags; }; +/* + * struct event_constraint flags + */ +#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -233,6 +238,10 @@ struct cpu_hw_events { #define INTEL_UEVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) +#define INTEL_PLD_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) + #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) @@ -262,12 +271,22 @@ struct extra_reg { .msr = (ms), \ .config_mask = (m), \ .valid_mask = (vm), \ - .idx = EXTRA_REG_##i \ + .idx = EXTRA_REG_##i, \ } #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) +#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \ + EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \ + ARCH_PERFMON_EVENTSEL_UMASK, vm, idx) + +#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \ + INTEL_UEVENT_EXTRA_REG(c, \ + MSR_PEBS_LD_LAT_THRESHOLD, \ + 0xffff, \ + LDLAT) + #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) union perf_capabilities { @@ -357,6 +376,7 @@ struct x86_pmu { */ int attr_rdpmc; struct attribute **format_attrs; + struct attribute **event_attrs; ssize_t (*events_sysfs_show)(char *page, u64 config); struct attribute **cpu_events; @@ -648,6 +668,9 @@ int p6_pmu_init(void); int knc_pmu_init(void); +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); + #else /* CONFIG_CPU_SUP_INTEL */ static inline void reserve_ds_buffers(void) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index df3beaac3397..d5ea5a03cd37 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = { INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), EVENT_EXTRA_END }; @@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly = { INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), EVENT_EXTRA_END }; @@ -155,9 +157,23 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = static struct extra_reg intel_snb_extra_regs[] __read_mostly = { INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), EVENT_EXTRA_END }; +EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); +EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); + +struct attribute *nhm_events_attrs[] = { + EVENT_PTR(mem_ld_nhm), + NULL, +}; + +struct attribute *snb_events_attrs[] = { + EVENT_PTR(mem_ld_snb), + NULL, +}; + static u64 intel_pmu_event_map(int hw_event) { return intel_perfmon_event_map[hw_event]; @@ -2035,6 +2051,8 @@ __init int intel_pmu_init(void) x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.extra_regs = intel_nehalem_extra_regs; + x86_pmu.cpu_events = nhm_events_attrs; + /* UOPS_ISSUED.STALLED_CYCLES */ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); @@ -2078,6 +2096,8 @@ __init int intel_pmu_init(void) x86_pmu.extra_regs = intel_westmere_extra_regs; x86_pmu.er_flags |= ERF_HAS_RSP_1; + x86_pmu.cpu_events = nhm_events_attrs; + /* UOPS_ISSUED.STALLED_CYCLES */ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); @@ -2106,6 +2126,8 @@ __init int intel_pmu_init(void) x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_NO_HT_SHARING; + x86_pmu.cpu_events = snb_events_attrs; + /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); @@ -2132,6 +2154,8 @@ __init int intel_pmu_init(void) x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_NO_HT_SHARING; + x86_pmu.cpu_events = snb_events_attrs; + /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index f30d85bcbda9..a6400bd0463c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -24,6 +24,92 @@ struct pebs_record_32 { */ +union intel_x86_pebs_dse { + u64 val; + struct { + unsigned int ld_dse:4; + unsigned int ld_stlb_miss:1; + unsigned int ld_locked:1; + unsigned int ld_reserved:26; + }; + struct { + unsigned int st_l1d_hit:1; + unsigned int st_reserved1:3; + unsigned int st_stlb_miss:1; + unsigned int st_locked:1; + unsigned int st_reserved2:26; + }; +}; + + +/* + * Map PEBS Load Latency Data Source encodings to generic + * memory data source information + */ +#define P(a, b) PERF_MEM_S(a, b) +#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) +#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) + +static const u64 pebs_data_source[] = { + P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */ + OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */ + OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */ + OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */ + OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */ + OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */ + OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */ + OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */ + OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */ + OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/ + OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */ + OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */ + OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */ + OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */ + OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */ + OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */ +}; + +static u64 load_latency_data(u64 status) +{ + union intel_x86_pebs_dse dse; + u64 val; + int model = boot_cpu_data.x86_model; + int fam = boot_cpu_data.x86; + + dse.val = status; + + /* + * use the mapping table for bit 0-3 + */ + val = pebs_data_source[dse.ld_dse]; + + /* + * Nehalem models do not support TLB, Lock infos + */ + if (fam == 0x6 && (model == 26 || model == 30 + || model == 31 || model == 46)) { + val |= P(TLB, NA) | P(LOCK, NA); + return val; + } + /* + * bit 4: TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (dse.ld_stlb_miss) + val |= P(TLB, MISS) | P(TLB, L2); + else + val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* + * bit 5: locked prefix + */ + if (dse.ld_locked) + val |= P(LOCK, LOCKED); + + return val; +} + struct pebs_record_core { u64 flags, ip; u64 ax, bx, cx, dx; @@ -364,7 +450,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { }; struct event_constraint intel_nehalem_pebs_event_constraints[] = { - INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */ @@ -379,7 +465,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = { }; struct event_constraint intel_westmere_pebs_event_constraints[] = { - INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */ @@ -399,7 +485,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ + INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -413,7 +499,7 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ + INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -448,6 +534,9 @@ void intel_pmu_pebs_enable(struct perf_event *event) hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; cpuc->pebs_enabled |= 1ULL << hwc->idx; + + if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) + cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32); } void intel_pmu_pebs_disable(struct perf_event *event) @@ -560,20 +649,48 @@ static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *__pebs) { /* - * We cast to pebs_record_core since that is a subset of - * both formats and we don't use the other fields in this - * routine. + * We cast to pebs_record_nhm to get the load latency data + * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used */ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct pebs_record_core *pebs = __pebs; + struct pebs_record_nhm *pebs = __pebs; struct perf_sample_data data; struct pt_regs regs; + u64 sample_type; + int fll; if (!intel_pmu_save_and_restart(event)) return; + fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; + perf_sample_data_init(&data, 0, event->hw.last_period); + data.period = event->hw.last_period; + sample_type = event->attr.sample_type; + + /* + * if PEBS-LL or PreciseStore + */ + if (fll) { + if (sample_type & PERF_SAMPLE_ADDR) + data.addr = pebs->dla; + + /* + * Use latency for weight (only avail with PEBS-LL) + */ + if (fll && (sample_type & PERF_SAMPLE_WEIGHT)) + data.weight = pebs->lat; + + /* + * data.data_src encodes the data source + */ + if (sample_type & PERF_SAMPLE_DATA_SRC) { + if (fll) + data.data_src.val = load_latency_data(pebs->dse); + } + } + /* * We use the interrupt regs as a base because the PEBS record * does not contain a full regs set, specifically it seems to -- cgit v1.2.3-59-g8ed1b From a63fcab45273174e665e6a8c9fa1a79a9046d0d5 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 24 Jan 2013 16:10:33 +0100 Subject: perf/x86: Export PEBS load latency threshold register to sysfs Make the PEBS Load Latency threshold register layout and encoding visible to user level tools. Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: namhyung.kim@lge.com Link: http://lkml.kernel.org/r/1359040242-8269-10-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/cpu/perf_event_intel.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index d5ea5a03cd37..ae6096b175b9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1781,6 +1781,8 @@ static void intel_pmu_flush_branch_stack(void) PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); +PMU_FORMAT_ATTR(ldlat, "config1:0-15"); + static struct attribute *intel_arch3_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -1791,6 +1793,7 @@ static struct attribute *intel_arch3_formats_attr[] = { &format_attr_cmask.attr, &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ + &format_attr_ldlat.attr, /* PEBS load latency */ NULL, }; -- cgit v1.2.3-59-g8ed1b From 9ad64c0f481c37a63dd39842a0fd264bee44a097 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 24 Jan 2013 16:10:34 +0100 Subject: perf/x86: Add support for PEBS Precise Store This patch adds support for PEBS Precise Store which is available on Intel Sandy Bridge and Ivy Bridge processors. To use Precise store, the proper PEBS event must be used: mem_trans_retired:precise_stores. For the perf tool, the generic mem-stores event exported via sysfs can be used directly. Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: namhyung.kim@lge.com Link: http://lkml.kernel.org/r/1359040242-8269-11-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/cpu/perf_event.h | 5 ++++ arch/x86/kernel/cpu/perf_event_intel.c | 2 ++ arch/x86/kernel/cpu/perf_event_intel_ds.c | 49 +++++++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index f3a9a94e4d22..ba9aadfa683b 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -66,6 +66,7 @@ struct event_constraint { * struct event_constraint flags */ #define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ +#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -242,6 +243,10 @@ struct cpu_hw_events { __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) +#define INTEL_PST_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) + #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ae6096b175b9..e84c4ba44b59 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -163,6 +163,7 @@ static struct extra_reg intel_snb_extra_regs[] __read_mostly = { EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); +EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); struct attribute *nhm_events_attrs[] = { EVENT_PTR(mem_ld_nhm), @@ -171,6 +172,7 @@ struct attribute *nhm_events_attrs[] = { struct attribute *snb_events_attrs[] = { EVENT_PTR(mem_ld_snb), + EVENT_PTR(mem_st_snb), NULL, }; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index a6400bd0463c..36dc13d1ad02 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -69,6 +69,44 @@ static const u64 pebs_data_source[] = { OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */ }; +static u64 precise_store_data(u64 status) +{ + union intel_x86_pebs_dse dse; + u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2); + + dse.val = status; + + /* + * bit 4: TLB access + * 1 = stored missed 2nd level TLB + * + * so it either hit the walker or the OS + * otherwise hit 2nd level TLB + */ + if (dse.st_stlb_miss) + val |= P(TLB, MISS); + else + val |= P(TLB, HIT); + + /* + * bit 0: hit L1 data cache + * if not set, then all we know is that + * it missed L1D + */ + if (dse.st_l1d_hit) + val |= P(LVL, HIT); + else + val |= P(LVL, MISS); + + /* + * bit 5: Locked prefix + */ + if (dse.st_locked) + val |= P(LOCK, LOCKED); + + return val; +} + static u64 load_latency_data(u64 status) { union intel_x86_pebs_dse dse; @@ -486,6 +524,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ + INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -500,6 +539,7 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ + INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -537,6 +577,8 @@ void intel_pmu_pebs_enable(struct perf_event *event) if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32); + else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) + cpuc->pebs_enabled |= 1ULL << 63; } void intel_pmu_pebs_disable(struct perf_event *event) @@ -657,12 +699,13 @@ static void __intel_pmu_pebs_event(struct perf_event *event, struct perf_sample_data data; struct pt_regs regs; u64 sample_type; - int fll; + int fll, fst; if (!intel_pmu_save_and_restart(event)) return; fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; + fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST; perf_sample_data_init(&data, 0, event->hw.last_period); @@ -672,7 +715,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, /* * if PEBS-LL or PreciseStore */ - if (fll) { + if (fll || fst) { if (sample_type & PERF_SAMPLE_ADDR) data.addr = pebs->dla; @@ -688,6 +731,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event, if (sample_type & PERF_SAMPLE_DATA_SRC) { if (fll) data.data_src.val = load_latency_data(pebs->dse); + else + data.data_src.val = precise_store_data(pebs->dse); } } -- cgit v1.2.3-59-g8ed1b From ab07e807be533d6317ea0971900bdf547962effd Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Fri, 22 Mar 2013 20:48:38 +0530 Subject: uprobes/powerpc: Teach uprobes to ignore gdb breakpoints Powerpc has many trap variants that could be used by entities like gdb. Currently, running gdb on a program being traced by uprobes causes an endless loop since uprobes doesn't understand that the trap was inserted by some other entity and a SIGTRAP needs to be delivered. Teach uprobes to ignore breakpoints that do not belong to it. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- arch/powerpc/kernel/uprobes.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index bc77834dbf43..cb7f63da140c 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -30,6 +30,16 @@ #define UPROBE_TRAP_NR UINT_MAX +/** + * is_trap_insn - check if the instruction is a trap variant + * @insn: instruction to be checked. + * Returns true if @insn is a trap variant. + */ +bool is_trap_insn(uprobe_opcode_t *insn) +{ + return (is_trap(*insn)); +} + /** * arch_uprobe_analyze_insn * @mm: the probed address space. -- cgit v1.2.3-59-g8ed1b From 3c9eb54f71fed86b761a6da4ad3eedc9566ceb8d Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Fri, 22 Mar 2013 20:49:46 +0530 Subject: uprobes/powerpc: Remove additional trap instruction check prepare_uprobe() already checks if the underlying unstruction (on file) is a trap variant. We don't need to check this again. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- arch/powerpc/kernel/uprobes.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index cb7f63da140c..2ecdbe304f46 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -53,12 +53,6 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, if (addr & 0x03) return -EINVAL; - /* - * We currently don't support a uprobe on an already - * existing breakpoint instruction underneath - */ - if (is_trap(auprobe->ainsn)) - return -ENOTSUPP; return 0; } -- cgit v1.2.3-59-g8ed1b From 23995bbee01d75f09f72b1380bd6045a5b02947b Mon Sep 17 00:00:00 2001 From: Andreas Krebbel Date: Wed, 9 Jan 2013 10:13:55 +0100 Subject: oprofile, s390: Add support for IBM zEnterprise EC12 This patch adds support for the latest release of the IBM mainframe series - the IBM zEnterprise EC12 (zEC12). The CPU measurement facility didn't change. So only the new CPU type has to be tolerated. Signed-off-by: Andreas Krebbel Signed-off-by: Robert Richter --- arch/s390/oprofile/init.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 584b93674ea4..ffeb17ce7f31 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -440,6 +440,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) switch (id.machine) { case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; + case 0x2827: ops->cpu_type = "s390/zEC12"; break; default: return -ENODEV; } } -- cgit v1.2.3-59-g8ed1b From 8101376dc5f42bd93b36d4ab210b44503d0ec11f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 4 Apr 2013 19:42:30 +0900 Subject: kprobes/x86: Just return error for sanity check failure instead of using BUG_ON Return an error from __copy_instruction() and use printk() to give us a more productive message, since this is just an error case which we can handle and also the BUG_ON() never tells us why and what happened. This is related to the following bug-report: https://bugzilla.redhat.com/show_bug.cgi?id=910649 Signed-off-by: Masami Hiramatsu Cc: Frank Ch. Eigler Cc: Steven Rostedt Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20130404104230.22862.85242.stgit@mhiramat-M0-7522 Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 7bfe318d3d8a..9895a9a41380 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -353,7 +353,11 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src) * have given. */ newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest; - BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ + if ((s64) (s32) newdisp != newdisp) { + pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp); + pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, insn.displacement.value); + return 0; + } disp = (u8 *) dest + insn_offset_displacement(&insn); *(s32 *) disp = (s32) newdisp; } -- cgit v1.2.3-59-g8ed1b From f8378f5259647710f0b4ecb814b0a1b0d9040de0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 8 Mar 2013 15:22:48 -0800 Subject: perf/x86: Add Sandy Bridge constraints for CYCLE_ACTIVITY.* Add CYCLE_ACTIVITY.CYCLES_NO_DISPATCH/CYCLES_L1D_PENDING constraints. These recently documented events have restrictions to counter 0-3 and counter 2 respectively. The perf scheduler needs to know that to schedule them correctly. IvyBridge already has the necessary constraints. Signed-off-by: Andi Kleen Cc: a.p.zijlstra@chello.nl Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1362784968-12542-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index e84c4ba44b59..2ad2374d53d4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -109,6 +109,8 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ + INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ EVENT_CONSTRAINT_END }; -- cgit v1.2.3-59-g8ed1b From 791eca10107f2886c1915d91c99a3b022a75909c Mon Sep 17 00:00:00 2001 From: Anton Arapov Date: Wed, 3 Apr 2013 18:00:33 +0200 Subject: uretprobes/x86: Hijack return address Hijack the return address and replace it with a trampoline address. Signed-off-by: Anton Arapov Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- arch/x86/include/asm/uprobes.h | 1 + arch/x86/kernel/uprobes.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 8ff8be7835ab..6e5197910fd8 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -55,4 +55,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); #endif /* _ASM_UPROBES_H */ diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 0ba4cfb4f412..2ed845928b5f 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -697,3 +697,32 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) send_sig(SIGTRAP, current, 0); return ret; } + +unsigned long +arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs) +{ + int rasize, ncopied; + unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */ + + rasize = is_ia32_task() ? 4 : 8; + ncopied = copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize); + if (unlikely(ncopied)) + return -1; + + /* check whether address has been already hijacked */ + if (orig_ret_vaddr == trampoline_vaddr) + return orig_ret_vaddr; + + ncopied = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize); + if (likely(!ncopied)) + return orig_ret_vaddr; + + if (ncopied != rasize) { + pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, " + "%%ip=%#lx\n", current->pid, regs->sp, regs->ip); + + force_sig_info(SIGSEGV, SEND_SIG_FORCED, current); + } + + return -1; +} -- cgit v1.2.3-59-g8ed1b From f15706b79d6f71e016cd06afa21ee31500029067 Mon Sep 17 00:00:00 2001 From: Anton Arapov Date: Wed, 3 Apr 2013 18:00:34 +0200 Subject: uretprobes/powerpc: Hijack return address Hijack the return address and replace it with a trampoline address. PowerPC implementation. Signed-off-by: Anton Arapov Acked-by: Ananth N Mavinakayanahalli Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- arch/powerpc/include/asm/uprobes.h | 1 + arch/powerpc/kernel/uprobes.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index b532060d0916..23016020915e 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -51,4 +51,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); #endif /* _ASM_UPROBES_H */ diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 2ecdbe304f46..59f419b935f2 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -192,3 +192,16 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) return false; } + +unsigned long +arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs) +{ + unsigned long orig_ret_vaddr; + + orig_ret_vaddr = regs->link; + + /* Replace the return addr with trampoline addr */ + regs->link = trampoline_vaddr; + + return orig_ret_vaddr; +} -- cgit v1.2.3-59-g8ed1b From 22cc4ccf63e10e361531bf61e6e6c96c53a2f665 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 16 Apr 2013 19:51:05 +0800 Subject: perf/x86: Avoid kfree() in CPU_{STARTING,DYING} On -rt kfree() can schedule, but CPU_{STARTING,DYING} should be atomic. So use a list to defer kfree until CPU_{ONLINE,DEAD}. Signed-off-by: Yan, Zheng Acked-by: Peter Zijlstra Cc: peterz@infradead.org Cc: eranian@google.com Cc: ak@linux.intel.com Link: http://lkml.kernel.org/r/1366113067-3262-2-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 28 ++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 75da9e18b128..50d4a1c58106 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2622,6 +2622,21 @@ static void __init uncore_pci_exit(void) } } +/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */ +static LIST_HEAD(boxes_to_free); + +static void __cpuinit uncore_kfree_boxes(void) +{ + struct intel_uncore_box *box; + + while (!list_empty(&boxes_to_free)) { + box = list_entry(boxes_to_free.next, + struct intel_uncore_box, list); + list_del(&box->list); + kfree(box); + } +} + static void __cpuinit uncore_cpu_dying(int cpu) { struct intel_uncore_type *type; @@ -2636,7 +2651,7 @@ static void __cpuinit uncore_cpu_dying(int cpu) box = *per_cpu_ptr(pmu->box, cpu); *per_cpu_ptr(pmu->box, cpu) = NULL; if (box && atomic_dec_and_test(&box->refcnt)) - kfree(box); + list_add(&box->list, &boxes_to_free); } } } @@ -2666,8 +2681,11 @@ static int __cpuinit uncore_cpu_starting(int cpu) if (exist && exist->phys_id == phys_id) { atomic_inc(&exist->refcnt); *per_cpu_ptr(pmu->box, cpu) = exist; - kfree(box); - box = NULL; + if (box) { + list_add(&box->list, + &boxes_to_free); + box = NULL; + } break; } } @@ -2806,6 +2824,10 @@ static int case CPU_DYING: uncore_cpu_dying(cpu); break; + case CPU_ONLINE: + case CPU_DEAD: + uncore_kfree_boxes(); + break; default: break; } -- cgit v1.2.3-59-g8ed1b From 46bdd905987199febdef611bab40e2b1ac0036b8 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 16 Apr 2013 19:51:06 +0800 Subject: perf/x86/intel: Fix SNB-EP CBO and PCU uncore PMU filter management The existing code assumes all Cbox and PCU events are using filter, but actually the filter is event specific. Furthermore the filter is sub-divided into multiple fields which are used by different events. Signed-off-by: Yan, Zheng Acked-by: Peter Zijlstra Cc: peterz@infradead.org Cc: ak@linux.intel.com Link: http://lkml.kernel.org/r/1366113067-3262-3-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar Reported-by: Stephane Eranian --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 326 +++++++++++++++++++++----- arch/x86/kernel/cpu/perf_event_intel_uncore.h | 13 +- 2 files changed, 280 insertions(+), 59 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 50d4a1c58106..8f590ea9ece0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -17,6 +17,9 @@ static struct event_constraint constraint_fixed = static struct event_constraint constraint_empty = EVENT_CONSTRAINT(0, 0, 0); +#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ + ((1ULL << (n)) - 1))) + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); @@ -110,6 +113,21 @@ static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_even reg1->alloc = 0; } +static u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + u64 config; + + er = &box->shared_regs[idx]; + + raw_spin_lock_irqsave(&er->lock, flags); + config = er->config; + raw_spin_unlock_irqrestore(&er->lock, flags); + + return config; +} + /* Sandy Bridge-EP uncore support */ static struct intel_uncore_type snbep_uncore_cbox; static struct intel_uncore_type snbep_uncore_pcu; @@ -205,7 +223,7 @@ static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct p struct hw_perf_event_extra *reg1 = &hwc->extra_reg; if (reg1->idx != EXTRA_REG_NONE) - wrmsrl(reg1->reg, reg1->config); + wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0)); wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } @@ -226,29 +244,6 @@ static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); } -static int snbep_uncore_hw_config(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - - if (box->pmu->type == &snbep_uncore_cbox) { - reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + - SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; - reg1->config = event->attr.config1 & - SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK; - } else { - if (box->pmu->type == &snbep_uncore_pcu) { - reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; - reg1->config = event->attr.config1 & SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK; - } else { - return 0; - } - } - reg1->idx = 0; - - return 0; -} - static struct attribute *snbep_uncore_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -345,16 +340,16 @@ static struct attribute_group snbep_uncore_qpi_format_group = { .attrs = snbep_uncore_qpi_formats_attr, }; +#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ + .init_box = snbep_uncore_msr_init_box, \ + .disable_box = snbep_uncore_msr_disable_box, \ + .enable_box = snbep_uncore_msr_enable_box, \ + .disable_event = snbep_uncore_msr_disable_event, \ + .enable_event = snbep_uncore_msr_enable_event, \ + .read_counter = uncore_msr_read_counter + static struct intel_uncore_ops snbep_uncore_msr_ops = { - .init_box = snbep_uncore_msr_init_box, - .disable_box = snbep_uncore_msr_disable_box, - .enable_box = snbep_uncore_msr_enable_box, - .disable_event = snbep_uncore_msr_disable_event, - .enable_event = snbep_uncore_msr_enable_event, - .read_counter = uncore_msr_read_counter, - .get_constraint = uncore_get_constraint, - .put_constraint = uncore_put_constraint, - .hw_config = snbep_uncore_hw_config, + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), }; static struct intel_uncore_ops snbep_uncore_pci_ops = { @@ -446,6 +441,145 @@ static struct intel_uncore_type snbep_uncore_ubox = { .format_group = &snbep_uncore_ubox_format_group, }; +static struct extra_reg snbep_uncore_cbox_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), + SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2), + EVENT_EXTRA_END +}; + +static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + int i; + + if (uncore_box_is_fake(box)) + return; + + for (i = 0; i < 5; i++) { + if (reg1->alloc & (0x1 << i)) + atomic_sub(1 << (i * 6), &er->ref); + } + reg1->alloc = 0; +} + +static struct event_constraint * +__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event, + u64 (*cbox_filter_mask)(int fields)) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + int i, alloc = 0; + unsigned long flags; + u64 mask; + + if (reg1->idx == EXTRA_REG_NONE) + return NULL; + + raw_spin_lock_irqsave(&er->lock, flags); + for (i = 0; i < 5; i++) { + if (!(reg1->idx & (0x1 << i))) + continue; + if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) + continue; + + mask = cbox_filter_mask(0x1 << i); + if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) || + !((reg1->config ^ er->config) & mask)) { + atomic_add(1 << (i * 6), &er->ref); + er->config &= ~mask; + er->config |= reg1->config & mask; + alloc |= (0x1 << i); + } else { + break; + } + } + raw_spin_unlock_irqrestore(&er->lock, flags); + if (i < 5) + goto fail; + + if (!uncore_box_is_fake(box)) + reg1->alloc |= alloc; + + return 0; +fail: + for (; i >= 0; i--) { + if (alloc & (0x1 << i)) + atomic_sub(1 << (i * 6), &er->ref); + } + return &constraint_empty; +} + +static u64 snbep_cbox_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID; + if (fields & 0x4) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) + mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC; + + return mask; +} + +static struct event_constraint * +snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask); +} + +static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + + SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static struct intel_uncore_ops snbep_uncore_cbox_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snbep_cbox_hw_config, + .get_constraint = snbep_cbox_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + static struct intel_uncore_type snbep_uncore_cbox = { .name = "cbox", .num_counters = 4, @@ -458,10 +592,104 @@ static struct intel_uncore_type snbep_uncore_cbox = { .msr_offset = SNBEP_CBO_MSR_OFFSET, .num_shared_regs = 1, .constraints = snbep_uncore_cbox_constraints, - .ops = &snbep_uncore_msr_ops, + .ops = &snbep_uncore_cbox_ops, .format_group = &snbep_uncore_cbox_format_group, }; +static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + u64 config = reg1->config; + + if (new_idx > reg1->idx) + config <<= 8 * (new_idx - reg1->idx); + else + config >>= 8 * (reg1->idx - new_idx); + + if (modify) { + hwc->config += new_idx - reg1->idx; + reg1->config = config; + reg1->idx = new_idx; + } + return config; +} + +static struct event_constraint * +snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + unsigned long flags; + int idx = reg1->idx; + u64 mask, config1 = reg1->config; + bool ok = false; + + if (reg1->idx == EXTRA_REG_NONE || + (!uncore_box_is_fake(box) && reg1->alloc)) + return NULL; +again: + mask = 0xff << (idx * 8); + raw_spin_lock_irqsave(&er->lock, flags); + if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) || + !((config1 ^ er->config) & mask)) { + atomic_add(1 << (idx * 8), &er->ref); + er->config &= ~mask; + er->config |= config1 & mask; + ok = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (!ok) { + idx = (idx + 1) % 4; + if (idx != reg1->idx) { + config1 = snbep_pcu_alter_er(event, idx, false); + goto again; + } + return &constraint_empty; + } + + if (!uncore_box_is_fake(box)) { + if (idx != reg1->idx) + snbep_pcu_alter_er(event, idx, true); + reg1->alloc = 1; + } + return NULL; +} + +static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct intel_uncore_extra_reg *er = &box->shared_regs[0]; + + if (uncore_box_is_fake(box) || !reg1->alloc) + return; + + atomic_sub(1 << (reg1->idx * 8), &er->ref); + reg1->alloc = 0; +} + +static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; + + if (ev_sel >= 0xb && ev_sel <= 0xe) { + reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; + reg1->idx = ev_sel - 0xb; + reg1->config = event->attr.config1 & (0xff << reg1->idx); + } + return 0; +} + +static struct intel_uncore_ops snbep_uncore_pcu_ops = { + SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snbep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + static struct intel_uncore_type snbep_uncore_pcu = { .name = "pcu", .num_counters = 4, @@ -472,7 +700,7 @@ static struct intel_uncore_type snbep_uncore_pcu = { .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, .num_shared_regs = 1, - .ops = &snbep_uncore_msr_ops, + .ops = &snbep_uncore_pcu_ops, .format_group = &snbep_uncore_pcu_format_group, }; @@ -808,9 +1036,6 @@ static struct intel_uncore_type *nhm_msr_uncores[] = { /* end of Nehalem uncore support */ /* Nehalem-EX uncore support */ -#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ - ((1ULL << (n)) - 1))) - DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5"); DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7"); DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63"); @@ -1161,7 +1386,7 @@ static struct extra_reg nhmex_uncore_mbox_extra_regs[] = { }; /* Nehalem-EX or Westmere-EX ? */ -bool uncore_nhmex; +static bool uncore_nhmex; static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config) { @@ -1239,7 +1464,7 @@ static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx) atomic_sub(1 << (idx * 8), &er->ref); } -u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify) +static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify) { struct hw_perf_event *hwc = &event->hw; struct hw_perf_event_extra *reg1 = &hwc->extra_reg; @@ -1554,7 +1779,7 @@ static struct intel_uncore_type nhmex_uncore_mbox = { .format_group = &nhmex_uncore_mbox_format_group, }; -void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) +static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct hw_perf_event_extra *reg1 = &hwc->extra_reg; @@ -1724,21 +1949,6 @@ static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event return 0; } -static u64 nhmex_rbox_shared_reg_config(struct intel_uncore_box *box, int idx) -{ - struct intel_uncore_extra_reg *er; - unsigned long flags; - u64 config; - - er = &box->shared_regs[idx]; - - raw_spin_lock_irqsave(&er->lock, flags); - config = er->config; - raw_spin_unlock_irqrestore(&er->lock, flags); - - return config; -} - static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -1759,7 +1969,7 @@ static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct per case 2: case 3: wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port), - nhmex_rbox_shared_reg_config(box, 2 + (idx / 6) * 5)); + uncore_shared_reg_config(box, 2 + (idx / 6) * 5)); break; case 4: wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port), @@ -2285,7 +2495,7 @@ out: return ret; } -int uncore_pmu_event_init(struct perf_event *event) +static int uncore_pmu_event_init(struct perf_event *event) { struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index e68a4550e952..f14a3413a85d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -148,9 +148,20 @@ #define SNBEP_C0_MSR_PMON_CTL0 0xd10 #define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04 #define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14 -#define SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK 0xfffffc1f #define SNBEP_CBO_MSR_OFFSET 0x20 +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID 0x1f +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID 0x3fc00 +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE 0x7c0000 +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC 0xff800000 + +#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) { \ + .event = (e), \ + .msr = SNBEP_C0_MSR_PMON_BOX_FILTER, \ + .config_mask = (m), \ + .idx = (i) \ +} + /* SNB-EP PCU register */ #define SNBEP_PCU_MSR_PMON_CTR0 0xc36 #define SNBEP_PCU_MSR_PMON_CTL0 0xc30 -- cgit v1.2.3-59-g8ed1b From e850f9c33c0c7cc4097ae29f6f8d633237d235e6 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 16 Apr 2013 19:51:07 +0800 Subject: perf/x86/intel: Add Ivy Bridge-EP uncore support The uncore subsystem in Ivy Bridge-EP is similar to Sandy Bridge-EP. There are some differences in config register encoding and pci device IDs. The Ivy Bridge-EP uncore also supports a few new events. Signed-off-by: Yan, Zheng Acked-by: Peter Zijlstra Cc: peterz@infradead.org Cc: eranian@google.com Cc: ak@linux.intel.com Link: http://lkml.kernel.org/r/1366113067-3262-4-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 520 ++++++++++++++++++++++++-- arch/x86/kernel/cpu/perf_event_intel_uncore.h | 51 ++- 2 files changed, 543 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 8f590ea9ece0..d0f9e5aa2151 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -34,9 +34,13 @@ DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); +DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47"); DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22"); DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60"); DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7"); DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15"); DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23"); @@ -367,6 +371,7 @@ static struct event_constraint snbep_uncore_cbox_constraints[] = { UNCORE_EVENT_CONSTRAINT(0x04, 0x3), UNCORE_EVENT_CONSTRAINT(0x05, 0x3), UNCORE_EVENT_CONSTRAINT(0x07, 0x3), + UNCORE_EVENT_CONSTRAINT(0x09, 0x3), UNCORE_EVENT_CONSTRAINT(0x11, 0x1), UNCORE_EVENT_CONSTRAINT(0x12, 0x3), UNCORE_EVENT_CONSTRAINT(0x13, 0x3), @@ -416,6 +421,14 @@ static struct event_constraint snbep_uncore_r3qpi_constraints[] = { UNCORE_EVENT_CONSTRAINT(0x24, 0x3), UNCORE_EVENT_CONSTRAINT(0x25, 0x3), UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x28, 0x3), + UNCORE_EVENT_CONSTRAINT(0x29, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2a, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2b, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2c, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2e, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2f, 0x3), UNCORE_EVENT_CONSTRAINT(0x30, 0x3), UNCORE_EVENT_CONSTRAINT(0x31, 0x3), UNCORE_EVENT_CONSTRAINT(0x32, 0x3), @@ -423,6 +436,8 @@ static struct event_constraint snbep_uncore_r3qpi_constraints[] = { UNCORE_EVENT_CONSTRAINT(0x34, 0x3), UNCORE_EVENT_CONSTRAINT(0x36, 0x3), UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x39, 0x3), EVENT_CONSTRAINT_END }; @@ -772,55 +787,63 @@ static struct intel_uncore_type snbep_uncore_r3qpi = { SNBEP_UNCORE_PCI_COMMON_INIT(), }; +enum { + SNBEP_PCI_UNCORE_HA, + SNBEP_PCI_UNCORE_IMC, + SNBEP_PCI_UNCORE_QPI, + SNBEP_PCI_UNCORE_R2PCIE, + SNBEP_PCI_UNCORE_R3QPI, +}; + static struct intel_uncore_type *snbep_pci_uncores[] = { - &snbep_uncore_ha, - &snbep_uncore_imc, - &snbep_uncore_qpi, - &snbep_uncore_r2pcie, - &snbep_uncore_r3qpi, + [SNBEP_PCI_UNCORE_HA] = &snbep_uncore_ha, + [SNBEP_PCI_UNCORE_IMC] = &snbep_uncore_imc, + [SNBEP_PCI_UNCORE_QPI] = &snbep_uncore_qpi, + [SNBEP_PCI_UNCORE_R2PCIE] = &snbep_uncore_r2pcie, + [SNBEP_PCI_UNCORE_R3QPI] = &snbep_uncore_r3qpi, NULL, }; static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = { { /* Home Agent */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), - .driver_data = (unsigned long)&snbep_uncore_ha, + .driver_data = SNBEP_PCI_UNCORE_HA, }, { /* MC Channel 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), - .driver_data = (unsigned long)&snbep_uncore_imc, + .driver_data = SNBEP_PCI_UNCORE_IMC, }, { /* MC Channel 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), - .driver_data = (unsigned long)&snbep_uncore_imc, + .driver_data = SNBEP_PCI_UNCORE_IMC, }, { /* MC Channel 2 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), - .driver_data = (unsigned long)&snbep_uncore_imc, + .driver_data = SNBEP_PCI_UNCORE_IMC, }, { /* MC Channel 3 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), - .driver_data = (unsigned long)&snbep_uncore_imc, + .driver_data = SNBEP_PCI_UNCORE_IMC, }, { /* QPI Port 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), - .driver_data = (unsigned long)&snbep_uncore_qpi, + .driver_data = SNBEP_PCI_UNCORE_QPI, }, { /* QPI Port 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), - .driver_data = (unsigned long)&snbep_uncore_qpi, + .driver_data = SNBEP_PCI_UNCORE_QPI, }, - { /* P2PCIe */ + { /* R2PCIe */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), - .driver_data = (unsigned long)&snbep_uncore_r2pcie, + .driver_data = SNBEP_PCI_UNCORE_R2PCIE, }, { /* R3QPI Link 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), - .driver_data = (unsigned long)&snbep_uncore_r3qpi, + .driver_data = SNBEP_PCI_UNCORE_R3QPI, }, { /* R3QPI Link 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), - .driver_data = (unsigned long)&snbep_uncore_r3qpi, + .driver_data = SNBEP_PCI_UNCORE_R3QPI, }, { /* end: all zeroes */ } }; @@ -833,7 +856,7 @@ static struct pci_driver snbep_uncore_pci_driver = { /* * build pci bus to socket mapping */ -static int snbep_pci2phy_map_init(void) +static int snbep_pci2phy_map_init(int devid) { struct pci_dev *ubox_dev = NULL; int i, bus, nodeid; @@ -842,9 +865,7 @@ static int snbep_pci2phy_map_init(void) while (1) { /* find the UBOX device */ - ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX, - ubox_dev); + ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev); if (!ubox_dev) break; bus = ubox_dev->bus->number; @@ -867,7 +888,7 @@ static int snbep_pci2phy_map_init(void) break; } } - }; + } if (ubox_dev) pci_dev_put(ubox_dev); @@ -876,6 +897,440 @@ static int snbep_pci2phy_map_init(void) } /* end of Sandy Bridge-EP uncore support */ +/* IvyTown uncore support */ +static void ivt_uncore_msr_init_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + if (msr) + wrmsrl(msr, IVT_PMON_BOX_CTL_INT); +} + +static void ivt_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + + pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVT_PMON_BOX_CTL_INT); +} + +#define IVT_UNCORE_MSR_OPS_COMMON_INIT() \ + .init_box = ivt_uncore_msr_init_box, \ + .disable_box = snbep_uncore_msr_disable_box, \ + .enable_box = snbep_uncore_msr_enable_box, \ + .disable_event = snbep_uncore_msr_disable_event, \ + .enable_event = snbep_uncore_msr_enable_event, \ + .read_counter = uncore_msr_read_counter + +static struct intel_uncore_ops ivt_uncore_msr_ops = { + IVT_UNCORE_MSR_OPS_COMMON_INIT(), +}; + +static struct intel_uncore_ops ivt_uncore_pci_ops = { + .init_box = ivt_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +#define IVT_UNCORE_PCI_COMMON_INIT() \ + .perf_ctr = SNBEP_PCI_PMON_CTR0, \ + .event_ctl = SNBEP_PCI_PMON_CTL0, \ + .event_mask = IVT_PMON_RAW_EVENT_MASK, \ + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ + .ops = &ivt_uncore_pci_ops, \ + .format_group = &ivt_uncore_format_group + +static struct attribute *ivt_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute *ivt_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + NULL, +}; + +static struct attribute *ivt_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid.attr, + &format_attr_filter_link.attr, + &format_attr_filter_state2.attr, + &format_attr_filter_nid2.attr, + &format_attr_filter_opc2.attr, + NULL, +}; + +static struct attribute *ivt_uncore_pcu_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_occ_sel.attr, + &format_attr_edge.attr, + &format_attr_thresh5.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge.attr, + &format_attr_filter_band0.attr, + &format_attr_filter_band1.attr, + &format_attr_filter_band2.attr, + &format_attr_filter_band3.attr, + NULL, +}; + +static struct attribute *ivt_uncore_qpi_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute_group ivt_uncore_format_group = { + .name = "format", + .attrs = ivt_uncore_formats_attr, +}; + +static struct attribute_group ivt_uncore_ubox_format_group = { + .name = "format", + .attrs = ivt_uncore_ubox_formats_attr, +}; + +static struct attribute_group ivt_uncore_cbox_format_group = { + .name = "format", + .attrs = ivt_uncore_cbox_formats_attr, +}; + +static struct attribute_group ivt_uncore_pcu_format_group = { + .name = "format", + .attrs = ivt_uncore_pcu_formats_attr, +}; + +static struct attribute_group ivt_uncore_qpi_format_group = { + .name = "format", + .attrs = ivt_uncore_qpi_formats_attr, +}; + +static struct intel_uncore_type ivt_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNBEP_U_MSR_PMON_CTR0, + .event_ctl = SNBEP_U_MSR_PMON_CTL0, + .event_mask = IVT_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &ivt_uncore_msr_ops, + .format_group = &ivt_uncore_ubox_format_group, +}; + +static struct extra_reg ivt_uncore_cbox_extra_regs[] = { + SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, + SNBEP_CBO_PMON_CTL_TID_EN, 0x1), + SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), + SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8), + EVENT_EXTRA_END +}; + +static u64 ivt_cbox_filter_mask(int fields) +{ + u64 mask = 0; + + if (fields & 0x1) + mask |= IVT_CB0_MSR_PMON_BOX_FILTER_TID; + if (fields & 0x2) + mask |= IVT_CB0_MSR_PMON_BOX_FILTER_LINK; + if (fields & 0x4) + mask |= IVT_CB0_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) + mask |= IVT_CB0_MSR_PMON_BOX_FILTER_NID; + if (fields & 0x10) + mask |= IVT_CB0_MSR_PMON_BOX_FILTER_OPC; + + return mask; +} + +static struct event_constraint * +ivt_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + return __snbep_cbox_get_constraint(box, event, ivt_cbox_filter_mask); +} + +static int ivt_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct extra_reg *er; + int idx = 0; + + for (er = ivt_uncore_cbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + idx |= er->idx; + } + + if (idx) { + reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + + SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & ivt_cbox_filter_mask(idx); + reg1->idx = idx; + } + return 0; +} + +static void ivt_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + u64 filter = uncore_shared_reg_config(box, 0); + wrmsrl(reg1->reg, filter & 0xffffffff); + wrmsrl(reg1->reg + 6, filter >> 32); + } + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops ivt_uncore_cbox_ops = { + .init_box = ivt_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = ivt_cbox_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = ivt_cbox_hw_config, + .get_constraint = ivt_cbox_get_constraint, + .put_constraint = snbep_cbox_put_constraint, +}; + +static struct intel_uncore_type ivt_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 15, + .perf_ctr_bits = 44, + .event_ctl = SNBEP_C0_MSR_PMON_CTL0, + .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, + .event_mask = IVT_CBO_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = SNBEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = snbep_uncore_cbox_constraints, + .ops = &ivt_uncore_cbox_ops, + .format_group = &ivt_uncore_cbox_format_group, +}; + +static struct intel_uncore_ops ivt_uncore_pcu_ops = { + IVT_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snbep_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type ivt_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, + .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, + .event_mask = IVT_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &ivt_uncore_pcu_ops, + .format_group = &ivt_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *ivt_msr_uncores[] = { + &ivt_uncore_ubox, + &ivt_uncore_cbox, + &ivt_uncore_pcu, + NULL, +}; + +static struct intel_uncore_type ivt_uncore_ha = { + .name = "ha", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + IVT_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type ivt_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + IVT_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type ivt_uncore_qpi = { + .name = "qpi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivt_uncore_pci_ops, + .format_group = &ivt_uncore_qpi_format_group, +}; + +static struct intel_uncore_type ivt_uncore_r2pcie = { + .name = "r2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r2pcie_constraints, + IVT_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type ivt_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 3, + .num_boxes = 2, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r3qpi_constraints, + IVT_UNCORE_PCI_COMMON_INIT(), +}; + +enum { + IVT_PCI_UNCORE_HA, + IVT_PCI_UNCORE_IMC, + IVT_PCI_UNCORE_QPI, + IVT_PCI_UNCORE_R2PCIE, + IVT_PCI_UNCORE_R3QPI, +}; + +static struct intel_uncore_type *ivt_pci_uncores[] = { + [IVT_PCI_UNCORE_HA] = &ivt_uncore_ha, + [IVT_PCI_UNCORE_IMC] = &ivt_uncore_imc, + [IVT_PCI_UNCORE_QPI] = &ivt_uncore_qpi, + [IVT_PCI_UNCORE_R2PCIE] = &ivt_uncore_r2pcie, + [IVT_PCI_UNCORE_R3QPI] = &ivt_uncore_r3qpi, + NULL, +}; + +static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { + { /* Home Agent 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30), + .driver_data = IVT_PCI_UNCORE_HA, + }, + { /* Home Agent 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38), + .driver_data = IVT_PCI_UNCORE_HA, + }, + { /* MC0 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4), + .driver_data = IVT_PCI_UNCORE_IMC, + }, + { /* MC0 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5), + .driver_data = IVT_PCI_UNCORE_IMC, + }, + { /* MC0 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0), + .driver_data = IVT_PCI_UNCORE_IMC, + }, + { /* MC0 Channel 4 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1), + .driver_data = IVT_PCI_UNCORE_IMC, + }, + { /* MC1 Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4), + .driver_data = IVT_PCI_UNCORE_IMC, + }, + { /* MC1 Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5), + .driver_data = IVT_PCI_UNCORE_IMC, + }, + { /* MC1 Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0), + .driver_data = IVT_PCI_UNCORE_IMC, + }, + { /* MC1 Channel 4 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), + .driver_data = IVT_PCI_UNCORE_IMC, + }, + { /* QPI0 Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), + .driver_data = IVT_PCI_UNCORE_QPI, + }, + { /* QPI0 Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33), + .driver_data = IVT_PCI_UNCORE_QPI, + }, + { /* QPI1 Port 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a), + .driver_data = IVT_PCI_UNCORE_QPI, + }, + { /* R2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34), + .driver_data = IVT_PCI_UNCORE_R2PCIE, + }, + { /* R3QPI0 Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36), + .driver_data = IVT_PCI_UNCORE_R3QPI, + }, + { /* R3QPI0 Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37), + .driver_data = IVT_PCI_UNCORE_R3QPI, + }, + { /* R3QPI1 Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), + .driver_data = IVT_PCI_UNCORE_R3QPI, + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver ivt_uncore_pci_driver = { + .name = "ivt_uncore", + .id_table = ivt_uncore_pci_ids, +}; +/* end of IvyTown uncore support */ + /* Sandy Bridge uncore support */ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) { @@ -2766,6 +3221,8 @@ static void uncore_pci_remove(struct pci_dev *pdev) if (WARN_ON_ONCE(phys_id != box->phys_id)) return; + pci_set_drvdata(pdev, NULL); + raw_spin_lock(&uncore_box_lock); list_del(&box->list); raw_spin_unlock(&uncore_box_lock); @@ -2784,11 +3241,7 @@ static void uncore_pci_remove(struct pci_dev *pdev) static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - struct intel_uncore_type *type; - - type = (struct intel_uncore_type *)id->driver_data; - - return uncore_pci_add(type, pdev); + return uncore_pci_add(pci_uncores[id->driver_data], pdev); } static int __init uncore_pci_init(void) @@ -2797,12 +3250,19 @@ static int __init uncore_pci_init(void) switch (boot_cpu_data.x86_model) { case 45: /* Sandy Bridge-EP */ - ret = snbep_pci2phy_map_init(); + ret = snbep_pci2phy_map_init(0x3ce0); if (ret) return ret; pci_uncores = snbep_pci_uncores; uncore_pci_driver = &snbep_uncore_pci_driver; break; + case 62: /* IvyTown */ + ret = snbep_pci2phy_map_init(0x0e1e); + if (ret) + return ret; + pci_uncores = ivt_pci_uncores; + uncore_pci_driver = &ivt_uncore_pci_driver; + break; default: return 0; } @@ -3103,6 +3563,12 @@ static int __init uncore_cpu_init(void) nhmex_uncore_cbox.num_boxes = max_cores; msr_uncores = nhmex_msr_uncores; break; + case 62: /* IvyTown */ + if (ivt_uncore_cbox.num_boxes > max_cores) + ivt_uncore_cbox.num_boxes = max_cores; + msr_uncores = ivt_msr_uncores; + break; + default: return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index f14a3413a85d..f9528917f6e8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -76,7 +76,7 @@ #define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00 #define SNBEP_PMON_CTL_RST (1 << 17) #define SNBEP_PMON_CTL_EDGE_DET (1 << 18) -#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21) /* only for QPI */ +#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21) #define SNBEP_PMON_CTL_EN (1 << 22) #define SNBEP_PMON_CTL_INVERT (1 << 23) #define SNBEP_PMON_CTL_TRESH_MASK 0xff000000 @@ -171,6 +171,55 @@ #define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc #define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd +/* IVT event control */ +#define IVT_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ + SNBEP_PMON_BOX_CTL_RST_CTRS) +#define IVT_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_TRESH_MASK) +/* IVT Ubox */ +#define IVT_U_MSR_PMON_GLOBAL_CTL 0xc00 +#define IVT_U_PMON_GLOBAL_FRZ_ALL (1 << 31) +#define IVT_U_PMON_GLOBAL_UNFRZ_ALL (1 << 29) + +#define IVT_U_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_U_MSR_PMON_CTL_TRESH_MASK) +/* IVT Cbo */ +#define IVT_CBO_MSR_PMON_RAW_EVENT_MASK (IVT_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) + +#define IVT_CB0_MSR_PMON_BOX_FILTER_TID (0x1fULL << 0) +#define IVT_CB0_MSR_PMON_BOX_FILTER_LINK (0xfULL << 5) +#define IVT_CB0_MSR_PMON_BOX_FILTER_STATE (0x3fULL << 17) +#define IVT_CB0_MSR_PMON_BOX_FILTER_NID (0xffffULL << 32) +#define IVT_CB0_MSR_PMON_BOX_FILTER_OPC (0x1ffULL << 52) +#define IVT_CB0_MSR_PMON_BOX_FILTER_C6 (0x1ULL << 61) +#define IVT_CB0_MSR_PMON_BOX_FILTER_NC (0x1ULL << 62) +#define IVT_CB0_MSR_PMON_BOX_FILTER_IOSC (0x1ULL << 63) + +/* IVT home agent */ +#define IVT_HA_PCI_PMON_CTL_Q_OCC_RST (1 << 16) +#define IVT_HA_PCI_PMON_RAW_EVENT_MASK \ + (IVT_PMON_RAW_EVENT_MASK | \ + IVT_HA_PCI_PMON_CTL_Q_OCC_RST) +/* IVT PCU */ +#define IVT_PCU_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_EV_SEL_EXT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) +/* IVT QPI */ +#define IVT_QPI_PCI_PMON_RAW_EVENT_MASK \ + (IVT_PMON_RAW_EVENT_MASK | \ + SNBEP_PMON_CTL_EV_SEL_EXT) + /* NHM-EX event control */ #define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff #define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00 -- cgit v1.2.3-59-g8ed1b From c43ca5091a374c1f6778bd7e4a39a5a10735a917 Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Fri, 19 Apr 2013 16:34:28 -0500 Subject: perf/x86/amd: Add support for AMD NB and L2I "uncore" counters Add support for AMD Family 15h [and above] northbridge performance counters. MSRs 0xc0010240 ~ 0xc0010247 are shared across all cores that share a common northbridge. Add support for AMD Family 16h L2 performance counters. MSRs 0xc0010230 ~ 0xc0010237 are shared across all cores that share a common L2 cache. We do not enable counter overflow interrupts. Sampling mode and per-thread events are not supported. Signed-off-by: Jacob Shin Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20130419213428.GA8229@jshin-Toonie Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 2 + arch/x86/include/uapi/asm/msr-index.h | 4 + arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/perf_event_amd_uncore.c | 546 ++++++++++++++++++++++++++++ 4 files changed, 553 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/perf_event_amd_uncore.c (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 93fe929d1cee..ac10df72925b 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -168,6 +168,7 @@ #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ #define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ #define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */ +#define X86_FEATURE_PERFCTR_L2 (6*32+28) /* L2 performance counter extensions */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -311,6 +312,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) #define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB) +#define cpu_has_perfctr_l2 boot_cpu_has(X86_FEATURE_PERFCTR_L2) #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index bf7bb68f43a8..b5757885d7a4 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -196,6 +196,10 @@ #define MSR_AMD64_IBSBRTARGET 0xc001103b #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +/* Fam 16h MSRs */ +#define MSR_F16H_L2I_PERF_CTL 0xc0010230 +#define MSR_F16H_L2I_PERF_CTR 0xc0010231 + /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 #define MSR_F15H_PERF_CTR 0xc0010201 diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a0e067d3d96c..00745729f2a1 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -31,7 +31,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifdef CONFIG_PERF_EVENTS -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o +obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c new file mode 100644 index 000000000000..6dc62273639c --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -0,0 +1,546 @@ +/* + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Jacob Shin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define NUM_COUNTERS_NB 4 +#define NUM_COUNTERS_L2 4 +#define MAX_COUNTERS NUM_COUNTERS_NB + +#define RDPMC_BASE_NB 6 +#define RDPMC_BASE_L2 10 + +#define COUNTER_SHIFT 16 + +struct amd_uncore { + int id; + int refcnt; + int cpu; + int num_counters; + int rdpmc_base; + u32 msr_base; + cpumask_t *active_mask; + struct pmu *pmu; + struct perf_event *events[MAX_COUNTERS]; + struct amd_uncore *free_when_cpu_online; +}; + +static struct amd_uncore * __percpu *amd_uncore_nb; +static struct amd_uncore * __percpu *amd_uncore_l2; + +static struct pmu amd_nb_pmu; +static struct pmu amd_l2_pmu; + +static cpumask_t amd_nb_active_mask; +static cpumask_t amd_l2_active_mask; + +static bool is_nb_event(struct perf_event *event) +{ + return event->pmu->type == amd_nb_pmu.type; +} + +static bool is_l2_event(struct perf_event *event) +{ + return event->pmu->type == amd_l2_pmu.type; +} + +static struct amd_uncore *event_to_amd_uncore(struct perf_event *event) +{ + if (is_nb_event(event) && amd_uncore_nb) + return *per_cpu_ptr(amd_uncore_nb, event->cpu); + else if (is_l2_event(event) && amd_uncore_l2) + return *per_cpu_ptr(amd_uncore_l2, event->cpu); + + return NULL; +} + +static void amd_uncore_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new; + s64 delta; + + /* + * since we do not enable counter overflow interrupts, + * we do not have to worry about prev_count changing on us + */ + + prev = local64_read(&hwc->prev_count); + rdpmcl(hwc->event_base_rdpmc, new); + local64_set(&hwc->prev_count, new); + delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); + delta >>= COUNTER_SHIFT; + local64_add(delta, &event->count); +} + +static void amd_uncore_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (flags & PERF_EF_RELOAD) + wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count)); + + hwc->state = 0; + wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE)); + perf_event_update_userpage(event); +} + +static void amd_uncore_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config); + hwc->state |= PERF_HES_STOPPED; + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + amd_uncore_read(event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +static int amd_uncore_add(struct perf_event *event, int flags) +{ + int i; + struct amd_uncore *uncore = event_to_amd_uncore(event); + struct hw_perf_event *hwc = &event->hw; + + /* are we already assigned? */ + if (hwc->idx != -1 && uncore->events[hwc->idx] == event) + goto out; + + for (i = 0; i < uncore->num_counters; i++) { + if (uncore->events[i] == event) { + hwc->idx = i; + goto out; + } + } + + /* if not, take the first available counter */ + hwc->idx = -1; + for (i = 0; i < uncore->num_counters; i++) { + if (cmpxchg(&uncore->events[i], NULL, event) == NULL) { + hwc->idx = i; + break; + } + } + +out: + if (hwc->idx == -1) + return -EBUSY; + + hwc->config_base = uncore->msr_base + (2 * hwc->idx); + hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx); + hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx; + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + amd_uncore_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void amd_uncore_del(struct perf_event *event, int flags) +{ + int i; + struct amd_uncore *uncore = event_to_amd_uncore(event); + struct hw_perf_event *hwc = &event->hw; + + amd_uncore_stop(event, PERF_EF_UPDATE); + + for (i = 0; i < uncore->num_counters; i++) { + if (cmpxchg(&uncore->events[i], event, NULL) == event) + break; + } + + hwc->idx = -1; +} + +static int amd_uncore_event_init(struct perf_event *event) +{ + struct amd_uncore *uncore; + struct hw_perf_event *hwc = &event->hw; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* + * NB and L2 counters (MSRs) are shared across all cores that share the + * same NB / L2 cache. Interrupts can be directed to a single target + * core, however, event counts generated by processes running on other + * cores cannot be masked out. So we do not support sampling and + * per-thread events. + */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + /* NB and L2 counters do not have usr/os/guest/host bits */ + if (event->attr.exclude_user || event->attr.exclude_kernel || + event->attr.exclude_host || event->attr.exclude_guest) + return -EINVAL; + + /* and we do not enable counter overflow interrupts */ + hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; + hwc->idx = -1; + + if (event->cpu < 0) + return -EINVAL; + + uncore = event_to_amd_uncore(event); + if (!uncore) + return -ENODEV; + + /* + * since request can come in to any of the shared cores, we will remap + * to a single common cpu. + */ + event->cpu = uncore->cpu; + + return 0; +} + +static ssize_t amd_uncore_attr_show_cpumask(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int n; + cpumask_t *active_mask; + struct pmu *pmu = dev_get_drvdata(dev); + + if (pmu->type == amd_nb_pmu.type) + active_mask = &amd_nb_active_mask; + else if (pmu->type == amd_l2_pmu.type) + active_mask = &amd_l2_active_mask; + else + return 0; + + n = cpulist_scnprintf(buf, PAGE_SIZE - 2, active_mask); + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} +static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL); + +static struct attribute *amd_uncore_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group amd_uncore_attr_group = { + .attrs = amd_uncore_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-7,32-35"); +PMU_FORMAT_ATTR(umask, "config:8-15"); + +static struct attribute *amd_uncore_format_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + NULL, +}; + +static struct attribute_group amd_uncore_format_group = { + .name = "format", + .attrs = amd_uncore_format_attr, +}; + +static const struct attribute_group *amd_uncore_attr_groups[] = { + &amd_uncore_attr_group, + &amd_uncore_format_group, + NULL, +}; + +static struct pmu amd_nb_pmu = { + .attr_groups = amd_uncore_attr_groups, + .name = "amd_nb", + .event_init = amd_uncore_event_init, + .add = amd_uncore_add, + .del = amd_uncore_del, + .start = amd_uncore_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, +}; + +static struct pmu amd_l2_pmu = { + .attr_groups = amd_uncore_attr_groups, + .name = "amd_l2", + .event_init = amd_uncore_event_init, + .add = amd_uncore_add, + .del = amd_uncore_del, + .start = amd_uncore_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, +}; + +static struct amd_uncore * __cpuinit amd_uncore_alloc(unsigned int cpu) +{ + return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL, + cpu_to_node(cpu)); +} + +static void __cpuinit amd_uncore_cpu_up_prepare(unsigned int cpu) +{ + struct amd_uncore *uncore; + + if (amd_uncore_nb) { + uncore = amd_uncore_alloc(cpu); + uncore->cpu = cpu; + uncore->num_counters = NUM_COUNTERS_NB; + uncore->rdpmc_base = RDPMC_BASE_NB; + uncore->msr_base = MSR_F15H_NB_PERF_CTL; + uncore->active_mask = &amd_nb_active_mask; + uncore->pmu = &amd_nb_pmu; + *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; + } + + if (amd_uncore_l2) { + uncore = amd_uncore_alloc(cpu); + uncore->cpu = cpu; + uncore->num_counters = NUM_COUNTERS_L2; + uncore->rdpmc_base = RDPMC_BASE_L2; + uncore->msr_base = MSR_F16H_L2I_PERF_CTL; + uncore->active_mask = &amd_l2_active_mask; + uncore->pmu = &amd_l2_pmu; + *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + } +} + +static struct amd_uncore * +__cpuinit amd_uncore_find_online_sibling(struct amd_uncore *this, + struct amd_uncore * __percpu *uncores) +{ + unsigned int cpu; + struct amd_uncore *that; + + for_each_online_cpu(cpu) { + that = *per_cpu_ptr(uncores, cpu); + + if (!that) + continue; + + if (this == that) + continue; + + if (this->id == that->id) { + that->free_when_cpu_online = this; + this = that; + break; + } + } + + this->refcnt++; + return this; +} + +static void __cpuinit amd_uncore_cpu_starting(unsigned int cpu) +{ + unsigned int eax, ebx, ecx, edx; + struct amd_uncore *uncore; + + if (amd_uncore_nb) { + uncore = *per_cpu_ptr(amd_uncore_nb, cpu); + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); + uncore->id = ecx & 0xff; + + uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb); + *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; + } + + if (amd_uncore_l2) { + unsigned int apicid = cpu_data(cpu).apicid; + unsigned int nshared; + + uncore = *per_cpu_ptr(amd_uncore_l2, cpu); + cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx); + nshared = ((eax >> 14) & 0xfff) + 1; + uncore->id = apicid - (apicid % nshared); + + uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2); + *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + } +} + +static void __cpuinit uncore_online(unsigned int cpu, + struct amd_uncore * __percpu *uncores) +{ + struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); + + kfree(uncore->free_when_cpu_online); + uncore->free_when_cpu_online = NULL; + + if (cpu == uncore->cpu) + cpumask_set_cpu(cpu, uncore->active_mask); +} + +static void __cpuinit amd_uncore_cpu_online(unsigned int cpu) +{ + if (amd_uncore_nb) + uncore_online(cpu, amd_uncore_nb); + + if (amd_uncore_l2) + uncore_online(cpu, amd_uncore_l2); +} + +static void __cpuinit uncore_down_prepare(unsigned int cpu, + struct amd_uncore * __percpu *uncores) +{ + unsigned int i; + struct amd_uncore *this = *per_cpu_ptr(uncores, cpu); + + if (this->cpu != cpu) + return; + + /* this cpu is going down, migrate to a shared sibling if possible */ + for_each_online_cpu(i) { + struct amd_uncore *that = *per_cpu_ptr(uncores, i); + + if (cpu == i) + continue; + + if (this == that) { + perf_pmu_migrate_context(this->pmu, cpu, i); + cpumask_clear_cpu(cpu, that->active_mask); + cpumask_set_cpu(i, that->active_mask); + that->cpu = i; + break; + } + } +} + +static void __cpuinit amd_uncore_cpu_down_prepare(unsigned int cpu) +{ + if (amd_uncore_nb) + uncore_down_prepare(cpu, amd_uncore_nb); + + if (amd_uncore_l2) + uncore_down_prepare(cpu, amd_uncore_l2); +} + +static void __cpuinit uncore_dead(unsigned int cpu, + struct amd_uncore * __percpu *uncores) +{ + struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); + + if (cpu == uncore->cpu) + cpumask_clear_cpu(cpu, uncore->active_mask); + + if (!--uncore->refcnt) + kfree(uncore); + *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; +} + +static void __cpuinit amd_uncore_cpu_dead(unsigned int cpu) +{ + if (amd_uncore_nb) + uncore_dead(cpu, amd_uncore_nb); + + if (amd_uncore_l2) + uncore_dead(cpu, amd_uncore_l2); +} + +static int __cpuinit +amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action, + void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + amd_uncore_cpu_up_prepare(cpu); + break; + + case CPU_STARTING: + amd_uncore_cpu_starting(cpu); + break; + + case CPU_ONLINE: + amd_uncore_cpu_online(cpu); + break; + + case CPU_DOWN_PREPARE: + amd_uncore_cpu_down_prepare(cpu); + break; + + case CPU_UP_CANCELED: + case CPU_DEAD: + amd_uncore_cpu_dead(cpu); + break; + + default: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block amd_uncore_cpu_notifier_block __cpuinitdata = { + .notifier_call = amd_uncore_cpu_notifier, + .priority = CPU_PRI_PERF + 1, +}; + +static void __init init_cpu_already_online(void *dummy) +{ + unsigned int cpu = smp_processor_id(); + + amd_uncore_cpu_up_prepare(cpu); + amd_uncore_cpu_starting(cpu); + amd_uncore_cpu_online(cpu); +} + +static int __init amd_uncore_init(void) +{ + unsigned int cpu; + int ret = -ENODEV; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return -ENODEV; + + if (!cpu_has_topoext) + return -ENODEV; + + if (cpu_has_perfctr_nb) { + amd_uncore_nb = alloc_percpu(struct amd_uncore *); + perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); + + printk(KERN_INFO "perf: AMD NB counters detected\n"); + ret = 0; + } + + if (cpu_has_perfctr_l2) { + amd_uncore_l2 = alloc_percpu(struct amd_uncore *); + perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1); + + printk(KERN_INFO "perf: AMD L2I counters detected\n"); + ret = 0; + } + + if (ret) + return -ENODEV; + + get_online_cpus(); + /* init cpus already online before registering for hotplug notifier */ + for_each_online_cpu(cpu) + smp_call_function_single(cpu, init_cpu_already_online, NULL, 1); + + register_cpu_notifier(&amd_uncore_cpu_notifier_block); + put_online_cpus(); + + return 0; +} +device_initcall(amd_uncore_init); -- cgit v1.2.3-59-g8ed1b From a5ebe0ba3dff658c5286e8d5f20e4328f719d5a3 Mon Sep 17 00:00:00 2001 From: George Dunlap Date: Wed, 3 Apr 2013 15:46:28 +0100 Subject: perf/x86: Check all MSRs before passing hw check check_hw_exists() has a number of checks which go to two exit paths: msr_fail and bios_fail. Checks classified as msr_fail will cause check_hw_exists() to return false, causing the PMU not to be used; bios_fail checks will only cause a warning to be printed, but will return true. The problem is that if there are both msr failures and bios failures, and the routine hits a bios_fail check first, it will exit early and return true, not finishing the rest of the msr checks. If those msrs are in fact broken, it will cause them to be used erroneously. In the case of a Xen PV VM, the guest OS has read access to all the MSRs, but write access is white-listed to supported features. Writes to unsupported MSRs have no effect. The PMU MSRs are not (typically) supported, because they are expensive to save and restore on a VM context switch. One of the "msr_fail" checks is supposed to detect this circumstance (ether for Xen or KVM) and disable the harware PMU. However, on one of my AMD boxen, there is (apparently) a broken BIOS which triggers one of the bios_fail checks. In particular, MSR_K7_EVNTSEL0 has the ARCH_PERFMON_EVENTSEL_ENABLE bit set. The guest kernel detects this because it has read access to all MSRs, and causes it to skip the rest of the checks and try to use the non-existent hardware PMU. This minimally causes a lot of useless instruction emulation and Xen console spam; it may cause other issues with the watchdog as well. This changset causes check_hw_exists() to go through all of the msr checks, failing and returning false if any of them fail. This makes sure that a guest running under Xen without a virtual PMU will detect that there is no functioning PMU and not attempt to use it. This problem affects kernels as far back as 3.2, and should thus be considered for backport. Signed-off-by: George Dunlap Cc: Konrad Wilk Cc: Ian Campbell Cc: David Vrabel Cc: Andrew Cooper Link: http://lkml.kernel.org/r/1365000388-32448-1-git-send-email-george.dunlap@eu.citrix.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5ed7a4c5baf7..1025f3c99d20 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -180,8 +180,9 @@ static void release_pmc_hardware(void) {} static bool check_hw_exists(void) { - u64 val, val_new = ~0; - int i, reg, ret = 0; + u64 val, val_fail, val_new= ~0; + int i, reg, reg_fail, ret = 0; + int bios_fail = 0; /* * Check to see if the BIOS enabled any of the counters, if so @@ -192,8 +193,11 @@ static bool check_hw_exists(void) ret = rdmsrl_safe(reg, &val); if (ret) goto msr_fail; - if (val & ARCH_PERFMON_EVENTSEL_ENABLE) - goto bios_fail; + if (val & ARCH_PERFMON_EVENTSEL_ENABLE) { + bios_fail = 1; + val_fail = val; + reg_fail = reg; + } } if (x86_pmu.num_counters_fixed) { @@ -202,8 +206,11 @@ static bool check_hw_exists(void) if (ret) goto msr_fail; for (i = 0; i < x86_pmu.num_counters_fixed; i++) { - if (val & (0x03 << i*4)) - goto bios_fail; + if (val & (0x03 << i*4)) { + bios_fail = 1; + val_fail = val; + reg_fail = reg; + } } } @@ -221,14 +228,13 @@ static bool check_hw_exists(void) if (ret || val != val_new) goto msr_fail; - return true; - -bios_fail: /* * We still allow the PMU driver to operate: */ - printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n"); - printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val); + if (bios_fail) { + printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n"); + printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail); + } return true; -- cgit v1.2.3-59-g8ed1b From 0cf5f4323b1b51ecca3e952f95110e03ea611882 Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Mon, 15 Apr 2013 12:21:22 -0500 Subject: perf/x86/amd: Remove old-style NB counter support from perf_event_amd.c Support for NB counters, MSRs 0xc0010240 ~ 0xc0010247, got moved to perf_event_amd_uncore.c in the following commit: c43ca5091a37 perf/x86/amd: Add support for AMD NB and L2I "uncore" counters AMD Family 10h NB events (events 0xe0 ~ 0xff, on MSRs 0xc001000 ~ 0xc001007) will still continue to be handled by perf_event_amd.c Signed-off-by: Jacob Shin Acked-by: Peter Zijlstra Cc: Stephane Eranian Cc: Jiri Olsa Cc: Jacob Shin Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1366046483-1765-2-git-send-email-jacob.shin@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_amd.c | 138 ++--------------------------------- 1 file changed, 5 insertions(+), 133 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index dfdab42aed27..7e28d9467bb4 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -132,14 +132,11 @@ static u64 amd_pmu_event_map(int hw_event) return amd_perfmon_event_map[hw_event]; } -static struct event_constraint *amd_nb_event_constraint; - /* * Previously calculated offsets */ static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; -static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly; /* * Legacy CPUs: @@ -147,14 +144,10 @@ static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly; * * CPUs with core performance counter extensions: * 6 counters starting at 0xc0010200 each offset by 2 - * - * CPUs with north bridge performance counter extensions: - * 4 additional counters starting at 0xc0010240 each offset by 2 - * (indexed right above either one of the above core counters) */ static inline int amd_pmu_addr_offset(int index, bool eventsel) { - int offset, first, base; + int offset; if (!index) return index; @@ -167,23 +160,7 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) if (offset) return offset; - if (amd_nb_event_constraint && - test_bit(index, amd_nb_event_constraint->idxmsk)) { - /* - * calculate the offset of NB counters with respect to - * base eventsel or perfctr - */ - - first = find_first_bit(amd_nb_event_constraint->idxmsk, - X86_PMC_IDX_MAX); - - if (eventsel) - base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel; - else - base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr; - - offset = base + ((index - first) << 1); - } else if (!cpu_has_perfctr_core) + if (!cpu_has_perfctr_core) offset = index; else offset = index << 1; @@ -196,36 +173,6 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) return offset; } -static inline int amd_pmu_rdpmc_index(int index) -{ - int ret, first; - - if (!index) - return index; - - ret = rdpmc_indexes[index]; - - if (ret) - return ret; - - if (amd_nb_event_constraint && - test_bit(index, amd_nb_event_constraint->idxmsk)) { - /* - * according to the mnual, ECX value of the NB counters is - * the index of the NB counter (0, 1, 2 or 3) plus 6 - */ - - first = find_first_bit(amd_nb_event_constraint->idxmsk, - X86_PMC_IDX_MAX); - ret = index - first + 6; - } else - ret = index; - - rdpmc_indexes[index] = ret; - - return ret; -} - static int amd_core_hw_config(struct perf_event *event) { if (event->attr.exclude_host && event->attr.exclude_guest) @@ -244,34 +191,6 @@ static int amd_core_hw_config(struct perf_event *event) return 0; } -/* - * NB counters do not support the following event select bits: - * Host/Guest only - * Counter mask - * Invert counter mask - * Edge detect - * OS/User mode - */ -static int amd_nb_hw_config(struct perf_event *event) -{ - /* for NB, we only allow system wide counting mode */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EINVAL; - - if (event->attr.exclude_user || event->attr.exclude_kernel || - event->attr.exclude_host || event->attr.exclude_guest) - return -EINVAL; - - event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | - ARCH_PERFMON_EVENTSEL_OS); - - if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB | - ARCH_PERFMON_EVENTSEL_INT)) - return -EINVAL; - - return 0; -} - /* * AMD64 events are detected based on their event codes. */ @@ -285,11 +204,6 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc) return (hwc->config & 0xe0) == 0xe0; } -static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc) -{ - return amd_nb_event_constraint && amd_is_nb_event(hwc); -} - static inline int amd_has_nb(struct cpu_hw_events *cpuc) { struct amd_nb *nb = cpuc->amd_nb; @@ -315,9 +229,6 @@ static int amd_pmu_hw_config(struct perf_event *event) if (event->attr.type == PERF_TYPE_RAW) event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; - if (amd_is_perfctr_nb_event(&event->hw)) - return amd_nb_hw_config(event); - return amd_core_hw_config(event); } @@ -341,19 +252,6 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, } } -static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc) -{ - int core_id = cpu_data(smp_processor_id()).cpu_core_id; - - /* deliver interrupts only to this core */ - if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) { - hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE; - hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK; - hwc->config |= (u64)(core_id) << - AMD64_EVENTSEL_INT_CORE_SEL_SHIFT; - } -} - /* * AMD64 NorthBridge events need special treatment because * counter access needs to be synchronized across all cores @@ -441,9 +339,6 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev if (new == -1) return &emptyconstraint; - if (amd_is_perfctr_nb_event(hwc)) - amd_nb_interrupt_hw_config(hwc); - return &nb->event_constraints[new]; } @@ -543,8 +438,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))) return &unconstrained; - return __amd_get_nb_event_constraints(cpuc, event, - amd_nb_event_constraint); + return __amd_get_nb_event_constraints(cpuc, event, NULL); } static void amd_put_event_constraints(struct cpu_hw_events *cpuc, @@ -643,9 +537,6 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); -static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0); -static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0); - static struct event_constraint * amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) { @@ -711,8 +602,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev return &amd_f15_PMC20; } case AMD_EVENT_NB: - return __amd_get_nb_event_constraints(cpuc, event, - amd_nb_event_constraint); + /* moved to perf_event_amd_uncore.c */ + return &emptyconstraint; default: return &emptyconstraint; } @@ -738,7 +629,6 @@ static __initconst const struct x86_pmu amd_pmu = { .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, .addr_offset = amd_pmu_addr_offset, - .rdpmc_index = amd_pmu_rdpmc_index, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), .num_counters = AMD64_NUM_COUNTERS, @@ -790,23 +680,6 @@ static int setup_perfctr_core(void) return 0; } -static int setup_perfctr_nb(void) -{ - if (!cpu_has_perfctr_nb) - return -ENODEV; - - x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB; - - if (cpu_has_perfctr_core) - amd_nb_event_constraint = &amd_NBPMC96; - else - amd_nb_event_constraint = &amd_NBPMC74; - - printk(KERN_INFO "perf: AMD northbridge performance counters detected\n"); - - return 0; -} - __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ @@ -817,7 +690,6 @@ __init int amd_pmu_init(void) setup_event_constraints(); setup_perfctr_core(); - setup_perfctr_nb(); /* Events are common for all AMDs */ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, -- cgit v1.2.3-59-g8ed1b From 94f4db3590893c600506105b88dab581c7f6f5c8 Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Sun, 21 Apr 2013 13:06:27 -0500 Subject: perf/x86/amd: Fix AMD NB and L2I "uncore" support Borislav Petkov reported a lockdep splat warning about kzalloc() done in an IPI (hardirq) handler. This is a real bug, do not call kzalloc() in a smp_call_function_single() handler because it can schedule and crash. Reported-by: Borislav Petkov Signed-off-by: Jacob Shin Tested-by: Borislav Petkov Cc: Borislav Petkov Cc: Cc: Cc: Cc: Link: http://lkml.kernel.org/r/20130421180627.GA21049@jshin-Toonie Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_amd_uncore.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c index 6dc62273639c..c0c661adf03e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -498,7 +498,6 @@ static void __init init_cpu_already_online(void *dummy) { unsigned int cpu = smp_processor_id(); - amd_uncore_cpu_up_prepare(cpu); amd_uncore_cpu_starting(cpu); amd_uncore_cpu_online(cpu); } @@ -535,8 +534,10 @@ static int __init amd_uncore_init(void) get_online_cpus(); /* init cpus already online before registering for hotplug notifier */ - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { + amd_uncore_cpu_up_prepare(cpu); smp_call_function_single(cpu, init_cpu_already_online, NULL, 1); + } register_cpu_notifier(&amd_uncore_cpu_notifier_block); put_online_cpus(); -- cgit v1.2.3-59-g8ed1b From 5ac2b5c2721501a8f5c5e1cd4116cbc31ace6886 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 24 Apr 2013 09:26:30 +0200 Subject: perf/x86/intel/P4: Robistify P4 PMU types Linus found, while extending integer type extension checks in the sparse static code checker, various fragile patterns of mixed signed/unsigned 64-bit/32-bit integer use in perf_events_p4.c. The relevant hardware register ABI is 64 bit wide on 32-bit kernels as well, so clean it all up a bit, remove unnecessary casts, and make sure we use 64-bit unsigned integers in these places. [ Unfortunately this patch was not tested on real P4 hardware, those are pretty rare already. If this patch causes any problems on P4 hardware then please holler ... ] Reported-by: Linus Torvalds Signed-off-by: Ingo Molnar Cc: David Miller Cc: Theodore Ts'o Cc: Oleg Nesterov Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20130424072630.GB1780@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 62 ++++++++++++++++++------------------ arch/x86/kernel/cpu/perf_event_p4.c | 9 +++--- 2 files changed, 35 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 4f7e67e2345e..85e13ccf15c4 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -24,45 +24,45 @@ #define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1) #define ARCH_P4_UNFLAGGED_BIT ((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1)) -#define P4_ESCR_EVENT_MASK 0x7e000000U +#define P4_ESCR_EVENT_MASK 0x7e000000ULL #define P4_ESCR_EVENT_SHIFT 25 -#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U +#define P4_ESCR_EVENTMASK_MASK 0x01fffe00ULL #define P4_ESCR_EVENTMASK_SHIFT 9 -#define P4_ESCR_TAG_MASK 0x000001e0U +#define P4_ESCR_TAG_MASK 0x000001e0ULL #define P4_ESCR_TAG_SHIFT 5 -#define P4_ESCR_TAG_ENABLE 0x00000010U -#define P4_ESCR_T0_OS 0x00000008U -#define P4_ESCR_T0_USR 0x00000004U -#define P4_ESCR_T1_OS 0x00000002U -#define P4_ESCR_T1_USR 0x00000001U +#define P4_ESCR_TAG_ENABLE 0x00000010ULL +#define P4_ESCR_T0_OS 0x00000008ULL +#define P4_ESCR_T0_USR 0x00000004ULL +#define P4_ESCR_T1_OS 0x00000002ULL +#define P4_ESCR_T1_USR 0x00000001ULL #define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT) #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) -#define P4_CCCR_OVF 0x80000000U -#define P4_CCCR_CASCADE 0x40000000U -#define P4_CCCR_OVF_PMI_T0 0x04000000U -#define P4_CCCR_OVF_PMI_T1 0x08000000U -#define P4_CCCR_FORCE_OVF 0x02000000U -#define P4_CCCR_EDGE 0x01000000U -#define P4_CCCR_THRESHOLD_MASK 0x00f00000U +#define P4_CCCR_OVF 0x80000000ULL +#define P4_CCCR_CASCADE 0x40000000ULL +#define P4_CCCR_OVF_PMI_T0 0x04000000ULL +#define P4_CCCR_OVF_PMI_T1 0x08000000ULL +#define P4_CCCR_FORCE_OVF 0x02000000ULL +#define P4_CCCR_EDGE 0x01000000ULL +#define P4_CCCR_THRESHOLD_MASK 0x00f00000ULL #define P4_CCCR_THRESHOLD_SHIFT 20 -#define P4_CCCR_COMPLEMENT 0x00080000U -#define P4_CCCR_COMPARE 0x00040000U -#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U +#define P4_CCCR_COMPLEMENT 0x00080000ULL +#define P4_CCCR_COMPARE 0x00040000ULL +#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000ULL #define P4_CCCR_ESCR_SELECT_SHIFT 13 -#define P4_CCCR_ENABLE 0x00001000U -#define P4_CCCR_THREAD_SINGLE 0x00010000U -#define P4_CCCR_THREAD_BOTH 0x00020000U -#define P4_CCCR_THREAD_ANY 0x00030000U -#define P4_CCCR_RESERVED 0x00000fffU +#define P4_CCCR_ENABLE 0x00001000ULL +#define P4_CCCR_THREAD_SINGLE 0x00010000ULL +#define P4_CCCR_THREAD_BOTH 0x00020000ULL +#define P4_CCCR_THREAD_ANY 0x00030000ULL +#define P4_CCCR_RESERVED 0x00000fffULL #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) #define P4_GEN_ESCR_EMASK(class, name, bit) \ - class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) + class##__##name = ((1ULL << bit) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_EMASK_BIT(class, name) class##__##name /* @@ -107,7 +107,7 @@ * P4_PEBS_CONFIG_MASK and related bits on * modification.) */ -#define P4_CONFIG_ALIASABLE (1 << 9) +#define P4_CONFIG_ALIASABLE (1ULL << 9) /* * The bits we allow to pass for RAW events @@ -784,17 +784,17 @@ enum P4_ESCR_EMASKS { * Note we have UOP and PEBS bits reserved for now * just in case if we will need them once */ -#define P4_PEBS_CONFIG_ENABLE (1 << 7) -#define P4_PEBS_CONFIG_UOP_TAG (1 << 8) -#define P4_PEBS_CONFIG_METRIC_MASK 0x3f -#define P4_PEBS_CONFIG_MASK 0xff +#define P4_PEBS_CONFIG_ENABLE (1ULL << 7) +#define P4_PEBS_CONFIG_UOP_TAG (1ULL << 8) +#define P4_PEBS_CONFIG_METRIC_MASK 0x3FLL +#define P4_PEBS_CONFIG_MASK 0xFFLL /* * mem: Only counters MSR_IQ_COUNTER4 (16) and * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling */ -#define P4_PEBS_ENABLE 0x02000000U -#define P4_PEBS_ENABLE_UOP_TAG 0x01000000U +#define P4_PEBS_ENABLE 0x02000000ULL +#define P4_PEBS_ENABLE_UOP_TAG 0x01000000ULL #define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK) #define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK) diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 92c7e39a079f..3486e6660357 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -895,8 +895,8 @@ static void p4_pmu_disable_pebs(void) * So at moment let leave metrics turned on forever -- it's * ok for now but need to be revisited! * - * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)0); - * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)0); + * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0); + * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0); */ } @@ -910,8 +910,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) * asserted again and again */ (void)wrmsrl_safe(hwc->config_base, - (u64)(p4_config_unpack_cccr(hwc->config)) & - ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); + p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); } static void p4_pmu_disable_all(void) @@ -957,7 +956,7 @@ static void p4_pmu_enable_event(struct perf_event *event) u64 escr_addr, cccr; bind = &p4_event_bind_map[idx]; - escr_addr = (u64)bind->escr_msr[thread]; + escr_addr = bind->escr_msr[thread]; /* * - we dont support cascaded counters yet -- cgit v1.2.3-59-g8ed1b