aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/events
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/events')
-rw-r--r--arch/x86/events/Makefile13
-rw-r--r--arch/x86/events/amd/Makefile7
-rw-r--r--arch/x86/events/amd/ibs.c2
-rw-r--r--arch/x86/events/amd/uncore.c204
-rw-r--r--arch/x86/events/intel/core.c7
-rw-r--r--arch/x86/events/intel/cstate.c3
-rw-r--r--arch/x86/events/intel/pt.c6
-rw-r--r--arch/x86/events/intel/rapl.c63
-rw-r--r--arch/x86/events/intel/uncore.c234
9 files changed, 279 insertions, 260 deletions
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index 1d392c39fe56..b8ccdb5c9244 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,11 +1,4 @@
-obj-y += core.o
-
-obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o
-obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += amd/power.o
-obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o
-ifdef CONFIG_AMD_IOMMU
-obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o
-endif
-
-obj-$(CONFIG_CPU_SUP_INTEL) += msr.o
+obj-y += core.o
+obj-y += amd/
+obj-$(CONFIG_X86_LOCAL_APIC) += msr.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel/
diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile
new file mode 100644
index 000000000000..b1da46f396e0
--- /dev/null
+++ b/arch/x86/events/amd/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_CPU_SUP_AMD) += core.o uncore.o
+obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o
+obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD) += iommu.o
+endif
+
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 05612a2529c8..496e60391fac 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -1010,7 +1010,7 @@ static __init int amd_ibs_init(void)
* all online cpus.
*/
cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
- "perf/x86/amd/ibs:STARTING",
+ "perf/x86/amd/ibs:starting",
x86_pmu_amd_ibs_starting_cpu,
x86_pmu_amd_ibs_dying_cpu);
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index a0b1bdb3ad42..4d1f7f2d9aff 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -22,13 +22,17 @@
#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
-#define MAX_COUNTERS NUM_COUNTERS_NB
+#define NUM_COUNTERS_L3 6
+#define MAX_COUNTERS 6
#define RDPMC_BASE_NB 6
-#define RDPMC_BASE_L2 10
+#define RDPMC_BASE_LLC 10
#define COUNTER_SHIFT 16
+static int num_counters_llc;
+static int num_counters_nb;
+
static HLIST_HEAD(uncore_unused_list);
struct amd_uncore {
@@ -45,30 +49,30 @@ struct amd_uncore {
};
static struct amd_uncore * __percpu *amd_uncore_nb;
-static struct amd_uncore * __percpu *amd_uncore_l2;
+static struct amd_uncore * __percpu *amd_uncore_llc;
static struct pmu amd_nb_pmu;
-static struct pmu amd_l2_pmu;
+static struct pmu amd_llc_pmu;
static cpumask_t amd_nb_active_mask;
-static cpumask_t amd_l2_active_mask;
+static cpumask_t amd_llc_active_mask;
static bool is_nb_event(struct perf_event *event)
{
return event->pmu->type == amd_nb_pmu.type;
}
-static bool is_l2_event(struct perf_event *event)
+static bool is_llc_event(struct perf_event *event)
{
- return event->pmu->type == amd_l2_pmu.type;
+ return event->pmu->type == amd_llc_pmu.type;
}
static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
{
if (is_nb_event(event) && amd_uncore_nb)
return *per_cpu_ptr(amd_uncore_nb, event->cpu);
- else if (is_l2_event(event) && amd_uncore_l2)
- return *per_cpu_ptr(amd_uncore_l2, event->cpu);
+ else if (is_llc_event(event) && amd_uncore_llc)
+ return *per_cpu_ptr(amd_uncore_llc, event->cpu);
return NULL;
}
@@ -183,16 +187,16 @@ static int amd_uncore_event_init(struct perf_event *event)
return -ENOENT;
/*
- * NB and L2 counters (MSRs) are shared across all cores that share the
- * same NB / L2 cache. Interrupts can be directed to a single target
- * core, however, event counts generated by processes running on other
- * cores cannot be masked out. So we do not support sampling and
- * per-thread events.
+ * NB and Last level cache counters (MSRs) are shared across all cores
+ * that share the same NB / Last level cache. Interrupts can be directed
+ * to a single target core, however, event counts generated by processes
+ * running on other cores cannot be masked out. So we do not support
+ * sampling and per-thread events.
*/
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EINVAL;
- /* NB and L2 counters do not have usr/os/guest/host bits */
+ /* NB and Last level cache counters do not have usr/os/guest/host bits */
if (event->attr.exclude_user || event->attr.exclude_kernel ||
event->attr.exclude_host || event->attr.exclude_guest)
return -EINVAL;
@@ -226,8 +230,8 @@ static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
if (pmu->type == amd_nb_pmu.type)
active_mask = &amd_nb_active_mask;
- else if (pmu->type == amd_l2_pmu.type)
- active_mask = &amd_l2_active_mask;
+ else if (pmu->type == amd_llc_pmu.type)
+ active_mask = &amd_llc_active_mask;
else
return 0;
@@ -244,30 +248,47 @@ static struct attribute_group amd_uncore_attr_group = {
.attrs = amd_uncore_attrs,
};
-PMU_FORMAT_ATTR(event, "config:0-7,32-35");
-PMU_FORMAT_ATTR(umask, "config:8-15");
-
-static struct attribute *amd_uncore_format_attr[] = {
- &format_attr_event.attr,
- &format_attr_umask.attr,
- NULL,
-};
-
-static struct attribute_group amd_uncore_format_group = {
- .name = "format",
- .attrs = amd_uncore_format_attr,
+/*
+ * Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based
+ * on family
+ */
+#define AMD_FORMAT_ATTR(_dev, _name, _format) \
+static ssize_t \
+_dev##_show##_name(struct device *dev, \
+ struct device_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev);
+
+/* Used for each uncore counter type */
+#define AMD_ATTRIBUTE(_name) \
+static struct attribute *amd_uncore_format_attr_##_name[] = { \
+ &format_attr_event_##_name.attr, \
+ &format_attr_umask.attr, \
+ NULL, \
+}; \
+static struct attribute_group amd_uncore_format_group_##_name = { \
+ .name = "format", \
+ .attrs = amd_uncore_format_attr_##_name, \
+}; \
+static const struct attribute_group *amd_uncore_attr_groups_##_name[] = { \
+ &amd_uncore_attr_group, \
+ &amd_uncore_format_group_##_name, \
+ NULL, \
};
-static const struct attribute_group *amd_uncore_attr_groups[] = {
- &amd_uncore_attr_group,
- &amd_uncore_format_group,
- NULL,
-};
+AMD_FORMAT_ATTR(event, , "config:0-7,32-35");
+AMD_FORMAT_ATTR(umask, , "config:8-15");
+AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60");
+AMD_FORMAT_ATTR(event, _l3, "config:0-7");
+AMD_ATTRIBUTE(df);
+AMD_ATTRIBUTE(l3);
static struct pmu amd_nb_pmu = {
.task_ctx_nr = perf_invalid_context,
- .attr_groups = amd_uncore_attr_groups,
- .name = "amd_nb",
.event_init = amd_uncore_event_init,
.add = amd_uncore_add,
.del = amd_uncore_del,
@@ -276,10 +297,8 @@ static struct pmu amd_nb_pmu = {
.read = amd_uncore_read,
};
-static struct pmu amd_l2_pmu = {
+static struct pmu amd_llc_pmu = {
.task_ctx_nr = perf_invalid_context,
- .attr_groups = amd_uncore_attr_groups,
- .name = "amd_l2",
.event_init = amd_uncore_event_init,
.add = amd_uncore_add,
.del = amd_uncore_del,
@@ -296,14 +315,14 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
- struct amd_uncore *uncore_nb = NULL, *uncore_l2;
+ struct amd_uncore *uncore_nb = NULL, *uncore_llc;
if (amd_uncore_nb) {
uncore_nb = amd_uncore_alloc(cpu);
if (!uncore_nb)
goto fail;
uncore_nb->cpu = cpu;
- uncore_nb->num_counters = NUM_COUNTERS_NB;
+ uncore_nb->num_counters = num_counters_nb;
uncore_nb->rdpmc_base = RDPMC_BASE_NB;
uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
uncore_nb->active_mask = &amd_nb_active_mask;
@@ -312,18 +331,18 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
}
- if (amd_uncore_l2) {
- uncore_l2 = amd_uncore_alloc(cpu);
- if (!uncore_l2)
+ if (amd_uncore_llc) {
+ uncore_llc = amd_uncore_alloc(cpu);
+ if (!uncore_llc)
goto fail;
- uncore_l2->cpu = cpu;
- uncore_l2->num_counters = NUM_COUNTERS_L2;
- uncore_l2->rdpmc_base = RDPMC_BASE_L2;
- uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
- uncore_l2->active_mask = &amd_l2_active_mask;
- uncore_l2->pmu = &amd_l2_pmu;
- uncore_l2->id = -1;
- *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
+ uncore_llc->cpu = cpu;
+ uncore_llc->num_counters = num_counters_llc;
+ uncore_llc->rdpmc_base = RDPMC_BASE_LLC;
+ uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
+ uncore_llc->active_mask = &amd_llc_active_mask;
+ uncore_llc->pmu = &amd_llc_pmu;
+ uncore_llc->id = -1;
+ *per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
}
return 0;
@@ -376,17 +395,17 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
*per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
}
- if (amd_uncore_l2) {
+ if (amd_uncore_llc) {
unsigned int apicid = cpu_data(cpu).apicid;
unsigned int nshared;
- uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
+ uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
nshared = ((eax >> 14) & 0xfff) + 1;
uncore->id = apicid - (apicid % nshared);
- uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
- *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+ uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
+ *per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
}
return 0;
@@ -419,8 +438,8 @@ static int amd_uncore_cpu_online(unsigned int cpu)
if (amd_uncore_nb)
uncore_online(cpu, amd_uncore_nb);
- if (amd_uncore_l2)
- uncore_online(cpu, amd_uncore_l2);
+ if (amd_uncore_llc)
+ uncore_online(cpu, amd_uncore_llc);
return 0;
}
@@ -456,8 +475,8 @@ static int amd_uncore_cpu_down_prepare(unsigned int cpu)
if (amd_uncore_nb)
uncore_down_prepare(cpu, amd_uncore_nb);
- if (amd_uncore_l2)
- uncore_down_prepare(cpu, amd_uncore_l2);
+ if (amd_uncore_llc)
+ uncore_down_prepare(cpu, amd_uncore_llc);
return 0;
}
@@ -479,8 +498,8 @@ static int amd_uncore_cpu_dead(unsigned int cpu)
if (amd_uncore_nb)
uncore_dead(cpu, amd_uncore_nb);
- if (amd_uncore_l2)
- uncore_dead(cpu, amd_uncore_l2);
+ if (amd_uncore_llc)
+ uncore_dead(cpu, amd_uncore_llc);
return 0;
}
@@ -492,6 +511,47 @@ static int __init amd_uncore_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
goto fail_nodev;
+ switch(boot_cpu_data.x86) {
+ case 23:
+ /* Family 17h: */
+ num_counters_nb = NUM_COUNTERS_NB;
+ num_counters_llc = NUM_COUNTERS_L3;
+ /*
+ * For Family17h, the NorthBridge counters are
+ * re-purposed as Data Fabric counters. Also, support is
+ * added for L3 counters. The pmus are exported based on
+ * family as either L2 or L3 and NB or DF.
+ */
+ amd_nb_pmu.name = "amd_df";
+ amd_llc_pmu.name = "amd_l3";
+ format_attr_event_df.show = &event_show_df;
+ format_attr_event_l3.show = &event_show_l3;
+ break;
+ case 22:
+ /* Family 16h - may change: */
+ num_counters_nb = NUM_COUNTERS_NB;
+ num_counters_llc = NUM_COUNTERS_L2;
+ amd_nb_pmu.name = "amd_nb";
+ amd_llc_pmu.name = "amd_l2";
+ format_attr_event_df = format_attr_event;
+ format_attr_event_l3 = format_attr_event;
+ break;
+ default:
+ /*
+ * All prior families have the same number of
+ * NorthBridge and Last Level Cache counters
+ */
+ num_counters_nb = NUM_COUNTERS_NB;
+ num_counters_llc = NUM_COUNTERS_L2;
+ amd_nb_pmu.name = "amd_nb";
+ amd_llc_pmu.name = "amd_l2";
+ format_attr_event_df = format_attr_event;
+ format_attr_event_l3 = format_attr_event;
+ break;
+ }
+ amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
+ amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
+
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
goto fail_nodev;
@@ -510,16 +570,16 @@ static int __init amd_uncore_init(void)
}
if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
- amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
- if (!amd_uncore_l2) {
+ amd_uncore_llc = alloc_percpu(struct amd_uncore *);
+ if (!amd_uncore_llc) {
ret = -ENOMEM;
- goto fail_l2;
+ goto fail_llc;
}
- ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
+ ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1);
if (ret)
- goto fail_l2;
+ goto fail_llc;
- pr_info("perf: AMD L2I counters detected\n");
+ pr_info("perf: AMD LLC counters detected\n");
ret = 0;
}
@@ -529,7 +589,7 @@ static int __init amd_uncore_init(void)
if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
"perf/x86/amd/uncore:prepare",
amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead))
- goto fail_l2;
+ goto fail_llc;
if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
"perf/x86/amd/uncore:starting",
@@ -546,11 +606,11 @@ fail_start:
cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
-fail_l2:
+fail_llc:
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
perf_pmu_unregister(&amd_nb_pmu);
- if (amd_uncore_l2)
- free_percpu(amd_uncore_l2);
+ if (amd_uncore_llc)
+ free_percpu(amd_uncore_llc);
fail_nb:
if (amd_uncore_nb)
free_percpu(amd_uncore_nb);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index d611cab214a6..eb1484c86bb4 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3176,13 +3176,16 @@ static void intel_pmu_cpu_starting(int cpu)
if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
for_each_cpu(i, topology_sibling_cpumask(cpu)) {
+ struct cpu_hw_events *sibling;
struct intel_excl_cntrs *c;
- c = per_cpu(cpu_hw_events, i).excl_cntrs;
+ sibling = &per_cpu(cpu_hw_events, i);
+ c = sibling->excl_cntrs;
if (c && c->core_id == core_id) {
cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
cpuc->excl_cntrs = c;
- cpuc->excl_thread_id = 1;
+ if (!sibling->excl_thread_id)
+ cpuc->excl_thread_id = 1;
break;
}
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 1076c9a77292..aff4b5b69d40 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -541,6 +541,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
+
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
{ },
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 1c1b9fe705c8..5900471ee508 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -99,18 +99,24 @@ static struct attribute_group pt_cap_group = {
};
PMU_FORMAT_ATTR(cyc, "config:1" );
+PMU_FORMAT_ATTR(pwr_evt, "config:4" );
+PMU_FORMAT_ATTR(fup_on_ptw, "config:5" );
PMU_FORMAT_ATTR(mtc, "config:9" );
PMU_FORMAT_ATTR(tsc, "config:10" );
PMU_FORMAT_ATTR(noretcomp, "config:11" );
+PMU_FORMAT_ATTR(ptw, "config:12" );
PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
PMU_FORMAT_ATTR(psb_period, "config:24-27" );
static struct attribute *pt_formats_attr[] = {
&format_attr_cyc.attr,
+ &format_attr_pwr_evt.attr,
+ &format_attr_fup_on_ptw.attr,
&format_attr_mtc.attr,
&format_attr_tsc.attr,
&format_attr_noretcomp.attr,
+ &format_attr_ptw.attr,
&format_attr_mtc_period.attr,
&format_attr_cyc_thresh.attr,
&format_attr_psb_period.attr,
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 17c3564d087a..22054ca49026 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -161,7 +161,13 @@ static u64 rapl_timer_ms;
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
- return rapl_pmus->pmus[topology_logical_package_id(cpu)];
+ unsigned int pkgid = topology_logical_package_id(cpu);
+
+ /*
+ * The unsigned check also catches the '-1' return value for non
+ * existent mappings in the topology map.
+ */
+ return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL;
}
static inline u64 rapl_read_counter(struct perf_event *event)
@@ -402,6 +408,8 @@ static int rapl_pmu_event_init(struct perf_event *event)
/* must be done before validate_group */
pmu = cpu_to_rapl_pmu(event->cpu);
+ if (!pmu)
+ return -EINVAL;
event->cpu = pmu->cpu;
event->pmu_private = pmu;
event->hw.event_base = msr;
@@ -585,6 +593,20 @@ static int rapl_cpu_online(unsigned int cpu)
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
int target;
+ if (!pmu) {
+ pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+ if (!pmu)
+ return -ENOMEM;
+
+ raw_spin_lock_init(&pmu->lock);
+ INIT_LIST_HEAD(&pmu->active_list);
+ pmu->pmu = &rapl_pmus->pmu;
+ pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+ rapl_hrtimer_init(pmu);
+
+ rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
+ }
+
/*
* Check if there is an online cpu in the package which collects rapl
* events already.
@@ -598,27 +620,6 @@ static int rapl_cpu_online(unsigned int cpu)
return 0;
}
-static int rapl_cpu_prepare(unsigned int cpu)
-{
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
-
- if (pmu)
- return 0;
-
- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
- if (!pmu)
- return -ENOMEM;
-
- raw_spin_lock_init(&pmu->lock);
- INIT_LIST_HEAD(&pmu->active_list);
- pmu->pmu = &rapl_pmus->pmu;
- pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
- pmu->cpu = -1;
- rapl_hrtimer_init(pmu);
- rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
- return 0;
-}
-
static int rapl_check_hw_unit(bool apply_quirk)
{
u64 msr_rapl_power_unit_bits;
@@ -770,6 +771,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
+
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
{},
};
@@ -803,29 +807,21 @@ static int __init rapl_pmu_init(void)
/*
* Install callbacks. Core will call them for each online cpu.
*/
-
- ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "perf/x86/rapl:prepare",
- rapl_cpu_prepare, NULL);
- if (ret)
- goto out;
-
ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
"perf/x86/rapl:online",
rapl_cpu_online, rapl_cpu_offline);
if (ret)
- goto out1;
+ goto out;
ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
if (ret)
- goto out2;
+ goto out1;
rapl_advertise();
return 0;
-out2:
- cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
out1:
- cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP);
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
out:
pr_warn("Initialization failed (%d), disabled\n", ret);
cleanup_rapl_pmus();
@@ -836,7 +832,6 @@ module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{
cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
- cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP);
perf_pmu_unregister(&rapl_pmus->pmu);
cleanup_rapl_pmus();
}
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 8c4ccdc3a3f3..758c1aa5009d 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -100,7 +100,13 @@ ssize_t uncore_event_show(struct kobject *kobj,
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
- return pmu->boxes[topology_logical_package_id(cpu)];
+ unsigned int pkgid = topology_logical_package_id(cpu);
+
+ /*
+ * The unsigned check also catches the '-1' return value for non
+ * existent mappings in the topology map.
+ */
+ return pkgid < max_packages ? pmu->boxes[pkgid] : NULL;
}
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
@@ -764,30 +770,6 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
pmu->registered = false;
}
-static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
-{
- struct intel_uncore_pmu *pmu = type->pmus;
- struct intel_uncore_box *box;
- int i, pkg;
-
- if (pmu) {
- pkg = topology_physical_package_id(cpu);
- for (i = 0; i < type->num_boxes; i++, pmu++) {
- box = pmu->boxes[pkg];
- if (box)
- uncore_box_exit(box);
- }
- }
-}
-
-static void uncore_exit_boxes(void *dummy)
-{
- struct intel_uncore_type **types;
-
- for (types = uncore_msr_uncores; *types; types++)
- __uncore_exit_boxes(*types++, smp_processor_id());
-}
-
static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
{
int pkg;
@@ -1058,86 +1040,6 @@ static void uncore_pci_exit(void)
}
}
-static int uncore_cpu_dying(unsigned int cpu)
-{
- struct intel_uncore_type *type, **types = uncore_msr_uncores;
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box;
- int i, pkg;
-
- pkg = topology_logical_package_id(cpu);
- for (; *types; types++) {
- type = *types;
- pmu = type->pmus;
- for (i = 0; i < type->num_boxes; i++, pmu++) {
- box = pmu->boxes[pkg];
- if (box && atomic_dec_return(&box->refcnt) == 0)
- uncore_box_exit(box);
- }
- }
- return 0;
-}
-
-static int first_init;
-
-static int uncore_cpu_starting(unsigned int cpu)
-{
- struct intel_uncore_type *type, **types = uncore_msr_uncores;
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box;
- int i, pkg, ncpus = 1;
-
- if (first_init) {
- /*
- * On init we get the number of online cpus in the package
- * and set refcount for all of them.
- */
- ncpus = cpumask_weight(topology_core_cpumask(cpu));
- }
-
- pkg = topology_logical_package_id(cpu);
- for (; *types; types++) {
- type = *types;
- pmu = type->pmus;
- for (i = 0; i < type->num_boxes; i++, pmu++) {
- box = pmu->boxes[pkg];
- if (!box)
- continue;
- /* The first cpu on a package activates the box */
- if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
- uncore_box_init(box);
- }
- }
-
- return 0;
-}
-
-static int uncore_cpu_prepare(unsigned int cpu)
-{
- struct intel_uncore_type *type, **types = uncore_msr_uncores;
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box;
- int i, pkg;
-
- pkg = topology_logical_package_id(cpu);
- for (; *types; types++) {
- type = *types;
- pmu = type->pmus;
- for (i = 0; i < type->num_boxes; i++, pmu++) {
- if (pmu->boxes[pkg])
- continue;
- /* First cpu of a package allocates the box */
- box = uncore_alloc_box(type, cpu_to_node(cpu));
- if (!box)
- return -ENOMEM;
- box->pmu = pmu;
- box->pkgid = pkg;
- pmu->boxes[pkg] = box;
- }
- }
- return 0;
-}
-
static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
int new_cpu)
{
@@ -1177,12 +1079,14 @@ static void uncore_change_context(struct intel_uncore_type **uncores,
static int uncore_event_cpu_offline(unsigned int cpu)
{
- int target;
+ struct intel_uncore_type *type, **types = uncore_msr_uncores;
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ int i, pkg, target;
/* Check if exiting cpu is used for collecting uncore events */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
- return 0;
-
+ goto unref;
/* Find a new cpu to collect uncore events */
target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
@@ -1194,12 +1098,82 @@ static int uncore_event_cpu_offline(unsigned int cpu)
uncore_change_context(uncore_msr_uncores, cpu, target);
uncore_change_context(uncore_pci_uncores, cpu, target);
+
+unref:
+ /* Clear the references */
+ pkg = topology_logical_package_id(cpu);
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (box && atomic_dec_return(&box->refcnt) == 0)
+ uncore_box_exit(box);
+ }
+ }
return 0;
}
+static int allocate_boxes(struct intel_uncore_type **types,
+ unsigned int pkg, unsigned int cpu)
+{
+ struct intel_uncore_box *box, *tmp;
+ struct intel_uncore_type *type;
+ struct intel_uncore_pmu *pmu;
+ LIST_HEAD(allocated);
+ int i;
+
+ /* Try to allocate all required boxes */
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ if (pmu->boxes[pkg])
+ continue;
+ box = uncore_alloc_box(type, cpu_to_node(cpu));
+ if (!box)
+ goto cleanup;
+ box->pmu = pmu;
+ box->pkgid = pkg;
+ list_add(&box->active_list, &allocated);
+ }
+ }
+ /* Install them in the pmus */
+ list_for_each_entry_safe(box, tmp, &allocated, active_list) {
+ list_del_init(&box->active_list);
+ box->pmu->boxes[pkg] = box;
+ }
+ return 0;
+
+cleanup:
+ list_for_each_entry_safe(box, tmp, &allocated, active_list) {
+ list_del_init(&box->active_list);
+ kfree(box);
+ }
+ return -ENOMEM;
+}
+
static int uncore_event_cpu_online(unsigned int cpu)
{
- int target;
+ struct intel_uncore_type *type, **types = uncore_msr_uncores;
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ int i, ret, pkg, target;
+
+ pkg = topology_logical_package_id(cpu);
+ ret = allocate_boxes(types, pkg, cpu);
+ if (ret)
+ return ret;
+
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (!box && atomic_inc_return(&box->refcnt) == 1)
+ uncore_box_init(box);
+ }
+ }
/*
* Check if there is an online cpu in the package
@@ -1354,6 +1328,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init),
{},
};
@@ -1389,38 +1365,16 @@ static int __init intel_uncore_init(void)
if (cret && pret)
return -ENODEV;
- /*
- * Install callbacks. Core will call them for each online cpu.
- *
- * The first online cpu of each package allocates and takes
- * the refcounts for all other online cpus in that package.
- * If msrs are not enabled no allocation is required and
- * uncore_cpu_prepare() is not called for each online cpu.
- */
- if (!cret) {
- ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP,
- "perf/x86/intel/uncore:prepare",
- uncore_cpu_prepare, NULL);
- if (ret)
- goto err;
- } else {
- cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP,
- "perf/x86/intel/uncore:prepare",
- uncore_cpu_prepare, NULL);
- }
- first_init = 1;
- cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING,
- "perf/x86/uncore:starting",
- uncore_cpu_starting, uncore_cpu_dying);
- first_init = 0;
- cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
- "perf/x86/uncore:online",
- uncore_event_cpu_online, uncore_event_cpu_offline);
+ /* Install hotplug callbacks to setup the targets for each package */
+ ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
+ "perf/x86/intel/uncore:online",
+ uncore_event_cpu_online,
+ uncore_event_cpu_offline);
+ if (ret)
+ goto err;
return 0;
err:
- /* Undo box->init_box() */
- on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
uncore_types_exit(uncore_msr_uncores);
uncore_pci_exit();
return ret;
@@ -1429,9 +1383,7 @@ module_init(intel_uncore_init);
static void __exit intel_uncore_exit(void)
{
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING);
- cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP);
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
uncore_types_exit(uncore_msr_uncores);
uncore_pci_exit();
}