aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/events/core.c37
-rw-r--r--arch/x86/events/intel/core.c346
-rw-r--r--arch/x86/events/intel/cstate.c8
-rw-r--r--arch/x86/events/intel/pt.c2
-rw-r--r--arch/x86/events/intel/rapl.c4
-rw-r--r--arch/x86/events/msr.c8
-rw-r--r--arch/x86/events/perf_event.h4
-rw-r--r--arch/x86/include/asm/intel-family.h33
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/include/asm/perf_event.h1
-rw-r--r--arch/x86/include/asm/ptrace.h42
-rw-r--r--arch/x86/kernel/cpu/common.c28
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.c17
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c12
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c385
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c176
-rw-r--r--arch/x86/kernel/kprobes/opt.c2
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/tsc_msr.c10
-rw-r--r--arch/x86/platform/atom/punit_atom_debug.c6
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_bt.c2
21 files changed, 804 insertions, 322 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index dfb2f7c0d019..de32741d041a 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1033,6 +1033,27 @@ static inline void x86_assign_hw_event(struct perf_event *event,
}
}
+/**
+ * x86_perf_rdpmc_index - Return PMC counter used for event
+ * @event: the perf_event to which the PMC counter was assigned
+ *
+ * The counter assigned to this performance event may change if interrupts
+ * are enabled. This counter should thus never be used while interrupts are
+ * enabled. Before this function is used to obtain the assigned counter the
+ * event should be checked for validity using, for example,
+ * perf_event_read_local(), within the same interrupt disabled section in
+ * which this counter is planned to be used.
+ *
+ * Return: The index of the performance monitoring counter assigned to
+ * @perf_event.
+ */
+int x86_perf_rdpmc_index(struct perf_event *event)
+{
+ lockdep_assert_irqs_disabled();
+
+ return event->hw.event_base_rdpmc;
+}
+
static inline int match_prev_assignment(struct hw_perf_event *hwc,
struct cpu_hw_events *cpuc,
int i)
@@ -1584,7 +1605,7 @@ static void __init pmu_check_apic(void)
}
-static struct attribute_group x86_pmu_format_group = {
+static struct attribute_group x86_pmu_format_group __ro_after_init = {
.name = "format",
.attrs = NULL,
};
@@ -1631,9 +1652,9 @@ __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
struct attribute **new;
int j, i;
- for (j = 0; a[j]; j++)
+ for (j = 0; a && a[j]; j++)
;
- for (i = 0; b[i]; i++)
+ for (i = 0; b && b[i]; i++)
j++;
j++;
@@ -1642,9 +1663,9 @@ __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
return NULL;
j = 0;
- for (i = 0; a[i]; i++)
+ for (i = 0; a && a[i]; i++)
new[j++] = a[i];
- for (i = 0; b[i]; i++)
+ for (i = 0; b && b[i]; i++)
new[j++] = b[i];
new[j] = NULL;
@@ -1715,7 +1736,7 @@ static struct attribute *events_attr[] = {
NULL,
};
-static struct attribute_group x86_pmu_events_group = {
+static struct attribute_group x86_pmu_events_group __ro_after_init = {
.name = "events",
.attrs = events_attr,
};
@@ -2230,7 +2251,7 @@ static struct attribute *x86_pmu_attrs[] = {
NULL,
};
-static struct attribute_group x86_pmu_attr_group = {
+static struct attribute_group x86_pmu_attr_group __ro_after_init = {
.attrs = x86_pmu_attrs,
};
@@ -2248,7 +2269,7 @@ static struct attribute *x86_pmu_caps_attrs[] = {
NULL
};
-static struct attribute_group x86_pmu_caps_group = {
+static struct attribute_group x86_pmu_caps_group __ro_after_init = {
.name = "caps",
.attrs = x86_pmu_caps_attrs,
};
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 035c37481f57..0fb8659b20d8 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -242,7 +242,7 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
-static struct attribute *nhm_events_attrs[] = {
+static struct attribute *nhm_mem_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
NULL,
};
@@ -278,8 +278,6 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
"4", "2");
static struct attribute *snb_events_attrs[] = {
- EVENT_PTR(mem_ld_snb),
- EVENT_PTR(mem_st_snb),
EVENT_PTR(td_slots_issued),
EVENT_PTR(td_slots_retired),
EVENT_PTR(td_fetch_bubbles),
@@ -290,6 +288,12 @@ static struct attribute *snb_events_attrs[] = {
NULL,
};
+static struct attribute *snb_mem_events_attrs[] = {
+ EVENT_PTR(mem_ld_snb),
+ EVENT_PTR(mem_st_snb),
+ NULL,
+};
+
static struct event_constraint intel_hsw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -1995,6 +1999,18 @@ static void intel_pmu_nhm_enable_all(int added)
intel_pmu_enable_all(added);
}
+static void enable_counter_freeze(void)
+{
+ update_debugctlmsr(get_debugctlmsr() |
+ DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
+}
+
+static void disable_counter_freeze(void)
+{
+ update_debugctlmsr(get_debugctlmsr() &
+ ~DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
+}
+
static inline u64 intel_pmu_get_status(void)
{
u64 status;
@@ -2200,59 +2216,15 @@ static void intel_pmu_reset(void)
local_irq_restore(flags);
}
-/*
- * This handler is triggered by the local APIC, so the APIC IRQ handling
- * rules apply:
- */
-static int intel_pmu_handle_irq(struct pt_regs *regs)
+static int handle_pmi_common(struct pt_regs *regs, u64 status)
{
struct perf_sample_data data;
- struct cpu_hw_events *cpuc;
- int bit, loops;
- u64 status;
- int handled;
- int pmu_enabled;
-
- cpuc = this_cpu_ptr(&cpu_hw_events);
-
- /*
- * Save the PMU state.
- * It needs to be restored when leaving the handler.
- */
- pmu_enabled = cpuc->enabled;
- /*
- * No known reason to not always do late ACK,
- * but just in case do it opt-in.
- */
- if (!x86_pmu.late_ack)
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- intel_bts_disable_local();
- cpuc->enabled = 0;
- __intel_pmu_disable_all();
- handled = intel_pmu_drain_bts_buffer();
- handled += intel_bts_interrupt();
- status = intel_pmu_get_status();
- if (!status)
- goto done;
-
- loops = 0;
-again:
- intel_pmu_lbr_read();
- intel_pmu_ack_status(status);
- if (++loops > 100) {
- static bool warned = false;
- if (!warned) {
- WARN(1, "perfevents: irq loop stuck!\n");
- perf_event_print_debug();
- warned = true;
- }
- intel_pmu_reset();
- goto done;
- }
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int bit;
+ int handled = 0;
inc_irq_stat(apic_perf_irqs);
-
/*
* Ignore a range of extra bits in status that do not indicate
* overflow by themselves.
@@ -2261,7 +2233,7 @@ again:
GLOBAL_STATUS_ASIF |
GLOBAL_STATUS_LBRS_FROZEN);
if (!status)
- goto done;
+ return 0;
/*
* In case multiple PEBS events are sampled at the same time,
* it is possible to have GLOBAL_STATUS bit 62 set indicating
@@ -2331,6 +2303,146 @@ again:
x86_pmu_stop(event, 0);
}
+ return handled;
+}
+
+static bool disable_counter_freezing;
+static int __init intel_perf_counter_freezing_setup(char *s)
+{
+ disable_counter_freezing = true;
+ pr_info("Intel PMU Counter freezing feature disabled\n");
+ return 1;
+}
+__setup("disable_counter_freezing", intel_perf_counter_freezing_setup);
+
+/*
+ * Simplified handler for Arch Perfmon v4:
+ * - We rely on counter freezing/unfreezing to enable/disable the PMU.
+ * This is done automatically on PMU ack.
+ * - Ack the PMU only after the APIC.
+ */
+
+static int intel_pmu_handle_irq_v4(struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int handled = 0;
+ bool bts = false;
+ u64 status;
+ int pmu_enabled = cpuc->enabled;
+ int loops = 0;
+
+ /* PMU has been disabled because of counter freezing */
+ cpuc->enabled = 0;
+ if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+ bts = true;
+ intel_bts_disable_local();
+ handled = intel_pmu_drain_bts_buffer();
+ handled += intel_bts_interrupt();
+ }
+ status = intel_pmu_get_status();
+ if (!status)
+ goto done;
+again:
+ intel_pmu_lbr_read();
+ if (++loops > 100) {
+ static bool warned;
+
+ if (!warned) {
+ WARN(1, "perfevents: irq loop stuck!\n");
+ perf_event_print_debug();
+ warned = true;
+ }
+ intel_pmu_reset();
+ goto done;
+ }
+
+
+ handled += handle_pmi_common(regs, status);
+done:
+ /* Ack the PMI in the APIC */
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+ /*
+ * The counters start counting immediately while ack the status.
+ * Make it as close as possible to IRET. This avoids bogus
+ * freezing on Skylake CPUs.
+ */
+ if (status) {
+ intel_pmu_ack_status(status);
+ } else {
+ /*
+ * CPU may issues two PMIs very close to each other.
+ * When the PMI handler services the first one, the
+ * GLOBAL_STATUS is already updated to reflect both.
+ * When it IRETs, the second PMI is immediately
+ * handled and it sees clear status. At the meantime,
+ * there may be a third PMI, because the freezing bit
+ * isn't set since the ack in first PMI handlers.
+ * Double check if there is more work to be done.
+ */
+ status = intel_pmu_get_status();
+ if (status)
+ goto again;
+ }
+
+ if (bts)
+ intel_bts_enable_local();
+ cpuc->enabled = pmu_enabled;
+ return handled;
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int intel_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc;
+ int loops;
+ u64 status;
+ int handled;
+ int pmu_enabled;
+
+ cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * Save the PMU state.
+ * It needs to be restored when leaving the handler.
+ */
+ pmu_enabled = cpuc->enabled;
+ /*
+ * No known reason to not always do late ACK,
+ * but just in case do it opt-in.
+ */
+ if (!x86_pmu.late_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ intel_bts_disable_local();
+ cpuc->enabled = 0;
+ __intel_pmu_disable_all();
+ handled = intel_pmu_drain_bts_buffer();
+ handled += intel_bts_interrupt();
+ status = intel_pmu_get_status();
+ if (!status)
+ goto done;
+
+ loops = 0;
+again:
+ intel_pmu_lbr_read();
+ intel_pmu_ack_status(status);
+ if (++loops > 100) {
+ static bool warned;
+
+ if (!warned) {
+ WARN(1, "perfevents: irq loop stuck!\n");
+ perf_event_print_debug();
+ warned = true;
+ }
+ intel_pmu_reset();
+ goto done;
+ }
+
+ handled += handle_pmi_common(regs, status);
+
/*
* Repeat if there is more work to be done:
*/
@@ -3350,6 +3462,9 @@ static void intel_pmu_cpu_starting(int cpu)
if (x86_pmu.version > 1)
flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
+ if (x86_pmu.counter_freezing)
+ enable_counter_freeze();
+
if (!cpuc->shared_regs)
return;
@@ -3421,6 +3536,9 @@ static void intel_pmu_cpu_dying(int cpu)
free_excl_cntrs(cpu);
fini_debug_store_on_cpu(cpu);
+
+ if (x86_pmu.counter_freezing)
+ disable_counter_freeze();
}
static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3725,6 +3843,40 @@ static __init void intel_nehalem_quirk(void)
}
}
+static bool intel_glp_counter_freezing_broken(int cpu)
+{
+ u32 rev = UINT_MAX; /* default to broken for unknown stepping */
+
+ switch (cpu_data(cpu).x86_stepping) {
+ case 1:
+ rev = 0x28;
+ break;
+ case 8:
+ rev = 0x6;
+ break;
+ }
+
+ return (cpu_data(cpu).microcode < rev);
+}
+
+static __init void intel_glp_counter_freezing_quirk(void)
+{
+ /* Check if it's already disabled */
+ if (disable_counter_freezing)
+ return;
+
+ /*
+ * If the system starts with the wrong ucode, leave the
+ * counter-freezing feature permanently disabled.
+ */
+ if (intel_glp_counter_freezing_broken(raw_smp_processor_id())) {
+ pr_info("PMU counter freezing disabled due to CPU errata,"
+ "please upgrade microcode\n");
+ x86_pmu.counter_freezing = false;
+ x86_pmu.handle_irq = intel_pmu_handle_irq;
+ }
+}
+
/*
* enable software workaround for errata:
* SNB: BJ122
@@ -3764,8 +3916,6 @@ EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1");
EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1");
static struct attribute *hsw_events_attrs[] = {
- EVENT_PTR(mem_ld_hsw),
- EVENT_PTR(mem_st_hsw),
EVENT_PTR(td_slots_issued),
EVENT_PTR(td_slots_retired),
EVENT_PTR(td_fetch_bubbles),
@@ -3776,6 +3926,12 @@ static struct attribute *hsw_events_attrs[] = {
NULL
};
+static struct attribute *hsw_mem_events_attrs[] = {
+ EVENT_PTR(mem_ld_hsw),
+ EVENT_PTR(mem_st_hsw),
+ NULL,
+};
+
static struct attribute *hsw_tsx_events_attrs[] = {
EVENT_PTR(tx_start),
EVENT_PTR(tx_commit),
@@ -3792,13 +3948,6 @@ static struct attribute *hsw_tsx_events_attrs[] = {
NULL
};
-static __init struct attribute **get_hsw_events_attrs(void)
-{
- return boot_cpu_has(X86_FEATURE_RTM) ?
- merge_attr(hsw_events_attrs, hsw_tsx_events_attrs) :
- hsw_events_attrs;
-}
-
static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
@@ -3875,9 +4024,32 @@ static struct attribute *intel_pmu_attrs[] = {
NULL,
};
+static __init struct attribute **
+get_events_attrs(struct attribute **base,
+ struct attribute **mem,
+ struct attribute **tsx)
+{
+ struct attribute **attrs = base;
+ struct attribute **old;
+
+ if (mem && x86_pmu.pebs)
+ attrs = merge_attr(attrs, mem);
+
+ if (tsx && boot_cpu_has(X86_FEATURE_RTM)) {
+ old = attrs;
+ attrs = merge_attr(attrs, tsx);
+ if (old != base)
+ kfree(old);
+ }
+
+ return attrs;
+}
+
__init int intel_pmu_init(void)
{
struct attribute **extra_attr = NULL;
+ struct attribute **mem_attr = NULL;
+ struct attribute **tsx_attr = NULL;
struct attribute **to_free = NULL;
union cpuid10_edx edx;
union cpuid10_eax eax;
@@ -3935,6 +4107,9 @@ __init int intel_pmu_init(void)
max((int)edx.split.num_counters_fixed, assume);
}
+ if (version >= 4)
+ x86_pmu.counter_freezing = !disable_counter_freezing;
+
if (boot_cpu_has(X86_FEATURE_PDCM)) {
u64 capabilities;
@@ -3986,7 +4161,7 @@ __init int intel_pmu_init(void)
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.extra_regs = intel_nehalem_extra_regs;
- x86_pmu.cpu_events = nhm_events_attrs;
+ mem_attr = nhm_mem_events_attrs;
/* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
@@ -4004,11 +4179,11 @@ __init int intel_pmu_init(void)
name = "nehalem";
break;
- case INTEL_FAM6_ATOM_PINEVIEW:
- case INTEL_FAM6_ATOM_LINCROFT:
- case INTEL_FAM6_ATOM_PENWELL:
- case INTEL_FAM6_ATOM_CLOVERVIEW:
- case INTEL_FAM6_ATOM_CEDARVIEW:
+ case INTEL_FAM6_ATOM_BONNELL:
+ case INTEL_FAM6_ATOM_BONNELL_MID:
+ case INTEL_FAM6_ATOM_SALTWELL:
+ case INTEL_FAM6_ATOM_SALTWELL_MID:
+ case INTEL_FAM6_ATOM_SALTWELL_TABLET:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -4021,9 +4196,11 @@ __init int intel_pmu_init(void)
name = "bonnell";
break;
- case INTEL_FAM6_ATOM_SILVERMONT1:
- case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_SILVERMONT:
+ case INTEL_FAM6_ATOM_SILVERMONT_X:
+ case INTEL_FAM6_ATOM_SILVERMONT_MID:
case INTEL_FAM6_ATOM_AIRMONT:
+ case INTEL_FAM6_ATOM_AIRMONT_MID:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -4042,7 +4219,7 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_DENVERTON:
+ case INTEL_FAM6_ATOM_GOLDMONT_X:
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -4068,7 +4245,8 @@ __init int intel_pmu_init(void)
name = "goldmont";
break;
- case INTEL_FAM6_ATOM_GEMINI_LAKE:
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ x86_add_quirk(intel_glp_counter_freezing_quirk);
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
@@ -4112,7 +4290,7 @@ __init int intel_pmu_init(void)
x86_pmu.extra_regs = intel_westmere_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.cpu_events = nhm_events_attrs;
+ mem_attr = nhm_mem_events_attrs;
/* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
@@ -4152,6 +4330,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.cpu_events = snb_events_attrs;
+ mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
@@ -4192,6 +4371,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.cpu_events = snb_events_attrs;
+ mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
@@ -4226,10 +4406,12 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = get_hsw_events_attrs();
+ x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.lbr_double_abort = true;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
+ mem_attr = hsw_mem_events_attrs;
+ tsx_attr = hsw_tsx_events_attrs;
pr_cont("Haswell events, ");
name = "haswell";
break;
@@ -4265,10 +4447,12 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = get_hsw_events_attrs();
+ x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.limit_period = bdw_limit_period;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
+ mem_attr = hsw_mem_events_attrs;
+ tsx_attr = hsw_tsx_events_attrs;
pr_cont("Broadwell events, ");
name = "broadwell";
break;
@@ -4324,7 +4508,9 @@ __init int intel_pmu_init(void)
hsw_format_attr : nhm_format_attr;
extra_attr = merge_attr(extra_attr, skl_format_attr);
to_free = extra_attr;
- x86_pmu.cpu_events = get_hsw_events_attrs();
+ x86_pmu.cpu_events = hsw_events_attrs;
+ mem_attr = hsw_mem_events_attrs;
+ tsx_attr = hsw_tsx_events_attrs;
intel_pmu_pebs_data_source_skl(
boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
pr_cont("Skylake events, ");
@@ -4357,6 +4543,9 @@ __init int intel_pmu_init(void)
WARN_ON(!x86_pmu.format_attrs);
}
+ x86_pmu.cpu_events = get_events_attrs(x86_pmu.cpu_events,
+ mem_attr, tsx_attr);
+
if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
@@ -4431,6 +4620,13 @@ __init int intel_pmu_init(void)
pr_cont("full-width counters, ");
}
+ /*
+ * For arch perfmon 4 use counter freezing to avoid
+ * several MSR accesses in the PMI.
+ */
+ if (x86_pmu.counter_freezing)
+ x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
+
kfree(to_free);
return 0;
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9f8084f18d58..d2e780705c5a 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -559,8 +559,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_X, slm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates),
@@ -581,9 +581,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 8d016ce5b80d..3a0aa83cbd07 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -95,7 +95,7 @@ static ssize_t pt_cap_show(struct device *cdev,
return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
}
-static struct attribute_group pt_cap_group = {
+static struct attribute_group pt_cap_group __ro_after_init = {
.name = "caps",
};
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 32f3e9423e99..91039ffed633 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -777,9 +777,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init),
{},
};
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index b4771a6ddbc1..1b9f85abf9bc 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -69,14 +69,14 @@ static bool test_intel(int idx)
case INTEL_FAM6_BROADWELL_GT3E:
case INTEL_FAM6_BROADWELL_X:
- case INTEL_FAM6_ATOM_SILVERMONT1:
- case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_SILVERMONT:
+ case INTEL_FAM6_ATOM_SILVERMONT_X:
case INTEL_FAM6_ATOM_AIRMONT:
case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_DENVERTON:
+ case INTEL_FAM6_ATOM_GOLDMONT_X:
- case INTEL_FAM6_ATOM_GEMINI_LAKE:
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 156286335351..adae087cecdd 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -560,9 +560,11 @@ struct x86_pmu {
struct event_constraint *event_constraints;
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
- bool late_ack;
u64 (*limit_period)(struct perf_event *event, u64 l);
+ /* PMI handler bits */
+ unsigned int late_ack :1,
+ counter_freezing :1;
/*
* sysfs attrs
*/
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 7ed08a7c3398..0dd6b0f4000e 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -8,9 +8,6 @@
* The "_X" parts are generally the EP and EX Xeons, or the
* "Extreme" ones, like Broadwell-E.
*
- * Things ending in "2" are usually because we have no better
- * name for them. There's no processor called "SILVERMONT2".
- *
* While adding a new CPUID for a new microarchitecture, add a new
* group to keep logically sorted out in chronological order. Within
* that group keep the CPUID for the variants sorted by model number.
@@ -57,19 +54,23 @@
/* "Small Core" Processors (Atom) */
-#define INTEL_FAM6_ATOM_PINEVIEW 0x1C
-#define INTEL_FAM6_ATOM_LINCROFT 0x26
-#define INTEL_FAM6_ATOM_PENWELL 0x27
-#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35
-#define INTEL_FAM6_ATOM_CEDARVIEW 0x36
-#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
-#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
-#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
-#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
-#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
-#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
-#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
+#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
+#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
+
+#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */
+#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */
+#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
+
+#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
+#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */
+#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */
+
+#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
+#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
+
+#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
+#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
+#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
/* Xeon Phi */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4731f0cf97c5..80f4a4f38c79 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -164,6 +164,7 @@
#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
+#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 78241b736f2a..8bdf74902293 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -278,6 +278,7 @@ struct perf_guest_switch_msr {
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
extern void perf_check_microcode(void);
+extern int x86_perf_rdpmc_index(struct perf_event *event);
#else
static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 5e58a74bfd3a..25f49af1b13c 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -239,23 +239,51 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
}
/**
+ * regs_get_kernel_stack_nth_addr() - get the address of the Nth entry on stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @n: stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns the address of the @n th entry of the
+ * kernel stack which is specified by @regs. If the @n th entry is NOT in
+ * the kernel stack, this returns NULL.
+ */
+static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+
+ addr += n;
+ if (regs_within_kernel_stack(regs, (unsigned long)addr))
+ return addr;
+ else
+ return NULL;
+}
+
+/* To avoid include hell, we can't include uaccess.h */
+extern long probe_kernel_read(void *dst, const void *src, size_t size);
+
+/**
* regs_get_kernel_stack_nth() - get Nth entry of the stack
* @regs: pt_regs which contains kernel stack pointer.
* @n: stack entry number.
*
* regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
- * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack
* this returns 0.
*/
static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
unsigned int n)
{
- unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
- addr += n;
- if (regs_within_kernel_stack(regs, (unsigned long)addr))
- return *addr;
- else
- return 0;
+ unsigned long *addr;
+ unsigned long val;
+ long ret;
+
+ addr = regs_get_kernel_stack_nth_addr(regs, n);
+ if (addr) {
+ ret = probe_kernel_read(&val, addr, sizeof(val));
+ if (!ret)
+ return val;
+ }
+ return 0;
}
#define arch_has_single_step() (1)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 44c4ef3d989b..10e5ccfa9278 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -949,11 +949,11 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
}
static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_TABLET, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL_MID, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_MID, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL, X86_FEATURE_ANY },
{ X86_VENDOR_CENTAUR, 5 },
{ X86_VENDOR_INTEL, 5 },
{ X86_VENDOR_NSC, 5 },
@@ -968,10 +968,10 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
/* Only list CPUs which speculate but are non susceptible to SSB */
static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
@@ -984,14 +984,14 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
/* in addition to cpu_no_speculation */
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT_MID },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_PLUS },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
{}
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index abb71ac70443..44272b7107ad 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -485,9 +485,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
size_t tsize;
if (is_llc_occupancy_enabled()) {
- d->rmid_busy_llc = kcalloc(BITS_TO_LONGS(r->num_rmid),
- sizeof(unsigned long),
- GFP_KERNEL);
+ d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
@@ -496,7 +494,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
tsize = sizeof(*d->mbm_total);
d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
if (!d->mbm_total) {
- kfree(d->rmid_busy_llc);
+ bitmap_free(d->rmid_busy_llc);
return -ENOMEM;
}
}
@@ -504,7 +502,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
tsize = sizeof(*d->mbm_local);
d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
if (!d->mbm_local) {
- kfree(d->rmid_busy_llc);
+ bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
return -ENOMEM;
}
@@ -610,9 +608,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cancel_delayed_work(&d->cqm_limbo);
}
+ /*
+ * rdt_domain "d" is going to be freed below, so clear
+ * its pointer from pseudo_lock_region struct.
+ */
+ if (d->plr)
+ d->plr->d = NULL;
+
kfree(d->ctrl_val);
kfree(d->mbps_val);
- kfree(d->rmid_busy_llc);
+ bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
kfree(d);
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 0f53049719cd..27937458c231 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -404,8 +404,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
for_each_alloc_enabled_rdt_resource(r)
seq_printf(s, "%s:uninitialized\n", r->name);
} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- seq_printf(s, "%s:%d=%x\n", rdtgrp->plr->r->name,
- rdtgrp->plr->d->id, rdtgrp->plr->cbm);
+ if (!rdtgrp->plr->d) {
+ rdt_last_cmd_clear();
+ rdt_last_cmd_puts("Cache domain offline\n");
+ ret = -ENODEV;
+ } else {
+ seq_printf(s, "%s:%d=%x\n",
+ rdtgrp->plr->r->name,
+ rdtgrp->plr->d->id,
+ rdtgrp->plr->cbm);
+ }
} else {
closid = rdtgrp->closid;
for_each_alloc_enabled_rdt_resource(r) {
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
index f8c260d522ca..815b4e92522c 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -17,6 +17,7 @@
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/mman.h>
+#include <linux/perf_event.h>
#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
@@ -26,6 +27,7 @@
#include <asm/intel_rdt_sched.h>
#include <asm/perf_event.h>
+#include "../../events/perf_event.h" /* For X86_CONFIG() */
#include "intel_rdt.h"
#define CREATE_TRACE_POINTS
@@ -91,7 +93,7 @@ static u64 get_prefetch_disable_bits(void)
*/
return 0xF;
case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_GEMINI_LAKE:
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
/*
* SDM defines bits of MSR_MISC_FEATURE_CONTROL register
* as:
@@ -106,16 +108,6 @@ static u64 get_prefetch_disable_bits(void)
return 0;
}
-/*
- * Helper to write 64bit value to MSR without tracing. Used when
- * use of the cache should be restricted and use of registers used
- * for local variables avoided.
- */
-static inline void pseudo_wrmsrl_notrace(unsigned int msr, u64 val)
-{
- __wrmsr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
-}
-
/**
* pseudo_lock_minor_get - Obtain available minor number
* @minor: Pointer to where new minor number will be stored
@@ -888,31 +880,14 @@ static int measure_cycles_lat_fn(void *_plr)
struct pseudo_lock_region *plr = _plr;
unsigned long i;
u64 start, end;
-#ifdef CONFIG_KASAN
- /*
- * The registers used for local register variables are also used
- * when KASAN is active. When KASAN is active we use a regular
- * variable to ensure we always use a valid pointer to access memory.
- * The cost is that accessing this pointer, which could be in
- * cache, will be included in the measurement of memory read latency.
- */
void *mem_r;
-#else
-#ifdef CONFIG_X86_64
- register void *mem_r asm("rbx");
-#else
- register void *mem_r asm("ebx");
-#endif /* CONFIG_X86_64 */
-#endif /* CONFIG_KASAN */
local_irq_disable();
/*
- * The wrmsr call may be reordered with the assignment below it.
- * Call wrmsr as directly as possible to avoid tracing clobbering
- * local register variable used for memory pointer.
+ * Disable hardware prefetchers.
*/
- __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
- mem_r = plr->kmem;
+ wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+ mem_r = READ_ONCE(plr->kmem);
/*
* Dummy execute of the time measurement to load the needed
* instructions into the L1 instruction cache.
@@ -934,157 +909,240 @@ static int measure_cycles_lat_fn(void *_plr)
return 0;
}
-static int measure_cycles_perf_fn(void *_plr)
+/*
+ * Create a perf_event_attr for the hit and miss perf events that will
+ * be used during the performance measurement. A perf_event maintains
+ * a pointer to its perf_event_attr so a unique attribute structure is
+ * created for each perf_event.
+ *
+ * The actual configuration of the event is set right before use in order
+ * to use the X86_CONFIG macro.
+ */
+static struct perf_event_attr perf_miss_attr = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 0,
+ .exclude_user = 1,
+};
+
+static struct perf_event_attr perf_hit_attr = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 0,
+ .exclude_user = 1,
+};
+
+struct residency_counts {
+ u64 miss_before, hits_before;
+ u64 miss_after, hits_after;
+};
+
+static int measure_residency_fn(struct perf_event_attr *miss_attr,
+ struct perf_event_attr *hit_attr,
+ struct pseudo_lock_region *plr,
+ struct residency_counts *counts)
{
- unsigned long long l3_hits = 0, l3_miss = 0;
- u64 l3_hit_bits = 0, l3_miss_bits = 0;
- struct pseudo_lock_region *plr = _plr;
- unsigned long long l2_hits, l2_miss;
- u64 l2_hit_bits, l2_miss_bits;
- unsigned long i;
-#ifdef CONFIG_KASAN
- /*
- * The registers used for local register variables are also used
- * when KASAN is active. When KASAN is active we use regular variables
- * at the cost of including cache access latency to these variables
- * in the measurements.
- */
+ u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0;
+ struct perf_event *miss_event, *hit_event;
+ int hit_pmcnum, miss_pmcnum;
unsigned int line_size;
unsigned int size;
+ unsigned long i;
void *mem_r;
-#else
- register unsigned int line_size asm("esi");
- register unsigned int size asm("edi");
-#ifdef CONFIG_X86_64
- register void *mem_r asm("rbx");
-#else
- register void *mem_r asm("ebx");
-#endif /* CONFIG_X86_64 */
-#endif /* CONFIG_KASAN */
+ u64 tmp;
+
+ miss_event = perf_event_create_kernel_counter(miss_attr, plr->cpu,
+ NULL, NULL, NULL);
+ if (IS_ERR(miss_event))
+ goto out;
+
+ hit_event = perf_event_create_kernel_counter(hit_attr, plr->cpu,
+ NULL, NULL, NULL);
+ if (IS_ERR(hit_event))
+ goto out_miss;
+
+ local_irq_disable();
+ /*
+ * Check any possible error state of events used by performing
+ * one local read.
+ */
+ if (perf_event_read_local(miss_event, &tmp, NULL, NULL)) {
+ local_irq_enable();
+ goto out_hit;
+ }
+ if (perf_event_read_local(hit_event, &tmp, NULL, NULL)) {
+ local_irq_enable();
+ goto out_hit;
+ }
+
+ /*
+ * Disable hardware prefetchers.
+ */
+ wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
+
+ /* Initialize rest of local variables */
+ /*
+ * Performance event has been validated right before this with
+ * interrupts disabled - it is thus safe to read the counter index.
+ */
+ miss_pmcnum = x86_perf_rdpmc_index(miss_event);
+ hit_pmcnum = x86_perf_rdpmc_index(hit_event);
+ line_size = READ_ONCE(plr->line_size);
+ mem_r = READ_ONCE(plr->kmem);
+ size = READ_ONCE(plr->size);
+
+ /*
+ * Read counter variables twice - first to load the instructions
+ * used in L1 cache, second to capture accurate value that does not
+ * include cache misses incurred because of instruction loads.
+ */
+ rdpmcl(hit_pmcnum, hits_before);
+ rdpmcl(miss_pmcnum, miss_before);
+ /*
+ * From SDM: Performing back-to-back fast reads are not guaranteed
+ * to be monotonic.
+ * Use LFENCE to ensure all previous instructions are retired
+ * before proceeding.
+ */
+ rmb();
+ rdpmcl(hit_pmcnum, hits_before);
+ rdpmcl(miss_pmcnum, miss_before);
+ /*
+ * Use LFENCE to ensure all previous instructions are retired
+ * before proceeding.
+ */
+ rmb();
+ for (i = 0; i < size; i += line_size) {
+ /*
+ * Add a barrier to prevent speculative execution of this
+ * loop reading beyond the end of the buffer.
+ */
+ rmb();
+ asm volatile("mov (%0,%1,1), %%eax\n\t"
+ :
+ : "r" (mem_r), "r" (i)
+ : "%eax", "memory");
+ }
+ /*
+ * Use LFENCE to ensure all previous instructions are retired
+ * before proceeding.
+ */
+ rmb();
+ rdpmcl(hit_pmcnum, hits_after);
+ rdpmcl(miss_pmcnum, miss_after);
+ /*
+ * Use LFENCE to ensure all previous instructions are retired
+ * before proceeding.
+ */
+ rmb();
+ /* Re-enable hardware prefetchers */
+ wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
+ local_irq_enable();
+out_hit:
+ perf_event_release_kernel(hit_event);
+out_miss:
+ perf_event_release_kernel(miss_event);
+out:
+ /*
+ * All counts will be zero on failure.
+ */
+ counts->miss_before = miss_before;
+ counts->hits_before = hits_before;
+ counts->miss_after = miss_after;
+ counts->hits_after = hits_after;
+ return 0;
+}
+
+static int measure_l2_residency(void *_plr)
+{
+ struct pseudo_lock_region *plr = _plr;
+ struct residency_counts counts = {0};
/*
* Non-architectural event for the Goldmont Microarchitecture
* from Intel x86 Architecture Software Developer Manual (SDM):
* MEM_LOAD_UOPS_RETIRED D1H (event number)
* Umask values:
- * L1_HIT 01H
* L2_HIT 02H
- * L1_MISS 08H
* L2_MISS 10H
- *
- * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
- * has two "no fix" errata associated with it: BDM35 and BDM100. On
- * this platform we use the following events instead:
- * L2_RQSTS 24H (Documented in https://download.01.org/perfmon/BDW/)
- * REFERENCES FFH
- * MISS 3FH
- * LONGEST_LAT_CACHE 2EH (Documented in SDM)
- * REFERENCE 4FH
- * MISS 41H
*/
-
- /*
- * Start by setting flags for IA32_PERFEVTSELx:
- * OS (Operating system mode) 0x2
- * INT (APIC interrupt enable) 0x10
- * EN (Enable counter) 0x40
- *
- * Then add the Umask value and event number to select performance
- * event.
- */
-
switch (boot_cpu_data.x86_model) {
case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_GEMINI_LAKE:
- l2_hit_bits = (0x52ULL << 16) | (0x2 << 8) | 0xd1;
- l2_miss_bits = (0x52ULL << 16) | (0x10 << 8) | 0xd1;
- break;
- case INTEL_FAM6_BROADWELL_X:
- /* On BDW the l2_hit_bits count references, not hits */
- l2_hit_bits = (0x52ULL << 16) | (0xff << 8) | 0x24;
- l2_miss_bits = (0x52ULL << 16) | (0x3f << 8) | 0x24;
- /* On BDW the l3_hit_bits count references, not hits */
- l3_hit_bits = (0x52ULL << 16) | (0x4f << 8) | 0x2e;
- l3_miss_bits = (0x52ULL << 16) | (0x41 << 8) | 0x2e;
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ perf_miss_attr.config = X86_CONFIG(.event = 0xd1,
+ .umask = 0x10);
+ perf_hit_attr.config = X86_CONFIG(.event = 0xd1,
+ .umask = 0x2);
break;
default:
goto out;
}
- local_irq_disable();
+ measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
/*
- * Call wrmsr direcly to avoid the local register variables from
- * being overwritten due to reordering of their assignment with
- * the wrmsr calls.
+ * If a failure prevented the measurements from succeeding
+ * tracepoints will still be written and all counts will be zero.
*/
- __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
- /* Disable events and reset counters */
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 1, 0x0);
- if (l3_hit_bits > 0) {
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 2, 0x0);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 3, 0x0);
- }
- /* Set and enable the L2 counters */
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, l2_hit_bits);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, l2_miss_bits);
- if (l3_hit_bits > 0) {
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
- l3_hit_bits);
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
- l3_miss_bits);
- }
- mem_r = plr->kmem;
- size = plr->size;
- line_size = plr->line_size;
- for (i = 0; i < size; i += line_size) {
- asm volatile("mov (%0,%1,1), %%eax\n\t"
- :
- : "r" (mem_r), "r" (i)
- : "%eax", "memory");
- }
+ trace_pseudo_lock_l2(counts.hits_after - counts.hits_before,
+ counts.miss_after - counts.miss_before);
+out:
+ plr->thread_done = 1;
+ wake_up_interruptible(&plr->lock_thread_wq);
+ return 0;
+}
+
+static int measure_l3_residency(void *_plr)
+{
+ struct pseudo_lock_region *plr = _plr;
+ struct residency_counts counts = {0};
+
/*
- * Call wrmsr directly (no tracing) to not influence
- * the cache access counters as they are disabled.
+ * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
+ * has two "no fix" errata associated with it: BDM35 and BDM100. On
+ * this platform the following events are used instead:
+ * LONGEST_LAT_CACHE 2EH (Documented in SDM)
+ * REFERENCE 4FH
+ * MISS 41H
*/
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0,
- l2_hit_bits & ~(0x40ULL << 16));
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1,
- l2_miss_bits & ~(0x40ULL << 16));
- if (l3_hit_bits > 0) {
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
- l3_hit_bits & ~(0x40ULL << 16));
- pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
- l3_miss_bits & ~(0x40ULL << 16));
- }
- l2_hits = native_read_pmc(0);
- l2_miss = native_read_pmc(1);
- if (l3_hit_bits > 0) {
- l3_hits = native_read_pmc(2);
- l3_miss = native_read_pmc(3);
+
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_BROADWELL_X:
+ /* On BDW the hit event counts references, not hits */
+ perf_hit_attr.config = X86_CONFIG(.event = 0x2e,
+ .umask = 0x4f);
+ perf_miss_attr.config = X86_CONFIG(.event = 0x2e,
+ .umask = 0x41);
+ break;
+ default:
+ goto out;
}
- wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
- local_irq_enable();
+
+ measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
/*
- * On BDW we count references and misses, need to adjust. Sometimes
- * the "hits" counter is a bit more than the references, for
- * example, x references but x + 1 hits. To not report invalid
- * hit values in this case we treat that as misses eaqual to
- * references.
+ * If a failure prevented the measurements from succeeding
+ * tracepoints will still be written and all counts will be zero.
*/
- if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
- l2_hits -= (l2_miss > l2_hits ? l2_hits : l2_miss);
- trace_pseudo_lock_l2(l2_hits, l2_miss);
- if (l3_hit_bits > 0) {
- if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
- l3_hits -= (l3_miss > l3_hits ? l3_hits : l3_miss);
- trace_pseudo_lock_l3(l3_hits, l3_miss);
+
+ counts.miss_after -= counts.miss_before;
+ if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) {
+ /*
+ * On BDW references and misses are counted, need to adjust.
+ * Sometimes the "hits" counter is a bit more than the
+ * references, for example, x references but x + 1 hits.
+ * To not report invalid hit values in this case we treat
+ * that as misses equal to references.
+ */
+ /* First compute the number of cache references measured */
+ counts.hits_after -= counts.hits_before;
+ /* Next convert references to cache hits */
+ counts.hits_after -= min(counts.miss_after, counts.hits_after);
+ } else {
+ counts.hits_after -= counts.hits_before;
}
+ trace_pseudo_lock_l3(counts.hits_after, counts.miss_after);
out:
plr->thread_done = 1;
wake_up_interruptible(&plr->lock_thread_wq);
@@ -1116,6 +1174,11 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
goto out;
}
+ if (!plr->d) {
+ ret = -ENODEV;
+ goto out;
+ }
+
plr->thread_done = 0;
cpu = cpumask_first(&plr->d->cpu_mask);
if (!cpu_online(cpu)) {
@@ -1123,13 +1186,20 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
goto out;
}
+ plr->cpu = cpu;
+
if (sel == 1)
thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
cpu_to_node(cpu),
"pseudo_lock_measure/%u",
cpu);
else if (sel == 2)
- thread = kthread_create_on_node(measure_cycles_perf_fn, plr,
+ thread = kthread_create_on_node(measure_l2_residency, plr,
+ cpu_to_node(cpu),
+ "pseudo_lock_measure/%u",
+ cpu);
+ else if (sel == 3)
+ thread = kthread_create_on_node(measure_l3_residency, plr,
cpu_to_node(cpu),
"pseudo_lock_measure/%u",
cpu);
@@ -1173,7 +1243,7 @@ static ssize_t pseudo_lock_measure_trigger(struct file *file,
buf[buf_size] = '\0';
ret = kstrtoint(buf, 10, &sel);
if (ret == 0) {
- if (sel != 1)
+ if (sel != 1 && sel != 2 && sel != 3)
return -EINVAL;
ret = debugfs_file_get(file->f_path.dentry);
if (ret)
@@ -1429,6 +1499,11 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
plr = rdtgrp->plr;
+ if (!plr->d) {
+ mutex_unlock(&rdtgroup_mutex);
+ return -ENODEV;
+ }
+
/*
* Task is required to run with affinity to the cpus associated
* with the pseudo-locked region. If this is not the case the task
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index b140c68bc14b..f27b8115ffa2 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -268,17 +268,27 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
struct seq_file *s, void *v)
{
struct rdtgroup *rdtgrp;
+ struct cpumask *mask;
int ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (rdtgrp) {
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
- seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
- cpumask_pr_args(&rdtgrp->plr->d->cpu_mask));
- else
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+ if (!rdtgrp->plr->d) {
+ rdt_last_cmd_clear();
+ rdt_last_cmd_puts("Cache domain offline\n");
+ ret = -ENODEV;
+ } else {
+ mask = &rdtgrp->plr->d->cpu_mask;
+ seq_printf(s, is_cpu_list(of) ?
+ "%*pbl\n" : "%*pb\n",
+ cpumask_pr_args(mask));
+ }
+ } else {
seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
cpumask_pr_args(&rdtgrp->cpu_mask));
+ }
} else {
ret = -ENOENT;
}
@@ -961,7 +971,78 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
}
/**
- * rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
+ * rdt_cdp_peer_get - Retrieve CDP peer if it exists
+ * @r: RDT resource to which RDT domain @d belongs
+ * @d: Cache instance for which a CDP peer is requested
+ * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer)
+ * Used to return the result.
+ * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer)
+ * Used to return the result.
+ *
+ * RDT resources are managed independently and by extension the RDT domains
+ * (RDT resource instances) are managed independently also. The Code and
+ * Data Prioritization (CDP) RDT resources, while managed independently,
+ * could refer to the same underlying hardware. For example,
+ * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache.
+ *
+ * When provided with an RDT resource @r and an instance of that RDT
+ * resource @d rdt_cdp_peer_get() will return if there is a peer RDT
+ * resource and the exact instance that shares the same hardware.
+ *
+ * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists.
+ * If a CDP peer was found, @r_cdp will point to the peer RDT resource
+ * and @d_cdp will point to the peer RDT domain.
+ */
+static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
+ struct rdt_resource **r_cdp,
+ struct rdt_domain **d_cdp)
+{
+ struct rdt_resource *_r_cdp = NULL;
+ struct rdt_domain *_d_cdp = NULL;
+ int ret = 0;
+
+ switch (r->rid) {
+ case RDT_RESOURCE_L3DATA:
+ _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE];
+ break;
+ case RDT_RESOURCE_L3CODE:
+ _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA];
+ break;
+ case RDT_RESOURCE_L2DATA:
+ _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE];
+ break;
+ case RDT_RESOURCE_L2CODE:
+ _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA];
+ break;
+ default:
+ ret = -ENOENT;
+ goto out;
+ }
+
+ /*
+ * When a new CPU comes online and CDP is enabled then the new
+ * RDT domains (if any) associated with both CDP RDT resources
+ * are added in the same CPU online routine while the
+ * rdtgroup_mutex is held. It should thus not happen for one
+ * RDT domain to exist and be associated with its RDT CDP
+ * resource but there is no RDT domain associated with the
+ * peer RDT CDP resource. Hence the WARN.
+ */
+ _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
+ if (WARN_ON(!_d_cdp)) {
+ _r_cdp = NULL;
+ ret = -EINVAL;
+ }
+
+out:
+ *r_cdp = _r_cdp;
+ *d_cdp = _d_cdp;
+
+ return ret;
+}
+
+/**
+ * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
* @r: Resource to which domain instance @d belongs.
* @d: The domain instance for which @closid is being tested.
* @cbm: Capacity bitmask being tested.
@@ -980,8 +1061,8 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
*
* Return: false if CBM does not overlap, true if it does.
*/
-bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
- unsigned long cbm, int closid, bool exclusive)
+static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+ unsigned long cbm, int closid, bool exclusive)
{
enum rdtgrp_mode mode;
unsigned long ctrl_b;
@@ -1017,6 +1098,41 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
}
/**
+ * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
+ * @r: Resource to which domain instance @d belongs.
+ * @d: The domain instance for which @closid is being tested.
+ * @cbm: Capacity bitmask being tested.
+ * @closid: Intended closid for @cbm.
+ * @exclusive: Only check if overlaps with exclusive resource groups
+ *
+ * Resources that can be allocated using a CBM can use the CBM to control
+ * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
+ * for overlap. Overlap test is not limited to the specific resource for
+ * which the CBM is intended though - when dealing with CDP resources that
+ * share the underlying hardware the overlap check should be performed on
+ * the CDP resource sharing the hardware also.
+ *
+ * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
+ * overlap test.
+ *
+ * Return: true if CBM overlap detected, false if there is no overlap
+ */
+bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+ unsigned long cbm, int closid, bool exclusive)
+{
+ struct rdt_resource *r_cdp;
+ struct rdt_domain *d_cdp;
+
+ if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive))
+ return true;
+
+ if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0)
+ return false;
+
+ return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive);
+}
+
+/**
* rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
*
* An exclusive resource group implies that there should be no sharing of
@@ -1176,6 +1292,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
struct rdt_resource *r;
struct rdt_domain *d;
unsigned int size;
+ int ret = 0;
bool sep;
u32 ctrl;
@@ -1186,11 +1303,18 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
}
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
- seq_printf(s, "%*s:", max_name_width, rdtgrp->plr->r->name);
- size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
- rdtgrp->plr->d,
- rdtgrp->plr->cbm);
- seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+ if (!rdtgrp->plr->d) {
+ rdt_last_cmd_clear();
+ rdt_last_cmd_puts("Cache domain offline\n");
+ ret = -ENODEV;
+ } else {
+ seq_printf(s, "%*s:", max_name_width,
+ rdtgrp->plr->r->name);
+ size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
+ rdtgrp->plr->d,
+ rdtgrp->plr->cbm);
+ seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+ }
goto out;
}
@@ -1220,7 +1344,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
out:
rdtgroup_kn_unlock(of->kn);
- return 0;
+ return ret;
}
/* rdtgroup information files for one cache resource. */
@@ -2354,14 +2478,16 @@ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
*/
static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
{
+ struct rdt_resource *r_cdp = NULL;
+ struct rdt_domain *d_cdp = NULL;
u32 used_b = 0, unused_b = 0;
u32 closid = rdtgrp->closid;
struct rdt_resource *r;
unsigned long tmp_cbm;
enum rdtgrp_mode mode;
struct rdt_domain *d;
+ u32 peer_ctl, *ctrl;
int i, ret;
- u32 *ctrl;
for_each_alloc_enabled_rdt_resource(r) {
/*
@@ -2371,6 +2497,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
if (r->rid == RDT_RESOURCE_MBA)
continue;
list_for_each_entry(d, &r->domains, list) {
+ rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
d->have_new_ctrl = false;
d->new_ctrl = r->cache.shareable_bits;
used_b = r->cache.shareable_bits;
@@ -2380,9 +2507,19 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
mode = rdtgroup_mode_by_closid(i);
if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
break;
- used_b |= *ctrl;
+ /*
+ * If CDP is active include peer
+ * domain's usage to ensure there
+ * is no overlap with an exclusive
+ * group.
+ */
+ if (d_cdp)
+ peer_ctl = d_cdp->ctrl_val[i];
+ else
+ peer_ctl = 0;
+ used_b |= *ctrl | peer_ctl;
if (mode == RDT_MODE_SHAREABLE)
- d->new_ctrl |= *ctrl;
+ d->new_ctrl |= *ctrl | peer_ctl;
}
}
if (d->plr && d->plr->cbm > 0)
@@ -2805,6 +2942,13 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
seq_puts(seq, ",cdp");
+
+ if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
+ seq_puts(seq, ",cdpl2");
+
+ if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
+ seq_puts(seq, ",mba_MBps");
+
return 0;
}
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index eaf02f2e7300..40b16b270656 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -179,7 +179,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
opt_pre_handler(&op->kp, regs);
__this_cpu_write(current_kprobe, NULL);
}
- preempt_enable_no_resched();
+ preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6d5dc5dabfd7..03b7529333a6 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -636,7 +636,7 @@ unsigned long native_calibrate_tsc(void)
case INTEL_FAM6_KABYLAKE_DESKTOP:
crystal_khz = 24000; /* 24.0 MHz */
break;
- case INTEL_FAM6_ATOM_DENVERTON:
+ case INTEL_FAM6_ATOM_GOLDMONT_X:
crystal_khz = 25000; /* 25.0 MHz */
break;
case INTEL_FAM6_ATOM_GOLDMONT:
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 27ef714d886c..3d0e9aeea7c8 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -59,12 +59,12 @@ static const struct freq_desc freq_desc_ann = {
};
static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
- INTEL_CPU_FAM6(ATOM_PENWELL, freq_desc_pnw),
- INTEL_CPU_FAM6(ATOM_CLOVERVIEW, freq_desc_clv),
- INTEL_CPU_FAM6(ATOM_SILVERMONT1, freq_desc_byt),
+ INTEL_CPU_FAM6(ATOM_SALTWELL_MID, freq_desc_pnw),
+ INTEL_CPU_FAM6(ATOM_SALTWELL_TABLET, freq_desc_clv),
+ INTEL_CPU_FAM6(ATOM_SILVERMONT, freq_desc_byt),
+ INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, freq_desc_tng),
INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht),
- INTEL_CPU_FAM6(ATOM_MERRIFIELD, freq_desc_tng),
- INTEL_CPU_FAM6(ATOM_MOOREFIELD, freq_desc_ann),
+ INTEL_CPU_FAM6(ATOM_AIRMONT_MID, freq_desc_ann),
{}
};
diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c
index 034813d4ab1e..6cb6076223ba 100644
--- a/arch/x86/platform/atom/punit_atom_debug.c
+++ b/arch/x86/platform/atom/punit_atom_debug.c
@@ -115,7 +115,7 @@ static struct dentry *punit_dbg_file;
static int punit_dbgfs_register(struct punit_device *punit_device)
{
- static struct dentry *dev_state;
+ struct dentry *dev_state;
punit_dbg_file = debugfs_create_dir("punit_atom", NULL);
if (!punit_dbg_file)
@@ -143,8 +143,8 @@ static void punit_dbgfs_unregister(void)
(kernel_ulong_t)&drv_data }
static const struct x86_cpu_id intel_punit_cpu_ids[] = {
- ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt),
- ICPU(INTEL_FAM6_ATOM_MERRIFIELD, punit_device_tng),
+ ICPU(INTEL_FAM6_ATOM_SILVERMONT, punit_device_byt),
+ ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, punit_device_tng),
ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht),
{}
};
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index 5a0483e7bf66..31dce781364c 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -68,7 +68,7 @@ static struct bt_sfi_data tng_bt_sfi_data __initdata = {
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata }
static const struct x86_cpu_id bt_sfi_cpu_ids[] = {
- ICPU(INTEL_FAM6_ATOM_MERRIFIELD, tng_bt_sfi_data),
+ ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, tng_bt_sfi_data),
{}
};