aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2019-07-10 23:24:10 -0700
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2019-07-10 23:24:10 -0700
commit597473720f4dc69749542bfcfed4a927a43d935e (patch)
tree711bf773910fb93d1dd9120c633adc807685e0d8 /arch/x86/kernel/cpu
parentInput: atmel_mxt_ts - fix leak in mxt_update_cfg() (diff)
parentInput: gpio_keys_polled - allow specifying name of input device (diff)
downloadlinux-dev-597473720f4dc69749542bfcfed4a927a43d935e.tar.xz
linux-dev-597473720f4dc69749542bfcfed4a927a43d935e.zip
Merge branch 'next' into for-linus
Prepare input updates for 5.3 merge window.
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/Makefile7
-rw-r--r--arch/x86/kernel/cpu/amd.c9
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c1
-rw-r--r--arch/x86/kernel/cpu/bugs.c61
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/cpu.h3
-rw-r--r--arch/x86/kernel/cpu/cyrix.c14
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/match.c31
-rw-r--r--arch/x86/kernel/cpu/mce/Makefile (renamed from arch/x86/kernel/cpu/mcheck/Makefile)10
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c (renamed from arch/x86/kernel/cpu/mcheck/mce_amd.c)69
-rw-r--r--arch/x86/kernel/cpu/mce/apei.c (renamed from arch/x86/kernel/cpu/mcheck/mce-apei.c)12
-rw-r--r--arch/x86/kernel/cpu/mce/core.c (renamed from arch/x86/kernel/cpu/mcheck/mce.c)37
-rw-r--r--arch/x86/kernel/cpu/mce/dev-mcelog.c (renamed from arch/x86/kernel/cpu/mcheck/dev-mcelog.c)4
-rw-r--r--arch/x86/kernel/cpu/mce/genpool.c (renamed from arch/x86/kernel/cpu/mcheck/mce-genpool.c)2
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c (renamed from arch/x86/kernel/cpu/mcheck/mce-inject.c)2
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c (renamed from arch/x86/kernel/cpu/mcheck/mce_intel.c)2
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h (renamed from arch/x86/kernel/cpu/mcheck/mce-internal.h)3
-rw-r--r--arch/x86/kernel/cpu/mce/p5.c (renamed from arch/x86/kernel/cpu/mcheck/p5.c)2
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c (renamed from arch/x86/kernel/cpu/mcheck/mce-severity.c)7
-rw-r--r--arch/x86/kernel/cpu/mce/therm_throt.c (renamed from arch/x86/kernel/cpu/mcheck/therm_throt.c)5
-rw-r--r--arch/x86/kernel/cpu/mce/threshold.c (renamed from arch/x86/kernel/cpu/mcheck/threshold.c)5
-rw-r--r--arch/x86/kernel/cpu/mce/winchip.c (renamed from arch/x86/kernel/cpu/mcheck/winchip.c)2
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c472
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c7
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c12
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c3
-rw-r--r--arch/x86/kernel/cpu/resctrl/Makefile4
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c (renamed from arch/x86/kernel/cpu/intel_rdt.c)186
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c (renamed from arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c)107
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h (renamed from arch/x86/kernel/cpu/intel_rdt.h)71
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c (renamed from arch/x86/kernel/cpu/intel_rdt_monitor.c)19
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c (renamed from arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c)47
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h (renamed from arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h)2
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c (renamed from arch/x86/kernel/cpu/intel_rdt_rdtgroup.c)241
-rw-r--r--arch/x86/kernel/cpu/scattered.c34
-rw-r--r--arch/x86/kernel/cpu/topology.c2
38 files changed, 1000 insertions, 503 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 1f5d2291c31e..cfd24f9f7614 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -36,13 +36,10 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
-obj-$(CONFIG_INTEL_RDT) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o
-obj-$(CONFIG_INTEL_RDT) += intel_rdt_ctrlmondata.o intel_rdt_pseudo_lock.o
-CFLAGS_intel_rdt_pseudo_lock.o = -I$(src)
-
-obj-$(CONFIG_X86_MCE) += mcheck/
+obj-$(CONFIG_X86_MCE) += mce/
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_MICROCODE) += microcode/
+obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index eeea634bee0a..01004bfb1a1b 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -15,6 +15,7 @@
#include <asm/smp.h>
#include <asm/pci-direct.h>
#include <asm/delay.h>
+#include <asm/debugreg.h>
#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
@@ -818,11 +819,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
- /*
- * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
- * all up to and including B1.
- */
- if (c->x86_model <= 1 && c->x86_stepping <= 1)
+
+ /* Fix erratum 1076: CPB feature bit not being set in CPUID. */
+ if (!cpu_has(c, X86_FEATURE_CPB))
set_cpu_cap(c, X86_FEATURE_CPB);
}
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 7eba34df54c3..804c49493938 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -12,6 +12,7 @@
#include <linux/ktime.h>
#include <linux/math64.h>
#include <linux/percpu.h>
+#include <linux/cpufreq.h>
#include <linux/smp.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 500278f5308e..b91b3bfa5cfb 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -32,6 +32,8 @@
#include <asm/e820/api.h>
#include <asm/hypervisor.h>
+#include "cpu.h"
+
static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
@@ -54,7 +56,7 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
u64 __ro_after_init x86_amd_ls_cfg_base;
u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
-/* Control conditional STIPB in switch_to() */
+/* Control conditional STIBP in switch_to() */
DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
/* Control conditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
@@ -69,7 +71,7 @@ void __init check_bugs(void)
* identify_boot_cpu() initialized SMT support information, let the
* core code know.
*/
- cpu_smt_check_topology_early();
+ cpu_smt_check_topology();
if (!IS_ENABLED(CONFIG_SMP)) {
pr_info("CPU: ");
@@ -213,7 +215,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
SPECTRE_V2_USER_NONE;
-#ifdef RETPOLINE
+#ifdef CONFIG_RETPOLINE
static bool spectre_v2_bad_module;
bool retpoline_module_ok(bool has_retpoline)
@@ -262,17 +264,18 @@ enum spectre_v2_user_cmd {
};
static const char * const spectre_v2_user_strings[] = {
- [SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
- [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
- [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl",
- [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
+ [SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
+ [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
+ [SPECTRE_V2_USER_STRICT_PREFERRED] = "User space: Mitigation: STIBP always-on protection",
+ [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl",
+ [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
};
static const struct {
const char *option;
enum spectre_v2_user_cmd cmd;
bool secure;
-} v2_user_options[] __initdata = {
+} v2_user_options[] __initconst = {
{ "auto", SPECTRE_V2_USER_CMD_AUTO, false },
{ "off", SPECTRE_V2_USER_CMD_NONE, false },
{ "on", SPECTRE_V2_USER_CMD_FORCE, true },
@@ -355,6 +358,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
break;
}
+ /*
+ * At this point, an STIBP mode other than "off" has been set.
+ * If STIBP support is not being forced, check if STIBP always-on
+ * is preferred.
+ */
+ if (mode != SPECTRE_V2_USER_STRICT &&
+ boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
+ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+
/* Initialize Indirect Branch Prediction Barrier */
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
@@ -379,12 +391,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
"always-on" : "conditional");
}
- /* If enhanced IBRS is enabled no STIPB required */
+ /* If enhanced IBRS is enabled no STIBP required */
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return;
/*
- * If SMT is not possible or STIBP is not available clear the STIPB
+ * If SMT is not possible or STIBP is not available clear the STIBP
* mode.
*/
if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
@@ -407,7 +419,7 @@ static const struct {
const char *option;
enum spectre_v2_mitigation_cmd cmd;
bool secure;
-} mitigation_options[] __initdata = {
+} mitigation_options[] __initconst = {
{ "off", SPECTRE_V2_CMD_NONE, false },
{ "on", SPECTRE_V2_CMD_FORCE, true },
{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
@@ -610,6 +622,7 @@ void arch_smt_update(void)
case SPECTRE_V2_USER_NONE:
break;
case SPECTRE_V2_USER_STRICT:
+ case SPECTRE_V2_USER_STRICT_PREFERRED:
update_stibp_strict();
break;
case SPECTRE_V2_USER_PRCTL:
@@ -645,7 +658,7 @@ static const char * const ssb_strings[] = {
static const struct {
const char *option;
enum ssb_mitigation_cmd cmd;
-} ssb_mitigation_options[] __initdata = {
+} ssb_mitigation_options[] __initconst = {
{ "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
{ "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
{ "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
@@ -785,15 +798,25 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
if (task_spec_ssb_force_disable(task))
return -EPERM;
task_clear_spec_ssb_disable(task);
+ task_clear_spec_ssb_noexec(task);
task_update_spec_tif(task);
break;
case PR_SPEC_DISABLE:
task_set_spec_ssb_disable(task);
+ task_clear_spec_ssb_noexec(task);
task_update_spec_tif(task);
break;
case PR_SPEC_FORCE_DISABLE:
task_set_spec_ssb_disable(task);
task_set_spec_ssb_force_disable(task);
+ task_clear_spec_ssb_noexec(task);
+ task_update_spec_tif(task);
+ break;
+ case PR_SPEC_DISABLE_NOEXEC:
+ if (task_spec_ssb_force_disable(task))
+ return -EPERM;
+ task_set_spec_ssb_disable(task);
+ task_set_spec_ssb_noexec(task);
task_update_spec_tif(task);
break;
default:
@@ -812,7 +835,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
* Indirect branch speculation is always disabled in strict
* mode.
*/
- if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
+ if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
return -EPERM;
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
@@ -825,7 +849,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
*/
if (spectre_v2_user == SPECTRE_V2_USER_NONE)
return -EPERM;
- if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
+ if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
return 0;
task_set_spec_ib_disable(task);
if (ctrl == PR_SPEC_FORCE_DISABLE)
@@ -870,6 +895,8 @@ static int ssb_prctl_get(struct task_struct *task)
case SPEC_STORE_BYPASS_PRCTL:
if (task_spec_ssb_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+ if (task_spec_ssb_noexec(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE_NOEXEC;
if (task_spec_ssb_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
@@ -896,6 +923,7 @@ static int ib_prctl_get(struct task_struct *task)
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
case SPECTRE_V2_USER_STRICT:
+ case SPECTRE_V2_USER_STRICT_PREFERRED:
return PR_SPEC_DISABLE;
default:
return PR_SPEC_NOT_AFFECTED;
@@ -1002,7 +1030,8 @@ static void __init l1tf_select_mitigation(void)
#endif
half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
- if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
+ if (l1tf_mitigation != L1TF_MITIGATION_OFF &&
+ e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n",
half_pa);
@@ -1088,6 +1117,8 @@ static char *stibp_state(void)
return ", STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
return ", STIBP: forced";
+ case SPECTRE_V2_USER_STRICT_PREFERRED:
+ return ", STIBP: always-on";
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (static_key_enabled(&switch_to_cond_stibp))
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index dc1b9342e9c4..395d46f78582 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -17,6 +17,7 @@
#include <linux/pci.h>
#include <asm/cpufeature.h>
+#include <asm/cacheinfo.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>
@@ -247,6 +248,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
switch (leaf) {
case 1:
l1 = &l1i;
+ /* fall through */
case 0:
if (!l1->val)
return;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ffb181f959d2..cb28e98a0659 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -353,7 +353,7 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
cr4_set_bits(X86_CR4_UMIP);
- pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
+ pr_info_once("x86/cpu: User Mode Instruction Prevention (UMIP) activated\n");
return;
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index da5446acc241..5eb946b9a9f3 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -49,9 +49,6 @@ extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern u32 get_scattered_cpuid_leaf(unsigned int level,
- unsigned int sub_leaf,
- enum cpuid_regs_idx reg);
extern void init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index d12226f60168..1d9b8aaea06c 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -124,7 +124,7 @@ static void set_cx86_reorder(void)
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
/* Load/Store Serialize to mem access disable (=reorder it) */
- setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80);
+ setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
/* set load/store serialize from 1GB to 4GB */
ccr3 |= 0xe0;
setCx86(CX86_CCR3, ccr3);
@@ -135,11 +135,11 @@ static void set_cx86_memwb(void)
pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
/* CCR2 bit 2: unlock NW bit */
- setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
/* set 'Not Write-through' */
write_cr0(read_cr0() | X86_CR0_NW);
/* CCR2 bit 2: lock NW bit and set WT1 */
- setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14);
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14);
}
/*
@@ -153,14 +153,14 @@ static void geode_configure(void)
local_irq_save(flags);
/* Suspend on halt power saving and enable #SUSP pin */
- setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88);
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
/* FPU fast, DTE cache, Mem bypass */
- setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38);
+ setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38);
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
set_cx86_memwb();
@@ -296,7 +296,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
/* GXm supports extended cpuid levels 'ala' AMD */
if (c->cpuid_level == 2) {
/* Enable cxMMX extensions (GX1 Datasheet 54) */
- setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1);
+ setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
/*
* GXm : 0x30 ... 0x5f GXm datasheet 51
@@ -319,7 +319,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
if (dir1 > 7) {
dir0_msn++; /* M II */
/* Enable MMX extensions (App note 108) */
- setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1);
+ setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
} else {
/* A 6x86MX - it has the bug. */
set_cpu_bug(c, X86_BUG_COMA);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fc3c07fe7df5..3142fd7a9b32 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -611,8 +611,8 @@ static void init_intel_energy_perf(struct cpuinfo_x86 *c)
if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
return;
- pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
- pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
+ pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
+ pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
}
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 3fed38812eea..6dd78d8235e4 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -48,3 +48,34 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
return NULL;
}
EXPORT_SYMBOL(x86_match_cpu);
+
+static const struct x86_cpu_desc *
+x86_match_cpu_with_stepping(const struct x86_cpu_desc *match)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ const struct x86_cpu_desc *m;
+
+ for (m = match; m->x86_family | m->x86_model; m++) {
+ if (c->x86_vendor != m->x86_vendor)
+ continue;
+ if (c->x86 != m->x86_family)
+ continue;
+ if (c->x86_model != m->x86_model)
+ continue;
+ if (c->x86_stepping != m->x86_stepping)
+ continue;
+ return m;
+ }
+ return NULL;
+}
+
+bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table)
+{
+ const struct x86_cpu_desc *res = x86_match_cpu_with_stepping(table);
+
+ if (!res || res->x86_microcode_rev > boot_cpu_data.microcode)
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(x86_cpu_has_min_microcode_rev);
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mce/Makefile
index bcc7c54c7041..9f020c994154 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mce/Makefile
@@ -1,14 +1,16 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y = mce.o mce-severity.o mce-genpool.o
+obj-y = core.o severity.o genpool.o
obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
-obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
-obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
+obj-$(CONFIG_X86_MCE_INTEL) += intel.o
+obj-$(CONFIG_X86_MCE_AMD) += amd.o
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
+
+mce-inject-y := inject.o
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
-obj-$(CONFIG_ACPI_APEI) += mce-apei.o
+obj-$(CONFIG_ACPI_APEI) += apei.o
obj-$(CONFIG_X86_MCELOG_LEGACY) += dev-mcelog.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mce/amd.c
index e12454e21b8a..e64de5149e50 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -23,12 +23,13 @@
#include <linux/string.h>
#include <asm/amd_nb.h>
+#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>
-#include "mce-internal.h"
+#include "internal.h"
#define NR_BLOCKS 5
#define THRESHOLD_MAX 0xFFF
@@ -87,11 +88,17 @@ static struct smca_bank_name smca_names[] = {
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
[SMCA_CS] = { "coherent_slave", "Coherent Slave" },
+ [SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
[SMCA_UMC] = { "umc", "Unified Memory Controller" },
[SMCA_PB] = { "param_block", "Parameter Block" },
[SMCA_PSP] = { "psp", "Platform Security Processor" },
+ [SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
[SMCA_SMU] = { "smu", "System Management Unit" },
+ [SMCA_SMU_V2] = { "smu", "System Management Unit" },
+ [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
+ [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
+ [SMCA_PCIE] = { "pcie", "PCI Express Unit" },
};
static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
@@ -99,7 +106,7 @@ static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
[0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
};
-const char *smca_get_name(enum smca_bank_types t)
+static const char *smca_get_name(enum smca_bank_types t)
{
if (t >= N_SMCA_BANK_TYPES)
return NULL;
@@ -137,30 +144,42 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
/* ZN Core (HWID=0xB0) MCA types */
- { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
+ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
- { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF },
+ { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
/* Data Fabric MCA types */
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
- { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF },
+ { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
+ { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
/* Unified Memory Controller MCA type */
- { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F },
+ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
/* Parameter Block MCA type */
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
/* Platform Security Processor MCA type */
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
+ { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
/* System Management Unit MCA type */
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
+ { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
+
+ /* Microprocessor 5 Unit MCA type */
+ { SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
+
+ /* Northbridge IO Unit MCA type */
+ { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
+
+ /* PCI Express Unit MCA type */
+ { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
};
struct smca_bank smca_banks[MAX_NR_BANKS];
@@ -544,6 +563,40 @@ out:
return offset;
}
+/*
+ * Turn off MC4_MISC thresholding banks on all family 0x15 models since
+ * they're not supported there.
+ */
+void disable_err_thresholding(struct cpuinfo_x86 *c)
+{
+ int i;
+ u64 hwcr;
+ bool need_toggle;
+ u32 msrs[] = {
+ 0x00000413, /* MC4_MISC0 */
+ 0xc0000408, /* MC4_MISC1 */
+ };
+
+ if (c->x86 != 0x15)
+ return;
+
+ rdmsrl(MSR_K7_HWCR, hwcr);
+
+ /* McStatusWrEn has to be set */
+ need_toggle = !(hwcr & BIT(18));
+
+ if (need_toggle)
+ wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
+
+ /* Clear CntP bit safely */
+ for (i = 0; i < ARRAY_SIZE(msrs); i++)
+ msr_clear_bit(msrs[i], 62);
+
+ /* restore old settings */
+ if (need_toggle)
+ wrmsrl(MSR_K7_HWCR, hwcr);
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -551,6 +604,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
unsigned int bank, block, cpu = smp_processor_id();
int offset = -1;
+ disable_err_thresholding(c);
+
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (mce_flags.smca)
smca_configure(bank, cpu);
@@ -824,7 +879,7 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
mce_log(&m);
}
-asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
+asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(struct pt_regs *regs)
{
entering_irq();
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 2eee85379689..c038e5c00a59 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -36,7 +36,7 @@
#include <acpi/ghes.h>
#include <asm/mce.h>
-#include "mce-internal.h"
+#include "internal.h"
void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
{
@@ -64,11 +64,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
#define CPER_CREATOR_MCE \
- UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
- 0x64, 0x90, 0xb8, 0x9d)
+ GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
+ 0x64, 0x90, 0xb8, 0x9d)
#define CPER_SECTION_TYPE_MCE \
- UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
- 0x04, 0x4a, 0x38, 0xfc)
+ GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
+ 0x04, 0x4a, 0x38, 0xfc)
/*
* CPER specification (in UEFI specification 2.3 appendix N) requires
@@ -135,7 +135,7 @@ retry:
goto out;
/* try to skip other type records in storage */
else if (rc != sizeof(rcd) ||
- uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
+ !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
goto retry;
memcpy(m, &rcd.mce, sizeof(*m));
rc = sizeof(*m);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mce/core.c
index 36d2696c9563..b7fb541a4873 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -8,8 +8,6 @@
* Author: Andi Kleen
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include <linux/thread_info.h>
#include <linux/capability.h>
#include <linux/miscdevice.h>
@@ -52,7 +50,7 @@
#include <asm/msr.h>
#include <asm/reboot.h>
-#include "mce-internal.h"
+#include "internal.h"
static DEFINE_MUTEX(mce_log_mutex);
@@ -686,7 +684,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
* errors here. However this would be quite problematic --
* we would need to reimplement the Monarch handling and
* it would mess up the exclusion between exception handler
- * and poll hander -- * so we skip this for now.
+ * and poll handler -- * so we skip this for now.
* These cases should not happen anyways, or only when the CPU
* is already totally * confused. In this case it's likely it will
* not fully execute the machine check handler either.
@@ -786,6 +784,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
quirk_no_way_out(i, m, regs);
if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ m->bank = i;
mce_read_aux(m, i);
*msg = tmp;
return 1;
@@ -1613,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 0x15 && c->x86_model <= 0xf)
mce_flags.overflow_recov = 1;
- /*
- * Turn off MC4_MISC thresholding banks on those models since
- * they're not supported there.
- */
- if (c->x86 == 0x15 &&
- (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
- int i;
- u64 hwcr;
- bool need_toggle;
- u32 msrs[] = {
- 0x00000413, /* MC4_MISC0 */
- 0xc0000408, /* MC4_MISC1 */
- };
-
- rdmsrl(MSR_K7_HWCR, hwcr);
-
- /* McStatusWrEn has to be set */
- need_toggle = !(hwcr & BIT(18));
-
- if (need_toggle)
- wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
-
- /* Clear CntP bit safely */
- for (i = 0; i < ARRAY_SIZE(msrs); i++)
- msr_clear_bit(msrs[i], 62);
-
- /* restore old settings */
- if (need_toggle)
- wrmsrl(MSR_K7_HWCR, hwcr);
- }
}
if (c->x86_vendor == X86_VENDOR_INTEL) {
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index 27f394ac983f..9690ec5c8051 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -8,14 +8,12 @@
* Author: Andi Kleen
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/poll.h>
-#include "mce-internal.h"
+#include "internal.h"
static BLOCKING_NOTIFIER_HEAD(mce_injector_chain);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mce/genpool.c
index 217cd4449bc9..3395549c51d3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mce/genpool.c
@@ -10,7 +10,7 @@
#include <linux/mm.h>
#include <linux/genalloc.h>
#include <linux/llist.h>
-#include "mce-internal.h"
+#include "internal.h"
/*
* printk() is not safe in MCE context. This is a lock-less memory allocator
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 1fc424c40a31..8492ef7d9015 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -38,7 +38,7 @@
#include <asm/nmi.h>
#include <asm/smp.h>
-#include "mce-internal.h"
+#include "internal.h"
/*
* Collect all the MCi_XXX settings
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mce/intel.c
index d05be307d081..e43eb6732630 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -18,7 +18,7 @@
#include <asm/msr.h>
#include <asm/mce.h>
-#include "mce-internal.h"
+#include "internal.h"
/*
* Support for Intel Correct Machine Check Interrupts. This allows
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mce/internal.h
index ceb67cd5918f..af5eab1e65e2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -2,6 +2,9 @@
#ifndef __X86_MCE_INTERNAL_H__
#define __X86_MCE_INTERNAL_H__
+#undef pr_fmt
+#define pr_fmt(fmt) "mce: " fmt
+
#include <linux/device.h>
#include <asm/mce.h>
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mce/p5.c
index 5cddf831720f..4ae6df556526 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mce/p5.c
@@ -14,6 +14,8 @@
#include <asm/mce.h>
#include <asm/msr.h>
+#include "internal.h"
+
/* By default disabled */
int mce_p5_enabled __read_mostly;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 44396d521987..65201e180fe0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -16,7 +16,7 @@
#include <asm/mce.h>
#include <linux/uaccess.h>
-#include "mce-internal.h"
+#include "internal.h"
/*
* Grade an mce by severity. In general the most severe ones are processed
@@ -165,6 +165,11 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
KERNEL
),
+ MCESEV(
+ PANIC, "Instruction fetch error in kernel",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
+ KERNEL
+ ),
#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c
index 2da67b70ba98..10a3b0599300 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mce/therm_throt.c
@@ -25,11 +25,14 @@
#include <linux/cpu.h>
#include <asm/processor.h>
+#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>
+#include "internal.h"
+
/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL (300 * HZ)
@@ -390,7 +393,7 @@ static void unexpected_thermal_interrupt(void)
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
-asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *r)
+asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *regs)
{
entering_irq();
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mce/threshold.c
index 2b584b319eff..28812cc15300 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mce/threshold.c
@@ -6,10 +6,13 @@
#include <linux/kernel.h>
#include <asm/irq_vectors.h>
+#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>
+#include "internal.h"
+
static void default_threshold_interrupt(void)
{
pr_err("Unexpected threshold interrupt at vector %x\n",
@@ -18,7 +21,7 @@ static void default_threshold_interrupt(void)
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
-asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
+asmlinkage __visible void __irq_entry smp_threshold_interrupt(struct pt_regs *regs)
{
entering_irq();
trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mce/winchip.c
index 3b45b270a865..a30ea13cccc2 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mce/winchip.c
@@ -13,6 +13,8 @@
#include <asm/mce.h>
#include <asm/msr.h>
+#include "internal.h"
+
/* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 07b5fc00b188..e1f3ba19ba54 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -5,7 +5,7 @@
* CPUs and later.
*
* Copyright (C) 2008-2011 Advanced Micro Devices Inc.
- * 2013-2016 Borislav Petkov <bp@alien8.de>
+ * 2013-2018 Borislav Petkov <bp@alien8.de>
*
* Author: Peter Oruba <peter.oruba@amd.com>
*
@@ -38,7 +38,10 @@
#include <asm/cpu.h>
#include <asm/msr.h>
-static struct equiv_cpu_entry *equiv_cpu_table;
+static struct equiv_cpu_table {
+ unsigned int num_entries;
+ struct equiv_cpu_entry *entry;
+} equiv_table;
/*
* This points to the current valid container of microcode patches which we will
@@ -63,13 +66,225 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE];
static const char
ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin";
-static u16 find_equiv_id(struct equiv_cpu_entry *equiv_table, u32 sig)
+static u16 find_equiv_id(struct equiv_cpu_table *et, u32 sig)
{
- for (; equiv_table && equiv_table->installed_cpu; equiv_table++) {
- if (sig == equiv_table->installed_cpu)
- return equiv_table->equiv_cpu;
+ unsigned int i;
+
+ if (!et || !et->num_entries)
+ return 0;
+
+ for (i = 0; i < et->num_entries; i++) {
+ struct equiv_cpu_entry *e = &et->entry[i];
+
+ if (sig == e->installed_cpu)
+ return e->equiv_cpu;
+
+ e++;
+ }
+ return 0;
+}
+
+/*
+ * Check whether there is a valid microcode container file at the beginning
+ * of @buf of size @buf_size. Set @early to use this function in the early path.
+ */
+static bool verify_container(const u8 *buf, size_t buf_size, bool early)
+{
+ u32 cont_magic;
+
+ if (buf_size <= CONTAINER_HDR_SZ) {
+ if (!early)
+ pr_debug("Truncated microcode container header.\n");
+
+ return false;
+ }
+
+ cont_magic = *(const u32 *)buf;
+ if (cont_magic != UCODE_MAGIC) {
+ if (!early)
+ pr_debug("Invalid magic value (0x%08x).\n", cont_magic);
+
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Check whether there is a valid, non-truncated CPU equivalence table at the
+ * beginning of @buf of size @buf_size. Set @early to use this function in the
+ * early path.
+ */
+static bool verify_equivalence_table(const u8 *buf, size_t buf_size, bool early)
+{
+ const u32 *hdr = (const u32 *)buf;
+ u32 cont_type, equiv_tbl_len;
+
+ if (!verify_container(buf, buf_size, early))
+ return false;
+
+ cont_type = hdr[1];
+ if (cont_type != UCODE_EQUIV_CPU_TABLE_TYPE) {
+ if (!early)
+ pr_debug("Wrong microcode container equivalence table type: %u.\n",
+ cont_type);
+
+ return false;
+ }
+
+ buf_size -= CONTAINER_HDR_SZ;
+
+ equiv_tbl_len = hdr[2];
+ if (equiv_tbl_len < sizeof(struct equiv_cpu_entry) ||
+ buf_size < equiv_tbl_len) {
+ if (!early)
+ pr_debug("Truncated equivalence table.\n");
+
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Check whether there is a valid, non-truncated microcode patch section at the
+ * beginning of @buf of size @buf_size. Set @early to use this function in the
+ * early path.
+ *
+ * On success, @sh_psize returns the patch size according to the section header,
+ * to the caller.
+ */
+static bool
+__verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize, bool early)
+{
+ u32 p_type, p_size;
+ const u32 *hdr;
+
+ if (buf_size < SECTION_HDR_SIZE) {
+ if (!early)
+ pr_debug("Truncated patch section.\n");
+
+ return false;
+ }
+
+ hdr = (const u32 *)buf;
+ p_type = hdr[0];
+ p_size = hdr[1];
+
+ if (p_type != UCODE_UCODE_TYPE) {
+ if (!early)
+ pr_debug("Invalid type field (0x%x) in container file section header.\n",
+ p_type);
+
+ return false;
+ }
+
+ if (p_size < sizeof(struct microcode_header_amd)) {
+ if (!early)
+ pr_debug("Patch of size %u too short.\n", p_size);
+
+ return false;
+ }
+
+ *sh_psize = p_size;
+
+ return true;
+}
+
+/*
+ * Check whether the passed remaining file @buf_size is large enough to contain
+ * a patch of the indicated @sh_psize (and also whether this size does not
+ * exceed the per-family maximum). @sh_psize is the size read from the section
+ * header.
+ */
+static unsigned int __verify_patch_size(u8 family, u32 sh_psize, size_t buf_size)
+{
+ u32 max_size;
+
+ if (family >= 0x15)
+ return min_t(u32, sh_psize, buf_size);
+
+#define F1XH_MPB_MAX_SIZE 2048
+#define F14H_MPB_MAX_SIZE 1824
+
+ switch (family) {
+ case 0x10 ... 0x12:
+ max_size = F1XH_MPB_MAX_SIZE;
+ break;
+ case 0x14:
+ max_size = F14H_MPB_MAX_SIZE;
+ break;
+ default:
+ WARN(1, "%s: WTF family: 0x%x\n", __func__, family);
+ return 0;
+ break;
+ }
+
+ if (sh_psize > min_t(u32, buf_size, max_size))
+ return 0;
+
+ return sh_psize;
+}
+
+/*
+ * Verify the patch in @buf.
+ *
+ * Returns:
+ * negative: on error
+ * positive: patch is not for this family, skip it
+ * 0: success
+ */
+static int
+verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size, bool early)
+{
+ struct microcode_header_amd *mc_hdr;
+ unsigned int ret;
+ u32 sh_psize;
+ u16 proc_id;
+ u8 patch_fam;
+
+ if (!__verify_patch_section(buf, buf_size, &sh_psize, early))
+ return -1;
+
+ /*
+ * The section header length is not included in this indicated size
+ * but is present in the leftover file length so we need to subtract
+ * it before passing this value to the function below.
+ */
+ buf_size -= SECTION_HDR_SIZE;
+
+ /*
+ * Check if the remaining buffer is big enough to contain a patch of
+ * size sh_psize, as the section claims.
+ */
+ if (buf_size < sh_psize) {
+ if (!early)
+ pr_debug("Patch of size %u truncated.\n", sh_psize);
+
+ return -1;
+ }
+
+ ret = __verify_patch_size(family, sh_psize, buf_size);
+ if (!ret) {
+ if (!early)
+ pr_debug("Per-family patch size mismatch.\n");
+ return -1;
+ }
+
+ *patch_size = sh_psize;
+
+ mc_hdr = (struct microcode_header_amd *)(buf + SECTION_HDR_SIZE);
+ if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
+ if (!early)
+ pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", mc_hdr->patch_id);
+ return -1;
}
+ proc_id = mc_hdr->processor_rev_id;
+ patch_fam = 0xf + (proc_id >> 12);
+ if (patch_fam != family)
+ return 1;
+
return 0;
}
@@ -80,26 +295,28 @@ static u16 find_equiv_id(struct equiv_cpu_entry *equiv_table, u32 sig)
* Returns the amount of bytes consumed while scanning. @desc contains all the
* data we're going to use in later stages of the application.
*/
-static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc)
+static size_t parse_container(u8 *ucode, size_t size, struct cont_desc *desc)
{
- struct equiv_cpu_entry *eq;
- ssize_t orig_size = size;
+ struct equiv_cpu_table table;
+ size_t orig_size = size;
u32 *hdr = (u32 *)ucode;
u16 eq_id;
u8 *buf;
- /* Am I looking at an equivalence table header? */
- if (hdr[0] != UCODE_MAGIC ||
- hdr[1] != UCODE_EQUIV_CPU_TABLE_TYPE ||
- hdr[2] == 0)
- return CONTAINER_HDR_SZ;
+ if (!verify_equivalence_table(ucode, size, true))
+ return 0;
buf = ucode;
- eq = (struct equiv_cpu_entry *)(buf + CONTAINER_HDR_SZ);
+ table.entry = (struct equiv_cpu_entry *)(buf + CONTAINER_HDR_SZ);
+ table.num_entries = hdr[2] / sizeof(struct equiv_cpu_entry);
- /* Find the equivalence ID of our CPU in this table: */
- eq_id = find_equiv_id(eq, desc->cpuid_1_eax);
+ /*
+ * Find the equivalence ID of our CPU in this table. Even if this table
+ * doesn't contain a patch for the CPU, scan through the whole container
+ * so that it can be skipped in case there are other containers appended.
+ */
+ eq_id = find_equiv_id(&table, desc->cpuid_1_eax);
buf += hdr[2] + CONTAINER_HDR_SZ;
size -= hdr[2] + CONTAINER_HDR_SZ;
@@ -111,29 +328,29 @@ static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc)
while (size > 0) {
struct microcode_amd *mc;
u32 patch_size;
+ int ret;
+
+ ret = verify_patch(x86_family(desc->cpuid_1_eax), buf, size, &patch_size, true);
+ if (ret < 0) {
+ /*
+ * Patch verification failed, skip to the next
+ * container, if there's one:
+ */
+ goto out;
+ } else if (ret > 0) {
+ goto skip;
+ }
- hdr = (u32 *)buf;
-
- if (hdr[0] != UCODE_UCODE_TYPE)
- break;
-
- /* Sanity-check patch size. */
- patch_size = hdr[1];
- if (patch_size > PATCH_MAX_SIZE)
- break;
-
- /* Skip patch section header: */
- buf += SECTION_HDR_SIZE;
- size -= SECTION_HDR_SIZE;
-
- mc = (struct microcode_amd *)buf;
+ mc = (struct microcode_amd *)(buf + SECTION_HDR_SIZE);
if (eq_id == mc->hdr.processor_rev_id) {
desc->psize = patch_size;
desc->mc = mc;
}
- buf += patch_size;
- size -= patch_size;
+skip:
+ /* Skip patch section header too: */
+ buf += patch_size + SECTION_HDR_SIZE;
+ size -= patch_size + SECTION_HDR_SIZE;
}
/*
@@ -150,6 +367,7 @@ static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc)
return 0;
}
+out:
return orig_size - size;
}
@@ -159,15 +377,18 @@ static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc)
*/
static void scan_containers(u8 *ucode, size_t size, struct cont_desc *desc)
{
- ssize_t rem = size;
-
- while (rem >= 0) {
- ssize_t s = parse_container(ucode, rem, desc);
+ while (size) {
+ size_t s = parse_container(ucode, size, desc);
if (!s)
return;
- ucode += s;
- rem -= s;
+ /* catch wraparound */
+ if (size >= s) {
+ ucode += s;
+ size -= s;
+ } else {
+ return;
+ }
}
}
@@ -364,21 +585,7 @@ void reload_ucode_amd(void)
static u16 __find_equiv_id(unsigned int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- return find_equiv_id(equiv_cpu_table, uci->cpu_sig.sig);
-}
-
-static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
-{
- int i = 0;
-
- BUG_ON(!equiv_cpu_table);
-
- while (equiv_cpu_table[i].equiv_cpu != 0) {
- if (equiv_cpu == equiv_cpu_table[i].equiv_cpu)
- return equiv_cpu_table[i].installed_cpu;
- i++;
- }
- return 0;
+ return find_equiv_id(&equiv_table, uci->cpu_sig.sig);
}
/*
@@ -461,43 +668,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
return 0;
}
-static unsigned int verify_patch_size(u8 family, u32 patch_size,
- unsigned int size)
-{
- u32 max_size;
-
-#define F1XH_MPB_MAX_SIZE 2048
-#define F14H_MPB_MAX_SIZE 1824
-#define F15H_MPB_MAX_SIZE 4096
-#define F16H_MPB_MAX_SIZE 3458
-#define F17H_MPB_MAX_SIZE 3200
-
- switch (family) {
- case 0x14:
- max_size = F14H_MPB_MAX_SIZE;
- break;
- case 0x15:
- max_size = F15H_MPB_MAX_SIZE;
- break;
- case 0x16:
- max_size = F16H_MPB_MAX_SIZE;
- break;
- case 0x17:
- max_size = F17H_MPB_MAX_SIZE;
- break;
- default:
- max_size = F1XH_MPB_MAX_SIZE;
- break;
- }
-
- if (patch_size > min_t(u32, size, max_size)) {
- pr_err("patch size mismatch\n");
- return 0;
- }
-
- return patch_size;
-}
-
static enum ucode_state apply_microcode_amd(int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -548,34 +718,34 @@ out:
return ret;
}
-static int install_equiv_cpu_table(const u8 *buf)
+static size_t install_equiv_cpu_table(const u8 *buf, size_t buf_size)
{
- unsigned int *ibuf = (unsigned int *)buf;
- unsigned int type = ibuf[1];
- unsigned int size = ibuf[2];
+ u32 equiv_tbl_len;
+ const u32 *hdr;
- if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
- pr_err("empty section/"
- "invalid type field in container file section header\n");
- return -EINVAL;
- }
+ if (!verify_equivalence_table(buf, buf_size, false))
+ return 0;
+
+ hdr = (const u32 *)buf;
+ equiv_tbl_len = hdr[2];
- equiv_cpu_table = vmalloc(size);
- if (!equiv_cpu_table) {
+ equiv_table.entry = vmalloc(equiv_tbl_len);
+ if (!equiv_table.entry) {
pr_err("failed to allocate equivalent CPU table\n");
- return -ENOMEM;
+ return 0;
}
- memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
+ memcpy(equiv_table.entry, buf + CONTAINER_HDR_SZ, equiv_tbl_len);
+ equiv_table.num_entries = equiv_tbl_len / sizeof(struct equiv_cpu_entry);
/* add header length */
- return size + CONTAINER_HDR_SZ;
+ return equiv_tbl_len + CONTAINER_HDR_SZ;
}
static void free_equiv_cpu_table(void)
{
- vfree(equiv_cpu_table);
- equiv_cpu_table = NULL;
+ vfree(equiv_table.entry);
+ memset(&equiv_table, 0, sizeof(equiv_table));
}
static void cleanup(void)
@@ -585,47 +755,23 @@ static void cleanup(void)
}
/*
- * We return the current size even if some of the checks failed so that
+ * Return a non-negative value even if some of the checks failed so that
* we can skip over the next patch. If we return a negative value, we
* signal a grave error like a memory allocation has failed and the
* driver cannot continue functioning normally. In such cases, we tear
* down everything we've used up so far and exit.
*/
-static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
+static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover,
+ unsigned int *patch_size)
{
struct microcode_header_amd *mc_hdr;
struct ucode_patch *patch;
- unsigned int patch_size, crnt_size, ret;
- u32 proc_fam;
u16 proc_id;
+ int ret;
- patch_size = *(u32 *)(fw + 4);
- crnt_size = patch_size + SECTION_HDR_SIZE;
- mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
- proc_id = mc_hdr->processor_rev_id;
-
- proc_fam = find_cpu_family_by_equiv_cpu(proc_id);
- if (!proc_fam) {
- pr_err("No patch family for equiv ID: 0x%04x\n", proc_id);
- return crnt_size;
- }
-
- /* check if patch is for the current family */
- proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
- if (proc_fam != family)
- return crnt_size;
-
- if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
- pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n",
- mc_hdr->patch_id);
- return crnt_size;
- }
-
- ret = verify_patch_size(family, patch_size, leftover);
- if (!ret) {
- pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
- return crnt_size;
- }
+ ret = verify_patch(family, fw, leftover, patch_size, false);
+ if (ret)
+ return ret;
patch = kzalloc(sizeof(*patch), GFP_KERNEL);
if (!patch) {
@@ -633,13 +779,16 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
return -EINVAL;
}
- patch->data = kmemdup(fw + SECTION_HDR_SIZE, patch_size, GFP_KERNEL);
+ patch->data = kmemdup(fw + SECTION_HDR_SIZE, *patch_size, GFP_KERNEL);
if (!patch->data) {
pr_err("Patch data allocation failure.\n");
kfree(patch);
return -EINVAL;
}
+ mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
+ proc_id = mc_hdr->processor_rev_id;
+
INIT_LIST_HEAD(&patch->plist);
patch->patch_id = mc_hdr->patch_id;
patch->equiv_cpu = proc_id;
@@ -650,39 +799,38 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
/* ... and add to cache. */
update_cache(patch);
- return crnt_size;
+ return 0;
}
static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
size_t size)
{
- enum ucode_state ret = UCODE_ERROR;
- unsigned int leftover;
u8 *fw = (u8 *)data;
- int crnt_size = 0;
- int offset;
+ size_t offset;
- offset = install_equiv_cpu_table(data);
- if (offset < 0) {
- pr_err("failed to create equivalent cpu table\n");
- return ret;
- }
- fw += offset;
- leftover = size - offset;
+ offset = install_equiv_cpu_table(data, size);
+ if (!offset)
+ return UCODE_ERROR;
+
+ fw += offset;
+ size -= offset;
if (*(u32 *)fw != UCODE_UCODE_TYPE) {
pr_err("invalid type field in container file section header\n");
free_equiv_cpu_table();
- return ret;
+ return UCODE_ERROR;
}
- while (leftover) {
- crnt_size = verify_and_add_patch(family, fw, leftover);
- if (crnt_size < 0)
- return ret;
+ while (size > 0) {
+ unsigned int crnt_size = 0;
+ int ret;
- fw += crnt_size;
- leftover -= crnt_size;
+ ret = verify_and_add_patch(family, fw, size, &crnt_size);
+ if (ret < 0)
+ return UCODE_ERROR;
+
+ fw += crnt_size + SECTION_HDR_SIZE;
+ size -= (crnt_size + SECTION_HDR_SIZE);
}
return UCODE_OK;
@@ -707,7 +855,7 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
if (!p) {
return ret;
} else {
- if (boot_cpu_data.microcode == p->patch_id)
+ if (boot_cpu_data.microcode >= p->patch_id)
return ret;
ret = UCODE_NEW;
@@ -761,10 +909,8 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
}
ret = UCODE_ERROR;
- if (*(u32 *)fw->data != UCODE_MAGIC) {
- pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data);
+ if (!verify_container(fw->data, fw->size, false))
goto fw_release;
- }
ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 2637ff09d6a0..5260185cbf7b 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -434,9 +434,10 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
ssize_t ret = -EINVAL;
+ unsigned long nr_pages = totalram_pages();
- if ((len >> PAGE_SHIFT) > totalram_pages) {
- pr_err("too much data (max %ld pages)\n", totalram_pages);
+ if ((len >> PAGE_SHIFT) > nr_pages) {
+ pr_err("too much data (max %ld pages)\n", nr_pages);
return ret;
}
@@ -607,6 +608,8 @@ static int microcode_reload_late(void)
if (ret > 0)
microcode_check();
+ pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode);
+
return ret;
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e81a2db42df7..3fa238a137d2 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -328,6 +328,18 @@ static void __init ms_hyperv_init_platform(void)
# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
# endif
+
+ /*
+ * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic,
+ * set x2apic destination mode to physcial mode when x2apic is available
+ * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs
+ * have 8-bit APIC id.
+ */
+# ifdef CONFIG_X86_X2APIC
+ if (x2apic_supported())
+ x2apic_phys = 1;
+# endif
+
#endif
}
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 3668c5df90c6..5bd011737272 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -296,7 +296,7 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
unsigned long sizek)
{
unsigned long hole_basek, hole_sizek;
- unsigned long second_basek, second_sizek;
+ unsigned long second_sizek;
unsigned long range0_basek, range0_sizek;
unsigned long range_basek, range_sizek;
unsigned long chunk_sizek;
@@ -304,7 +304,6 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
hole_basek = 0;
hole_sizek = 0;
- second_basek = 0;
second_sizek = 0;
chunk_sizek = state->chunk_sizek;
gran_sizek = state->gran_sizek;
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
new file mode 100644
index 000000000000..4a06c37b9cf1
--- /dev/null
+++ b/arch/x86/kernel/cpu/resctrl/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o
+obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o pseudo_lock.o
+CFLAGS_pseudo_lock.o = -I$(src)
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/resctrl/core.c
index 44272b7107ad..c3a9dc63edf2 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -22,7 +22,7 @@
* Software Developer Manual June 2016, volume 3, section 17.17.
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define pr_fmt(fmt) "resctrl: " fmt
#include <linux/slab.h>
#include <linux/err.h>
@@ -30,22 +30,19 @@
#include <linux/cpuhotplug.h>
#include <asm/intel-family.h>
-#include <asm/intel_rdt_sched.h>
-#include "intel_rdt.h"
-
-#define MBA_IS_LINEAR 0x4
-#define MBA_MAX_MBPS U32_MAX
+#include <asm/resctrl_sched.h>
+#include "internal.h"
/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);
/*
- * The cached intel_pqr_state is strictly per CPU and can never be
+ * The cached resctrl_pqr_state is strictly per CPU and can never be
* updated from a remote CPU. Functions which modify the state
* are called with interrupts disabled and no preemption, which
* is sufficient for the protection.
*/
-DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);
/*
* Used to store the max resource name width and max resource data width
@@ -60,9 +57,13 @@ int max_name_width, max_data_width;
bool rdt_alloc_capable;
static void
-mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
+ struct rdt_resource *r);
static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+static void
+mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
+ struct rdt_resource *r);
#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
@@ -72,7 +73,7 @@ struct rdt_resource rdt_resources_all[] = {
.rid = RDT_RESOURCE_L3,
.name = "L3",
.domains = domain_init(RDT_RESOURCE_L3),
- .msr_base = IA32_L3_CBM_BASE,
+ .msr_base = MSR_IA32_L3_CBM_BASE,
.msr_update = cat_wrmsr,
.cache_level = 3,
.cache = {
@@ -89,7 +90,7 @@ struct rdt_resource rdt_resources_all[] = {
.rid = RDT_RESOURCE_L3DATA,
.name = "L3DATA",
.domains = domain_init(RDT_RESOURCE_L3DATA),
- .msr_base = IA32_L3_CBM_BASE,
+ .msr_base = MSR_IA32_L3_CBM_BASE,
.msr_update = cat_wrmsr,
.cache_level = 3,
.cache = {
@@ -106,7 +107,7 @@ struct rdt_resource rdt_resources_all[] = {
.rid = RDT_RESOURCE_L3CODE,
.name = "L3CODE",
.domains = domain_init(RDT_RESOURCE_L3CODE),
- .msr_base = IA32_L3_CBM_BASE,
+ .msr_base = MSR_IA32_L3_CBM_BASE,
.msr_update = cat_wrmsr,
.cache_level = 3,
.cache = {
@@ -123,7 +124,7 @@ struct rdt_resource rdt_resources_all[] = {
.rid = RDT_RESOURCE_L2,
.name = "L2",
.domains = domain_init(RDT_RESOURCE_L2),
- .msr_base = IA32_L2_CBM_BASE,
+ .msr_base = MSR_IA32_L2_CBM_BASE,
.msr_update = cat_wrmsr,
.cache_level = 2,
.cache = {
@@ -140,7 +141,7 @@ struct rdt_resource rdt_resources_all[] = {
.rid = RDT_RESOURCE_L2DATA,
.name = "L2DATA",
.domains = domain_init(RDT_RESOURCE_L2DATA),
- .msr_base = IA32_L2_CBM_BASE,
+ .msr_base = MSR_IA32_L2_CBM_BASE,
.msr_update = cat_wrmsr,
.cache_level = 2,
.cache = {
@@ -157,7 +158,7 @@ struct rdt_resource rdt_resources_all[] = {
.rid = RDT_RESOURCE_L2CODE,
.name = "L2CODE",
.domains = domain_init(RDT_RESOURCE_L2CODE),
- .msr_base = IA32_L2_CBM_BASE,
+ .msr_base = MSR_IA32_L2_CBM_BASE,
.msr_update = cat_wrmsr,
.cache_level = 2,
.cache = {
@@ -174,10 +175,7 @@ struct rdt_resource rdt_resources_all[] = {
.rid = RDT_RESOURCE_MBA,
.name = "MB",
.domains = domain_init(RDT_RESOURCE_MBA),
- .msr_base = IA32_MBA_THRTL_BASE,
- .msr_update = mba_wrmsr,
.cache_level = 3,
- .parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
},
@@ -211,9 +209,10 @@ static inline void cache_alloc_hsw_probe(void)
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
u32 l, h, max_cbm = BIT_MASK(20) - 1;
- if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0))
+ if (wrmsr_safe(MSR_IA32_L3_CBM_BASE, max_cbm, 0))
return;
- rdmsr(IA32_L3_CBM_BASE, l, h);
+
+ rdmsr(MSR_IA32_L3_CBM_BASE, l, h);
/* If all the bits were set in MSR, return success */
if (l != max_cbm)
@@ -259,7 +258,7 @@ static inline bool rdt_get_mb_table(struct rdt_resource *r)
return false;
}
-static bool rdt_get_mem_config(struct rdt_resource *r)
+static bool __get_mem_config_intel(struct rdt_resource *r)
{
union cpuid_0x10_3_eax eax;
union cpuid_0x10_x_edx edx;
@@ -285,6 +284,30 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
return true;
}
+static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
+{
+ union cpuid_0x10_3_eax eax;
+ union cpuid_0x10_x_edx edx;
+ u32 ebx, ecx;
+
+ cpuid_count(0x80000020, 1, &eax.full, &ebx, &ecx, &edx.full);
+ r->num_closid = edx.split.cos_max + 1;
+ r->default_ctrl = MAX_MBA_BW_AMD;
+
+ /* AMD does not use delay */
+ r->membw.delay_linear = false;
+
+ r->membw.min_bw = 0;
+ r->membw.bw_gran = 1;
+ /* Max value is 2048, Data width should be 4 in decimal */
+ r->data_width = 4;
+
+ r->alloc_capable = true;
+ r->alloc_enabled = true;
+
+ return true;
+}
+
static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
{
union cpuid_0x10_1_eax eax;
@@ -344,6 +367,15 @@ static int get_cache_id(int cpu, int level)
return -1;
}
+static void
+mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+{
+ unsigned int i;
+
+ for (i = m->low; i < m->high; i++)
+ wrmsrl(r->msr_base + i, d->ctrl_val[i]);
+}
+
/*
* Map the memory b/w percentage value to delay values
* that can be written to QOS_MSRs.
@@ -359,7 +391,8 @@ u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
}
static void
-mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
+ struct rdt_resource *r)
{
unsigned int i;
@@ -421,7 +454,7 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
struct list_head *l;
if (id < 0)
- return ERR_PTR(id);
+ return ERR_PTR(-ENODEV);
list_for_each(l, &r->domains) {
d = list_entry(l, struct rdt_domain, list);
@@ -639,7 +672,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
static void clear_closid_rmid(int cpu)
{
- struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+ struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
state->default_closid = 0;
state->default_rmid = 0;
@@ -648,7 +681,7 @@ static void clear_closid_rmid(int cpu)
wrmsr(IA32_PQR_ASSOC, 0, 0);
}
-static int intel_rdt_online_cpu(unsigned int cpu)
+static int resctrl_online_cpu(unsigned int cpu)
{
struct rdt_resource *r;
@@ -674,7 +707,7 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
}
}
-static int intel_rdt_offline_cpu(unsigned int cpu)
+static int resctrl_offline_cpu(unsigned int cpu)
{
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
@@ -794,6 +827,19 @@ static bool __init rdt_cpu_has(int flag)
return ret;
}
+static __init bool get_mem_config(void)
+{
+ if (!rdt_cpu_has(X86_FEATURE_MBA))
+ return false;
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ return __get_mem_config_intel(&rdt_resources_all[RDT_RESOURCE_MBA]);
+ else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return __rdt_get_mem_config_amd(&rdt_resources_all[RDT_RESOURCE_MBA]);
+
+ return false;
+}
+
static __init bool get_rdt_alloc_resources(void)
{
bool ret = false;
@@ -818,10 +864,9 @@ static __init bool get_rdt_alloc_resources(void)
ret = true;
}
- if (rdt_cpu_has(X86_FEATURE_MBA)) {
- if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
- ret = true;
- }
+ if (get_mem_config())
+ ret = true;
+
return ret;
}
@@ -840,7 +885,7 @@ static __init bool get_rdt_mon_resources(void)
return !rdt_get_mon_l3_config(&rdt_resources_all[RDT_RESOURCE_L3]);
}
-static __init void rdt_quirks(void)
+static __init void __check_quirks_intel(void)
{
switch (boot_cpu_data.x86_model) {
case INTEL_FAM6_HASWELL_X:
@@ -855,30 +900,91 @@ static __init void rdt_quirks(void)
}
}
+static __init void check_quirks(void)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ __check_quirks_intel();
+}
+
static __init bool get_rdt_resources(void)
{
- rdt_quirks();
rdt_alloc_capable = get_rdt_alloc_resources();
rdt_mon_capable = get_rdt_mon_resources();
return (rdt_mon_capable || rdt_alloc_capable);
}
+static __init void rdt_init_res_defs_intel(void)
+{
+ struct rdt_resource *r;
+
+ for_each_rdt_resource(r) {
+ if (r->rid == RDT_RESOURCE_L3 ||
+ r->rid == RDT_RESOURCE_L3DATA ||
+ r->rid == RDT_RESOURCE_L3CODE ||
+ r->rid == RDT_RESOURCE_L2 ||
+ r->rid == RDT_RESOURCE_L2DATA ||
+ r->rid == RDT_RESOURCE_L2CODE)
+ r->cbm_validate = cbm_validate_intel;
+ else if (r->rid == RDT_RESOURCE_MBA) {
+ r->msr_base = MSR_IA32_MBA_THRTL_BASE;
+ r->msr_update = mba_wrmsr_intel;
+ r->parse_ctrlval = parse_bw_intel;
+ }
+ }
+}
+
+static __init void rdt_init_res_defs_amd(void)
+{
+ struct rdt_resource *r;
+
+ for_each_rdt_resource(r) {
+ if (r->rid == RDT_RESOURCE_L3 ||
+ r->rid == RDT_RESOURCE_L3DATA ||
+ r->rid == RDT_RESOURCE_L3CODE ||
+ r->rid == RDT_RESOURCE_L2 ||
+ r->rid == RDT_RESOURCE_L2DATA ||
+ r->rid == RDT_RESOURCE_L2CODE)
+ r->cbm_validate = cbm_validate_amd;
+ else if (r->rid == RDT_RESOURCE_MBA) {
+ r->msr_base = MSR_IA32_MBA_BW_BASE;
+ r->msr_update = mba_wrmsr_amd;
+ r->parse_ctrlval = parse_bw_amd;
+ }
+ }
+}
+
+static __init void rdt_init_res_defs(void)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ rdt_init_res_defs_intel();
+ else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ rdt_init_res_defs_amd();
+}
+
static enum cpuhp_state rdt_online;
-static int __init intel_rdt_late_init(void)
+static int __init resctrl_late_init(void)
{
struct rdt_resource *r;
int state, ret;
+ /*
+ * Initialize functions(or definitions) that are different
+ * between vendors here.
+ */
+ rdt_init_res_defs();
+
+ check_quirks();
+
if (!get_rdt_resources())
return -ENODEV;
rdt_init_padding();
state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
- "x86/rdt/cat:online:",
- intel_rdt_online_cpu, intel_rdt_offline_cpu);
+ "x86/resctrl/cat:online:",
+ resctrl_online_cpu, resctrl_offline_cpu);
if (state < 0)
return state;
@@ -890,20 +996,20 @@ static int __init intel_rdt_late_init(void)
rdt_online = state;
for_each_alloc_capable_rdt_resource(r)
- pr_info("Intel RDT %s allocation detected\n", r->name);
+ pr_info("%s allocation detected\n", r->name);
for_each_mon_capable_rdt_resource(r)
- pr_info("Intel RDT %s monitoring detected\n", r->name);
+ pr_info("%s monitoring detected\n", r->name);
return 0;
}
-late_initcall(intel_rdt_late_init);
+late_initcall(resctrl_late_init);
-static void __exit intel_rdt_exit(void)
+static void __exit resctrl_exit(void)
{
cpuhp_remove_state(rdt_online);
rdtgroup_exit();
}
-__exitcall(intel_rdt_exit);
+__exitcall(resctrl_exit);
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index efa4a519f5e5..2dbd990a2eb7 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -27,7 +27,54 @@
#include <linux/kernfs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
-#include "intel_rdt.h"
+#include "internal.h"
+
+/*
+ * Check whether MBA bandwidth percentage value is correct. The value is
+ * checked against the minimum and maximum bandwidth values specified by
+ * the hardware. The allocated bandwidth percentage is rounded to the next
+ * control step available on the hardware.
+ */
+static bool bw_validate_amd(char *buf, unsigned long *data,
+ struct rdt_resource *r)
+{
+ unsigned long bw;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &bw);
+ if (ret) {
+ rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
+ return false;
+ }
+
+ if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+ rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
+ r->membw.min_bw, r->default_ctrl);
+ return false;
+ }
+
+ *data = roundup(bw, (unsigned long)r->membw.bw_gran);
+ return true;
+}
+
+int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d)
+{
+ unsigned long bw_val;
+
+ if (d->have_new_ctrl) {
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
+ return -EINVAL;
+ }
+
+ if (!bw_validate_amd(data->buf, &bw_val, r))
+ return -EINVAL;
+
+ d->new_ctrl = bw_val;
+ d->have_new_ctrl = true;
+
+ return 0;
+}
/*
* Check whether MBA bandwidth percentage value is correct. The value is
@@ -65,13 +112,13 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return true;
}
-int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
- struct rdt_domain *d)
+int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d)
{
unsigned long bw_val;
if (d->have_new_ctrl) {
- rdt_last_cmd_printf("duplicate domain %d\n", d->id);
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
return -EINVAL;
}
@@ -89,7 +136,7 @@ int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
* are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
* Additionally Haswell requires at least two bits set.
*/
-static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
+bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
{
unsigned long first_bit, zero_bit, val;
unsigned int cbm_len = r->cache.cbm_len;
@@ -97,12 +144,12 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
ret = kstrtoul(buf, 16, &val);
if (ret) {
- rdt_last_cmd_printf("non-hex character in mask %s\n", buf);
+ rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
return false;
}
if (val == 0 || val > r->default_ctrl) {
- rdt_last_cmd_puts("mask out of range\n");
+ rdt_last_cmd_puts("Mask out of range\n");
return false;
}
@@ -110,12 +157,12 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) {
- rdt_last_cmd_printf("mask %lx has non-consecutive 1-bits\n", val);
+ rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
return false;
}
if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("Need at least %d bits in mask\n",
+ rdt_last_cmd_printf("Need at least %d bits in the mask\n",
r->cache.min_cbm_bits);
return false;
}
@@ -125,6 +172,30 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
}
/*
+ * Check whether a cache bit mask is valid. AMD allows non-contiguous
+ * bitmasks
+ */
+bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
+{
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul(buf, 16, &val);
+ if (ret) {
+ rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
+ return false;
+ }
+
+ if (val > r->default_ctrl) {
+ rdt_last_cmd_puts("Mask out of range\n");
+ return false;
+ }
+
+ *data = val;
+ return true;
+}
+
+/*
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
@@ -135,7 +206,7 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
u32 cbm_val;
if (d->have_new_ctrl) {
- rdt_last_cmd_printf("duplicate domain %d\n", d->id);
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
return -EINVAL;
}
@@ -145,17 +216,17 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
*/
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
rdtgroup_pseudo_locked_in_hierarchy(d)) {
- rdt_last_cmd_printf("pseudo-locked region in hierarchy\n");
+ rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n");
return -EINVAL;
}
- if (!cbm_validate(data->buf, &cbm_val, r))
+ if (!r->cbm_validate(data->buf, &cbm_val, r))
return -EINVAL;
if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
rdtgrp->mode == RDT_MODE_SHAREABLE) &&
rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
- rdt_last_cmd_printf("CBM overlaps with pseudo-locked region\n");
+ rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n");
return -EINVAL;
}
@@ -164,14 +235,14 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
* either is exclusive.
*/
if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, true)) {
- rdt_last_cmd_printf("overlaps with exclusive group\n");
+ rdt_last_cmd_puts("Overlaps with exclusive group\n");
return -EINVAL;
}
if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, false)) {
if (rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- rdt_last_cmd_printf("overlaps with other group\n");
+ rdt_last_cmd_puts("Overlaps with other group\n");
return -EINVAL;
}
}
@@ -293,7 +364,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok,
if (!strcmp(resname, r->name) && rdtgrp->closid < r->num_closid)
return parse_line(tok, r, rdtgrp);
}
- rdt_last_cmd_printf("unknown/unsupported resource name '%s'\n", resname);
+ rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname);
return -EINVAL;
}
@@ -326,7 +397,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
*/
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
ret = -EINVAL;
- rdt_last_cmd_puts("resource group is pseudo-locked\n");
+ rdt_last_cmd_puts("Resource group is pseudo-locked\n");
goto out;
}
@@ -467,7 +538,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
r = &rdt_resources_all[resid];
d = rdt_find_domain(r, domid, NULL);
- if (!d) {
+ if (IS_ERR_OR_NULL(d)) {
ret = -ENOENT;
goto out;
}
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 3736f6dc9545..e49b77283924 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -1,20 +1,25 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_INTEL_RDT_H
-#define _ASM_X86_INTEL_RDT_H
+#ifndef _ASM_X86_RESCTRL_INTERNAL_H
+#define _ASM_X86_RESCTRL_INTERNAL_H
#include <linux/sched.h>
#include <linux/kernfs.h>
+#include <linux/fs_context.h>
#include <linux/jump_label.h>
-#define IA32_L3_QOS_CFG 0xc81
-#define IA32_L2_QOS_CFG 0xc82
-#define IA32_L3_CBM_BASE 0xc90
-#define IA32_L2_CBM_BASE 0xd10
-#define IA32_MBA_THRTL_BASE 0xd50
+#define MSR_IA32_L3_QOS_CFG 0xc81
+#define MSR_IA32_L2_QOS_CFG 0xc82
+#define MSR_IA32_L3_CBM_BASE 0xc90
+#define MSR_IA32_L2_CBM_BASE 0xd10
+#define MSR_IA32_MBA_THRTL_BASE 0xd50
+#define MSR_IA32_MBA_BW_BASE 0xc0000200
-#define L3_QOS_CDP_ENABLE 0x01ULL
+#define MSR_IA32_QM_CTR 0x0c8e
+#define MSR_IA32_QM_EVTSEL 0x0c8d
-#define L2_QOS_CDP_ENABLE 0x01ULL
+#define L3_QOS_CDP_ENABLE 0x01ULL
+
+#define L2_QOS_CDP_ENABLE 0x01ULL
/*
* Event IDs are used to program IA32_QM_EVTSEL before reading event
@@ -29,10 +34,28 @@
#define MBM_CNTR_WIDTH 24
#define MBM_OVERFLOW_INTERVAL 1000
#define MAX_MBA_BW 100u
+#define MBA_IS_LINEAR 0x4
+#define MBA_MAX_MBPS U32_MAX
+#define MAX_MBA_BW_AMD 0x800
#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
+
+struct rdt_fs_context {
+ struct kernfs_fs_context kfc;
+ bool enable_cdpl2;
+ bool enable_cdpl3;
+ bool enable_mba_mbps;
+};
+
+static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
+{
+ struct kernfs_fs_context *kfc = fc->fs_private;
+
+ return container_of(kfc, struct rdt_fs_context, kfc);
+}
+
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
/**
@@ -69,7 +92,7 @@ struct rmid_read {
u64 val;
};
-extern unsigned int intel_cqm_threshold;
+extern unsigned int resctrl_cqm_threshold;
extern bool rdt_alloc_capable;
extern bool rdt_mon_capable;
extern unsigned int rdt_mon_features;
@@ -391,9 +414,9 @@ struct rdt_parse_data {
* struct rdt_resource - attributes of an RDT resource
* @rid: The index of the resource
* @alloc_enabled: Is allocation enabled on this machine
- * @mon_enabled: Is monitoring enabled for this feature
+ * @mon_enabled: Is monitoring enabled for this feature
* @alloc_capable: Is allocation available on this machine
- * @mon_capable: Is monitor feature available on this machine
+ * @mon_capable: Is monitor feature available on this machine
* @name: Name to use in "schemata" file
* @num_closid: Number of CLOSIDs available
* @cache_level: Which cache level defines scope of this resource
@@ -405,10 +428,11 @@ struct rdt_parse_data {
* @cache: Cache allocation related data
* @format_str: Per resource format string to show domain value
* @parse_ctrlval: Per resource function pointer to parse control values
- * @evt_list: List of monitoring events
- * @num_rmid: Number of RMIDs available
- * @mon_scale: cqm counter * mon_scale = occupancy in bytes
- * @fflags: flags to choose base and info files
+ * @cbm_validate Cache bitmask validate function
+ * @evt_list: List of monitoring events
+ * @num_rmid: Number of RMIDs available
+ * @mon_scale: cqm counter * mon_scale = occupancy in bytes
+ * @fflags: flags to choose base and info files
*/
struct rdt_resource {
int rid;
@@ -431,6 +455,7 @@ struct rdt_resource {
int (*parse_ctrlval)(struct rdt_parse_data *data,
struct rdt_resource *r,
struct rdt_domain *d);
+ bool (*cbm_validate)(char *buf, u32 *data, struct rdt_resource *r);
struct list_head evt_list;
int num_rmid;
unsigned int mon_scale;
@@ -439,8 +464,10 @@ struct rdt_resource {
int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
struct rdt_domain *d);
-int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
- struct rdt_domain *d);
+int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d);
+int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
@@ -463,6 +490,10 @@ enum {
RDT_NUM_RESOURCES,
};
+#define for_each_rdt_resource(r) \
+ for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+ r++)
+
#define for_each_capable_rdt_resource(r) \
for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
r++) \
@@ -567,5 +598,7 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
void cqm_handle_limbo(struct work_struct *work);
bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
void __check_limbo(struct rdt_domain *d, bool force_free);
+bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r);
+bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r);
-#endif /* _ASM_X86_INTEL_RDT_H */
+#endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index b0f3aed76b75..1573a0a6b525 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -26,10 +26,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
-#include "intel_rdt.h"
-
-#define MSR_IA32_QM_CTR 0x0c8e
-#define MSR_IA32_QM_EVTSEL 0x0c8d
+#include "internal.h"
struct rmid_entry {
u32 rmid;
@@ -73,7 +70,7 @@ unsigned int rdt_mon_features;
* This is the threshold cache occupancy at which we will consider an
* RMID available for re-allocation.
*/
-unsigned int intel_cqm_threshold;
+unsigned int resctrl_cqm_threshold;
static inline struct rmid_entry *__rmid_entry(u32 rmid)
{
@@ -107,7 +104,7 @@ static bool rmid_dirty(struct rmid_entry *entry)
{
u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
- return val >= intel_cqm_threshold;
+ return val >= resctrl_cqm_threshold;
}
/*
@@ -187,7 +184,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
list_for_each_entry(d, &r->domains, list) {
if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
- if (val <= intel_cqm_threshold)
+ if (val <= resctrl_cqm_threshold)
continue;
}
@@ -504,11 +501,8 @@ out_unlock:
void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
- struct rdt_resource *r;
int cpu;
- r = &rdt_resources_all[RDT_RESOURCE_L3];
-
cpu = cpumask_any(&dom->cpu_mask);
dom->cqm_work_cpu = cpu;
@@ -625,6 +619,7 @@ static void l3_mon_evt_init(struct rdt_resource *r)
int rdt_get_mon_l3_config(struct rdt_resource *r)
{
+ unsigned int cl_size = boot_cpu_data.x86_cache_size;
int ret;
r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
@@ -637,10 +632,10 @@ int rdt_get_mon_l3_config(struct rdt_resource *r)
*
* For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
*/
- intel_cqm_threshold = boot_cpu_data.x86_cache_size * 1024 / r->num_rmid;
+ resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid;
/* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
- intel_cqm_threshold /= r->mon_scale;
+ resctrl_cqm_threshold /= r->mon_scale;
ret = dom_data_init(r);
if (ret)
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 815b4e92522c..604c0e3bcc83 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -24,21 +24,14 @@
#include <asm/cacheflush.h>
#include <asm/intel-family.h>
-#include <asm/intel_rdt_sched.h>
+#include <asm/resctrl_sched.h>
#include <asm/perf_event.h>
#include "../../events/perf_event.h" /* For X86_CONFIG() */
-#include "intel_rdt.h"
+#include "internal.h"
#define CREATE_TRACE_POINTS
-#include "intel_rdt_pseudo_lock_event.h"
-
-/*
- * MSR_MISC_FEATURE_CONTROL register enables the modification of hardware
- * prefetcher state. Details about this register can be found in the MSR
- * tables for specific platforms found in Intel's SDM.
- */
-#define MSR_MISC_FEATURE_CONTROL 0x000001a4
+#include "pseudo_lock_event.h"
/*
* The bits needed to disable hardware prefetching varies based on the
@@ -213,7 +206,7 @@ static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
for_each_cpu(cpu, &plr->d->cpu_mask) {
pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
if (!pm_req) {
- rdt_last_cmd_puts("fail allocating mem for PM QoS\n");
+ rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
ret = -ENOMEM;
goto out_err;
}
@@ -222,7 +215,7 @@ static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
DEV_PM_QOS_RESUME_LATENCY,
30);
if (ret < 0) {
- rdt_last_cmd_printf("fail to add latency req cpu%d\n",
+ rdt_last_cmd_printf("Failed to add latency req CPU%d\n",
cpu);
kfree(pm_req);
ret = -1;
@@ -289,7 +282,7 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
plr->cpu = cpumask_first(&plr->d->cpu_mask);
if (!cpu_online(plr->cpu)) {
- rdt_last_cmd_printf("cpu %u associated with cache not online\n",
+ rdt_last_cmd_printf("CPU %u associated with cache not online\n",
plr->cpu);
ret = -ENODEV;
goto out_region;
@@ -307,7 +300,7 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
}
ret = -1;
- rdt_last_cmd_puts("unable to determine cache line size\n");
+ rdt_last_cmd_puts("Unable to determine cache line size\n");
out_region:
pseudo_lock_region_clear(plr);
return ret;
@@ -361,14 +354,14 @@ static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
* KMALLOC_MAX_SIZE.
*/
if (plr->size > KMALLOC_MAX_SIZE) {
- rdt_last_cmd_puts("requested region exceeds maximum size\n");
+ rdt_last_cmd_puts("Requested region exceeds maximum size\n");
ret = -E2BIG;
goto out_region;
}
plr->kmem = kzalloc(plr->size, GFP_KERNEL);
if (!plr->kmem) {
- rdt_last_cmd_puts("unable to allocate memory\n");
+ rdt_last_cmd_puts("Unable to allocate memory\n");
ret = -ENOMEM;
goto out_region;
}
@@ -665,7 +658,7 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
* default closid associated with it.
*/
if (rdtgrp == &rdtgroup_default) {
- rdt_last_cmd_puts("cannot pseudo-lock default group\n");
+ rdt_last_cmd_puts("Cannot pseudo-lock default group\n");
return -EINVAL;
}
@@ -707,17 +700,17 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
*/
prefetch_disable_bits = get_prefetch_disable_bits();
if (prefetch_disable_bits == 0) {
- rdt_last_cmd_puts("pseudo-locking not supported\n");
+ rdt_last_cmd_puts("Pseudo-locking not supported\n");
return -EINVAL;
}
if (rdtgroup_monitor_in_progress(rdtgrp)) {
- rdt_last_cmd_puts("monitoring in progress\n");
+ rdt_last_cmd_puts("Monitoring in progress\n");
return -EINVAL;
}
if (rdtgroup_tasks_assigned(rdtgrp)) {
- rdt_last_cmd_puts("tasks assigned to resource group\n");
+ rdt_last_cmd_puts("Tasks assigned to resource group\n");
return -EINVAL;
}
@@ -727,13 +720,13 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
}
if (rdtgroup_locksetup_user_restrict(rdtgrp)) {
- rdt_last_cmd_puts("unable to modify resctrl permissions\n");
+ rdt_last_cmd_puts("Unable to modify resctrl permissions\n");
return -EIO;
}
ret = pseudo_lock_init(rdtgrp);
if (ret) {
- rdt_last_cmd_puts("unable to init pseudo-lock region\n");
+ rdt_last_cmd_puts("Unable to init pseudo-lock region\n");
goto out_release;
}
@@ -770,7 +763,7 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
if (rdt_mon_capable) {
ret = alloc_rmid();
if (ret < 0) {
- rdt_last_cmd_puts("out of RMIDs\n");
+ rdt_last_cmd_puts("Out of RMIDs\n");
return ret;
}
rdtgrp->mon.rmid = ret;
@@ -1304,7 +1297,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
"pseudo_lock/%u", plr->cpu);
if (IS_ERR(thread)) {
ret = PTR_ERR(thread);
- rdt_last_cmd_printf("locking thread returned error %d\n", ret);
+ rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
goto out_cstates;
}
@@ -1322,13 +1315,13 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
* the cleared, but not freed, plr struct resulting in an
* empty pseudo-locking loop.
*/
- rdt_last_cmd_puts("locking thread interrupted\n");
+ rdt_last_cmd_puts("Locking thread interrupted\n");
goto out_cstates;
}
ret = pseudo_lock_minor_get(&new_minor);
if (ret < 0) {
- rdt_last_cmd_puts("unable to obtain a new minor number\n");
+ rdt_last_cmd_puts("Unable to obtain a new minor number\n");
goto out_cstates;
}
@@ -1360,7 +1353,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
if (IS_ERR(dev)) {
ret = PTR_ERR(dev);
- rdt_last_cmd_printf("failed to create character device: %d\n",
+ rdt_last_cmd_printf("Failed to create character device: %d\n",
ret);
goto out_debugfs;
}
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h b/arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h
index 2c041e6d9f05..428ebbd4270b 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h
@@ -39,5 +39,5 @@ TRACE_EVENT(pseudo_lock_l3,
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE intel_rdt_pseudo_lock_event
+#define TRACE_INCLUDE_FILE pseudo_lock_event
#include <trace/define_trace.h>
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index f27b8115ffa2..85212a32b54d 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -24,6 +24,7 @@
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
+#include <linux/fs_parser.h>
#include <linux/sysfs.h>
#include <linux/kernfs.h>
#include <linux/seq_buf.h>
@@ -32,11 +33,12 @@
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/task_work.h>
+#include <linux/user_namespace.h>
#include <uapi/linux/magic.h>
-#include <asm/intel_rdt_sched.h>
-#include "intel_rdt.h"
+#include <asm/resctrl_sched.h>
+#include "internal.h"
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
@@ -298,7 +300,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
}
/*
- * This is safe against intel_rdt_sched_in() called from __switch_to()
+ * This is safe against resctrl_sched_in() called from __switch_to()
* because __switch_to() is executed with interrupts disabled. A local call
* from update_closid_rmid() is proteced against __switch_to() because
* preemption is disabled.
@@ -317,7 +319,7 @@ static void update_cpu_closid_rmid(void *info)
* executing task might have its own closid selected. Just reuse
* the context switch code.
*/
- intel_rdt_sched_in();
+ resctrl_sched_in();
}
/*
@@ -345,7 +347,7 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
/* Check whether cpus belong to parent ctrl group */
cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
if (cpumask_weight(tmpmask)) {
- rdt_last_cmd_puts("can only add CPUs to mongroup that belong to parent\n");
+ rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
return -EINVAL;
}
@@ -470,14 +472,14 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
rdt_last_cmd_clear();
if (!rdtgrp) {
ret = -ENOENT;
- rdt_last_cmd_puts("directory was removed\n");
+ rdt_last_cmd_puts("Directory was removed\n");
goto unlock;
}
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
ret = -EINVAL;
- rdt_last_cmd_puts("pseudo-locking in progress\n");
+ rdt_last_cmd_puts("Pseudo-locking in progress\n");
goto unlock;
}
@@ -487,7 +489,7 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
ret = cpumask_parse(buf, newmask);
if (ret) {
- rdt_last_cmd_puts("bad cpu list/mask\n");
+ rdt_last_cmd_puts("Bad CPU list/mask\n");
goto unlock;
}
@@ -495,7 +497,7 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
cpumask_andnot(tmpmask, newmask, cpu_online_mask);
if (cpumask_weight(tmpmask)) {
ret = -EINVAL;
- rdt_last_cmd_puts("can only assign online cpus\n");
+ rdt_last_cmd_puts("Can only assign online CPUs\n");
goto unlock;
}
@@ -542,7 +544,7 @@ static void move_myself(struct callback_head *head)
preempt_disable();
/* update PQR_ASSOC MSR to make resource group go into effect */
- intel_rdt_sched_in();
+ resctrl_sched_in();
preempt_enable();
kfree(callback);
@@ -574,7 +576,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
*/
atomic_dec(&rdtgrp->waitcount);
kfree(callback);
- rdt_last_cmd_puts("task exited\n");
+ rdt_last_cmd_puts("Task exited\n");
} else {
/*
* For ctrl_mon groups move both closid and rmid.
@@ -692,7 +694,7 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
ret = -EINVAL;
- rdt_last_cmd_puts("pseudo-locking in progress\n");
+ rdt_last_cmd_puts("Pseudo-locking in progress\n");
goto unlock;
}
@@ -926,7 +928,7 @@ static int max_threshold_occ_show(struct kernfs_open_file *of,
{
struct rdt_resource *r = of->kn->parent->priv;
- seq_printf(seq, "%u\n", intel_cqm_threshold * r->mon_scale);
+ seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale);
return 0;
}
@@ -945,7 +947,7 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
if (bytes > (boot_cpu_data.x86_cache_size * 1024))
return -EINVAL;
- intel_cqm_threshold = bytes / r->mon_scale;
+ resctrl_cqm_threshold = bytes / r->mon_scale;
return nbytes;
}
@@ -1029,7 +1031,7 @@ static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
* peer RDT CDP resource. Hence the WARN.
*/
_d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
- if (WARN_ON(!_d_cdp)) {
+ if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) {
_r_cdp = NULL;
ret = -EINVAL;
}
@@ -1158,14 +1160,14 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
list_for_each_entry(d, &r->domains, list) {
if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
rdtgrp->closid, false)) {
- rdt_last_cmd_puts("schemata overlaps\n");
+ rdt_last_cmd_puts("Schemata overlaps\n");
return false;
}
}
}
if (!has_cache) {
- rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n");
+ rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
return false;
}
@@ -1206,7 +1208,7 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
goto out;
if (mode == RDT_MODE_PSEUDO_LOCKED) {
- rdt_last_cmd_printf("cannot change pseudo-locked group\n");
+ rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
ret = -EINVAL;
goto out;
}
@@ -1235,7 +1237,7 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
goto out;
rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
} else {
- rdt_last_cmd_printf("unknown/unsupported mode\n");
+ rdt_last_cmd_puts("Unknown or unsupported mode\n");
ret = -EINVAL;
}
@@ -1722,14 +1724,14 @@ static void l3_qos_cfg_update(void *arg)
{
bool *enable = arg;
- wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
+ wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
}
static void l2_qos_cfg_update(void *arg)
{
bool *enable = arg;
- wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
+ wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
}
static inline bool is_mba_linear(void)
@@ -1858,43 +1860,6 @@ static void cdp_disable_all(void)
cdpl2_disable();
}
-static int parse_rdtgroupfs_options(char *data)
-{
- char *token, *o = data;
- int ret = 0;
-
- while ((token = strsep(&o, ",")) != NULL) {
- if (!*token) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!strcmp(token, "cdp")) {
- ret = cdpl3_enable();
- if (ret)
- goto out;
- } else if (!strcmp(token, "cdpl2")) {
- ret = cdpl2_enable();
- if (ret)
- goto out;
- } else if (!strcmp(token, "mba_MBps")) {
- ret = set_mba_sc(true);
- if (ret)
- goto out;
- } else {
- ret = -EINVAL;
- goto out;
- }
- }
-
- return 0;
-
-out:
- pr_err("Invalid mount option \"%s\"\n", token);
-
- return ret;
-}
-
/*
* We don't allow rdtgroup directories to be created anywhere
* except the root directory. Thus when looking for the rdtgroup
@@ -1966,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
struct rdtgroup *prgrp,
struct kernfs_node **mon_data_kn);
-static struct dentry *rdt_mount(struct file_system_type *fs_type,
- int flags, const char *unused_dev_name,
- void *data)
+static int rdt_enable_ctx(struct rdt_fs_context *ctx)
{
+ int ret = 0;
+
+ if (ctx->enable_cdpl2)
+ ret = cdpl2_enable();
+
+ if (!ret && ctx->enable_cdpl3)
+ ret = cdpl3_enable();
+
+ if (!ret && ctx->enable_mba_mbps)
+ ret = set_mba_sc(true);
+
+ return ret;
+}
+
+static int rdt_get_tree(struct fs_context *fc)
+{
+ struct rdt_fs_context *ctx = rdt_fc2context(fc);
struct rdt_domain *dom;
struct rdt_resource *r;
- struct dentry *dentry;
int ret;
cpus_read_lock();
@@ -1981,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
* resctrl file system can only be mounted once.
*/
if (static_branch_unlikely(&rdt_enable_key)) {
- dentry = ERR_PTR(-EBUSY);
+ ret = -EBUSY;
goto out;
}
- ret = parse_rdtgroupfs_options(data);
- if (ret) {
- dentry = ERR_PTR(ret);
+ ret = rdt_enable_ctx(ctx);
+ if (ret < 0)
goto out_cdp;
- }
closid_init();
ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
- if (ret) {
- dentry = ERR_PTR(ret);
- goto out_cdp;
- }
+ if (ret < 0)
+ goto out_mba;
if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn,
NULL, "mon_groups",
&kn_mongrp);
- if (ret) {
- dentry = ERR_PTR(ret);
+ if (ret < 0)
goto out_info;
- }
kernfs_get(kn_mongrp);
ret = mkdir_mondata_all(rdtgroup_default.kn,
&rdtgroup_default, &kn_mondata);
- if (ret) {
- dentry = ERR_PTR(ret);
+ if (ret < 0)
goto out_mongrp;
- }
kernfs_get(kn_mondata);
rdtgroup_default.mon.mon_data_kn = kn_mondata;
}
ret = rdt_pseudo_lock_init();
- if (ret) {
- dentry = ERR_PTR(ret);
+ if (ret)
goto out_mondata;
- }
- dentry = kernfs_mount(fs_type, flags, rdt_root,
- RDTGROUP_SUPER_MAGIC, NULL);
- if (IS_ERR(dentry))
+ ret = kernfs_get_tree(fc);
+ if (ret < 0)
goto out_psl;
if (rdt_alloc_capable)
@@ -2056,14 +2024,95 @@ out_mongrp:
kernfs_remove(kn_mongrp);
out_info:
kernfs_remove(kn_info);
+out_mba:
+ if (ctx->enable_mba_mbps)
+ set_mba_sc(false);
out_cdp:
cdp_disable_all();
out:
rdt_last_cmd_clear();
mutex_unlock(&rdtgroup_mutex);
cpus_read_unlock();
+ return ret;
+}
- return dentry;
+enum rdt_param {
+ Opt_cdp,
+ Opt_cdpl2,
+ Opt_mba_mbps,
+ nr__rdt_params
+};
+
+static const struct fs_parameter_spec rdt_param_specs[] = {
+ fsparam_flag("cdp", Opt_cdp),
+ fsparam_flag("cdpl2", Opt_cdpl2),
+ fsparam_flag("mba_MBps", Opt_mba_mbps),
+ {}
+};
+
+static const struct fs_parameter_description rdt_fs_parameters = {
+ .name = "rdt",
+ .specs = rdt_param_specs,
+};
+
+static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct rdt_fs_context *ctx = rdt_fc2context(fc);
+ struct fs_parse_result result;
+ int opt;
+
+ opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_cdp:
+ ctx->enable_cdpl3 = true;
+ return 0;
+ case Opt_cdpl2:
+ ctx->enable_cdpl2 = true;
+ return 0;
+ case Opt_mba_mbps:
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return -EINVAL;
+ ctx->enable_mba_mbps = true;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void rdt_fs_context_free(struct fs_context *fc)
+{
+ struct rdt_fs_context *ctx = rdt_fc2context(fc);
+
+ kernfs_free_fs_context(fc);
+ kfree(ctx);
+}
+
+static const struct fs_context_operations rdt_fs_context_ops = {
+ .free = rdt_fs_context_free,
+ .parse_param = rdt_parse_param,
+ .get_tree = rdt_get_tree,
+};
+
+static int rdt_init_fs_context(struct fs_context *fc)
+{
+ struct rdt_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->kfc.root = rdt_root;
+ ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
+ fc->fs_private = &ctx->kfc;
+ fc->ops = &rdt_fs_context_ops;
+ if (fc->user_ns)
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(&init_user_ns);
+ fc->global = true;
+ return 0;
}
static int reset_all_ctrls(struct rdt_resource *r)
@@ -2236,9 +2285,10 @@ static void rdt_kill_sb(struct super_block *sb)
}
static struct file_system_type rdt_fs_type = {
- .name = "resctrl",
- .mount = rdt_mount,
- .kill_sb = rdt_kill_sb,
+ .name = "resctrl",
+ .init_fs_context = rdt_init_fs_context,
+ .parameters = &rdt_fs_parameters,
+ .kill_sb = rdt_kill_sb,
};
static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
@@ -2540,7 +2590,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
tmp_cbm = d->new_ctrl;
if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("no space on %s:%d\n",
+ rdt_last_cmd_printf("No space on %s:%d\n",
r->name, d->id);
return -ENOSPC;
}
@@ -2557,12 +2607,13 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
continue;
ret = update_domains(r, rdtgrp->closid);
if (ret < 0) {
- rdt_last_cmd_puts("failed to initialize allocations\n");
+ rdt_last_cmd_puts("Failed to initialize allocations\n");
return ret;
}
- rdtgrp->mode = RDT_MODE_SHAREABLE;
}
+ rdtgrp->mode = RDT_MODE_SHAREABLE;
+
return 0;
}
@@ -2580,7 +2631,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
rdt_last_cmd_clear();
if (!prdtgrp) {
ret = -ENODEV;
- rdt_last_cmd_puts("directory was removed\n");
+ rdt_last_cmd_puts("Directory was removed\n");
goto out_unlock;
}
@@ -2588,7 +2639,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
(prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
ret = -EINVAL;
- rdt_last_cmd_puts("pseudo-locking in progress\n");
+ rdt_last_cmd_puts("Pseudo-locking in progress\n");
goto out_unlock;
}
@@ -2596,7 +2647,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
if (!rdtgrp) {
ret = -ENOSPC;
- rdt_last_cmd_puts("kernel out of memory\n");
+ rdt_last_cmd_puts("Kernel out of memory\n");
goto out_unlock;
}
*r = rdtgrp;
@@ -2637,7 +2688,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
if (rdt_mon_capable) {
ret = alloc_rmid();
if (ret < 0) {
- rdt_last_cmd_puts("out of RMIDs\n");
+ rdt_last_cmd_puts("Out of RMIDs\n");
goto out_destroy;
}
rdtgrp->mon.rmid = ret;
@@ -2725,7 +2776,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
kn = rdtgrp->kn;
ret = closid_alloc();
if (ret < 0) {
- rdt_last_cmd_puts("out of CLOSIDs\n");
+ rdt_last_cmd_puts("Out of CLOSIDs\n");
goto out_common_fail;
}
closid = ret;
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 772c219b6889..94aa1c72ca98 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -5,9 +5,10 @@
#include <linux/cpu.h>
#include <asm/pat.h>
+#include <asm/apic.h>
#include <asm/processor.h>
-#include <asm/apic.h>
+#include "cpu.h"
struct cpuid_bit {
u16 feature;
@@ -17,7 +18,11 @@ struct cpuid_bit {
u32 sub_leaf;
};
-/* Please keep the leaf sorted by cpuid_bit.level for faster search. */
+/*
+ * Please keep the leaf sorted by cpuid_bit.level for faster search.
+ * X86_FEATURE_MBA is supported by both Intel and AMD. But the CPUID
+ * levels are different and there is a separate entry for each.
+ */
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
@@ -29,6 +34,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
+ { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
{ X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 },
{ X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
@@ -56,27 +62,3 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
set_cpu_cap(c, cb->feature);
}
}
-
-u32 get_scattered_cpuid_leaf(unsigned int level, unsigned int sub_leaf,
- enum cpuid_regs_idx reg)
-{
- const struct cpuid_bit *cb;
- u32 cpuid_val = 0;
-
- for (cb = cpuid_bits; cb->feature; cb++) {
-
- if (level > cb->level)
- continue;
-
- if (level < cb->level)
- break;
-
- if (reg == cb->reg && sub_leaf == cb->sub_leaf) {
- if (cpu_has(&boot_cpu_data, cb->feature))
- cpuid_val |= BIT(cb->bit);
- }
- }
-
- return cpuid_val;
-}
-EXPORT_SYMBOL_GPL(get_scattered_cpuid_leaf);
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 71ca064e3794..8f6c784141d1 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -10,6 +10,8 @@
#include <asm/pat.h>
#include <asm/processor.h>
+#include "cpu.h"
+
/* leaf 0xb SMT level */
#define SMT_LEVEL 0