aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/amd.c28
-rw-r--r--arch/x86/kernel/cpu/bugs.c283
-rw-r--r--arch/x86/kernel/cpu/centaur.c4
-rw-r--r--arch/x86/kernel/cpu/common.c166
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c66
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.c76
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.h5
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c117
-rw-r--r--arch/x86/kernel/cpu/mcheck/dev-mcelog.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c26
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c31
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c29
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c42
-rw-r--r--arch/x86/kernel/cpu/scattered.c4
16 files changed, 749 insertions, 138 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bcb75dc97d44..ea831c858195 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -829,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_K8);
if (cpu_has(c, X86_FEATURE_XMM2)) {
- /* MFENCE stops RDTSC speculation */
- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ unsigned long long val;
+ int ret;
+
+ /*
+ * A serializing LFENCE has less overhead than MFENCE, so
+ * use it for execution serialization. On families which
+ * don't have that MSR, LFENCE is already serializing.
+ * msr_set_bit() uses the safe accessors, too, even if the MSR
+ * is not present.
+ */
+ msr_set_bit(MSR_F10H_DECFG,
+ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+ /*
+ * Verify that the MSR write was successful (could be running
+ * under a hypervisor) and only then assume that LFENCE is
+ * serializing.
+ */
+ ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+ if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+ /* A serializing LFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+ } else {
+ /* MFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ }
}
/*
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ba0b2424c9b0..71949bf2de5a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -10,6 +10,11 @@
*/
#include <linux/init.h>
#include <linux/utsname.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
@@ -19,6 +24,9 @@
#include <asm/alternative.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
+#include <asm/intel-family.h>
+
+static void __init spectre_v2_select_mitigation(void);
void __init check_bugs(void)
{
@@ -29,6 +37,9 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data);
}
+ /* Select the proper spectre mitigation before patching alternatives */
+ spectre_v2_select_mitigation();
+
#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
@@ -60,3 +71,275 @@ void __init check_bugs(void)
set_memory_4k((unsigned long)__va(0), 1);
#endif
}
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+ SPECTRE_V2_CMD_NONE,
+ SPECTRE_V2_CMD_AUTO,
+ SPECTRE_V2_CMD_FORCE,
+ SPECTRE_V2_CMD_RETPOLINE,
+ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+ SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
+ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 : " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+#ifdef RETPOLINE
+static bool spectre_v2_bad_module;
+
+bool retpoline_module_ok(bool has_retpoline)
+{
+ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
+ return true;
+
+ pr_err("System may be vulnerable to spectre v2\n");
+ spectre_v2_bad_module = true;
+ return false;
+}
+
+static inline const char *spectre_v2_module_string(void)
+{
+ return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
+}
+#else
+static inline const char *spectre_v2_module_string(void) { return ""; }
+#endif
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s selected on command line.\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s selected on command line.\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+ return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+ int len = strlen(opt);
+
+ return len == arglen && !strncmp(arg, opt, len);
+}
+
+static const struct {
+ const char *option;
+ enum spectre_v2_mitigation_cmd cmd;
+ bool secure;
+} mitigation_options[] = {
+ { "off", SPECTRE_V2_CMD_NONE, false },
+ { "on", SPECTRE_V2_CMD_FORCE, true },
+ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
+ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
+ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+ { "auto", SPECTRE_V2_CMD_AUTO, false },
+};
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+ char arg[20];
+ int ret, i;
+ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
+
+ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ return SPECTRE_V2_CMD_NONE;
+ else {
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+ sizeof(arg));
+ if (ret < 0)
+ return SPECTRE_V2_CMD_AUTO;
+
+ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
+ if (!match_option(arg, ret, mitigation_options[i].option))
+ continue;
+ cmd = mitigation_options[i].cmd;
+ break;
+ }
+
+ if (i >= ARRAY_SIZE(mitigation_options)) {
+ pr_err("unknown option (%s). Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ }
+
+ if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
+ !IS_ENABLED(CONFIG_RETPOLINE)) {
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (mitigation_options[i].secure)
+ spec2_print_if_secure(mitigation_options[i].option);
+ else
+ spec2_print_if_insecure(mitigation_options[i].option);
+
+ return cmd;
+}
+
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_data.x86 == 6) {
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ return true;
+ }
+ }
+ return false;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+ enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+ /*
+ * If the CPU is not affected and the command line mode is NONE or AUTO
+ * then nothing to do.
+ */
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+ (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+ return;
+
+ switch (cmd) {
+ case SPECTRE_V2_CMD_NONE:
+ return;
+
+ case SPECTRE_V2_CMD_FORCE:
+ case SPECTRE_V2_CMD_AUTO:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_auto;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE_AMD:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_amd;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_generic;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_auto;
+ break;
+ }
+ pr_err("kernel not compiled with retpoline; no mitigation available!");
+ return;
+
+retpoline_auto:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ retpoline_amd:
+ if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+ pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+ goto retpoline_generic;
+ }
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ } else {
+ retpoline_generic:
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+ SPECTRE_V2_RETPOLINE_MINIMAL;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ }
+
+ spectre_v2_enabled = mode;
+ pr_info("%s\n", spectre_v2_strings[mode]);
+
+ /*
+ * If neither SMEP or KPTI are available, there is a risk of
+ * hitting userspace addresses in the RSB after a context switch
+ * from a shallow call stack to a deeper one. To prevent this fill
+ * the entire RSB, even when using IBRS.
+ *
+ * Skylake era CPUs have a separate issue with *underflow* of the
+ * RSB, when they will predict 'ret' targets from the generic BTB.
+ * The proper mitigation for this is IBRS. If IBRS is not supported
+ * or deactivated in favour of retpolines the RSB fill on context
+ * switch is required.
+ */
+ if ((!boot_cpu_has(X86_FEATURE_PTI) &&
+ !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Filling RSB on context switch\n");
+ }
+
+ /* Initialize Indirect Branch Prediction Barrier if supported */
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+ pr_info("Enabling Indirect Branch Prediction Barrier\n");
+ }
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ return sprintf(buf, "Not affected\n");
+ if (boot_cpu_has(X86_FEATURE_PTI))
+ return sprintf(buf, "Mitigation: PTI\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+ return sprintf(buf, "Not affected\n");
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ spectre_v2_module_string());
+}
+#endif
+
+void __ibp_barrier(void)
+{
+ __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
+}
+EXPORT_SYMBOL_GPL(__ibp_barrier);
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 68bc6d9b3132..c578cd29c2d2 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -106,6 +106,10 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#endif
+ if (c->x86_power & (1 << 8)) {
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+ set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ }
}
static void init_centaur(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fa998ca8aa5a..d63f4b5706e4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -47,6 +47,8 @@
#include <asm/pat.h>
#include <asm/microcode.h>
#include <asm/microcode_intel.h>
+#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
@@ -476,8 +478,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
return NULL; /* Not found */
}
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
void load_percpu_segment(int cpu)
{
@@ -490,28 +492,23 @@ void load_percpu_segment(int cpu)
load_stack_canary_segment();
}
-/* Setup the fixmap mapping only once per-processor */
-static inline void setup_fixmap_gdt(int cpu)
-{
-#ifdef CONFIG_X86_64
- /* On 64-bit systems, we use a read-only fixmap GDT. */
- pgprot_t prot = PAGE_KERNEL_RO;
-#else
- /*
- * On native 32-bit systems, the GDT cannot be read-only because
- * our double fault handler uses a task gate, and entering through
- * a task gate needs to change an available TSS to busy. If the GDT
- * is read-only, that will triple fault.
- *
- * On Xen PV, the GDT must be read-only because the hypervisor requires
- * it.
- */
- pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
- PAGE_KERNEL_RO : PAGE_KERNEL;
+#ifdef CONFIG_X86_32
+/* The 32-bit entry code needs to find cpu_entry_area. */
+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
#endif
- __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
-}
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
+};
+#endif
/* Load the original GDT from the per-cpu structure */
void load_direct_gdt(int cpu)
@@ -747,12 +744,32 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
{
int i;
- for (i = 0; i < NCAPINTS; i++) {
+ for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
c->x86_capability[i] &= ~cpu_caps_cleared[i];
c->x86_capability[i] |= cpu_caps_set[i];
}
}
+static void init_speculation_control(struct cpuinfo_x86 *c)
+{
+ /*
+ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
+ * and they also have a different bit for STIBP support. Also,
+ * a hypervisor might have set the individual AMD bits even on
+ * Intel CPUs, for finer-grained selection of what's available.
+ *
+ * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
+ * features, which are visible in /proc/cpuinfo and used by the
+ * kernel. So set those accordingly from the Intel bits.
+ */
+ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
+ set_cpu_cap(c, X86_FEATURE_IBRS);
+ set_cpu_cap(c, X86_FEATURE_IBPB);
+ }
+ if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
+ set_cpu_cap(c, X86_FEATURE_STIBP);
+}
+
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -774,6 +791,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
c->x86_capability[CPUID_7_0_EBX] = ebx;
c->x86_capability[CPUID_7_ECX] = ecx;
+ c->x86_capability[CPUID_7_EDX] = edx;
}
/* Extended state features: level 0x0000000d */
@@ -846,6 +864,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
init_scattered_cpuid_features(c);
+ init_speculation_control(c);
/*
* Clear/Set all flags overridden by options, after probe.
@@ -881,6 +900,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#endif
}
+static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_CENTAUR, 5 },
+ { X86_VENDOR_INTEL, 5 },
+ { X86_VENDOR_NSC, 5 },
+ { X86_VENDOR_ANY, 4 },
+ {}
+};
+
+static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
+ { X86_VENDOR_AMD },
+ {}
+};
+
+static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+{
+ u64 ia32_cap = 0;
+
+ if (x86_match_cpu(cpu_no_meltdown))
+ return false;
+
+ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+
+ /* Rogue Data Cache Load? No! */
+ if (ia32_cap & ARCH_CAP_RDCL_NO)
+ return false;
+
+ return true;
+}
+
/*
* Do minimum CPU detection early.
* Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -927,6 +981,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
}
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+ if (!x86_match_cpu(cpu_no_speculation)) {
+ if (cpu_vulnerable_to_meltdown(c))
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+ }
+
fpu__init_system(c);
#ifdef CONFIG_X86_32
@@ -1250,7 +1312,7 @@ void enable_sep_cpu(void)
return;
cpu = get_cpu();
- tss = &per_cpu(cpu_tss, cpu);
+ tss = &per_cpu(cpu_tss_rw, cpu);
/*
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1321,7 @@ void enable_sep_cpu(void)
tss->x86_tss.ss1 = __KERNEL_CS;
wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
- wrmsr(MSR_IA32_SYSENTER_ESP,
- (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
- 0);
-
+ wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
put_cpu();
@@ -1357,25 +1415,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
- [DEBUG_STACK - 1] = DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
+ extern char _entry_trampoline[];
+ extern char entry_SYSCALL_64_trampoline[];
+
+ int cpu = smp_processor_id();
+ unsigned long SYSCALL64_entry_trampoline =
+ (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+ (entry_SYSCALL_64_trampoline - _entry_trampoline);
+
wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
- wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+ if (static_cpu_has(X86_FEATURE_PTI))
+ wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+ else
+ wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
#ifdef CONFIG_IA32_EMULATION
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1441,7 @@ void syscall_init(void)
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
*/
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
#else
wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1585,7 @@ void cpu_init(void)
if (cpu)
load_ucode_ap();
- t = &per_cpu(cpu_tss, cpu);
+ t = &per_cpu(cpu_tss_rw, cpu);
oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
@@ -1569,7 +1624,7 @@ void cpu_init(void)
* set up and load the per-CPU TSS
*/
if (!oist->ist[0]) {
- char *estacks = per_cpu(exception_stacks, cpu);
+ char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
estacks += exception_stack_sizes[v];
@@ -1580,7 +1635,7 @@ void cpu_init(void)
}
}
- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
/*
* <= is required because the CPU will access up to
@@ -1596,11 +1651,12 @@ void cpu_init(void)
enter_lazy_tlb(&init_mm, me);
/*
- * Initialize the TSS. Don't bother initializing sp0, as the initial
- * task never enters user mode.
+ * Initialize the TSS. sp0 points to the entry trampoline stack
+ * regardless of what task is running.
*/
- set_tss_desc(cpu, t);
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
load_TR_desc();
+ load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
load_mm_ldt(&init_mm);
@@ -1612,7 +1668,6 @@ void cpu_init(void)
if (is_uv_system())
uv_cpu_init();
- setup_fixmap_gdt(cpu);
load_fixmap_gdt(cpu);
}
@@ -1622,7 +1677,7 @@ void cpu_init(void)
{
int cpu = smp_processor_id();
struct task_struct *curr = current;
- struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+ struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
wait_for_master_cpu(cpu);
@@ -1657,12 +1712,12 @@ void cpu_init(void)
* Initialize the TSS. Don't bother initializing sp0, as the initial
* task never enters user mode.
*/
- set_tss_desc(cpu, t);
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
load_TR_desc();
load_mm_ldt(&init_mm);
- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
#ifdef CONFIG_DOUBLEFAULT
/* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1729,6 @@ void cpu_init(void)
fpu__init_cpu();
- setup_fixmap_gdt(cpu);
load_fixmap_gdt(cpu);
}
#endif
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index bea8d3e24f50..479ca4728de0 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -31,6 +31,7 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
extern const struct hypervisor_x86 x86_hyper_xen_pv;
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
extern const struct hypervisor_x86 x86_hyper_kvm;
+extern const struct hypervisor_x86 x86_hyper_jailhouse;
static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
@@ -45,6 +46,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
#ifdef CONFIG_KVM_GUEST
&x86_hyper_kvm,
#endif
+#ifdef CONFIG_JAILHOUSE_GUEST
+ &x86_hyper_jailhouse,
+#endif
};
enum x86_hypervisor_type x86_hyper_type;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b1af22073e28..319bf989fad1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -102,6 +102,59 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
}
+/*
+ * Early microcode releases for the Spectre v2 mitigation were broken.
+ * Information taken from;
+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
+ * - https://kb.vmware.com/s/article/52345
+ * - Microcode revisions observed in the wild
+ * - Release note from 20180108 microcode release
+ */
+struct sku_microcode {
+ u8 model;
+ u8 stepping;
+ u32 microcode;
+};
+static const struct sku_microcode spectre_bad_microcodes[] = {
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 },
+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 },
+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 },
+ { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
+ { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
+ { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 },
+ { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
+ { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
+ { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
+ { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
+ { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 },
+ { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
+ { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 },
+ { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 },
+ { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 },
+ { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
+ { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
+ { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
+ /* Updated in the 20180108 release; blacklist until we know otherwise */
+ { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 },
+ /* Observed in the wild */
+ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
+ { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
+};
+
+static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
+ if (c->x86_model == spectre_bad_microcodes[i].model &&
+ c->x86_mask == spectre_bad_microcodes[i].stepping)
+ return (c->microcode <= spectre_bad_microcodes[i].microcode);
+ }
+ return false;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -122,6 +175,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
c->microcode = intel_get_microcode_revision();
+ /* Now if any of them are set, check the blacklist and clear the lot */
+ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
+ cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
+ cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
+ cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
+ pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
+ setup_clear_cpu_cap(X86_FEATURE_IBRS);
+ setup_clear_cpu_cap(X86_FEATURE_IBPB);
+ setup_clear_cpu_cap(X86_FEATURE_STIBP);
+ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
+ setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+ }
+
/*
* Atom erratum AAE44/AAF40/AAG38/AAH41:
*
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 88dcf8479013..410629f10ad3 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -135,6 +135,40 @@ struct rdt_resource rdt_resources_all[] = {
.format_str = "%d=%0*x",
.fflags = RFTYPE_RES_CACHE,
},
+ [RDT_RESOURCE_L2DATA] =
+ {
+ .rid = RDT_RESOURCE_L2DATA,
+ .name = "L2DATA",
+ .domains = domain_init(RDT_RESOURCE_L2DATA),
+ .msr_base = IA32_L2_CBM_BASE,
+ .msr_update = cat_wrmsr,
+ .cache_level = 2,
+ .cache = {
+ .min_cbm_bits = 1,
+ .cbm_idx_mult = 2,
+ .cbm_idx_offset = 0,
+ },
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
+ .fflags = RFTYPE_RES_CACHE,
+ },
+ [RDT_RESOURCE_L2CODE] =
+ {
+ .rid = RDT_RESOURCE_L2CODE,
+ .name = "L2CODE",
+ .domains = domain_init(RDT_RESOURCE_L2CODE),
+ .msr_base = IA32_L2_CBM_BASE,
+ .msr_update = cat_wrmsr,
+ .cache_level = 2,
+ .cache = {
+ .min_cbm_bits = 1,
+ .cbm_idx_mult = 2,
+ .cbm_idx_offset = 1,
+ },
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
+ .fflags = RFTYPE_RES_CACHE,
+ },
[RDT_RESOURCE_MBA] =
{
.rid = RDT_RESOURCE_MBA,
@@ -259,15 +293,15 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
r->alloc_enabled = true;
}
-static void rdt_get_cdp_l3_config(int type)
+static void rdt_get_cdp_config(int level, int type)
{
- struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct rdt_resource *r_l = &rdt_resources_all[level];
struct rdt_resource *r = &rdt_resources_all[type];
- r->num_closid = r_l3->num_closid / 2;
- r->cache.cbm_len = r_l3->cache.cbm_len;
- r->default_ctrl = r_l3->default_ctrl;
- r->cache.shareable_bits = r_l3->cache.shareable_bits;
+ r->num_closid = r_l->num_closid / 2;
+ r->cache.cbm_len = r_l->cache.cbm_len;
+ r->default_ctrl = r_l->default_ctrl;
+ r->cache.shareable_bits = r_l->cache.shareable_bits;
r->data_width = (r->cache.cbm_len + 3) / 4;
r->alloc_capable = true;
/*
@@ -277,6 +311,18 @@ static void rdt_get_cdp_l3_config(int type)
r->alloc_enabled = false;
}
+static void rdt_get_cdp_l3_config(void)
+{
+ rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA);
+ rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3CODE);
+}
+
+static void rdt_get_cdp_l2_config(void)
+{
+ rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA);
+ rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
+}
+
static int get_cache_id(int cpu, int level)
{
struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
@@ -525,10 +571,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
*/
if (static_branch_unlikely(&rdt_mon_enable_key))
rmdir_mondata_subdir_allrdtgrp(r, d->id);
- kfree(d->ctrl_val);
- kfree(d->rmid_busy_llc);
- kfree(d->mbm_total);
- kfree(d->mbm_local);
list_del(&d->list);
if (is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
@@ -545,6 +587,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cancel_delayed_work(&d->cqm_limbo);
}
+ kfree(d->ctrl_val);
+ kfree(d->rmid_busy_llc);
+ kfree(d->mbm_total);
+ kfree(d->mbm_local);
kfree(d);
return;
}
@@ -645,6 +691,7 @@ enum {
RDT_FLAG_L3_CAT,
RDT_FLAG_L3_CDP,
RDT_FLAG_L2_CAT,
+ RDT_FLAG_L2_CDP,
RDT_FLAG_MBA,
};
@@ -667,6 +714,7 @@ static struct rdt_options rdt_options[] __initdata = {
RDT_OPT(RDT_FLAG_L3_CAT, "l3cat", X86_FEATURE_CAT_L3),
RDT_OPT(RDT_FLAG_L3_CDP, "l3cdp", X86_FEATURE_CDP_L3),
RDT_OPT(RDT_FLAG_L2_CAT, "l2cat", X86_FEATURE_CAT_L2),
+ RDT_OPT(RDT_FLAG_L2_CDP, "l2cdp", X86_FEATURE_CDP_L2),
RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA),
};
#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
@@ -729,15 +777,15 @@ static __init bool get_rdt_alloc_resources(void)
if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]);
- if (rdt_cpu_has(X86_FEATURE_CDP_L3)) {
- rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA);
- rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE);
- }
+ if (rdt_cpu_has(X86_FEATURE_CDP_L3))
+ rdt_get_cdp_l3_config();
ret = true;
}
if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
/* CPUID 0x10.2 fields are same format at 0x10.1 */
rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]);
+ if (rdt_cpu_has(X86_FEATURE_CDP_L2))
+ rdt_get_cdp_l2_config();
ret = true;
}
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 3397244984f5..3fd7a70ee04a 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -7,12 +7,15 @@
#include <linux/jump_label.h>
#define IA32_L3_QOS_CFG 0xc81
+#define IA32_L2_QOS_CFG 0xc82
#define IA32_L3_CBM_BASE 0xc90
#define IA32_L2_CBM_BASE 0xd10
#define IA32_MBA_THRTL_BASE 0xd50
#define L3_QOS_CDP_ENABLE 0x01ULL
+#define L2_QOS_CDP_ENABLE 0x01ULL
+
/*
* Event IDs are used to program IA32_QM_EVTSEL before reading event
* counter from IA32_QM_CTR
@@ -357,6 +360,8 @@ enum {
RDT_RESOURCE_L3DATA,
RDT_RESOURCE_L3CODE,
RDT_RESOURCE_L2,
+ RDT_RESOURCE_L2DATA,
+ RDT_RESOURCE_L2CODE,
RDT_RESOURCE_MBA,
/* Must be the last */
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 64c5ff97ee0d..bdab7d2f51af 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -990,6 +990,7 @@ out_destroy:
kernfs_remove(kn);
return ret;
}
+
static void l3_qos_cfg_update(void *arg)
{
bool *enable = arg;
@@ -997,8 +998,17 @@ static void l3_qos_cfg_update(void *arg)
wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
}
-static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
+static void l2_qos_cfg_update(void *arg)
{
+ bool *enable = arg;
+
+ wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
+}
+
+static int set_cache_qos_cfg(int level, bool enable)
+{
+ void (*update)(void *arg);
+ struct rdt_resource *r_l;
cpumask_var_t cpu_mask;
struct rdt_domain *d;
int cpu;
@@ -1006,16 +1016,24 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;
- list_for_each_entry(d, &r->domains, list) {
+ if (level == RDT_RESOURCE_L3)
+ update = l3_qos_cfg_update;
+ else if (level == RDT_RESOURCE_L2)
+ update = l2_qos_cfg_update;
+ else
+ return -EINVAL;
+
+ r_l = &rdt_resources_all[level];
+ list_for_each_entry(d, &r_l->domains, list) {
/* Pick one CPU from each domain instance to update MSR */
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
}
cpu = get_cpu();
/* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
if (cpumask_test_cpu(cpu, cpu_mask))
- l3_qos_cfg_update(&enable);
+ update(&enable);
/* Update QOS_CFG MSR on all other cpus in cpu_mask. */
- smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1);
+ smp_call_function_many(cpu_mask, update, &enable, 1);
put_cpu();
free_cpumask_var(cpu_mask);
@@ -1023,52 +1041,99 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
return 0;
}
-static int cdp_enable(void)
+static int cdp_enable(int level, int data_type, int code_type)
{
- struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA];
- struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE];
- struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
+ struct rdt_resource *r_lcode = &rdt_resources_all[code_type];
+ struct rdt_resource *r_l = &rdt_resources_all[level];
int ret;
- if (!r_l3->alloc_capable || !r_l3data->alloc_capable ||
- !r_l3code->alloc_capable)
+ if (!r_l->alloc_capable || !r_ldata->alloc_capable ||
+ !r_lcode->alloc_capable)
return -EINVAL;
- ret = set_l3_qos_cfg(r_l3, true);
+ ret = set_cache_qos_cfg(level, true);
if (!ret) {
- r_l3->alloc_enabled = false;
- r_l3data->alloc_enabled = true;
- r_l3code->alloc_enabled = true;
+ r_l->alloc_enabled = false;
+ r_ldata->alloc_enabled = true;
+ r_lcode->alloc_enabled = true;
}
return ret;
}
-static void cdp_disable(void)
+static int cdpl3_enable(void)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+ return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA,
+ RDT_RESOURCE_L3CODE);
+}
+
+static int cdpl2_enable(void)
+{
+ return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA,
+ RDT_RESOURCE_L2CODE);
+}
+
+static void cdp_disable(int level, int data_type, int code_type)
+{
+ struct rdt_resource *r = &rdt_resources_all[level];
r->alloc_enabled = r->alloc_capable;
- if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) {
- rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled = false;
- rdt_resources_all[RDT_RESOURCE_L3CODE].alloc_enabled = false;
- set_l3_qos_cfg(r, false);
+ if (rdt_resources_all[data_type].alloc_enabled) {
+ rdt_resources_all[data_type].alloc_enabled = false;
+ rdt_resources_all[code_type].alloc_enabled = false;
+ set_cache_qos_cfg(level, false);
}
}
+static void cdpl3_disable(void)
+{
+ cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE);
+}
+
+static void cdpl2_disable(void)
+{
+ cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE);
+}
+
+static void cdp_disable_all(void)
+{
+ if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
+ cdpl3_disable();
+ if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
+ cdpl2_disable();
+}
+
static int parse_rdtgroupfs_options(char *data)
{
char *token, *o = data;
int ret = 0;
while ((token = strsep(&o, ",")) != NULL) {
- if (!*token)
- return -EINVAL;
+ if (!*token) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (!strcmp(token, "cdp"))
- ret = cdp_enable();
+ if (!strcmp(token, "cdp")) {
+ ret = cdpl3_enable();
+ if (ret)
+ goto out;
+ } else if (!strcmp(token, "cdpl2")) {
+ ret = cdpl2_enable();
+ if (ret)
+ goto out;
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
}
+ return 0;
+
+out:
+ pr_err("Invalid mount option \"%s\"\n", token);
+
return ret;
}
@@ -1223,7 +1288,7 @@ out_mongrp:
out_info:
kernfs_remove(kn_info);
out_cdp:
- cdp_disable();
+ cdp_disable_all();
out:
rdt_last_cmd_clear();
mutex_unlock(&rdtgroup_mutex);
@@ -1383,7 +1448,7 @@ static void rdt_kill_sb(struct super_block *sb)
/*Put everything back to default values. */
for_each_alloc_enabled_rdt_resource(r)
reset_all_ctrls(r);
- cdp_disable();
+ cdp_disable_all();
rmdir_all_sub();
static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
static_branch_disable_cpuslocked(&rdt_mon_enable_key);
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 7f85b76f43bc..213e8c2ca702 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -243,7 +243,7 @@ out:
return err ? err : buf - ubuf;
}
-static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
+static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &mce_chrdev_wait, wait);
if (READ_ONCE(mcelog.next))
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 4ca632a06e0b..5bbd06f38ff6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -59,6 +59,7 @@ static struct severity {
#define MCGMASK(x, y) .mcgmask = x, .mcgres = y
#define MASK(x, y) .mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
+#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
@@ -101,6 +102,22 @@ static struct severity {
NOSER, BITCLR(MCI_STATUS_UC)
),
+ /*
+ * known AO MCACODs reported via MCE or CMC:
+ *
+ * SRAO could be signaled either via a machine check exception or
+ * CMCI with the corresponding bit S 1 or 0. So we don't need to
+ * check bit S for SRAO.
+ */
+ MCESEV(
+ AO, "Action optional: memory scrubbing error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
+ ),
+ MCESEV(
+ AO, "Action optional: last level cache writeback error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
+ ),
+
/* ignore OVER for UCNA */
MCESEV(
UCNA, "Uncorrected no action required",
@@ -149,15 +166,6 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
),
- /* known AO MCACODs: */
- MCESEV(
- AO, "Action optional: memory scrubbing error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
- ),
- MCESEV(
- AO, "Action optional: last level cache writeback error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
- ),
MCESEV(
SOME, "Action optional: unknown MCACOD",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b1d616d08eee..3a8e88a611eb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -14,7 +14,6 @@
#include <linux/capability.h>
#include <linux/miscdevice.h>
#include <linux/ratelimit.h>
-#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
#include <linux/kobject.h>
#include <linux/uaccess.h>
@@ -235,7 +234,7 @@ static void __print_mce(struct mce *m)
m->cs, m->ip);
if (m->cs == __KERNEL_CS)
- print_symbol("{%s}", m->ip);
+ pr_cont("{%pS}", (void *)m->ip);
pr_cont("\n");
}
@@ -503,10 +502,8 @@ static int mce_usable_address(struct mce *m)
bool mce_is_memory_error(struct mce *m)
{
if (m->cpuvendor == X86_VENDOR_AMD) {
- /* ErrCodeExt[20:16] */
- u8 xec = (m->status >> 16) & 0x1f;
+ return amd_mce_is_memory_error(m);
- return (xec == 0x0 || xec == 0x8);
} else if (m->cpuvendor == X86_VENDOR_INTEL) {
/*
* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
@@ -530,6 +527,17 @@ bool mce_is_memory_error(struct mce *m)
}
EXPORT_SYMBOL_GPL(mce_is_memory_error);
+static bool mce_is_correctable(struct mce *m)
+{
+ if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
+ return false;
+
+ if (m->status & MCI_STATUS_UC)
+ return false;
+
+ return true;
+}
+
static bool cec_add_mce(struct mce *m)
{
if (!m)
@@ -537,7 +545,7 @@ static bool cec_add_mce(struct mce *m)
/* We eat only correctable DRAM errors with usable addresses. */
if (mce_is_memory_error(m) &&
- !(m->status & MCI_STATUS_UC) &&
+ mce_is_correctable(m) &&
mce_usable_address(m))
if (!cec_add_elem(m->addr >> PAGE_SHIFT))
return true;
@@ -582,7 +590,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT;
- memory_failure(pfn, MCE_VECTOR, 0);
+ memory_failure(pfn, 0);
}
return NOTIFY_OK;
@@ -1046,7 +1054,7 @@ static int do_memory_failure(struct mce *m)
pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
if (!(m->mcgstatus & MCG_STATUS_RIPV))
flags |= MF_MUST_KILL;
- ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags);
+ ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
if (ret)
pr_err("Memory error not recovered");
return ret;
@@ -1325,7 +1333,7 @@ out_ist:
EXPORT_SYMBOL_GPL(do_machine_check);
#ifndef CONFIG_MEMORY_FAILURE
-int memory_failure(unsigned long pfn, int vector, int flags)
+int memory_failure(unsigned long pfn, int flags)
{
/* mce_severity() should not hand us an ACTION_REQUIRED error */
BUG_ON(flags & MF_ACTION_REQUIRED);
@@ -1785,6 +1793,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
+dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
+{
+ machine_check_vector(regs, error_code);
+}
+
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 486f640b02ef..0f32ad242324 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -110,6 +110,20 @@ const char *smca_get_long_name(enum smca_bank_types t)
}
EXPORT_SYMBOL_GPL(smca_get_long_name);
+static enum smca_bank_types smca_get_bank_type(struct mce *m)
+{
+ struct smca_bank *b;
+
+ if (m->bank >= N_SMCA_BANK_TYPES)
+ return N_SMCA_BANK_TYPES;
+
+ b = &smca_banks[m->bank];
+ if (!b->hwid)
+ return N_SMCA_BANK_TYPES;
+
+ return b->hwid->bank_type;
+}
+
static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype, xec_bitmap } */
@@ -407,7 +421,9 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
(deferred_error_int_vector != amd_deferred_error_interrupt))
deferred_error_int_vector = amd_deferred_error_interrupt;
- low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
+ if (!mce_flags.smca)
+ low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
+
wrmsr(MSR_CU_DEF_ERR, low, high);
}
@@ -738,6 +754,17 @@ out_err:
}
EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
+bool amd_mce_is_memory_error(struct mce *m)
+{
+ /* ErrCodeExt[20:16] */
+ u8 xec = (m->status >> 16) & 0x1f;
+
+ if (mce_flags.smca)
+ return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
+
+ return m->bank == 4 && xec == 0x8;
+}
+
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{
struct mce m;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index c4fa4a85d4cb..319dd65f98a2 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -239,7 +239,7 @@ static int __init save_microcode_in_initrd(void)
break;
case X86_VENDOR_AMD:
if (c->x86 >= 0x10)
- return save_microcode_in_initrd_amd(cpuid_eax(1));
+ ret = save_microcode_in_initrd_amd(cpuid_eax(1));
break;
default:
break;
@@ -560,7 +560,7 @@ static ssize_t pf_show(struct device *dev,
return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
}
-static DEVICE_ATTR(reload, 0200, NULL, reload_store);
+static DEVICE_ATTR_WO(reload);
static DEVICE_ATTR(version, 0400, version_show, NULL);
static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 7dbcb7adf797..f7c55b0e753a 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -45,6 +45,9 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
/* Current microcode patch used in early patching on the APs. */
static struct microcode_intel *intel_ucode_patch;
+/* last level cache size per core */
+static int llc_size_per_core;
+
static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
unsigned int s2, unsigned int p2)
{
@@ -565,15 +568,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
}
#else
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
- __native_flush_tlb_global_irq_disabled();
-}
-
static inline void print_ucode(struct ucode_cpu_info *uci)
{
struct microcode_intel *mc;
@@ -602,10 +596,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if (rev != mc->hdr.rev)
return -1;
-#ifdef CONFIG_X86_64
- /* Flush global tlb. This is precaution. */
- flush_tlb_early();
-#endif
uci->cpu_sig.rev = rev;
if (early)
@@ -923,8 +913,19 @@ static bool is_blacklisted(unsigned int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
- pr_err_once("late loading on model 79 is disabled.\n");
+ /*
+ * Late loading on model 79 with microcode revision less than 0x0b000021
+ * and LLC size per core bigger than 2.5MB may result in a system hang.
+ * This behavior is documented in item BDF90, #334165 (Intel Xeon
+ * Processor E7-8800/4800 v4 Product Family).
+ */
+ if (c->x86 == 6 &&
+ c->x86_model == INTEL_FAM6_BROADWELL_X &&
+ c->x86_mask == 0x01 &&
+ llc_size_per_core > 2621440 &&
+ c->microcode < 0x0b000021) {
+ pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+ pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
return true;
}
@@ -979,6 +980,15 @@ static struct microcode_ops microcode_intel_ops = {
.apply_microcode = apply_microcode_intel,
};
+static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
+{
+ u64 llc_size = c->x86_cache_size * 1024;
+
+ do_div(llc_size, c->x86_max_cores);
+
+ return (int)llc_size;
+}
+
struct microcode_ops * __init init_intel_microcode(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -989,5 +999,7 @@ struct microcode_ops * __init init_intel_microcode(void)
return NULL;
}
+ llc_size_per_core = calc_llc_size_per_core(c);
+
return &microcode_intel_ops;
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 05459ad3db46..4075d2be5357 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -21,12 +21,10 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
- { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 },
- { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
- { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
{ X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
{ X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
+ { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 },
{ X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },