aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/amd.c35
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c74
-rw-r--r--arch/x86/kernel/cpu/bugs.c221
-rw-r--r--arch/x86/kernel/cpu/common.c162
-rw-r--r--arch/x86/kernel/cpu/cpu.h3
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c121
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c64
-rw-r--r--arch/x86/kernel/cpu/intel.c15
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.c9
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.h7
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c50
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_monitor.c2
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c131
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c9
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c18
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c2
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c42
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c6
-rw-r--r--arch/x86/kernel/cpu/proc.c4
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/cpu/vmware.c8
23 files changed, 775 insertions, 214 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 236999c54edc..570e8bb1f386 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -23,6 +23,7 @@ obj-y += rdrand.o
obj-y += match.o
obj-y += bugs.o
obj-y += aperfmperf.o
+obj-y += cpuid-deps.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d58184b7cd44..ea831c858195 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x17: init_amd_zn(c); break;
}
- /* Enable workaround for FXSAVE leak */
- if (c->x86 >= 6)
+ /*
+ * Enable workaround for FXSAVE leak on CPUs
+ * without a XSaveErPtr feature
+ */
+ if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
cpu_detect_cache_sizes(c);
@@ -826,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_K8);
if (cpu_has(c, X86_FEATURE_XMM2)) {
- /* MFENCE stops RDTSC speculation */
- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ unsigned long long val;
+ int ret;
+
+ /*
+ * A serializing LFENCE has less overhead than MFENCE, so
+ * use it for execution serialization. On families which
+ * don't have that MSR, LFENCE is already serializing.
+ * msr_set_bit() uses the safe accessors, too, even if the MSR
+ * is not present.
+ */
+ msr_set_bit(MSR_F10H_DECFG,
+ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+ /*
+ * Verify that the MSR write was successful (could be running
+ * under a hypervisor) and only then assume that LFENCE is
+ * serializing.
+ */
+ ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+ if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+ /* A serializing LFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+ } else {
+ /* MFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ }
}
/*
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 957813e0180d..7eba34df54c3 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -14,6 +14,8 @@
#include <linux/percpu.h>
#include <linux/smp.h>
+#include "cpu.h"
+
struct aperfmperf_sample {
unsigned int khz;
ktime_t time;
@@ -24,7 +26,7 @@ struct aperfmperf_sample {
static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
#define APERFMPERF_CACHE_THRESHOLD_MS 10
-#define APERFMPERF_REFRESH_DELAY_MS 20
+#define APERFMPERF_REFRESH_DELAY_MS 10
#define APERFMPERF_STALE_THRESHOLD_MS 1000
/*
@@ -38,8 +40,6 @@ static void aperfmperf_snapshot_khz(void *dummy)
u64 aperf, aperf_delta;
u64 mperf, mperf_delta;
struct aperfmperf_sample *s = this_cpu_ptr(&samples);
- ktime_t now = ktime_get();
- s64 time_delta = ktime_ms_delta(now, s->time);
unsigned long flags;
local_irq_save(flags);
@@ -57,38 +57,68 @@ static void aperfmperf_snapshot_khz(void *dummy)
if (mperf_delta == 0)
return;
- s->time = now;
+ s->time = ktime_get();
s->aperf = aperf;
s->mperf = mperf;
-
- /* If the previous iteration was too long ago, discard it. */
- if (time_delta > APERFMPERF_STALE_THRESHOLD_MS)
- s->khz = 0;
- else
- s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
+ s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
}
-unsigned int arch_freq_get_on_cpu(int cpu)
+static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
{
- s64 time_delta;
- unsigned int khz;
+ s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu));
+
+ /* Don't bother re-computing within the cache threshold time. */
+ if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
+ return true;
+
+ smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait);
+
+ /* Return false if the previous iteration was too long ago. */
+ return time_delta <= APERFMPERF_STALE_THRESHOLD_MS;
+}
+unsigned int aperfmperf_get_khz(int cpu)
+{
if (!cpu_khz)
return 0;
if (!static_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
- /* Don't bother re-computing within the cache threshold time. */
- time_delta = ktime_ms_delta(ktime_get(), per_cpu(samples.time, cpu));
- khz = per_cpu(samples.khz, cpu);
- if (khz && time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
- return khz;
+ aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
+ return per_cpu(samples.khz, cpu);
+}
- smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
- khz = per_cpu(samples.khz, cpu);
- if (khz)
- return khz;
+void arch_freq_prepare_all(void)
+{
+ ktime_t now = ktime_get();
+ bool wait = false;
+ int cpu;
+
+ if (!cpu_khz)
+ return;
+
+ if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+ return;
+
+ for_each_online_cpu(cpu)
+ if (!aperfmperf_snapshot_cpu(cpu, now, false))
+ wait = true;
+
+ if (wait)
+ msleep(APERFMPERF_REFRESH_DELAY_MS);
+}
+
+unsigned int arch_freq_get_on_cpu(int cpu)
+{
+ if (!cpu_khz)
+ return 0;
+
+ if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+ return 0;
+
+ if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
+ return per_cpu(samples.khz, cpu);
msleep(APERFMPERF_REFRESH_DELAY_MS);
smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ba0b2424c9b0..390b3dc3d438 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -10,6 +10,10 @@
*/
#include <linux/init.h>
#include <linux/utsname.h>
+#include <linux/cpu.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
@@ -19,6 +23,9 @@
#include <asm/alternative.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
+#include <asm/intel-family.h>
+
+static void __init spectre_v2_select_mitigation(void);
void __init check_bugs(void)
{
@@ -29,6 +36,9 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data);
}
+ /* Select the proper spectre mitigation before patching alternatives */
+ spectre_v2_select_mitigation();
+
#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
@@ -60,3 +70,214 @@ void __init check_bugs(void)
set_memory_4k((unsigned long)__va(0), 1);
#endif
}
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+ SPECTRE_V2_CMD_NONE,
+ SPECTRE_V2_CMD_AUTO,
+ SPECTRE_V2_CMD_FORCE,
+ SPECTRE_V2_CMD_RETPOLINE,
+ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+ SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
+ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+ return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+ int len = strlen(opt);
+
+ return len == arglen && !strncmp(arg, opt, len);
+}
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+ char arg[20];
+ int ret;
+
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+ sizeof(arg));
+ if (ret > 0) {
+ if (match_option(arg, ret, "off")) {
+ goto disable;
+ } else if (match_option(arg, ret, "on")) {
+ spec2_print_if_secure("force enabled on command line.");
+ return SPECTRE_V2_CMD_FORCE;
+ } else if (match_option(arg, ret, "retpoline")) {
+ spec2_print_if_insecure("retpoline selected on command line.");
+ return SPECTRE_V2_CMD_RETPOLINE;
+ } else if (match_option(arg, ret, "retpoline,amd")) {
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ spec2_print_if_insecure("AMD retpoline selected on command line.");
+ return SPECTRE_V2_CMD_RETPOLINE_AMD;
+ } else if (match_option(arg, ret, "retpoline,generic")) {
+ spec2_print_if_insecure("generic retpoline selected on command line.");
+ return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+ } else if (match_option(arg, ret, "auto")) {
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ }
+
+ if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ return SPECTRE_V2_CMD_AUTO;
+disable:
+ spec2_print_if_insecure("disabled on command line.");
+ return SPECTRE_V2_CMD_NONE;
+}
+
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_data.x86 == 6) {
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ return true;
+ }
+ }
+ return false;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+ enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+ /*
+ * If the CPU is not affected and the command line mode is NONE or AUTO
+ * then nothing to do.
+ */
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+ (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+ return;
+
+ switch (cmd) {
+ case SPECTRE_V2_CMD_NONE:
+ return;
+
+ case SPECTRE_V2_CMD_FORCE:
+ /* FALLTRHU */
+ case SPECTRE_V2_CMD_AUTO:
+ goto retpoline_auto;
+
+ case SPECTRE_V2_CMD_RETPOLINE_AMD:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_amd;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_generic;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_auto;
+ break;
+ }
+ pr_err("kernel not compiled with retpoline; no mitigation available!");
+ return;
+
+retpoline_auto:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ retpoline_amd:
+ if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+ pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+ goto retpoline_generic;
+ }
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ } else {
+ retpoline_generic:
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+ SPECTRE_V2_RETPOLINE_MINIMAL;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ }
+
+ spectre_v2_enabled = mode;
+ pr_info("%s\n", spectre_v2_strings[mode]);
+
+ /*
+ * If neither SMEP or KPTI are available, there is a risk of
+ * hitting userspace addresses in the RSB after a context switch
+ * from a shallow call stack to a deeper one. To prevent this fill
+ * the entire RSB, even when using IBRS.
+ *
+ * Skylake era CPUs have a separate issue with *underflow* of the
+ * RSB, when they will predict 'ret' targets from the generic BTB.
+ * The proper mitigation for this is IBRS. If IBRS is not supported
+ * or deactivated in favour of retpolines the RSB fill on context
+ * switch is required.
+ */
+ if ((!boot_cpu_has(X86_FEATURE_PTI) &&
+ !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Filling RSB on context switch\n");
+ }
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ return sprintf(buf, "Not affected\n");
+ if (boot_cpu_has(X86_FEATURE_PTI))
+ return sprintf(buf, "Mitigation: PTI\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+ return sprintf(buf, "Not affected\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+}
+#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c9176bae7fd8..ef29ad001991 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -329,6 +329,30 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
}
}
+static __always_inline void setup_umip(struct cpuinfo_x86 *c)
+{
+ /* Check the boot processor, plus build option for UMIP. */
+ if (!cpu_feature_enabled(X86_FEATURE_UMIP))
+ goto out;
+
+ /* Check the current processor's cpuid bits. */
+ if (!cpu_has(c, X86_FEATURE_UMIP))
+ goto out;
+
+ cr4_set_bits(X86_CR4_UMIP);
+
+ pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
+
+ return;
+
+out:
+ /*
+ * Make sure UMIP is disabled in case it was enabled in a
+ * previous boot (e.g., via kexec).
+ */
+ cr4_clear_bits(X86_CR4_UMIP);
+}
+
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -452,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
return NULL; /* Not found */
}
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
void load_percpu_segment(int cpu)
{
@@ -466,28 +490,23 @@ void load_percpu_segment(int cpu)
load_stack_canary_segment();
}
-/* Setup the fixmap mapping only once per-processor */
-static inline void setup_fixmap_gdt(int cpu)
-{
-#ifdef CONFIG_X86_64
- /* On 64-bit systems, we use a read-only fixmap GDT. */
- pgprot_t prot = PAGE_KERNEL_RO;
-#else
- /*
- * On native 32-bit systems, the GDT cannot be read-only because
- * our double fault handler uses a task gate, and entering through
- * a task gate needs to change an available TSS to busy. If the GDT
- * is read-only, that will triple fault.
- *
- * On Xen PV, the GDT must be read-only because the hypervisor requires
- * it.
- */
- pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
- PAGE_KERNEL_RO : PAGE_KERNEL;
+#ifdef CONFIG_X86_32
+/* The 32-bit entry code needs to find cpu_entry_area. */
+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
#endif
- __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
-}
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
+};
+#endif
/* Load the original GDT from the per-cpu structure */
void load_direct_gdt(int cpu)
@@ -723,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
{
int i;
- for (i = 0; i < NCAPINTS; i++) {
+ for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
c->x86_capability[i] &= ~cpu_caps_cleared[i];
c->x86_capability[i] |= cpu_caps_set[i];
}
@@ -863,8 +882,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
* cache alignment.
* The others are not touched to avoid unwanted side effects.
*
- * WARNING: this function is only called on the BP. Don't add code here
- * that is supposed to run on all CPUs.
+ * WARNING: this function is only called on the boot CPU. Don't add code
+ * here that is supposed to run on all CPUs.
*/
static void __init early_identify_cpu(struct cpuinfo_x86 *c)
{
@@ -903,6 +922,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
}
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+ if (c->x86_vendor != X86_VENDOR_AMD)
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
fpu__init_system(c);
#ifdef CONFIG_X86_32
@@ -1147,9 +1173,10 @@ static void identify_cpu(struct cpuinfo_x86 *c)
/* Disable the PN if appropriate */
squash_the_stupid_serial_number(c);
- /* Set up SMEP/SMAP */
+ /* Set up SMEP/SMAP/UMIP */
setup_smep(c);
setup_smap(c);
+ setup_umip(c);
/*
* The vendor-specific functions might have changed features.
@@ -1225,7 +1252,7 @@ void enable_sep_cpu(void)
return;
cpu = get_cpu();
- tss = &per_cpu(cpu_tss, cpu);
+ tss = &per_cpu(cpu_tss_rw, cpu);
/*
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1234,11 +1261,7 @@ void enable_sep_cpu(void)
tss->x86_tss.ss1 = __KERNEL_CS;
wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
- wrmsr(MSR_IA32_SYSENTER_ESP,
- (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
- 0);
-
+ wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
put_cpu();
@@ -1301,18 +1324,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
pr_cont(")\n");
}
-static __init int setup_disablecpuid(char *arg)
+/*
+ * clearcpuid= was already parsed in fpu__init_parse_early_param.
+ * But we need to keep a dummy __setup around otherwise it would
+ * show up as an environment variable for init.
+ */
+static __init int setup_clearcpuid(char *arg)
{
- int bit;
-
- if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
- setup_clear_cpu_cap(bit);
- else
- return 0;
-
return 1;
}
-__setup("clearcpuid=", setup_disablecpuid);
+__setup("clearcpuid=", setup_clearcpuid);
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1334,25 +1355,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
- [DEBUG_STACK - 1] = DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
+ extern char _entry_trampoline[];
+ extern char entry_SYSCALL_64_trampoline[];
+
+ int cpu = smp_processor_id();
+ unsigned long SYSCALL64_entry_trampoline =
+ (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+ (entry_SYSCALL_64_trampoline - _entry_trampoline);
+
wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
- wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+ if (static_cpu_has(X86_FEATURE_PTI))
+ wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+ else
+ wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
#ifdef CONFIG_IA32_EMULATION
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1363,7 +1381,7 @@ void syscall_init(void)
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
*/
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
#else
wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1507,7 +1525,7 @@ void cpu_init(void)
if (cpu)
load_ucode_ap();
- t = &per_cpu(cpu_tss, cpu);
+ t = &per_cpu(cpu_tss_rw, cpu);
oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
@@ -1546,7 +1564,7 @@ void cpu_init(void)
* set up and load the per-CPU TSS
*/
if (!oist->ist[0]) {
- char *estacks = per_cpu(exception_stacks, cpu);
+ char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
estacks += exception_stack_sizes[v];
@@ -1557,7 +1575,7 @@ void cpu_init(void)
}
}
- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
/*
* <= is required because the CPU will access up to
@@ -1572,9 +1590,14 @@ void cpu_init(void)
initialize_tlbstate_and_flush();
enter_lazy_tlb(&init_mm, me);
- load_sp0(t, &current->thread);
- set_tss_desc(cpu, t);
+ /*
+ * Initialize the TSS. sp0 points to the entry trampoline stack
+ * regardless of what task is running.
+ */
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
load_TR_desc();
+ load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
+
load_mm_ldt(&init_mm);
clear_all_debug_regs();
@@ -1585,7 +1608,6 @@ void cpu_init(void)
if (is_uv_system())
uv_cpu_init();
- setup_fixmap_gdt(cpu);
load_fixmap_gdt(cpu);
}
@@ -1595,8 +1617,7 @@ void cpu_init(void)
{
int cpu = smp_processor_id();
struct task_struct *curr = current;
- struct tss_struct *t = &per_cpu(cpu_tss, cpu);
- struct thread_struct *thread = &curr->thread;
+ struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
wait_for_master_cpu(cpu);
@@ -1627,12 +1648,16 @@ void cpu_init(void)
initialize_tlbstate_and_flush();
enter_lazy_tlb(&init_mm, curr);
- load_sp0(t, thread);
- set_tss_desc(cpu, t);
+ /*
+ * Initialize the TSS. Don't bother initializing sp0, as the initial
+ * task never enters user mode.
+ */
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
load_TR_desc();
+
load_mm_ldt(&init_mm);
- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
#ifdef CONFIG_DOUBLEFAULT
/* Set up doublefault TSS pointer in the GDT */
@@ -1644,7 +1669,6 @@ void cpu_init(void)
fpu__init_cpu();
- setup_fixmap_gdt(cpu);
load_fixmap_gdt(cpu);
}
#endif
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index f52a370b6c00..e806b11a99af 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -47,4 +47,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+
+unsigned int aperfmperf_get_khz(int cpu);
+
#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
new file mode 100644
index 000000000000..904b0a3c4e53
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -0,0 +1,121 @@
+/* Declare dependencies between CPUIDs */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/cpufeature.h>
+
+struct cpuid_dep {
+ unsigned int feature;
+ unsigned int depends;
+};
+
+/*
+ * Table of CPUID features that depend on others.
+ *
+ * This only includes dependencies that can be usefully disabled, not
+ * features part of the base set (like FPU).
+ *
+ * Note this all is not __init / __initdata because it can be
+ * called from cpu hotplug. It shouldn't do anything in this case,
+ * but it's difficult to tell that to the init reference checker.
+ */
+const static struct cpuid_dep cpuid_deps[] = {
+ { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
+ { X86_FEATURE_AVX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
+ { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM2, X86_FEATURE_XMM },
+ { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
+ { X86_FEATURE_F16C, X86_FEATURE_XMM2, },
+ { X86_FEATURE_AES, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
+ { X86_FEATURE_FMA, X86_FEATURE_AVX },
+ { X86_FEATURE_AVX2, X86_FEATURE_AVX, },
+ { X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
+ { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
+ {}
+};
+
+static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ /*
+ * Note: This could use the non atomic __*_bit() variants, but the
+ * rest of the cpufeature code uses atomics as well, so keep it for
+ * consistency. Cleanup all of it separately.
+ */
+ if (!c) {
+ clear_cpu_cap(&boot_cpu_data, feature);
+ set_bit(feature, (unsigned long *)cpu_caps_cleared);
+ } else {
+ clear_bit(feature, (unsigned long *)c->x86_capability);
+ }
+}
+
+/* Take the capabilities and the BUG bits into account */
+#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
+
+static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
+ const struct cpuid_dep *d;
+ bool changed;
+
+ if (WARN_ON(feature >= MAX_FEATURE_BITS))
+ return;
+
+ clear_feature(c, feature);
+
+ /* Collect all features to disable, handling dependencies */
+ memset(disable, 0, sizeof(disable));
+ __set_bit(feature, disable);
+
+ /* Loop until we get a stable state. */
+ do {
+ changed = false;
+ for (d = cpuid_deps; d->feature; d++) {
+ if (!test_bit(d->depends, disable))
+ continue;
+ if (__test_and_set_bit(d->feature, disable))
+ continue;
+
+ changed = true;
+ clear_feature(c, d->feature);
+ }
+ } while (changed);
+}
+
+void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ do_clear_cpu_cap(c, feature);
+}
+
+void setup_clear_cpu_cap(unsigned int feature)
+{
+ do_clear_cpu_cap(NULL, feature);
+}
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 4fa90006ac68..bea8d3e24f50 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -26,6 +26,12 @@
#include <asm/processor.h>
#include <asm/hypervisor.h>
+extern const struct hypervisor_x86 x86_hyper_vmware;
+extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
+extern const struct hypervisor_x86 x86_hyper_xen_pv;
+extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+extern const struct hypervisor_x86 x86_hyper_kvm;
+
static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
#ifdef CONFIG_XEN_PV
@@ -41,54 +47,52 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
#endif
};
-const struct hypervisor_x86 *x86_hyper;
-EXPORT_SYMBOL(x86_hyper);
+enum x86_hypervisor_type x86_hyper_type;
+EXPORT_SYMBOL(x86_hyper_type);
-static inline void __init
+static inline const struct hypervisor_x86 * __init
detect_hypervisor_vendor(void)
{
- const struct hypervisor_x86 *h, * const *p;
+ const struct hypervisor_x86 *h = NULL, * const *p;
uint32_t pri, max_pri = 0;
for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
- h = *p;
- pri = h->detect();
- if (pri != 0 && pri > max_pri) {
+ pri = (*p)->detect();
+ if (pri > max_pri) {
max_pri = pri;
- x86_hyper = h;
+ h = *p;
}
}
- if (max_pri)
- pr_info("Hypervisor detected: %s\n", x86_hyper->name);
+ if (h)
+ pr_info("Hypervisor detected: %s\n", h->name);
+
+ return h;
}
-void __init init_hypervisor_platform(void)
+static void __init copy_array(const void *src, void *target, unsigned int size)
{
+ unsigned int i, n = size / sizeof(void *);
+ const void * const *from = (const void * const *)src;
+ const void **to = (const void **)target;
- detect_hypervisor_vendor();
-
- if (!x86_hyper)
- return;
-
- if (x86_hyper->init_platform)
- x86_hyper->init_platform();
+ for (i = 0; i < n; i++)
+ if (from[i])
+ to[i] = from[i];
}
-bool __init hypervisor_x2apic_available(void)
+void __init init_hypervisor_platform(void)
{
- return x86_hyper &&
- x86_hyper->x2apic_available &&
- x86_hyper->x2apic_available();
-}
+ const struct hypervisor_x86 *h;
-void hypervisor_pin_vcpu(int cpu)
-{
- if (!x86_hyper)
+ h = detect_hypervisor_vendor();
+
+ if (!h)
return;
- if (x86_hyper->pin_vcpu)
- x86_hyper->pin_vcpu(cpu);
- else
- WARN_ONCE(1, "vcpu pinning requested but not supported!\n");
+ copy_array(&h->init, &x86_init.hyper, sizeof(h->init));
+ copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime));
+
+ x86_hyper_type = h->type;
+ x86_init.hyper.init_platform();
}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b720dacac051..b1af22073e28 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -187,21 +187,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
if (c->x86 == 6 && c->x86_model < 15)
clear_cpu_cap(c, X86_FEATURE_PAT);
-#ifdef CONFIG_KMEMCHECK
- /*
- * P4s have a "fast strings" feature which causes single-
- * stepping REP instructions to only generate a #DB on
- * cache-line boundaries.
- *
- * Ingo Molnar reported a Pentium D (model 6) and a Xeon
- * (model 2) with the same problem.
- */
- if (c->x86 == 15)
- if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
- MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0)
- pr_info("kmemcheck: Disabling fast string operations\n");
-#endif
-
/*
* If fast string is not enabled in IA32_MISC_ENABLE for any reason,
* clear the fast string and enhanced fast string CPU capabilities.
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index cd5fc61ba450..99442370de40 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -267,6 +267,7 @@ static void rdt_get_cdp_l3_config(int type)
r->num_closid = r_l3->num_closid / 2;
r->cache.cbm_len = r_l3->cache.cbm_len;
r->default_ctrl = r_l3->default_ctrl;
+ r->cache.shareable_bits = r_l3->cache.shareable_bits;
r->data_width = (r->cache.cbm_len + 3) / 4;
r->alloc_capable = true;
/*
@@ -524,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
*/
if (static_branch_unlikely(&rdt_mon_enable_key))
rmdir_mondata_subdir_allrdtgrp(r, d->id);
- kfree(d->ctrl_val);
- kfree(d->rmid_busy_llc);
- kfree(d->mbm_total);
- kfree(d->mbm_local);
list_del(&d->list);
if (is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
@@ -544,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cancel_delayed_work(&d->cqm_limbo);
}
+ kfree(d->ctrl_val);
+ kfree(d->rmid_busy_llc);
+ kfree(d->mbm_total);
+ kfree(d->mbm_local);
kfree(d);
return;
}
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index a43a72d8e88e..3397244984f5 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -127,12 +127,15 @@ struct rdtgroup {
#define RFTYPE_BASE BIT(1)
#define RF_CTRLSHIFT 4
#define RF_MONSHIFT 5
+#define RF_TOPSHIFT 6
#define RFTYPE_CTRL BIT(RF_CTRLSHIFT)
#define RFTYPE_MON BIT(RF_MONSHIFT)
+#define RFTYPE_TOP BIT(RF_TOPSHIFT)
#define RFTYPE_RES_CACHE BIT(8)
#define RFTYPE_RES_MB BIT(9)
#define RF_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL)
#define RF_MON_INFO (RFTYPE_INFO | RFTYPE_MON)
+#define RF_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP)
#define RF_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL)
/* List of all resource groups */
@@ -409,6 +412,10 @@ union cpuid_0x10_x_edx {
unsigned int full;
};
+void rdt_last_cmd_clear(void);
+void rdt_last_cmd_puts(const char *s);
+void rdt_last_cmd_printf(const char *fmt, ...);
+
void rdt_ctrl_update(void *arg);
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
void rdtgroup_kn_unlock(struct kernfs_node *kn);
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index f6ea94f8954a..23e1d5c249c6 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -42,15 +42,22 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
/*
* Only linear delay values is supported for current Intel SKUs.
*/
- if (!r->membw.delay_linear)
+ if (!r->membw.delay_linear) {
+ rdt_last_cmd_puts("No support for non-linear MB domains\n");
return false;
+ }
ret = kstrtoul(buf, 10, &bw);
- if (ret)
+ if (ret) {
+ rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
return false;
+ }
- if (bw < r->membw.min_bw || bw > r->default_ctrl)
+ if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+ rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
+ r->membw.min_bw, r->default_ctrl);
return false;
+ }
*data = roundup(bw, (unsigned long)r->membw.bw_gran);
return true;
@@ -60,8 +67,10 @@ int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d)
{
unsigned long data;
- if (d->have_new_ctrl)
+ if (d->have_new_ctrl) {
+ rdt_last_cmd_printf("duplicate domain %d\n", d->id);
return -EINVAL;
+ }
if (!bw_validate(buf, &data, r))
return -EINVAL;
@@ -84,20 +93,29 @@ static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r)
int ret;
ret = kstrtoul(buf, 16, &val);
- if (ret)
+ if (ret) {
+ rdt_last_cmd_printf("non-hex character in mask %s\n", buf);
return false;
+ }
- if (val == 0 || val > r->default_ctrl)
+ if (val == 0 || val > r->default_ctrl) {
+ rdt_last_cmd_puts("mask out of range\n");
return false;
+ }
first_bit = find_first_bit(&val, cbm_len);
zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
- if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)
+ if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) {
+ rdt_last_cmd_printf("mask %lx has non-consecutive 1-bits\n", val);
return false;
+ }
- if ((zero_bit - first_bit) < r->cache.min_cbm_bits)
+ if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
+ rdt_last_cmd_printf("Need at least %d bits in mask\n",
+ r->cache.min_cbm_bits);
return false;
+ }
*data = val;
return true;
@@ -111,8 +129,10 @@ int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
{
unsigned long data;
- if (d->have_new_ctrl)
+ if (d->have_new_ctrl) {
+ rdt_last_cmd_printf("duplicate domain %d\n", d->id);
return -EINVAL;
+ }
if(!cbm_validate(buf, &data, r))
return -EINVAL;
@@ -139,8 +159,10 @@ next:
return 0;
dom = strsep(&line, ";");
id = strsep(&dom, "=");
- if (!dom || kstrtoul(id, 10, &dom_id))
+ if (!dom || kstrtoul(id, 10, &dom_id)) {
+ rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
return -EINVAL;
+ }
dom = strim(dom);
list_for_each_entry(d, &r->domains, list) {
if (d->id == dom_id) {
@@ -196,6 +218,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok, int closid)
if (!strcmp(resname, r->name) && closid < r->num_closid)
return parse_line(tok, r);
}
+ rdt_last_cmd_printf("unknown/unsupported resource name '%s'\n", resname);
return -EINVAL;
}
@@ -218,6 +241,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
rdtgroup_kn_unlock(of->kn);
return -ENOENT;
}
+ rdt_last_cmd_clear();
closid = rdtgrp->closid;
@@ -229,6 +253,12 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
while ((tok = strsep(&buf, "\n")) != NULL) {
resname = strim(strsep(&tok, ":"));
if (!tok) {
+ rdt_last_cmd_puts("Missing ':'\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (tok[0] == '\0') {
+ rdt_last_cmd_printf("Missing '%s' value\n", resname);
ret = -EINVAL;
goto out;
}
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c
index 30827510094b..681450eee428 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -51,7 +51,7 @@ static LIST_HEAD(rmid_free_lru);
* may have a occupancy value > intel_cqm_threshold. User can change
* the threshold occupancy value.
*/
-unsigned int rmid_limbo_count;
+static unsigned int rmid_limbo_count;
/**
* @rmid_entry - The entry in the limbo and free lists.
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index a869d4a073c5..64c5ff97ee0d 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -24,6 +24,7 @@
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/kernfs.h>
+#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
@@ -51,6 +52,31 @@ static struct kernfs_node *kn_mongrp;
/* Kernel fs node for "mon_data" directory under root */
static struct kernfs_node *kn_mondata;
+static struct seq_buf last_cmd_status;
+static char last_cmd_status_buf[512];
+
+void rdt_last_cmd_clear(void)
+{
+ lockdep_assert_held(&rdtgroup_mutex);
+ seq_buf_clear(&last_cmd_status);
+}
+
+void rdt_last_cmd_puts(const char *s)
+{
+ lockdep_assert_held(&rdtgroup_mutex);
+ seq_buf_puts(&last_cmd_status, s);
+}
+
+void rdt_last_cmd_printf(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ lockdep_assert_held(&rdtgroup_mutex);
+ seq_buf_vprintf(&last_cmd_status, fmt, ap);
+ va_end(ap);
+}
+
/*
* Trivial allocator for CLOSIDs. Since h/w only supports a small number,
* we can keep a bitmap of free CLOSIDs in a single integer.
@@ -238,8 +264,10 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
/* Check whether cpus belong to parent ctrl group */
cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
- if (cpumask_weight(tmpmask))
+ if (cpumask_weight(tmpmask)) {
+ rdt_last_cmd_puts("can only add CPUs to mongroup that belong to parent\n");
return -EINVAL;
+ }
/* Check whether cpus are dropped from this group */
cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
@@ -291,8 +319,10 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
if (cpumask_weight(tmpmask)) {
/* Can't drop from default group */
- if (rdtgrp == &rdtgroup_default)
+ if (rdtgrp == &rdtgroup_default) {
+ rdt_last_cmd_puts("Can't drop CPUs from default group\n");
return -EINVAL;
+ }
/* Give any dropped cpus to rdtgroup_default */
cpumask_or(&rdtgroup_default.cpu_mask,
@@ -357,8 +387,10 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
}
rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ rdt_last_cmd_clear();
if (!rdtgrp) {
ret = -ENOENT;
+ rdt_last_cmd_puts("directory was removed\n");
goto unlock;
}
@@ -367,13 +399,16 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
else
ret = cpumask_parse(buf, newmask);
- if (ret)
+ if (ret) {
+ rdt_last_cmd_puts("bad cpu list/mask\n");
goto unlock;
+ }
/* check that user didn't specify any offline cpus */
cpumask_andnot(tmpmask, newmask, cpu_online_mask);
if (cpumask_weight(tmpmask)) {
ret = -EINVAL;
+ rdt_last_cmd_puts("can only assign online cpus\n");
goto unlock;
}
@@ -452,6 +487,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
*/
atomic_dec(&rdtgrp->waitcount);
kfree(callback);
+ rdt_last_cmd_puts("task exited\n");
} else {
/*
* For ctrl_mon groups move both closid and rmid.
@@ -462,10 +498,12 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
tsk->closid = rdtgrp->closid;
tsk->rmid = rdtgrp->mon.rmid;
} else if (rdtgrp->type == RDTMON_GROUP) {
- if (rdtgrp->mon.parent->closid == tsk->closid)
+ if (rdtgrp->mon.parent->closid == tsk->closid) {
tsk->rmid = rdtgrp->mon.rmid;
- else
+ } else {
+ rdt_last_cmd_puts("Can't move task to different control group\n");
ret = -EINVAL;
+ }
}
}
return ret;
@@ -484,8 +522,10 @@ static int rdtgroup_task_write_permission(struct task_struct *task,
*/
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
- !uid_eq(cred->euid, tcred->suid))
+ !uid_eq(cred->euid, tcred->suid)) {
+ rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
ret = -EPERM;
+ }
put_cred(tcred);
return ret;
@@ -502,6 +542,7 @@ static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
tsk = find_task_by_vpid(pid);
if (!tsk) {
rcu_read_unlock();
+ rdt_last_cmd_printf("No task %d\n", pid);
return -ESRCH;
}
} else {
@@ -529,6 +570,7 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ rdt_last_cmd_clear();
if (rdtgrp)
ret = rdtgroup_move_task(pid, rdtgrp, of);
@@ -569,6 +611,21 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of,
return ret;
}
+static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ int len;
+
+ mutex_lock(&rdtgroup_mutex);
+ len = seq_buf_used(&last_cmd_status);
+ if (len)
+ seq_printf(seq, "%.*s", len, last_cmd_status_buf);
+ else
+ seq_puts(seq, "ok\n");
+ mutex_unlock(&rdtgroup_mutex);
+ return 0;
+}
+
static int rdt_num_closids_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
@@ -686,6 +743,13 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
/* rdtgroup information files for one cache resource. */
static struct rftype res_common_files[] = {
{
+ .name = "last_cmd_status",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_last_cmd_status_show,
+ .fflags = RF_TOP_INFO,
+ },
+ {
.name = "num_closids",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
@@ -855,6 +919,10 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
return PTR_ERR(kn_info);
kernfs_get(kn_info);
+ ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
+ if (ret)
+ goto out_destroy;
+
for_each_alloc_enabled_rdt_resource(r) {
fflags = r->fflags | RF_CTRL_INFO;
ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
@@ -1081,6 +1149,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
struct dentry *dentry;
int ret;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
/*
* resctrl file system can only be mounted once.
@@ -1130,12 +1199,12 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
goto out_mondata;
if (rdt_alloc_capable)
- static_branch_enable(&rdt_alloc_enable_key);
+ static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
if (rdt_mon_capable)
- static_branch_enable(&rdt_mon_enable_key);
+ static_branch_enable_cpuslocked(&rdt_mon_enable_key);
if (rdt_alloc_capable || rdt_mon_capable)
- static_branch_enable(&rdt_enable_key);
+ static_branch_enable_cpuslocked(&rdt_enable_key);
if (is_mbm_enabled()) {
r = &rdt_resources_all[RDT_RESOURCE_L3];
@@ -1156,7 +1225,9 @@ out_info:
out_cdp:
cdp_disable();
out:
+ rdt_last_cmd_clear();
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return dentry;
}
@@ -1295,9 +1366,7 @@ static void rmdir_all_sub(void)
kfree(rdtgrp);
}
/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
- get_online_cpus();
update_closid_rmid(cpu_online_mask, &rdtgroup_default);
- put_online_cpus();
kernfs_remove(kn_info);
kernfs_remove(kn_mongrp);
@@ -1308,6 +1377,7 @@ static void rdt_kill_sb(struct super_block *sb)
{
struct rdt_resource *r;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
/*Put everything back to default values. */
@@ -1315,11 +1385,12 @@ static void rdt_kill_sb(struct super_block *sb)
reset_all_ctrls(r);
cdp_disable();
rmdir_all_sub();
- static_branch_disable(&rdt_alloc_enable_key);
- static_branch_disable(&rdt_mon_enable_key);
- static_branch_disable(&rdt_enable_key);
+ static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
+ static_branch_disable_cpuslocked(&rdt_mon_enable_key);
+ static_branch_disable_cpuslocked(&rdt_enable_key);
kernfs_kill_sb(sb);
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
}
static struct file_system_type rdt_fs_type = {
@@ -1524,8 +1595,10 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
int ret;
prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
+ rdt_last_cmd_clear();
if (!prdtgrp) {
ret = -ENODEV;
+ rdt_last_cmd_puts("directory was removed\n");
goto out_unlock;
}
@@ -1533,6 +1606,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
if (!rdtgrp) {
ret = -ENOSPC;
+ rdt_last_cmd_puts("kernel out of memory\n");
goto out_unlock;
}
*r = rdtgrp;
@@ -1544,6 +1618,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
if (IS_ERR(kn)) {
ret = PTR_ERR(kn);
+ rdt_last_cmd_puts("kernfs create error\n");
goto out_free_rgrp;
}
rdtgrp->kn = kn;
@@ -1557,24 +1632,31 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
kernfs_get(kn);
ret = rdtgroup_kn_set_ugid(kn);
- if (ret)
+ if (ret) {
+ rdt_last_cmd_puts("kernfs perm error\n");
goto out_destroy;
+ }
- files = RFTYPE_BASE | RFTYPE_CTRL;
files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
ret = rdtgroup_add_files(kn, files);
- if (ret)
+ if (ret) {
+ rdt_last_cmd_puts("kernfs fill error\n");
goto out_destroy;
+ }
if (rdt_mon_capable) {
ret = alloc_rmid();
- if (ret < 0)
+ if (ret < 0) {
+ rdt_last_cmd_puts("out of RMIDs\n");
goto out_destroy;
+ }
rdtgrp->mon.rmid = ret;
ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
- if (ret)
+ if (ret) {
+ rdt_last_cmd_puts("kernfs subdir error\n");
goto out_idfree;
+ }
}
kernfs_activate(kn);
@@ -1652,8 +1734,10 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
kn = rdtgrp->kn;
ret = closid_alloc();
- if (ret < 0)
+ if (ret < 0) {
+ rdt_last_cmd_puts("out of CLOSIDs\n");
goto out_common_fail;
+ }
closid = ret;
rdtgrp->closid = closid;
@@ -1665,8 +1749,10 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
* of tasks and cpus to monitor.
*/
ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
- if (ret)
+ if (ret) {
+ rdt_last_cmd_puts("kernfs subdir error\n");
goto out_id_free;
+ }
}
goto out_unlock;
@@ -1902,6 +1988,9 @@ int __init rdtgroup_init(void)
{
int ret = 0;
+ seq_buf_init(&last_cmd_status, last_cmd_status_buf,
+ sizeof(last_cmd_status_buf));
+
ret = rdtgroup_setup_root();
if (ret)
return ret;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 87cc9ab7a13c..4ca632a06e0b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -204,7 +204,7 @@ static int error_context(struct mce *m)
return IN_KERNEL;
}
-static int mce_severity_amd_smca(struct mce *m, int err_ctx)
+static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
{
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
u32 low, high;
@@ -245,6 +245,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
if (m->status & MCI_STATUS_UC) {
+ if (ctx == IN_KERNEL)
+ return MCE_PANIC_SEVERITY;
+
/*
* On older systems where overflow_recov flag is not present, we
* should simply panic if an error overflow occurs. If
@@ -255,10 +258,6 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
if (mce_flags.smca)
return mce_severity_amd_smca(m, ctx);
- /* software can try to contain */
- if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
- return MCE_PANIC_SEVERITY;
-
/* kill current process */
return MCE_AR_SEVERITY;
} else {
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3b413065c613..868e412b4f0c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1367,13 +1367,12 @@ static void __start_timer(struct timer_list *t, unsigned long interval)
local_irq_restore(flags);
}
-static void mce_timer_fn(unsigned long data)
+static void mce_timer_fn(struct timer_list *t)
{
- struct timer_list *t = this_cpu_ptr(&mce_timer);
- int cpu = smp_processor_id();
+ struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
unsigned long iv;
- WARN_ON(cpu != data);
+ WARN_ON(cpu_t != t);
iv = __this_cpu_read(mce_next_interval);
@@ -1763,17 +1762,15 @@ static void mce_start_timer(struct timer_list *t)
static void __mcheck_cpu_setup_timer(void)
{
struct timer_list *t = this_cpu_ptr(&mce_timer);
- unsigned int cpu = smp_processor_id();
- setup_pinned_timer(t, mce_timer_fn, cpu);
+ timer_setup(t, mce_timer_fn, TIMER_PINNED);
}
static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = this_cpu_ptr(&mce_timer);
- unsigned int cpu = smp_processor_id();
- setup_pinned_timer(t, mce_timer_fn, cpu);
+ timer_setup(t, mce_timer_fn, TIMER_PINNED);
mce_start_timer(t);
}
@@ -1788,6 +1785,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
+dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
+{
+ machine_check_vector(regs, error_code);
+}
+
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c6daec4bdba5..330b8462d426 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
#define F14H_MPB_MAX_SIZE 1824
#define F15H_MPB_MAX_SIZE 4096
#define F16H_MPB_MAX_SIZE 3458
+#define F17H_MPB_MAX_SIZE 3200
switch (family) {
case 0x14:
@@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
case 0x16:
max_size = F16H_MPB_MAX_SIZE;
break;
+ case 0x17:
+ max_size = F17H_MPB_MAX_SIZE;
+ break;
default:
max_size = F1XH_MPB_MAX_SIZE;
break;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index c4fa4a85d4cb..e4fc595cd6ea 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -239,7 +239,7 @@ static int __init save_microcode_in_initrd(void)
break;
case X86_VENDOR_AMD:
if (c->x86 >= 0x10)
- return save_microcode_in_initrd_amd(cpuid_eax(1));
+ ret = save_microcode_in_initrd_amd(cpuid_eax(1));
break;
default:
break;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 7dbcb7adf797..f7c55b0e753a 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -45,6 +45,9 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
/* Current microcode patch used in early patching on the APs. */
static struct microcode_intel *intel_ucode_patch;
+/* last level cache size per core */
+static int llc_size_per_core;
+
static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
unsigned int s2, unsigned int p2)
{
@@ -565,15 +568,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
}
#else
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
- __native_flush_tlb_global_irq_disabled();
-}
-
static inline void print_ucode(struct ucode_cpu_info *uci)
{
struct microcode_intel *mc;
@@ -602,10 +596,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if (rev != mc->hdr.rev)
return -1;
-#ifdef CONFIG_X86_64
- /* Flush global tlb. This is precaution. */
- flush_tlb_early();
-#endif
uci->cpu_sig.rev = rev;
if (early)
@@ -923,8 +913,19 @@ static bool is_blacklisted(unsigned int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
- pr_err_once("late loading on model 79 is disabled.\n");
+ /*
+ * Late loading on model 79 with microcode revision less than 0x0b000021
+ * and LLC size per core bigger than 2.5MB may result in a system hang.
+ * This behavior is documented in item BDF90, #334165 (Intel Xeon
+ * Processor E7-8800/4800 v4 Product Family).
+ */
+ if (c->x86 == 6 &&
+ c->x86_model == INTEL_FAM6_BROADWELL_X &&
+ c->x86_mask == 0x01 &&
+ llc_size_per_core > 2621440 &&
+ c->microcode < 0x0b000021) {
+ pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+ pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
return true;
}
@@ -979,6 +980,15 @@ static struct microcode_ops microcode_intel_ops = {
.apply_microcode = apply_microcode_intel,
};
+static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
+{
+ u64 llc_size = c->x86_cache_size * 1024;
+
+ do_div(llc_size, c->x86_max_cores);
+
+ return (int)llc_size;
+}
+
struct microcode_ops * __init init_intel_microcode(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -989,5 +999,7 @@ struct microcode_ops * __init init_intel_microcode(void)
return NULL;
}
+ llc_size_per_core = calc_llc_size_per_core(c);
+
return &microcode_intel_ops;
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 236324e83a3a..85eb5fc180c8 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platform(void)
#endif
}
-const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
+const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
.name = "Microsoft Hyper-V",
.detect = ms_hyperv_platform,
- .init_platform = ms_hyperv_init_platform,
+ .type = X86_HYPER_MS_HYPERV,
+ .init.init_platform = ms_hyperv_init_platform,
};
-EXPORT_SYMBOL(x86_hyper_ms_hyperv);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 4378a729b933..e7ecedafa1c8 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -5,6 +5,8 @@
#include <linux/seq_file.h>
#include <linux/cpufreq.h>
+#include "cpu.h"
+
/*
* Get CPU information for use by the procfs.
*/
@@ -78,7 +80,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
if (cpu_has(c, X86_FEATURE_TSC)) {
- unsigned int freq = arch_freq_get_on_cpu(cpu);
+ unsigned int freq = aperfmperf_get_khz(cpu);
if (!freq)
freq = cpufreq_quick_get(cpu);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 05459ad3db46..d0e69769abfd 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -21,7 +21,6 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
- { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 },
{ X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
{ X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 40ed26852ebd..8e005329648b 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void)
(eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
}
-const __refconst struct hypervisor_x86 x86_hyper_vmware = {
+const __initconst struct hypervisor_x86 x86_hyper_vmware = {
.name = "VMware",
.detect = vmware_platform,
- .init_platform = vmware_platform_setup,
- .x2apic_available = vmware_legacy_x2apic_available,
+ .type = X86_HYPER_VMWARE,
+ .init.init_platform = vmware_platform_setup,
+ .init.x2apic_available = vmware_legacy_x2apic_available,
};
-EXPORT_SYMBOL(x86_hyper_vmware);