23 files changed, 775 insertions, 214 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 236999c54edc..570e8bb1f386 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -23,6 +23,7 @@ obj-y			+= rdrand.o
 obj-y			+= match.o
 obj-y			+= bugs.o
 obj-y			+= aperfmperf.o
+obj-y			+= cpuid-deps.o
 
 obj-$(CONFIG_PROC_FS)	+= proc.o
 obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d58184b7cd44..ea831c858195 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
 	case 0x17: init_amd_zn(c); break;
 	}
 
-	/* Enable workaround for FXSAVE leak */
-	if (c->x86 >= 6)
+	/*
+	 * Enable workaround for FXSAVE leak on CPUs
+	 * without a XSaveErPtr feature
+	 */
+	if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
 		set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
 
 	cpu_detect_cache_sizes(c);
@@ -826,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_K8);
 
 	if (cpu_has(c, X86_FEATURE_XMM2)) {
-		/* MFENCE stops RDTSC speculation */
-		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+		unsigned long long val;
+		int ret;
+
+		/*
+		 * A serializing LFENCE has less overhead than MFENCE, so
+		 * use it for execution serialization.  On families which
+		 * don't have that MSR, LFENCE is already serializing.
+		 * msr_set_bit() uses the safe accessors, too, even if the MSR
+		 * is not present.
+		 */
+		msr_set_bit(MSR_F10H_DECFG,
+			    MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+		/*
+		 * Verify that the MSR write was successful (could be running
+		 * under a hypervisor) and only then assume that LFENCE is
+		 * serializing.
+		 */
+		ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+		if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+			/* A serializing LFENCE stops RDTSC speculation */
+			set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+		} else {
+			/* MFENCE stops RDTSC speculation */
+			set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+		}
 	}
 
 	/*
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 957813e0180d..7eba34df54c3 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -14,6 +14,8 @@
 #include <linux/percpu.h>
 #include <linux/smp.h>
 
+#include "cpu.h"
+
 struct aperfmperf_sample {
 	unsigned int	khz;
 	ktime_t	time;
@@ -24,7 +26,7 @@ struct aperfmperf_sample {
 static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
 
 #define APERFMPERF_CACHE_THRESHOLD_MS	10
-#define APERFMPERF_REFRESH_DELAY_MS	20
+#define APERFMPERF_REFRESH_DELAY_MS	10
 #define APERFMPERF_STALE_THRESHOLD_MS	1000
 
 /*
@@ -38,8 +40,6 @@ static void aperfmperf_snapshot_khz(void *dummy)
 	u64 aperf, aperf_delta;
 	u64 mperf, mperf_delta;
 	struct aperfmperf_sample *s = this_cpu_ptr(&samples);
-	ktime_t now = ktime_get();
-	s64 time_delta = ktime_ms_delta(now, s->time);
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -57,38 +57,68 @@ static void aperfmperf_snapshot_khz(void *dummy)
 	if (mperf_delta == 0)
 		return;
 
-	s->time = now;
+	s->time = ktime_get();
 	s->aperf = aperf;
 	s->mperf = mperf;
-
-	/* If the previous iteration was too long ago, discard it. */
-	if (time_delta > APERFMPERF_STALE_THRESHOLD_MS)
-		s->khz = 0;
-	else
-		s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
+	s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
 }
 
-unsigned int arch_freq_get_on_cpu(int cpu)
+static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
 {
-	s64 time_delta;
-	unsigned int khz;
+	s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu));
+
+	/* Don't bother re-computing within the cache threshold time. */
+	if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
+		return true;
+
+	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait);
+
+	/* Return false if the previous iteration was too long ago. */
+	return time_delta <= APERFMPERF_STALE_THRESHOLD_MS;
+}
 
+unsigned int aperfmperf_get_khz(int cpu)
+{
 	if (!cpu_khz)
 		return 0;
 
 	if (!static_cpu_has(X86_FEATURE_APERFMPERF))
 		return 0;
 
-	/* Don't bother re-computing within the cache threshold time. */
-	time_delta = ktime_ms_delta(ktime_get(), per_cpu(samples.time, cpu));
-	khz = per_cpu(samples.khz, cpu);
-	if (khz && time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
-		return khz;
+	aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
+	return per_cpu(samples.khz, cpu);
+}
 
-	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
-	khz = per_cpu(samples.khz, cpu);
-	if (khz)
-		return khz;
+void arch_freq_prepare_all(void)
+{
+	ktime_t now = ktime_get();
+	bool wait = false;
+	int cpu;
+
+	if (!cpu_khz)
+		return;
+
+	if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+		return;
+
+	for_each_online_cpu(cpu)
+		if (!aperfmperf_snapshot_cpu(cpu, now, false))
+			wait = true;
+
+	if (wait)
+		msleep(APERFMPERF_REFRESH_DELAY_MS);
+}
+
+unsigned int arch_freq_get_on_cpu(int cpu)
+{
+	if (!cpu_khz)
+		return 0;
+
+	if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+		return 0;
+
+	if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
+		return per_cpu(samples.khz, cpu);
 
 	msleep(APERFMPERF_REFRESH_DELAY_MS);
 	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ba0b2424c9b0..390b3dc3d438 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -10,6 +10,10 @@
  */
 #include <linux/init.h>
 #include <linux/utsname.h>
+#include <linux/cpu.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
 #include <asm/bugs.h>
 #include <asm/processor.h>
 #include <asm/processor-flags.h>
@@ -19,6 +23,9 @@
 #include <asm/alternative.h>
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
+#include <asm/intel-family.h>
+
+static void __init spectre_v2_select_mitigation(void);
 
 void __init check_bugs(void)
 {
@@ -29,6 +36,9 @@ void __init check_bugs(void)
 		print_cpu_info(&boot_cpu_data);
 	}
 
+	/* Select the proper spectre mitigation before patching alternatives */
+	spectre_v2_select_mitigation();
+
 #ifdef CONFIG_X86_32
 	/*
 	 * Check whether we are able to run this kernel safely on SMP.
@@ -60,3 +70,214 @@ void __init check_bugs(void)
 		set_memory_4k((unsigned long)__va(0), 1);
 #endif
 }
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+	SPECTRE_V2_CMD_NONE,
+	SPECTRE_V2_CMD_AUTO,
+	SPECTRE_V2_CMD_FORCE,
+	SPECTRE_V2_CMD_RETPOLINE,
+	SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+	SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+	[SPECTRE_V2_NONE]			= "Vulnerable",
+	[SPECTRE_V2_RETPOLINE_MINIMAL]		= "Vulnerable: Minimal generic ASM retpoline",
+	[SPECTRE_V2_RETPOLINE_MINIMAL_AMD]	= "Vulnerable: Minimal AMD ASM retpoline",
+	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
+	[SPECTRE_V2_RETPOLINE_AMD]		= "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "Spectre V2 mitigation: " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		pr_info("%s\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		pr_info("%s\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+	return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+	int len = strlen(opt);
+
+	return len == arglen && !strncmp(arg, opt, len);
+}
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+	char arg[20];
+	int ret;
+
+	ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+				  sizeof(arg));
+	if (ret > 0)  {
+		if (match_option(arg, ret, "off")) {
+			goto disable;
+		} else if (match_option(arg, ret, "on")) {
+			spec2_print_if_secure("force enabled on command line.");
+			return SPECTRE_V2_CMD_FORCE;
+		} else if (match_option(arg, ret, "retpoline")) {
+			spec2_print_if_insecure("retpoline selected on command line.");
+			return SPECTRE_V2_CMD_RETPOLINE;
+		} else if (match_option(arg, ret, "retpoline,amd")) {
+			if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+				pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+				return SPECTRE_V2_CMD_AUTO;
+			}
+			spec2_print_if_insecure("AMD retpoline selected on command line.");
+			return SPECTRE_V2_CMD_RETPOLINE_AMD;
+		} else if (match_option(arg, ret, "retpoline,generic")) {
+			spec2_print_if_insecure("generic retpoline selected on command line.");
+			return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+		} else if (match_option(arg, ret, "auto")) {
+			return SPECTRE_V2_CMD_AUTO;
+		}
+	}
+
+	if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+		return SPECTRE_V2_CMD_AUTO;
+disable:
+	spec2_print_if_insecure("disabled on command line.");
+	return SPECTRE_V2_CMD_NONE;
+}
+
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+	    boot_cpu_data.x86 == 6) {
+		switch (boot_cpu_data.x86_model) {
+		case INTEL_FAM6_SKYLAKE_MOBILE:
+		case INTEL_FAM6_SKYLAKE_DESKTOP:
+		case INTEL_FAM6_SKYLAKE_X:
+		case INTEL_FAM6_KABYLAKE_MOBILE:
+		case INTEL_FAM6_KABYLAKE_DESKTOP:
+			return true;
+		}
+	}
+	return false;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+	enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+	/*
+	 * If the CPU is not affected and the command line mode is NONE or AUTO
+	 * then nothing to do.
+	 */
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+	    (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+		return;
+
+	switch (cmd) {
+	case SPECTRE_V2_CMD_NONE:
+		return;
+
+	case SPECTRE_V2_CMD_FORCE:
+		/* FALLTRHU */
+	case SPECTRE_V2_CMD_AUTO:
+		goto retpoline_auto;
+
+	case SPECTRE_V2_CMD_RETPOLINE_AMD:
+		if (IS_ENABLED(CONFIG_RETPOLINE))
+			goto retpoline_amd;
+		break;
+	case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+		if (IS_ENABLED(CONFIG_RETPOLINE))
+			goto retpoline_generic;
+		break;
+	case SPECTRE_V2_CMD_RETPOLINE:
+		if (IS_ENABLED(CONFIG_RETPOLINE))
+			goto retpoline_auto;
+		break;
+	}
+	pr_err("kernel not compiled with retpoline; no mitigation available!");
+	return;
+
+retpoline_auto:
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+	retpoline_amd:
+		if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+			pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+			goto retpoline_generic;
+		}
+		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+					 SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+	} else {
+	retpoline_generic:
+		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+					 SPECTRE_V2_RETPOLINE_MINIMAL;
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+	}
+
+	spectre_v2_enabled = mode;
+	pr_info("%s\n", spectre_v2_strings[mode]);
+
+	/*
+	 * If neither SMEP or KPTI are available, there is a risk of
+	 * hitting userspace addresses in the RSB after a context switch
+	 * from a shallow call stack to a deeper one. To prevent this fill
+	 * the entire RSB, even when using IBRS.
+	 *
+	 * Skylake era CPUs have a separate issue with *underflow* of the
+	 * RSB, when they will predict 'ret' targets from the generic BTB.
+	 * The proper mitigation for this is IBRS. If IBRS is not supported
+	 * or deactivated in favour of retpolines the RSB fill on context
+	 * switch is required.
+	 */
+	if ((!boot_cpu_has(X86_FEATURE_PTI) &&
+	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+		pr_info("Filling RSB on context switch\n");
+	}
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+		return sprintf(buf, "Not affected\n");
+	if (boot_cpu_has(X86_FEATURE_PTI))
+		return sprintf(buf, "Mitigation: PTI\n");
+	return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+		return sprintf(buf, "Not affected\n");
+	return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		return sprintf(buf, "Not affected\n");
+
+	return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+}
+#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c9176bae7fd8..ef29ad001991 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -329,6 +329,30 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 	}
 }
 
+static __always_inline void setup_umip(struct cpuinfo_x86 *c)
+{
+	/* Check the boot processor, plus build option for UMIP. */
+	if (!cpu_feature_enabled(X86_FEATURE_UMIP))
+		goto out;
+
+	/* Check the current processor's cpuid bits. */
+	if (!cpu_has(c, X86_FEATURE_UMIP))
+		goto out;
+
+	cr4_set_bits(X86_CR4_UMIP);
+
+	pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
+
+	return;
+
+out:
+	/*
+	 * Make sure UMIP is disabled in case it was enabled in a
+	 * previous boot (e.g., via kexec).
+	 */
+	cr4_clear_bits(X86_CR4_UMIP);
+}
+
 /*
  * Protection Keys are not available in 32-bit mode.
  */
@@ -452,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
 	return NULL;		/* Not found */
 }
 
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
 
 void load_percpu_segment(int cpu)
 {
@@ -466,28 +490,23 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
-/* Setup the fixmap mapping only once per-processor */
-static inline void setup_fixmap_gdt(int cpu)
-{
-#ifdef CONFIG_X86_64
-	/* On 64-bit systems, we use a read-only fixmap GDT. */
-	pgprot_t prot = PAGE_KERNEL_RO;
-#else
-	/*
-	 * On native 32-bit systems, the GDT cannot be read-only because
-	 * our double fault handler uses a task gate, and entering through
-	 * a task gate needs to change an available TSS to busy.  If the GDT
-	 * is read-only, that will triple fault.
-	 *
-	 * On Xen PV, the GDT must be read-only because the hypervisor requires
-	 * it.
-	 */
-	pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-		PAGE_KERNEL_RO : PAGE_KERNEL;
+#ifdef CONFIG_X86_32
+/* The 32-bit entry code needs to find cpu_entry_area. */
+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 #endif
 
-	__set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
-}
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
+	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
+};
+#endif
 
 /* Load the original GDT from the per-cpu structure */
 void load_direct_gdt(int cpu)
@@ -723,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
 {
 	int i;
 
-	for (i = 0; i < NCAPINTS; i++) {
+	for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
 		c->x86_capability[i] &= ~cpu_caps_cleared[i];
 		c->x86_capability[i] |= cpu_caps_set[i];
 	}
@@ -863,8 +882,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
  * cache alignment.
  * The others are not touched to avoid unwanted side effects.
  *
- * WARNING: this function is only called on the BP.  Don't add code here
- * that is supposed to run on all CPUs.
+ * WARNING: this function is only called on the boot CPU.  Don't add code
+ * here that is supposed to run on all CPUs.
  */
 static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 {
@@ -903,6 +922,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	}
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+	if (c->x86_vendor != X86_VENDOR_AMD)
+		setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
 	fpu__init_system(c);
 
 #ifdef CONFIG_X86_32
@@ -1147,9 +1173,10 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	/* Disable the PN if appropriate */
 	squash_the_stupid_serial_number(c);
 
-	/* Set up SMEP/SMAP */
+	/* Set up SMEP/SMAP/UMIP */
 	setup_smep(c);
 	setup_smap(c);
+	setup_umip(c);
 
 	/*
 	 * The vendor-specific functions might have changed features.
@@ -1225,7 +1252,7 @@ void enable_sep_cpu(void)
 		return;
 
 	cpu = get_cpu();
-	tss = &per_cpu(cpu_tss, cpu);
+	tss = &per_cpu(cpu_tss_rw, cpu);
 
 	/*
 	 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1234,11 +1261,7 @@ void enable_sep_cpu(void)
 
 	tss->x86_tss.ss1 = __KERNEL_CS;
 	wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
-	wrmsr(MSR_IA32_SYSENTER_ESP,
-	      (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
-	      0);
-
+	wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
 	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
 	put_cpu();
@@ -1301,18 +1324,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
 		pr_cont(")\n");
 }
 
-static __init int setup_disablecpuid(char *arg)
+/*
+ * clearcpuid= was already parsed in fpu__init_parse_early_param.
+ * But we need to keep a dummy __setup around otherwise it would
+ * show up as an environment variable for init.
+ */
+static __init int setup_clearcpuid(char *arg)
 {
-	int bit;
-
-	if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
-		setup_clear_cpu_cap(bit);
-	else
-		return 0;
-
 	return 1;
 }
-__setup("clearcpuid=", setup_disablecpuid);
+__setup("clearcpuid=", setup_clearcpuid);
 
 #ifdef CONFIG_X86_64
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1334,25 +1355,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
-	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
+	extern char _entry_trampoline[];
+	extern char entry_SYSCALL_64_trampoline[];
+
+	int cpu = smp_processor_id();
+	unsigned long SYSCALL64_entry_trampoline =
+		(unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+		(entry_SYSCALL_64_trampoline - _entry_trampoline);
+
 	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+	if (static_cpu_has(X86_FEATURE_PTI))
+		wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+	else
+		wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
 #ifdef CONFIG_IA32_EMULATION
 	wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1363,7 +1381,7 @@ void syscall_init(void)
 	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
 	 */
 	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
 	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
 	wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1507,7 +1525,7 @@ void cpu_init(void)
 	if (cpu)
 		load_ucode_ap();
 
-	t = &per_cpu(cpu_tss, cpu);
+	t = &per_cpu(cpu_tss_rw, cpu);
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1546,7 +1564,7 @@ void cpu_init(void)
 	 * set up and load the per-CPU TSS
 	 */
 	if (!oist->ist[0]) {
-		char *estacks = per_cpu(exception_stacks, cpu);
+		char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
 
 		for (v = 0; v < N_EXCEPTION_STACKS; v++) {
 			estacks += exception_stack_sizes[v];
@@ -1557,7 +1575,7 @@ void cpu_init(void)
 		}
 	}
 
-	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
 	/*
 	 * <= is required because the CPU will access up to
@@ -1572,9 +1590,14 @@ void cpu_init(void)
 	initialize_tlbstate_and_flush();
 	enter_lazy_tlb(&init_mm, me);
 
-	load_sp0(t, &current->thread);
-	set_tss_desc(cpu, t);
+	/*
+	 * Initialize the TSS.  sp0 points to the entry trampoline stack
+	 * regardless of what task is running.
+	 */
+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 	load_TR_desc();
+	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
+
 	load_mm_ldt(&init_mm);
 
 	clear_all_debug_regs();
@@ -1585,7 +1608,6 @@ void cpu_init(void)
 	if (is_uv_system())
 		uv_cpu_init();
 
-	setup_fixmap_gdt(cpu);
 	load_fixmap_gdt(cpu);
 }
 
@@ -1595,8 +1617,7 @@ void cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	struct task_struct *curr = current;
-	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
-	struct thread_struct *thread = &curr->thread;
+	struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
 
 	wait_for_master_cpu(cpu);
 
@@ -1627,12 +1648,16 @@ void cpu_init(void)
 	initialize_tlbstate_and_flush();
 	enter_lazy_tlb(&init_mm, curr);
 
-	load_sp0(t, thread);
-	set_tss_desc(cpu, t);
+	/*
+	 * Initialize the TSS.  Don't bother initializing sp0, as the initial
+	 * task never enters user mode.
+	 */
+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 	load_TR_desc();
+
 	load_mm_ldt(&init_mm);
 
-	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
 #ifdef CONFIG_DOUBLEFAULT
 	/* Set up doublefault TSS pointer in the GDT */
@@ -1644,7 +1669,6 @@ void cpu_init(void)
 
 	fpu__init_cpu();
 
-	setup_fixmap_gdt(cpu);
 	load_fixmap_gdt(cpu);
 }
 #endif
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index f52a370b6c00..e806b11a99af 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -47,4 +47,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
 
 extern void get_cpu_cap(struct cpuinfo_x86 *c);
 extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+
+unsigned int aperfmperf_get_khz(int cpu);
+
 #endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
new file mode 100644
index 000000000000..904b0a3c4e53
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -0,0 +1,121 @@
+/* Declare dependencies between CPUIDs */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/cpufeature.h>
+
+struct cpuid_dep {
+	unsigned int	feature;
+	unsigned int	depends;
+};
+
+/*
+ * Table of CPUID features that depend on others.
+ *
+ * This only includes dependencies that can be usefully disabled, not
+ * features part of the base set (like FPU).
+ *
+ * Note this all is not __init / __initdata because it can be
+ * called from cpu hotplug. It shouldn't do anything in this case,
+ * but it's difficult to tell that to the init reference checker.
+ */
+const static struct cpuid_dep cpuid_deps[] = {
+	{ X86_FEATURE_XSAVEOPT,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_XSAVEC,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_XSAVES,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_AVX,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_PKU,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_MPX,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_XGETBV1,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_FXSR_OPT,		X86_FEATURE_FXSR      },
+	{ X86_FEATURE_XMM,		X86_FEATURE_FXSR      },
+	{ X86_FEATURE_XMM2,		X86_FEATURE_XMM       },
+	{ X86_FEATURE_XMM3,		X86_FEATURE_XMM2      },
+	{ X86_FEATURE_XMM4_1,		X86_FEATURE_XMM2      },
+	{ X86_FEATURE_XMM4_2,		X86_FEATURE_XMM2      },
+	{ X86_FEATURE_XMM3,		X86_FEATURE_XMM2      },
+	{ X86_FEATURE_PCLMULQDQ,	X86_FEATURE_XMM2      },
+	{ X86_FEATURE_SSSE3,		X86_FEATURE_XMM2,     },
+	{ X86_FEATURE_F16C,		X86_FEATURE_XMM2,     },
+	{ X86_FEATURE_AES,		X86_FEATURE_XMM2      },
+	{ X86_FEATURE_SHA_NI,		X86_FEATURE_XMM2      },
+	{ X86_FEATURE_FMA,		X86_FEATURE_AVX       },
+	{ X86_FEATURE_AVX2,		X86_FEATURE_AVX,      },
+	{ X86_FEATURE_AVX512F,		X86_FEATURE_AVX,      },
+	{ X86_FEATURE_AVX512IFMA,	X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512PF,		X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512ER,		X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512CD,		X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512DQ,		X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512BW,		X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512VL,		X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512VBMI,	X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512_VBMI2,	X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_GFNI,		X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_VAES,		X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_VPCLMULQDQ,	X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_AVX512_VNNI,	X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_AVX512_BITALG,	X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_AVX512_4VNNIW,	X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512_4FMAPS,	X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F   },
+	{}
+};
+
+static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
+{
+	/*
+	 * Note: This could use the non atomic __*_bit() variants, but the
+	 * rest of the cpufeature code uses atomics as well, so keep it for
+	 * consistency. Cleanup all of it separately.
+	 */
+	if (!c) {
+		clear_cpu_cap(&boot_cpu_data, feature);
+		set_bit(feature, (unsigned long *)cpu_caps_cleared);
+	} else {
+		clear_bit(feature, (unsigned long *)c->x86_capability);
+	}
+}
+
+/* Take the capabilities and the BUG bits into account */
+#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
+
+static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+	DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
+	const struct cpuid_dep *d;
+	bool changed;
+
+	if (WARN_ON(feature >= MAX_FEATURE_BITS))
+		return;
+
+	clear_feature(c, feature);
+
+	/* Collect all features to disable, handling dependencies */
+	memset(disable, 0, sizeof(disable));
+	__set_bit(feature, disable);
+
+	/* Loop until we get a stable state. */
+	do {
+		changed = false;
+		for (d = cpuid_deps; d->feature; d++) {
+			if (!test_bit(d->depends, disable))
+				continue;
+			if (__test_and_set_bit(d->feature, disable))
+				continue;
+
+			changed = true;
+			clear_feature(c, d->feature);
+		}
+	} while (changed);
+}
+
+void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+	do_clear_cpu_cap(c, feature);
+}
+
+void setup_clear_cpu_cap(unsigned int feature)
+{
+	do_clear_cpu_cap(NULL, feature);
+}
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 4fa90006ac68..bea8d3e24f50 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -26,6 +26,12 @@
 #include <asm/processor.h>
 #include <asm/hypervisor.h>
 
+extern const struct hypervisor_x86 x86_hyper_vmware;
+extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
+extern const struct hypervisor_x86 x86_hyper_xen_pv;
+extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+extern const struct hypervisor_x86 x86_hyper_kvm;
+
 static const __initconst struct hypervisor_x86 * const hypervisors[] =
 {
 #ifdef CONFIG_XEN_PV
@@ -41,54 +47,52 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
 #endif
 };
 
-const struct hypervisor_x86 *x86_hyper;
-EXPORT_SYMBOL(x86_hyper);
+enum x86_hypervisor_type x86_hyper_type;
+EXPORT_SYMBOL(x86_hyper_type);
 
-static inline void __init
+static inline const struct hypervisor_x86 * __init
 detect_hypervisor_vendor(void)
 {
-	const struct hypervisor_x86 *h, * const *p;
+	const struct hypervisor_x86 *h = NULL, * const *p;
 	uint32_t pri, max_pri = 0;
 
 	for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
-		h = *p;
-		pri = h->detect();
-		if (pri != 0 && pri > max_pri) {
+		pri = (*p)->detect();
+		if (pri > max_pri) {
 			max_pri = pri;
-			x86_hyper = h;
+			h = *p;
 		}
 	}
 
-	if (max_pri)
-		pr_info("Hypervisor detected: %s\n", x86_hyper->name);
+	if (h)
+		pr_info("Hypervisor detected: %s\n", h->name);
+
+	return h;
 }
 
-void __init init_hypervisor_platform(void)
+static void __init copy_array(const void *src, void *target, unsigned int size)
 {
+	unsigned int i, n = size / sizeof(void *);
+	const void * const *from = (const void * const *)src;
+	const void **to = (const void **)target;
 
-	detect_hypervisor_vendor();
-
-	if (!x86_hyper)
-		return;
-
-	if (x86_hyper->init_platform)
-		x86_hyper->init_platform();
+	for (i = 0; i < n; i++)
+		if (from[i])
+			to[i] = from[i];
 }
 
-bool __init hypervisor_x2apic_available(void)
+void __init init_hypervisor_platform(void)
 {
-	return x86_hyper                   &&
-	       x86_hyper->x2apic_available &&
-	       x86_hyper->x2apic_available();
-}
+	const struct hypervisor_x86 *h;
 
-void hypervisor_pin_vcpu(int cpu)
-{
-	if (!x86_hyper)
+	h = detect_hypervisor_vendor();
+
+	if (!h)
 		return;
 
-	if (x86_hyper->pin_vcpu)
-		x86_hyper->pin_vcpu(cpu);
-	else
-		WARN_ONCE(1, "vcpu pinning requested but not supported!\n");
+	copy_array(&h->init, &x86_init.hyper, sizeof(h->init));
+	copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime));
+
+	x86_hyper_type = h->type;
+	x86_init.hyper.init_platform();
 }
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b720dacac051..b1af22073e28 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -187,21 +187,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 	if (c->x86 == 6 && c->x86_model < 15)
 		clear_cpu_cap(c, X86_FEATURE_PAT);
 
-#ifdef CONFIG_KMEMCHECK
-	/*
-	 * P4s have a "fast strings" feature which causes single-
-	 * stepping REP instructions to only generate a #DB on
-	 * cache-line boundaries.
-	 *
-	 * Ingo Molnar reported a Pentium D (model 6) and a Xeon
-	 * (model 2) with the same problem.
-	 */
-	if (c->x86 == 15)
-		if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
-				  MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0)
-			pr_info("kmemcheck: Disabling fast string operations\n");
-#endif
-
 	/*
 	 * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
 	 * clear the fast string and enhanced fast string CPU capabilities.
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index cd5fc61ba450..99442370de40 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -267,6 +267,7 @@ static void rdt_get_cdp_l3_config(int type)
 	r->num_closid = r_l3->num_closid / 2;
 	r->cache.cbm_len = r_l3->cache.cbm_len;
 	r->default_ctrl = r_l3->default_ctrl;
+	r->cache.shareable_bits = r_l3->cache.shareable_bits;
 	r->data_width = (r->cache.cbm_len + 3) / 4;
 	r->alloc_capable = true;
 	/*
@@ -524,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 		 */
 		if (static_branch_unlikely(&rdt_mon_enable_key))
 			rmdir_mondata_subdir_allrdtgrp(r, d->id);
-		kfree(d->ctrl_val);
-		kfree(d->rmid_busy_llc);
-		kfree(d->mbm_total);
-		kfree(d->mbm_local);
 		list_del(&d->list);
 		if (is_mbm_enabled())
 			cancel_delayed_work(&d->mbm_over);
@@ -544,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 			cancel_delayed_work(&d->cqm_limbo);
 		}
 
+		kfree(d->ctrl_val);
+		kfree(d->rmid_busy_llc);
+		kfree(d->mbm_total);
+		kfree(d->mbm_local);
 		kfree(d);
 		return;
 	}
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index a43a72d8e88e..3397244984f5 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -127,12 +127,15 @@ struct rdtgroup {
 #define RFTYPE_BASE			BIT(1)
 #define RF_CTRLSHIFT			4
 #define RF_MONSHIFT			5
+#define RF_TOPSHIFT			6
 #define RFTYPE_CTRL			BIT(RF_CTRLSHIFT)
 #define RFTYPE_MON			BIT(RF_MONSHIFT)
+#define RFTYPE_TOP			BIT(RF_TOPSHIFT)
 #define RFTYPE_RES_CACHE		BIT(8)
 #define RFTYPE_RES_MB			BIT(9)
 #define RF_CTRL_INFO			(RFTYPE_INFO | RFTYPE_CTRL)
 #define RF_MON_INFO			(RFTYPE_INFO | RFTYPE_MON)
+#define RF_TOP_INFO			(RFTYPE_INFO | RFTYPE_TOP)
 #define RF_CTRL_BASE			(RFTYPE_BASE | RFTYPE_CTRL)
 
 /* List of all resource groups */
@@ -409,6 +412,10 @@ union cpuid_0x10_x_edx {
 	unsigned int full;
 };
 
+void rdt_last_cmd_clear(void);
+void rdt_last_cmd_puts(const char *s);
+void rdt_last_cmd_printf(const char *fmt, ...);
+
 void rdt_ctrl_update(void *arg);
 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
 void rdtgroup_kn_unlock(struct kernfs_node *kn);
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index f6ea94f8954a..23e1d5c249c6 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -42,15 +42,22 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
 	/*
 	 * Only linear delay values is supported for current Intel SKUs.
 	 */
-	if (!r->membw.delay_linear)
+	if (!r->membw.delay_linear) {
+		rdt_last_cmd_puts("No support for non-linear MB domains\n");
 		return false;
+	}
 
 	ret = kstrtoul(buf, 10, &bw);
-	if (ret)
+	if (ret) {
+		rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
 		return false;
+	}
 
-	if (bw < r->membw.min_bw || bw > r->default_ctrl)
+	if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+		rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
+				    r->membw.min_bw, r->default_ctrl);
 		return false;
+	}
 
 	*data = roundup(bw, (unsigned long)r->membw.bw_gran);
 	return true;
@@ -60,8 +67,10 @@ int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d)
 {
 	unsigned long data;
 
-	if (d->have_new_ctrl)
+	if (d->have_new_ctrl) {
+		rdt_last_cmd_printf("duplicate domain %d\n", d->id);
 		return -EINVAL;
+	}
 
 	if (!bw_validate(buf, &data, r))
 		return -EINVAL;
@@ -84,20 +93,29 @@ static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r)
 	int ret;
 
 	ret = kstrtoul(buf, 16, &val);
-	if (ret)
+	if (ret) {
+		rdt_last_cmd_printf("non-hex character in mask %s\n", buf);
 		return false;
+	}
 
-	if (val == 0 || val > r->default_ctrl)
+	if (val == 0 || val > r->default_ctrl) {
+		rdt_last_cmd_puts("mask out of range\n");
 		return false;
+	}
 
 	first_bit = find_first_bit(&val, cbm_len);
 	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
 
-	if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)
+	if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) {
+		rdt_last_cmd_printf("mask %lx has non-consecutive 1-bits\n", val);
 		return false;
+	}
 
-	if ((zero_bit - first_bit) < r->cache.min_cbm_bits)
+	if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
+		rdt_last_cmd_printf("Need at least %d bits in mask\n",
+				    r->cache.min_cbm_bits);
 		return false;
+	}
 
 	*data = val;
 	return true;
@@ -111,8 +129,10 @@ int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
 {
 	unsigned long data;
 
-	if (d->have_new_ctrl)
+	if (d->have_new_ctrl) {
+		rdt_last_cmd_printf("duplicate domain %d\n", d->id);
 		return -EINVAL;
+	}
 
 	if(!cbm_validate(buf, &data, r))
 		return -EINVAL;
@@ -139,8 +159,10 @@ next:
 		return 0;
 	dom = strsep(&line, ";");
 	id = strsep(&dom, "=");
-	if (!dom || kstrtoul(id, 10, &dom_id))
+	if (!dom || kstrtoul(id, 10, &dom_id)) {
+		rdt_last_cmd_puts("Missing '=' or non-numeric domain\n");
 		return -EINVAL;
+	}
 	dom = strim(dom);
 	list_for_each_entry(d, &r->domains, list) {
 		if (d->id == dom_id) {
@@ -196,6 +218,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok, int closid)
 		if (!strcmp(resname, r->name) && closid < r->num_closid)
 			return parse_line(tok, r);
 	}
+	rdt_last_cmd_printf("unknown/unsupported resource name '%s'\n", resname);
 	return -EINVAL;
 }
 
@@ -218,6 +241,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 		rdtgroup_kn_unlock(of->kn);
 		return -ENOENT;
 	}
+	rdt_last_cmd_clear();
 
 	closid = rdtgrp->closid;
 
@@ -229,6 +253,12 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 	while ((tok = strsep(&buf, "\n")) != NULL) {
 		resname = strim(strsep(&tok, ":"));
 		if (!tok) {
+			rdt_last_cmd_puts("Missing ':'\n");
+			ret = -EINVAL;
+			goto out;
+		}
+		if (tok[0] == '\0') {
+			rdt_last_cmd_printf("Missing '%s' value\n", resname);
 			ret = -EINVAL;
 			goto out;
 		}
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c
index 30827510094b..681450eee428 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -51,7 +51,7 @@ static LIST_HEAD(rmid_free_lru);
  *     may have a occupancy value > intel_cqm_threshold. User can change
  *     the threshold occupancy value.
  */
-unsigned int rmid_limbo_count;
+static unsigned int rmid_limbo_count;
 
 /**
  * @rmid_entry - The entry in the limbo and free lists.
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index a869d4a073c5..64c5ff97ee0d 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -24,6 +24,7 @@
 #include <linux/fs.h>
 #include <linux/sysfs.h>
 #include <linux/kernfs.h>
+#include <linux/seq_buf.h>
 #include <linux/seq_file.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/task.h>
@@ -51,6 +52,31 @@ static struct kernfs_node *kn_mongrp;
 /* Kernel fs node for "mon_data" directory under root */
 static struct kernfs_node *kn_mondata;
 
+static struct seq_buf last_cmd_status;
+static char last_cmd_status_buf[512];
+
+void rdt_last_cmd_clear(void)
+{
+	lockdep_assert_held(&rdtgroup_mutex);
+	seq_buf_clear(&last_cmd_status);
+}
+
+void rdt_last_cmd_puts(const char *s)
+{
+	lockdep_assert_held(&rdtgroup_mutex);
+	seq_buf_puts(&last_cmd_status, s);
+}
+
+void rdt_last_cmd_printf(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	lockdep_assert_held(&rdtgroup_mutex);
+	seq_buf_vprintf(&last_cmd_status, fmt, ap);
+	va_end(ap);
+}
+
 /*
  * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
  * we can keep a bitmap of free CLOSIDs in a single integer.
@@ -238,8 +264,10 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 
 	/* Check whether cpus belong to parent ctrl group */
 	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
-	if (cpumask_weight(tmpmask))
+	if (cpumask_weight(tmpmask)) {
+		rdt_last_cmd_puts("can only add CPUs to mongroup that belong to parent\n");
 		return -EINVAL;
+	}
 
 	/* Check whether cpus are dropped from this group */
 	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
@@ -291,8 +319,10 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
 	if (cpumask_weight(tmpmask)) {
 		/* Can't drop from default group */
-		if (rdtgrp == &rdtgroup_default)
+		if (rdtgrp == &rdtgroup_default) {
+			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
 			return -EINVAL;
+		}
 
 		/* Give any dropped cpus to rdtgroup_default */
 		cpumask_or(&rdtgroup_default.cpu_mask,
@@ -357,8 +387,10 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 	}
 
 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	rdt_last_cmd_clear();
 	if (!rdtgrp) {
 		ret = -ENOENT;
+		rdt_last_cmd_puts("directory was removed\n");
 		goto unlock;
 	}
 
@@ -367,13 +399,16 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 	else
 		ret = cpumask_parse(buf, newmask);
 
-	if (ret)
+	if (ret) {
+		rdt_last_cmd_puts("bad cpu list/mask\n");
 		goto unlock;
+	}
 
 	/* check that user didn't specify any offline cpus */
 	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
 	if (cpumask_weight(tmpmask)) {
 		ret = -EINVAL;
+		rdt_last_cmd_puts("can only assign online cpus\n");
 		goto unlock;
 	}
 
@@ -452,6 +487,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
 		 */
 		atomic_dec(&rdtgrp->waitcount);
 		kfree(callback);
+		rdt_last_cmd_puts("task exited\n");
 	} else {
 		/*
 		 * For ctrl_mon groups move both closid and rmid.
@@ -462,10 +498,12 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
 			tsk->closid = rdtgrp->closid;
 			tsk->rmid = rdtgrp->mon.rmid;
 		} else if (rdtgrp->type == RDTMON_GROUP) {
-			if (rdtgrp->mon.parent->closid == tsk->closid)
+			if (rdtgrp->mon.parent->closid == tsk->closid) {
 				tsk->rmid = rdtgrp->mon.rmid;
-			else
+			} else {
+				rdt_last_cmd_puts("Can't move task to different control group\n");
 				ret = -EINVAL;
+			}
 		}
 	}
 	return ret;
@@ -484,8 +522,10 @@ static int rdtgroup_task_write_permission(struct task_struct *task,
 	 */
 	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
 	    !uid_eq(cred->euid, tcred->uid) &&
-	    !uid_eq(cred->euid, tcred->suid))
+	    !uid_eq(cred->euid, tcred->suid)) {
+		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
 		ret = -EPERM;
+	}
 
 	put_cred(tcred);
 	return ret;
@@ -502,6 +542,7 @@ static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
 		tsk = find_task_by_vpid(pid);
 		if (!tsk) {
 			rcu_read_unlock();
+			rdt_last_cmd_printf("No task %d\n", pid);
 			return -ESRCH;
 		}
 	} else {
@@ -529,6 +570,7 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
 	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 		return -EINVAL;
 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	rdt_last_cmd_clear();
 
 	if (rdtgrp)
 		ret = rdtgroup_move_task(pid, rdtgrp, of);
@@ -569,6 +611,21 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of,
 	return ret;
 }
 
+static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
+				    struct seq_file *seq, void *v)
+{
+	int len;
+
+	mutex_lock(&rdtgroup_mutex);
+	len = seq_buf_used(&last_cmd_status);
+	if (len)
+		seq_printf(seq, "%.*s", len, last_cmd_status_buf);
+	else
+		seq_puts(seq, "ok\n");
+	mutex_unlock(&rdtgroup_mutex);
+	return 0;
+}
+
 static int rdt_num_closids_show(struct kernfs_open_file *of,
 				struct seq_file *seq, void *v)
 {
@@ -686,6 +743,13 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
 /* rdtgroup information files for one cache resource. */
 static struct rftype res_common_files[] = {
 	{
+		.name		= "last_cmd_status",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_last_cmd_status_show,
+		.fflags		= RF_TOP_INFO,
+	},
+	{
 		.name		= "num_closids",
 		.mode		= 0444,
 		.kf_ops		= &rdtgroup_kf_single_ops,
@@ -855,6 +919,10 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
 		return PTR_ERR(kn_info);
 	kernfs_get(kn_info);
 
+	ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
+	if (ret)
+		goto out_destroy;
+
 	for_each_alloc_enabled_rdt_resource(r) {
 		fflags =  r->fflags | RF_CTRL_INFO;
 		ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
@@ -1081,6 +1149,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
 	struct dentry *dentry;
 	int ret;
 
+	cpus_read_lock();
 	mutex_lock(&rdtgroup_mutex);
 	/*
 	 * resctrl file system can only be mounted once.
@@ -1130,12 +1199,12 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
 		goto out_mondata;
 
 	if (rdt_alloc_capable)
-		static_branch_enable(&rdt_alloc_enable_key);
+		static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
 	if (rdt_mon_capable)
-		static_branch_enable(&rdt_mon_enable_key);
+		static_branch_enable_cpuslocked(&rdt_mon_enable_key);
 
 	if (rdt_alloc_capable || rdt_mon_capable)
-		static_branch_enable(&rdt_enable_key);
+		static_branch_enable_cpuslocked(&rdt_enable_key);
 
 	if (is_mbm_enabled()) {
 		r = &rdt_resources_all[RDT_RESOURCE_L3];
@@ -1156,7 +1225,9 @@ out_info:
 out_cdp:
 	cdp_disable();
 out:
+	rdt_last_cmd_clear();
 	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
 
 	return dentry;
 }
@@ -1295,9 +1366,7 @@ static void rmdir_all_sub(void)
 		kfree(rdtgrp);
 	}
 	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
-	get_online_cpus();
 	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
-	put_online_cpus();
 
 	kernfs_remove(kn_info);
 	kernfs_remove(kn_mongrp);
@@ -1308,6 +1377,7 @@ static void rdt_kill_sb(struct super_block *sb)
 {
 	struct rdt_resource *r;
 
+	cpus_read_lock();
 	mutex_lock(&rdtgroup_mutex);
 
 	/*Put everything back to default values. */
@@ -1315,11 +1385,12 @@ static void rdt_kill_sb(struct super_block *sb)
 		reset_all_ctrls(r);
 	cdp_disable();
 	rmdir_all_sub();
-	static_branch_disable(&rdt_alloc_enable_key);
-	static_branch_disable(&rdt_mon_enable_key);
-	static_branch_disable(&rdt_enable_key);
+	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
+	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
+	static_branch_disable_cpuslocked(&rdt_enable_key);
 	kernfs_kill_sb(sb);
 	mutex_unlock(&rdtgroup_mutex);
+	cpus_read_unlock();
 }
 
 static struct file_system_type rdt_fs_type = {
@@ -1524,8 +1595,10 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 	int ret;
 
 	prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
+	rdt_last_cmd_clear();
 	if (!prdtgrp) {
 		ret = -ENODEV;
+		rdt_last_cmd_puts("directory was removed\n");
 		goto out_unlock;
 	}
 
@@ -1533,6 +1606,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
 	if (!rdtgrp) {
 		ret = -ENOSPC;
+		rdt_last_cmd_puts("kernel out of memory\n");
 		goto out_unlock;
 	}
 	*r = rdtgrp;
@@ -1544,6 +1618,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
 	if (IS_ERR(kn)) {
 		ret = PTR_ERR(kn);
+		rdt_last_cmd_puts("kernfs create error\n");
 		goto out_free_rgrp;
 	}
 	rdtgrp->kn = kn;
@@ -1557,24 +1632,31 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 	kernfs_get(kn);
 
 	ret = rdtgroup_kn_set_ugid(kn);
-	if (ret)
+	if (ret) {
+		rdt_last_cmd_puts("kernfs perm error\n");
 		goto out_destroy;
+	}
 
-	files = RFTYPE_BASE | RFTYPE_CTRL;
 	files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
 	ret = rdtgroup_add_files(kn, files);
-	if (ret)
+	if (ret) {
+		rdt_last_cmd_puts("kernfs fill error\n");
 		goto out_destroy;
+	}
 
 	if (rdt_mon_capable) {
 		ret = alloc_rmid();
-		if (ret < 0)
+		if (ret < 0) {
+			rdt_last_cmd_puts("out of RMIDs\n");
 			goto out_destroy;
+		}
 		rdtgrp->mon.rmid = ret;
 
 		ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
-		if (ret)
+		if (ret) {
+			rdt_last_cmd_puts("kernfs subdir error\n");
 			goto out_idfree;
+		}
 	}
 	kernfs_activate(kn);
 
@@ -1652,8 +1734,10 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
 
 	kn = rdtgrp->kn;
 	ret = closid_alloc();
-	if (ret < 0)
+	if (ret < 0) {
+		rdt_last_cmd_puts("out of CLOSIDs\n");
 		goto out_common_fail;
+	}
 	closid = ret;
 
 	rdtgrp->closid = closid;
@@ -1665,8 +1749,10 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
 		 * of tasks and cpus to monitor.
 		 */
 		ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
-		if (ret)
+		if (ret) {
+			rdt_last_cmd_puts("kernfs subdir error\n");
 			goto out_id_free;
+		}
 	}
 
 	goto out_unlock;
@@ -1902,6 +1988,9 @@ int __init rdtgroup_init(void)
 {
 	int ret = 0;
 
+	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
+		     sizeof(last_cmd_status_buf));
+
 	ret = rdtgroup_setup_root();
 	if (ret)
 		return ret;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 87cc9ab7a13c..4ca632a06e0b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -204,7 +204,7 @@ static int error_context(struct mce *m)
 	return IN_KERNEL;
 }
 
-static int mce_severity_amd_smca(struct mce *m, int err_ctx)
+static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
 {
 	u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
 	u32 low, high;
@@ -245,6 +245,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
 
 	if (m->status & MCI_STATUS_UC) {
 
+		if (ctx == IN_KERNEL)
+			return MCE_PANIC_SEVERITY;
+
 		/*
 		 * On older systems where overflow_recov flag is not present, we
 		 * should simply panic if an error overflow occurs. If
@@ -255,10 +258,6 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
 			if (mce_flags.smca)
 				return mce_severity_amd_smca(m, ctx);
 
-			/* software can try to contain */
-			if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
-				return MCE_PANIC_SEVERITY;
-
 			/* kill current process */
 			return MCE_AR_SEVERITY;
 		} else {
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3b413065c613..868e412b4f0c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1367,13 +1367,12 @@ static void __start_timer(struct timer_list *t, unsigned long interval)
 	local_irq_restore(flags);
 }
 
-static void mce_timer_fn(unsigned long data)
+static void mce_timer_fn(struct timer_list *t)
 {
-	struct timer_list *t = this_cpu_ptr(&mce_timer);
-	int cpu = smp_processor_id();
+	struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
 	unsigned long iv;
 
-	WARN_ON(cpu != data);
+	WARN_ON(cpu_t != t);
 
 	iv = __this_cpu_read(mce_next_interval);
 
@@ -1763,17 +1762,15 @@ static void mce_start_timer(struct timer_list *t)
 static void __mcheck_cpu_setup_timer(void)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
-	unsigned int cpu = smp_processor_id();
 
-	setup_pinned_timer(t, mce_timer_fn, cpu);
+	timer_setup(t, mce_timer_fn, TIMER_PINNED);
 }
 
 static void __mcheck_cpu_init_timer(void)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
-	unsigned int cpu = smp_processor_id();
 
-	setup_pinned_timer(t, mce_timer_fn, cpu);
+	timer_setup(t, mce_timer_fn, TIMER_PINNED);
 	mce_start_timer(t);
 }
 
@@ -1788,6 +1785,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 void (*machine_check_vector)(struct pt_regs *, long error_code) =
 						unexpected_machine_check;
 
+dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
+{
+	machine_check_vector(regs, error_code);
+}
+
 /*
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off:
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c6daec4bdba5..330b8462d426 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 #define F14H_MPB_MAX_SIZE 1824
 #define F15H_MPB_MAX_SIZE 4096
 #define F16H_MPB_MAX_SIZE 3458
+#define F17H_MPB_MAX_SIZE 3200
 
 	switch (family) {
 	case 0x14:
@@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 	case 0x16:
 		max_size = F16H_MPB_MAX_SIZE;
 		break;
+	case 0x17:
+		max_size = F17H_MPB_MAX_SIZE;
+		break;
 	default:
 		max_size = F1XH_MPB_MAX_SIZE;
 		break;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index c4fa4a85d4cb..e4fc595cd6ea 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -239,7 +239,7 @@ static int __init save_microcode_in_initrd(void)
 		break;
 	case X86_VENDOR_AMD:
 		if (c->x86 >= 0x10)
-			return save_microcode_in_initrd_amd(cpuid_eax(1));
+			ret = save_microcode_in_initrd_amd(cpuid_eax(1));
 		break;
 	default:
 		break;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 7dbcb7adf797..f7c55b0e753a 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -45,6 +45,9 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
 /* Current microcode patch used in early patching on the APs. */
 static struct microcode_intel *intel_ucode_patch;
 
+/* last level cache size per core */
+static int llc_size_per_core;
+
 static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
 					unsigned int s2, unsigned int p2)
 {
@@ -565,15 +568,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
 }
 #else
 
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
-	__native_flush_tlb_global_irq_disabled();
-}
-
 static inline void print_ucode(struct ucode_cpu_info *uci)
 {
 	struct microcode_intel *mc;
@@ -602,10 +596,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 	if (rev != mc->hdr.rev)
 		return -1;
 
-#ifdef CONFIG_X86_64
-	/* Flush global tlb. This is precaution. */
-	flush_tlb_early();
-#endif
 	uci->cpu_sig.rev = rev;
 
 	if (early)
@@ -923,8 +913,19 @@ static bool is_blacklisted(unsigned int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
-		pr_err_once("late loading on model 79 is disabled.\n");
+	/*
+	 * Late loading on model 79 with microcode revision less than 0x0b000021
+	 * and LLC size per core bigger than 2.5MB may result in a system hang.
+	 * This behavior is documented in item BDF90, #334165 (Intel Xeon
+	 * Processor E7-8800/4800 v4 Product Family).
+	 */
+	if (c->x86 == 6 &&
+	    c->x86_model == INTEL_FAM6_BROADWELL_X &&
+	    c->x86_mask == 0x01 &&
+	    llc_size_per_core > 2621440 &&
+	    c->microcode < 0x0b000021) {
+		pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+		pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
 		return true;
 	}
 
@@ -979,6 +980,15 @@ static struct microcode_ops microcode_intel_ops = {
 	.apply_microcode                  = apply_microcode_intel,
 };
 
+static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
+{
+	u64 llc_size = c->x86_cache_size * 1024;
+
+	do_div(llc_size, c->x86_max_cores);
+
+	return (int)llc_size;
+}
+
 struct microcode_ops * __init init_intel_microcode(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -989,5 +999,7 @@ struct microcode_ops * __init init_intel_microcode(void)
 		return NULL;
 	}
 
+	llc_size_per_core = calc_llc_size_per_core(c);
+
 	return &microcode_intel_ops;
 }
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 236324e83a3a..85eb5fc180c8 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platform(void)
 #endif
 }
 
-const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
+const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
 	.name			= "Microsoft Hyper-V",
 	.detect			= ms_hyperv_platform,
-	.init_platform		= ms_hyperv_init_platform,
+	.type			= X86_HYPER_MS_HYPERV,
+	.init.init_platform	= ms_hyperv_init_platform,
 };
-EXPORT_SYMBOL(x86_hyper_ms_hyperv);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 4378a729b933..e7ecedafa1c8 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -5,6 +5,8 @@
 #include <linux/seq_file.h>
 #include <linux/cpufreq.h>
 
+#include "cpu.h"
+
 /*
  *	Get CPU information for use by the procfs.
  */
@@ -78,7 +80,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
 
 	if (cpu_has(c, X86_FEATURE_TSC)) {
-		unsigned int freq = arch_freq_get_on_cpu(cpu);
+		unsigned int freq = aperfmperf_get_khz(cpu);
 
 		if (!freq)
 			freq = cpufreq_quick_get(cpu);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 05459ad3db46..d0e69769abfd 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -21,7 +21,6 @@ struct cpuid_bit {
 static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
 	{ X86_FEATURE_EPB,		CPUID_ECX,  3, 0x00000006, 0 },
-	{ X86_FEATURE_INTEL_PT,		CPUID_EBX, 25, 0x00000007, 0 },
 	{ X86_FEATURE_AVX512_4VNNIW,    CPUID_EDX,  2, 0x00000007, 0 },
 	{ X86_FEATURE_AVX512_4FMAPS,    CPUID_EDX,  3, 0x00000007, 0 },
 	{ X86_FEATURE_CAT_L3,		CPUID_EBX,  1, 0x00000010, 0 },
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 40ed26852ebd..8e005329648b 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void)
 	       (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
 }
 
-const __refconst struct hypervisor_x86 x86_hyper_vmware = {
+const __initconst struct hypervisor_x86 x86_hyper_vmware = {
 	.name			= "VMware",
 	.detect			= vmware_platform,
-	.init_platform		= vmware_platform_setup,
-	.x2apic_available	= vmware_legacy_x2apic_available,
+	.type			= X86_HYPER_VMWARE,
+	.init.init_platform	= vmware_platform_setup,
+	.init.x2apic_available	= vmware_legacy_x2apic_available,
 };
-EXPORT_SYMBOL(x86_hyper_vmware);