From 10a70416e1f067f6c4efda6ffd8ea96002ac4223 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:25 -0700 Subject: x86/speculation/l1tf: Make sure the first page is always reserved The L1TF workaround doesn't make any attempt to mitigate speculative accesses to the first physical page for zeroed PTEs. Normally it only contains some data from the early real mode BIOS. It's not entirely clear that the first page is reserved in all configurations, so add an extra reservation call to make sure it is really reserved. In most configurations (e.g. with the standard reservations) it's likely a nop. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Dave Hansen --- arch/x86/kernel/setup.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2f86d883dd95..74b4472ba0a6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -823,6 +823,12 @@ void __init setup_arch(char **cmdline_p) memblock_reserve(__pa_symbol(_text), (unsigned long)__bss_stop - (unsigned long)_text); + /* + * Make sure page 0 is always reserved because on systems with + * L1TF its contents can be leaked to user processes. + */ + memblock_reserve(0, PAGE_SIZE); + early_reserve_initrd(); /* -- cgit v1.2.3-59-g8ed1b From 17dbca119312b4e8173d4e25ff64262119fcef38 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Jun 2018 15:48:26 -0700 Subject: x86/speculation/l1tf: Add sysfs reporting for l1tf L1TF core kernel workarounds are cheap and normally always enabled. However, they should still be reported in sysfs if the system is vulnerable or mitigated. Add the necessary CPU feature/bug bits. - Extend the existing checks for Meltdown to determine if the system is vulnerable. All CPUs which are not vulnerable to Meltdown are also not vulnerable to L1TF - Check for 32-bit non-PAE kernels and emit a warning, as there is no practical way to mitigate due to the limited physical address bits - If the system has more than MAX_PA/2 physical memory the PTE inversion workaround no longer protects the system against the L1TF attack, because an inverted physical address will also point to valid memory. Print a warning in this case and report that the system is vulnerable. Add a function which returns the PFN limit for the L1TF mitigation, which will be used in follow-up patches for sanity and range checks.
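To make the limit concrete, here is an illustrative userspace sketch (not part of the patch) of the arithmetic behind l1tf_pfn_limit() and the MAX_PA/2 cutoff it feeds; the 46-bit physical address width is an assumed example value:

  #include <stdio.h>
  #include <stdint.h>

  #define PAGE_SHIFT 12

  int main(void)
  {
          unsigned int x86_phys_bits = 46;  /* assumed MAX_PA for this example */
          uint64_t pfn_limit = (1ULL << (x86_phys_bits - 1 - PAGE_SHIFT)) - 1;
          uint64_t half_pa = pfn_limit << PAGE_SHIFT;

          /* RAM mapped at or above half_pa defeats the PTE inversion trick */
          printf("l1tf_pfn_limit() = %#llx\n", (unsigned long long)pfn_limit);
          printf("MAX_PA/2 cutoff  = %#llx (~%llu GiB)\n",
                 (unsigned long long)half_pa,
                 (unsigned long long)(half_pa >> 30));
          return 0;
  }

With 46 physical address bits the cutoff lands just below 32 TiB, which illustrates why exceeding MAX_PA/2 with real DIMMs is rare.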
[ tglx: Renamed the CPU feature bit to L1TF_PTEINV ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Dave Hansen --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/include/asm/processor.h | 5 +++++ arch/x86/kernel/cpu/bugs.c | 40 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common.c | 20 +++++++++++++++++++ drivers/base/cpu.c | 8 ++++++++ include/linux/cpu.h | 2 ++ 6 files changed, 77 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 5701f5cecd31..f41cf9df4a83 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -219,6 +219,7 @@ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -373,5 +374,6 @@ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cfd29ee8c3da..7e3ac5eedcd6 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -181,6 +181,11 @@ extern const struct seq_operations cpuinfo_op; extern void cpu_detect(struct cpuinfo_x86 *c); +static inline unsigned long l1tf_pfn_limit(void) +{ + return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1; +} + extern void early_cpu_init(void); extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cd0fda1fff6d..3ce9674e4887 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -27,9 +27,11 @@ #include #include #include +#include static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); +static void __init l1tf_select_mitigation(void); /* * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any @@ -81,6 +83,8 @@ void __init check_bugs(void) */ ssb_select_mitigation(); + l1tf_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -205,6 +209,32 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +static void __init l1tf_select_mitigation(void) +{ + u64 half_pa; + + if (!boot_cpu_has_bug(X86_BUG_L1TF)) + return; + +#if CONFIG_PGTABLE_LEVELS == 2 + pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n"); + return; +#endif + + /* + * This is extremely unlikely to happen because almost all + * systems have far more MAX_PA/2 than RAM can be fit into + * DIMM slots. + */ + half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; + if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { + pr_warn("System has more than MAX_PA/2 memory. 
L1TF mitigation not effective.\n"); + return; + } + + setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV); +} + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -678,6 +708,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + case X86_BUG_L1TF: + if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) + return sprintf(buf, "Mitigation: Page Table Inversion\n"); + break; + default: break; } @@ -704,4 +739,9 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute * { return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS); } + +ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_L1TF); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0df7151cfef4..9baf684e2b55 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -984,6 +984,21 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { {} }; +static const __initconst struct x86_cpu_id cpu_no_l1tf[] = { + /* in addition to cpu_no_speculation */ + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, + {} +}; + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; @@ -1010,6 +1025,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) return; setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + + if (x86_match_cpu(cpu_no_l1tf)) + return; + + setup_force_cpu_bug(X86_BUG_L1TF); } /* diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 30cc9c877ebb..eb9443d5bae1 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -540,16 +540,24 @@ ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_l1tf(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); +static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, &dev_attr_spectre_v1.attr, &dev_attr_spectre_v2.attr, &dev_attr_spec_store_bypass.attr, + &dev_attr_l1tf.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index a97a63eef59f..d3da5184aa1c 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -55,6 +55,8 @@ extern ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_l1tf(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, 
void *drvdata, -- cgit v1.2.3-59-g8ed1b From 56563f53d3066afa9e63d6c997bf67e76a8b05c0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Jun 2018 16:42:57 -0400 Subject: x86/bugs: Move the l1tf function and define pr_fmt properly The pr_warn in l1tf_select_mitigation would have used the prior pr_fmt which was defined as "Spectre V2 : ". Move the function past the SSBD code and define the pr_fmt properly. Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf") Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/bugs.c | 55 ++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3ce9674e4887..50500cea6eba 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -209,32 +209,6 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } -static void __init l1tf_select_mitigation(void) -{ - u64 half_pa; - - if (!boot_cpu_has_bug(X86_BUG_L1TF)) - return; - -#if CONFIG_PGTABLE_LEVELS == 2 - pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n"); - return; -#endif - - /* - * This is extremely unlikely to happen because almost all - * systems have far more MAX_PA/2 than RAM can be fit into - * DIMM slots. - */ - half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; - if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { - pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); - return; - } - - setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV); -} - #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -681,6 +655,35 @@ void x86_spec_ctrl_setup_ap(void) x86_amd_ssb_disable(); } +#undef pr_fmt +#define pr_fmt(fmt) "L1TF: " fmt +static void __init l1tf_select_mitigation(void) +{ + u64 half_pa; + + if (!boot_cpu_has_bug(X86_BUG_L1TF)) + return; + +#if CONFIG_PGTABLE_LEVELS == 2 + pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n"); + return; +#endif + + /* + * This is extremely unlikely to happen because almost all + * systems have far more MAX_PA/2 than RAM can be fit into + * DIMM slots. + */ + half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; + if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { + pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); + return; + } + + setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV); +} +#undef pr_fmt + #ifdef CONFIG_SYSFS static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, -- cgit v1.2.3-59-g8ed1b From 6a4d2657e048f096c7ffcad254010bd94891c8c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 May 2018 17:50:22 +0200 Subject: x86/smp: Provide topology_is_primary_thread() If the CPU supports SMT then the primary thread can be found by checking the lower APIC ID bits for zero. smp_num_siblings is used to build the mask for the APIC ID bits which need to be taken into account. This uses the MPTABLE or ACPI/MADT supplied APIC ID, which can be different from the initial APIC ID in CPUID. But according to AMD the lower bits have to be consistent. Intel gave a tentative confirmation as well. Preparatory patch to support disabling SMT at boot/runtime.
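As a worked example of the mask logic described above (an illustrative userspace sketch, not the kernel function itself; fls() is emulated with a compiler builtin):

  #include <stdio.h>

  static int fls_u32(unsigned int x)
  {
          return x ? 32 - __builtin_clz(x) : 0;  /* mirrors the kernel's fls() */
  }

  static int is_primary_thread(unsigned int apicid, unsigned int siblings)
  {
          unsigned int mask;

          if (siblings == 1)
                  return 1;
          /* Isolate the SMT bit(s) in the APIC ID and check for 0 */
          mask = (1U << (fls_u32(siblings) - 1)) - 1;
          return !(apicid & mask);
  }

  int main(void)
  {
          /* Two siblings -> mask 0x1, so even APIC IDs are primary threads */
          for (unsigned int id = 0; id < 4; id++)
                  printf("APIC ID %u: %s\n", id,
                         is_primary_thread(id, 2) ? "primary" : "secondary");
          return 0;
  }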
Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 6 ++++++ arch/x86/include/asm/topology.h | 4 +++- arch/x86/kernel/apic/apic.c | 15 +++++++++++++++ arch/x86/kernel/smpboot.c | 9 +++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 74a9e06b6cfd..9362a3aae927 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -502,6 +502,12 @@ extern int default_check_phys_apicid_present(int phys_apicid); #endif /* CONFIG_X86_LOCAL_APIC */ +#ifdef CONFIG_SMP +bool apic_id_is_primary_thread(unsigned int id); +#else +static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } +#endif + extern void irq_enter(void); extern void irq_exit(void); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index c1d2a9892352..661e361a12a6 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -123,13 +123,15 @@ static inline int topology_max_smt_threads(void) } int topology_update_package_map(unsigned int apicid, unsigned int cpu); -extern int topology_phys_to_logical_pkg(unsigned int pkg); +int topology_phys_to_logical_pkg(unsigned int pkg); +bool topology_is_primary_thread(unsigned int cpu); #else #define topology_max_packages() (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_smt_threads(void) { return 1; } +static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2aabd4cb0e3f..8703caa9d6db 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2189,6 +2189,21 @@ static int cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = -1, }; +/** + * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread + * @id: APIC ID to check + */ +bool apic_id_is_primary_thread(unsigned int apicid) +{ + u32 mask; + + if (smp_num_siblings == 1) + return true; + /* Isolate the SMT bit(s) in the APICID and check for 0 */ + mask = (1U << (fls(smp_num_siblings) - 1)) - 1; + return !(apicid & mask); +} + /* * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids * and cpuid_to_apicid[] synchronized. diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c2f7d1d2a5c3..f9c731240142 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -265,6 +265,15 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } +/** + * topology_is_primary_thread - Check whether CPU is the primary SMT thread + * @cpu: CPU to check + */ +bool topology_is_primary_thread(unsigned int cpu) +{ + return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu)); +} + /** * topology_phys_to_logical_pkg - Map a physical package id to a logical * -- cgit v1.2.3-59-g8ed1b From f048c399e0f7490ab7296bc2c255d37eb14a9675 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Jun 2018 10:37:20 +0200 Subject: x86/topology: Provide topology_smt_supported() Provide information whether SMT is supported by the CPUs. Preparatory patch for SMT control mechanism.
Suggested-by: Dave Hansen Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 2 ++ arch/x86/kernel/smpboot.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 661e361a12a6..453cf38a1c33 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -125,6 +125,7 @@ static inline int topology_max_smt_threads(void) int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); bool topology_is_primary_thread(unsigned int cpu); +bool topology_smt_supported(void); #else #define topology_max_packages() (1) static inline int @@ -132,6 +133,7 @@ topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } +static inline bool topology_smt_supported(void) { return false; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f9c731240142..7f7def989fb0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -274,6 +274,14 @@ bool topology_is_primary_thread(unsigned int cpu) return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu)); } +/** + * topology_smt_supported - Check whether SMT is supported by the CPUs + */ +bool topology_smt_supported(void) +{ + return smp_num_siblings > 1; +} + /** * topology_phys_to_logical_pkg - Map a physical package id to a logical * -- cgit v1.2.3-59-g8ed1b From 55e6d279abd92cfd7576bba031e7589be8475edb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2018 00:36:15 +0200 Subject: x86/cpu: Remove the pointless CPU printout The value of this printout is dubious at best and there is no point in having it in two different places along with convoluted ways to reach it. Remove it completely. 
Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 20 +++++--------------- arch/x86/kernel/cpu/topology.c | 10 ---------- 2 files changed, 5 insertions(+), 25 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9baf684e2b55..81fbb267489e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -663,13 +663,12 @@ void detect_ht(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP u32 eax, ebx, ecx, edx; int index_msb, core_bits; - static bool printed; if (!cpu_has(c, X86_FEATURE_HT)) return; if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - goto out; + return; if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) return; @@ -678,14 +677,14 @@ void detect_ht(struct cpuinfo_x86 *c) smp_num_siblings = (ebx & 0xff0000) >> 16; + if (!smp_num_siblings) + smp_num_siblings = 1; + if (smp_num_siblings == 1) { pr_info_once("CPU0: Hyper-Threading is disabled\n"); - goto out; + return; } - if (smp_num_siblings <= 1) - goto out; - index_msb = get_count_order(smp_num_siblings); c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); @@ -697,15 +696,6 @@ void detect_ht(struct cpuinfo_x86 *c) c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); - -out: - if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { - pr_info("CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - pr_info("CPU: Processor Core ID: %d\n", - c->cpu_core_id); - printed = 1; - } #endif } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 81c0afb39d0a..1072b5eda607 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -33,7 +33,6 @@ int detect_extended_topology(struct cpuinfo_x86 *c) unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; - static bool printed; if (c->cpuid_level < 0xb) return -1; @@ -86,15 +85,6 @@ int detect_extended_topology(struct cpuinfo_x86 *c) c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); c->x86_max_cores = (core_level_siblings / smp_num_siblings); - - if (!printed) { - pr_info("CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - if (c->x86_max_cores > 1) - pr_info("CPU: Processor Core ID: %d\n", - c->cpu_core_id); - printed = 1; - } #endif return 0; } -- cgit v1.2.3-59-g8ed1b From 44ca36de56d1bf196dca2eb67cd753a46961ffe6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2018 00:47:10 +0200 Subject: x86/cpu/AMD: Remove the pointless detect_ht() call Real 32bit AMD CPUs do not have SMT and the only value of the call was to reach the magic printout which got removed. 
Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 082d7875cef8..9fb1c681b63a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -855,10 +855,6 @@ static void init_amd(struct cpuinfo_x86 *c) srat_detect_node(c); } -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - init_amd_cacheinfo(c); if (c->x86 >= 0xf) -- cgit v1.2.3-59-g8ed1b From 545401f4448a807b963ff17b575e0a393e68b523 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2018 00:53:57 +0200 Subject: x86/cpu/common: Provide detect_ht_early() To support force disabling of SMT it's required to know the number of thread siblings early. detect_ht() cannot be called before the APIC driver is selected, so split out the part which initializes smp_num_siblings. Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 24 ++++++++++++++---------- arch/x86/kernel/cpu/cpu.h | 1 + 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 81fbb267489e..0768492f4687 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -658,32 +658,36 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c) tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); } -void detect_ht(struct cpuinfo_x86 *c) +int detect_ht_early(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP u32 eax, ebx, ecx, edx; - int index_msb, core_bits; if (!cpu_has(c, X86_FEATURE_HT)) - return; + return -1; if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return; + return -1; if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) - return; + return -1; cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; + if (smp_num_siblings == 1) + pr_info_once("CPU0: Hyper-Threading is disabled\n"); +#endif + return 0; +} - if (!smp_num_siblings) - smp_num_siblings = 1; +void detect_ht(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + int index_msb, core_bits; - if (smp_num_siblings == 1) { - pr_info_once("CPU0: Hyper-Threading is disabled\n"); + if (detect_ht_early(c) < 0) return; - } index_msb = get_count_order(smp_num_siblings); c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 38216f678fc3..898da0b974e2 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -56,6 +56,7 @@ extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); extern void detect_num_cpu_cores(struct cpuinfo_x86 *c); extern int detect_extended_topology(struct cpuinfo_x86 *c); +extern int detect_ht_early(struct cpuinfo_x86 *c); extern void detect_ht(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); -- cgit v1.2.3-59-g8ed1b From 95f3d39ccf7aaea79d1ffdac1c887c2e100ec1b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2018 00:55:39 +0200 Subject: x86/cpu/topology: Provide detect_extended_topology_early() To support force disabling of SMT it's required to know the number of thread siblings early. detect_extended_topology() cannot be called before the APIC driver is selected, so split out the part which initializes smp_num_siblings. 
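To make the two detection paths concrete, here is an illustrative userspace sketch (assumes an x86 compiler providing <cpuid.h>; __get_cpuid_count needs a reasonably recent GCC or Clang) of how the sibling count is derived from extended topology leaf 0xb, with a fallback to legacy leaf 1:

  #include <stdio.h>
  #include <cpuid.h>

  int main(void)
  {
          unsigned int eax, ebx, ecx, edx, siblings = 1;

          if (__get_cpuid_count(0xb, 0, &eax, &ebx, &ecx, &edx) && (ebx & 0xffff)) {
                  /* Extended topology, SMT level: LEVEL_MAX_SIBLINGS(ebx) */
                  siblings = ebx & 0xffff;
          } else if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) && (edx & (1u << 28))) {
                  /* Legacy HT detection: CPUID.1:EBX[23:16] */
                  siblings = (ebx & 0xff0000) >> 16;
          }
          printf("smp_num_siblings = %u\n", siblings);
          return 0;
  }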
Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar --- arch/x86/kernel/cpu/cpu.h | 1 + arch/x86/kernel/cpu/topology.c | 31 ++++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 898da0b974e2..e59c0ea82a33 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -55,6 +55,7 @@ extern void init_intel_cacheinfo(struct cpuinfo_x86 *c); extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); extern void detect_num_cpu_cores(struct cpuinfo_x86 *c); +extern int detect_extended_topology_early(struct cpuinfo_x86 *c); extern int detect_extended_topology(struct cpuinfo_x86 *c); extern int detect_ht_early(struct cpuinfo_x86 *c); extern void detect_ht(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 1072b5eda607..71ca064e3794 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -22,17 +22,10 @@ #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) -/* - * Check for extended topology enumeration cpuid leaf 0xb and if it - * exists, use it for populating initial_apicid and cpu topology - * detection. - */ -int detect_extended_topology(struct cpuinfo_x86 *c) +int detect_extended_topology_early(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - unsigned int eax, ebx, ecx, edx, sub_index; - unsigned int ht_mask_width, core_plus_mask_width; - unsigned int core_select_mask, core_level_siblings; + unsigned int eax, ebx, ecx, edx; if (c->cpuid_level < 0xb) return -1; @@ -51,10 +44,30 @@ int detect_extended_topology(struct cpuinfo_x86 *c) * initial apic id, which also represents 32-bit extended x2apic id. */ c->initial_apicid = edx; + smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); +#endif + return 0; +} + +/* + * Check for extended topology enumeration cpuid leaf 0xb and if it + * exists, use it for populating initial_apicid and cpu topology + * detection. + */ +int detect_extended_topology(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned int eax, ebx, ecx, edx, sub_index; + unsigned int ht_mask_width, core_plus_mask_width; + unsigned int core_select_mask, core_level_siblings; + + if (detect_extended_topology_early(c) < 0) + return -1; /* * Populate HT related information from sub-leaf level 0. */ + cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); -- cgit v1.2.3-59-g8ed1b From 1910ad5624968f93be48e8e265513c54d66b897c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2018 01:00:55 +0200 Subject: x86/cpu/intel: Evaluate smp_num_siblings early Make use of the new early detection function to initialize smp_num_siblings on the boot cpu before the MP-Table or ACPI/MADT scan happens. That's required for force disabling SMT. 
Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index eb75564f2d25..6602941cfebf 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -301,6 +301,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) } check_mpx_erratum(c); + + /* + * Get the number of SMT siblings early from the extended topology + * leaf, if available. Otherwise try the legacy SMT detection. + */ + if (detect_extended_topology_early(c) < 0) + detect_ht_early(c); } #ifdef CONFIG_X86_32 -- cgit v1.2.3-59-g8ed1b From 119bff8a9c9bb00116a844ec68be7bc4b1c768f5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 15 Jun 2018 20:48:39 +0200 Subject: x86/CPU/AMD: Do not check CPUID max ext level before parsing SMP info Old code used to check whether CPUID ext max level is >= 0x80000008 because that last leaf contains the number of cores of the physical CPU. The three functions called there now do not depend on that leaf anymore so the check can go. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9fb1c681b63a..d120e5cefca9 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -848,12 +848,9 @@ static void init_amd(struct cpuinfo_x86 *c) cpu_detect_cache_sizes(c); - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) { - amd_detect_cmp(c); - amd_get_topology(c); - srat_detect_node(c); - } + amd_detect_cmp(c); + amd_get_topology(c); + srat_detect_node(c); init_amd_cacheinfo(c); -- cgit v1.2.3-59-g8ed1b From 1e1d7e25fd759eddf96d8ab39d0a90a1979b2d8c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2018 00:57:38 +0200 Subject: x86/cpu/AMD: Evaluate smp_num_siblings early To support force disabling of SMT it's required to know the number of thread siblings early. amd_get_topology() cannot be called before the APIC driver is selected, so split out the part which initializes smp_num_siblings and invoke it from amd_early_init(). 
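An illustrative userspace sketch of the same derivation (assumes an AMD CPU advertising TOPOEXT, i.e. CPUID 0x80000001 ECX bit 22, and a compiler providing <cpuid.h>):

  #include <stdio.h>
  #include <cpuid.h>

  int main(void)
  {
          unsigned int eax, ebx, ecx, edx;

          if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx) ||
              !(ecx & (1u << 22)))  /* X86_FEATURE_TOPOEXT */
                  return 1;
          __cpuid(0x8000001e, eax, ebx, ecx, edx);
          /* EBX[15:8] holds threads per core - 1 */
          printf("smp_num_siblings = %u\n", ((ebx >> 8) & 0xff) + 1);
          return 0;
  }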
Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d120e5cefca9..b20dee982a53 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -315,6 +315,17 @@ static void legacy_fixup_core_id(struct cpuinfo_x86 *c) c->cpu_core_id %= cus_per_node; } + +static void amd_get_topology_early(struct cpuinfo_x86 *c) +{ + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { + u32 eax, ebx, ecx, edx; + + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); + smp_num_siblings = ((ebx >> 8) & 0xff) + 1; + } +} + /* * Fixup core topology information for * (1) AMD multi-node processors @@ -681,6 +692,8 @@ static void early_init_amd(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_AMD_E400); early_detect_mem_encrypt(c); + + amd_get_topology_early(c); } static void init_amd_k8(struct cpuinfo_x86 *c) -- cgit v1.2.3-59-g8ed1b From 2207def700f902f169fc237b717252c326f9e464 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Jun 2018 14:00:11 +0200 Subject: x86/apic: Ignore secondary threads if nosmt=force MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nosmt on the kernel command line merely prevents the onlining of the secondary SMT siblings. nosmt=force makes the APIC detection code ignore the secondary SMT siblings completely, so they do not even show up as possible CPUs. That reduces the number of memory allocations for per-CPU variables and keeps other resources from being allocated too large. This is not fully equivalent to disabling SMT in the BIOS because the low level SMT enabling in the BIOS can result in partitioning of resources between the siblings, which is not undone by just ignoring them. Some CPUs can use the full resources when their sibling is not onlined, but this depends on the CPU family and model, and it is not well documented whether this applies to all partitioned resources. That means that, depending on the workload, disabling SMT in the BIOS might result in better performance. Linus' analysis of the Intel manual: The Intel optimization manual is not very clear on what the partitioning rules are. I find: "In general, the buffers for staging instructions between major pipe stages are partitioned. These buffers include µop queues after the execution trace cache, the queues after the register rename stage, the reorder buffer which stages instructions for retirement, and the load and store buffers. In the case of load and store buffers, partitioning also provided an easier implementation to maintain memory ordering for each logical processor and detect memory ordering violations" but some of that partitioning may be relaxed if the HT thread is "not active": "In Intel microarchitecture code name Sandy Bridge, the micro-op queue is statically partitioned to provide 28 entries for each logical processor, irrespective of software executing in single thread or multiple threads. If one logical processor is not active in Intel microarchitecture code name Ivy Bridge, then a single thread executing on that processor core can use the 56 entries in the micro-op queue" but I do not know what "not active" means, and how dynamic it is. Some of that partitioning may be entirely static and depend on the early BIOS disabling of HT, and even if we park the cores, the resources will just be wasted.
Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 2 ++ arch/x86/kernel/acpi/boot.c | 3 ++- arch/x86/kernel/apic/apic.c | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 9362a3aae927..ef40e4acd7c2 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -504,8 +504,10 @@ extern int default_check_phys_apicid_present(int phys_apicid); #ifdef CONFIG_SMP bool apic_id_is_primary_thread(unsigned int id); +bool apic_id_disabled(unsigned int id); #else static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } +static inline bool apic_id_disabled(unsigned int id) { return false; } #endif extern void irq_enter(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 3b20607d581b..db4c118a7e4d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -181,7 +181,8 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) } if (!enabled) { - ++disabled_cpus; + if (!apic_id_disabled(id)) + ++disabled_cpus; return -EINVAL; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 8703caa9d6db..b86091add294 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2204,6 +2204,16 @@ bool apic_id_is_primary_thread(unsigned int apicid) return !(apicid & mask); } +/** + * apic_id_disabled - Check whether APIC ID is disabled via SMT control + * @id: APIC ID to check + */ +bool apic_id_disabled(unsigned int id) +{ + return (cpu_smt_control == CPU_SMT_FORCE_DISABLED && + !apic_id_is_primary_thread(id)); +} + /* * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids * and cpuid_to_apicid[] synchronized. @@ -2299,6 +2309,15 @@ int generic_processor_info(int apicid, int version) return -EINVAL; } + /* + * If SMT is force disabled and the APIC ID belongs to + * a secondary thread, ignore it. + */ + if (apic_id_disabled(apicid)) { + pr_info_once("Ignoring secondary SMT threads\n"); + return -EINVAL; + } + if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed -- cgit v1.2.3-59-g8ed1b From 7ce2f0393ea2396142b7faf6ee9b1f3676d08a5f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 22 Jun 2018 11:34:11 +0200 Subject: x86/CPU/AMD: Move TOPOEXT reenablement before reading smp_num_siblings The TOPOEXT reenablement is a workaround for broken BIOSen which didn't enable the CPUID bit. amd_get_topology_early(), however, relies on that bit being set so that it can read out the CPUID leaf and set smp_num_siblings properly. Move the reenablement up to early_init_amd(). While at it, simplify amd_get_topology_early(). 
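For checking the result of this fixup on a running system, an illustrative read-only sketch using the standard msr(4) interface (assumes the msr module is loaded and root privileges; the MSR number 0xc0011005 and bit 54 are taken from the patch):

  #include <stdio.h>
  #include <stdint.h>
  #include <fcntl.h>
  #include <unistd.h>

  int main(void)
  {
          uint64_t val;
          int fd = open("/dev/cpu/0/msr", O_RDONLY);

          /* pread() at offset <msr number> reads that MSR on CPU 0 */
          if (fd < 0 || pread(fd, &val, sizeof(val), 0xc0011005) != sizeof(val)) {
                  perror("msr");
                  return 1;
          }
          printf("TOPOEXT enable bit (54): %s\n",
                 (val & (1ULL << 54)) ? "set" : "clear");
          close(fd);
          return 0;
  }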
Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/amd.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b20dee982a53..74061e421e62 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -318,12 +318,8 @@ static void legacy_fixup_core_id(struct cpuinfo_x86 *c) static void amd_get_topology_early(struct cpuinfo_x86 *c) { - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - u32 eax, ebx, ecx, edx; - - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - smp_num_siblings = ((ebx >> 8) & 0xff) + 1; - } + if (cpu_has(c, X86_FEATURE_TOPOEXT)) + smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; } /* @@ -345,7 +341,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); node_id = ecx & 0xff; - smp_num_siblings = ((ebx >> 8) & 0xff) + 1; if (c->x86 == 0x15) c->cu_id = ebx & 0xff; @@ -622,6 +617,7 @@ clear_sev: static void early_init_amd(struct cpuinfo_x86 *c) { + u64 value; u32 dummy; early_init_amd_mc(c); @@ -693,6 +689,20 @@ static void early_init_amd(struct cpuinfo_x86 *c) early_detect_mem_encrypt(c); + /* Re-enable TopologyExtensions if switched off by BIOS */ + if (c->x86 == 0x15 && + (c->x86_model >= 0x10 && c->x86_model <= 0x6f) && + !cpu_has(c, X86_FEATURE_TOPOEXT)) { + + if (msr_set_bit(0xc0011005, 54) > 0) { + rdmsrl(0xc0011005, value); + if (value & BIT_64(54)) { + set_cpu_cap(c, X86_FEATURE_TOPOEXT); + pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); + } + } + } + amd_get_topology_early(c); } @@ -785,19 +795,6 @@ static void init_amd_bd(struct cpuinfo_x86 *c) { u64 value; - /* re-enable TopologyExtensions if switched off by BIOS */ - if ((c->x86_model >= 0x10) && (c->x86_model <= 0x6f) && - !cpu_has(c, X86_FEATURE_TOPOEXT)) { - - if (msr_set_bit(0xc0011005, 54) > 0) { - rdmsrl(0xc0011005, value); - if (value & BIT_64(54)) { - set_cpu_cap(c, X86_FEATURE_TOPOEXT); - pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); - } - } - } - /* * The way access filter has a performance penalty on some workloads. * Disable it on the affected CPUs. -- cgit v1.2.3-59-g8ed1b From 506a66f374891ff08e064a058c446b336c5ac760 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Jun 2018 16:05:47 +0200 Subject: Revert "x86/apic: Ignore secondary threads if nosmt=force" Dave Hansen reported that it is outright dangerous to keep SMT siblings disabled completely so they are stuck in the BIOS and wait for SIPI. The reason is that Machine Check Exceptions are broadcast to siblings and the soft-disabled sibling has CR4.MCE = 0. If an MCE is delivered to a logical core with CR4.MCE = 0, it asserts IERR#, which shuts down or reboots the machine. The MCE chapter in the SDM contains the following blurb: Because the logical processors within a physical package are tightly coupled with respect to shared hardware resources, both logical processors are notified of machine check errors that occur within a given physical processor. If machine-check exceptions are enabled when a fatal error is reported, all the logical processors within a physical package are dispatched to the machine-check exception handler. If machine-check exceptions are disabled, the logical processors enter the shutdown state and assert the IERR# signal.
When enabling machine-check exceptions, the MCE flag in control register CR4 should be set for each logical processor. Reverting the commit which ignores siblings at enumeration time solves only half of the problem. The core cpuhotplug logic needs to be adjusted as well. This thoughtfully engineered mechanism also turns the boot process on all Intel HT-enabled systems into an MCE lottery. MCE is enabled on the boot CPU before the secondary CPUs are brought up. Depending on the number of physical cores, the window in which this situation can happen is smaller or larger. On an HSW-EX it's about 750ms: MCE is enabled on the boot CPU: [ 0.244017] mce: CPU supports 22 MCE banks The corresponding sibling #72 boots: [ 1.008005] .... node #0, CPUs: #72 That means if an MCE hits on physical core 0 (logical CPUs 0 and 72) between these two points the machine is going to shut down. At least it's a known safe state. It's obvious that the early boot can be hit by an MCE as well and then runs into the same situation because MCEs are not yet enabled on the boot CPU. But after enabling them on the boot CPU, it does not make any sense to prevent the kernel from recovering. Adjust the nosmt kernel parameter documentation as well. Reverts: 2207def700f9 ("x86/apic: Ignore secondary threads if nosmt=force") Reported-by: Dave Hansen Signed-off-by: Thomas Gleixner Tested-by: Tony Luck --- Documentation/admin-guide/kernel-parameters.txt | 8 ++------ arch/x86/include/asm/apic.h | 2 -- arch/x86/kernel/acpi/boot.c | 3 +-- arch/x86/kernel/apic/apic.c | 19 ------------------- 4 files changed, 3 insertions(+), 29 deletions(-) (limited to 'arch/x86/kernel') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8e29c4b6756f..5da7b0b30432 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2688,12 +2688,8 @@ Equivalent to smt=1. [KNL,x86] Disable symmetric multithreading (SMT). - nosmt=force: Force disable SMT, similar to disabling - it in the BIOS except that some of the - resource partitioning effects which are - caused by having SMT enabled in the BIOS - cannot be undone. Depending on the CPU - type this might have a performance impact. + nosmt=force: Force disable SMT, cannot be undone + via the sysfs control file. nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2 (indirect branch prediction) vulnerability.
System may diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index ef40e4acd7c2..9362a3aae927 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -504,10 +504,8 @@ extern int default_check_phys_apicid_present(int phys_apicid); #ifdef CONFIG_SMP bool apic_id_is_primary_thread(unsigned int id); -bool apic_id_disabled(unsigned int id); #else static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } -static inline bool apic_id_disabled(unsigned int id) { return false; } #endif extern void irq_enter(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index db4c118a7e4d..3b20607d581b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -181,8 +181,7 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) } if (!enabled) { - if (!apic_id_disabled(id)) - ++disabled_cpus; + ++disabled_cpus; return -EINVAL; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b86091add294..8703caa9d6db 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2204,16 +2204,6 @@ bool apic_id_is_primary_thread(unsigned int apicid) return !(apicid & mask); } -/** - * apic_id_disabled - Check whether APIC ID is disabled via SMT control - * @id: APIC ID to check - */ -bool apic_id_disabled(unsigned int id) -{ - return (cpu_smt_control == CPU_SMT_FORCE_DISABLED && - !apic_id_is_primary_thread(id)); -} - /* * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids * and cpuid_to_apicid[] synchronized. @@ -2309,15 +2299,6 @@ int generic_processor_info(int apicid, int version) return -EINVAL; } - /* - * If SMT is force disabled and the APIC ID belongs to - * a secondary thread, ignore it. - */ - if (apic_id_disabled(apicid)) { - pr_info_once("Ignoring secondary SMT threads\n"); - return -EINVAL; - } - if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed -- cgit v1.2.3-59-g8ed1b From 72c6d2db64fa18c996ece8f06e499509e6c9a37e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:16 +0200 Subject: x86/l1tf: Introduce vmx status variable Store the effective mitigation of VMX in a status variable and use it to report the VMX state in the l1tf sysfs file.
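Once the series is applied, the combined state can be read back through the vulnerabilities file added earlier; an illustrative sketch of a trivial reader:

  #include <stdio.h>

  int main(void)
  {
          char buf[128];
          /* Path established by the earlier sysfs reporting patch */
          FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/l1tf", "r");

          if (!f)
                  return 1;
          if (fgets(buf, sizeof(buf), f))
                  fputs(buf, stdout);  /* e.g. "Mitigation: PTE Inversion; VMX: ..." */
          fclose(f);
          return 0;
  }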
Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142322.433098358@linutronix.de --- arch/x86/include/asm/vmx.h | 9 +++++++++ arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++++++++++++++++++-- arch/x86/kvm/vmx.c | 22 +++++++++++----------- 3 files changed, 54 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 425e6b8b9547..c98aa9aed056 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -573,4 +573,13 @@ enum vm_instruction_error_number { VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28, }; +enum vmx_l1d_flush_state { + VMENTER_L1D_FLUSH_AUTO, + VMENTER_L1D_FLUSH_NEVER, + VMENTER_L1D_FLUSH_COND, + VMENTER_L1D_FLUSH_ALWAYS, +}; + +extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; + #endif diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 50500cea6eba..8aba7d3abccb 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -657,6 +658,12 @@ void x86_spec_ctrl_setup_ap(void) #undef pr_fmt #define pr_fmt(fmt) "L1TF: " fmt + +#if IS_ENABLED(CONFIG_KVM_INTEL) +enum vmx_l1d_flush_state l1tf_vmx_mitigation __ro_after_init = VMENTER_L1D_FLUSH_AUTO; +EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation); +#endif + static void __init l1tf_select_mitigation(void) { u64 half_pa; @@ -686,6 +693,32 @@ static void __init l1tf_select_mitigation(void) #ifdef CONFIG_SYSFS +#define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" + +#if IS_ENABLED(CONFIG_KVM_INTEL) +static const char *l1tf_vmx_states[] = { + [VMENTER_L1D_FLUSH_AUTO] = "auto", + [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", + [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", + [VMENTER_L1D_FLUSH_ALWAYS] = "cache flushes", +}; + +static ssize_t l1tf_show_state(char *buf) +{ + if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) + return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); + + return sprintf(buf, "%s; VMX: SMT %s, L1D %s\n", L1TF_DEFAULT_MSG, + cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled", + l1tf_vmx_states[l1tf_vmx_mitigation]); +} +#else +static ssize_t l1tf_show_state(char *buf) +{ + return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); +} +#endif + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -713,9 +746,8 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_L1TF: if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) - return sprintf(buf, "Mitigation: Page Table Inversion\n"); + return l1tf_show_state(buf); break; - default: break; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index eb7c207a3bc3..385cbfaa3c71 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -193,19 +193,13 @@ extern const ulong vmx_return; static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); -/* These MUST be in sync with vmentry_l1d_param order. 
*/ -enum vmx_l1d_flush_state { - VMENTER_L1D_FLUSH_NEVER, - VMENTER_L1D_FLUSH_COND, - VMENTER_L1D_FLUSH_ALWAYS, -}; - static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush = VMENTER_L1D_FLUSH_COND; static const struct { const char *option; enum vmx_l1d_flush_state cmd; } vmentry_l1d_param[] = { + {"auto", VMENTER_L1D_FLUSH_AUTO}, {"never", VMENTER_L1D_FLUSH_NEVER}, {"cond", VMENTER_L1D_FLUSH_COND}, {"always", VMENTER_L1D_FLUSH_ALWAYS}, @@ -13235,8 +13229,12 @@ static int __init vmx_setup_l1d_flush(void) { struct page *page; + if (!boot_cpu_has_bug(X86_BUG_L1TF)) + return 0; + + l1tf_vmx_mitigation = vmentry_l1d_flush; + if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER || - !boot_cpu_has_bug(X86_BUG_L1TF) || vmx_l1d_use_msr_save_list()) return 0; @@ -13251,12 +13249,14 @@ static int __init vmx_setup_l1d_flush(void) return 0; } -static void vmx_free_l1d_flush_pages(void) +static void vmx_cleanup_l1d_flush(void) { if (vmx_l1d_flush_pages) { free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER); vmx_l1d_flush_pages = NULL; } + /* Restore state so sysfs ignores VMX */ + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; } static int __init vmx_init(void) @@ -13299,7 +13299,7 @@ static int __init vmx_init(void) r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx), THIS_MODULE); if (r) { - vmx_free_l1d_flush_pages(); + vmx_cleanup_l1d_flush(); return r; } @@ -13343,7 +13343,7 @@ static void __exit vmx_exit(void) static_branch_disable(&enable_evmcs); } #endif - vmx_free_l1d_flush_pages(); + vmx_cleanup_l1d_flush(); } module_init(vmx_init) -- cgit v1.2.3-59-g8ed1b From a7b9020b06ec6d7c3f3b0d4ef1a9eba12654f4f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:18 +0200 Subject: x86/l1tf: Handle EPT disabled state properly If Extended Page Tables (EPT) are disabled or not supported, no L1D flushing is required. The setup function can just avoid setting up the L1D flush for the EPT=n case. Invoke it after the hardware setup has been done and enable_ept has the correct state and expose the EPT disabled state in the mitigation status as well.
Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142322.612160168@linutronix.de --- arch/x86/include/asm/vmx.h | 1 + arch/x86/kernel/cpu/bugs.c | 9 ++--- arch/x86/kvm/vmx.c | 89 +++++++++++++++++++++++++--------------------- 3 files changed, 54 insertions(+), 45 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index c98aa9aed056..94a8547d915b 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -578,6 +578,7 @@ enum vmx_l1d_flush_state { VMENTER_L1D_FLUSH_NEVER, VMENTER_L1D_FLUSH_COND, VMENTER_L1D_FLUSH_ALWAYS, + VMENTER_L1D_FLUSH_EPT_DISABLED, }; extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8aba7d3abccb..1b3dc586d29d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -697,10 +697,11 @@ static void __init l1tf_select_mitigation(void) #if IS_ENABLED(CONFIG_KVM_INTEL) static const char *l1tf_vmx_states[] = { - [VMENTER_L1D_FLUSH_AUTO] = "auto", - [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", - [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", - [VMENTER_L1D_FLUSH_ALWAYS] = "cache flushes", + [VMENTER_L1D_FLUSH_AUTO] = "auto", + [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", + [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", + [VMENTER_L1D_FLUSH_ALWAYS] = "cache flushes", + [VMENTER_L1D_FLUSH_EPT_DISABLED] = "EPT disabled", }; static ssize_t l1tf_show_state(char *buf) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e553e43a114c..78593550059b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -13204,6 +13204,11 @@ static int __init vmx_setup_l1d_flush(void) if (!boot_cpu_has_bug(X86_BUG_L1TF)) return 0; + if (!enable_ept) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; + return 0; + } + l1tf_vmx_mitigation = vmentry_l1d_flush; if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER) @@ -13230,6 +13235,41 @@ static void vmx_cleanup_l1d_flush(void) l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; } +static void vmx_exit(void) +{ +#ifdef CONFIG_KEXEC_CORE + RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); + synchronize_rcu(); +#endif + + kvm_exit(); + +#if IS_ENABLED(CONFIG_HYPERV) + if (static_branch_unlikely(&enable_evmcs)) { + int cpu; + struct hv_vp_assist_page *vp_ap; + /* + * Reset everything to support using non-enlightened VMCS + * access later (e.g. 
when we reload the module with + * enlightened_vmcs=0) + */ + for_each_online_cpu(cpu) { + vp_ap = hv_get_vp_assist_page(cpu); + + if (!vp_ap) + continue; + + vp_ap->current_nested_vmcs = 0; + vp_ap->enlighten_vmentry = 0; + } + + static_branch_disable(&enable_evmcs); + } +#endif + vmx_cleanup_l1d_flush(); +} +module_exit(vmx_exit); + static int __init vmx_init(void) { int r; @@ -13263,14 +13303,17 @@ static int __init vmx_init(void) } #endif - r = vmx_setup_l1d_flush(); + r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), + __alignof__(struct vcpu_vmx), THIS_MODULE); if (r) return r; - r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), - __alignof__(struct vcpu_vmx), THIS_MODULE); + /* + * Must be called after kvm_init() so enable_ept is properly set up + */ + r = vmx_setup_l1d_flush(); if (r) { - vmx_cleanup_l1d_flush(); + vmx_exit(); return r; } @@ -13282,40 +13325,4 @@ static int __init vmx_init(void) return 0; } - -static void __exit vmx_exit(void) -{ -#ifdef CONFIG_KEXEC_CORE - RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); - synchronize_rcu(); -#endif - - kvm_exit(); - -#if IS_ENABLED(CONFIG_HYPERV) - if (static_branch_unlikely(&enable_evmcs)) { - int cpu; - struct hv_vp_assist_page *vp_ap; - /* - * Reset everything to support using non-enlightened VMCS - * access later (e.g. when we reload the module with - * enlightened_vmcs=0) - */ - for_each_online_cpu(cpu) { - vp_ap = hv_get_vp_assist_page(cpu); - - if (!vp_ap) - continue; - - vp_ap->current_nested_vmcs = 0; - vp_ap->enlighten_vmentry = 0; - } - - static_branch_disable(&enable_evmcs); - } -#endif - vmx_cleanup_l1d_flush(); -} - -module_init(vmx_init) -module_exit(vmx_exit) +module_init(vmx_init); -- cgit v1.2.3-59-g8ed1b From 895ae47f9918833c3a880fbccd41e0692b37e7d9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:22 +0200 Subject: x86/kvm: Allow runtime control of L1D flush All mitigation modes can be switched at run time with a static key now: - Use sysfs_streq() instead of strcmp() to handle the trailing new line from sysfs writes correctly. - Make the static key management handle multiple invocations properly. 
- Set the module parameter file to RW Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142322.954525119@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kvm/vmx.c | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1b3dc586d29d..7df7df9a2753 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -660,7 +660,7 @@ void x86_spec_ctrl_setup_ap(void) #define pr_fmt(fmt) "L1TF: " fmt #if IS_ENABLED(CONFIG_KVM_INTEL) -enum vmx_l1d_flush_state l1tf_vmx_mitigation __ro_after_init = VMENTER_L1D_FLUSH_AUTO; +enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation); #endif diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 12b1a5dae767..08fcd59342ed 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -234,12 +234,15 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) l1tf_vmx_mitigation = l1tf; - if (l1tf == VMENTER_L1D_FLUSH_NEVER) - return 0; + if (l1tf != VMENTER_L1D_FLUSH_NEVER) + static_branch_enable(&vmx_l1d_should_flush); + else + static_branch_disable(&vmx_l1d_should_flush); - static_branch_enable(&vmx_l1d_should_flush); if (l1tf == VMENTER_L1D_FLUSH_ALWAYS) static_branch_enable(&vmx_l1d_flush_always); + else + static_branch_disable(&vmx_l1d_flush_always); return 0; } @@ -249,7 +252,7 @@ static int vmentry_l1d_flush_parse(const char *s) if (s) { for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { - if (!strcmp(s, vmentry_l1d_param[i].option)) + if (sysfs_streq(s, vmentry_l1d_param[i].option)) return vmentry_l1d_param[i].cmd; } } @@ -293,7 +296,7 @@ static const struct kernel_param_ops vmentry_l1d_flush_ops = { .set = vmentry_l1d_flush_set, .get = vmentry_l1d_flush_get, }; -module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, S_IRUGO); +module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644); struct kvm_vmx { struct kvm kvm; -- cgit v1.2.3-59-g8ed1b From fee0aede6f4739c87179eca76136f83210953b86 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Jul 2018 16:23:24 +0200 Subject: cpu/hotplug: Set CPU_SMT_NOT_SUPPORTED early The CPU_SMT_NOT_SUPPORTED state is set (if the processor does not support SMT) when the sysfs SMT control file is initialized. That was fine so far as this was only required to make the output of the control file correct and to prevent writes in that case. With the upcoming l1tf command line parameter, this needs to be set up before the L1TF mitigation selection and command line parsing happens. Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142323.121795971@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 6 ++++++ include/linux/cpu.h | 2 ++ kernel/cpu.c | 13 ++++++++++--- 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7df7df9a2753..4be69a7bef0b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -58,6 +58,12 @@ void __init check_bugs(void) { identify_boot_cpu(); + /* + * identify_boot_cpu() initialized SMT support information, let the + * core code know. 
+ */ + cpu_smt_check_topology(); + if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); print_cpu_info(&boot_cpu_data); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index c9b23ad27b38..ccdf3a67ce56 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -178,9 +178,11 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); +extern void cpu_smt_check_topology(void); #else # define cpu_smt_control (CPU_SMT_ENABLED) static inline void cpu_smt_disable(bool force) { } +static inline void cpu_smt_check_topology(void) { } #endif #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 8453e31f2d1a..37eec872042b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -361,6 +361,16 @@ void __init cpu_smt_disable(bool force) } } +/* + * The decision whether SMT is supported can only be done after the full + * CPU identification. Called from architecture code. + */ +void __init cpu_smt_check_topology(void) +{ + if (!topology_smt_supported()) + cpu_smt_control = CPU_SMT_NOT_SUPPORTED; +} + static int __init smt_cmdline_disable(char *str) { cpu_smt_disable(str && !strcmp(str, "force")); @@ -2115,9 +2125,6 @@ static const struct attribute_group cpuhp_smt_attr_group = { static int __init cpu_smt_state_init(void) { - if (!topology_smt_supported()) - cpu_smt_control = CPU_SMT_NOT_SUPPORTED; - return sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpuhp_smt_attr_group); } -- cgit v1.2.3-59-g8ed1b From d90a7a0ec83fb86622cd7dae23255d3c50a99ec8 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 13 Jul 2018 16:23:25 +0200 Subject: x86/bugs, kvm: Introduce boot-time control of L1TF mitigations Introduce the 'l1tf=' kernel command line option to allow for boot-time switching of the mitigation that is used on processors affected by L1TF. The possible values are: full Provides all available mitigations for the L1TF vulnerability. Disables SMT and enables all mitigations in the hypervisors. SMT control via /sys/devices/system/cpu/smt/control is still possible after boot. Hypervisors will issue a warning when the first VM is started in a potentially insecure configuration, i.e. SMT enabled or L1D flush disabled. full,force Same as 'full', but disables SMT control. Implies the 'nosmt=force' command line option. sysfs control of SMT and the hypervisor flush control is disabled. flush Leaves SMT enabled and enables the conditional hypervisor mitigation. Hypervisors will issue a warning when the first VM is started in a potentially insecure configuration, i.e. SMT enabled or L1D flush disabled. flush,nosmt Disables SMT and enables the conditional hypervisor mitigation. SMT control via /sys/devices/system/cpu/smt/control is still possible after boot. If SMT is re-enabled or flushing is disabled at runtime, hypervisors will issue a warning. flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is started in a potentially insecure configuration. off Disables hypervisor mitigations and doesn't emit any warnings. Default is 'flush'. Let KVM adhere to these semantics, which means: - 'l1tf=full,force' : Perform L1D flushes. No runtime control possible. - 'l1tf=full' - 'l1tf=flush' - 'l1tf=flush,nosmt' : Perform L1D flushes and warn on VM start if SMT has been runtime enabled or L1D flushing has been run-time disabled - 'l1tf=flush,nowarn' : Perform L1D flushes and no warnings are emitted. - 'l1tf=off' : L1D flushes are not performed and no warnings are emitted.
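As an aside, the mapping these semantics imply for a hypervisor module fits in a few lines. The sketch below is illustrative only: the helper name is hypothetical, while the enum values are the ones this patch introduces, and the switch mirrors the vmx_setup_l1d_flush() logic in the diff further down.

#include <asm/processor.h>	/* enum l1tf_mitigations, l1tf_mitigation */
#include <asm/vmx.h>		/* enum vmx_l1d_flush_state */

/* Hypothetical helper, not part of this patch. */
static enum vmx_l1d_flush_state l1tf_default_flush_mode(void)
{
	switch (l1tf_mitigation) {
	case L1TF_MITIGATION_OFF:
		return VMENTER_L1D_FLUSH_NEVER;
	case L1TF_MITIGATION_FLUSH_NOWARN:
	case L1TF_MITIGATION_FLUSH:
	case L1TF_MITIGATION_FLUSH_NOSMT:
		return VMENTER_L1D_FLUSH_COND;
	case L1TF_MITIGATION_FULL:
	case L1TF_MITIGATION_FULL_FORCE:
		return VMENTER_L1D_FLUSH_ALWAYS;
	}
	/* Not reached; keep the compiler happy. */
	return VMENTER_L1D_FLUSH_COND;
}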
KVM can always override the L1D flushing behavior using its 'vmentry_l1d_flush' module parameter except when l1tf=full,force is set. This makes KVM's private 'nosmt' option redundant, and as it is a bit non-systematic anyway (this is something to control globally, not on hypervisor level), remove that option. Add the missing Documentation entry for the l1tf vulnerability sysfs file while at it. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20180713142323.202758176@linutronix.de --- Documentation/ABI/testing/sysfs-devices-system-cpu | 4 ++ Documentation/admin-guide/kernel-parameters.txt | 68 ++++++++++++++++++++-- arch/x86/include/asm/processor.h | 12 ++++ arch/x86/kernel/cpu/bugs.c | 44 ++++++++++++++ arch/x86/kvm/vmx.c | 56 +++++++++++++----- 5 files changed, 165 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 65d9b844ecfd..73318225a368 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -476,6 +476,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 /sys/devices/system/cpu/vulnerabilities/spec_store_bypass + /sys/devices/system/cpu/vulnerabilities/l1tf Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities @@ -488,6 +489,9 @@ Description: Information about CPU vulnerabilities "Vulnerable" CPU is affected and no mitigation in effect "Mitigation: $M" CPU is affected and mitigation $M is in effect + Details about the l1tf file can be found in + Documentation/admin-guide/l1tf.rst + What: /sys/devices/system/cpu/smt /sys/devices/system/cpu/smt/active /sys/devices/system/cpu/smt/control diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4f790566ad91..9a08a7aa2bc3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1946,12 +1946,6 @@ [KVM,ARM] Allow use of GICv4 for direct injection of LPIs. - kvm-intel.nosmt=[KVM,Intel] If the L1TF CPU bug is present (CVE-2018-3620) - and the system has SMT (aka Hyper-Threading) enabled then - don't allow guests to be created. - - Default is 0 (allow guests to be created). - kvm-intel.ept= [KVM,Intel] Disable extended page tables (virtualized MMU) support on capable Intel chips. Default is 1 (enabled) @@ -1989,6 +1983,68 @@ feature (tagged TLBs) on capable Intel chips. Default is 1 (enabled) + l1tf= [X86] Control mitigation of the L1TF vulnerability on + affected CPUs + + The kernel PTE inversion protection is unconditionally + enabled and cannot be disabled. + + full + Provides all available mitigations for the + L1TF vulnerability. Disables SMT and + enables all mitigations in the + hypervisors, i.e. unconditional L1D flush. + + SMT control and L1D flush control via the + sysfs interface is still possible after + boot. Hypervisors will issue a warning + when the first VM is started in a + potentially insecure configuration, + i.e. SMT enabled or L1D flush disabled. + + full,force + Same as 'full', but disables SMT and L1D + flush runtime control. Implies the + 'nosmt=force' command line option. + (i.e. sysfs control of SMT is disabled.)
+ + flush + Leaves SMT enabled and enables the default + hypervisor mitigation, i.e. conditional + L1D flush. + + SMT control and L1D flush control via the + sysfs interface is still possible after + boot. Hypervisors will issue a warning + when the first VM is started in a + potentially insecure configuration, + i.e. SMT enabled or L1D flush disabled. + + flush,nosmt + + Disables SMT and enables the default + hypervisor mitigation. + + SMT control and L1D flush control via the + sysfs interface is still possible after + boot. Hypervisors will issue a warning + when the first VM is started in a + potentially insecure configuration, + i.e. SMT enabled or L1D flush disabled. + + flush,nowarn + Same as 'flush', but hypervisors will not + warn when a VM is started in a potentially + insecure configuration. + + off + Disables hypervisor mitigations and doesn't + emit any warnings. + + Default is 'flush'. + + For details see: Documentation/admin-guide/l1tf.rst + l2cr= [PPC] l3cr= [PPC] diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7e3ac5eedcd6..79e409974ccc 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -982,4 +982,16 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); void microcode_check(void); + +enum l1tf_mitigations { + L1TF_MITIGATION_OFF, + L1TF_MITIGATION_FLUSH_NOWARN, + L1TF_MITIGATION_FLUSH, + L1TF_MITIGATION_FLUSH_NOSMT, + L1TF_MITIGATION_FULL, + L1TF_MITIGATION_FULL_FORCE +}; + +extern enum l1tf_mitigations l1tf_mitigation; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4be69a7bef0b..fa6123bdd032 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -665,7 +665,11 @@ void x86_spec_ctrl_setup_ap(void) #undef pr_fmt #define pr_fmt(fmt) "L1TF: " fmt +/* Default mitigation for L1TF-affected CPUs */ +enum l1tf_mitigations l1tf_mitigation __ro_after_init = L1TF_MITIGATION_FLUSH; #if IS_ENABLED(CONFIG_KVM_INTEL) +EXPORT_SYMBOL_GPL(l1tf_mitigation); + enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation); #endif @@ -677,6 +681,20 @@ static void __init l1tf_select_mitigation(void) if (!boot_cpu_has_bug(X86_BUG_L1TF)) return; + switch (l1tf_mitigation) { + case L1TF_MITIGATION_OFF: + case L1TF_MITIGATION_FLUSH_NOWARN: + case L1TF_MITIGATION_FLUSH: + break; + case L1TF_MITIGATION_FLUSH_NOSMT: + case L1TF_MITIGATION_FULL: + cpu_smt_disable(false); + break; + case L1TF_MITIGATION_FULL_FORCE: + cpu_smt_disable(true); + break; + } + #if CONFIG_PGTABLE_LEVELS == 2 pr_warn("Kernel not compiled for PAE. 
No mitigation for L1TF\n"); return; @@ -695,6 +713,32 @@ static void __init l1tf_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV); } + +static int __init l1tf_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_L1TF)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + l1tf_mitigation = L1TF_MITIGATION_OFF; + else if (!strcmp(str, "flush,nowarn")) + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOWARN; + else if (!strcmp(str, "flush")) + l1tf_mitigation = L1TF_MITIGATION_FLUSH; + else if (!strcmp(str, "flush,nosmt")) + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT; + else if (!strcmp(str, "full")) + l1tf_mitigation = L1TF_MITIGATION_FULL; + else if (!strcmp(str, "full,force")) + l1tf_mitigation = L1TF_MITIGATION_FULL_FORCE; + + return 0; +} +early_param("l1tf", l1tf_cmdline); + #undef pr_fmt #ifdef CONFIG_SYSFS diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 08fcd59342ed..c5c0118b126d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -71,9 +71,6 @@ static const struct x86_cpu_id vmx_cpu_id[] = { }; MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); -static bool __read_mostly nosmt; -module_param(nosmt, bool, S_IRUGO); - static bool __read_mostly enable_vpid = 1; module_param_named(vpid, enable_vpid, bool, 0444); @@ -215,15 +212,31 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { struct page *page; - /* If set to 'auto' select 'cond' */ - if (l1tf == VMENTER_L1D_FLUSH_AUTO) - l1tf = VMENTER_L1D_FLUSH_COND; - if (!enable_ept) { l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; return 0; } + /* If set to auto use the default l1tf mitigation method */ + if (l1tf == VMENTER_L1D_FLUSH_AUTO) { + switch (l1tf_mitigation) { + case L1TF_MITIGATION_OFF: + l1tf = VMENTER_L1D_FLUSH_NEVER; + break; + case L1TF_MITIGATION_FLUSH_NOWARN: + case L1TF_MITIGATION_FLUSH: + case L1TF_MITIGATION_FLUSH_NOSMT: + l1tf = VMENTER_L1D_FLUSH_COND; + break; + case L1TF_MITIGATION_FULL: + case L1TF_MITIGATION_FULL_FORCE: + l1tf = VMENTER_L1D_FLUSH_ALWAYS; + break; + } + } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) { + l1tf = VMENTER_L1D_FLUSH_ALWAYS; + } + if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); @@ -10571,19 +10584,36 @@ free_vcpu: return ERR_PTR(err); } -#define L1TF_MSG "SMT enabled with L1TF CPU bug present. Refer to CVE-2018-3620 for details.\n" +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" static int vmx_vm_init(struct kvm *kvm) { if (!ple_gap) kvm->arch.pause_in_guest = true; - if (boot_cpu_has(X86_BUG_L1TF) && cpu_smt_control == CPU_SMT_ENABLED) { - if (nosmt) { - pr_err(L1TF_MSG); - return -EOPNOTSUPP; + if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) { + switch (l1tf_mitigation) { + case L1TF_MITIGATION_OFF: + case L1TF_MITIGATION_FLUSH_NOWARN: + /* 'I explicitly don't care' is set */ + break; + case L1TF_MITIGATION_FLUSH: + case L1TF_MITIGATION_FLUSH_NOSMT: + case L1TF_MITIGATION_FULL: + /* + * Warn upon starting the first VM in a potentially + * insecure environment. 
+ */ + if (cpu_smt_control == CPU_SMT_ENABLED) + pr_warn_once(L1TF_MSG_SMT); + if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) + pr_warn_once(L1TF_MSG_L1D); + break; + case L1TF_MITIGATION_FULL_FORCE: + /* Flush is enforced */ + break; } - pr_warn(L1TF_MSG); } return 0; } -- cgit v1.2.3-59-g8ed1b From 447ae316670230d7d29430e2cbf1f5db4f49d14c Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Sun, 29 Jul 2018 12:15:33 +0200 Subject: x86: Don't include linux/irq.h from asm/hardirq.h The next patch in this series will have to make the definition of irq_cpustat_t available to entering_irq(). Inclusion of asm/hardirq.h into asm/apic.h would cause circular header dependencies like asm/smp.h -> asm/apic.h -> asm/hardirq.h -> linux/irq.h -> linux/topology.h -> linux/smp.h -> asm/smp.h or linux/gfp.h -> linux/mmzone.h -> asm/mmzone.h -> asm/mmzone_64.h -> asm/smp.h -> asm/apic.h -> asm/hardirq.h -> linux/irq.h -> linux/irqdesc.h -> linux/kobject.h -> linux/sysfs.h -> linux/kernfs.h -> linux/idr.h -> linux/gfp.h and others. This causes compilation errors because of the header guards becoming effective in the second inclusion: symbols/macros that had been defined before wouldn't be available to intermediate headers in the #include chain anymore. A possible workaround would be to move the definition of irq_cpustat_t into its own header and include that from both asm/hardirq.h and asm/apic.h. However, this wouldn't solve the real problem, namely asm/hardirq.h unnecessarily pulling in all the linux/irq.h cruft: nothing in asm/hardirq.h itself requires it. Also, note that there are some other archs, e.g. arm64, which don't have that #include in their asm/hardirq.h. Remove the linux/irq.h #include from x86's asm/hardirq.h. Fix resulting compilation errors by adding appropriate #includes to *.c files as needed. Note that some of these *.c files could be cleaned up a bit with respect to their set of #includes, but that is better done in separate patches, if at all.
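A minimal, self-contained illustration of that failure mode (hypothetical file names, not kernel headers): each header is guarded and each needs a type from the other, so whichever header is entered second sees its circular #include reduced to a no-op and uses a type that has not been defined yet.

/* a.h */
#ifndef A_H
#define A_H
#include "b.h"
typedef b_t a_t;	/* a.h needs b_t from b.h */
#endif

/* b.h */
#ifndef B_H
#define B_H
#include "a.h"		/* no-op if we arrived here from a.h */
typedef int b_t;
typedef a_t c_t;	/* b.h needs a_t from a.h */
#endif

/* main.c */
#include "a.h"
/*
 * a.h defines A_H and pulls in b.h; b.h re-includes a.h, but A_H is
 * already set so the guard discards it, and b.h's subsequent use of
 * a_t fails: "error: unknown type name 'a_t'".
 */
int main(void) { return 0; }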
Signed-off-by: Nicolai Stange Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/dmi.h | 2 +- arch/x86/include/asm/hardirq.h | 1 - arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kernel/apic/apic.c | 1 + arch/x86/kernel/apic/io_apic.c | 1 + arch/x86/kernel/apic/msi.c | 1 + arch/x86/kernel/apic/vector.c | 1 + arch/x86/kernel/fpu/core.c | 1 + arch/x86/kernel/hpet.c | 1 + arch/x86/kernel/i8259.c | 1 + arch/x86/kernel/idt.c | 1 + arch/x86/kernel/irq.c | 1 + arch/x86/kernel/irq_32.c | 1 + arch/x86/kernel/irq_64.c | 1 + arch/x86/kernel/irqinit.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/x86/kernel/time.c | 1 + arch/x86/mm/pti.c | 1 + arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c | 1 + arch/x86/xen/enlighten.c | 1 + drivers/gpu/drm/i915/i915_pmu.c | 1 + drivers/gpu/drm/i915/intel_lpe_audio.c | 1 + drivers/pci/controller/pci-hyperv.c | 1 + 23 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index 0ab2ab27ad1f..b825cb201251 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h @@ -4,8 +4,8 @@ #include #include +#include -#include #include static __always_inline __init void *dmi_alloc(unsigned len) diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 0b854ebb99df..d9069bb26c7f 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -3,7 +3,6 @@ #define _ASM_X86_HARDIRQ_H #include -#include typedef struct { u16 __softirq_pending; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 57d418061c55..37749429afd9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 8703caa9d6db..f93a21b01072 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -56,6 +56,7 @@ #include #include #include +#include unsigned int num_processors; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3982f79d2377..ff0d14cd9e82 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index ce503c99f5c4..72a94401f9e0 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 35aaee4fc028..c9b773401fd8 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -11,6 +11,7 @@ * published by the Free Software Foundation. 
*/ #include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index f92a6593de1e..2ea85b32421a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 346b24883911..b0acb22e5a46 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 86c4439f9d74..519649ddf100 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 74383a3780dc..01adea278a71 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -8,6 +8,7 @@ #include #include #include +#include struct idt_data { unsigned int vector; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 328d027d829d..59b5f2ea7c2f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index c1bdbd3d3232..95600a99ae93 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index d86e344f5b3d..0469cd078db1 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 772196c1b8c4..a0693b71cfc1 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7f7def989fb0..f5d30c68fd09 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -80,6 +80,7 @@ #include #include #include +#include /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 774ebafa97c4..be01328eb755 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 4d418e705878..fb752d9a3ce9 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -45,6 +45,7 @@ #include #include #include +#include #undef pr_fmt #define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c index 4f5fa65a1011..2acd6be13375 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c @@ -18,6 +18,7 @@ #include #include #include +#include #define TANGIER_EXT_TIMER0_MSI 12 diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c9081c6671f0..df208af3cd74 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -3,6 +3,7 @@ #endif #include #include +#include #include #include diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 
dc87797db500..b50b74053664 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -4,6 +4,7 @@ * Copyright © 2017-2018 Intel Corporation */ +#include #include "i915_pmu.h" #include "intel_ringbuffer.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 6269750e2b54..b4941101f21a 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -62,6 +62,7 @@ #include #include +#include #include #include diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 6cc5036ac83c..fadc305533d9 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-59-g8ed1b From ffcba43ff66c7dab34ec700debd491d2a4d319b4 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Sun, 29 Jul 2018 13:06:04 +0200 Subject: x86/irq: Let interrupt handlers set kvm_cpu_l1tf_flush_l1d The last missing piece to having vmx_l1d_flush() take interrupts after VMEXIT into account is to set the kvm_cpu_l1tf_flush_l1d per-cpu flag on irq entry. Issue calls to kvm_set_cpu_l1tf_flush_l1d() from entering_irq(), ipi_entering_ack_irq(), smp_reschedule_interrupt() and uv_bau_message_interrupt(). Suggested-by: Paolo Bonzini Signed-off-by: Nicolai Stange Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/apic.h | 3 +++ arch/x86/kernel/smp.c | 1 + arch/x86/platform/uv/tlb_uv.c | 1 + 3 files changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 9362a3aae927..130e81e10fc7 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -10,6 +10,7 @@ #include #include #include +#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -514,6 +515,7 @@ extern void irq_exit(void); static inline void entering_irq(void) { irq_enter(); + kvm_set_cpu_l1tf_flush_l1d(); } static inline void entering_ack_irq(void) @@ -526,6 +528,7 @@ static inline void ipi_entering_ack_irq(void) { irq_enter(); ack_APIC_irq(); + kvm_set_cpu_l1tf_flush_l1d(); } static inline void exiting_irq(void) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 5c574dff4c1a..04adc8d60aed 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -261,6 +261,7 @@ __visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); + kvm_set_cpu_l1tf_flush_l1d(); if (trace_resched_ipi_enabled()) { /* diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index ca446da48fd2..3866b96a7ee7 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1285,6 +1285,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs) struct msg_desc msgdesc; ack_APIC_irq(); + kvm_set_cpu_l1tf_flush_l1d(); time_start = get_cycles(); bcp = &per_cpu(bau_control, smp_processor_id()); -- cgit v1.2.3-59-g8ed1b From ea156d192f5257a5bf393d33910d3b481bf8a401 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 5 Aug 2018 16:07:45 +0200 Subject: x86/speculation: Simplify sysfs report of VMX L1TF vulnerability Three changes to the content of the sysfs file: - If EPT is disabled, L1TF cannot be exploited even across threads on the same core, and SMT is irrelevant. 
- If mitigation is completely disabled, and SMT is enabled, print "vulnerable" instead of "vulnerable, SMT vulnerable" - Reorder the two parts so that the main vulnerability state comes first and the detail on SMT is second. Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/bugs.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index eb4b574be237..b0886bd24164 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -762,9 +762,15 @@ static ssize_t l1tf_show_state(char *buf) if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); - return sprintf(buf, "%s; VMX: SMT %s, L1D %s\n", L1TF_DEFAULT_MSG, - cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled", - l1tf_vmx_states[l1tf_vmx_mitigation]); + if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || + (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && + cpu_smt_control == CPU_SMT_ENABLED)) + return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation]); + + return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, + l1tf_vmx_states[l1tf_vmx_mitigation], + cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled"); } #else static ssize_t l1tf_show_state(char *buf) -- cgit v1.2.3-59-g8ed1b From 8e0b2b916662e09dd4d09e5271cdf214c6b80e62 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 5 Aug 2018 16:07:46 +0200 Subject: x86/speculation: Use ARCH_CAPABILITIES to skip L1D flush on vmentry Bit 3 of ARCH_CAPABILITIES tells a hypervisor that L1D flush on vmentry is not needed. Add a new value to enum vmx_l1d_flush_state, which is used either if there is no L1TF bug at all, or if bit 3 is set in ARCH_CAPABILITIES. 
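For reference, the check this adds boils down to a few lines. The helper name below is hypothetical; the accessors and bit definition are the ones used in the diff.

#include <linux/types.h>
#include <asm/cpufeature.h>	/* boot_cpu_has() */
#include <asm/msr.h>		/* rdmsrl() */
#include <asm/msr-index.h>

/* Hypothetical helper: does the CPU declare the L1D flush on vmentry
 * unnecessary via IA32_ARCH_CAPABILITIES bit 3? */
static bool l1d_flush_not_required(void)
{
	u64 msr;

	if (!boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
		return false;

	rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
	return !!(msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH);
}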
Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/vmx.h | 1 + arch/x86/kernel/cpu/bugs.c | 1 + arch/x86/kvm/vmx.c | 10 ++++++++++ 4 files changed, 13 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 0e7517089b80..4731f0cf97c5 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -70,6 +70,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO (1 << 4) /* * Not susceptible to Speculative Store Bypass * attack, so no Speculative Store Bypass diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 91241ccd4cfa..95f9107449bf 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -582,6 +582,7 @@ enum vmx_l1d_flush_state { VMENTER_L1D_FLUSH_COND, VMENTER_L1D_FLUSH_ALWAYS, VMENTER_L1D_FLUSH_EPT_DISABLED, + VMENTER_L1D_FLUSH_NOT_REQUIRED, }; extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b0886bd24164..86f4549b7c9f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -755,6 +755,7 @@ static const char *l1tf_vmx_states[] = { [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", [VMENTER_L1D_FLUSH_ALWAYS] = "cache flushes", [VMENTER_L1D_FLUSH_EPT_DISABLED] = "EPT disabled", + [VMENTER_L1D_FLUSH_NOT_REQUIRED] = "flush not necessary" }; static ssize_t l1tf_show_state(char *buf) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8384f7cb30d8..29d6eab6920e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -218,6 +218,16 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) return 0; } + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { + u64 msr; + + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); + if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; + return 0; + } + } + /* If set to auto use the default l1tf mitigation method */ if (l1tf == VMENTER_L1D_FLUSH_AUTO) { switch (l1tf_mitigation) { -- cgit v1.2.3-59-g8ed1b From bc2d8d262cba5736332cbc866acb11b1c5748aa9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Aug 2018 08:19:57 +0200 Subject: cpu/hotplug: Fix SMT supported evaluation Josh reported that the late SMT evaluation in cpu_smt_state_init() sets cpu_smt_control to CPU_SMT_NOT_SUPPORTED in case that 'nosmt' was supplied on the kernel command line as it cannot differentiate between SMT disabled by BIOS and SMT soft disable via 'nosmt'. That wrecks the state and makes the sysfs interface unusable. Rework this so that during bringup of the non-boot CPUs the availability of SMT is determined in cpu_smt_allowed(). If a newly booted CPU is not a 'primary' thread, set the local cpu_smt_available marker and evaluate this explicitly right after the initial SMP bringup has finished. SMT evaluation on x86 is a trainwreck as the firmware has all the information _before_ booting the kernel, but there is no interface to query it.
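The patch records SMT availability as a side effect of bringing up the siblings. An equivalent standalone check against the topology sibling masks, sketched below as a hypothetical helper that is only meaningful after SMP bringup, shows the same idea in isolation.

#include <linux/cpumask.h>
#include <linux/topology.h>

/* Hypothetical illustration: once the non-boot CPUs are up, any CPU
 * with more than one thread in its sibling mask proves SMT exists. */
static bool smt_detected_after_bringup(void)
{
	unsigned int cpu;

	for_each_online_cpu(cpu) {
		if (cpumask_weight(topology_sibling_cpumask(cpu)) > 1)
			return true;
	}
	return false;
}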
Fixes: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS") Reported-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/bugs.c | 2 +- include/linux/cpu.h | 2 ++ kernel/cpu.c | 41 ++++++++++++++++++++++++++++------------- kernel/smp.c | 2 ++ 4 files changed, 33 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 86f4549b7c9f..be79b7599dda 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -63,7 +63,7 @@ void __init check_bugs(void) * identify_boot_cpu() initialized SMT support information, let the * core code know. */ - cpu_smt_check_topology(); + cpu_smt_check_topology_early(); if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index ccdf3a67ce56..b216bd5bfd20 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -178,10 +178,12 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); +extern void cpu_smt_check_topology_early(void); extern void cpu_smt_check_topology(void); #else # define cpu_smt_control (CPU_SMT_ENABLED) static inline void cpu_smt_disable(bool force) { } +static inline void cpu_smt_check_topology_early(void) { } static inline void cpu_smt_check_topology(void) { } #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 2bc3d2f5b2a5..9d2512dd263c 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -347,6 +347,8 @@ EXPORT_SYMBOL_GPL(cpu_hotplug_enable); enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; EXPORT_SYMBOL_GPL(cpu_smt_control); +static bool cpu_smt_available __read_mostly; + void __init cpu_smt_disable(bool force) { if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || @@ -363,14 +365,28 @@ void __init cpu_smt_disable(bool force) /* * The decision whether SMT is supported can only be done after the full - * CPU identification. Called from architecture code. + * CPU identification. Called from architecture code before non boot CPUs + * are brought up. */ -void __init cpu_smt_check_topology(void) +void __init cpu_smt_check_topology_early(void) { if (!topology_smt_supported()) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; } +/* + * If SMT was disabled by BIOS, detect it here, after the CPUs have been + * brought online. This ensures the smt/l1tf sysfs entries are consistent + * with reality. cpu_smt_available is set to true during the bringup of non + * boot CPUs when a SMT sibling is detected. Note, this may overwrite + * cpu_smt_control's previous setting. + */ +void __init cpu_smt_check_topology(void) +{ + if (!cpu_smt_available) + cpu_smt_control = CPU_SMT_NOT_SUPPORTED; +} + static int __init smt_cmdline_disable(char *str) { cpu_smt_disable(str && !strcmp(str, "force")); @@ -380,10 +396,18 @@ early_param("nosmt", smt_cmdline_disable); static inline bool cpu_smt_allowed(unsigned int cpu) { - if (cpu_smt_control == CPU_SMT_ENABLED) + if (topology_is_primary_thread(cpu)) return true; - if (topology_is_primary_thread(cpu)) + /* + * If the CPU is not a 'primary' thread and the booted_once bit is + * set then the processor has SMT support. Store this information + * for the late check of SMT support in cpu_smt_check_topology(). 
+ */ + if (per_cpu(cpuhp_state, cpu).booted_once) + cpu_smt_available = true; + + if (cpu_smt_control == CPU_SMT_ENABLED) return true; /* @@ -2125,15 +2149,6 @@ static const struct attribute_group cpuhp_smt_attr_group = { static int __init cpu_smt_state_init(void) { - /* - * If SMT was disabled by BIOS, detect it here, after the CPUs have - * been brought online. This ensures the smt/l1tf sysfs entries are - * consistent with reality. Note this may overwrite cpu_smt_control's - * previous setting. - */ - if (topology_max_smt_threads() == 1) - cpu_smt_control = CPU_SMT_NOT_SUPPORTED; - return sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpuhp_smt_attr_group); } diff --git a/kernel/smp.c b/kernel/smp.c index 084c8b3a2681..d86eec5f51c1 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -584,6 +584,8 @@ void __init smp_init(void) num_nodes, (num_nodes > 1 ? "s" : ""), num_cpus, (num_cpus > 1 ? "s" : "")); + /* Final decision about SMT support */ + cpu_smt_check_topology(); /* Any cleanup work */ smp_cpus_done(setup_max_cpus); } -- cgit v1.2.3-59-g8ed1b From 07d981ad4cf1e78361c6db1c28ee5ba105f96cc1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 10 Aug 2018 08:31:10 +0100 Subject: x86/microcode: Allow late microcode loading with SMT disabled The kernel unnecessarily prevents late microcode loading when SMT is disabled. It should be safe to allow it if all the primary threads are online. Signed-off-by: Josh Poimboeuf Acked-by: Borislav Petkov Signed-off-by: David Woodhouse --- arch/x86/kernel/cpu/microcode/core.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 08286269fd24..b9bc8a1a584e 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -509,12 +509,20 @@ static struct platform_device *microcode_pdev; static int check_online_cpus(void) { - if (num_online_cpus() == num_present_cpus()) - return 0; + unsigned int cpu; - pr_err("Not all CPUs online, aborting microcode update.\n"); + /* + * Make sure all CPUs are online. It's fine for SMT to be disabled if + * all the primary threads are still online. + */ + for_each_present_cpu(cpu) { + if (topology_is_primary_thread(cpu) && !cpu_online(cpu)) { + pr_err("Not all CPUs online, aborting microcode update.\n"); + return -EINVAL; + } + } - return -EINVAL; + return 0; } static atomic_t late_cpus_in; -- cgit v1.2.3-59-g8ed1b
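For context, a 'primary' thread on x86 is the sibling whose SMT index bits, the low bits of the APIC ID, are clear. A simplified sketch of that test follows; it assumes a power-of-two sibling count and is illustrative only, not the kernel's exact implementation.

#include <linux/bitops.h>	/* fls() */
#include <linux/types.h>

/* Illustrative only: the SMT sibling index occupies the low bits of
 * the APIC ID, so the primary thread of a core has them all clear. */
static bool apicid_is_primary_thread(u32 apicid, unsigned int smt_siblings)
{
	u32 mask = (1U << (fls(smt_siblings) - 1)) - 1;

	return !(apicid & mask);
}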