aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig15
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S5
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S3
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S3
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S3
-rw-r--r--arch/x86/entry/calling.h36
-rw-r--r--arch/x86/entry/entry_32.S5
-rw-r--r--arch/x86/entry/entry_64.S12
-rw-r--r--arch/x86/events/intel/bts.c18
-rw-r--r--arch/x86/include/asm/asm-prototypes.h25
-rw-r--r--arch/x86/include/asm/cpufeatures.h4
-rw-r--r--arch/x86/include/asm/mshyperv.h18
-rw-r--r--arch/x86/include/asm/msr-index.h3
-rw-r--r--arch/x86/include/asm/nospec-branch.h214
-rw-r--r--arch/x86/include/asm/pci_x86.h1
-rw-r--r--arch/x86/include/asm/processor-flags.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h6
-rw-r--r--arch/x86/include/asm/xen/hypercall.h5
-rw-r--r--arch/x86/kernel/alternative.c7
-rw-r--r--arch/x86/kernel/cpu/amd.c28
-rw-r--r--arch/x86/kernel/cpu/bugs.c185
-rw-r--r--arch/x86/kernel/cpu/common.c3
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c13
-rw-r--r--arch/x86/kernel/ftrace_32.S6
-rw-r--r--arch/x86/kernel/ftrace_64.S8
-rw-r--r--arch/x86/kernel/irq_32.c9
-rw-r--r--arch/x86/kernel/tboot.c11
-rw-r--r--arch/x86/kvm/mmu.c19
-rw-r--r--arch/x86/kvm/svm.c13
-rw-r--r--arch/x86/kvm/vmx.c21
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/checksum_32.S7
-rw-r--r--arch/x86/lib/retpoline.S48
-rw-r--r--arch/x86/mm/pti.c32
-rw-r--r--arch/x86/pci/common.c5
-rw-r--r--arch/x86/pci/fixup.c29
-rw-r--r--arch/x86/platform/efi/efi_64.c2
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_bt.c2
-rw-r--r--arch/x86/xen/mmu_pv.c8
-rw-r--r--arch/x86/xen/xen-ops.h2
41 files changed, 719 insertions, 126 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 366b19cb79b7..bc2204f829d3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,7 +55,6 @@ config X86
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_PMEM_API if X86_64
- # Causing hangs/crashes, see the commit that added this change for details.
select ARCH_HAS_REFCOUNT
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_SET_MEMORY
@@ -89,6 +88,7 @@ config X86
select GENERIC_CLOCKEVENTS_MIN_ADJUST
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_FIND_FIRST_BIT
select GENERIC_IOMAP
@@ -430,6 +430,19 @@ config GOLDFISH
def_bool y
depends on X86_GOLDFISH
+config RETPOLINE
+ bool "Avoid speculative indirect branches in kernel"
+ default y
+ help
+ Compile kernel with the retpoline compiler options to guard against
+ kernel-to-user data leaks by avoiding speculative indirect
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+ Without compiler support, at least indirect branches in assembler
+ code are eliminated. Since this includes the syscall entry path,
+ it is not entirely pointless.
+
config INTEL_RDT
bool "Intel Resource Director Technology support"
default n
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 3e73bc255e4e..fad55160dcb9 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -230,6 +230,14 @@ KBUILD_CFLAGS += -Wno-sign-compare
#
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ endif
+endif
+
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 16627fec80b2..3d09e3aca18d 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,7 @@
#include <linux/linkage.h>
#include <asm/inst.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
/*
* The following macros are used to move an (un)aligned 16 byte value to/from
@@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
pxor INC, STATE4
movdqu IV, 0x30(OUTP)
- call *%r11
+ CALL_NOSPEC %r11
movdqu 0x00(OUTP), INC
pxor INC, STATE1
@@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
_aesni_gf128mul_x_ble()
movups IV, (IVP)
- call *%r11
+ CALL_NOSPEC %r11
movdqu 0x40(OUTP), INC
pxor INC, STATE1
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index f7c495e2863c..a14af6eb09cb 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -17,6 +17,7 @@
#include <linux/linkage.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
vpxor 14 * 16(%rax), %xmm15, %xmm14;
vpxor 15 * 16(%rax), %xmm15, %xmm15;
- call *%r9;
+ CALL_NOSPEC %r9;
addq $(16 * 16), %rsp;
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index eee5b3982cfd..b66bbfa62f50 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -12,6 +12,7 @@
#include <linux/linkage.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
vpxor 14 * 32(%rax), %ymm15, %ymm14;
vpxor 15 * 32(%rax), %ymm15, %ymm15;
- call *%r9;
+ CALL_NOSPEC %r9;
addq $(16 * 32), %rsp;
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 7a7de27c6f41..d9b734d0c8cc 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -45,6 +45,7 @@
#include <asm/inst.h>
#include <linux/linkage.h>
+#include <asm/nospec-branch.h>
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
@@ -172,7 +173,7 @@ continue_block:
movzxw (bufp, %rax, 2), len
lea crc_array(%rip), bufp
lea (bufp, len, 1), bufp
- jmp *bufp
+ JMP_NOSPEC bufp
################################################################
## 2a) PROCESS FULL BLOCKS:
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 45a63e00a6af..3f48f695d5e6 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -198,8 +198,11 @@ For 32-bit we have the following conventions - kernel is built with
* PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
* halves:
*/
-#define PTI_SWITCH_PGTABLES_MASK (1<<PAGE_SHIFT)
-#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+#define PTI_USER_PGTABLE_BIT PAGE_SHIFT
+#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT)
+#define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT
+#define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT)
+#define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
.macro SET_NOFLUSH_BIT reg:req
bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
@@ -208,7 +211,7 @@ For 32-bit we have the following conventions - kernel is built with
.macro ADJUST_KERNEL_CR3 reg:req
ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
- andq $(~PTI_SWITCH_MASK), \reg
+ andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
.endm
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
@@ -239,15 +242,19 @@ For 32-bit we have the following conventions - kernel is built with
/* Flush needed, clear the bit */
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
movq \scratch_reg2, \scratch_reg
- jmp .Lwrcr3_\@
+ jmp .Lwrcr3_pcid_\@
.Lnoflush_\@:
movq \scratch_reg2, \scratch_reg
SET_NOFLUSH_BIT \scratch_reg
+.Lwrcr3_pcid_\@:
+ /* Flip the ASID to the user version */
+ orq $(PTI_USER_PCID_MASK), \scratch_reg
+
.Lwrcr3_\@:
- /* Flip the PGD and ASID to the user version */
- orq $(PTI_SWITCH_MASK), \scratch_reg
+ /* Flip the PGD to the user version */
+ orq $(PTI_USER_PGTABLE_MASK), \scratch_reg
mov \scratch_reg, %cr3
.Lend_\@:
.endm
@@ -263,17 +270,12 @@ For 32-bit we have the following conventions - kernel is built with
movq %cr3, \scratch_reg
movq \scratch_reg, \save_reg
/*
- * Is the "switch mask" all zero? That means that both of
- * these are zero:
- *
- * 1. The user/kernel PCID bit, and
- * 2. The user/kernel "bit" that points CR3 to the
- * bottom half of the 8k PGD
- *
- * That indicates a kernel CR3 value, not a user CR3.
+ * Test the user pagetable bit. If set, then the user page tables
+ * are active. If clear CR3 already has the kernel page table
+ * active.
*/
- testq $(PTI_SWITCH_MASK), \scratch_reg
- jz .Ldone_\@
+ bt $PTI_USER_PGTABLE_BIT, \scratch_reg
+ jnc .Ldone_\@
ADJUST_KERNEL_CR3 \scratch_reg
movq \scratch_reg, %cr3
@@ -290,7 +292,7 @@ For 32-bit we have the following conventions - kernel is built with
* KERNEL pages can always resume with NOFLUSH as we do
* explicit flushes.
*/
- bt $X86_CR3_PTI_SWITCH_BIT, \save_reg
+ bt $PTI_USER_PGTABLE_BIT, \save_reg
jnc .Lnoflush_\@
/*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index ace8f321a5a1..a1f28a54f23a 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -44,6 +44,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
.section .entry.text, "ax"
@@ -290,7 +291,7 @@ ENTRY(ret_from_fork)
/* kernel thread */
1: movl %edi, %eax
- call *%ebx
+ CALL_NOSPEC %ebx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
@@ -919,7 +920,7 @@ common_exception:
movl %ecx, %es
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- call *%edi
+ CALL_NOSPEC %edi
jmp ret_from_exception
END(common_exception)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f048e384ff54..4f8e1d35a97c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -37,6 +37,7 @@
#include <asm/pgtable_types.h>
#include <asm/export.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#include <linux/err.h>
#include "calling.h"
@@ -191,7 +192,7 @@ ENTRY(entry_SYSCALL_64_trampoline)
*/
pushq %rdi
movq $entry_SYSCALL_64_stage2, %rdi
- jmp *%rdi
+ JMP_NOSPEC %rdi
END(entry_SYSCALL_64_trampoline)
.popsection
@@ -270,7 +271,12 @@ entry_SYSCALL_64_fastpath:
* It might end up jumping to the slow path. If it jumps, RAX
* and all argument registers are clobbered.
*/
+#ifdef CONFIG_RETPOLINE
+ movq sys_call_table(, %rax, 8), %rax
+ call __x86_indirect_thunk_rax
+#else
call *sys_call_table(, %rax, 8)
+#endif
.Lentry_SYSCALL_64_after_fastpath_call:
movq %rax, RAX(%rsp)
@@ -442,7 +448,7 @@ ENTRY(stub_ptregs_64)
jmp entry_SYSCALL64_slow_path
1:
- jmp *%rax /* Called from C */
+ JMP_NOSPEC %rax /* Called from C */
END(stub_ptregs_64)
.macro ptregs_stub func
@@ -521,7 +527,7 @@ ENTRY(ret_from_fork)
1:
/* kernel thread */
movq %r12, %rdi
- call *%rbx
+ CALL_NOSPEC %rbx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 141e07b06216..24ffa1e88cf9 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -582,6 +582,24 @@ static __init int bts_init(void)
if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
return -ENODEV;
+ if (boot_cpu_has(X86_FEATURE_PTI)) {
+ /*
+ * BTS hardware writes through a virtual memory map we must
+ * either use the kernel physical map, or the user mapping of
+ * the AUX buffer.
+ *
+ * However, since this driver supports per-CPU and per-task inherit
+ * we cannot use the user mapping since it will not be availble
+ * if we're not running the owning process.
+ *
+ * With PTI we can't use the kernal map either, because its not
+ * there when we run userspace.
+ *
+ * For now, disable this driver when using PTI.
+ */
+ return -ENODEV;
+ }
+
bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
PERF_PMU_CAP_EXCLUSIVE;
bts_pmu.task_ctx_nr = perf_sw_context;
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index ff700d81e91e..0927cdc4f946 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -11,7 +11,32 @@
#include <asm/pgtable.h>
#include <asm/special_insns.h>
#include <asm/preempt.h>
+#include <asm/asm.h>
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
#endif
+
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_32
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
+#else
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
+INDIRECT_THUNK(8)
+INDIRECT_THUNK(9)
+INDIRECT_THUNK(10)
+INDIRECT_THUNK(11)
+INDIRECT_THUNK(12)
+INDIRECT_THUNK(13)
+INDIRECT_THUNK(14)
+INDIRECT_THUNK(15)
+#endif
+INDIRECT_THUNK(ax)
+INDIRECT_THUNK(bx)
+INDIRECT_THUNK(cx)
+INDIRECT_THUNK(dx)
+INDIRECT_THUNK(si)
+INDIRECT_THUNK(di)
+INDIRECT_THUNK(bp)
+INDIRECT_THUNK(sp)
+#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 21ac898df2d8..f275447862f4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -203,6 +203,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
+#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -342,5 +344,7 @@
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 5400add2885b..8bf450b13d9f 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -7,6 +7,7 @@
#include <linux/nmi.h>
#include <asm/io.h>
#include <asm/hyperv.h>
+#include <asm/nospec-branch.h>
/*
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
@@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return U64_MAX;
__asm__ __volatile__("mov %4, %%r8\n"
- "call *%5"
+ CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
- : "r" (output_address), "m" (hv_hypercall_pg)
+ : "r" (output_address),
+ THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory", "r8", "r9", "r10", "r11");
#else
u32 input_address_hi = upper_32_bits(input_address);
@@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
if (!hv_hypercall_pg)
return U64_MAX;
- __asm__ __volatile__("call *%7"
+ __asm__ __volatile__(CALL_NOSPEC
: "=A" (hv_status),
"+c" (input_address_lo), ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input_address_hi),
"D"(output_address_hi), "S"(output_address_lo),
- "m" (hv_hypercall_pg)
+ THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory");
#endif /* !x86_64 */
return hv_status;
@@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
#ifdef CONFIG_X86_64
{
- __asm__ __volatile__("call *%4"
+ __asm__ __volatile__(CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
- : "m" (hv_hypercall_pg)
+ : THUNK_TARGET(hv_hypercall_pg)
: "cc", "r8", "r9", "r10", "r11");
}
#else
@@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
u32 input1_hi = upper_32_bits(input1);
u32 input1_lo = lower_32_bits(input1);
- __asm__ __volatile__ ("call *%5"
+ __asm__ __volatile__ (CALL_NOSPEC
: "=A"(hv_status),
"+c"(input1_lo),
ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input1_hi),
- "m" (hv_hypercall_pg)
+ THUNK_TARGET(hv_hypercall_pg)
: "cc", "edi", "esi");
}
#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 34c4922bbc3f..e7b983a35506 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -355,6 +355,9 @@
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
#define MSR_FAM10H_NODE_ID 0xc001100c
+#define MSR_F10H_DECFG 0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644
index 000000000000..402a11c803c3
--- /dev/null
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NOSPEC_BRANCH_H__
+#define __NOSPEC_BRANCH_H__
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeatures.h>
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+ mov $(nr/2), reg; \
+771: \
+ call 772f; \
+773: /* speculation trap */ \
+ pause; \
+ jmp 773b; \
+772: \
+ call 774f; \
+775: /* speculation trap */ \
+ pause; \
+ jmp 775b; \
+774: \
+ dec reg; \
+ jnz 771b; \
+ add $(BITS_PER_LONG/8) * nr, sp;
+
+#ifdef __ASSEMBLY__
+
+/*
+ * This should be used immediately before a retpoline alternative. It tells
+ * objtool where the retpolines are so that it can make sense of the control
+ * flow by just reading the original instruction(s) and ignoring the
+ * alternatives.
+ */
+.macro ANNOTATE_NOSPEC_ALTERNATIVE
+ .Lannotate_\@:
+ .pushsection .discard.nospec
+ .long .Lannotate_\@ - .
+ .popsection
+.endm
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+ call .Ldo_rop_\@
+.Lspec_trap_\@:
+ pause
+ jmp .Lspec_trap_\@
+.Ldo_rop_\@:
+ mov \reg, (%_ASM_SP)
+ ret
+.endm
+
+/*
+ * This is a wrapper around RETPOLINE_JMP so the called function in reg
+ * returns to the instruction after the macro.
+ */
+.macro RETPOLINE_CALL reg:req
+ jmp .Ldo_call_\@
+.Ldo_retpoline_jmp_\@:
+ RETPOLINE_JMP \reg
+.Ldo_call_\@:
+ call .Ldo_retpoline_jmp_\@
+.endm
+
+/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro JMP_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE_2 __stringify(jmp *\reg), \
+ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ jmp *\reg
+#endif
+.endm
+
+.macro CALL_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE_2 __stringify(call *\reg), \
+ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+ __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ call *\reg
+#endif
+.endm
+
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+#ifdef CONFIG_RETPOLINE
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
+ \ftr
+.Lskip_rsb_\@:
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#define ANNOTATE_NOSPEC_ALTERNATIVE \
+ "999:\n\t" \
+ ".pushsection .discard.nospec\n\t" \
+ ".long 999b - .\n\t" \
+ ".popsection\n\t"
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+
+/*
+ * Since the inline asm uses the %V modifier which is only in newer GCC,
+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ */
+# define CALL_NOSPEC \
+ ANNOTATE_NOSPEC_ALTERNATIVE \
+ ALTERNATIVE( \
+ "call *%[thunk_target]\n", \
+ "call __x86_indirect_thunk_%V[thunk_target]\n", \
+ X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
+ " jmp 904f;\n" \
+ " .align 16\n" \
+ "901: call 903f;\n" \
+ "902: pause;\n" \
+ " jmp 902b;\n" \
+ " .align 16\n" \
+ "903: addl $4, %%esp;\n" \
+ " pushl %[thunk_target];\n" \
+ " ret;\n" \
+ " .align 16\n" \
+ "904: call 901b;\n", \
+ X86_FEATURE_RETPOLINE)
+
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline for C / inline asm */
+# define CALL_NOSPEC "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+/* The Spectre V2 mitigation variants */
+enum spectre_v2_mitigation {
+ SPECTRE_V2_NONE,
+ SPECTRE_V2_RETPOLINE_MINIMAL,
+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
+ SPECTRE_V2_RETPOLINE_GENERIC,
+ SPECTRE_V2_RETPOLINE_AMD,
+ SPECTRE_V2_IBRS,
+};
+
+/*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+ * CPUs with IBRS_ATT *might* it be avoided.
+ */
+static inline void vmexit_fill_RSB(void)
+{
+#ifdef CONFIG_RETPOLINE
+ unsigned long loops = RSB_CLEAR_LOOPS / 2;
+
+ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE("jmp 910f",
+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+ X86_FEATURE_RETPOLINE)
+ "910:"
+ : "=&r" (loops), ASM_CALL_CONSTRAINT
+ : "r" (loops) : "memory" );
+#endif
+}
+#endif /* __ASSEMBLY__ */
+#endif /* __NOSPEC_BRANCH_H__ */
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 7a5d6695abd3..eb66fa9cd0fc 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -38,6 +38,7 @@ do { \
#define PCI_NOASSIGN_ROMS 0x80000
#define PCI_ROOT_NO_CRS 0x100000
#define PCI_NOASSIGN_BARS 0x200000
+#define PCI_BIG_ROOT_WINDOW 0x400000
extern unsigned int pci_probe;
extern unsigned long pirq_table_addr;
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 6a60fea90b9d..625a52a5594f 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -40,7 +40,7 @@
#define CR3_NOFLUSH BIT_ULL(63)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
-# define X86_CR3_PTI_SWITCH_BIT 11
+# define X86_CR3_PTI_PCID_USER_BIT 11
#endif
#else
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4a08dd2ab32a..d33e4a26dc7e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -81,13 +81,13 @@ static inline u16 kern_pcid(u16 asid)
* Make sure that the dynamic ASID space does not confict with the
* bit we are using to switch between user and kernel ASIDs.
*/
- BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
+ BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
/*
* The ASID being passed in here should have respected the
* MAX_ASID_AVAILABLE and thus never have the switch bit set.
*/
- VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
+ VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
#endif
/*
* The dynamically-assigned ASIDs that get passed in are small
@@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid)
{
u16 ret = kern_pcid(asid);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
- ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
+ ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
#endif
return ret;
}
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 7cb282e9e587..bfd882617613 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -44,6 +44,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
@@ -217,9 +218,9 @@ privcmd_call(unsigned call,
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
stac();
- asm volatile("call *%[call]"
+ asm volatile(CALL_NOSPEC
: __HYPERCALL_5PARAM
- : [call] "a" (&hypercall_page[call])
+ : [thunk_target] "a" (&hypercall_page[call])
: __HYPERCALL_CLOBBER5);
clac();
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index dbaf14d69ebd..4817d743c263 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -344,9 +344,12 @@ done:
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
{
unsigned long flags;
+ int i;
- if (instr[0] != 0x90)
- return;
+ for (i = 0; i < a->padlen; i++) {
+ if (instr[i] != 0x90)
+ return;
+ }
local_irq_save(flags);
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bcb75dc97d44..ea831c858195 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -829,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_K8);
if (cpu_has(c, X86_FEATURE_XMM2)) {
- /* MFENCE stops RDTSC speculation */
- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ unsigned long long val;
+ int ret;
+
+ /*
+ * A serializing LFENCE has less overhead than MFENCE, so
+ * use it for execution serialization. On families which
+ * don't have that MSR, LFENCE is already serializing.
+ * msr_set_bit() uses the safe accessors, too, even if the MSR
+ * is not present.
+ */
+ msr_set_bit(MSR_F10H_DECFG,
+ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+ /*
+ * Verify that the MSR write was successful (could be running
+ * under a hypervisor) and only then assume that LFENCE is
+ * serializing.
+ */
+ ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+ if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+ /* A serializing LFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+ } else {
+ /* MFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ }
}
/*
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ba0b2424c9b0..e4dc26185aa7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -10,6 +10,10 @@
*/
#include <linux/init.h>
#include <linux/utsname.h>
+#include <linux/cpu.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
@@ -20,6 +24,8 @@
#include <asm/pgtable.h>
#include <asm/set_memory.h>
+static void __init spectre_v2_select_mitigation(void);
+
void __init check_bugs(void)
{
identify_boot_cpu();
@@ -29,6 +35,9 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data);
}
+ /* Select the proper spectre mitigation before patching alternatives */
+ spectre_v2_select_mitigation();
+
#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
@@ -60,3 +69,179 @@ void __init check_bugs(void)
set_memory_4k((unsigned long)__va(0), 1);
#endif
}
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+ SPECTRE_V2_CMD_NONE,
+ SPECTRE_V2_CMD_AUTO,
+ SPECTRE_V2_CMD_FORCE,
+ SPECTRE_V2_CMD_RETPOLINE,
+ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+ SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
+ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+ return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+ int len = strlen(opt);
+
+ return len == arglen && !strncmp(arg, opt, len);
+}
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+ char arg[20];
+ int ret;
+
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+ sizeof(arg));
+ if (ret > 0) {
+ if (match_option(arg, ret, "off")) {
+ goto disable;
+ } else if (match_option(arg, ret, "on")) {
+ spec2_print_if_secure("force enabled on command line.");
+ return SPECTRE_V2_CMD_FORCE;
+ } else if (match_option(arg, ret, "retpoline")) {
+ spec2_print_if_insecure("retpoline selected on command line.");
+ return SPECTRE_V2_CMD_RETPOLINE;
+ } else if (match_option(arg, ret, "retpoline,amd")) {
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ spec2_print_if_insecure("AMD retpoline selected on command line.");
+ return SPECTRE_V2_CMD_RETPOLINE_AMD;
+ } else if (match_option(arg, ret, "retpoline,generic")) {
+ spec2_print_if_insecure("generic retpoline selected on command line.");
+ return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+ } else if (match_option(arg, ret, "auto")) {
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ }
+
+ if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ return SPECTRE_V2_CMD_AUTO;
+disable:
+ spec2_print_if_insecure("disabled on command line.");
+ return SPECTRE_V2_CMD_NONE;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+ enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+ /*
+ * If the CPU is not affected and the command line mode is NONE or AUTO
+ * then nothing to do.
+ */
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+ (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+ return;
+
+ switch (cmd) {
+ case SPECTRE_V2_CMD_NONE:
+ return;
+
+ case SPECTRE_V2_CMD_FORCE:
+ /* FALLTRHU */
+ case SPECTRE_V2_CMD_AUTO:
+ goto retpoline_auto;
+
+ case SPECTRE_V2_CMD_RETPOLINE_AMD:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_amd;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_generic;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_auto;
+ break;
+ }
+ pr_err("kernel not compiled with retpoline; no mitigation available!");
+ return;
+
+retpoline_auto:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ retpoline_amd:
+ if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+ pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+ goto retpoline_generic;
+ }
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ } else {
+ retpoline_generic:
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+ SPECTRE_V2_RETPOLINE_MINIMAL;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ }
+
+ spectre_v2_enabled = mode;
+ pr_info("%s\n", spectre_v2_strings[mode]);
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ return sprintf(buf, "Not affected\n");
+ if (boot_cpu_has(X86_FEATURE_PTI))
+ return sprintf(buf, "Mitigation: PTI\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+ return sprintf(buf, "Not affected\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+}
+#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 39d7ea865207..ef29ad001991 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -926,6 +926,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
if (c->x86_vendor != X86_VENDOR_AMD)
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
fpu__init_system(c);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 8ccdca6d3f9e..d9e460fc7a3b 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -910,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
- pr_err_once("late loading on model 79 is disabled.\n");
+ /*
+ * Late loading on model 79 with microcode revision less than 0x0b000021
+ * may result in a system hang. This behavior is documented in item
+ * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+ */
+ if (c->x86 == 6 &&
+ c->x86_model == INTEL_FAM6_BROADWELL_X &&
+ c->x86_mask == 0x01 &&
+ c->microcode < 0x0b000021) {
+ pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+ pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
return true;
}
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index b6c6468e10bc..4c8440de3355 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -8,6 +8,7 @@
#include <asm/segment.h>
#include <asm/export.h>
#include <asm/ftrace.h>
+#include <asm/nospec-branch.h>
#ifdef CC_USING_FENTRY
# define function_hook __fentry__
@@ -197,7 +198,8 @@ ftrace_stub:
movl 0x4(%ebp), %edx
subl $MCOUNT_INSN_SIZE, %eax
- call *ftrace_trace_function
+ movl ftrace_trace_function, %ecx
+ CALL_NOSPEC %ecx
popl %edx
popl %ecx
@@ -241,5 +243,5 @@ return_to_handler:
movl %eax, %ecx
popl %edx
popl %eax
- jmp *%ecx
+ JMP_NOSPEC %ecx
#endif
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index c832291d948a..7cb8ba08beb9 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -7,7 +7,7 @@
#include <asm/ptrace.h>
#include <asm/ftrace.h>
#include <asm/export.h>
-
+#include <asm/nospec-branch.h>
.code64
.section .entry.text, "ax"
@@ -286,8 +286,8 @@ trace:
* ip and parent ip are used and the list function is called when
* function tracing is enabled.
*/
- call *ftrace_trace_function
-
+ movq ftrace_trace_function, %r8
+ CALL_NOSPEC %r8
restore_mcount_regs
jmp fgraph_trace
@@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $24, %rsp
- jmp *%rdi
+ JMP_NOSPEC %rdi
#endif
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a83b3346a0e1..c1bdbd3d3232 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -20,6 +20,7 @@
#include <linux/mm.h>
#include <asm/apic.h>
+#include <asm/nospec-branch.h>
#ifdef CONFIG_DEBUG_STACKOVERFLOW
@@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
static void call_on_stack(void *func, void *stack)
{
asm volatile("xchgl %%ebx,%%esp \n"
- "call *%%edi \n"
+ CALL_NOSPEC
"movl %%ebx,%%esp \n"
: "=b" (stack)
: "0" (stack),
- "D"(func)
+ [thunk_target] "D"(func)
: "memory", "cc", "edx", "ecx", "eax");
}
@@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
call_on_stack(print_stack_overflow, isp);
asm volatile("xchgl %%ebx,%%esp \n"
- "call *%%edi \n"
+ CALL_NOSPEC
"movl %%ebx,%%esp \n"
: "=a" (arg1), "=b" (isp)
: "0" (desc), "1" (isp),
- "D" (desc->handle_irq)
+ [thunk_target] "D" (desc->handle_irq)
: "memory", "cc", "ecx");
return 1;
}
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index a4eb27918ceb..a2486f444073 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -138,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
return -1;
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
pte_unmap(pte);
+
+ /*
+ * PTI poisons low addresses in the kernel page tables in the
+ * name of making them unusable for userspace. To execute
+ * code at such a low address, the poison must be cleared.
+ *
+ * Note: 'pgd' actually gets set in p4d_alloc() _or_
+ * pud_alloc() depending on 4/5-level paging.
+ */
+ pgd->pgd &= ~_PAGE_NX;
+
return 0;
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c4deb1f34faa..2b8eb4da4d08 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3781,7 +3781,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
{
if (unlikely(!lapic_in_kernel(vcpu) ||
- kvm_event_needs_reinjection(vcpu)))
+ kvm_event_needs_reinjection(vcpu) ||
+ vcpu->arch.exception.pending))
return false;
if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
@@ -5465,30 +5466,34 @@ static void mmu_destroy_caches(void)
int kvm_mmu_module_init(void)
{
+ int ret = -ENOMEM;
+
kvm_mmu_clear_all_pte_masks();
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
sizeof(struct pte_list_desc),
0, SLAB_ACCOUNT, NULL);
if (!pte_list_desc_cache)
- goto nomem;
+ goto out;
mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
sizeof(struct kvm_mmu_page),
0, SLAB_ACCOUNT, NULL);
if (!mmu_page_header_cache)
- goto nomem;
+ goto out;
if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
- goto nomem;
+ goto out;
- register_shrinker(&mmu_shrinker);
+ ret = register_shrinker(&mmu_shrinker);
+ if (ret)
+ goto out;
return 0;
-nomem:
+out:
mmu_destroy_caches();
- return -ENOMEM;
+ return ret;
}
/*
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index bb31c801f1fc..f40d0da1f1d3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -45,6 +45,7 @@
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -361,7 +362,6 @@ static void recalc_intercepts(struct vcpu_svm *svm)
{
struct vmcb_control_area *c, *h;
struct nested_state *g;
- u32 h_intercept_exceptions;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
@@ -372,14 +372,9 @@ static void recalc_intercepts(struct vcpu_svm *svm)
h = &svm->nested.hsave->control;
g = &svm->nested;
- /* No need to intercept #UD if L1 doesn't intercept it */
- h_intercept_exceptions =
- h->intercept_exceptions & ~(1U << UD_VECTOR);
-
c->intercept_cr = h->intercept_cr | g->intercept_cr;
c->intercept_dr = h->intercept_dr | g->intercept_dr;
- c->intercept_exceptions =
- h_intercept_exceptions | g->intercept_exceptions;
+ c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
c->intercept = h->intercept | g->intercept;
}
@@ -2202,7 +2197,6 @@ static int ud_interception(struct vcpu_svm *svm)
{
int er;
- WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
if (er == EMULATE_USER_EXIT)
return 0;
@@ -5034,6 +5028,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5c14d65f676a..c829d89e2e63 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -50,6 +50,7 @@
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
#include "trace.h"
#include "pmu.h"
@@ -899,8 +900,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
{
BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
- vmcs_field_to_offset_table[field] == 0)
+ if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
+ return -ENOENT;
+
+ /*
+ * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
+ * generic mechanism.
+ */
+ asm("lfence");
+
+ if (vmcs_field_to_offset_table[field] == 0)
return -ENOENT;
return vmcs_field_to_offset_table[field];
@@ -1887,7 +1896,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
u32 eb;
- eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
+ eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
(1u << DB_VECTOR) | (1u << AC_VECTOR);
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
@@ -1905,8 +1914,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
*/
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
- else
- eb |= 1u << UD_VECTOR;
vmcs_write32(EXCEPTION_BITMAP, eb);
}
@@ -5917,7 +5924,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
return 1; /* already handled by vmx_vcpu_run() */
if (is_invalid_opcode(intr_info)) {
- WARN_ON_ONCE(is_guest_mode(vcpu));
er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
if (er == EMULATE_USER_EXIT)
return 0;
@@ -9485,6 +9491,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (debugctlmsr)
update_debugctlmsr(debugctlmsr);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 171377b83be1..25a972c61b0a 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4d34bb548b41..46e71a74e612 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -29,7 +29,8 @@
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
-
+#include <asm/nospec-branch.h>
+
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
*/
@@ -156,7 +157,7 @@ ENTRY(csum_partial)
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
testl %esi, %esi
- jmp *%ebx
+ JMP_NOSPEC %ebx
# Handle 2-byte-aligned regions
20: addw (%esi), %ax
@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
andl $-32,%edx
lea 3f(%ebx,%ebx), %ebx
testl %esi, %esi
- jmp *%ebx
+ JMP_NOSPEC %ebx
1: addl $64,%esi
addl $64,%edi
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
new file mode 100644
index 000000000000..cb45c6cb465f
--- /dev/null
+++ b/arch/x86/lib/retpoline.S
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+#include <asm/export.h>
+#include <asm/nospec-branch.h>
+
+.macro THUNK reg
+ .section .text.__x86.indirect_thunk.\reg
+
+ENTRY(__x86_indirect_thunk_\reg)
+ CFI_STARTPROC
+ JMP_NOSPEC %\reg
+ CFI_ENDPROC
+ENDPROC(__x86_indirect_thunk_\reg)
+.endm
+
+/*
+ * Despite being an assembler file we can't just use .irp here
+ * because __KSYM_DEPS__ only uses the C preprocessor and would
+ * only see one instance of "__x86_indirect_thunk_\reg" rather
+ * than one per register with the correct names. So we do it
+ * the simple and nasty way...
+ */
+#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
+#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
+
+GENERATE_THUNK(_ASM_AX)
+GENERATE_THUNK(_ASM_BX)
+GENERATE_THUNK(_ASM_CX)
+GENERATE_THUNK(_ASM_DX)
+GENERATE_THUNK(_ASM_SI)
+GENERATE_THUNK(_ASM_DI)
+GENERATE_THUNK(_ASM_BP)
+GENERATE_THUNK(_ASM_SP)
+#ifdef CONFIG_64BIT
+GENERATE_THUNK(r8)
+GENERATE_THUNK(r9)
+GENERATE_THUNK(r10)
+GENERATE_THUNK(r11)
+GENERATE_THUNK(r12)
+GENERATE_THUNK(r13)
+GENERATE_THUNK(r14)
+GENERATE_THUNK(r15)
+#endif
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 43d4a4a29037..ce38f165489b 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
*
* Returns a pointer to a P4D on success, or NULL on failure.
*/
-static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
{
pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
@@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
if (!new_p4d_page)
return NULL;
- if (pgd_none(*pgd)) {
- set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
- new_p4d_page = 0;
- }
- if (new_p4d_page)
- free_page(new_p4d_page);
+ set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
}
BUILD_BUG_ON(pgd_large(*pgd) != 0);
@@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
*
* Returns a pointer to a PMD on success, or NULL on failure.
*/
-static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
@@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
if (!new_pud_page)
return NULL;
- if (p4d_none(*p4d)) {
- set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
- new_pud_page = 0;
- }
- if (new_pud_page)
- free_page(new_pud_page);
+ set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
}
pud = pud_offset(p4d, address);
@@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
if (!new_pmd_page)
return NULL;
- if (pud_none(*pud)) {
- set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
- new_pmd_page = 0;
- }
- if (new_pmd_page)
- free_page(new_pmd_page);
+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
}
return pmd_offset(pud, address);
@@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
if (!new_pte_page)
return NULL;
- if (pmd_none(*pmd)) {
- set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
- new_pte_page = 0;
- }
- if (new_pte_page)
- free_page(new_pte_page);
+ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
}
pte = pte_offset_kernel(pmd, address);
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 7a5350d08cef..563049c483a1 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -594,6 +594,11 @@ char *__init pcibios_setup(char *str)
} else if (!strcmp(str, "nocrs")) {
pci_probe |= PCI_ROOT_NO_CRS;
return NULL;
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ } else if (!strcmp(str, "big_root_window")) {
+ pci_probe |= PCI_BIG_ROOT_WINDOW;
+ return NULL;
+#endif
} else if (!strcmp(str, "earlydump")) {
pci_early_dump_regs = 1;
return NULL;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index e663d6bf1328..f6a26e3cb476 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -662,10 +662,14 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
*/
static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
{
- unsigned i;
u32 base, limit, high;
- struct resource *res, *conflict;
struct pci_dev *other;
+ struct resource *res;
+ unsigned i;
+ int r;
+
+ if (!(pci_probe & PCI_BIG_ROOT_WINDOW))
+ return;
/* Check that we are the only device of that type */
other = pci_get_device(dev->vendor, dev->device, NULL);
@@ -699,22 +703,25 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
if (!res)
return;
+ /*
+ * Allocate a 256GB window directly below the 0xfd00000000 hardware
+ * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6).
+ */
res->name = "PCI Bus 0000:00";
res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM |
IORESOURCE_MEM_64 | IORESOURCE_WINDOW;
- res->start = 0x100000000ull;
+ res->start = 0xbd00000000ull;
res->end = 0xfd00000000ull - 1;
- /* Just grab the free area behind system memory for this */
- while ((conflict = request_resource_conflict(&iomem_resource, res))) {
- if (conflict->end >= res->end) {
- kfree(res);
- return;
- }
- res->start = conflict->end + 1;
+ r = request_resource(&iomem_resource, res);
+ if (r) {
+ kfree(res);
+ return;
}
- dev_info(&dev->dev, "adding root bus resource %pR\n", res);
+ dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
+ res);
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) |
AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index d87ac96e37ed..2dd15e967c3f 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -135,7 +135,9 @@ pgd_t * __init efi_call_phys_prolog(void)
pud[j] = *pud_offset(p4d_k, vaddr);
}
}
+ pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
}
+
out:
__flush_tlb_all();
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index dc036e511f48..5a0483e7bf66 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
return 0;
}
-static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
+static struct bt_sfi_data tng_bt_sfi_data __initdata = {
.setup = tng_bt_sfi_setup,
};
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 4d62c071b166..d85076223a69 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1325,20 +1325,18 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
{
struct {
struct mmuext_op op;
-#ifdef CONFIG_SMP
- DECLARE_BITMAP(mask, num_processors);
-#else
DECLARE_BITMAP(mask, NR_CPUS);
-#endif
} *args;
struct multicall_space mcs;
+ const size_t mc_entry_size = sizeof(args->op) +
+ sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
if (cpumask_empty(cpus))
return; /* nothing to do */
- mcs = xen_mc_entry(sizeof(*args));
+ mcs = xen_mc_entry(mc_entry_size);
args = mcs.args;
args->op.arg2.vcpumask = to_cpumask(args->mask);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 75011b80660f..3b34745d0a52 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -72,7 +72,7 @@ u64 xen_clocksource_read(void);
void xen_setup_cpu_clockevents(void);
void xen_save_time_memory_area(void);
void xen_restore_time_memory_area(void);
-void __init xen_init_time_ops(void);
+void __ref xen_init_time_ops(void);
void __init xen_hvm_init_time_ops(void);
irqreturn_t xen_debug_interrupt(int irq, void *dev_id);