aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/crypto/blake2s-glue.c10
-rw-r--r--arch/x86/crypto/chacha_glue.c14
-rw-r--r--arch/x86/crypto/nhpoly1305-avx2-glue.c2
-rw-r--r--arch/x86/crypto/nhpoly1305-sse2-glue.c2
-rw-r--r--arch/x86/crypto/poly1305_glue.c13
-rw-r--r--arch/x86/entry/calling.h40
-rw-r--r--arch/x86/entry/entry_64.S14
-rw-r--r--arch/x86/include/asm/ftrace.h11
-rw-r--r--arch/x86/include/asm/kvm_host.h4
-rw-r--r--arch/x86/include/asm/unwind.h2
-rw-r--r--arch/x86/kernel/apic/apic.c27
-rw-r--r--arch/x86/kernel/dumpstack_64.c3
-rw-r--r--arch/x86/kernel/ftrace.c29
-rw-r--r--arch/x86/kernel/unwind_frame.c3
-rw-r--r--arch/x86/kernel/unwind_orc.c113
-rw-r--r--arch/x86/kvm/ioapic.c10
-rw-r--r--arch/x86/kvm/svm/sev.c2
-rw-r--r--arch/x86/kvm/svm/svm.c2
-rw-r--r--arch/x86/kvm/vmx/nested.c2
-rw-r--r--arch/x86/kvm/vmx/vmenter.S3
-rw-r--r--arch/x86/kvm/x86.c21
-rw-r--r--arch/x86/mm/init_64.c3
-rw-r--r--arch/x86/mm/pat/set_memory.c12
23 files changed, 213 insertions, 129 deletions
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
index 06ef2d4a4701..6737bcea1fa1 100644
--- a/arch/x86/crypto/blake2s-glue.c
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -32,16 +32,16 @@ void blake2s_compress_arch(struct blake2s_state *state,
const u32 inc)
{
/* SIMD disables preemption, so relax after processing each page. */
- BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
+ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
blake2s_compress_generic(state, block, nblocks, inc);
return;
}
- for (;;) {
+ do {
const size_t blocks = min_t(size_t, nblocks,
- PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
+ SZ_4K / BLAKE2S_BLOCK_SIZE);
kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) &&
@@ -52,10 +52,8 @@ void blake2s_compress_arch(struct blake2s_state *state,
kernel_fpu_end();
nblocks -= blocks;
- if (!nblocks)
- break;
block += blocks * BLAKE2S_BLOCK_SIZE;
- }
+ } while (nblocks);
}
EXPORT_SYMBOL(blake2s_compress_arch);
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index b412c21ee06e..22250091cdbe 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -153,9 +153,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
bytes <= CHACHA_BLOCK_SIZE)
return chacha_crypt_generic(state, dst, src, bytes, nrounds);
- kernel_fpu_begin();
- chacha_dosimd(state, dst, src, bytes, nrounds);
- kernel_fpu_end();
+ do {
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+ kernel_fpu_begin();
+ chacha_dosimd(state, dst, src, todo, nrounds);
+ kernel_fpu_end();
+
+ bytes -= todo;
+ src += todo;
+ dst += todo;
+ } while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);
diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c
index f7567cbd35b6..80fcb85736e1 100644
--- a/arch/x86/crypto/nhpoly1305-avx2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c
@@ -29,7 +29,7 @@ static int nhpoly1305_avx2_update(struct shash_desc *desc,
return crypto_nhpoly1305_update(desc, src, srclen);
do {
- unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+ unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_fpu_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2);
diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c
index a661ede3b5cf..cc6b7c1a2705 100644
--- a/arch/x86/crypto/nhpoly1305-sse2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c
@@ -29,7 +29,7 @@ static int nhpoly1305_sse2_update(struct shash_desc *desc,
return crypto_nhpoly1305_update(desc, src, srclen);
do {
- unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+ unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_fpu_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2);
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 6dfec19f7d57..dfe921efa9b2 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -91,8 +91,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
struct poly1305_arch_internal *state = ctx;
/* SIMD disables preemption, so relax after processing each page. */
- BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
- PAGE_SIZE % POLY1305_BLOCK_SIZE);
+ BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
+ SZ_4K % POLY1305_BLOCK_SIZE);
if (!static_branch_likely(&poly1305_use_avx) ||
(len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
@@ -102,8 +102,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
return;
}
- for (;;) {
- const size_t bytes = min_t(size_t, len, PAGE_SIZE);
+ do {
+ const size_t bytes = min_t(size_t, len, SZ_4K);
kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
@@ -113,11 +113,10 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
else
poly1305_blocks_avx(ctx, inp, bytes, padbit);
kernel_fpu_end();
+
len -= bytes;
- if (!len)
- break;
inp += bytes;
- }
+ } while (len);
}
static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 0789e13ece90..1c7f13bb6728 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
- /*
- * Push registers and sanitize registers of values that a
- * speculation attack might otherwise want to exploit. The
- * lower registers are likely clobbered well before they
- * could be put to use in a speculative execution gadget.
- * Interleave XOR with PUSH for better uop scheduling:
- */
.if \save_ret
pushq %rsi /* pt_regs->si */
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
@@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with
pushq %rsi /* pt_regs->si */
.endif
pushq \rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
pushq %rcx /* pt_regs->cx */
- xorl %ecx, %ecx /* nospec cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
- xorl %r8d, %r8d /* nospec r8 */
pushq %r9 /* pt_regs->r9 */
- xorl %r9d, %r9d /* nospec r9 */
pushq %r10 /* pt_regs->r10 */
- xorl %r10d, %r10d /* nospec r10 */
pushq %r11 /* pt_regs->r11 */
- xorl %r11d, %r11d /* nospec r11*/
pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx*/
pushq %rbp /* pt_regs->rbp */
- xorl %ebp, %ebp /* nospec rbp*/
pushq %r12 /* pt_regs->r12 */
- xorl %r12d, %r12d /* nospec r12*/
pushq %r13 /* pt_regs->r13 */
- xorl %r13d, %r13d /* nospec r13*/
pushq %r14 /* pt_regs->r14 */
- xorl %r14d, %r14d /* nospec r14*/
pushq %r15 /* pt_regs->r15 */
- xorl %r15d, %r15d /* nospec r15*/
UNWIND_HINT_REGS
+
.if \save_ret
pushq %rsi /* return address on top of stack */
.endif
+
+ /*
+ * Sanitize registers of values that a speculation attack might
+ * otherwise want to exploit. The lower registers are likely clobbered
+ * well before they could be put to use in a speculative execution
+ * gadget.
+ */
+ xorl %edx, %edx /* nospec dx */
+ xorl %ecx, %ecx /* nospec cx */
+ xorl %r8d, %r8d /* nospec r8 */
+ xorl %r9d, %r9d /* nospec r9 */
+ xorl %r10d, %r10d /* nospec r10 */
+ xorl %r11d, %r11d /* nospec r11 */
+ xorl %ebx, %ebx /* nospec rbx */
+ xorl %ebp, %ebp /* nospec rbp */
+ xorl %r12d, %r12d /* nospec r12 */
+ xorl %r13d, %r13d /* nospec r13 */
+ xorl %r14d, %r14d /* nospec r14 */
+ xorl %r15d, %r15d /* nospec r15 */
+
.endm
.macro POP_REGS pop_rdi=1 skip_r11rcx=0
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 0e9504fabe52..3063aa9090f9 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -249,7 +249,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
*/
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
- UNWIND_HINT_EMPTY
POP_REGS pop_rdi=0 skip_r11rcx=1
/*
@@ -258,6 +257,7 @@ syscall_return_via_sysret:
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+ UNWIND_HINT_EMPTY
pushq RSP-RDI(%rdi) /* RSP */
pushq (%rdi) /* RDI */
@@ -279,8 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64)
* %rdi: prev task
* %rsi: next task
*/
-SYM_CODE_START(__switch_to_asm)
- UNWIND_HINT_FUNC
+SYM_FUNC_START(__switch_to_asm)
/*
* Save callee-saved registers
* This must match the order in inactive_task_frame
@@ -321,7 +320,7 @@ SYM_CODE_START(__switch_to_asm)
popq %rbp
jmp __switch_to
-SYM_CODE_END(__switch_to_asm)
+SYM_FUNC_END(__switch_to_asm)
/*
* A newly forked process directly context switches into this address.
@@ -512,7 +511,7 @@ SYM_CODE_END(spurious_entries_start)
* +----------------------------------------------------+
*/
SYM_CODE_START(interrupt_entry)
- UNWIND_HINT_FUNC
+ UNWIND_HINT_IRET_REGS offset=16
ASM_CLAC
cld
@@ -544,9 +543,9 @@ SYM_CODE_START(interrupt_entry)
pushq 5*8(%rdi) /* regs->eflags */
pushq 4*8(%rdi) /* regs->cs */
pushq 3*8(%rdi) /* regs->ip */
+ UNWIND_HINT_IRET_REGS
pushq 2*8(%rdi) /* regs->orig_ax */
pushq 8(%rdi) /* return address */
- UNWIND_HINT_FUNC
movq (%rdi), %rdi
jmp 2f
@@ -637,6 +636,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+ UNWIND_HINT_EMPTY
/* Copy the IRET frame to the trampoline stack. */
pushq 6*8(%rdi) /* SS */
@@ -1739,7 +1739,7 @@ SYM_CODE_START(rewind_stack_do_exit)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
leaq -PTREGS_SIZE(%rax), %rsp
- UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
+ UNWIND_HINT_REGS
call do_exit
SYM_CODE_END(rewind_stack_do_exit)
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 85be2f506272..84b9449be080 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -56,16 +56,23 @@ struct dyn_arch_ftrace {
#ifndef __ASSEMBLY__
+#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
+extern void set_ftrace_ops_ro(void);
+#else
+static inline void set_ftrace_ops_ro(void) { }
+#endif
+
#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
/*
* Compare the symbol name with the system call name. Skip the
- * "__x64_sys", "__ia32_sys" or simple "sys" prefix.
+ * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix.
*/
return !strcmp(sym + 3, name + 3) ||
(!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
- (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
+ (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) ||
+ (!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3));
}
#ifndef COMPILE_OFFSETS
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 42a2d0d3984a..0dea9f122bb9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1663,8 +1663,8 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
{
/* We can only post Fixed and LowPrio IRQs */
- return (irq->delivery_mode == dest_Fixed ||
- irq->delivery_mode == dest_LowestPrio);
+ return (irq->delivery_mode == APIC_DM_FIXED ||
+ irq->delivery_mode == APIC_DM_LOWEST);
}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 499578f7e6d7..70fc159ebe69 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -19,7 +19,7 @@ struct unwind_state {
#if defined(CONFIG_UNWINDER_ORC)
bool signal, full_regs;
unsigned long sp, bp, ip;
- struct pt_regs *regs;
+ struct pt_regs *regs, *prev_regs;
#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
bool got_irq;
unsigned long *bp, *orig_sp, ip;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 81b9c63dae1b..e53dda210cd7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
* According to Intel, MFENCE can do the serialization here.
*/
asm volatile("mfence" : : : "memory");
-
- printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
return;
}
@@ -546,7 +544,7 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-static u32 hsx_deadline_rev(void)
+static __init u32 hsx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x3a; /* EP */
@@ -556,7 +554,7 @@ static u32 hsx_deadline_rev(void)
return ~0U;
}
-static u32 bdx_deadline_rev(void)
+static __init u32 bdx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x00000011;
@@ -568,7 +566,7 @@ static u32 bdx_deadline_rev(void)
return ~0U;
}
-static u32 skx_deadline_rev(void)
+static __init u32 skx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x03: return 0x01000136;
@@ -581,7 +579,7 @@ static u32 skx_deadline_rev(void)
return ~0U;
}
-static const struct x86_cpu_id deadline_match[] = {
+static const struct x86_cpu_id deadline_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev),
@@ -603,18 +601,19 @@ static const struct x86_cpu_id deadline_match[] = {
{},
};
-static void apic_check_deadline_errata(void)
+static __init bool apic_validate_deadline_timer(void)
{
const struct x86_cpu_id *m;
u32 rev;
- if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
- boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return;
+ if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return false;
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return true;
m = x86_match_cpu(deadline_match);
if (!m)
- return;
+ return true;
/*
* Function pointers will have the MSB set due to address layout,
@@ -626,11 +625,12 @@ static void apic_check_deadline_errata(void)
rev = (u32)m->driver_data;
if (boot_cpu_data.microcode >= rev)
- return;
+ return true;
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
"please update microcode to version: 0x%x (or later)\n", rev);
+ return false;
}
/*
@@ -2092,7 +2092,8 @@ void __init init_apic_mappings(void)
{
unsigned int new_apicid;
- apic_check_deadline_errata();
+ if (apic_validate_deadline_timer())
+ pr_debug("TSC deadline timer available\n");
if (x2apic_mode) {
boot_cpu_physical_apicid = read_apic_id();
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 87b97897a881..460ae7f66818 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -183,7 +183,8 @@ recursion_check:
*/
if (visit_mask) {
if (*visit_mask & (1UL << info->type)) {
- printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
+ if (task == current)
+ printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown;
}
*visit_mask |= 1UL << info->type;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 37a0aeaf89e7..b0e641793be4 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -407,7 +407,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
set_vm_flush_reset_perms(trampoline);
- set_memory_ro((unsigned long)trampoline, npages);
+ if (likely(system_state != SYSTEM_BOOTING))
+ set_memory_ro((unsigned long)trampoline, npages);
set_memory_x((unsigned long)trampoline, npages);
return (unsigned long)trampoline;
fail:
@@ -415,6 +416,32 @@ fail:
return 0;
}
+void set_ftrace_ops_ro(void)
+{
+ struct ftrace_ops *ops;
+ unsigned long start_offset;
+ unsigned long end_offset;
+ unsigned long npages;
+ unsigned long size;
+
+ do_for_each_ftrace_op(ops, ftrace_ops_list) {
+ if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
+ continue;
+
+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
+ start_offset = (unsigned long)ftrace_regs_caller;
+ end_offset = (unsigned long)ftrace_regs_caller_end;
+ } else {
+ start_offset = (unsigned long)ftrace_caller;
+ end_offset = (unsigned long)ftrace_epilogue;
+ }
+ size = end_offset - start_offset;
+ size = size + RET_SIZE + sizeof(void *);
+ npages = DIV_ROUND_UP(size, PAGE_SIZE);
+ set_memory_ro((unsigned long)ops->trampoline, npages);
+ } while_for_each_ftrace_op(ops);
+}
+
static unsigned long calc_trampoline_call_offset(bool save_regs)
{
unsigned long start_offset;
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index a224b5ab103f..54226110bc7f 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -344,6 +344,9 @@ bad_address:
if (IS_ENABLED(CONFIG_X86_32))
goto the_end;
+ if (state->task != current)
+ goto the_end;
+
if (state->regs) {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index e9cc182aa97e..5b0bd8581fe6 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -8,19 +8,21 @@
#include <asm/orc_lookup.h>
#define orc_warn(fmt, ...) \
- printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
+ printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
+
+#define orc_warn_current(args...) \
+({ \
+ if (state->task == current) \
+ orc_warn(args); \
+})
extern int __start_orc_unwind_ip[];
extern int __stop_orc_unwind_ip[];
extern struct orc_entry __start_orc_unwind[];
extern struct orc_entry __stop_orc_unwind[];
-static DEFINE_MUTEX(sort_mutex);
-int *cur_orc_ip_table = __start_orc_unwind_ip;
-struct orc_entry *cur_orc_table = __start_orc_unwind;
-
-unsigned int lookup_num_blocks;
-bool orc_init;
+static bool orc_init __ro_after_init;
+static unsigned int lookup_num_blocks __ro_after_init;
static inline unsigned long orc_ip(const int *ip)
{
@@ -142,9 +144,6 @@ static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
- if (!orc_init)
- return NULL;
-
if (ip == 0)
return &null_orc_entry;
@@ -189,6 +188,10 @@ static struct orc_entry *orc_find(unsigned long ip)
#ifdef CONFIG_MODULES
+static DEFINE_MUTEX(sort_mutex);
+static int *cur_orc_ip_table = __start_orc_unwind_ip;
+static struct orc_entry *cur_orc_table = __start_orc_unwind;
+
static void orc_sort_swap(void *_a, void *_b, int size)
{
struct orc_entry *orc_a, *orc_b;
@@ -381,9 +384,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
return true;
}
+/*
+ * If state->regs is non-NULL, and points to a full pt_regs, just get the reg
+ * value from state->regs.
+ *
+ * Otherwise, if state->regs just points to IRET regs, and the previous frame
+ * had full regs, it's safe to get the value from the previous regs. This can
+ * happen when early/late IRQ entry code gets interrupted by an NMI.
+ */
+static bool get_reg(struct unwind_state *state, unsigned int reg_off,
+ unsigned long *val)
+{
+ unsigned int reg = reg_off/8;
+
+ if (!state->regs)
+ return false;
+
+ if (state->full_regs) {
+ *val = ((unsigned long *)state->regs)[reg];
+ return true;
+ }
+
+ if (state->prev_regs) {
+ *val = ((unsigned long *)state->prev_regs)[reg];
+ return true;
+ }
+
+ return false;
+}
+
bool unwind_next_frame(struct unwind_state *state)
{
- unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
+ unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc;
bool indirect = false;
@@ -445,43 +477,39 @@ bool unwind_next_frame(struct unwind_state *state)
break;
case ORC_REG_R10:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R10 at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) {
+ orc_warn_current("missing R10 value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->r10;
break;
case ORC_REG_R13:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R13 at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) {
+ orc_warn_current("missing R13 value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->r13;
break;
case ORC_REG_DI:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DI at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) {
+ orc_warn_current("missing RDI value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->di;
break;
case ORC_REG_DX:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DX at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) {
+ orc_warn_current("missing DX value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->dx;
break;
default:
- orc_warn("unknown SP base reg %d for ip %pB\n",
+ orc_warn("unknown SP base reg %d at %pB\n",
orc->sp_reg, (void *)state->ip);
goto err;
}
@@ -504,44 +532,48 @@ bool unwind_next_frame(struct unwind_state *state)
state->sp = sp;
state->regs = NULL;
+ state->prev_regs = NULL;
state->signal = false;
break;
case ORC_TYPE_REGS:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
- orc_warn("can't dereference registers at %p for ip %pB\n",
- (void *)sp, (void *)orig_ip);
+ orc_warn_current("can't access registers at %pB\n",
+ (void *)orig_ip);
goto err;
}
state->regs = (struct pt_regs *)sp;
+ state->prev_regs = NULL;
state->full_regs = true;
state->signal = true;
break;
case ORC_TYPE_REGS_IRET:
if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
- orc_warn("can't dereference iret registers at %p for ip %pB\n",
- (void *)sp, (void *)orig_ip);
+ orc_warn_current("can't access iret registers at %pB\n",
+ (void *)orig_ip);
goto err;
}
+ if (state->full_regs)
+ state->prev_regs = state->regs;
state->regs = (void *)sp - IRET_FRAME_OFFSET;
state->full_regs = false;
state->signal = true;
break;
default:
- orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+ orc_warn("unknown .orc_unwind entry type %d at %pB\n",
orc->type, (void *)orig_ip);
- break;
+ goto err;
}
/* Find BP: */
switch (orc->bp_reg) {
case ORC_REG_UNDEFINED:
- if (state->regs && state->full_regs)
- state->bp = state->regs->bp;
+ if (get_reg(state, offsetof(struct pt_regs, bp), &tmp))
+ state->bp = tmp;
break;
case ORC_REG_PREV_SP:
@@ -564,8 +596,8 @@ bool unwind_next_frame(struct unwind_state *state)
if (state->stack_info.type == prev_type &&
on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
state->sp <= prev_sp) {
- orc_warn("stack going in the wrong direction? ip=%pB\n",
- (void *)orig_ip);
+ orc_warn_current("stack going in the wrong direction? at %pB\n",
+ (void *)orig_ip);
goto err;
}
@@ -585,6 +617,9 @@ EXPORT_SYMBOL_GPL(unwind_next_frame);
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long *first_frame)
{
+ if (!orc_init)
+ goto done;
+
memset(state, 0, sizeof(*state));
state->task = task;
@@ -651,7 +686,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
/* Otherwise, skip ahead to the user-specified starting frame: */
while (!unwind_done(state) &&
(!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
- state->sp <= (unsigned long)first_frame))
+ state->sp < (unsigned long)first_frame))
unwind_next_frame(state);
return;
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 750ff0b29404..d057376bd3d3 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -225,12 +225,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
}
/*
- * AMD SVM AVIC accelerate EOI write and do not trap,
- * in-kernel IOAPIC will not be able to receive the EOI.
- * In this case, we do lazy update of the pending EOI when
- * trying to set IOAPIC irq.
+ * AMD SVM AVIC accelerate EOI write iff the interrupt is edge
+ * triggered, in which case the in-kernel IOAPIC will not be able
+ * to receive the EOI. In this case, we do a lazy update of the
+ * pending EOI when trying to set IOAPIC irq.
*/
- if (kvm_apicv_activated(ioapic->kvm))
+ if (edge && kvm_apicv_activated(ioapic->kvm))
ioapic_lazy_update_eoi(ioapic, irq);
/*
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index cf912b4aaba8..89f7f3aebd31 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -345,7 +345,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
return NULL;
/* Pin the user virtual address. */
- npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages);
+ npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
if (npinned != npages) {
pr_err("SEV: Failure locking %lu pages.\n", npages);
goto err;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2f379bacbb26..38f6aeefeb55 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1752,6 +1752,8 @@ static int db_interception(struct vcpu_svm *svm)
if (svm->vcpu.guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->debug.arch.dr6 = svm->vmcb->save.dr6;
+ kvm_run->debug.arch.dr7 = svm->vmcb->save.dr7;
kvm_run->debug.arch.pc =
svm->vmcb->save.cs.base + svm->vmcb->save.rip;
kvm_run->debug.arch.exception = DB_VECTOR;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index fd78ffbde644..e44f33c82332 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5165,7 +5165,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
*/
break;
default:
- BUG_ON(1);
+ BUG();
break;
}
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 87f3f24fef37..51d1a82742fd 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -82,6 +82,9 @@ SYM_FUNC_START(vmx_vmexit)
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+ /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
+ or $1, %_ASM_AX
+
pop %_ASM_AX
.Lvmexit_skip_rsb:
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c5835f9cb9ad..d786c7d27ce5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -926,19 +926,6 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
__reserved_bits; \
})
-static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
-{
- u64 reserved_bits = __cr4_reserved_bits(cpu_has, c);
-
- if (kvm_cpu_cap_has(X86_FEATURE_LA57))
- reserved_bits &= ~X86_CR4_LA57;
-
- if (kvm_cpu_cap_has(X86_FEATURE_UMIP))
- reserved_bits &= ~X86_CR4_UMIP;
-
- return reserved_bits;
-}
-
static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
if (cr4 & cr4_reserved_bits)
@@ -3385,6 +3372,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@@ -9675,7 +9663,9 @@ int kvm_arch_hardware_setup(void *opaque)
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
supported_xss = 0;
- cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
+#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
+ cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
+#undef __kvm_cpu_cap_has
if (kvm_has_tsc_control) {
/*
@@ -9707,7 +9697,8 @@ int kvm_arch_check_processor_compat(void *opaque)
WARN_ON(!irqs_disabled());
- if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits)
+ if (__cr4_reserved_bits(cpu_has, c) !=
+ __cr4_reserved_bits(cpu_has, &boot_cpu_data))
return -EIO;
return ops->check_processor_compatibility();
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3b289c2f75cd..8b5f73f5e207 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -54,6 +54,7 @@
#include <asm/init.h>
#include <asm/uv/uv.h>
#include <asm/setup.h>
+#include <asm/ftrace.h>
#include "mm_internal.h"
@@ -1291,6 +1292,8 @@ void mark_rodata_ro(void)
all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
+ set_ftrace_ops_ro();
+
#ifdef CONFIG_CPA_DEBUG
printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
set_memory_rw(start, (end-start) >> PAGE_SHIFT);
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 59eca6a94ce7..b8c55a2e402d 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -43,7 +43,8 @@ struct cpa_data {
unsigned long pfn;
unsigned int flags;
unsigned int force_split : 1,
- force_static_prot : 1;
+ force_static_prot : 1,
+ force_flush_all : 1;
struct page **pages;
};
@@ -355,10 +356,10 @@ static void cpa_flush(struct cpa_data *data, int cache)
return;
}
- if (cpa->numpages <= tlb_single_page_flush_ceiling)
- on_each_cpu(__cpa_flush_tlb, cpa, 1);
- else
+ if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
flush_tlb_all();
+ else
+ on_each_cpu(__cpa_flush_tlb, cpa, 1);
if (!cache)
return;
@@ -1598,6 +1599,8 @@ static int cpa_process_alias(struct cpa_data *cpa)
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
alias_cpa.curpage = 0;
+ cpa->force_flush_all = 1;
+
ret = __change_page_attr_set_clr(&alias_cpa, 0);
if (ret)
return ret;
@@ -1618,6 +1621,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
alias_cpa.curpage = 0;
+ cpa->force_flush_all = 1;
/*
* The high mapping range is imprecise, so ignore the
* return value.