Diffstat (limited to 'arch/x86/entry')
-rw-r--r--  arch/x86/entry/Makefile                    |   3
-rw-r--r--  arch/x86/entry/calling.h                   |  81
-rw-r--r--  arch/x86/entry/entry.S                     |  22
-rw-r--r--  arch/x86/entry/entry_32.S                  |  45
-rw-r--r--  arch/x86/entry/entry_64.S                  | 168
-rw-r--r--  arch/x86/entry/entry_64_compat.S           | 139
-rw-r--r--  arch/x86/entry/syscalls/Makefile           |   5
-rw-r--r--  arch/x86/entry/syscalls/syscall_32.tbl     |   1
-rw-r--r--  arch/x86/entry/syscalls/syscall_64.tbl     |   1
-rw-r--r--  arch/x86/entry/thunk_32.S                  |   4
-rw-r--r--  arch/x86/entry/thunk_64.S                  |   6
-rw-r--r--  arch/x86/entry/vdso/Makefile               |  12
-rw-r--r--  arch/x86/entry/vdso/vdso-layout.lds.S      |   1
-rw-r--r--  arch/x86/entry/vdso/vdso2c.c               |   2
-rw-r--r--  arch/x86/entry/vdso/vdso32/system_call.S   |   2
-rw-r--r--  arch/x86/entry/vdso/vma.c                  |  13
-rw-r--r--  arch/x86/entry/vdso/vsgx.S                 |   2
-rw-r--r--  arch/x86/entry/vsyscall/vsyscall_64.c      |   2
-rw-r--r--  arch/x86/entry/vsyscall/vsyscall_emu_64.S  |   3
19 files changed, 307 insertions(+), 205 deletions(-)
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 7fec5dcf6438..ca2fe186994b 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -11,12 +11,13 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
CFLAGS_common.o += -fno-stack-protector
-obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
+obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o
obj-y += common.o
obj-y += vdso/
obj-y += vsyscall/
+obj-$(CONFIG_PREEMPTION) += thunk_$(BITS).o
obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o
obj-$(CONFIG_X86_X32_ABI) += syscall_x32.o
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index a4c061fb7c6e..f6907627172b 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -7,6 +7,8 @@
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
#include <asm/ptrace-abi.h>
+#include <asm/msr.h>
+#include <asm/nospec-branch.h>
/*
@@ -63,7 +65,7 @@ For 32-bit we have the following conventions - kernel is built with
* for assembly code:
*/
-.macro PUSH_REGS rdx=%rdx rax=%rax save_ret=0
+.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
.if \save_ret
pushq %rsi /* pt_regs->si */
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
@@ -73,7 +75,7 @@ For 32-bit we have the following conventions - kernel is built with
pushq %rsi /* pt_regs->si */
.endif
pushq \rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
+ pushq \rcx /* pt_regs->cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
pushq %r9 /* pt_regs->r9 */
@@ -99,6 +101,7 @@ For 32-bit we have the following conventions - kernel is built with
* well before they could be put to use in a speculative execution
* gadget.
*/
+ xorl %esi, %esi /* nospec si */
xorl %edx, %edx /* nospec dx */
xorl %ecx, %ecx /* nospec cx */
xorl %r8d, %r8d /* nospec r8 */
@@ -114,32 +117,24 @@ For 32-bit we have the following conventions - kernel is built with
.endm
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
- PUSH_REGS rdx=\rdx, rax=\rax, save_ret=\save_ret
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
+ PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret
CLEAR_REGS
.endm
-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
+.macro POP_REGS pop_rdi=1
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
- .if \skip_r11rcx
- popq %rsi
- .else
popq %r11
- .endif
popq %r10
popq %r9
popq %r8
popq %rax
- .if \skip_r11rcx
- popq %rsi
- .else
popq %rcx
- .endif
popq %rdx
popq %rsi
.if \pop_rdi
@@ -290,6 +285,66 @@ For 32-bit we have the following conventions - kernel is built with
#endif
/*
+ * IBRS kernel mitigation for Spectre_v2.
+ *
+ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
+ * the regs it uses (AX, CX, DX). Must be called before the first RET
+ * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
+ *
+ * The optional argument is used to save/restore the current value,
+ * which is used on the paranoid paths.
+ *
+ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
+ */
+.macro IBRS_ENTER save_reg
+#ifdef CONFIG_CPU_IBRS_ENTRY
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+
+.ifnb \save_reg
+ rdmsr
+ shl $32, %rdx
+ or %rdx, %rax
+ mov %rax, \save_reg
+ test $SPEC_CTRL_IBRS, %eax
+ jz .Ldo_wrmsr_\@
+ lfence
+ jmp .Lend_\@
+.Ldo_wrmsr_\@:
+.endif
+
+ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
+ movl %edx, %eax
+ shr $32, %rdx
+ wrmsr
+.Lend_\@:
+#endif
+.endm
+
+/*
+ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
+ * regs. Must be called after the last RET.
+ */
+.macro IBRS_EXIT save_reg
+#ifdef CONFIG_CPU_IBRS_ENTRY
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+
+.ifnb \save_reg
+ mov \save_reg, %rdx
+.else
+ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
+ andl $(~SPEC_CTRL_IBRS), %edx
+.endif
+
+ movl %edx, %eax
+ shr $32, %rdx
+ wrmsr
+.Lend_\@:
+#endif
+.endm
+
+/*
* Mitigate Spectre v1 for conditional swapgs code paths.
*
* FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
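
A minimal C sketch of what the new IBRS_ENTER/IBRS_EXIT macros program, for readers following the shl/or and shr shuffling above: WRMSR takes the MSR index in ECX and the 64-bit value split across EDX:EAX. The wrmsr64() and ibrs_enter_sketch() names are assumptions for illustration, not part of this patch; the MSR index and IBRS bit are the architectural values.

#include <stdint.h>

#define MSR_IA32_SPEC_CTRL	0x00000048	/* IA32_SPEC_CTRL MSR index */
#define SPEC_CTRL_IBRS		(1ULL << 0)	/* restrict indirect branch speculation */

/* Assumed helper mirroring what the asm macros do by hand with EDX:EAX. */
static inline void wrmsr64(uint32_t msr, uint64_t val)
{
	asm volatile("wrmsr"
		     : /* no outputs */
		     : "c" (msr), "a" ((uint32_t)val), "d" ((uint32_t)(val >> 32))
		     : "memory");
}

/*
 * IBRS_ENTER (without save_reg) boils down to writing the per-CPU
 * x86_spec_ctrl_current value; IBRS_EXIT writes it back with the IBRS bit
 * cleared, or restores the value stashed in save_reg on the paranoid paths.
 */
static inline void ibrs_enter_sketch(uint64_t spec_ctrl_current)
{
	/* The macro assumes SPEC_CTRL_IBRS is already set in this value. */
	wrmsr64(MSR_IA32_SPEC_CTRL, spec_ctrl_current);
}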
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
new file mode 100644
index 000000000000..bfb7bcb362bc
--- /dev/null
+++ b/arch/x86/entry/entry.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common place for both 32- and 64-bit entry routines.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+#include <asm/msr-index.h>
+
+.pushsection .noinstr.text, "ax"
+
+SYM_FUNC_START(entry_ibpb)
+ movl $MSR_IA32_PRED_CMD, %ecx
+ movl $PRED_CMD_IBPB, %eax
+ xorl %edx, %edx
+ wrmsr
+ RET
+SYM_FUNC_END(entry_ibpb)
+/* For KVM */
+EXPORT_SYMBOL_GPL(entry_ibpb);
+
+.popsection
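
The new entry_ibpb helper uses the same MSR write convention to issue an Indirect Branch Prediction Barrier: IA32_PRED_CMD is a command MSR, so the write itself triggers the barrier rather than setting persistent state. A rough, self-contained C equivalent (issue_ibpb() is an assumed name, not part of the patch):

#include <stdint.h>

#define MSR_IA32_PRED_CMD	0x00000049	/* IA32_PRED_CMD command MSR */
#define PRED_CMD_IBPB		(1ULL << 0)	/* Indirect Branch Prediction Barrier */

static inline void issue_ibpb(void)
{
	/* ECX = MSR index, EDX:EAX = value; here only the IBPB command bit. */
	asm volatile("wrmsr"
		     : /* no outputs */
		     : "c" (MSR_IA32_PRED_CMD), "a" ((uint32_t)PRED_CMD_IBPB), "d" (0u)
		     : "memory");
}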
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index ccb9d32768f3..e309e7156038 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -268,19 +268,16 @@
1: popl %ds
2: popl %es
3: popl %fs
- addl $(4 + \pop), %esp /* pop the unused "gs" slot */
+4: addl $(4 + \pop), %esp /* pop the unused "gs" slot */
IRET_FRAME
-.pushsection .fixup, "ax"
-4: movl $0, (%esp)
- jmp 1b
-5: movl $0, (%esp)
- jmp 2b
-6: movl $0, (%esp)
- jmp 3b
-.popsection
- _ASM_EXTABLE(1b, 4b)
- _ASM_EXTABLE(2b, 5b)
- _ASM_EXTABLE(3b, 6b)
+
+ /*
+ * There is no _ASM_EXTABLE_TYPE_REG() for ASM, however since this is
+ * ASM the registers are known and we can trivially hard-code them.
+ */
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_POP_ZERO|EX_REG_DS)
+ _ASM_EXTABLE_TYPE(2b, 3b, EX_TYPE_POP_ZERO|EX_REG_ES)
+ _ASM_EXTABLE_TYPE(3b, 4b, EX_TYPE_POP_ZERO|EX_REG_FS)
.endm
.macro RESTORE_ALL_NMI cr3_reg:req pop=0
@@ -701,7 +698,6 @@ SYM_CODE_START(__switch_to_asm)
movl %ebx, PER_CPU_VAR(__stack_chk_guard)
#endif
-#ifdef CONFIG_RETPOLINE
/*
* When switching from a shallower to a deeper call stack
* the RSB may either underflow or use entries populated
@@ -710,7 +706,6 @@ SYM_CODE_START(__switch_to_asm)
* speculative execution to prevent attack.
*/
FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
-#endif
/* Restore flags or the incoming task to restore AC state. */
popfl
@@ -740,7 +735,7 @@ SYM_FUNC_START(schedule_tail_wrapper)
popl %eax
FRAME_END
- ret
+ RET
SYM_FUNC_END(schedule_tail_wrapper)
.popsection
@@ -925,10 +920,8 @@ SYM_FUNC_START(entry_SYSENTER_32)
sti
sysexit
-.pushsection .fixup, "ax"
-2: movl $0, PT_FS(%esp)
- jmp 1b
-.popsection
+2: movl $0, PT_FS(%esp)
+ jmp 1b
_ASM_EXTABLE(1b, 2b)
.Lsysenter_fix_flags:
@@ -996,8 +989,7 @@ restore_all_switch_stack:
*/
iret
-.section .fixup, "ax"
-SYM_CODE_START(asm_iret_error)
+.Lasm_iret_error:
pushl $0 # no error code
pushl $iret_error
@@ -1014,9 +1006,8 @@ SYM_CODE_START(asm_iret_error)
#endif
jmp handle_exception
-SYM_CODE_END(asm_iret_error)
-.previous
- _ASM_EXTABLE(.Lirq_return, asm_iret_error)
+
+ _ASM_EXTABLE(.Lirq_return, .Lasm_iret_error)
SYM_FUNC_END(entry_INT80_32)
.macro FIXUP_ESPFIX_STACK
@@ -1248,14 +1239,14 @@ SYM_CODE_START(asm_exc_nmi)
SYM_CODE_END(asm_exc_nmi)
.pushsection .text, "ax"
-SYM_CODE_START(rewind_stack_do_exit)
+SYM_CODE_START(rewind_stack_and_make_dead)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
- call do_exit
+ call make_task_dead
1: jmp 1b
-SYM_CODE_END(rewind_stack_do_exit)
+SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 97b1f84bb53f..9953d966d124 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -85,7 +85,8 @@
*/
SYM_CODE_START(entry_SYSCALL_64)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
+ ENDBR
swapgs
/* tss.sp2 is scratch space. */
@@ -94,6 +95,7 @@ SYM_CODE_START(entry_SYSCALL_64)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
@@ -110,6 +112,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
movq %rsp, %rdi
/* Sign extend the lower 32bit as syscall numbers are treated as int */
movslq %eax, %rsi
+
+ /* clobbers %rax, make sure it is after saving the syscall nr */
+ IBRS_ENTER
+ UNTRAIN_RET
+
call do_syscall_64 /* returns with IRQs disabled */
/*
@@ -189,8 +196,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
* perf profiles. Nothing jumps here.
*/
syscall_return_via_sysret:
- /* rcx and r11 are already restored (see code above) */
- POP_REGS pop_rdi=0 skip_r11rcx=1
+ IBRS_EXIT
+ POP_REGS pop_rdi=0
/*
* Now all regs are restored except RSP and RDI.
@@ -213,8 +220,13 @@ syscall_return_via_sysret:
popq %rdi
popq %rsp
+SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
swapgs
sysretq
+SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
+ int3
SYM_CODE_END(entry_SYSCALL_64)
/*
@@ -243,7 +255,6 @@ SYM_FUNC_START(__switch_to_asm)
movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
#endif
-#ifdef CONFIG_RETPOLINE
/*
* When switching from a shallower to a deeper call stack
* the RSB may either underflow or use entries populated
@@ -252,7 +263,6 @@ SYM_FUNC_START(__switch_to_asm)
* speculative execution to prevent attack.
*/
FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
-#endif
/* restore callee-saved registers */
popq %r15
@@ -276,6 +286,7 @@ SYM_FUNC_END(__switch_to_asm)
.pushsection .text, "ax"
SYM_CODE_START(ret_from_fork)
UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR // copy_thread
movq %rax, %rdi
call schedule_tail /* rdi: 'prev' task parameter */
@@ -315,6 +326,14 @@ SYM_CODE_END(ret_from_fork)
#endif
.endm
+SYM_CODE_START_LOCAL(xen_error_entry)
+ UNWIND_HINT_FUNC
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
+ UNTRAIN_RET
+ RET
+SYM_CODE_END(xen_error_entry)
+
/**
* idtentry_body - Macro to emit code calling the C function
* @cfunc: C function to be called
@@ -322,7 +341,18 @@ SYM_CODE_END(ret_from_fork)
*/
.macro idtentry_body cfunc has_error_code:req
- call error_entry
+ /*
+ * Call error_entry() and switch to the task stack if from userspace.
+ *
+ * When in XENPV, it is already in the task stack, and it can't fault
+ * for native_iret() nor native_load_gs_index() since XENPV uses its
+ * own pvops for IRET and load_gs_index(). And it doesn't need to
+ * switch the CR3. So it can skip invoking error_entry().
+ */
+ ALTERNATIVE "call error_entry; movq %rax, %rsp", \
+ "call xen_error_entry", X86_FEATURE_XENPV
+
+ ENCODE_FRAME_POINTER
UNWIND_HINT_REGS
movq %rsp, %rdi /* pt_regs pointer into 1st argument*/
@@ -334,6 +364,9 @@ SYM_CODE_END(ret_from_fork)
call \cfunc
+ /* For some configurations \cfunc ends up being a noreturn. */
+ REACHABLE
+
jmp error_return
.endm
@@ -350,7 +383,9 @@ SYM_CODE_END(ret_from_fork)
.macro idtentry vector asmsym cfunc has_error_code:req
SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
+ ENDBR
ASM_CLAC
+ cld
.if \has_error_code == 0
pushq $-1 /* ORIG_RAX: no syscall to restart */
@@ -417,7 +452,9 @@ SYM_CODE_END(\asmsym)
.macro idtentry_mce_db vector asmsym cfunc
SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS
+ ENDBR
ASM_CLAC
+ cld
pushq $-1 /* ORIG_RAX: no syscall to restart */
@@ -472,7 +509,9 @@ SYM_CODE_END(\asmsym)
.macro idtentry_vc vector asmsym cfunc
SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS
+ ENDBR
ASM_CLAC
+ cld
/*
* If the entry is from userspace, switch stacks and treat it as
@@ -499,6 +538,7 @@ SYM_CODE_START(\asmsym)
call vc_switch_off_ist
movq %rax, %rsp /* Switch to new stack */
+ ENCODE_FRAME_POINTER
UNWIND_HINT_REGS
/* Update pt_regs */
@@ -533,7 +573,9 @@ SYM_CODE_END(\asmsym)
.macro idtentry_df vector asmsym cfunc
SYM_CODE_START(\asmsym)
UNWIND_HINT_IRET_REGS offset=8
+ ENDBR
ASM_CLAC
+ cld
/* paranoid_entry returns GS information for paranoid_exit in EBX. */
call paranoid_entry
@@ -544,6 +586,9 @@ SYM_CODE_START(\asmsym)
movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
call \cfunc
+ /* For some configurations \cfunc ends up being a noreturn. */
+ REACHABLE
+
jmp paranoid_exit
_ASM_NOKPROBE(\asmsym)
@@ -564,9 +609,11 @@ __irqentry_text_start:
.align 16
.globl __irqentry_text_end
__irqentry_text_end:
+ ANNOTATE_NOENDBR
SYM_CODE_START_LOCAL(common_interrupt_return)
SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
+ IBRS_EXIT
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testb $3, CS(%rsp)
@@ -608,8 +655,8 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
/* Restore RDI. */
popq %rdi
- SWAPGS
- INTERRUPT_RETURN
+ swapgs
+ jmp .Lnative_iret
SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
@@ -626,9 +673,14 @@ SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
* when returning from IPI handler.
*/
- INTERRUPT_RETURN
+#ifdef CONFIG_XEN_PV
+SYM_INNER_LABEL(early_xen_iret_patch, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
+ .byte 0xe9
+ .long .Lnative_iret - (. + 4)
+#endif
-SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL)
+.Lnative_iret:
UNWIND_HINT_IRET_REGS
/*
* Are we returning to a stack segment from the LDT? Note: in
@@ -640,6 +692,7 @@ SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL)
#endif
SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR // exc_double_fault
/*
* This may fault. Non-paranoid faults on return to userspace are
* handled by fixup_bad_iret. These include #SS, #GP, and #NP.
@@ -734,18 +787,15 @@ SYM_FUNC_START(asm_load_gs_index)
FRAME_BEGIN
swapgs
.Lgs_change:
+ ANNOTATE_NOENDBR // error_entry
movl %edi, %gs
2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
swapgs
FRAME_END
- ret
-SYM_FUNC_END(asm_load_gs_index)
-EXPORT_SYMBOL(asm_load_gs_index)
+ RET
- _ASM_EXTABLE(.Lgs_change, .Lbad_gs)
- .section .fixup, "ax"
/* running with kernelgs */
-SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs)
+.Lbad_gs:
swapgs /* switch back to user gs */
.macro ZAP_GS
/* This can't be a string because the preprocessor needs to see it. */
@@ -756,8 +806,11 @@ SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs)
xorl %eax, %eax
movl %eax, %gs
jmp 2b
-SYM_CODE_END(.Lbad_gs)
- .previous
+
+ _ASM_EXTABLE(.Lgs_change, .Lbad_gs)
+
+SYM_FUNC_END(asm_load_gs_index)
+EXPORT_SYMBOL(asm_load_gs_index)
#ifdef CONFIG_XEN_PV
/*
@@ -805,6 +858,7 @@ SYM_CODE_END(exc_xen_hypervisor_callback)
*/
SYM_CODE_START(xen_failsafe_callback)
UNWIND_HINT_EMPTY
+ ENDBR
movl %ds, %ecx
cmpw %cx, 0x10(%rsp)
jne 1f
@@ -845,10 +899,12 @@ SYM_CODE_END(xen_failsafe_callback)
* 1 -> no SWAPGS on exit
*
* Y GSBASE value at entry, must be restored in paranoid_exit
+ *
+ * R14 - old CR3
+ * R15 - old SPEC_CTRL
*/
SYM_CODE_START_LOCAL(paranoid_entry)
UNWIND_HINT_FUNC
- cld
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
@@ -889,7 +945,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
* is needed here.
*/
SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
- ret
+ jmp .Lparanoid_gsbase_done
.Lparanoid_entry_checkgs:
/* EBX = 1 -> kernel GSBASE active, no restore required */
@@ -908,9 +964,17 @@ SYM_CODE_START_LOCAL(paranoid_entry)
xorl %ebx, %ebx
swapgs
.Lparanoid_kernel_gsbase:
-
FENCE_SWAPGS_KERNEL_ENTRY
- ret
+.Lparanoid_gsbase_done:
+
+ /*
+ * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like
+ * CR3 above, keep the old value in a callee saved register.
+ */
+ IBRS_ENTER save_reg=%r15
+ UNTRAIN_RET
+
+ RET
SYM_CODE_END(paranoid_entry)
/*
@@ -931,9 +995,19 @@ SYM_CODE_END(paranoid_entry)
* 1 -> no SWAPGS on exit
*
* Y User space GSBASE, must be restored unconditionally
+ *
+ * R14 - old CR3
+ * R15 - old SPEC_CTRL
*/
SYM_CODE_START_LOCAL(paranoid_exit)
UNWIND_HINT_REGS
+
+ /*
+ * Must restore IBRS state before both CR3 and %GS since we need access
+ * to the per-CPU x86_spec_ctrl_shadow variable.
+ */
+ IBRS_EXIT save_reg=%r15
+
/*
* The order of operations is important. RESTORE_CR3 requires
* kernel GSBASE.
@@ -962,13 +1036,14 @@ SYM_CODE_START_LOCAL(paranoid_exit)
SYM_CODE_END(paranoid_exit)
/*
- * Save all registers in pt_regs, and switch GS if needed.
+ * Switch GS and CR3 if needed.
*/
SYM_CODE_START_LOCAL(error_entry)
UNWIND_HINT_FUNC
- cld
+
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
+
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -976,20 +1051,19 @@ SYM_CODE_START_LOCAL(error_entry)
* We entered from user mode or we're pretending to have entered
* from user mode due to an IRET fault.
*/
- SWAPGS
+ swapgs
FENCE_SWAPGS_USER_ENTRY
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ IBRS_ENTER
+ UNTRAIN_RET
+ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
.Lerror_entry_from_usermode_after_swapgs:
+
/* Put us onto the real thread stack. */
- popq %r12 /* save return addr in %12 */
- movq %rsp, %rdi /* arg0 = pt_regs pointer */
call sync_regs
- movq %rax, %rsp /* switch stack */
- ENCODE_FRAME_POINTER
- pushq %r12
- ret
+ RET
/*
* There are two places in the kernel that can potentially fault with
@@ -1012,7 +1086,7 @@ SYM_CODE_START_LOCAL(error_entry)
* gsbase and proceed. We'll fix up the exception and land in
* .Lgs_change's error handler with kernel gsbase.
*/
- SWAPGS
+ swapgs
/*
* Issue an LFENCE to prevent GS speculation, regardless of whether it is a
@@ -1020,7 +1094,9 @@ SYM_CODE_START_LOCAL(error_entry)
*/
.Lerror_entry_done_lfence:
FENCE_SWAPGS_KERNEL_ENTRY
- ret
+ leaq 8(%rsp), %rax /* return pt_regs pointer */
+ ANNOTATE_UNRET_END
+ RET
.Lbstep_iret:
/* Fix truncated RIP */
@@ -1032,17 +1108,19 @@ SYM_CODE_START_LOCAL(error_entry)
* We came from an IRET to user mode, so we have user
* gsbase and CR3. Switch to kernel gsbase and CR3:
*/
- SWAPGS
+ swapgs
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ IBRS_ENTER
+ UNTRAIN_RET
/*
* Pretend that the exception came from user mode: set up pt_regs
* as if we faulted immediately after IRET.
*/
- mov %rsp, %rdi
+ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
call fixup_bad_iret
- mov %rax, %rsp
+ mov %rax, %rdi
jmp .Lerror_entry_from_usermode_after_swapgs
SYM_CODE_END(error_entry)
@@ -1064,6 +1142,7 @@ SYM_CODE_END(error_return)
*/
SYM_CODE_START(asm_exc_nmi)
UNWIND_HINT_IRET_REGS
+ ENDBR
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
@@ -1104,6 +1183,7 @@ SYM_CODE_START(asm_exc_nmi)
*/
ASM_CLAC
+ cld
/* Use %rdx as our temp variable throughout */
pushq %rdx
@@ -1123,7 +1203,6 @@ SYM_CODE_START(asm_exc_nmi)
*/
swapgs
- cld
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
@@ -1139,6 +1218,9 @@ SYM_CODE_START(asm_exc_nmi)
PUSH_AND_CLEAR_REGS rdx=(%rdx)
ENCODE_FRAME_POINTER
+ IBRS_ENTER
+ UNTRAIN_RET
+
/*
* At this point we no longer need to worry about stack damage
* due to nesting -- we're on the normal thread stack and we're
@@ -1311,6 +1393,7 @@ first_nmi:
#endif
repeat_nmi:
+ ANNOTATE_NOENDBR // this code
/*
* If there was a nested NMI, the first NMI's iret will return
* here. But NMIs are still enabled and we can take another
@@ -1339,6 +1422,7 @@ repeat_nmi:
.endr
subq $(5*8), %rsp
end_repeat_nmi:
+ ANNOTATE_NOENDBR // this code
/*
* Everything below this point can be preempted by a nested NMI.
@@ -1361,6 +1445,9 @@ end_repeat_nmi:
movq $-1, %rsi
call exc_nmi
+ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
+ IBRS_EXIT save_reg=%r15
+
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
@@ -1422,13 +1509,14 @@ SYM_CODE_END(asm_exc_nmi)
*/
SYM_CODE_START(ignore_sysret)
UNWIND_HINT_EMPTY
+ ENDBR
mov $-ENOSYS, %eax
sysretl
SYM_CODE_END(ignore_sysret)
#endif
.pushsection .text, "ax"
-SYM_CODE_START(rewind_stack_do_exit)
+SYM_CODE_START(rewind_stack_and_make_dead)
UNWIND_HINT_FUNC
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
@@ -1437,6 +1525,6 @@ SYM_CODE_START(rewind_stack_do_exit)
leaq -PTREGS_SIZE(%rax), %rsp
UNWIND_HINT_REGS
- call do_exit
-SYM_CODE_END(rewind_stack_do_exit)
+ call make_task_dead
+SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 0051cf5c792d..4dd19819053a 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -4,7 +4,6 @@
*
* Copyright 2000-2002 Andi Kleen, SuSE Labs.
*/
-#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/current.h>
#include <asm/errno.h>
@@ -14,9 +13,12 @@
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <linux/linkage.h>
#include <linux/err.h>
+#include "calling.h"
+
.section .entry.text, "ax"
/*
@@ -47,9 +49,10 @@
* 0(%ebp) arg6
*/
SYM_CODE_START(entry_SYSENTER_compat)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
+ ENDBR
/* Interrupts are off on entry. */
- SWAPGS
+ swapgs
pushq %rax
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
@@ -82,36 +85,14 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
movl %eax, %eax
pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- pushq %rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq $0 /* pt_regs->r8 = 0 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq $0 /* pt_regs->r9 = 0 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq $0 /* pt_regs->r10 = 0 */
- xorl %r10d, %r10d /* nospec r10 */
- pushq $0 /* pt_regs->r11 = 0 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp (will be overwritten) */
- xorl %ebp, %ebp /* nospec rbp */
- pushq $0 /* pt_regs->r12 = 0 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq $0 /* pt_regs->r13 = 0 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq $0 /* pt_regs->r14 = 0 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq $0 /* pt_regs->r15 = 0 */
- xorl %r15d, %r15d /* nospec r15 */
-
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
UNWIND_HINT_REGS
cld
+ IBRS_ENTER
+ UNTRAIN_RET
+
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
@@ -147,6 +128,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
popfq
jmp .Lsysenter_flags_fixed
SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR // is_sysenter_singlestep
SYM_CODE_END(entry_SYSENTER_compat)
/*
@@ -197,7 +179,8 @@ SYM_CODE_END(entry_SYSENTER_compat)
* 0(%esp) arg6
*/
SYM_CODE_START(entry_SYSCALL_compat)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
+ ENDBR
/* Interrupts are off on entry. */
swapgs
@@ -211,6 +194,7 @@ SYM_CODE_START(entry_SYSCALL_compat)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
/* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */
@@ -221,37 +205,12 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
movl %eax, %eax /* discard orig_ax high bits */
pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- xorl %esi, %esi /* nospec si */
- pushq %rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
- pushq %rbp /* pt_regs->cx (stashed in bp) */
- xorl %ecx, %ecx /* nospec cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq $0 /* pt_regs->r8 = 0 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq $0 /* pt_regs->r9 = 0 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq $0 /* pt_regs->r10 = 0 */
- xorl %r10d, %r10d /* nospec r10 */
- pushq $0 /* pt_regs->r11 = 0 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp (will be overwritten) */
- xorl %ebp, %ebp /* nospec rbp */
- pushq $0 /* pt_regs->r12 = 0 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq $0 /* pt_regs->r13 = 0 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq $0 /* pt_regs->r14 = 0 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq $0 /* pt_regs->r15 = 0 */
- xorl %r15d, %r15d /* nospec r15 */
-
+ PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS
UNWIND_HINT_REGS
+ IBRS_ENTER
+ UNTRAIN_RET
+
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
@@ -266,6 +225,8 @@ sysret32_from_system_call:
*/
STACKLEAK_ERASE
+ IBRS_EXIT
+
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
@@ -293,6 +254,8 @@ sysret32_from_system_call:
* code. We zero R8-R10 to avoid info leaks.
*/
movq RSP-ORIG_RAX(%rsp), %rsp
+SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
/*
* The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
@@ -310,6 +273,9 @@ sysret32_from_system_call:
xorl %r10d, %r10d
swapgs
sysretl
+SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
+ int3
SYM_CODE_END(entry_SYSCALL_compat)
/*
@@ -339,12 +305,13 @@ SYM_CODE_END(entry_SYSCALL_compat)
* ebp arg6
*/
SYM_CODE_START(entry_INT80_compat)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
+ ENDBR
/*
* Interrupts are off on entry.
*/
ASM_CLAC /* Do this early to minimize exposure */
- SWAPGS
+ ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
/*
* User tracing code (ptrace or signal handlers) might assume that
@@ -357,58 +324,32 @@ SYM_CODE_START(entry_INT80_compat)
/* switch to thread stack expects orig_ax and rdi to be pushed */
pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
/* Need to switch before accessing the thread stack. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
/* In the Xen PV case we already run on the thread stack. */
ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
- movq %rsp, %rdi
+ movq %rsp, %rax
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- pushq 6*8(%rdi) /* regs->ss */
- pushq 5*8(%rdi) /* regs->rsp */
- pushq 4*8(%rdi) /* regs->eflags */
- pushq 3*8(%rdi) /* regs->cs */
- pushq 2*8(%rdi) /* regs->ip */
- pushq 1*8(%rdi) /* regs->orig_ax */
- pushq (%rdi) /* pt_regs->di */
+ pushq 5*8(%rax) /* regs->ss */
+ pushq 4*8(%rax) /* regs->rsp */
+ pushq 3*8(%rax) /* regs->eflags */
+ pushq 2*8(%rax) /* regs->cs */
+ pushq 1*8(%rax) /* regs->ip */
+ pushq 0*8(%rax) /* regs->orig_ax */
.Lint80_keep_stack:
- pushq %rsi /* pt_regs->si */
- xorl %esi, %esi /* nospec si */
- pushq %rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
- pushq %rcx /* pt_regs->cx */
- xorl %ecx, %ecx /* nospec cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 */
- xorl %r8d, %r8d /* nospec r8 */
- pushq %r9 /* pt_regs->r9 */
- xorl %r9d, %r9d /* nospec r9 */
- pushq %r10 /* pt_regs->r10*/
- xorl %r10d, %r10d /* nospec r10 */
- pushq %r11 /* pt_regs->r11 */
- xorl %r11d, %r11d /* nospec r11 */
- pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx */
- pushq %rbp /* pt_regs->rbp */
- xorl %ebp, %ebp /* nospec rbp */
- pushq %r12 /* pt_regs->r12 */
- xorl %r12d, %r12d /* nospec r12 */
- pushq %r13 /* pt_regs->r13 */
- xorl %r13d, %r13d /* nospec r13 */
- pushq %r14 /* pt_regs->r14 */
- xorl %r14d, %r14d /* nospec r14 */
- pushq %r15 /* pt_regs->r15 */
- xorl %r15d, %r15d /* nospec r15 */
-
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
UNWIND_HINT_REGS
cld
+ IBRS_ENTER
+ UNTRAIN_RET
+
movq %rsp, %rdi
call do_int80_syscall_32
jmp swapgs_restore_regs_and_return_to_usermode
diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index 5b3efed0e4e8..eca5d6eff132 100644
--- a/arch/x86/entry/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -3,8 +3,7 @@ out := arch/$(SRCARCH)/include/generated/asm
uapi := arch/$(SRCARCH)/include/generated/uapi/asm
# Create output directory if not already present
-_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
- $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')
+$(shell mkdir -p $(out) $(uapi))
syscall32 := $(src)/syscall_32.tbl
syscall64 := $(src)/syscall_64.tbl
@@ -67,7 +66,7 @@ uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h
syshdr-y += syscalls_32.h
syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h
syshdr-$(CONFIG_X86_64) += syscalls_64.h
-syshdr-$(CONFIG_X86_X32) += syscalls_x32.h
+syshdr-$(CONFIG_X86_X32_ABI) += syscalls_x32.h
syshdr-$(CONFIG_XEN) += xen-hypercalls.h
uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y))
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 7e25543693de..320480a8db4f 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -454,3 +454,4 @@
447 i386 memfd_secret sys_memfd_secret
448 i386 process_mrelease sys_process_mrelease
449 i386 futex_waitv sys_futex_waitv
+450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index fe8f8dd157b4..c84d12608cd2 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,6 +371,7 @@
447 common memfd_secret sys_memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node
#
# Due to a historical design error, certain syscalls are numbered differently
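
Both table additions wire up syscall number 450; until a libc wrapper exists, userspace reaches it via syscall(2). A hedged usage sketch: the 450 constant matches the new table lines, the four-argument form (start, len, home_node, flags) follows the upstream sys_set_mempolicy_home_node prototype, and the dummy argument values are illustrative only.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_set_mempolicy_home_node
#define __NR_set_mempolicy_home_node 450	/* matches the new table entries */
#endif

int main(void)
{
	/*
	 * Illustrative arguments: a real caller passes a mapped range with an
	 * MPOL_BIND or MPOL_PREFERRED_MANY policy and a target home node.
	 */
	unsigned long start = 0, len = 0, home_node = 0, flags = 0;

	long ret = syscall(__NR_set_mempolicy_home_node, start, len, home_node, flags);
	if (ret < 0)
		perror("set_mempolicy_home_node");
	return 0;
}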
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index f1f96d4d8cd6..ff6e7003da97 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -24,15 +24,13 @@ SYM_CODE_START_NOALIGN(\name)
popl %edx
popl %ecx
popl %eax
- ret
+ RET
_ASM_NOKPROBE(\name)
SYM_CODE_END(\name)
.endm
-#ifdef CONFIG_PREEMPTION
THUNK preempt_schedule_thunk, preempt_schedule
THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
EXPORT_SYMBOL(preempt_schedule_thunk)
EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
-#endif
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index 496b11ec469d..f38b07d2768b 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -31,14 +31,11 @@ SYM_FUNC_END(\name)
_ASM_NOKPROBE(\name)
.endm
-#ifdef CONFIG_PREEMPTION
THUNK preempt_schedule_thunk, preempt_schedule
THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
EXPORT_SYMBOL(preempt_schedule_thunk)
EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
-#endif
-#ifdef CONFIG_PREEMPTION
SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore)
popq %r11
popq %r10
@@ -50,7 +47,6 @@ SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore)
popq %rsi
popq %rdi
popq %rbp
- ret
+ RET
_ASM_NOKPROBE(__thunk_restore)
SYM_CODE_END(__thunk_restore)
-#endif
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index a2dddcc189f6..3e88b9df8c8f 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -11,6 +11,9 @@ include $(srctree)/lib/vdso/Makefile
# Sanitizer runtimes are unavailable and cannot be linked here.
KASAN_SANITIZE := n
+KMSAN_SANITIZE_vclock_gettime.o := n
+KMSAN_SANITIZE_vgetcpu.o := n
+
UBSAN_SANITIZE := n
KCSAN_SANITIZE := n
OBJECT_FILES_NON_STANDARD := y
@@ -91,7 +94,8 @@ ifneq ($(RETPOLINE_VDSO_CFLAGS),)
endif
endif
-$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
+$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
+$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
@@ -148,9 +152,11 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
KBUILD_CFLAGS_32 += -fno-stack-protector
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
@@ -172,13 +178,13 @@ $(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE
# The DSO images are built using a special linker script.
#
quiet_cmd_vdso = VDSO $@
- cmd_vdso = $(LD) -nostdlib -o $@ \
+ cmd_vdso = $(LD) -o $@ \
$(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
-T $(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \
- $(call ld-option, --eh-frame-hdr) -Bsymbolic
+ $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack
GCOV_PROFILE := n
quiet_cmd_vdso_and_check = VDSO $@
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index dc8da7695859..bafa73f09e92 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -77,7 +77,6 @@ SECTIONS
.text : {
*(.text*)
- *(.fixup)
} :text =0x90909090,
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index edfe9780f6d1..90d15f2a7205 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* vdso2c - A vdso image preparation tool
* Copyright (c) 2014 Andy Lutomirski and others
- * Licensed under the GPL v2
*
* vdso2c requires stripped and unstripped input. It would be trivial
* to fully strip the input in here, but, for reasons described below,
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 6ddd7a937b3e..d33c6513fd2c 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -78,7 +78,7 @@ SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL)
popl %ecx
CFI_RESTORE ecx
CFI_ADJUST_CFA_OFFSET -4
- ret
+ RET
CFI_ENDPROC
.size __kernel_vsyscall,.-__kernel_vsyscall
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 235a5794296a..311eae30e089 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -127,17 +127,17 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
struct mm_struct *mm = task->mm;
struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
mmap_read_lock(mm);
-
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
unsigned long size = vma->vm_end - vma->vm_start;
if (vma_is_special_mapping(vma, &vvar_mapping))
zap_page_range(vma, vma->vm_start, size);
}
-
mmap_read_unlock(mm);
+
return 0;
}
#else
@@ -327,7 +327,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
end -= len;
if (end > start) {
- offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
+ offset = prandom_u32_max(((end - start) >> PAGE_SHIFT) + 1);
addr = start + (offset << PAGE_SHIFT);
} else {
addr = start;
@@ -354,6 +354,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
mmap_write_lock(mm);
/*
@@ -363,7 +364,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr)
* We could search vma near context.vdso, but it's a slowpath,
* so let's explicitly check all VMAs to be completely sure.
*/
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
if (vma_is_special_mapping(vma, &vdso_mapping) ||
vma_is_special_mapping(vma, &vvar_mapping)) {
mmap_write_unlock(mm);
@@ -438,7 +439,7 @@ bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
static __init int vdso_setup(char *s)
{
vdso64_enabled = simple_strtoul(s, NULL, 0);
- return 0;
+ return 1;
}
__setup("vdso=", vdso_setup);
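
Two of the vma.c changes deserve a word of context: the loops now use the VMA iterator (VMA_ITERATOR()/for_each_vma()) introduced with the maple-tree VMA storage instead of the old vm_next linked-list walk, and vdso_addr() now uses prandom_u32_max(), which maps a 32-bit pseudo-random value into [0, n) with a multiply and shift rather than the division implied by get_random_int() % n. A minimal sketch of that bounding trick (bounded_u32() is an assumed name; the upstream helper is prandom_u32_max()):

#include <stdint.h>

/*
 * Map a uniformly distributed 32-bit value r into [0, n) with a multiply
 * and a shift, avoiding the division that "r % n" implies.
 */
static inline uint32_t bounded_u32(uint32_t r, uint32_t n)
{
	return (uint32_t)(((uint64_t)r * n) >> 32);
}

/*
 * vdso_addr() then picks, in spirit:
 *   offset = bounded_u32(rng(), ((end - start) >> PAGE_SHIFT) + 1);
 *   addr   = start + ((unsigned long)offset << PAGE_SHIFT);
 */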
diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S
index 99dafac992e2..d77d278ee9dd 100644
--- a/arch/x86/entry/vdso/vsgx.S
+++ b/arch/x86/entry/vdso/vsgx.S
@@ -81,7 +81,7 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
pop %rbx
leave
.cfi_def_cfa %rsp, 8
- ret
+ RET
/* The out-of-line code runs with the pre-leave stack frame. */
.cfi_def_cfa %rbp, 16
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index fd2ee9408e91..4af81df133ee 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -48,7 +48,7 @@ static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init =
#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY)
XONLY;
#else
- EMULATE;
+ #error VSYSCALL config is broken
#endif
static int __init vsyscall_setup(char *str)
diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
index 2e203f3a25a7..ef2dd1827243 100644
--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S
+++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
@@ -20,16 +20,19 @@ __vsyscall_page:
mov $__NR_gettimeofday, %rax
syscall
ret
+ int3
.balign 1024, 0xcc
mov $__NR_time, %rax
syscall
ret
+ int3
.balign 1024, 0xcc
mov $__NR_getcpu, %rax
syscall
ret
+ int3
.balign 4096, 0xcc