Diffstat (limited to 'arch/x86/entry')
-rw-r--r--  arch/x86/entry/Makefile                    |   3
-rw-r--r--  arch/x86/entry/calling.h                   |  81
-rw-r--r--  arch/x86/entry/entry.S                     |  22
-rw-r--r--  arch/x86/entry/entry_32.S                  |  45
-rw-r--r--  arch/x86/entry/entry_64.S                  | 168
-rw-r--r--  arch/x86/entry/entry_64_compat.S           | 139
-rw-r--r--  arch/x86/entry/syscalls/Makefile           |   5
-rw-r--r--  arch/x86/entry/syscalls/syscall_32.tbl     |   1
-rw-r--r--  arch/x86/entry/syscalls/syscall_64.tbl     |   1
-rw-r--r--  arch/x86/entry/thunk_32.S                  |   4
-rw-r--r--  arch/x86/entry/thunk_64.S                  |   6
-rw-r--r--  arch/x86/entry/vdso/Makefile               |  12
-rw-r--r--  arch/x86/entry/vdso/vdso-layout.lds.S      |   1
-rw-r--r--  arch/x86/entry/vdso/vdso2c.c               |   2
-rw-r--r--  arch/x86/entry/vdso/vdso32/system_call.S   |   2
-rw-r--r--  arch/x86/entry/vdso/vma.c                  |  13
-rw-r--r--  arch/x86/entry/vdso/vsgx.S                 |   2
-rw-r--r--  arch/x86/entry/vsyscall/vsyscall_64.c      |   2
-rw-r--r--  arch/x86/entry/vsyscall/vsyscall_emu_64.S  |   3
19 files changed, 307 insertions, 205 deletions
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 7fec5dcf6438..ca2fe186994b 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -11,12 +11,13 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) CFLAGS_common.o += -fno-stack-protector -obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o +obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o obj-y += common.o obj-y += vdso/ obj-y += vsyscall/ +obj-$(CONFIG_PREEMPTION) += thunk_$(BITS).o obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o obj-$(CONFIG_X86_X32_ABI) += syscall_x32.o diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index a4c061fb7c6e..f6907627172b 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -7,6 +7,8 @@ #include <asm/asm-offsets.h> #include <asm/processor-flags.h> #include <asm/ptrace-abi.h> +#include <asm/msr.h> +#include <asm/nospec-branch.h> /* @@ -63,7 +65,7 @@ For 32-bit we have the following conventions - kernel is built with * for assembly code: */ -.macro PUSH_REGS rdx=%rdx rax=%rax save_ret=0 +.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 .if \save_ret pushq %rsi /* pt_regs->si */ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ @@ -73,7 +75,7 @@ For 32-bit we have the following conventions - kernel is built with pushq %rsi /* pt_regs->si */ .endif pushq \rdx /* pt_regs->dx */ - pushq %rcx /* pt_regs->cx */ + pushq \rcx /* pt_regs->cx */ pushq \rax /* pt_regs->ax */ pushq %r8 /* pt_regs->r8 */ pushq %r9 /* pt_regs->r9 */ @@ -99,6 +101,7 @@ For 32-bit we have the following conventions - kernel is built with * well before they could be put to use in a speculative execution * gadget. */ + xorl %esi, %esi /* nospec si */ xorl %edx, %edx /* nospec dx */ xorl %ecx, %ecx /* nospec cx */ xorl %r8d, %r8d /* nospec r8 */ @@ -114,32 +117,24 @@ For 32-bit we have the following conventions - kernel is built with .endm -.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 - PUSH_REGS rdx=\rdx, rax=\rax, save_ret=\save_ret +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 + PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret CLEAR_REGS .endm -.macro POP_REGS pop_rdi=1 skip_r11rcx=0 +.macro POP_REGS pop_rdi=1 popq %r15 popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx - .if \skip_r11rcx - popq %rsi - .else popq %r11 - .endif popq %r10 popq %r9 popq %r8 popq %rax - .if \skip_r11rcx - popq %rsi - .else popq %rcx - .endif popq %rdx popq %rsi .if \pop_rdi @@ -290,6 +285,66 @@ For 32-bit we have the following conventions - kernel is built with #endif /* + * IBRS kernel mitigation for Spectre_v2. + * + * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers + * the regs it uses (AX, CX, DX). Must be called before the first RET + * instruction (NOTE! UNTRAIN_RET includes a RET instruction) + * + * The optional argument is used to save/restore the current value, + * which is used on the paranoid paths. + * + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. 
+ */ +.macro IBRS_ENTER save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + rdmsr + shl $32, %rdx + or %rdx, %rax + mov %rax, \save_reg + test $SPEC_CTRL_IBRS, %eax + jz .Ldo_wrmsr_\@ + lfence + jmp .Lend_\@ +.Ldo_wrmsr_\@: +.endif + + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + +/* + * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) + * regs. Must be called after the last RET. + */ +.macro IBRS_EXIT save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + mov \save_reg, %rdx +.else + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + andl $(~SPEC_CTRL_IBRS), %edx +.endif + + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + +/* * Mitigate Spectre v1 for conditional swapgs code paths. * * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S new file mode 100644 index 000000000000..bfb7bcb362bc --- /dev/null +++ b/arch/x86/entry/entry.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common place for both 32- and 64-bit entry routines. + */ + +#include <linux/linkage.h> +#include <asm/export.h> +#include <asm/msr-index.h> + +.pushsection .noinstr.text, "ax" + +SYM_FUNC_START(entry_ibpb) + movl $MSR_IA32_PRED_CMD, %ecx + movl $PRED_CMD_IBPB, %eax + xorl %edx, %edx + wrmsr + RET +SYM_FUNC_END(entry_ibpb) +/* For KVM */ +EXPORT_SYMBOL_GPL(entry_ibpb); + +.popsection diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index ccb9d32768f3..e309e7156038 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -268,19 +268,16 @@ 1: popl %ds 2: popl %es 3: popl %fs - addl $(4 + \pop), %esp /* pop the unused "gs" slot */ +4: addl $(4 + \pop), %esp /* pop the unused "gs" slot */ IRET_FRAME -.pushsection .fixup, "ax" -4: movl $0, (%esp) - jmp 1b -5: movl $0, (%esp) - jmp 2b -6: movl $0, (%esp) - jmp 3b -.popsection - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 5b) - _ASM_EXTABLE(3b, 6b) + + /* + * There is no _ASM_EXTABLE_TYPE_REG() for ASM, however since this is + * ASM the registers are known and we can trivially hard-code them. + */ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_POP_ZERO|EX_REG_DS) + _ASM_EXTABLE_TYPE(2b, 3b, EX_TYPE_POP_ZERO|EX_REG_ES) + _ASM_EXTABLE_TYPE(3b, 4b, EX_TYPE_POP_ZERO|EX_REG_FS) .endm .macro RESTORE_ALL_NMI cr3_reg:req pop=0 @@ -701,7 +698,6 @@ SYM_CODE_START(__switch_to_asm) movl %ebx, PER_CPU_VAR(__stack_chk_guard) #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -710,7 +706,6 @@ SYM_CODE_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* Restore flags or the incoming task to restore AC state. 
*/ popfl @@ -740,7 +735,7 @@ SYM_FUNC_START(schedule_tail_wrapper) popl %eax FRAME_END - ret + RET SYM_FUNC_END(schedule_tail_wrapper) .popsection @@ -925,10 +920,8 @@ SYM_FUNC_START(entry_SYSENTER_32) sti sysexit -.pushsection .fixup, "ax" -2: movl $0, PT_FS(%esp) - jmp 1b -.popsection +2: movl $0, PT_FS(%esp) + jmp 1b _ASM_EXTABLE(1b, 2b) .Lsysenter_fix_flags: @@ -996,8 +989,7 @@ restore_all_switch_stack: */ iret -.section .fixup, "ax" -SYM_CODE_START(asm_iret_error) +.Lasm_iret_error: pushl $0 # no error code pushl $iret_error @@ -1014,9 +1006,8 @@ SYM_CODE_START(asm_iret_error) #endif jmp handle_exception -SYM_CODE_END(asm_iret_error) -.previous - _ASM_EXTABLE(.Lirq_return, asm_iret_error) + + _ASM_EXTABLE(.Lirq_return, .Lasm_iret_error) SYM_FUNC_END(entry_INT80_32) .macro FIXUP_ESPFIX_STACK @@ -1248,14 +1239,14 @@ SYM_CODE_START(asm_exc_nmi) SYM_CODE_END(asm_exc_nmi) .pushsection .text, "ax" -SYM_CODE_START(rewind_stack_do_exit) +SYM_CODE_START(rewind_stack_and_make_dead) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp movl PER_CPU_VAR(cpu_current_top_of_stack), %esi leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp - call do_exit + call make_task_dead 1: jmp 1b -SYM_CODE_END(rewind_stack_do_exit) +SYM_CODE_END(rewind_stack_and_make_dead) .popsection diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 97b1f84bb53f..9953d966d124 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -85,7 +85,8 @@ */ SYM_CODE_START(entry_SYSCALL_64) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY + ENDBR swapgs /* tss.sp2 is scratch space. */ @@ -94,6 +95,7 @@ SYM_CODE_START(entry_SYSCALL_64) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ @@ -110,6 +112,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) movq %rsp, %rdi /* Sign extend the lower 32bit as syscall numbers are treated as int */ movslq %eax, %rsi + + /* clobbers %rax, make sure it is after saving the syscall nr */ + IBRS_ENTER + UNTRAIN_RET + call do_syscall_64 /* returns with IRQs disabled */ /* @@ -189,8 +196,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: - /* rcx and r11 are already restored (see code above) */ - POP_REGS pop_rdi=0 skip_r11rcx=1 + IBRS_EXIT + POP_REGS pop_rdi=0 /* * Now all regs are restored except RSP and RDI. @@ -213,8 +220,13 @@ syscall_return_via_sysret: popq %rdi popq %rsp +SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR swapgs sysretq +SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + int3 SYM_CODE_END(entry_SYSCALL_64) /* @@ -243,7 +255,6 @@ SYM_FUNC_START(__switch_to_asm) movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -252,7 +263,6 @@ SYM_FUNC_START(__switch_to_asm) * speculative execution to prevent attack. 
*/ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* restore callee-saved registers */ popq %r15 @@ -276,6 +286,7 @@ SYM_FUNC_END(__switch_to_asm) .pushsection .text, "ax" SYM_CODE_START(ret_from_fork) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // copy_thread movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ @@ -315,6 +326,14 @@ SYM_CODE_END(ret_from_fork) #endif .endm +SYM_CODE_START_LOCAL(xen_error_entry) + UNWIND_HINT_FUNC + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + UNTRAIN_RET + RET +SYM_CODE_END(xen_error_entry) + /** * idtentry_body - Macro to emit code calling the C function * @cfunc: C function to be called @@ -322,7 +341,18 @@ SYM_CODE_END(ret_from_fork) */ .macro idtentry_body cfunc has_error_code:req - call error_entry + /* + * Call error_entry() and switch to the task stack if from userspace. + * + * When in XENPV, it is already in the task stack, and it can't fault + * for native_iret() nor native_load_gs_index() since XENPV uses its + * own pvops for IRET and load_gs_index(). And it doesn't need to + * switch the CR3. So it can skip invoking error_entry(). + */ + ALTERNATIVE "call error_entry; movq %rax, %rsp", \ + "call xen_error_entry", X86_FEATURE_XENPV + + ENCODE_FRAME_POINTER UNWIND_HINT_REGS movq %rsp, %rdi /* pt_regs pointer into 1st argument*/ @@ -334,6 +364,9 @@ SYM_CODE_END(ret_from_fork) call \cfunc + /* For some configurations \cfunc ends up being a noreturn. */ + REACHABLE + jmp error_return .endm @@ -350,7 +383,9 @@ SYM_CODE_END(ret_from_fork) .macro idtentry vector asmsym cfunc has_error_code:req SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 + ENDBR ASM_CLAC + cld .if \has_error_code == 0 pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -417,7 +452,9 @@ SYM_CODE_END(\asmsym) .macro idtentry_mce_db vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS + ENDBR ASM_CLAC + cld pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -472,7 +509,9 @@ SYM_CODE_END(\asmsym) .macro idtentry_vc vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS + ENDBR ASM_CLAC + cld /* * If the entry is from userspace, switch stacks and treat it as @@ -499,6 +538,7 @@ SYM_CODE_START(\asmsym) call vc_switch_off_ist movq %rax, %rsp /* Switch to new stack */ + ENCODE_FRAME_POINTER UNWIND_HINT_REGS /* Update pt_regs */ @@ -533,7 +573,9 @@ SYM_CODE_END(\asmsym) .macro idtentry_df vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=8 + ENDBR ASM_CLAC + cld /* paranoid_entry returns GS information for paranoid_exit in EBX. */ call paranoid_entry @@ -544,6 +586,9 @@ SYM_CODE_START(\asmsym) movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ call \cfunc + /* For some configurations \cfunc ends up being a noreturn. */ + REACHABLE + jmp paranoid_exit _ASM_NOKPROBE(\asmsym) @@ -564,9 +609,11 @@ __irqentry_text_start: .align 16 .globl __irqentry_text_end __irqentry_text_end: + ANNOTATE_NOENDBR SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) + IBRS_EXIT #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates user mode. */ testb $3, CS(%rsp) @@ -608,8 +655,8 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. 
*/ popq %rdi - SWAPGS - INTERRUPT_RETURN + swapgs + jmp .Lnative_iret SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) @@ -626,9 +673,14 @@ SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization * when returning from IPI handler. */ - INTERRUPT_RETURN +#ifdef CONFIG_XEN_PV +SYM_INNER_LABEL(early_xen_iret_patch, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + .byte 0xe9 + .long .Lnative_iret - (. + 4) +#endif -SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL) +.Lnative_iret: UNWIND_HINT_IRET_REGS /* * Are we returning to a stack segment from the LDT? Note: in @@ -640,6 +692,7 @@ SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL) #endif SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL) + ANNOTATE_NOENDBR // exc_double_fault /* * This may fault. Non-paranoid faults on return to userspace are * handled by fixup_bad_iret. These include #SS, #GP, and #NP. @@ -734,18 +787,15 @@ SYM_FUNC_START(asm_load_gs_index) FRAME_BEGIN swapgs .Lgs_change: + ANNOTATE_NOENDBR // error_entry movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE swapgs FRAME_END - ret -SYM_FUNC_END(asm_load_gs_index) -EXPORT_SYMBOL(asm_load_gs_index) + RET - _ASM_EXTABLE(.Lgs_change, .Lbad_gs) - .section .fixup, "ax" /* running with kernelgs */ -SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs) +.Lbad_gs: swapgs /* switch back to user gs */ .macro ZAP_GS /* This can't be a string because the preprocessor needs to see it. */ @@ -756,8 +806,11 @@ SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs) xorl %eax, %eax movl %eax, %gs jmp 2b -SYM_CODE_END(.Lbad_gs) - .previous + + _ASM_EXTABLE(.Lgs_change, .Lbad_gs) + +SYM_FUNC_END(asm_load_gs_index) +EXPORT_SYMBOL(asm_load_gs_index) #ifdef CONFIG_XEN_PV /* @@ -805,6 +858,7 @@ SYM_CODE_END(exc_xen_hypervisor_callback) */ SYM_CODE_START(xen_failsafe_callback) UNWIND_HINT_EMPTY + ENDBR movl %ds, %ecx cmpw %cx, 0x10(%rsp) jne 1f @@ -845,10 +899,12 @@ SYM_CODE_END(xen_failsafe_callback) * 1 -> no SWAPGS on exit * * Y GSBASE value at entry, must be restored in paranoid_exit + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_entry) UNWIND_HINT_FUNC - cld PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 @@ -889,7 +945,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) * is needed here. */ SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx - ret + jmp .Lparanoid_gsbase_done .Lparanoid_entry_checkgs: /* EBX = 1 -> kernel GSBASE active, no restore required */ @@ -908,9 +964,17 @@ SYM_CODE_START_LOCAL(paranoid_entry) xorl %ebx, %ebx swapgs .Lparanoid_kernel_gsbase: - FENCE_SWAPGS_KERNEL_ENTRY - ret +.Lparanoid_gsbase_done: + + /* + * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like + * CR3 above, keep the old value in a callee saved register. + */ + IBRS_ENTER save_reg=%r15 + UNTRAIN_RET + + RET SYM_CODE_END(paranoid_entry) /* @@ -931,9 +995,19 @@ SYM_CODE_END(paranoid_entry) * 1 -> no SWAPGS on exit * * Y User space GSBASE, must be restored unconditionally + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_exit) UNWIND_HINT_REGS + + /* + * Must restore IBRS state before both CR3 and %GS since we need access + * to the per-CPU x86_spec_ctrl_shadow variable. + */ + IBRS_EXIT save_reg=%r15 + /* * The order of operations is important. RESTORE_CR3 requires * kernel GSBASE. @@ -962,13 +1036,14 @@ SYM_CODE_START_LOCAL(paranoid_exit) SYM_CODE_END(paranoid_exit) /* - * Save all registers in pt_regs, and switch GS if needed. + * Switch GS and CR3 if needed. 
*/ SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC - cld + PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 + testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -976,20 +1051,19 @@ SYM_CODE_START_LOCAL(error_entry) * We entered from user mode or we're pretending to have entered * from user mode due to an IRET fault. */ - SWAPGS + swapgs FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER + UNTRAIN_RET + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ .Lerror_entry_from_usermode_after_swapgs: + /* Put us onto the real thread stack. */ - popq %r12 /* save return addr in %12 */ - movq %rsp, %rdi /* arg0 = pt_regs pointer */ call sync_regs - movq %rax, %rsp /* switch stack */ - ENCODE_FRAME_POINTER - pushq %r12 - ret + RET /* * There are two places in the kernel that can potentially fault with @@ -1012,7 +1086,7 @@ SYM_CODE_START_LOCAL(error_entry) * gsbase and proceed. We'll fix up the exception and land in * .Lgs_change's error handler with kernel gsbase. */ - SWAPGS + swapgs /* * Issue an LFENCE to prevent GS speculation, regardless of whether it is a @@ -1020,7 +1094,9 @@ SYM_CODE_START_LOCAL(error_entry) */ .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY - ret + leaq 8(%rsp), %rax /* return pt_regs pointer */ + ANNOTATE_UNRET_END + RET .Lbstep_iret: /* Fix truncated RIP */ @@ -1032,17 +1108,19 @@ SYM_CODE_START_LOCAL(error_entry) * We came from an IRET to user mode, so we have user * gsbase and CR3. Switch to kernel gsbase and CR3: */ - SWAPGS + swapgs FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER + UNTRAIN_RET /* * Pretend that the exception came from user mode: set up pt_regs * as if we faulted immediately after IRET. */ - mov %rsp, %rdi + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ call fixup_bad_iret - mov %rax, %rsp + mov %rax, %rdi jmp .Lerror_entry_from_usermode_after_swapgs SYM_CODE_END(error_entry) @@ -1064,6 +1142,7 @@ SYM_CODE_END(error_return) */ SYM_CODE_START(asm_exc_nmi) UNWIND_HINT_IRET_REGS + ENDBR /* * We allow breakpoints in NMIs. If a breakpoint occurs, then @@ -1104,6 +1183,7 @@ SYM_CODE_START(asm_exc_nmi) */ ASM_CLAC + cld /* Use %rdx as our temp variable throughout */ pushq %rdx @@ -1123,7 +1203,6 @@ SYM_CODE_START(asm_exc_nmi) */ swapgs - cld FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx movq %rsp, %rdx @@ -1139,6 +1218,9 @@ SYM_CODE_START(asm_exc_nmi) PUSH_AND_CLEAR_REGS rdx=(%rdx) ENCODE_FRAME_POINTER + IBRS_ENTER + UNTRAIN_RET + /* * At this point we no longer need to worry about stack damage * due to nesting -- we're on the normal thread stack and we're @@ -1311,6 +1393,7 @@ first_nmi: #endif repeat_nmi: + ANNOTATE_NOENDBR // this code /* * If there was a nested NMI, the first NMI's iret will return * here. But NMIs are still enabled and we can take another @@ -1339,6 +1422,7 @@ repeat_nmi: .endr subq $(5*8), %rsp end_repeat_nmi: + ANNOTATE_NOENDBR // this code /* * Everything below this point can be preempted by a nested NMI. 
@@ -1361,6 +1445,9 @@ end_repeat_nmi: movq $-1, %rsi call exc_nmi + /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ + IBRS_EXIT save_reg=%r15 + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 @@ -1422,13 +1509,14 @@ SYM_CODE_END(asm_exc_nmi) */ SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY + ENDBR mov $-ENOSYS, %eax sysretl SYM_CODE_END(ignore_sysret) #endif .pushsection .text, "ax" -SYM_CODE_START(rewind_stack_do_exit) +SYM_CODE_START(rewind_stack_and_make_dead) UNWIND_HINT_FUNC /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp @@ -1437,6 +1525,6 @@ SYM_CODE_START(rewind_stack_do_exit) leaq -PTREGS_SIZE(%rax), %rsp UNWIND_HINT_REGS - call do_exit -SYM_CODE_END(rewind_stack_do_exit) + call make_task_dead +SYM_CODE_END(rewind_stack_and_make_dead) .popsection diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 0051cf5c792d..4dd19819053a 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -4,7 +4,6 @@ * * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ -#include "calling.h" #include <asm/asm-offsets.h> #include <asm/current.h> #include <asm/errno.h> @@ -14,9 +13,12 @@ #include <asm/irqflags.h> #include <asm/asm.h> #include <asm/smap.h> +#include <asm/nospec-branch.h> #include <linux/linkage.h> #include <linux/err.h> +#include "calling.h" + .section .entry.text, "ax" /* @@ -47,9 +49,10 @@ * 0(%ebp) arg6 */ SYM_CODE_START(entry_SYSENTER_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY + ENDBR /* Interrupts are off on entry. */ - SWAPGS + swapgs pushq %rax SWITCH_TO_KERNEL_CR3 scratch_reg=%rax @@ -82,36 +85,14 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) movl %eax, %eax pushq %rax /* pt_regs->orig_ax */ - pushq %rdi /* pt_regs->di */ - pushq %rsi /* pt_regs->si */ - pushq %rdx /* pt_regs->dx */ - pushq %rcx /* pt_regs->cx */ - pushq $-ENOSYS /* pt_regs->ax */ - pushq $0 /* pt_regs->r8 = 0 */ - xorl %r8d, %r8d /* nospec r8 */ - pushq $0 /* pt_regs->r9 = 0 */ - xorl %r9d, %r9d /* nospec r9 */ - pushq $0 /* pt_regs->r10 = 0 */ - xorl %r10d, %r10d /* nospec r10 */ - pushq $0 /* pt_regs->r11 = 0 */ - xorl %r11d, %r11d /* nospec r11 */ - pushq %rbx /* pt_regs->rbx */ - xorl %ebx, %ebx /* nospec rbx */ - pushq %rbp /* pt_regs->rbp (will be overwritten) */ - xorl %ebp, %ebp /* nospec rbp */ - pushq $0 /* pt_regs->r12 = 0 */ - xorl %r12d, %r12d /* nospec r12 */ - pushq $0 /* pt_regs->r13 = 0 */ - xorl %r13d, %r13d /* nospec r13 */ - pushq $0 /* pt_regs->r14 = 0 */ - xorl %r14d, %r14d /* nospec r14 */ - pushq $0 /* pt_regs->r15 = 0 */ - xorl %r15d, %r15d /* nospec r15 */ - + PUSH_AND_CLEAR_REGS rax=$-ENOSYS UNWIND_HINT_REGS cld + IBRS_ENTER + UNTRAIN_RET + /* * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether @@ -147,6 +128,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) popfq jmp .Lsysenter_flags_fixed SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL) + ANNOTATE_NOENDBR // is_sysenter_singlestep SYM_CODE_END(entry_SYSENTER_compat) /* @@ -197,7 +179,8 @@ SYM_CODE_END(entry_SYSENTER_compat) * 0(%esp) arg6 */ SYM_CODE_START(entry_SYSCALL_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY + ENDBR /* Interrupts are off on entry. 
*/ swapgs @@ -211,6 +194,7 @@ SYM_CODE_START(entry_SYSCALL_compat) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ @@ -221,37 +205,12 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) movl %eax, %eax /* discard orig_ax high bits */ pushq %rax /* pt_regs->orig_ax */ - pushq %rdi /* pt_regs->di */ - pushq %rsi /* pt_regs->si */ - xorl %esi, %esi /* nospec si */ - pushq %rdx /* pt_regs->dx */ - xorl %edx, %edx /* nospec dx */ - pushq %rbp /* pt_regs->cx (stashed in bp) */ - xorl %ecx, %ecx /* nospec cx */ - pushq $-ENOSYS /* pt_regs->ax */ - pushq $0 /* pt_regs->r8 = 0 */ - xorl %r8d, %r8d /* nospec r8 */ - pushq $0 /* pt_regs->r9 = 0 */ - xorl %r9d, %r9d /* nospec r9 */ - pushq $0 /* pt_regs->r10 = 0 */ - xorl %r10d, %r10d /* nospec r10 */ - pushq $0 /* pt_regs->r11 = 0 */ - xorl %r11d, %r11d /* nospec r11 */ - pushq %rbx /* pt_regs->rbx */ - xorl %ebx, %ebx /* nospec rbx */ - pushq %rbp /* pt_regs->rbp (will be overwritten) */ - xorl %ebp, %ebp /* nospec rbp */ - pushq $0 /* pt_regs->r12 = 0 */ - xorl %r12d, %r12d /* nospec r12 */ - pushq $0 /* pt_regs->r13 = 0 */ - xorl %r13d, %r13d /* nospec r13 */ - pushq $0 /* pt_regs->r14 = 0 */ - xorl %r14d, %r14d /* nospec r14 */ - pushq $0 /* pt_regs->r15 = 0 */ - xorl %r15d, %r15d /* nospec r15 */ - + PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS UNWIND_HINT_REGS + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_fast_syscall_32 /* XEN PV guests always use IRET path */ @@ -266,6 +225,8 @@ sysret32_from_system_call: */ STACKLEAK_ERASE + IBRS_EXIT + movq RBX(%rsp), %rbx /* pt_regs->rbx */ movq RBP(%rsp), %rbp /* pt_regs->rbp */ movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ @@ -293,6 +254,8 @@ sysret32_from_system_call: * code. We zero R8-R10 to avoid info leaks. */ movq RSP-ORIG_RAX(%rsp), %rsp +SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored @@ -310,6 +273,9 @@ sysret32_from_system_call: xorl %r10d, %r10d swapgs sysretl +SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + int3 SYM_CODE_END(entry_SYSCALL_compat) /* @@ -339,12 +305,13 @@ SYM_CODE_END(entry_SYSCALL_compat) * ebp arg6 */ SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY + ENDBR /* * Interrupts are off on entry. */ ASM_CLAC /* Do this early to minimize exposure */ - SWAPGS + ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV /* * User tracing code (ptrace or signal handlers) might assume that @@ -357,58 +324,32 @@ SYM_CODE_START(entry_INT80_compat) /* switch to thread stack expects orig_ax and rdi to be pushed */ pushq %rax /* pt_regs->orig_ax */ - pushq %rdi /* pt_regs->di */ /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax /* In the Xen PV case we already run on the thread stack. 
*/ ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV - movq %rsp, %rdi + movq %rsp, %rax movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - pushq 6*8(%rdi) /* regs->ss */ - pushq 5*8(%rdi) /* regs->rsp */ - pushq 4*8(%rdi) /* regs->eflags */ - pushq 3*8(%rdi) /* regs->cs */ - pushq 2*8(%rdi) /* regs->ip */ - pushq 1*8(%rdi) /* regs->orig_ax */ - pushq (%rdi) /* pt_regs->di */ + pushq 5*8(%rax) /* regs->ss */ + pushq 4*8(%rax) /* regs->rsp */ + pushq 3*8(%rax) /* regs->eflags */ + pushq 2*8(%rax) /* regs->cs */ + pushq 1*8(%rax) /* regs->ip */ + pushq 0*8(%rax) /* regs->orig_ax */ .Lint80_keep_stack: - pushq %rsi /* pt_regs->si */ - xorl %esi, %esi /* nospec si */ - pushq %rdx /* pt_regs->dx */ - xorl %edx, %edx /* nospec dx */ - pushq %rcx /* pt_regs->cx */ - xorl %ecx, %ecx /* nospec cx */ - pushq $-ENOSYS /* pt_regs->ax */ - pushq %r8 /* pt_regs->r8 */ - xorl %r8d, %r8d /* nospec r8 */ - pushq %r9 /* pt_regs->r9 */ - xorl %r9d, %r9d /* nospec r9 */ - pushq %r10 /* pt_regs->r10*/ - xorl %r10d, %r10d /* nospec r10 */ - pushq %r11 /* pt_regs->r11 */ - xorl %r11d, %r11d /* nospec r11 */ - pushq %rbx /* pt_regs->rbx */ - xorl %ebx, %ebx /* nospec rbx */ - pushq %rbp /* pt_regs->rbp */ - xorl %ebp, %ebp /* nospec rbp */ - pushq %r12 /* pt_regs->r12 */ - xorl %r12d, %r12d /* nospec r12 */ - pushq %r13 /* pt_regs->r13 */ - xorl %r13d, %r13d /* nospec r13 */ - pushq %r14 /* pt_regs->r14 */ - xorl %r14d, %r14d /* nospec r14 */ - pushq %r15 /* pt_regs->r15 */ - xorl %r15d, %r15d /* nospec r15 */ - + PUSH_AND_CLEAR_REGS rax=$-ENOSYS UNWIND_HINT_REGS cld + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_int80_syscall_32 jmp swapgs_restore_regs_and_return_to_usermode diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile index 5b3efed0e4e8..eca5d6eff132 100644 --- a/arch/x86/entry/syscalls/Makefile +++ b/arch/x86/entry/syscalls/Makefile @@ -3,8 +3,7 @@ out := arch/$(SRCARCH)/include/generated/asm uapi := arch/$(SRCARCH)/include/generated/uapi/asm # Create output directory if not already present -_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \ - $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') +$(shell mkdir -p $(out) $(uapi)) syscall32 := $(src)/syscall_32.tbl syscall64 := $(src)/syscall_64.tbl @@ -67,7 +66,7 @@ uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h syshdr-y += syscalls_32.h syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h syshdr-$(CONFIG_X86_64) += syscalls_64.h -syshdr-$(CONFIG_X86_X32) += syscalls_x32.h +syshdr-$(CONFIG_X86_X32_ABI) += syscalls_x32.h syshdr-$(CONFIG_XEN) += xen-hypercalls.h uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 7e25543693de..320480a8db4f 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -454,3 +454,4 @@ 447 i386 memfd_secret sys_memfd_secret 448 i386 process_mrelease sys_process_mrelease 449 i386 futex_waitv sys_futex_waitv +450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index fe8f8dd157b4..c84d12608cd2 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -371,6 +371,7 @@ 447 common memfd_secret sys_memfd_secret 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node # # Due to a historical design 
error, certain syscalls are numbered differently diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index f1f96d4d8cd6..ff6e7003da97 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -24,15 +24,13 @@ SYM_CODE_START_NOALIGN(\name) popl %edx popl %ecx popl %eax - ret + RET _ASM_NOKPROBE(\name) SYM_CODE_END(\name) .endm -#ifdef CONFIG_PREEMPTION THUNK preempt_schedule_thunk, preempt_schedule THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace EXPORT_SYMBOL(preempt_schedule_thunk) EXPORT_SYMBOL(preempt_schedule_notrace_thunk) -#endif diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 496b11ec469d..f38b07d2768b 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -31,14 +31,11 @@ SYM_FUNC_END(\name) _ASM_NOKPROBE(\name) .endm -#ifdef CONFIG_PREEMPTION THUNK preempt_schedule_thunk, preempt_schedule THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace EXPORT_SYMBOL(preempt_schedule_thunk) EXPORT_SYMBOL(preempt_schedule_notrace_thunk) -#endif -#ifdef CONFIG_PREEMPTION SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore) popq %r11 popq %r10 @@ -50,7 +47,6 @@ SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore) popq %rsi popq %rdi popq %rbp - ret + RET _ASM_NOKPROBE(__thunk_restore) SYM_CODE_END(__thunk_restore) -#endif diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index a2dddcc189f6..3e88b9df8c8f 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -11,6 +11,9 @@ include $(srctree)/lib/vdso/Makefile # Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n +KMSAN_SANITIZE_vclock_gettime.o := n +KMSAN_SANITIZE_vgetcpu.o := n + UBSAN_SANITIZE := n KCSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y @@ -91,7 +94,8 @@ ifneq ($(RETPOLINE_VDSO_CFLAGS),) endif endif -$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. @@ -148,9 +152,11 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic KBUILD_CFLAGS_32 += -fno-stack-protector KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) @@ -172,13 +178,13 @@ $(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE # The DSO images are built using a special linker script. 
# quiet_cmd_vdso = VDSO $@ - cmd_vdso = $(LD) -nostdlib -o $@ \ + cmd_vdso = $(LD) -o $@ \ $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ -T $(filter %.lds,$^) $(filter %.o,$^) && \ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \ - $(call ld-option, --eh-frame-hdr) -Bsymbolic + $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack GCOV_PROFILE := n quiet_cmd_vdso_and_check = VDSO $@ diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index dc8da7695859..bafa73f09e92 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -77,7 +77,6 @@ SECTIONS .text : { *(.text*) - *(.fixup) } :text =0x90909090, diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index edfe9780f6d1..90d15f2a7205 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -1,7 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * vdso2c - A vdso image preparation tool * Copyright (c) 2014 Andy Lutomirski and others - * Licensed under the GPL v2 * * vdso2c requires stripped and unstripped input. It would be trivial * to fully strip the input in here, but, for reasons described below, diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 6ddd7a937b3e..d33c6513fd2c 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -78,7 +78,7 @@ SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL) popl %ecx CFI_RESTORE ecx CFI_ADJUST_CFA_OFFSET -4 - ret + RET CFI_ENDPROC .size __kernel_vsyscall,.-__kernel_vsyscall diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 235a5794296a..311eae30e089 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -127,17 +127,17 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) { struct mm_struct *mm = task->mm; struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); mmap_read_lock(mm); - - for (vma = mm->mmap; vma; vma = vma->vm_next) { + for_each_vma(vmi, vma) { unsigned long size = vma->vm_end - vma->vm_start; if (vma_is_special_mapping(vma, &vvar_mapping)) zap_page_range(vma, vma->vm_start, size); } - mmap_read_unlock(mm); + return 0; } #else @@ -327,7 +327,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) end -= len; if (end > start) { - offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); + offset = prandom_u32_max(((end - start) >> PAGE_SHIFT) + 1); addr = start + (offset << PAGE_SHIFT); } else { addr = start; @@ -354,6 +354,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); mmap_write_lock(mm); /* @@ -363,7 +364,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr) * We could search vma near context.vdso, but it's a slowpath, * so let's explicitly check all VMAs to be completely sure. 
*/ - for (vma = mm->mmap; vma; vma = vma->vm_next) { + for_each_vma(vmi, vma) { if (vma_is_special_mapping(vma, &vdso_mapping) || vma_is_special_mapping(vma, &vvar_mapping)) { mmap_write_unlock(mm); @@ -438,7 +439,7 @@ bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) static __init int vdso_setup(char *s) { vdso64_enabled = simple_strtoul(s, NULL, 0); - return 0; + return 1; } __setup("vdso=", vdso_setup); diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S index 99dafac992e2..d77d278ee9dd 100644 --- a/arch/x86/entry/vdso/vsgx.S +++ b/arch/x86/entry/vdso/vsgx.S @@ -81,7 +81,7 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave) pop %rbx leave .cfi_def_cfa %rsp, 8 - ret + RET /* The out-of-line code runs with the pre-leave stack frame. */ .cfi_def_cfa %rbp, 16 diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index fd2ee9408e91..4af81df133ee 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -48,7 +48,7 @@ static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init = #elif defined(CONFIG_LEGACY_VSYSCALL_XONLY) XONLY; #else - EMULATE; + #error VSYSCALL config is broken #endif static int __init vsyscall_setup(char *str) diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S index 2e203f3a25a7..ef2dd1827243 100644 --- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S +++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S @@ -20,16 +20,19 @@ __vsyscall_page: mov $__NR_gettimeofday, %rax syscall ret + int3 .balign 1024, 0xcc mov $__NR_time, %rax syscall ret + int3 .balign 1024, 0xcc mov $__NR_getcpu, %rax syscall ret + int3 .balign 4096, 0xcc |
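The most consequential piece of this series is the IBRS_ENTER/IBRS_EXIT pair added to calling.h: on kernel entry the IBRS bit of MSR_IA32_SPEC_CTRL is set, so indirect branches executed in the kernel cannot be steered by user-trained predictors, and on exit it is either cleared or restored to the value stashed by the paranoid entry paths. The C fragment below is only an illustrative sketch of that logic, not kernel code; rdmsr64()/wrmsr64(), the fake MSR array and spec_ctrl_current are made-up stand-ins for the RDMSR/WRMSR instructions and the per-CPU x86_spec_ctrl_current variable.

#include <stdint.h>

#define MSR_IA32_SPEC_CTRL	0x48
#define SPEC_CTRL_IBRS		(1ULL << 0)

static uint64_t fake_msr[256];				/* toy MSR space, illustration only */
static uint64_t spec_ctrl_current = SPEC_CTRL_IBRS;	/* stand-in for x86_spec_ctrl_current */

static uint64_t rdmsr64(uint32_t nr)		   { return fake_msr[nr]; }
static void	wrmsr64(uint32_t nr, uint64_t val) { fake_msr[nr] = val; }

/*
 * IBRS_ENTER: make sure IBRS is on while running kernel code.  The paranoid
 * paths (NMI, #DF, ...) pass a save slot so the old MSR value can be put
 * back on exit, and they skip the expensive WRMSR if IBRS is already set.
 */
static void ibrs_enter(uint64_t *save)
{
	if (save) {
		*save = rdmsr64(MSR_IA32_SPEC_CTRL);
		if (*save & SPEC_CTRL_IBRS)
			return;		/* already set, nothing to write */
	}
	wrmsr64(MSR_IA32_SPEC_CTRL, spec_ctrl_current);
}

/*
 * IBRS_EXIT: restore the stashed value on the paranoid paths, otherwise
 * drop the IBRS bit before returning to user space.
 */
static void ibrs_exit(const uint64_t *save)
{
	uint64_t val = save ? *save : (spec_ctrl_current & ~SPEC_CTRL_IBRS);

	wrmsr64(MSR_IA32_SPEC_CTRL, val);
}

In the asm above, this corresponds to the bare IBRS_ENTER/IBRS_EXIT invocations on the syscall, compat-syscall and interrupt-return paths, and to IBRS_ENTER save_reg=%r15 / IBRS_EXIT save_reg=%r15 in paranoid_entry/paranoid_exit and the NMI tail, mirroring how the old CR3 value is already kept in %r14 on those paths.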