diff options
Diffstat (limited to 'arch/x86/entry')
-rw-r--r-- | arch/x86/entry/calling.h | 2 | ||||
-rw-r--r-- | arch/x86/entry/common.c | 4 | ||||
-rw-r--r-- | arch/x86/entry/entry_32.S | 422 | ||||
-rw-r--r-- | arch/x86/entry/entry_64.S | 112 | ||||
-rw-r--r-- | arch/x86/entry/entry_64_compat.S | 16 | ||||
-rw-r--r-- | arch/x86/entry/syscall_32.c | 8 | ||||
-rw-r--r-- | arch/x86/entry/syscall_64.c | 14 | ||||
-rw-r--r-- | arch/x86/entry/syscalls/syscall_32.tbl | 8 | ||||
-rw-r--r-- | arch/x86/entry/thunk_32.S | 4 | ||||
-rw-r--r-- | arch/x86/entry/thunk_64.S | 7 | ||||
-rw-r--r-- | arch/x86/entry/vdso/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/entry/vdso/vclock_gettime.c | 6 | ||||
-rw-r--r-- | arch/x86/entry/vdso/vdso32/system_call.S | 2 | ||||
-rw-r--r-- | arch/x86/entry/vsyscall/vsyscall_64.c | 6 |
14 files changed, 361 insertions, 252 deletions
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 515c0ceeb4a3..0789e13ece90 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -354,7 +354,7 @@ For 32-bit we have the following conventions - kernel is built with .macro CALL_enter_from_user_mode #ifdef CONFIG_CONTEXT_TRACKING #ifdef CONFIG_JUMP_LABEL - STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0 + STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_key, def=0 #endif call enter_from_user_mode .Lafter_call_\@: diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 3f8e22615812..9747876980b5 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -33,6 +33,7 @@ #include <asm/cpufeature.h> #include <asm/fpu/api.h> #include <asm/nospec-branch.h> +#include <asm/io_bitmap.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -196,6 +197,9 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) /* Reload ti->flags; we may have rescheduled above. */ cached_flags = READ_ONCE(ti->flags); + if (unlikely(cached_flags & _TIF_IO_BITMAP)) + tss_update_io_bitmap(); + fpregs_assert_state_consistent(); if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD)) switch_fpu_return(); diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index f83ca5aa8b77..7e0560442538 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -172,7 +172,7 @@ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI .if \no_user_check == 0 /* coming from usermode? */ - testl $SEGMENT_RPL_MASK, PT_CS(%esp) + testl $USER_SEGMENT_RPL_MASK, PT_CS(%esp) jz .Lend_\@ .endif /* On user-cr3? */ @@ -205,64 +205,76 @@ #define CS_FROM_ENTRY_STACK (1 << 31) #define CS_FROM_USER_CR3 (1 << 30) #define CS_FROM_KERNEL (1 << 29) +#define CS_FROM_ESPFIX (1 << 28) .macro FIXUP_FRAME /* * The high bits of the CS dword (__csh) are used for CS_FROM_*. * Clear them in case hardware didn't do this for us. */ - andl $0x0000ffff, 3*4(%esp) + andl $0x0000ffff, 4*4(%esp) #ifdef CONFIG_VM86 - testl $X86_EFLAGS_VM, 4*4(%esp) + testl $X86_EFLAGS_VM, 5*4(%esp) jnz .Lfrom_usermode_no_fixup_\@ #endif - testl $SEGMENT_RPL_MASK, 3*4(%esp) + testl $USER_SEGMENT_RPL_MASK, 4*4(%esp) jnz .Lfrom_usermode_no_fixup_\@ - orl $CS_FROM_KERNEL, 3*4(%esp) + orl $CS_FROM_KERNEL, 4*4(%esp) /* * When we're here from kernel mode; the (exception) stack looks like: * - * 5*4(%esp) - <previous context> - * 4*4(%esp) - flags - * 3*4(%esp) - cs - * 2*4(%esp) - ip - * 1*4(%esp) - orig_eax - * 0*4(%esp) - gs / function + * 6*4(%esp) - <previous context> + * 5*4(%esp) - flags + * 4*4(%esp) - cs + * 3*4(%esp) - ip + * 2*4(%esp) - orig_eax + * 1*4(%esp) - gs / function + * 0*4(%esp) - fs * * Lets build a 5 entry IRET frame after that, such that struct pt_regs * is complete and in particular regs->sp is correct. This gives us - * the original 5 enties as gap: + * the original 6 enties as gap: * - * 12*4(%esp) - <previous context> - * 11*4(%esp) - gap / flags - * 10*4(%esp) - gap / cs - * 9*4(%esp) - gap / ip - * 8*4(%esp) - gap / orig_eax - * 7*4(%esp) - gap / gs / function - * 6*4(%esp) - ss - * 5*4(%esp) - sp - * 4*4(%esp) - flags - * 3*4(%esp) - cs - * 2*4(%esp) - ip - * 1*4(%esp) - orig_eax - * 0*4(%esp) - gs / function + * 14*4(%esp) - <previous context> + * 13*4(%esp) - gap / flags + * 12*4(%esp) - gap / cs + * 11*4(%esp) - gap / ip + * 10*4(%esp) - gap / orig_eax + * 9*4(%esp) - gap / gs / function + * 8*4(%esp) - gap / fs + * 7*4(%esp) - ss + * 6*4(%esp) - sp + * 5*4(%esp) - flags + * 4*4(%esp) - cs + * 3*4(%esp) - ip + * 2*4(%esp) - orig_eax + * 1*4(%esp) - gs / function + * 0*4(%esp) - fs */ pushl %ss # ss pushl %esp # sp (points at ss) - addl $6*4, (%esp) # point sp back at the previous context - pushl 6*4(%esp) # flags - pushl 6*4(%esp) # cs - pushl 6*4(%esp) # ip - pushl 6*4(%esp) # orig_eax - pushl 6*4(%esp) # gs / function + addl $7*4, (%esp) # point sp back at the previous context + pushl 7*4(%esp) # flags + pushl 7*4(%esp) # cs + pushl 7*4(%esp) # ip + pushl 7*4(%esp) # orig_eax + pushl 7*4(%esp) # gs / function + pushl 7*4(%esp) # fs .Lfrom_usermode_no_fixup_\@: .endm .macro IRET_FRAME + /* + * We're called with %ds, %es, %fs, and %gs from the interrupted + * frame, so we shouldn't use them. Also, we may be in ESPFIX + * mode and therefore have a nonzero SS base and an offset ESP, + * so any attempt to access the stack needs to use SS. (except for + * accesses through %esp, which automatically use SS.) + */ testl $CS_FROM_KERNEL, 1*4(%esp) jz .Lfinished_frame_\@ @@ -276,31 +288,40 @@ movl 5*4(%esp), %eax # (modified) regs->sp movl 4*4(%esp), %ecx # flags - movl %ecx, -4(%eax) + movl %ecx, %ss:-1*4(%eax) movl 3*4(%esp), %ecx # cs andl $0x0000ffff, %ecx - movl %ecx, -8(%eax) + movl %ecx, %ss:-2*4(%eax) movl 2*4(%esp), %ecx # ip - movl %ecx, -12(%eax) + movl %ecx, %ss:-3*4(%eax) movl 1*4(%esp), %ecx # eax - movl %ecx, -16(%eax) + movl %ecx, %ss:-4*4(%eax) popl %ecx - lea -16(%eax), %esp + lea -4*4(%eax), %esp popl %eax .Lfinished_frame_\@: .endm -.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 +.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0 cld .if \skip_gs == 0 PUSH_GS .endif - FIXUP_FRAME pushl %fs + + pushl %eax + movl $(__KERNEL_PERCPU), %eax + movl %eax, %fs +.if \unwind_espfix > 0 + UNWIND_ESPFIX_STACK +.endif + popl %eax + + FIXUP_FRAME pushl %es pushl %ds pushl \pt_regs_ax @@ -313,8 +334,6 @@ movl $(__USER_DS), %edx movl %edx, %ds movl %edx, %es - movl $(__KERNEL_PERCPU), %edx - movl %edx, %fs .if \skip_gs == 0 SET_KERNEL_GS %edx .endif @@ -324,8 +343,8 @@ .endif .endm -.macro SAVE_ALL_NMI cr3_reg:req - SAVE_ALL +.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0 + SAVE_ALL unwind_espfix=\unwind_espfix BUG_IF_WRONG_CR3 @@ -357,6 +376,7 @@ 2: popl %es 3: popl %fs POP_GS \pop + IRET_FRAME .pushsection .fixup, "ax" 4: movl $0, (%esp) jmp 1b @@ -395,7 +415,8 @@ .macro CHECK_AND_APPLY_ESPFIX #ifdef CONFIG_X86_ESPFIX32 -#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) +#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8) +#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX @@ -709,7 +730,7 @@ * %eax: prev task * %edx: next task */ -ENTRY(__switch_to_asm) +SYM_CODE_START(__switch_to_asm) /* * Save callee-saved registers * This must match the order in struct inactive_task_frame @@ -718,6 +739,11 @@ ENTRY(__switch_to_asm) pushl %ebx pushl %edi pushl %esi + /* + * Flags are saved to prevent AC leakage. This could go + * away if objtool would have 32bit support to verify + * the STAC/CLAC correctness. + */ pushfl /* switch stack */ @@ -740,15 +766,16 @@ ENTRY(__switch_to_asm) FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif - /* restore callee-saved registers */ + /* Restore flags or the incoming task to restore AC state. */ popfl + /* restore callee-saved registers */ popl %esi popl %edi popl %ebx popl %ebp jmp __switch_to -END(__switch_to_asm) +SYM_CODE_END(__switch_to_asm) /* * The unwinder expects the last frame on the stack to always be at the same @@ -757,7 +784,7 @@ END(__switch_to_asm) * asmlinkage function so its argument has to be pushed on the stack. This * wrapper creates a proper "end of stack" frame header before the call. */ -ENTRY(schedule_tail_wrapper) +SYM_FUNC_START(schedule_tail_wrapper) FRAME_BEGIN pushl %eax @@ -766,7 +793,7 @@ ENTRY(schedule_tail_wrapper) FRAME_END ret -ENDPROC(schedule_tail_wrapper) +SYM_FUNC_END(schedule_tail_wrapper) /* * A newly forked process directly context switches into this address. * @@ -774,7 +801,7 @@ ENDPROC(schedule_tail_wrapper) * ebx: kernel thread func (NULL for user thread) * edi: kernel thread arg */ -ENTRY(ret_from_fork) +SYM_CODE_START(ret_from_fork) call schedule_tail_wrapper testl %ebx, %ebx @@ -797,7 +824,7 @@ ENTRY(ret_from_fork) */ movl $0, PT_EAX(%esp) jmp 2b -END(ret_from_fork) +SYM_CODE_END(ret_from_fork) /* * Return to user mode is not as complex as all this looks, @@ -807,8 +834,7 @@ END(ret_from_fork) */ # userspace resumption stub bypassing syscall exit tracing - ALIGN -ret_from_exception: +SYM_CODE_START_LOCAL(ret_from_exception) preempt_stop(CLBR_ANY) ret_from_intr: #ifdef CONFIG_VM86 @@ -825,15 +851,14 @@ ret_from_intr: cmpl $USER_RPL, %eax jb restore_all_kernel # not returning to v8086 or userspace -ENTRY(resume_userspace) DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl %esp, %eax call prepare_exit_to_usermode jmp restore_all -END(ret_from_exception) +SYM_CODE_END(ret_from_exception) -GLOBAL(__begin_SYSENTER_singlestep_region) +SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) /* * All code from here through __end_SYSENTER_singlestep_region is subject * to being single-stepped if a user program sets TF and executes SYSENTER. @@ -848,9 +873,10 @@ GLOBAL(__begin_SYSENTER_singlestep_region) * Xen doesn't set %esp to be precisely what the normal SYSENTER * entry point expects, so fix it up before using the normal path. */ -ENTRY(xen_sysenter_target) +SYM_CODE_START(xen_sysenter_target) addl $5*4, %esp /* remove xen-provided frame */ jmp .Lsysenter_past_esp +SYM_CODE_END(xen_sysenter_target) #endif /* @@ -885,7 +911,7 @@ ENTRY(xen_sysenter_target) * ebp user stack * 0(%ebp) arg6 */ -ENTRY(entry_SYSENTER_32) +SYM_FUNC_START(entry_SYSENTER_32) /* * On entry-stack with all userspace-regs live - save and * restore eflags and %eax to use it as scratch-reg for the cr3 @@ -1012,8 +1038,8 @@ ENTRY(entry_SYSENTER_32) pushl $X86_EFLAGS_FIXED popfl jmp .Lsysenter_flags_fixed -GLOBAL(__end_SYSENTER_singlestep_region) -ENDPROC(entry_SYSENTER_32) +SYM_ENTRY(__end_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) +SYM_FUNC_END(entry_SYSENTER_32) /* * 32-bit legacy system call entry. @@ -1043,7 +1069,7 @@ ENDPROC(entry_SYSENTER_32) * edi arg5 * ebp arg6 */ -ENTRY(entry_INT80_32) +SYM_FUNC_START(entry_INT80_32) ASM_CLAC pushl %eax /* pt_regs->orig_ax */ @@ -1064,7 +1090,6 @@ ENTRY(entry_INT80_32) restore_all: TRACE_IRQS_IRET SWITCH_TO_ENTRY_STACK -.Lrestore_all_notrace: CHECK_AND_APPLY_ESPFIX .Lrestore_nocheck: /* Switch back to user CR3 */ @@ -1075,7 +1100,6 @@ restore_all: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code .Lirq_return: - IRET_FRAME /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization * when returning from IPI handler and when returning from @@ -1100,7 +1124,7 @@ restore_all_kernel: jmp .Lirq_return .section .fixup, "ax" -ENTRY(iret_exc ) +SYM_CODE_START(iret_exc) pushl $0 # no error code pushl $do_iret_error @@ -1117,9 +1141,10 @@ ENTRY(iret_exc ) #endif jmp common_exception +SYM_CODE_END(iret_exc) .previous _ASM_EXTABLE(.Lirq_return, iret_exc) -ENDPROC(entry_INT80_32) +SYM_FUNC_END(entry_INT80_32) .macro FIXUP_ESPFIX_STACK /* @@ -1128,30 +1153,43 @@ ENDPROC(entry_INT80_32) * We can't call C functions using the ESPFIX stack. This code reads * the high word of the segment base from the GDT and swiches to the * normal stack and adjusts ESP with the matching offset. + * + * We might be on user CR3 here, so percpu data is not mapped and we can't + * access the GDT through the percpu segment. Instead, use SGDT to find + * the cpu_entry_area alias of the GDT. */ #ifdef CONFIG_X86_ESPFIX32 /* fixup the stack */ - mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ - mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ + pushl %ecx + subl $2*4, %esp + sgdt (%esp) + movl 2(%esp), %ecx /* GDT address */ + /* + * Careful: ECX is a linear pointer, so we need to force base + * zero. %cs is the only known-linear segment we have right now. + */ + mov %cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al /* bits 16..23 */ + mov %cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah /* bits 24..31 */ shl $16, %eax + addl $2*4, %esp + popl %ecx addl %esp, %eax /* the adjusted stack pointer */ pushl $__KERNEL_DS pushl %eax lss (%esp), %esp /* switch to the normal stack segment */ #endif .endm + .macro UNWIND_ESPFIX_STACK + /* It's safe to clobber %eax, all other regs need to be preserved */ #ifdef CONFIG_X86_ESPFIX32 movl %ss, %eax /* see if on espfix stack */ cmpw $__ESPFIX_SS, %ax - jne 27f - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %es + jne .Lno_fixup_\@ /* switch to normal stack */ FIXUP_ESPFIX_STACK -27: +.Lno_fixup_\@: #endif .endm @@ -1160,7 +1198,7 @@ ENDPROC(entry_INT80_32) * We pack 1 stub into every 8-byte block. */ .align 8 -ENTRY(irq_entries_start) +SYM_CODE_START(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) pushl $(~vector+0x80) /* Note: always in signed byte range */ @@ -1168,11 +1206,11 @@ ENTRY(irq_entries_start) jmp common_interrupt .align 8 .endr -END(irq_entries_start) +SYM_CODE_END(irq_entries_start) #ifdef CONFIG_X86_LOCAL_APIC .align 8 -ENTRY(spurious_entries_start) +SYM_CODE_START(spurious_entries_start) vector=FIRST_SYSTEM_VECTOR .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) pushl $(~vector+0x80) /* Note: always in signed byte range */ @@ -1180,9 +1218,9 @@ ENTRY(spurious_entries_start) jmp common_spurious .align 8 .endr -END(spurious_entries_start) +SYM_CODE_END(spurious_entries_start) -common_spurious: +SYM_CODE_START_LOCAL(common_spurious) ASM_CLAC addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */ SAVE_ALL switch_stacks=1 @@ -1191,7 +1229,7 @@ common_spurious: movl %esp, %eax call smp_spurious_interrupt jmp ret_from_intr -ENDPROC(common_spurious) +SYM_CODE_END(common_spurious) #endif /* @@ -1199,7 +1237,7 @@ ENDPROC(common_spurious) * so IRQ-flags tracing has to follow that: */ .p2align CONFIG_X86_L1_CACHE_SHIFT -common_interrupt: +SYM_CODE_START_LOCAL(common_interrupt) ASM_CLAC addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */ @@ -1209,10 +1247,10 @@ common_interrupt: movl %esp, %eax call do_IRQ jmp ret_from_intr -ENDPROC(common_interrupt) +SYM_CODE_END(common_interrupt) #define BUILD_INTERRUPT3(name, nr, fn) \ -ENTRY(name) \ +SYM_FUNC_START(name) \ ASM_CLAC; \ pushl $~(nr); \ SAVE_ALL switch_stacks=1; \ @@ -1221,7 +1259,7 @@ ENTRY(name) \ movl %esp, %eax; \ call fn; \ jmp ret_from_intr; \ -ENDPROC(name) +SYM_FUNC_END(name) #define BUILD_INTERRUPT(name, nr) \ BUILD_INTERRUPT3(name, nr, smp_##name); \ @@ -1229,14 +1267,14 @@ ENDPROC(name) /* The include is where all of the SMP etc. interrupts come from */ #include <asm/entry_arch.h> -ENTRY(coprocessor_error) +SYM_CODE_START(coprocessor_error) ASM_CLAC pushl $0 pushl $do_coprocessor_error jmp common_exception -END(coprocessor_error) +SYM_CODE_END(coprocessor_error) -ENTRY(simd_coprocessor_error) +SYM_CODE_START(simd_coprocessor_error) ASM_CLAC pushl $0 #ifdef CONFIG_X86_INVD_BUG @@ -1248,104 +1286,99 @@ ENTRY(simd_coprocessor_error) pushl $do_simd_coprocessor_error #endif jmp common_exception -END(simd_coprocessor_error) +SYM_CODE_END(simd_coprocessor_error) -ENTRY(device_not_available) +SYM_CODE_START(device_not_available) ASM_CLAC pushl $-1 # mark this as an int pushl $do_device_not_available jmp common_exception -END(device_not_available) +SYM_CODE_END(device_not_available) #ifdef CONFIG_PARAVIRT -ENTRY(native_iret) +SYM_CODE_START(native_iret) iret _ASM_EXTABLE(native_iret, iret_exc) -END(native_iret) +SYM_CODE_END(native_iret) #endif -ENTRY(overflow) +SYM_CODE_START(overflow) ASM_CLAC pushl $0 pushl $do_overflow jmp common_exception -END(overflow) +SYM_CODE_END(overflow) -ENTRY(bounds) +SYM_CODE_START(bounds) ASM_CLAC pushl $0 pushl $do_bounds jmp common_exception -END(bounds) +SYM_CODE_END(bounds) -ENTRY(invalid_op) +SYM_CODE_START(invalid_op) ASM_CLAC pushl $0 pushl $do_invalid_op jmp common_exception -END(invalid_op) +SYM_CODE_END(invalid_op) -ENTRY(coprocessor_segment_overrun) +SYM_CODE_START(coprocessor_segment_overrun) ASM_CLAC pushl $0 pushl $do_coprocessor_segment_overrun jmp common_exception -END(coprocessor_segment_overrun) +SYM_CODE_END(coprocessor_segment_overrun) -ENTRY(invalid_TSS) +SYM_CODE_START(invalid_TSS) ASM_CLAC pushl $do_invalid_TSS jmp common_exception -END(invalid_TSS) +SYM_CODE_END(invalid_TSS) -ENTRY(segment_not_present) +SYM_CODE_START(segment_not_present) ASM_CLAC pushl $do_segment_not_present jmp common_exception -END(segment_not_present) +SYM_CODE_END(segment_not_present) -ENTRY(stack_segment) +SYM_CODE_START(stack_segment) ASM_CLAC pushl $do_stack_segment jmp common_exception -END(stack_segment) +SYM_CODE_END(stack_segment) -ENTRY(alignment_check) +SYM_CODE_START(alignment_check) ASM_CLAC pushl $do_alignment_check jmp common_exception -END(alignment_check) +SYM_CODE_END(alignment_check) -ENTRY(divide_error) +SYM_CODE_START(divide_error) ASM_CLAC pushl $0 # no error code pushl $do_divide_error jmp common_exception -END(divide_error) +SYM_CODE_END(divide_error) #ifdef CONFIG_X86_MCE -ENTRY(machine_check) +SYM_CODE_START(machine_check) ASM_CLAC pushl $0 pushl machine_check_vector jmp common_exception -END(machine_check) +SYM_CODE_END(machine_check) #endif -ENTRY(spurious_interrupt_bug) +SYM_CODE_START(spurious_interrupt_bug) ASM_CLAC pushl $0 pushl $do_spurious_interrupt_bug jmp common_exception -END(spurious_interrupt_bug) +SYM_CODE_END(spurious_interrupt_bug) #ifdef CONFIG_XEN_PV -ENTRY(xen_hypervisor_callback) - pushl $-1 /* orig_ax = -1 => not a system call */ - SAVE_ALL - ENCODE_FRAME_POINTER - TRACE_IRQS_OFF - +SYM_FUNC_START(xen_hypervisor_callback) /* * Check to see if we got the event in the critical * region in xen_iret_direct, after we've reenabled @@ -1353,22 +1386,23 @@ ENTRY(xen_hypervisor_callback) * iret instruction's behaviour where it delivers a * pending interrupt when enabling interrupts: */ - movl PT_EIP(%esp), %eax - cmpl $xen_iret_start_crit, %eax + cmpl $xen_iret_start_crit, (%esp) jb 1f - cmpl $xen_iret_end_crit, %eax + cmpl $xen_iret_end_crit, (%esp) jae 1f - - jmp xen_iret_crit_fixup - -ENTRY(xen_do_upcall) -1: mov %esp, %eax + call xen_iret_crit_fixup +1: + pushl $-1 /* orig_ax = -1 => not a system call */ + SAVE_ALL + ENCODE_FRAME_POINTER + TRACE_IRQS_OFF + mov %esp, %eax call xen_evtchn_do_upcall #ifndef CONFIG_PREEMPTION call xen_maybe_preempt_hcall #endif jmp ret_from_intr -ENDPROC(xen_hypervisor_callback) +SYM_FUNC_END(xen_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. @@ -1382,7 +1416,7 @@ ENDPROC(xen_hypervisor_callback) * to pop the stack frame we end up in an infinite loop of failsafe callbacks. * We distinguish between categories by maintaining a status value in EAX. */ -ENTRY(xen_failsafe_callback) +SYM_FUNC_START(xen_failsafe_callback) pushl %eax movl $1, %eax 1: mov 4(%esp), %ds @@ -1419,7 +1453,7 @@ ENTRY(xen_failsafe_callback) _ASM_EXTABLE(2b, 7b) _ASM_EXTABLE(3b, 8b) _ASM_EXTABLE(4b, 9b) -ENDPROC(xen_failsafe_callback) +SYM_FUNC_END(xen_failsafe_callback) #endif /* CONFIG_XEN_PV */ #ifdef CONFIG_XEN_PVHVM @@ -1441,18 +1475,17 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR, #endif /* CONFIG_HYPERV */ -ENTRY(page_fault) +SYM_CODE_START(page_fault) ASM_CLAC pushl $do_page_fault jmp common_exception_read_cr2 -END(page_fault) +SYM_CODE_END(page_fault) -common_exception_read_cr2: +SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2) /* the function address is in %gs's slot on the stack */ - SAVE_ALL switch_stacks=1 skip_gs=1 + SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 ENCODE_FRAME_POINTER - UNWIND_ESPFIX_STACK /* fixup %gs */ GS_TO_REG %ecx @@ -1470,13 +1503,12 @@ common_exception_read_cr2: movl %esp, %eax # pt_regs pointer CALL_NOSPEC %edi jmp ret_from_exception -END(common_exception_read_cr2) +SYM_CODE_END(common_exception_read_cr2) -common_exception: +SYM_CODE_START_LOCAL_NOALIGN(common_exception) /* the function address is in %gs's slot on the stack */ - SAVE_ALL switch_stacks=1 skip_gs=1 + SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 ENCODE_FRAME_POINTER - UNWIND_ESPFIX_STACK /* fixup %gs */ GS_TO_REG %ecx @@ -1492,9 +1524,9 @@ common_exception: movl %esp, %eax # pt_regs pointer CALL_NOSPEC %edi jmp ret_from_exception -END(common_exception) +SYM_CODE_END(common_exception) -ENTRY(debug) +SYM_CODE_START(debug) /* * Entry from sysenter is now handled in common_exception */ @@ -1502,7 +1534,49 @@ ENTRY(debug) pushl $-1 # mark this as an int pushl $do_debug jmp common_exception -END(debug) +SYM_CODE_END(debug) + +#ifdef CONFIG_DOUBLEFAULT +SYM_CODE_START(double_fault) +1: + /* + * This is a task gate handler, not an interrupt gate handler. + * The error code is on the stack, but the stack is otherwise + * empty. Interrupts are off. Our state is sane with the following + * exceptions: + * + * - CR0.TS is set. "TS" literally means "task switched". + * - EFLAGS.NT is set because we're a "nested task". + * - The doublefault TSS has back_link set and has been marked busy. + * - TR points to the doublefault TSS and the normal TSS is busy. + * - CR3 is the normal kernel PGD. This would be delightful, except + * that the CPU didn't bother to save the old CR3 anywhere. This + * would make it very awkward to return back to the context we came + * from. + * + * The rest of EFLAGS is sanitized for us, so we don't need to + * worry about AC or DF. + * + * Don't even bother popping the error code. It's always zero, + * and ignoring it makes us a bit more robust against buggy + * hypervisor task gate implementations. + * + * We will manually undo the task switch instead of doing a + * task-switching IRET. + */ + + clts /* clear CR0.TS */ + pushl $X86_EFLAGS_FIXED + popfl /* clear EFLAGS.NT */ + + call doublefault_shim + + /* We don't support returning, so we have no IRET here. */ +1: + hlt + jmp 1b +SYM_CODE_END(double_fault) +#endif /* * NMI is doubly nasty. It can happen on the first instruction of @@ -1511,10 +1585,14 @@ END(debug) * switched stacks. We handle both conditions by simply checking whether we * interrupted kernel code running on the SYSENTER stack. */ -ENTRY(nmi) +SYM_CODE_START(nmi) ASM_CLAC #ifdef CONFIG_X86_ESPFIX32 + /* + * ESPFIX_SS is only ever set on the return to user path + * after we've switched to the entry stack. + */ pushl %eax movl %ss, %eax cmpw $__ESPFIX_SS, %ax @@ -1550,6 +1628,11 @@ ENTRY(nmi) movl %ebx, %esp .Lnmi_return: +#ifdef CONFIG_X86_ESPFIX32 + testl $CS_FROM_ESPFIX, PT_CS(%esp) + jnz .Lnmi_from_espfix +#endif + CHECK_AND_APPLY_ESPFIX RESTORE_ALL_NMI cr3_reg=%edi pop=4 jmp .Lirq_return @@ -1557,28 +1640,47 @@ ENTRY(nmi) #ifdef CONFIG_X86_ESPFIX32 .Lnmi_espfix_stack: /* - * create the pointer to lss back + * Create the pointer to LSS back */ pushl %ss pushl %esp addl $4, (%esp) - /* copy the iret frame of 12 bytes */ - .rept 3 - pushl 16(%esp) - .endr - pushl %eax - SAVE_ALL_NMI cr3_reg=%edi + + /* Copy the (short) IRET frame */ + pushl 4*4(%esp) # flags + pushl 4*4(%esp) # cs + pushl 4*4(%esp) # ip + + pushl %eax # orig_ax + + SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1 ENCODE_FRAME_POINTER - FIXUP_ESPFIX_STACK # %eax == %esp + + /* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */ + xorl $(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp) + xorl %edx, %edx # zero error code - call do_nmi + movl %esp, %eax # pt_regs pointer + jmp .Lnmi_from_sysenter_stack + +.Lnmi_from_espfix: RESTORE_ALL_NMI cr3_reg=%edi - lss 12+4(%esp), %esp # back to espfix stack + /* + * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to + * fix up the gap and long frame: + * + * 3 - original frame (exception) + * 2 - ESPFIX block (above) + * 6 - gap (FIXUP_FRAME) + * 5 - long frame (FIXUP_FRAME) + * 1 - orig_ax + */ + lss (1+5+6)*4(%esp), %esp # back to espfix stack jmp .Lirq_return #endif -END(nmi) +SYM_CODE_END(nmi) -ENTRY(int3) +SYM_CODE_START(int3) ASM_CLAC pushl $-1 # mark this as an int @@ -1589,22 +1691,22 @@ ENTRY(int3) movl %esp, %eax # pt_regs pointer call do_int3 jmp ret_from_exception -END(int3) +SYM_CODE_END(int3) -ENTRY(general_protection) +SYM_CODE_START(general_protection) pushl $do_general_protection jmp common_exception -END(general_protection) +SYM_CODE_END(general_protection) #ifdef CONFIG_KVM_GUEST -ENTRY(async_page_fault) +SYM_CODE_START(async_page_fault) ASM_CLAC pushl $do_async_page_fault jmp common_exception_read_cr2 -END(async_page_fault) +SYM_CODE_END(async_page_fault) #endif -ENTRY(rewind_stack_do_exit) +SYM_CODE_START(rewind_stack_do_exit) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp @@ -1613,4 +1715,4 @@ ENTRY(rewind_stack_do_exit) call do_exit 1: jmp 1b -END(rewind_stack_do_exit) +SYM_CODE_END(rewind_stack_do_exit) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b7c3ea4cb19d..76942cbd95a1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -15,7 +15,7 @@ * at the top of the kernel process stack. * * Some macro usage: - * - ENTRY/END: Define functions in the symbol table. + * - SYM_FUNC_START/END:Define functions in the symbol table. * - TRACE_IRQ_*: Trace hardirq state for lock debugging. * - idtentry: Define exception entry points. */ @@ -46,11 +46,11 @@ .section .entry.text, "ax" #ifdef CONFIG_PARAVIRT -ENTRY(native_usergs_sysret64) +SYM_CODE_START(native_usergs_sysret64) UNWIND_HINT_EMPTY swapgs sysretq -END(native_usergs_sysret64) +SYM_CODE_END(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ .macro TRACE_IRQS_FLAGS flags:req @@ -142,7 +142,7 @@ END(native_usergs_sysret64) * with them due to bugs in both AMD and Intel CPUs. */ -ENTRY(entry_SYSCALL_64) +SYM_CODE_START(entry_SYSCALL_64) UNWIND_HINT_EMPTY /* * Interrupts are off on entry. @@ -162,7 +162,7 @@ ENTRY(entry_SYSCALL_64) pushq %r11 /* pt_regs->flags */ pushq $__USER_CS /* pt_regs->cs */ pushq %rcx /* pt_regs->ip */ -GLOBAL(entry_SYSCALL_64_after_hwframe) +SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) pushq %rax /* pt_regs->orig_ax */ PUSH_AND_CLEAR_REGS rax=$-ENOSYS @@ -273,13 +273,13 @@ syscall_return_via_sysret: popq %rdi popq %rsp USERGS_SYSRET64 -END(entry_SYSCALL_64) +SYM_CODE_END(entry_SYSCALL_64) /* * %rdi: prev task * %rsi: next task */ -ENTRY(__switch_to_asm) +SYM_CODE_START(__switch_to_asm) UNWIND_HINT_FUNC /* * Save callee-saved registers @@ -321,7 +321,7 @@ ENTRY(__switch_to_asm) popq %rbp jmp __switch_to -END(__switch_to_asm) +SYM_CODE_END(__switch_to_asm) /* * A newly forked process directly context switches into this address. @@ -330,7 +330,7 @@ END(__switch_to_asm) * rbx: kernel thread func (NULL for user thread) * r12: kernel thread arg */ -ENTRY(ret_from_fork) +SYM_CODE_START(ret_from_fork) UNWIND_HINT_EMPTY movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ @@ -357,14 +357,14 @@ ENTRY(ret_from_fork) */ movq $0, RAX(%rsp) jmp 2b -END(ret_from_fork) +SYM_CODE_END(ret_from_fork) /* * Build the entry stubs with some assembler magic. * We pack 1 stub into every 8-byte block. */ .align 8 -ENTRY(irq_entries_start) +SYM_CODE_START(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) UNWIND_HINT_IRET_REGS @@ -373,10 +373,10 @@ ENTRY(irq_entries_start) .align 8 vector=vector+1 .endr -END(irq_entries_start) +SYM_CODE_END(irq_entries_start) .align 8 -ENTRY(spurious_entries_start) +SYM_CODE_START(spurious_entries_start) vector=FIRST_SYSTEM_VECTOR .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) UNWIND_HINT_IRET_REGS @@ -385,7 +385,7 @@ ENTRY(spurious_entries_start) .align 8 vector=vector+1 .endr -END(spurious_entries_start) +SYM_CODE_END(spurious_entries_start) .macro DEBUG_ENTRY_ASSERT_IRQS_OFF #ifdef CONFIG_DEBUG_ENTRY @@ -511,7 +511,7 @@ END(spurious_entries_start) * | return address | * +----------------------------------------------------+ */ -ENTRY(interrupt_entry) +SYM_CODE_START(interrupt_entry) UNWIND_HINT_FUNC ASM_CLAC cld @@ -579,7 +579,7 @@ ENTRY(interrupt_entry) TRACE_IRQS_OFF ret -END(interrupt_entry) +SYM_CODE_END(interrupt_entry) _ASM_NOKPROBE(interrupt_entry) @@ -589,18 +589,18 @@ _ASM_NOKPROBE(interrupt_entry) * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_spurious/interrupt. */ -common_spurious: +SYM_CODE_START_LOCAL(common_spurious) addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ call interrupt_entry UNWIND_HINT_REGS indirect=1 call smp_spurious_interrupt /* rdi points to pt_regs */ jmp ret_from_intr -END(common_spurious) +SYM_CODE_END(common_spurious) _ASM_NOKPROBE(common_spurious) /* common_interrupt is a hotpath. Align it */ .p2align CONFIG_X86_L1_CACHE_SHIFT -common_interrupt: +SYM_CODE_START_LOCAL(common_interrupt) addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ call interrupt_entry UNWIND_HINT_REGS indirect=1 @@ -616,12 +616,12 @@ ret_from_intr: jz retint_kernel /* Interrupt came from user space */ -GLOBAL(retint_user) +.Lretint_user: mov %rsp,%rdi call prepare_exit_to_usermode TRACE_IRQS_IRETQ -GLOBAL(swapgs_restore_regs_and_return_to_usermode) +SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates user mode. */ testb $3, CS(%rsp) @@ -679,7 +679,7 @@ retint_kernel: */ TRACE_IRQS_IRETQ -GLOBAL(restore_regs_and_return_to_kernel) +SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates kernel mode. */ testb $3, CS(%rsp) @@ -695,7 +695,7 @@ GLOBAL(restore_regs_and_return_to_kernel) */ INTERRUPT_RETURN -ENTRY(native_iret) +SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL) UNWIND_HINT_IRET_REGS /* * Are we returning to a stack segment from the LDT? Note: in @@ -706,8 +706,7 @@ ENTRY(native_iret) jnz native_irq_return_ldt #endif -.global native_irq_return_iret -native_irq_return_iret: +SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL) /* * This may fault. Non-paranoid faults on return to userspace are * handled by fixup_bad_iret. These include #SS, #GP, and #NP. @@ -789,14 +788,14 @@ native_irq_return_ldt: */ jmp native_irq_return_iret #endif -END(common_interrupt) +SYM_CODE_END(common_interrupt) _ASM_NOKPROBE(common_interrupt) /* * APIC interrupts. */ .macro apicinterrupt3 num sym do_sym -ENTRY(\sym) +SYM_CODE_START(\sym) UNWIND_HINT_IRET_REGS pushq $~(\num) .Lcommon_\sym: @@ -804,7 +803,7 @@ ENTRY(\sym) UNWIND_HINT_REGS indirect=1 call \do_sym /* rdi points to pt_regs */ jmp ret_from_intr -END(\sym) +SYM_CODE_END(\sym) _ASM_NOKPROBE(\sym) .endm @@ -969,7 +968,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. */ .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0 -ENTRY(\sym) +SYM_CODE_START(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 /* Sanity check */ @@ -1019,7 +1018,7 @@ ENTRY(\sym) .endif _ASM_NOKPROBE(\sym) -END(\sym) +SYM_CODE_END(\sym) .endm idtentry divide_error do_divide_error has_error_code=0 @@ -1041,7 +1040,7 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 * Reload gs selector with exception handling * edi: new selector */ -ENTRY(native_load_gs_index) +SYM_FUNC_START(native_load_gs_index) FRAME_BEGIN pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) @@ -1055,13 +1054,13 @@ ENTRY(native_load_gs_index) popfq FRAME_END ret -ENDPROC(native_load_gs_index) +SYM_FUNC_END(native_load_gs_index) EXPORT_SYMBOL(native_load_gs_index) _ASM_EXTABLE(.Lgs_change, .Lbad_gs) .section .fixup, "ax" /* running with kernelgs */ -.Lbad_gs: +SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs) SWAPGS /* switch back to user gs */ .macro ZAP_GS /* This can't be a string because the preprocessor needs to see it. */ @@ -1072,10 +1071,11 @@ EXPORT_SYMBOL(native_load_gs_index) xorl %eax, %eax movl %eax, %gs jmp 2b +SYM_CODE_END(.Lbad_gs) .previous /* Call softirq on interrupt stack. Interrupts are off. */ -ENTRY(do_softirq_own_stack) +SYM_FUNC_START(do_softirq_own_stack) pushq %rbp mov %rsp, %rbp ENTER_IRQ_STACK regs=0 old_rsp=%r11 @@ -1083,7 +1083,7 @@ ENTRY(do_softirq_own_stack) LEAVE_IRQ_STACK regs=0 leaveq ret -ENDPROC(do_softirq_own_stack) +SYM_FUNC_END(do_softirq_own_stack) #ifdef CONFIG_XEN_PV idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 @@ -1101,7 +1101,8 @@ idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 * existing activation in its critical region -- if so, we pop the current * activation and restart the handler using the previous one. */ -ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ +/* do_hypervisor_callback(struct *pt_regs) */ +SYM_CODE_START_LOCAL(xen_do_hypervisor_callback) /* * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will @@ -1119,7 +1120,7 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ call xen_maybe_preempt_hcall #endif jmp error_exit -END(xen_do_hypervisor_callback) +SYM_CODE_END(xen_do_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. @@ -1134,7 +1135,7 @@ END(xen_do_hypervisor_callback) * We distinguish between categories by comparing each saved segment register * with its current contents: any discrepancy means we in category 1. */ -ENTRY(xen_failsafe_callback) +SYM_CODE_START(xen_failsafe_callback) UNWIND_HINT_EMPTY movl %ds, %ecx cmpw %cx, 0x10(%rsp) @@ -1164,7 +1165,7 @@ ENTRY(xen_failsafe_callback) PUSH_AND_CLEAR_REGS ENCODE_FRAME_POINTER jmp error_exit -END(xen_failsafe_callback) +SYM_CODE_END(xen_failsafe_callback) #endif /* CONFIG_XEN_PV */ #ifdef CONFIG_XEN_PVHVM @@ -1214,7 +1215,7 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1 * Use slow, but surefire "are we in kernel?" check. * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ -ENTRY(paranoid_entry) +SYM_CODE_START_LOCAL(paranoid_entry) UNWIND_HINT_FUNC cld PUSH_AND_CLEAR_REGS save_ret=1 @@ -1248,7 +1249,7 @@ ENTRY(paranoid_entry) FENCE_SWAPGS_KERNEL_ENTRY ret -END(paranoid_entry) +SYM_CODE_END(paranoid_entry) /* * "Paranoid" exit path from exception stack. This is invoked @@ -1262,7 +1263,7 @@ END(paranoid_entry) * * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ -ENTRY(paranoid_exit) +SYM_CODE_START_LOCAL(paranoid_exit) UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF_DEBUG @@ -1272,19 +1273,18 @@ ENTRY(paranoid_exit) /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK - jmp .Lparanoid_exit_restore + jmp restore_regs_and_return_to_kernel .Lparanoid_exit_no_swapgs: TRACE_IRQS_IRETQ_DEBUG /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 -.Lparanoid_exit_restore: jmp restore_regs_and_return_to_kernel -END(paranoid_exit) +SYM_CODE_END(paranoid_exit) /* * Save all registers in pt_regs, and switch GS if needed. */ -ENTRY(error_entry) +SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC cld PUSH_AND_CLEAR_REGS save_ret=1 @@ -1364,16 +1364,16 @@ ENTRY(error_entry) call fixup_bad_iret mov %rax, %rsp jmp .Lerror_entry_from_usermode_after_swapgs -END(error_entry) +SYM_CODE_END(error_entry) -ENTRY(error_exit) +SYM_CODE_START_LOCAL(error_exit) UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF testb $3, CS(%rsp) jz retint_kernel - jmp retint_user -END(error_exit) + jmp .Lretint_user +SYM_CODE_END(error_exit) /* * Runs on exception stack. Xen PV does not go through this path at all, @@ -1383,7 +1383,7 @@ END(error_exit) * %r14: Used to save/restore the CR3 of the interrupted context * when PAGE_TABLE_ISOLATION is in use. Do not clobber. */ -ENTRY(nmi) +SYM_CODE_START(nmi) UNWIND_HINT_IRET_REGS /* @@ -1718,21 +1718,21 @@ nmi_restore: * about espfix64 on the way back to kernel mode. */ iretq -END(nmi) +SYM_CODE_END(nmi) #ifndef CONFIG_IA32_EMULATION /* * This handles SYSCALL from 32-bit code. There is no way to program * MSRs to fully disable 32-bit SYSCALL. */ -ENTRY(ignore_sysret) +SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax sysret -END(ignore_sysret) +SYM_CODE_END(ignore_sysret) #endif -ENTRY(rewind_stack_do_exit) +SYM_CODE_START(rewind_stack_do_exit) UNWIND_HINT_FUNC /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp @@ -1742,4 +1742,4 @@ ENTRY(rewind_stack_do_exit) UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE call do_exit -END(rewind_stack_do_exit) +SYM_CODE_END(rewind_stack_do_exit) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 39913770a44d..f1d3ccae5dd5 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -46,7 +46,7 @@ * ebp user stack * 0(%ebp) arg6 */ -ENTRY(entry_SYSENTER_compat) +SYM_FUNC_START(entry_SYSENTER_compat) /* Interrupts are off on entry. */ SWAPGS @@ -146,8 +146,8 @@ ENTRY(entry_SYSENTER_compat) pushq $X86_EFLAGS_FIXED popfq jmp .Lsysenter_flags_fixed -GLOBAL(__end_entry_SYSENTER_compat) -ENDPROC(entry_SYSENTER_compat) +SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL) +SYM_FUNC_END(entry_SYSENTER_compat) /* * 32-bit SYSCALL entry. @@ -196,7 +196,7 @@ ENDPROC(entry_SYSENTER_compat) * esp user stack * 0(%esp) arg6 */ -ENTRY(entry_SYSCALL_compat) +SYM_CODE_START(entry_SYSCALL_compat) /* Interrupts are off on entry. */ swapgs @@ -215,7 +215,7 @@ ENTRY(entry_SYSCALL_compat) pushq %r11 /* pt_regs->flags */ pushq $__USER32_CS /* pt_regs->cs */ pushq %rcx /* pt_regs->ip */ -GLOBAL(entry_SYSCALL_compat_after_hwframe) +SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) movl %eax, %eax /* discard orig_ax high bits */ pushq %rax /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ @@ -311,7 +311,7 @@ sysret32_from_system_call: xorl %r10d, %r10d swapgs sysretl -END(entry_SYSCALL_compat) +SYM_CODE_END(entry_SYSCALL_compat) /* * 32-bit legacy system call entry. @@ -339,7 +339,7 @@ END(entry_SYSCALL_compat) * edi arg5 * ebp arg6 */ -ENTRY(entry_INT80_compat) +SYM_CODE_START(entry_INT80_compat) /* * Interrupts are off on entry. */ @@ -416,4 +416,4 @@ ENTRY(entry_INT80_compat) /* Go back to user mode. */ TRACE_IRQS_ON jmp swapgs_restore_regs_and_return_to_usermode -END(entry_INT80_compat) +SYM_CODE_END(entry_INT80_compat) diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index aa3336a7cb15..7d17b3addbbb 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -10,13 +10,11 @@ #ifdef CONFIG_IA32_EMULATION /* On X86_64, we use struct pt_regs * to pass parameters to syscalls */ #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); - -/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ -extern asmlinkage long sys_ni_syscall(const struct pt_regs *); - +#define __sys_ni_syscall __ia32_sys_ni_syscall #else /* CONFIG_IA32_EMULATION */ #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#define __sys_ni_syscall sys_ni_syscall #endif /* CONFIG_IA32_EMULATION */ #include <asm/syscalls_32.h> @@ -29,6 +27,6 @@ __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_compat_max] = &sys_ni_syscall, + [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall, #include <asm/syscalls_32.h> }; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index b1bf31713374..adf619a856e8 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -4,11 +4,17 @@ #include <linux/linkage.h> #include <linux/sys.h> #include <linux/cache.h> +#include <linux/syscalls.h> #include <asm/asm-offsets.h> #include <asm/syscall.h> -/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ -extern asmlinkage long sys_ni_syscall(const struct pt_regs *); +extern asmlinkage long sys_ni_syscall(void); + +SYSCALL_DEFINE0(ni_syscall) +{ + return sys_ni_syscall(); +} + #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); #define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual) #include <asm/syscalls_64.h> @@ -23,7 +29,7 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_max] = &sys_ni_syscall, + [0 ... __NR_syscall_max] = &__x64_sys_ni_syscall, #include <asm/syscalls_64.h> }; @@ -40,7 +46,7 @@ asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = { * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_x32_max] = &sys_ni_syscall, + [0 ... __NR_syscall_x32_max] = &__x64_sys_ni_syscall, #include <asm/syscalls_64.h> }; diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 3fe02546aed3..15908eb9b17e 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -124,13 +124,13 @@ 110 i386 iopl sys_iopl __ia32_sys_iopl 111 i386 vhangup sys_vhangup __ia32_sys_vhangup 112 i386 idle -113 i386 vm86old sys_vm86old sys_ni_syscall +113 i386 vm86old sys_vm86old __ia32_sys_ni_syscall 114 i386 wait4 sys_wait4 __ia32_compat_sys_wait4 115 i386 swapoff sys_swapoff __ia32_sys_swapoff 116 i386 sysinfo sys_sysinfo __ia32_compat_sys_sysinfo 117 i386 ipc sys_ipc __ia32_compat_sys_ipc 118 i386 fsync sys_fsync __ia32_sys_fsync -119 i386 sigreturn sys_sigreturn sys32_sigreturn +119 i386 sigreturn sys_sigreturn __ia32_compat_sys_sigreturn 120 i386 clone sys_clone __ia32_compat_sys_x86_clone 121 i386 setdomainname sys_setdomainname __ia32_sys_setdomainname 122 i386 uname sys_newuname __ia32_sys_newuname @@ -177,14 +177,14 @@ 163 i386 mremap sys_mremap __ia32_sys_mremap 164 i386 setresuid sys_setresuid16 __ia32_sys_setresuid16 165 i386 getresuid sys_getresuid16 __ia32_sys_getresuid16 -166 i386 vm86 sys_vm86 sys_ni_syscall +166 i386 vm86 sys_vm86 __ia32_sys_ni_syscall 167 i386 query_module 168 i386 poll sys_poll __ia32_sys_poll 169 i386 nfsservctl 170 i386 setresgid sys_setresgid16 __ia32_sys_setresgid16 171 i386 getresgid sys_getresgid16 __ia32_sys_getresgid16 172 i386 prctl sys_prctl __ia32_sys_prctl -173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn +173 i386 rt_sigreturn sys_rt_sigreturn __ia32_compat_sys_rt_sigreturn 174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction 175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index 2713490611a3..e010d4ae11f1 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -10,8 +10,7 @@ /* put return address in eax (arg1) */ .macro THUNK name, func, put_ret_addr_in_eax=0 - .globl \name -\name: +SYM_CODE_START_NOALIGN(\name) pushl %eax pushl %ecx pushl %edx @@ -27,6 +26,7 @@ popl %eax ret _ASM_NOKPROBE(\name) +SYM_CODE_END(\name) .endm #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index ea5c4167086c..c5c3b6e86e62 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -12,7 +12,7 @@ /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func, put_ret_addr_in_rdi=0 - ENTRY(\name) +SYM_FUNC_START_NOALIGN(\name) pushq %rbp movq %rsp, %rbp @@ -33,7 +33,7 @@ call \func jmp .L_restore - ENDPROC(\name) +SYM_FUNC_END(\name) _ASM_NOKPROBE(\name) .endm @@ -56,7 +56,7 @@ #if defined(CONFIG_TRACE_IRQFLAGS) \ || defined(CONFIG_DEBUG_LOCK_ALLOC) \ || defined(CONFIG_PREEMPTION) -.L_restore: +SYM_CODE_START_LOCAL_NOALIGN(.L_restore) popq %r11 popq %r10 popq %r9 @@ -69,4 +69,5 @@ popq %rbp ret _ASM_NOKPROBE(.L_restore) +SYM_CODE_END(.L_restore) #endif diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 0f2154106d01..2b75e80f6b41 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -87,11 +87,9 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. # -CFLAGS_REMOVE_vdso-note.o = -pg CFLAGS_REMOVE_vclock_gettime.o = -pg CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg CFLAGS_REMOVE_vgetcpu.o = -pg -CFLAGS_REMOVE_vvar.o = -pg # # X32 processes use x32 vDSO to access 64bit kernel data. diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index d9ff616bb0f6..7d70935b6758 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -15,7 +15,7 @@ #include "../../../../lib/vdso/gettimeofday.c" extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); -extern time_t __vdso_time(time_t *t); +extern __kernel_old_time_t __vdso_time(__kernel_old_time_t *t); int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { @@ -25,12 +25,12 @@ int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) int gettimeofday(struct __kernel_old_timeval *, struct timezone *) __attribute__((weak, alias("__vdso_gettimeofday"))); -time_t __vdso_time(time_t *t) +__kernel_old_time_t __vdso_time(__kernel_old_time_t *t) { return __cvdso_time(t); } -time_t time(time_t *t) __attribute__((weak, alias("__vdso_time"))); +__kernel_old_time_t time(__kernel_old_time_t *t) __attribute__((weak, alias("__vdso_time"))); #if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64) diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 263d7433dea8..de1fff7188aa 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -62,7 +62,7 @@ __kernel_vsyscall: /* Enter using int $0x80 */ int $0x80 -GLOBAL(int80_landing_pad) +SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL) /* * Restore EDX and ECX in case they were clobbered. EBP is not diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index e7c596dea947..44c33103a955 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -184,7 +184,7 @@ bool emulate_vsyscall(unsigned long error_code, */ switch (vsyscall_nr) { case 0: - if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || + if (!write_ok_or_segv(regs->di, sizeof(struct __kernel_old_timeval)) || !write_ok_or_segv(regs->si, sizeof(struct timezone))) { ret = -EFAULT; goto check_fault; @@ -194,7 +194,7 @@ bool emulate_vsyscall(unsigned long error_code, break; case 1: - if (!write_ok_or_segv(regs->di, sizeof(time_t))) { + if (!write_ok_or_segv(regs->di, sizeof(__kernel_old_time_t))) { ret = -EFAULT; goto check_fault; } @@ -222,7 +222,7 @@ bool emulate_vsyscall(unsigned long error_code, */ regs->orig_ax = syscall_nr; regs->ax = -ENOSYS; - tmp = secure_computing(NULL); + tmp = secure_computing(); if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); |