diff options
Diffstat (limited to 'arch/x86/entry/entry_32.S')
-rw-r--r-- | arch/x86/entry/entry_32.S | 294 |
1 files changed, 63 insertions, 231 deletions
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 21dc60a60b5f..3eb572ed3d7a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -3,7 +3,7 @@ * * entry_32.S contains the system-call and low-level fault and trap handling routines. * - * Stack layout in 'syscall_exit': + * Stack layout while running C code: * ptrace needs to have all registers on the stack. * If the order here is changed, it needs to be * updated in fork.c:copy_process(), signal.c:do_signal(), @@ -45,16 +45,6 @@ #include <asm/asm.h> #include <asm/smap.h> -/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ -#include <linux/elf-em.h> -#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) -#define __AUDIT_ARCH_LE 0x40000000 - -#ifndef CONFIG_AUDITSYSCALL -# define sysenter_audit syscall_trace_entry -# define sysexit_audit syscall_exit_work -#endif - .section .entry.text, "ax" /* @@ -163,13 +153,13 @@ #endif /* CONFIG_X86_32_LAZY_GS */ -.macro SAVE_ALL +.macro SAVE_ALL pt_regs_ax=%eax cld PUSH_GS pushl %fs pushl %es pushl %ds - pushl %eax + pushl \pt_regs_ax pushl %ebp pushl %edi pushl %esi @@ -221,7 +211,11 @@ ENTRY(ret_from_fork) popl %eax pushl $0x0202 # Reset kernel eflags popfl - jmp syscall_exit + + /* When we fork, we trace the syscall return in the child, too. */ + movl %esp, %eax + call syscall_return_slowpath + jmp restore_all END(ret_from_fork) ENTRY(ret_from_kernel_thread) @@ -234,7 +228,15 @@ ENTRY(ret_from_kernel_thread) movl PT_EBP(%esp), %eax call *PT_EBX(%esp) movl $0, PT_EAX(%esp) - jmp syscall_exit + + /* + * Kernel threads return to userspace as if returning from a syscall. + * We should check whether anything actually uses this path and, if so, + * consider switching it over to ret_from_fork. + */ + movl %esp, %eax + call syscall_return_slowpath + jmp restore_all ENDPROC(ret_from_kernel_thread) /* @@ -265,15 +267,10 @@ ret_from_intr: jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) - LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret + DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - andl $_TIF_WORK_MASK, %ecx # is there any work to be done on - # int/exception return? - jne work_pending + movl %esp, %eax + call prepare_exit_to_usermode jmp restore_all END(ret_from_exception) @@ -290,111 +287,48 @@ need_resched: END(resume_kernel) #endif -/* - * SYSENTER_RETURN points to after the SYSENTER instruction - * in the vsyscall page. See vsyscall-sysentry.S, which defines - * the symbol. - */ - # SYSENTER call handler stub ENTRY(entry_SYSENTER_32) movl TSS_sysenter_sp0(%esp), %esp sysenter_past_esp: + pushl $__USER_DS /* pt_regs->ss */ + pushl %ecx /* pt_regs->cx */ + pushfl /* pt_regs->flags (except IF = 0) */ + orl $X86_EFLAGS_IF, (%esp) /* Fix IF */ + pushl $__USER_CS /* pt_regs->cs */ + pushl $0 /* pt_regs->ip = 0 (placeholder) */ + pushl %eax /* pt_regs->orig_ax */ + SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ + /* - * Interrupts are disabled here, but we can't trace it until - * enough kernel state to call TRACE_IRQS_OFF can be called - but - * we immediately enable interrupts at that point anyway. - */ - pushl $__USER_DS - pushl %ebp - pushfl - orl $X86_EFLAGS_IF, (%esp) - pushl $__USER_CS - /* - * Push current_thread_info()->sysenter_return to the stack. - * A tiny bit of offset fixup is necessary: TI_sysenter_return - * is relative to thread_info, which is at the bottom of the - * kernel stack page. 4*4 means the 4 words pushed above; - * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack; - * and THREAD_SIZE takes us to the bottom. + * User mode is traced as though IRQs are on, and SYSENTER + * turned them off. */ - pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp) - - pushl %eax - SAVE_ALL - ENABLE_INTERRUPTS(CLBR_NONE) - -/* - * Load the potential sixth argument from user stack. - * Careful about security. - */ - cmpl $__PAGE_OFFSET-3, %ebp - jae syscall_fault - ASM_STAC -1: movl (%ebp), %ebp - ASM_CLAC - movl %ebp, PT_EBP(%esp) - _ASM_EXTABLE(1b, syscall_fault) + TRACE_IRQS_OFF - GET_THREAD_INFO(%ebp) + movl %esp, %eax + call do_fast_syscall_32 + testl %eax, %eax + jz .Lsyscall_32_done - testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp) - jnz sysenter_audit -sysenter_do_call: - cmpl $(NR_syscalls), %eax - jae sysenter_badsys - call *sys_call_table(, %eax, 4) -sysenter_after_call: - movl %eax, PT_EAX(%esp) - LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - testl $_TIF_ALLWORK_MASK, %ecx - jnz sysexit_audit -sysenter_exit: -/* if something modifies registers it must also disable sysexit */ - movl PT_EIP(%esp), %edx - movl PT_OLDESP(%esp), %ecx - xorl %ebp, %ebp - TRACE_IRQS_ON +/* Opportunistic SYSEXIT */ + TRACE_IRQS_ON /* User mode traces as IRQs on. */ + movl PT_EIP(%esp), %edx /* pt_regs->ip */ + movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */ 1: mov PT_FS(%esp), %fs PTGS_TO_GS - ENABLE_INTERRUPTS_SYSEXIT + popl %ebx /* pt_regs->bx */ + addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */ + popl %esi /* pt_regs->si */ + popl %edi /* pt_regs->di */ + popl %ebp /* pt_regs->bp */ + popl %eax /* pt_regs->ax */ -#ifdef CONFIG_AUDITSYSCALL -sysenter_audit: - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), TI_flags(%ebp) - jnz syscall_trace_entry - /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */ - movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */ - /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */ - pushl PT_ESI(%esp) /* a3: 5th arg */ - pushl PT_EDX+4(%esp) /* a2: 4th arg */ - call __audit_syscall_entry - popl %ecx /* get that remapped edx off the stack */ - popl %ecx /* get that remapped esi off the stack */ - movl PT_EAX(%esp), %eax /* reload syscall number */ - jmp sysenter_do_call - -sysexit_audit: - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx - jnz syscall_exit_work - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) - movl %eax, %edx /* second arg, syscall return value */ - cmpl $-MAX_ERRNO, %eax /* is it an error ? */ - setbe %al /* 1 if so, 0 if not */ - movzbl %al, %eax /* zero-extend that */ - call __audit_syscall_exit - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx - jnz syscall_exit_work - movl PT_EAX(%esp), %eax /* reload syscall return value */ - jmp sysenter_exit -#endif + /* + * Return back to the vDSO, which will pop ecx and edx. + * Don't bother with DS and ES (they already contain __USER_DS). + */ + ENABLE_INTERRUPTS_SYSEXIT .pushsection .fixup, "ax" 2: movl $0, PT_FS(%esp) @@ -407,27 +341,18 @@ ENDPROC(entry_SYSENTER_32) # system call handler stub ENTRY(entry_INT80_32) ASM_CLAC - pushl %eax # save orig_eax - SAVE_ALL - GET_THREAD_INFO(%ebp) - # system call tracing in operation / emulation - testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp) - jnz syscall_trace_entry - cmpl $(NR_syscalls), %eax - jae syscall_badsys -syscall_call: - call *sys_call_table(, %eax, 4) -syscall_after_call: - movl %eax, PT_EAX(%esp) # store the return value -syscall_exit: - LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - testl $_TIF_ALLWORK_MASK, %ecx # current->work - jnz syscall_exit_work + pushl %eax /* pt_regs->orig_ax */ + SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ + + /* + * User mode is traced as though IRQs are on. Unlike the 64-bit + * case, INT80 is a trap gate on 32-bit kernels, so interrupts + * are already on (unless user code is messing around with iopl). + */ + + movl %esp, %eax + call do_syscall_32_irqs_on +.Lsyscall_32_done: restore_all: TRACE_IRQS_IRET @@ -504,99 +429,6 @@ ldt_ss: #endif ENDPROC(entry_INT80_32) - # perform work that needs to be done immediately before resumption - ALIGN -work_pending: - testb $_TIF_NEED_RESCHED, %cl - jz work_notifysig -work_resched: - call schedule - LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - andl $_TIF_WORK_MASK, %ecx # is there any work to be done other - # than syscall tracing? - jz restore_all - testb $_TIF_NEED_RESCHED, %cl - jnz work_resched - -work_notifysig: # deal with pending signals and - # notify-resume requests -#ifdef CONFIG_VM86 - testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) - movl %esp, %eax - jnz work_notifysig_v86 # returning to kernel-space or - # vm86-space -1: -#else - movl %esp, %eax -#endif - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - movb PT_CS(%esp), %bl - andb $SEGMENT_RPL_MASK, %bl - cmpb $USER_RPL, %bl - jb resume_kernel - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace - -#ifdef CONFIG_VM86 - ALIGN -work_notifysig_v86: - pushl %ecx # save ti_flags for do_notify_resume - call save_v86_state # %eax contains pt_regs pointer - popl %ecx - movl %eax, %esp - jmp 1b -#endif -END(work_pending) - - # perform syscall exit tracing - ALIGN -syscall_trace_entry: - movl $-ENOSYS, PT_EAX(%esp) - movl %esp, %eax - call syscall_trace_enter - /* What it returned is what we'll actually use. */ - cmpl $(NR_syscalls), %eax - jnae syscall_call - jmp syscall_exit -END(syscall_trace_entry) - - # perform syscall exit tracing - ALIGN -syscall_exit_work: - testl $_TIF_WORK_SYSCALL_EXIT, %ecx - jz work_pending - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call - # schedule() instead - movl %esp, %eax - call syscall_trace_leave - jmp resume_userspace -END(syscall_exit_work) - -syscall_fault: - ASM_CLAC - GET_THREAD_INFO(%ebp) - movl $-EFAULT, PT_EAX(%esp) - jmp resume_userspace -END(syscall_fault) - -syscall_badsys: - movl $-ENOSYS, %eax - jmp syscall_after_call -END(syscall_badsys) - -sysenter_badsys: - movl $-ENOSYS, %eax - jmp sysenter_after_call -END(sysenter_badsys) - .macro FIXUP_ESPFIX_STACK /* * Switch back for ESPFIX stack to the normal zerobased stack |