aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/entry/entry_32.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/entry/entry_32.S')
-rw-r--r--arch/x86/entry/entry_32.S294
1 files changed, 63 insertions, 231 deletions
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 21dc60a60b5f..3eb572ed3d7a 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -3,7 +3,7 @@
*
* entry_32.S contains the system-call and low-level fault and trap handling routines.
*
- * Stack layout in 'syscall_exit':
+ * Stack layout while running C code:
* ptrace needs to have all registers on the stack.
* If the order here is changed, it needs to be
* updated in fork.c:copy_process(), signal.c:do_signal(),
@@ -45,16 +45,6 @@
#include <asm/asm.h>
#include <asm/smap.h>
-/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
-#include <linux/elf-em.h>
-#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_LE 0x40000000
-
-#ifndef CONFIG_AUDITSYSCALL
-# define sysenter_audit syscall_trace_entry
-# define sysexit_audit syscall_exit_work
-#endif
-
.section .entry.text, "ax"
/*
@@ -163,13 +153,13 @@
#endif /* CONFIG_X86_32_LAZY_GS */
-.macro SAVE_ALL
+.macro SAVE_ALL pt_regs_ax=%eax
cld
PUSH_GS
pushl %fs
pushl %es
pushl %ds
- pushl %eax
+ pushl \pt_regs_ax
pushl %ebp
pushl %edi
pushl %esi
@@ -221,7 +211,11 @@ ENTRY(ret_from_fork)
popl %eax
pushl $0x0202 # Reset kernel eflags
popfl
- jmp syscall_exit
+
+ /* When we fork, we trace the syscall return in the child, too. */
+ movl %esp, %eax
+ call syscall_return_slowpath
+ jmp restore_all
END(ret_from_fork)
ENTRY(ret_from_kernel_thread)
@@ -234,7 +228,15 @@ ENTRY(ret_from_kernel_thread)
movl PT_EBP(%esp), %eax
call *PT_EBX(%esp)
movl $0, PT_EAX(%esp)
- jmp syscall_exit
+
+ /*
+ * Kernel threads return to userspace as if returning from a syscall.
+ * We should check whether anything actually uses this path and, if so,
+ * consider switching it over to ret_from_fork.
+ */
+ movl %esp, %eax
+ call syscall_return_slowpath
+ jmp restore_all
ENDPROC(ret_from_kernel_thread)
/*
@@ -265,15 +267,10 @@ ret_from_intr:
jb resume_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace)
- LOCKDEP_SYS_EXIT
- DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
- # setting need_resched or sigpending
- # between sampling and the iret
+ DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
- # int/exception return?
- jne work_pending
+ movl %esp, %eax
+ call prepare_exit_to_usermode
jmp restore_all
END(ret_from_exception)
@@ -290,111 +287,48 @@ need_resched:
END(resume_kernel)
#endif
-/*
- * SYSENTER_RETURN points to after the SYSENTER instruction
- * in the vsyscall page. See vsyscall-sysentry.S, which defines
- * the symbol.
- */
-
# SYSENTER call handler stub
ENTRY(entry_SYSENTER_32)
movl TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
+ pushl $__USER_DS /* pt_regs->ss */
+ pushl %ecx /* pt_regs->cx */
+ pushfl /* pt_regs->flags (except IF = 0) */
+ orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
+ pushl $__USER_CS /* pt_regs->cs */
+ pushl $0 /* pt_regs->ip = 0 (placeholder) */
+ pushl %eax /* pt_regs->orig_ax */
+ SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
+
/*
- * Interrupts are disabled here, but we can't trace it until
- * enough kernel state to call TRACE_IRQS_OFF can be called - but
- * we immediately enable interrupts at that point anyway.
- */
- pushl $__USER_DS
- pushl %ebp
- pushfl
- orl $X86_EFLAGS_IF, (%esp)
- pushl $__USER_CS
- /*
- * Push current_thread_info()->sysenter_return to the stack.
- * A tiny bit of offset fixup is necessary: TI_sysenter_return
- * is relative to thread_info, which is at the bottom of the
- * kernel stack page. 4*4 means the 4 words pushed above;
- * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
- * and THREAD_SIZE takes us to the bottom.
+ * User mode is traced as though IRQs are on, and SYSENTER
+ * turned them off.
*/
- pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
-
- pushl %eax
- SAVE_ALL
- ENABLE_INTERRUPTS(CLBR_NONE)
-
-/*
- * Load the potential sixth argument from user stack.
- * Careful about security.
- */
- cmpl $__PAGE_OFFSET-3, %ebp
- jae syscall_fault
- ASM_STAC
-1: movl (%ebp), %ebp
- ASM_CLAC
- movl %ebp, PT_EBP(%esp)
- _ASM_EXTABLE(1b, syscall_fault)
+ TRACE_IRQS_OFF
- GET_THREAD_INFO(%ebp)
+ movl %esp, %eax
+ call do_fast_syscall_32
+ testl %eax, %eax
+ jz .Lsyscall_32_done
- testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
- jnz sysenter_audit
-sysenter_do_call:
- cmpl $(NR_syscalls), %eax
- jae sysenter_badsys
- call *sys_call_table(, %eax, 4)
-sysenter_after_call:
- movl %eax, PT_EAX(%esp)
- LOCKDEP_SYS_EXIT
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- testl $_TIF_ALLWORK_MASK, %ecx
- jnz sysexit_audit
-sysenter_exit:
-/* if something modifies registers it must also disable sysexit */
- movl PT_EIP(%esp), %edx
- movl PT_OLDESP(%esp), %ecx
- xorl %ebp, %ebp
- TRACE_IRQS_ON
+/* Opportunistic SYSEXIT */
+ TRACE_IRQS_ON /* User mode traces as IRQs on. */
+ movl PT_EIP(%esp), %edx /* pt_regs->ip */
+ movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */
1: mov PT_FS(%esp), %fs
PTGS_TO_GS
- ENABLE_INTERRUPTS_SYSEXIT
+ popl %ebx /* pt_regs->bx */
+ addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */
+ popl %esi /* pt_regs->si */
+ popl %edi /* pt_regs->di */
+ popl %ebp /* pt_regs->bp */
+ popl %eax /* pt_regs->ax */
-#ifdef CONFIG_AUDITSYSCALL
-sysenter_audit:
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), TI_flags(%ebp)
- jnz syscall_trace_entry
- /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */
- movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */
- /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */
- pushl PT_ESI(%esp) /* a3: 5th arg */
- pushl PT_EDX+4(%esp) /* a2: 4th arg */
- call __audit_syscall_entry
- popl %ecx /* get that remapped edx off the stack */
- popl %ecx /* get that remapped esi off the stack */
- movl PT_EAX(%esp), %eax /* reload syscall number */
- jmp sysenter_do_call
-
-sysexit_audit:
- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
- jnz syscall_exit_work
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY)
- movl %eax, %edx /* second arg, syscall return value */
- cmpl $-MAX_ERRNO, %eax /* is it an error ? */
- setbe %al /* 1 if so, 0 if not */
- movzbl %al, %eax /* zero-extend that */
- call __audit_syscall_exit
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
- jnz syscall_exit_work
- movl PT_EAX(%esp), %eax /* reload syscall return value */
- jmp sysenter_exit
-#endif
+ /*
+ * Return back to the vDSO, which will pop ecx and edx.
+ * Don't bother with DS and ES (they already contain __USER_DS).
+ */
+ ENABLE_INTERRUPTS_SYSEXIT
.pushsection .fixup, "ax"
2: movl $0, PT_FS(%esp)
@@ -407,27 +341,18 @@ ENDPROC(entry_SYSENTER_32)
# system call handler stub
ENTRY(entry_INT80_32)
ASM_CLAC
- pushl %eax # save orig_eax
- SAVE_ALL
- GET_THREAD_INFO(%ebp)
- # system call tracing in operation / emulation
- testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
- jnz syscall_trace_entry
- cmpl $(NR_syscalls), %eax
- jae syscall_badsys
-syscall_call:
- call *sys_call_table(, %eax, 4)
-syscall_after_call:
- movl %eax, PT_EAX(%esp) # store the return value
-syscall_exit:
- LOCKDEP_SYS_EXIT
- DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
- # setting need_resched or sigpending
- # between sampling and the iret
- TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- testl $_TIF_ALLWORK_MASK, %ecx # current->work
- jnz syscall_exit_work
+ pushl %eax /* pt_regs->orig_ax */
+ SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
+
+ /*
+ * User mode is traced as though IRQs are on. Unlike the 64-bit
+ * case, INT80 is a trap gate on 32-bit kernels, so interrupts
+ * are already on (unless user code is messing around with iopl).
+ */
+
+ movl %esp, %eax
+ call do_syscall_32_irqs_on
+.Lsyscall_32_done:
restore_all:
TRACE_IRQS_IRET
@@ -504,99 +429,6 @@ ldt_ss:
#endif
ENDPROC(entry_INT80_32)
- # perform work that needs to be done immediately before resumption
- ALIGN
-work_pending:
- testb $_TIF_NEED_RESCHED, %cl
- jz work_notifysig
-work_resched:
- call schedule
- LOCKDEP_SYS_EXIT
- DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
- # setting need_resched or sigpending
- # between sampling and the iret
- TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
- # than syscall tracing?
- jz restore_all
- testb $_TIF_NEED_RESCHED, %cl
- jnz work_resched
-
-work_notifysig: # deal with pending signals and
- # notify-resume requests
-#ifdef CONFIG_VM86
- testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
- movl %esp, %eax
- jnz work_notifysig_v86 # returning to kernel-space or
- # vm86-space
-1:
-#else
- movl %esp, %eax
-#endif
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- movb PT_CS(%esp), %bl
- andb $SEGMENT_RPL_MASK, %bl
- cmpb $USER_RPL, %bl
- jb resume_kernel
- xorl %edx, %edx
- call do_notify_resume
- jmp resume_userspace
-
-#ifdef CONFIG_VM86
- ALIGN
-work_notifysig_v86:
- pushl %ecx # save ti_flags for do_notify_resume
- call save_v86_state # %eax contains pt_regs pointer
- popl %ecx
- movl %eax, %esp
- jmp 1b
-#endif
-END(work_pending)
-
- # perform syscall exit tracing
- ALIGN
-syscall_trace_entry:
- movl $-ENOSYS, PT_EAX(%esp)
- movl %esp, %eax
- call syscall_trace_enter
- /* What it returned is what we'll actually use. */
- cmpl $(NR_syscalls), %eax
- jnae syscall_call
- jmp syscall_exit
-END(syscall_trace_entry)
-
- # perform syscall exit tracing
- ALIGN
-syscall_exit_work:
- testl $_TIF_WORK_SYSCALL_EXIT, %ecx
- jz work_pending
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
- # schedule() instead
- movl %esp, %eax
- call syscall_trace_leave
- jmp resume_userspace
-END(syscall_exit_work)
-
-syscall_fault:
- ASM_CLAC
- GET_THREAD_INFO(%ebp)
- movl $-EFAULT, PT_EAX(%esp)
- jmp resume_userspace
-END(syscall_fault)
-
-syscall_badsys:
- movl $-ENOSYS, %eax
- jmp syscall_after_call
-END(syscall_badsys)
-
-sysenter_badsys:
- movl $-ENOSYS, %eax
- jmp sysenter_after_call
-END(sysenter_badsys)
-
.macro FIXUP_ESPFIX_STACK
/*
* Switch back for ESPFIX stack to the normal zerobased stack