aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/entry
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/entry')
-rw-r--r--arch/x86/entry/common.c112
-rw-r--r--arch/x86/entry/entry_32.S11
-rw-r--r--arch/x86/entry/entry_64.S11
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl4
-rw-r--r--arch/x86/entry/thunk_64.S6
-rw-r--r--arch/x86/entry/vdso/Makefile10
-rw-r--r--arch/x86/entry/vdso/vdso32/sigreturn.S8
-rw-r--r--arch/x86/entry/vdso/vdso32/system_call.S7
-rw-r--r--arch/x86/entry/vdso/vma.c67
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c12
10 files changed, 109 insertions, 139 deletions
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index ec138e538c44..a1e71d431fed 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -40,10 +40,10 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
#ifdef CONFIG_CONTEXT_TRACKING
/* Called on entry from user mode with IRQs off. */
-__visible void enter_from_user_mode(void)
+__visible inline void enter_from_user_mode(void)
{
CT_WARN_ON(ct_state() != CONTEXT_USER);
- user_exit();
+ user_exit_irqoff();
}
#else
static inline void enter_from_user_mode(void) {}
@@ -64,22 +64,16 @@ static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
}
/*
- * We can return 0 to resume the syscall or anything else to go to phase
- * 2. If we resume the syscall, we need to put something appropriate in
- * regs->orig_ax.
- *
- * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
- * are fully functional.
- *
- * For phase 2's benefit, our return value is:
- * 0: resume the syscall
- * 1: go to phase 2; no seccomp phase 2 needed
- * anything else: go to phase 2; pass return value to seccomp
+ * Returns the syscall nr to run (which should match regs->orig_ax) or -1
+ * to skip the syscall.
*/
-unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
+static long syscall_trace_enter(struct pt_regs *regs)
{
+ u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+
struct thread_info *ti = pt_regs_to_thread_info(regs);
unsigned long ret = 0;
+ bool emulated = false;
u32 work;
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
@@ -87,11 +81,19 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
+ if (unlikely(work & _TIF_SYSCALL_EMU))
+ emulated = true;
+
+ if ((emulated || (work & _TIF_SYSCALL_TRACE)) &&
+ tracehook_report_syscall_entry(regs))
+ return -1L;
+
+ if (emulated)
+ return -1L;
+
#ifdef CONFIG_SECCOMP
/*
- * Do seccomp first -- it should minimize exposure of other
- * code, and keeping seccomp fast is probably more valuable
- * than the rest of this.
+ * Do seccomp after ptrace, to catch any tracer changes.
*/
if (work & _TIF_SECCOMP) {
struct seccomp_data sd;
@@ -118,69 +120,12 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
sd.args[5] = regs->bp;
}
- BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
- BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);
-
- ret = seccomp_phase1(&sd);
- if (ret == SECCOMP_PHASE1_SKIP) {
- regs->orig_ax = -1;
- ret = 0;
- } else if (ret != SECCOMP_PHASE1_OK) {
- return ret; /* Go directly to phase 2 */
- }
-
- work &= ~_TIF_SECCOMP;
- }
-#endif
-
- /* Do our best to finish without phase 2. */
- if (work == 0)
- return ret; /* seccomp and/or nohz only (ret == 0 here) */
-
-#ifdef CONFIG_AUDITSYSCALL
- if (work == _TIF_SYSCALL_AUDIT) {
- /*
- * If there is no more work to be done except auditing,
- * then audit in phase 1. Phase 2 always audits, so, if
- * we audit here, then we can't go on to phase 2.
- */
- do_audit_syscall_entry(regs, arch);
- return 0;
+ ret = __secure_computing(&sd);
+ if (ret == -1)
+ return ret;
}
#endif
- return 1; /* Something is enabled that we can't handle in phase 1 */
-}
-
-/* Returns the syscall nr to run (which should match regs->orig_ax). */
-long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
- unsigned long phase1_result)
-{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
- long ret = 0;
- u32 work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
-
- if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
- BUG_ON(regs != task_pt_regs(current));
-
-#ifdef CONFIG_SECCOMP
- /*
- * Call seccomp_phase2 before running the other hooks so that
- * they can see any changes made by a seccomp tracer.
- */
- if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
- /* seccomp failures shouldn't expose any additional code. */
- return -1;
- }
-#endif
-
- if (unlikely(work & _TIF_SYSCALL_EMU))
- ret = -1L;
-
- if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
- tracehook_report_syscall_entry(regs))
- ret = -1L;
-
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->orig_ax);
@@ -189,17 +134,6 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
return ret ?: regs->orig_ax;
}
-long syscall_trace_enter(struct pt_regs *regs)
-{
- u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
- unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
-
- if (phase1_result == 0)
- return regs->orig_ax;
- else
- return syscall_trace_enter_phase2(regs, arch, phase1_result);
-}
-
#define EXIT_TO_USERMODE_LOOP_FLAGS \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
@@ -274,7 +208,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
ti->status &= ~TS_COMPAT;
#endif
- user_enter();
+ user_enter_irqoff();
}
#define SYSCALL_EXIT_WORK_FLAGS \
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 983e5d3a0d27..0b56666e6039 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1153,3 +1153,14 @@ ENTRY(async_page_fault)
jmp error_code
END(async_page_fault)
#endif
+
+ENTRY(rewind_stack_do_exit)
+ /* Prevent any naive code from trying to unwind to our caller. */
+ xorl %ebp, %ebp
+
+ movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
+ leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
+
+ call do_exit
+1: jmp 1b
+END(rewind_stack_do_exit)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9ee0da1807ed..b846875aeea6 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1423,3 +1423,14 @@ ENTRY(ignore_sysret)
mov $-ENOSYS, %eax
sysret
END(ignore_sysret)
+
+ENTRY(rewind_stack_do_exit)
+ /* Prevent any naive code from trying to unwind to our caller. */
+ xorl %ebp, %ebp
+
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
+ leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp
+
+ call do_exit
+1: jmp 1b
+END(rewind_stack_do_exit)
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 555263e385c9..e9ce9c7c39b4 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -374,5 +374,5 @@
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
545 x32 execveat compat_sys_execveat/ptregs
-534 x32 preadv2 compat_sys_preadv2
-535 x32 pwritev2 compat_sys_pwritev2
+546 x32 preadv2 compat_sys_preadv64v2
+547 x32 pwritev2 compat_sys_pwritev64v2
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index 027aec4a74df..627ecbcb2e62 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -33,7 +33,7 @@
.endif
call \func
- jmp restore
+ jmp .L_restore
_ASM_NOKPROBE(\name)
.endm
@@ -54,7 +54,7 @@
#if defined(CONFIG_TRACE_IRQFLAGS) \
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
|| defined(CONFIG_PREEMPT)
-restore:
+.L_restore:
popq %r11
popq %r10
popq %r9
@@ -66,5 +66,5 @@ restore:
popq %rdi
popq %rbp
ret
- _ASM_NOKPROBE(restore)
+ _ASM_NOKPROBE(.L_restore)
#endif
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 253b72eaade6..d5409660f5de 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -55,7 +55,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi
hostprogs-y += vdso2c
quiet_cmd_vdso2c = VDSO2C $@
@@ -75,7 +75,7 @@ CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
-fno-omit-frame-pointer -foptimize-sibling-calls \
-DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
-$(vobjs): KBUILD_CFLAGS += $(CFL)
+$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
@@ -134,7 +134,7 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
targets += vdso32/vdso32.lds
-targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o
+targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
targets += vdso32/vclock_gettime.o
KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO
@@ -145,6 +145,7 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
@@ -156,7 +157,8 @@ $(obj)/vdso32.so.dbg: FORCE \
$(obj)/vdso32/vdso32.lds \
$(obj)/vdso32/vclock_gettime.o \
$(obj)/vdso32/note.o \
- $(obj)/vdso32/system_call.o
+ $(obj)/vdso32/system_call.o \
+ $(obj)/vdso32/sigreturn.o
$(call if_changed,vdso)
#
diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S
index d7ec4e251c0a..20633e026e82 100644
--- a/arch/x86/entry/vdso/vdso32/sigreturn.S
+++ b/arch/x86/entry/vdso/vdso32/sigreturn.S
@@ -1,11 +1,3 @@
-/*
- * Common code for the sigreturn entry points in vDSO images.
- * So far this code is the same for both int80 and sysenter versions.
- * This file is #include'd by int80.S et al to define them first thing.
- * The kernel assumes that the addresses of these routines are constant
- * for all vDSO implementations.
- */
-
#include <linux/linkage.h>
#include <asm/unistd_32.h>
#include <asm/asm-offsets.h>
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 0109ac6cb79c..ed4bc9731cbb 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -2,16 +2,11 @@
* AT_SYSINFO entry point
*/
+#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
-/*
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#include "sigreturn.S"
-
.text
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index ab220ac9b3b9..f840766659a8 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
+#include <linux/ptrace.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
@@ -97,10 +98,40 @@ static int vdso_fault(const struct vm_special_mapping *sm,
return 0;
}
-static const struct vm_special_mapping text_mapping = {
- .name = "[vdso]",
- .fault = vdso_fault,
-};
+static void vdso_fix_landing(const struct vdso_image *image,
+ struct vm_area_struct *new_vma)
+{
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+ if (in_ia32_syscall() && image == &vdso_image_32) {
+ struct pt_regs *regs = current_pt_regs();
+ unsigned long vdso_land = image->sym_int80_landing_pad;
+ unsigned long old_land_addr = vdso_land +
+ (unsigned long)current->mm->context.vdso;
+
+ /* Fixing userspace landing - look at do_fast_syscall_32 */
+ if (regs->ip == old_land_addr)
+ regs->ip = new_vma->vm_start + vdso_land;
+ }
+#endif
+}
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+ const struct vdso_image *image = current->mm->context.vdso_image;
+
+ if (image->size != new_size)
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
+ return -EFAULT;
+
+ vdso_fix_landing(image, new_vma);
+ current->mm->context.vdso = (void __user *)new_vma->vm_start;
+
+ return 0;
+}
static int vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -151,6 +182,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
struct vm_area_struct *vma;
unsigned long addr, text_start;
int ret = 0;
+
+ static const struct vm_special_mapping vdso_mapping = {
+ .name = "[vdso]",
+ .fault = vdso_fault,
+ .mremap = vdso_mremap,
+ };
static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
.fault = vvar_fault,
@@ -185,7 +222,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
image->size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &text_mapping);
+ &vdso_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
@@ -294,15 +331,9 @@ static void vgetcpu_cpu_init(void *arg)
write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
-static int
-vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
+static int vgetcpu_online(unsigned int cpu)
{
- long cpu = (long)arg;
-
- if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
- smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
-
- return NOTIFY_DONE;
+ return smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
}
static int __init init_vdso(void)
@@ -313,15 +344,9 @@ static int __init init_vdso(void)
init_vdso_image(&vdso_image_x32);
#endif
- cpu_notifier_register_begin();
-
- on_each_cpu(vgetcpu_cpu_init, NULL, 1);
/* notifier priority > KVM */
- __hotcpu_notifier(vgetcpu_cpu_notifier, 30);
-
- cpu_notifier_register_done();
-
- return 0;
+ return cpuhp_setup_state(CPUHP_AP_X86_VDSO_VMA_ONLINE,
+ "AP_X86_VDSO_VMA_ONLINE", vgetcpu_online, NULL);
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 174c2549939d..636c4b341f36 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -96,7 +96,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
/*
* XXX: if access_ok, get_user, and put_user handled
- * sig_on_uaccess_error, this could go away.
+ * sig_on_uaccess_err, this could go away.
*/
if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
@@ -125,7 +125,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr, syscall_nr, tmp;
- int prev_sig_on_uaccess_error;
+ int prev_sig_on_uaccess_err;
long ret;
/*
@@ -207,7 +207,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
*/
regs->orig_ax = syscall_nr;
regs->ax = -ENOSYS;
- tmp = secure_computing();
+ tmp = secure_computing(NULL);
if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
warn_bad_vsyscall(KERN_DEBUG, regs,
"seccomp tried to change syscall nr or ip");
@@ -221,8 +221,8 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
* With a real vsyscall, page faults cause SIGSEGV. We want to
* preserve that behavior to make writing exploits harder.
*/
- prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
- current_thread_info()->sig_on_uaccess_error = 1;
+ prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err;
+ current->thread.sig_on_uaccess_err = 1;
ret = -EFAULT;
switch (vsyscall_nr) {
@@ -243,7 +243,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
break;
}
- current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
+ current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err;
check_fault:
if (ret == -EFAULT) {