diff options
Diffstat (limited to 'fs/exec.c')
-rw-r--r-- | fs/exec.c | 154 |
1 files changed, 107 insertions, 47 deletions
diff --git a/fs/exec.c b/fs/exec.c index 537d92c41105..a0b1f0337a62 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -28,7 +28,6 @@ #include <linux/file.h> #include <linux/fdtable.h> #include <linux/mm.h> -#include <linux/vmacache.h> #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/swap.h> @@ -56,7 +55,6 @@ #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> #include <linux/audit.h> -#include <linux/tracehook.h> #include <linux/kmod.h> #include <linux/fsnotify.h> #include <linux/fs_struct.h> @@ -65,6 +63,7 @@ #include <linux/vmalloc.h> #include <linux/io_uring.h> #include <linux/syscall_user_dispatch.h> +#include <linux/coredump.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> @@ -117,7 +116,7 @@ bool path_noexec(const struct path *path) * Note that a shared library must be both readable and executable due to * security reasons. * - * Also note that we take the address to load from from the file itself. + * Also note that we take the address to load from the file itself. */ SYSCALL_DEFINE1(uselib, const char __user *, library) { @@ -494,8 +493,14 @@ static int bprm_stack_limits(struct linux_binprm *bprm) * the stack. They aren't stored until much later when we can't * signal to the parent that the child has run out of stack space. * Instead, calculate it here so it's possible to fail gracefully. + * + * In the case of argc = 0, make sure there is space for adding a + * empty string (which will bump argc to 1), to ensure confused + * userspace programs don't start processing from argv[1], thinking + * argc can never be 0, to keep them from walking envp by accident. + * See do_execveat_common(). */ - ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); + ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *); if (limit <= ptr_size) return -E2BIG; limit -= ptr_size; @@ -535,7 +540,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv, if (!valid_arg_len(bprm, len)) goto out; - /* We're going to work our way backwords. */ + /* We're going to work our way backwards. */ pos = bprm->p; str += len; bprm->p -= len; @@ -577,11 +582,11 @@ static int copy_strings(int argc, struct user_arg_ptr argv, if (kmapped_page) { flush_dcache_page(kmapped_page); - kunmap(kmapped_page); + kunmap_local(kaddr); put_arg_page(kmapped_page); } kmapped_page = page; - kaddr = kmap(kmapped_page); + kaddr = kmap_local_page(kmapped_page); kpos = pos & PAGE_MASK; flush_arg_page(bprm, kpos, kmapped_page); } @@ -595,7 +600,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv, out: if (kmapped_page) { flush_dcache_page(kmapped_page); - kunmap(kmapped_page); + kunmap_local(kaddr); put_arg_page(kmapped_page); } return ret; @@ -624,7 +629,6 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm) unsigned int bytes_to_copy = min_t(unsigned int, len, min_not_zero(offset_in_page(pos), PAGE_SIZE)); struct page *page; - char *kaddr; pos -= bytes_to_copy; arg -= bytes_to_copy; @@ -633,11 +637,8 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm) page = get_arg_page(bprm, pos, 1); if (!page) return -E2BIG; - kaddr = kmap_atomic(page); flush_arg_page(bprm, pos & PAGE_MASK, page); - memcpy(kaddr + offset_in_page(pos), arg, bytes_to_copy); - flush_dcache_page(page); - kunmap_atomic(kaddr); + memcpy_to_page(page, offset_in_page(pos), arg, bytes_to_copy); put_arg_page(page); } @@ -681,6 +682,8 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) unsigned long length = old_end - old_start; unsigned long new_start = old_start - shift; unsigned long new_end = old_end - shift; + VMA_ITERATOR(vmi, mm, new_start); + struct vm_area_struct *next; struct mmu_gather tlb; BUG_ON(new_start > new_end); @@ -689,7 +692,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * ensure there are no vmas between where we want to go * and where we are */ - if (vma != find_vma(mm, new_start)) + if (vma != vma_next(&vmi)) return -EFAULT; /* @@ -708,12 +711,13 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) lru_add_drain(); tlb_gather_mmu(&tlb, mm); + next = vma_next(&vmi); if (new_end > old_start) { /* * when the old and new regions overlap clear from new_end. */ free_pgd_range(&tlb, new_end, old_end, new_end, - vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); + next ? next->vm_start : USER_PGTABLES_CEILING); } else { /* * otherwise, clean from old_start; this is done to not touch @@ -722,7 +726,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * for the others its just a little faster. */ free_pgd_range(&tlb, old_start, old_end, new_end, - vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); + next ? next->vm_start : USER_PGTABLES_CEILING); } tlb_finish_mmu(&tlb); @@ -752,6 +756,7 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_size; unsigned long stack_expand; unsigned long rlim_stack; + struct mmu_gather tlb; #ifdef CONFIG_STACK_GROWSUP /* Limit stack size */ @@ -806,8 +811,11 @@ int setup_arg_pages(struct linux_binprm *bprm, vm_flags |= mm->def_flags; vm_flags |= VM_STACK_INCOMPLETE_SETUP; - ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end, + tlb_gather_mmu(&tlb, mm); + ret = mprotect_fixup(&tlb, vma, &prev, vma->vm_start, vma->vm_end, vm_flags); + tlb_finish_mmu(&tlb); + if (ret) goto out_unlock; BUG_ON(prev != vma); @@ -873,11 +881,11 @@ int transfer_args_to_stack(struct linux_binprm *bprm, for (index = MAX_ARG_PAGES - 1; index >= stop; index--) { unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0; - char *src = kmap(bprm->page[index]) + offset; + char *src = kmap_local_page(bprm->page[index]) + offset; sp -= PAGE_SIZE - offset; if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0) ret = -EFAULT; - kunmap(bprm->page[index]); + kunmap_local(src); if (ret) goto out; } @@ -951,8 +959,7 @@ struct file *open_exec(const char *name) } EXPORT_SYMBOL(open_exec); -#if defined(CONFIG_HAVE_AOUT) || defined(CONFIG_BINFMT_FLAT) || \ - defined(CONFIG_BINFMT_ELF_FDPIC) +#if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC) ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) { ssize_t res = vfs_read(file, (void __user *)addr, len, &pos); @@ -1017,9 +1024,9 @@ static int exec_mmap(struct mm_struct *mm) activate_mm(active_mm, mm); if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); - tsk->mm->vmacache_seqnum = 0; - vmacache_flush(tsk); + lru_gen_add_mm(mm); task_unlock(tsk); + lru_gen_use_mm(mm); if (old_mm) { mmap_read_unlock(old_mm); BUG_ON(active_mm != old_mm); @@ -1045,7 +1052,7 @@ static int de_thread(struct task_struct *tsk) * Kill all other threads in the thread group. */ spin_lock_irq(lock); - if (signal_group_exit(sig)) { + if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) { /* * Another group action in progress, just * return so that the signal is processed. @@ -1054,7 +1061,7 @@ static int de_thread(struct task_struct *tsk) return -EAGAIN; } - sig->group_exit_task = tsk; + sig->group_exec_task = tsk; sig->notify_count = zap_other_threads(tsk); if (!thread_group_leader(tsk)) sig->notify_count--; @@ -1082,7 +1089,7 @@ static int de_thread(struct task_struct *tsk) write_lock_irq(&tasklist_lock); /* * Do this under tasklist_lock to ensure that - * exit_notify() can't miss ->group_exit_task + * exit_notify() can't miss ->group_exec_task */ sig->notify_count = -1; if (likely(leader->exit_state)) @@ -1139,7 +1146,7 @@ static int de_thread(struct task_struct *tsk) /* * We are going to release_task()->ptrace_unlink() silently, * the tracer can sleep in do_wait(). EXIT_DEAD guarantees - * the tracer wont't block again waiting for this thread. + * the tracer won't block again waiting for this thread. */ if (unlikely(leader->ptrace)) __wake_up_parent(leader, leader->parent); @@ -1149,7 +1156,7 @@ static int de_thread(struct task_struct *tsk) release_task(leader); } - sig->group_exit_task = NULL; + sig->group_exec_task = NULL; sig->notify_count = 0; no_thread_group: @@ -1162,7 +1169,7 @@ no_thread_group: killed: /* protects against exit_notify() and __exit_signal() */ read_lock(&tasklist_lock); - sig->group_exit_task = NULL; + sig->group_exec_task = NULL; sig->notify_count = 0; read_unlock(&tasklist_lock); return -EAGAIN; @@ -1190,11 +1197,11 @@ static int unshare_sighand(struct task_struct *me) return -ENOMEM; refcount_set(&newsighand->count, 1); - memcpy(newsighand->action, oldsighand->action, - sizeof(newsighand->action)); write_lock_irq(&tasklist_lock); spin_lock(&oldsighand->siglock); + memcpy(newsighand->action, oldsighand->action, + sizeof(newsighand->action)); rcu_assign_pointer(me->sighand, newsighand); spin_unlock(&oldsighand->siglock); write_unlock_irq(&tasklist_lock); @@ -1207,7 +1214,8 @@ static int unshare_sighand(struct task_struct *me) char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk) { task_lock(tsk); - strncpy(buf, tsk->comm, buf_size); + /* Always NUL terminated and zero-padded */ + strscpy_pad(buf, tsk->comm, buf_size); task_unlock(tsk); return buf; } @@ -1222,7 +1230,7 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) { task_lock(tsk); trace_task_rename(tsk, buf); - strlcpy(tsk->comm, buf, sizeof(tsk->comm)); + strscpy_pad(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); perf_event_comm(tsk, exec); } @@ -1267,7 +1275,7 @@ int begin_new_exec(struct linux_binprm * bprm) /* * Must be called _before_ exec_mmap() as bprm->mm is - * not visibile until then. This also enables the update + * not visible until then. This also enables the update * to be lockless. */ retval = set_mm_exe_file(bprm->mm, bprm->file); @@ -1290,7 +1298,10 @@ int begin_new_exec(struct linux_binprm * bprm) bprm->mm = NULL; #ifdef CONFIG_POSIX_TIMERS - exit_itimers(me->signal); + spin_lock_irq(&me->sighand->siglock); + posix_cpu_timers_exit(me); + spin_unlock_irq(&me->sighand->siglock); + exit_itimers(me); flush_itimer_signals(); #endif @@ -1301,13 +1312,7 @@ int begin_new_exec(struct linux_binprm * bprm) if (retval) goto out_unlock; - /* - * Ensure that the uaccess routines can actually operate on userspace - * pointers: - */ - force_uaccess_begin(); - - me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | + me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_NOFREEZE | PF_NO_SETAFFINITY); flush_thread(); me->personality &= ~bprm->per_clear; @@ -1584,7 +1589,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) { /* Handle suid and sgid on files */ struct user_namespace *mnt_userns; - struct inode *inode; + struct inode *inode = file_inode(file); unsigned int mode; kuid_t uid; kgid_t gid; @@ -1595,7 +1600,6 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) if (task_no_new_privs(current)) return; - inode = file->f_path.dentry->d_inode; mode = READ_ONCE(inode->i_mode); if (!(mode & (S_ISUID|S_ISGID))) return; @@ -1675,13 +1679,13 @@ int remove_arg_zero(struct linux_binprm *bprm) ret = -EFAULT; goto out; } - kaddr = kmap_atomic(page); + kaddr = kmap_local_page(page); for (; offset < PAGE_SIZE && kaddr[offset]; offset++, bprm->p++) ; - kunmap_atomic(kaddr); + kunmap_local(kaddr); put_arg_page(page); } while (offset == PAGE_SIZE); @@ -1877,7 +1881,7 @@ static int do_execveat_common(int fd, struct filename *filename, * whether NPROC limit is still exceeded. */ if ((current->flags & PF_NPROC_EXCEEDED) && - is_ucounts_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { + is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { retval = -EAGAIN; goto out_ret; } @@ -1893,6 +1897,9 @@ static int do_execveat_common(int fd, struct filename *filename, } retval = count(argv, MAX_ARG_STRINGS); + if (retval == 0) + pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n", + current->comm, bprm->filename); if (retval < 0) goto out_free; bprm->argc = retval; @@ -1919,6 +1926,19 @@ static int do_execveat_common(int fd, struct filename *filename, if (retval < 0) goto out_free; + /* + * When argv is empty, add an empty string ("") as argv[0] to + * ensure confused userspace programs that start processing + * from argv[1] won't end up walking envp. See also + * bprm_stack_limits(). + */ + if (bprm->argc == 0) { + retval = copy_string_kernel("", bprm); + if (retval < 0) + goto out_free; + bprm->argc = 1; + } + retval = bprm_execve(bprm, fd, filename, flags); out_free: free_bprm(bprm); @@ -1936,6 +1956,10 @@ int kernel_execve(const char *kernel_filename, int fd = AT_FDCWD; int retval; + /* It is non-sense for kernel threads to call execve */ + if (WARN_ON_ONCE(current->flags & PF_KTHREAD)) + return -EINVAL; + filename = getname_kernel(kernel_filename); if (IS_ERR(filename)) return PTR_ERR(filename); @@ -1947,6 +1971,8 @@ int kernel_execve(const char *kernel_filename, } retval = count_strings_kernel(argv); + if (WARN_ON_ONCE(retval == 0)) + retval = -EINVAL; if (retval < 0) goto out_free; bprm->argc = retval; @@ -2096,3 +2122,37 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd, argv, envp, flags); } #endif + +#ifdef CONFIG_SYSCTL + +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (!error) + validate_coredump_safety(); + return error; +} + +static struct ctl_table fs_exec_sysctls[] = { + { + .procname = "suid_dumpable", + .data = &suid_dumpable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_coredump, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, + }, + { } +}; + +static int __init init_fs_exec_sysctls(void) +{ + register_sysctl_init("fs", fs_exec_sysctls); + return 0; +} + +fs_initcall(init_fs_exec_sysctls); +#endif /* CONFIG_SYSCTL */ |