aboutsummaryrefslogtreecommitdiffstats
path: root/fs/exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/exec.c')
-rw-r--r--fs/exec.c154
1 files changed, 107 insertions, 47 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 537d92c41105..a0b1f0337a62 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -28,7 +28,6 @@
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/mm.h>
-#include <linux/vmacache.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/swap.h>
@@ -56,7 +55,6 @@
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>
-#include <linux/tracehook.h>
#include <linux/kmod.h>
#include <linux/fsnotify.h>
#include <linux/fs_struct.h>
@@ -65,6 +63,7 @@
#include <linux/vmalloc.h>
#include <linux/io_uring.h>
#include <linux/syscall_user_dispatch.h>
+#include <linux/coredump.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -117,7 +116,7 @@ bool path_noexec(const struct path *path)
* Note that a shared library must be both readable and executable due to
* security reasons.
*
- * Also note that we take the address to load from from the file itself.
+ * Also note that we take the address to load from the file itself.
*/
SYSCALL_DEFINE1(uselib, const char __user *, library)
{
@@ -494,8 +493,14 @@ static int bprm_stack_limits(struct linux_binprm *bprm)
* the stack. They aren't stored until much later when we can't
* signal to the parent that the child has run out of stack space.
* Instead, calculate it here so it's possible to fail gracefully.
+ *
+ * In the case of argc = 0, make sure there is space for adding a
+ * empty string (which will bump argc to 1), to ensure confused
+ * userspace programs don't start processing from argv[1], thinking
+ * argc can never be 0, to keep them from walking envp by accident.
+ * See do_execveat_common().
*/
- ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+ ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *);
if (limit <= ptr_size)
return -E2BIG;
limit -= ptr_size;
@@ -535,7 +540,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
if (!valid_arg_len(bprm, len))
goto out;
- /* We're going to work our way backwords. */
+ /* We're going to work our way backwards. */
pos = bprm->p;
str += len;
bprm->p -= len;
@@ -577,11 +582,11 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
if (kmapped_page) {
flush_dcache_page(kmapped_page);
- kunmap(kmapped_page);
+ kunmap_local(kaddr);
put_arg_page(kmapped_page);
}
kmapped_page = page;
- kaddr = kmap(kmapped_page);
+ kaddr = kmap_local_page(kmapped_page);
kpos = pos & PAGE_MASK;
flush_arg_page(bprm, kpos, kmapped_page);
}
@@ -595,7 +600,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
out:
if (kmapped_page) {
flush_dcache_page(kmapped_page);
- kunmap(kmapped_page);
+ kunmap_local(kaddr);
put_arg_page(kmapped_page);
}
return ret;
@@ -624,7 +629,6 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
unsigned int bytes_to_copy = min_t(unsigned int, len,
min_not_zero(offset_in_page(pos), PAGE_SIZE));
struct page *page;
- char *kaddr;
pos -= bytes_to_copy;
arg -= bytes_to_copy;
@@ -633,11 +637,8 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
page = get_arg_page(bprm, pos, 1);
if (!page)
return -E2BIG;
- kaddr = kmap_atomic(page);
flush_arg_page(bprm, pos & PAGE_MASK, page);
- memcpy(kaddr + offset_in_page(pos), arg, bytes_to_copy);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
+ memcpy_to_page(page, offset_in_page(pos), arg, bytes_to_copy);
put_arg_page(page);
}
@@ -681,6 +682,8 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
unsigned long length = old_end - old_start;
unsigned long new_start = old_start - shift;
unsigned long new_end = old_end - shift;
+ VMA_ITERATOR(vmi, mm, new_start);
+ struct vm_area_struct *next;
struct mmu_gather tlb;
BUG_ON(new_start > new_end);
@@ -689,7 +692,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* ensure there are no vmas between where we want to go
* and where we are
*/
- if (vma != find_vma(mm, new_start))
+ if (vma != vma_next(&vmi))
return -EFAULT;
/*
@@ -708,12 +711,13 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
lru_add_drain();
tlb_gather_mmu(&tlb, mm);
+ next = vma_next(&vmi);
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
*/
free_pgd_range(&tlb, new_end, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
+ next ? next->vm_start : USER_PGTABLES_CEILING);
} else {
/*
* otherwise, clean from old_start; this is done to not touch
@@ -722,7 +726,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* for the others its just a little faster.
*/
free_pgd_range(&tlb, old_start, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
+ next ? next->vm_start : USER_PGTABLES_CEILING);
}
tlb_finish_mmu(&tlb);
@@ -752,6 +756,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
unsigned long stack_size;
unsigned long stack_expand;
unsigned long rlim_stack;
+ struct mmu_gather tlb;
#ifdef CONFIG_STACK_GROWSUP
/* Limit stack size */
@@ -806,8 +811,11 @@ int setup_arg_pages(struct linux_binprm *bprm,
vm_flags |= mm->def_flags;
vm_flags |= VM_STACK_INCOMPLETE_SETUP;
- ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,
+ tlb_gather_mmu(&tlb, mm);
+ ret = mprotect_fixup(&tlb, vma, &prev, vma->vm_start, vma->vm_end,
vm_flags);
+ tlb_finish_mmu(&tlb);
+
if (ret)
goto out_unlock;
BUG_ON(prev != vma);
@@ -873,11 +881,11 @@ int transfer_args_to_stack(struct linux_binprm *bprm,
for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
- char *src = kmap(bprm->page[index]) + offset;
+ char *src = kmap_local_page(bprm->page[index]) + offset;
sp -= PAGE_SIZE - offset;
if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
ret = -EFAULT;
- kunmap(bprm->page[index]);
+ kunmap_local(src);
if (ret)
goto out;
}
@@ -951,8 +959,7 @@ struct file *open_exec(const char *name)
}
EXPORT_SYMBOL(open_exec);
-#if defined(CONFIG_HAVE_AOUT) || defined(CONFIG_BINFMT_FLAT) || \
- defined(CONFIG_BINFMT_ELF_FDPIC)
+#if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC)
ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
{
ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
@@ -1017,9 +1024,9 @@ static int exec_mmap(struct mm_struct *mm)
activate_mm(active_mm, mm);
if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
local_irq_enable();
- tsk->mm->vmacache_seqnum = 0;
- vmacache_flush(tsk);
+ lru_gen_add_mm(mm);
task_unlock(tsk);
+ lru_gen_use_mm(mm);
if (old_mm) {
mmap_read_unlock(old_mm);
BUG_ON(active_mm != old_mm);
@@ -1045,7 +1052,7 @@ static int de_thread(struct task_struct *tsk)
* Kill all other threads in the thread group.
*/
spin_lock_irq(lock);
- if (signal_group_exit(sig)) {
+ if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) {
/*
* Another group action in progress, just
* return so that the signal is processed.
@@ -1054,7 +1061,7 @@ static int de_thread(struct task_struct *tsk)
return -EAGAIN;
}
- sig->group_exit_task = tsk;
+ sig->group_exec_task = tsk;
sig->notify_count = zap_other_threads(tsk);
if (!thread_group_leader(tsk))
sig->notify_count--;
@@ -1082,7 +1089,7 @@ static int de_thread(struct task_struct *tsk)
write_lock_irq(&tasklist_lock);
/*
* Do this under tasklist_lock to ensure that
- * exit_notify() can't miss ->group_exit_task
+ * exit_notify() can't miss ->group_exec_task
*/
sig->notify_count = -1;
if (likely(leader->exit_state))
@@ -1139,7 +1146,7 @@ static int de_thread(struct task_struct *tsk)
/*
* We are going to release_task()->ptrace_unlink() silently,
* the tracer can sleep in do_wait(). EXIT_DEAD guarantees
- * the tracer wont't block again waiting for this thread.
+ * the tracer won't block again waiting for this thread.
*/
if (unlikely(leader->ptrace))
__wake_up_parent(leader, leader->parent);
@@ -1149,7 +1156,7 @@ static int de_thread(struct task_struct *tsk)
release_task(leader);
}
- sig->group_exit_task = NULL;
+ sig->group_exec_task = NULL;
sig->notify_count = 0;
no_thread_group:
@@ -1162,7 +1169,7 @@ no_thread_group:
killed:
/* protects against exit_notify() and __exit_signal() */
read_lock(&tasklist_lock);
- sig->group_exit_task = NULL;
+ sig->group_exec_task = NULL;
sig->notify_count = 0;
read_unlock(&tasklist_lock);
return -EAGAIN;
@@ -1190,11 +1197,11 @@ static int unshare_sighand(struct task_struct *me)
return -ENOMEM;
refcount_set(&newsighand->count, 1);
- memcpy(newsighand->action, oldsighand->action,
- sizeof(newsighand->action));
write_lock_irq(&tasklist_lock);
spin_lock(&oldsighand->siglock);
+ memcpy(newsighand->action, oldsighand->action,
+ sizeof(newsighand->action));
rcu_assign_pointer(me->sighand, newsighand);
spin_unlock(&oldsighand->siglock);
write_unlock_irq(&tasklist_lock);
@@ -1207,7 +1214,8 @@ static int unshare_sighand(struct task_struct *me)
char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
{
task_lock(tsk);
- strncpy(buf, tsk->comm, buf_size);
+ /* Always NUL terminated and zero-padded */
+ strscpy_pad(buf, tsk->comm, buf_size);
task_unlock(tsk);
return buf;
}
@@ -1222,7 +1230,7 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
{
task_lock(tsk);
trace_task_rename(tsk, buf);
- strlcpy(tsk->comm, buf, sizeof(tsk->comm));
+ strscpy_pad(tsk->comm, buf, sizeof(tsk->comm));
task_unlock(tsk);
perf_event_comm(tsk, exec);
}
@@ -1267,7 +1275,7 @@ int begin_new_exec(struct linux_binprm * bprm)
/*
* Must be called _before_ exec_mmap() as bprm->mm is
- * not visibile until then. This also enables the update
+ * not visible until then. This also enables the update
* to be lockless.
*/
retval = set_mm_exe_file(bprm->mm, bprm->file);
@@ -1290,7 +1298,10 @@ int begin_new_exec(struct linux_binprm * bprm)
bprm->mm = NULL;
#ifdef CONFIG_POSIX_TIMERS
- exit_itimers(me->signal);
+ spin_lock_irq(&me->sighand->siglock);
+ posix_cpu_timers_exit(me);
+ spin_unlock_irq(&me->sighand->siglock);
+ exit_itimers(me);
flush_itimer_signals();
#endif
@@ -1301,13 +1312,7 @@ int begin_new_exec(struct linux_binprm * bprm)
if (retval)
goto out_unlock;
- /*
- * Ensure that the uaccess routines can actually operate on userspace
- * pointers:
- */
- force_uaccess_begin();
-
- me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
+ me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC |
PF_NOFREEZE | PF_NO_SETAFFINITY);
flush_thread();
me->personality &= ~bprm->per_clear;
@@ -1584,7 +1589,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
{
/* Handle suid and sgid on files */
struct user_namespace *mnt_userns;
- struct inode *inode;
+ struct inode *inode = file_inode(file);
unsigned int mode;
kuid_t uid;
kgid_t gid;
@@ -1595,7 +1600,6 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
if (task_no_new_privs(current))
return;
- inode = file->f_path.dentry->d_inode;
mode = READ_ONCE(inode->i_mode);
if (!(mode & (S_ISUID|S_ISGID)))
return;
@@ -1675,13 +1679,13 @@ int remove_arg_zero(struct linux_binprm *bprm)
ret = -EFAULT;
goto out;
}
- kaddr = kmap_atomic(page);
+ kaddr = kmap_local_page(page);
for (; offset < PAGE_SIZE && kaddr[offset];
offset++, bprm->p++)
;
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
put_arg_page(page);
} while (offset == PAGE_SIZE);
@@ -1877,7 +1881,7 @@ static int do_execveat_common(int fd, struct filename *filename,
* whether NPROC limit is still exceeded.
*/
if ((current->flags & PF_NPROC_EXCEEDED) &&
- is_ucounts_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
+ is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
retval = -EAGAIN;
goto out_ret;
}
@@ -1893,6 +1897,9 @@ static int do_execveat_common(int fd, struct filename *filename,
}
retval = count(argv, MAX_ARG_STRINGS);
+ if (retval == 0)
+ pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
+ current->comm, bprm->filename);
if (retval < 0)
goto out_free;
bprm->argc = retval;
@@ -1919,6 +1926,19 @@ static int do_execveat_common(int fd, struct filename *filename,
if (retval < 0)
goto out_free;
+ /*
+ * When argv is empty, add an empty string ("") as argv[0] to
+ * ensure confused userspace programs that start processing
+ * from argv[1] won't end up walking envp. See also
+ * bprm_stack_limits().
+ */
+ if (bprm->argc == 0) {
+ retval = copy_string_kernel("", bprm);
+ if (retval < 0)
+ goto out_free;
+ bprm->argc = 1;
+ }
+
retval = bprm_execve(bprm, fd, filename, flags);
out_free:
free_bprm(bprm);
@@ -1936,6 +1956,10 @@ int kernel_execve(const char *kernel_filename,
int fd = AT_FDCWD;
int retval;
+ /* It is non-sense for kernel threads to call execve */
+ if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
+ return -EINVAL;
+
filename = getname_kernel(kernel_filename);
if (IS_ERR(filename))
return PTR_ERR(filename);
@@ -1947,6 +1971,8 @@ int kernel_execve(const char *kernel_filename,
}
retval = count_strings_kernel(argv);
+ if (WARN_ON_ONCE(retval == 0))
+ retval = -EINVAL;
if (retval < 0)
goto out_free;
bprm->argc = retval;
@@ -2096,3 +2122,37 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
argv, envp, flags);
}
#endif
+
+#ifdef CONFIG_SYSCTL
+
+static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (!error)
+ validate_coredump_safety();
+ return error;
+}
+
+static struct ctl_table fs_exec_sysctls[] = {
+ {
+ .procname = "suid_dumpable",
+ .data = &suid_dumpable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_coredump,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ { }
+};
+
+static int __init init_fs_exec_sysctls(void)
+{
+ register_sysctl_init("fs", fs_exec_sysctls);
+ return 0;
+}
+
+fs_initcall(init_fs_exec_sysctls);
+#endif /* CONFIG_SYSCTL */