diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 69 |
1 files changed, 42 insertions, 27 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 86425305cd4a..48ed22774efa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -281,7 +281,7 @@ static inline void free_thread_stack(struct task_struct *tsk) MEMCG_KERNEL_STACK_KB, -(int)(PAGE_SIZE / 1024)); - memcg_kmem_uncharge(vm->pages[i], 0); + memcg_kmem_uncharge_page(vm->pages[i], 0); } for (i = 0; i < NR_CACHED_STACKS; i++) { @@ -361,6 +361,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) if (new) { *new = *orig; INIT_LIST_HEAD(&new->anon_vma_chain); + new->vm_next = new->vm_prev = NULL; } return new; } @@ -397,8 +398,8 @@ static void account_kernel_stack(struct task_struct *tsk, int account) mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, THREAD_SIZE / 1024 * account); - mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB, - account * (THREAD_SIZE / 1024)); + mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB, + account * (THREAD_SIZE / 1024)); } } @@ -413,12 +414,13 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk) for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { /* - * If memcg_kmem_charge() fails, page->mem_cgroup - * pointer is NULL, and both memcg_kmem_uncharge() + * If memcg_kmem_charge_page() fails, page->mem_cgroup + * pointer is NULL, and both memcg_kmem_uncharge_page() * and mod_memcg_page_state() in free_thread_stack() * will ignore this page. So it's safe. */ - ret = memcg_kmem_charge(vm->pages[i], GFP_KERNEL, 0); + ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, + 0); if (ret) return ret; @@ -552,14 +554,15 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (retval) goto fail_nomem_anon_vma_fork; if (tmp->vm_flags & VM_WIPEONFORK) { - /* VM_WIPEONFORK gets a clean slate in the child. */ + /* + * VM_WIPEONFORK gets a clean slate in the child. + * Don't prepare anon_vma until fault since we don't + * copy page for current vma. + */ tmp->anon_vma = NULL; - if (anon_vma_prepare(tmp)) - goto fail_nomem_anon_vma_fork; } else if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); - tmp->vm_next = tmp->vm_prev = NULL; file = tmp->vm_file; if (file) { struct inode *inode = file_inode(file); @@ -1224,7 +1227,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) struct mm_struct *mm; int err; - err = mutex_lock_killable(&task->signal->cred_guard_mutex); + err = mutex_lock_killable(&task->signal->exec_update_mutex); if (err) return ERR_PTR(err); @@ -1234,7 +1237,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) mmput(mm); mm = ERR_PTR(-EACCES); } - mutex_unlock(&task->signal->cred_guard_mutex); + mutex_unlock(&task->signal->exec_update_mutex); return mm; } @@ -1594,6 +1597,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_score_adj_min = current->signal->oom_score_adj_min; mutex_init(&sig->cred_guard_mutex); + mutex_init(&sig->exec_update_mutex); return 0; } @@ -2174,16 +2178,15 @@ static __latent_entropy struct task_struct *copy_process( INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; - cgroup_threadgroup_change_begin(current); /* * Ensure that the cgroup subsystem policies allow the new process to be * forked. It should be noted the the new process's css_set can be changed * between here and cgroup_post_fork() if an organisation operation is in * progress. */ - retval = cgroup_can_fork(p); + retval = cgroup_can_fork(p, args); if (retval) - goto bad_fork_cgroup_threadgroup_change_end; + goto bad_fork_put_pidfd; /* * From this point on we must avoid any synchronous user-space @@ -2288,8 +2291,7 @@ static __latent_entropy struct task_struct *copy_process( write_unlock_irq(&tasklist_lock); proc_fork_connector(p); - cgroup_post_fork(p); - cgroup_threadgroup_change_end(current); + cgroup_post_fork(p, args); perf_event_fork(p); trace_task_newtask(p, clone_flags); @@ -2300,9 +2302,7 @@ static __latent_entropy struct task_struct *copy_process( bad_fork_cancel_cgroup: spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); - cgroup_cancel_fork(p); -bad_fork_cgroup_threadgroup_change_end: - cgroup_threadgroup_change_end(current); + cgroup_cancel_fork(p, args); bad_fork_put_pidfd: if (clone_flags & CLONE_PIDFD) { fput(pidfile); @@ -2486,11 +2486,11 @@ long do_fork(unsigned long clone_flags, int __user *child_tidptr) { struct kernel_clone_args args = { - .flags = (clone_flags & ~CSIGNAL), + .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), .pidfd = parent_tidptr, .child_tid = child_tidptr, .parent_tid = parent_tidptr, - .exit_signal = (clone_flags & CSIGNAL), + .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), .stack = stack_start, .stack_size = stack_size, }; @@ -2508,8 +2508,9 @@ long do_fork(unsigned long clone_flags, pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { struct kernel_clone_args args = { - .flags = ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), - .exit_signal = (flags & CSIGNAL), + .flags = ((lower_32_bits(flags) | CLONE_VM | + CLONE_UNTRACED) & ~CSIGNAL), + .exit_signal = (lower_32_bits(flags) & CSIGNAL), .stack = (unsigned long)fn, .stack_size = (unsigned long)arg, }; @@ -2570,11 +2571,11 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, #endif { struct kernel_clone_args args = { - .flags = (clone_flags & ~CSIGNAL), + .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), .pidfd = parent_tidptr, .child_tid = child_tidptr, .parent_tid = parent_tidptr, - .exit_signal = (clone_flags & CSIGNAL), + .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), .stack = newsp, .tls = tls, }; @@ -2605,6 +2606,14 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, struct clone_args args; pid_t *kset_tid = kargs->set_tid; + BUILD_BUG_ON(offsetofend(struct clone_args, tls) != + CLONE_ARGS_SIZE_VER0); + BUILD_BUG_ON(offsetofend(struct clone_args, set_tid_size) != + CLONE_ARGS_SIZE_VER1); + BUILD_BUG_ON(offsetofend(struct clone_args, cgroup) != + CLONE_ARGS_SIZE_VER2); + BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2); + if (unlikely(usize > PAGE_SIZE)) return -E2BIG; if (unlikely(usize < CLONE_ARGS_SIZE_VER0)) @@ -2631,6 +2640,10 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, !valid_signal(args.exit_signal))) return -EINVAL; + if ((args.flags & CLONE_INTO_CGROUP) && + (args.cgroup > INT_MAX || usize < CLONE_ARGS_SIZE_VER2)) + return -EINVAL; + *kargs = (struct kernel_clone_args){ .flags = args.flags, .pidfd = u64_to_user_ptr(args.pidfd), @@ -2641,6 +2654,7 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, .stack_size = args.stack_size, .tls = args.tls, .set_tid_size = args.set_tid_size, + .cgroup = args.cgroup, }; if (args.set_tid && @@ -2684,7 +2698,8 @@ static inline bool clone3_stack_valid(struct kernel_clone_args *kargs) static bool clone3_args_valid(struct kernel_clone_args *kargs) { /* Verify that no unknown flags are passed along. */ - if (kargs->flags & ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND)) + if (kargs->flags & + ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP)) return false; /* |