author     Ingo Molnar <mingo@kernel.org>    2020-11-07 12:50:48 +0100
committer  Ingo Molnar <mingo@kernel.org>    2020-11-07 13:20:17 +0100
commit     666fab4a3ea143315a9c059fad9f3a0f1365d54b (patch)
tree       e9e4be3b0eeac79346d52a86183326617d0c9999 /kernel/fork.c
parent     Merge branch 'linus' into perf/kprobes (diff)
parent     Merge tag 'ceph-for-5.10-rc3' of git://github.com/ceph/ceph-client (diff)
Merge branch 'linus' into perf/kprobes
Conflicts:
	include/asm-generic/atomic-instrumented.h
	kernel/kprobes.c

Use the upstream atomic-instrumented.h checksum, and pick the kprobes
version of kernel/kprobes.c, which effectively reverts this upstream
workaround:

  645f224e7ba2: ("kprobes: Tell lockdep about kprobe nesting")

Since the new code *should* be fine without nesting.

Knock on wood ...

Signed-off-by: Ingo Molnar <mingo@kernel.org>
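For background, the reverted workaround annotated intentional lock recursion so that lockdep would not flag it. A minimal illustration of that general technique, using the stock lockdep subclass API (a generic sketch with made-up bucket locks, not the literal contents of 645f224e7ba2):

    #include <linux/spinlock.h>

    #define NBUCKETS 16
    static raw_spinlock_t bucket_lock[NBUCKETS];

    static void demo_init(void)
    {
            int i;

            /* Locks initialized at one call site share a lockdep class. */
            for (i = 0; i < NBUCKETS; i++)
                    raw_spin_lock_init(&bucket_lock[i]);
    }

    static void demo_nested(int outer, int inner)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&bucket_lock[outer], flags);
            /*
             * Second instance of the same lock class: the subclass
             * annotation tells lockdep the nesting is intentional,
             * so it is not reported as a self-deadlock.
             */
            raw_spin_lock_nested(&bucket_lock[inner], SINGLE_DEPTH_NESTING);
            /* ... critical section ... */
            raw_spin_unlock(&bucket_lock[inner]);
            raw_spin_unlock_irqrestore(&bucket_lock[outer], flags);
    }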
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--  kernel/fork.c  52
1 file changed, 40 insertions(+), 12 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 53a1f508a097..b9c289d0f4ef 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -95,6 +95,7 @@
#include <linux/stackleak.h>
#include <linux/kasan.h>
#include <linux/scs.h>
+#include <linux/io_uring.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -555,10 +556,10 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
get_file(file);
if (tmp->vm_flags & VM_DENYWRITE)
- atomic_dec(&inode->i_writecount);
+ put_write_access(inode);
i_mmap_lock_write(mapping);
if (tmp->vm_flags & VM_SHARED)
- atomic_inc(&mapping->i_mmap_writable);
+ mapping_allow_writable(mapping);
flush_dcache_mmap_lock(mapping);
/* insert tmp into the share list, just after mpnt */
vma_interval_tree_insert_after(tmp, mpnt,
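The two edits above swap open-coded atomics for named helpers. Judging only from the lines they replace, the helpers should reduce to the same operations; a sketch of what they presumably look like (the real definitions live in the fs headers):

    /* Inferred from the open-coded atomics replaced above. */
    static inline void put_write_access(struct inode *inode)
    {
            /* i_writecount: positive counts writers, negative counts
             * deny-write holds; dropping one ref is a plain decrement. */
            atomic_dec(&inode->i_writecount);
    }

    static inline void mapping_allow_writable(struct address_space *mapping)
    {
            /* Count one more shared, writable mapping of the file. */
            atomic_inc(&mapping->i_mmap_writable);
    }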
@@ -589,7 +590,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
mm->map_count++;
if (!(tmp->vm_flags & VM_WIPEONFORK))
- retval = copy_page_range(mm, oldmm, mpnt);
+ retval = copy_page_range(tmp, mpnt);
if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
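copy_page_range() also changes shape here: the two mm_struct arguments go away and the callee now receives the new and the old VMA directly. Inferring from this call site alone (not quoted from mm/memory.c), the prototype would be something like:

    /*
     * Inferred from the call above: copy the parent's page tables
     * covering one VMA into the child. Each VMA carries a vm_mm
     * back-pointer, so both mm_structs stay reachable.
     */
    int copy_page_range(struct vm_area_struct *dst_vma,
                        struct vm_area_struct *src_vma);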
@@ -728,6 +729,7 @@ void __put_task_struct(struct task_struct *tsk)
WARN_ON(refcount_read(&tsk->usage));
WARN_ON(tsk == current);
+ io_uring_free(tsk);
cgroup_free(tsk);
task_numa_free(tsk, true);
security_task_free(tsk);
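__put_task_struct() now tears down any io_uring state the task owned, matching the new <linux/io_uring.h> include at the top of the file. A plausible shape for the helper, assuming the kernel's usual pattern of an inline NULL-check guard in the header (the __io_uring_free() name is an assumption here):

    /* Sketch, assuming the common inline-guard pattern. */
    static inline void io_uring_free(struct task_struct *tsk)
    {
            /* tsk->io_uring is only allocated on first io_uring use. */
            if (tsk->io_uring)
                    __io_uring_free(tsk);   /* assumed out-of-line teardown */
    }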
@@ -1011,6 +1013,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm_pgtables_bytes_init(mm);
mm->map_count = 0;
mm->locked_vm = 0;
+ atomic_set(&mm->has_pinned, 0);
atomic64_set(&mm->pinned_vm, 0);
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
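mm_init() now also zeroes a has_pinned flag alongside pinned_vm. A flag like this is only ever raised, never counted, so a pinning path can test before setting to avoid dirtying a shared cache line on every pin; a hedged sketch with a made-up helper name:

    /*
     * Hypothetical pin_user_pages()-style caller: record that this mm
     * has ever had pages pinned, so fork's copy-on-write logic can be
     * conservative about maybe-pinned pages.
     */
    static void mark_mm_pinned(struct mm_struct *mm)
    {
            /* Read first: skips the atomic write on the hot path. */
            if (!atomic_read(&mm->has_pinned))
                    atomic_set(&mm->has_pinned, 1);
    }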
@@ -1809,6 +1812,25 @@ static __always_inline void delayed_free_task(struct task_struct *tsk)
free_task(tsk);
}
+static void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk)
+{
+ /* Skip if kernel thread */
+ if (!tsk->mm)
+ return;
+
+ /* Skip if spawning a thread or using vfork */
+ if ((clone_flags & (CLONE_VM | CLONE_THREAD | CLONE_VFORK)) != CLONE_VM)
+ return;
+
+ /* We need to synchronize with __set_oom_adj */
+ mutex_lock(&oom_adj_mutex);
+ set_bit(MMF_MULTIPROCESS, &tsk->mm->flags);
+ /* Update the values in case they were changed after copy_signal */
+ tsk->signal->oom_score_adj = current->signal->oom_score_adj;
+ tsk->signal->oom_score_adj_min = current->signal->oom_score_adj_min;
+ mutex_unlock(&oom_adj_mutex);
+}
+
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
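The flag test in copy_oom_score_adj() selects exactly one case: CLONE_VM set with neither CLONE_THREAD nor CLONE_VFORK, i.e. two distinct processes sharing one mm. A userspace demo of that combination (sample program, not from the kernel tree):

    /* Two processes, one mm: the case MMF_MULTIPROCESS marks above. */
    #define _GNU_SOURCE
    #include <sched.h>
    #include <signal.h>
    #include <stdlib.h>
    #include <sys/wait.h>

    static int child_fn(void *arg)
    {
            return 0;   /* full process, but shares the parent's mm */
    }

    int main(void)
    {
            const size_t sz = 1024 * 1024;
            char *stack = malloc(sz);

            if (!stack)
                    return 1;
            /* CLONE_VM alone (plus SIGCHLD so we can wait) triggers the
             * oom_score_adj propagation; a plain fork() or a
             * CLONE_THREAD/CLONE_VFORK clone is skipped. */
            pid_t pid = clone(child_fn, stack + sz, CLONE_VM | SIGCHLD, NULL);
            if (pid > 0)
                    waitpid(pid, NULL, 0);
            free(stack);
            return 0;
    }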
@@ -1982,6 +2004,10 @@ static __latent_entropy struct task_struct *copy_process(
p->vtime.state = VTIME_INACTIVE;
#endif
+#ifdef CONFIG_IO_URING
+ p->io_uring = NULL;
+#endif
+
#if defined(SPLIT_RSS_COUNTING)
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif
@@ -2167,7 +2193,7 @@ static __latent_entropy struct task_struct *copy_process(
/*
* Ensure that the cgroup subsystem policies allow the new process to be
- * forked. It should be noted the the new process's css_set can be changed
+ * forked. It should be noted that the new process's css_set can be changed
* between here and cgroup_post_fork() if an organisation operation is in
* progress.
*/
@@ -2285,6 +2311,8 @@ static __latent_entropy struct task_struct *copy_process(
trace_task_newtask(p, clone_flags);
uprobe_copy_process(p, clone_flags);
+ copy_oom_score_adj(clone_flags, p);
+
return p;
bad_fork_cancel_cgroup:
@@ -2388,14 +2416,14 @@ struct mm_struct *copy_init_mm(void)
*
* args->exit_signal is expected to be checked for sanity by the caller.
*/
-long _do_fork(struct kernel_clone_args *args)
+pid_t kernel_clone(struct kernel_clone_args *args)
{
u64 clone_flags = args->flags;
struct completion vfork;
struct pid *pid;
struct task_struct *p;
int trace = 0;
- long nr;
+ pid_t nr;
/*
* For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument
@@ -2481,7 +2509,7 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
.stack_size = (unsigned long)arg,
};
- return _do_fork(&args);
+ return kernel_clone(&args);
}
#ifdef __ARCH_WANT_SYS_FORK
@@ -2492,7 +2520,7 @@ SYSCALL_DEFINE0(fork)
.exit_signal = SIGCHLD,
};
- return _do_fork(&args);
+ return kernel_clone(&args);
#else
/* can not support in nommu mode */
return -EINVAL;
@@ -2508,7 +2536,7 @@ SYSCALL_DEFINE0(vfork)
.exit_signal = SIGCHLD,
};
- return _do_fork(&args);
+ return kernel_clone(&args);
}
#endif
@@ -2546,7 +2574,7 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
.tls = tls,
};
- return _do_fork(&args);
+ return kernel_clone(&args);
}
#endif
@@ -2704,7 +2732,7 @@ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
if (!clone3_args_valid(&kargs))
return -EINVAL;
- return _do_fork(&kargs);
+ return kernel_clone(&kargs);
}
#endif
@@ -2867,7 +2895,7 @@ int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
/*
* unshare allows a process to 'unshare' part of the process
* context which was originally shared using clone. copy_*
- * functions used by _do_fork() cannot be used here directly
+ * functions used by kernel_clone() cannot be used here directly
* because they modify an inactive task_struct that is being
* constructed. Here we are modifying the current, active,
* task_struct.