aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c135
1 files changed, 103 insertions, 32 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 9440d61b925c..d896e9ca38b0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -303,11 +303,37 @@ struct kmem_cache *files_cachep;
struct kmem_cache *fs_cachep;
/* SLAB cache for vm_area_struct structures */
-struct kmem_cache *vm_area_cachep;
+static struct kmem_cache *vm_area_cachep;
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
+struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+
+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+ if (vma)
+ vma_init(vma, mm);
+ return vma;
+}
+
+struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
+{
+ struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+
+ if (new) {
+ *new = *orig;
+ INIT_LIST_HEAD(&new->anon_vma_chain);
+ }
+ return new;
+}
+
+void vm_area_free(struct vm_area_struct *vma)
+{
+ kmem_cache_free(vm_area_cachep, vma);
+}
+
static void account_kernel_stack(struct task_struct *tsk, int account)
{
void *stack = task_stack_page(tsk);
@@ -455,11 +481,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
goto fail_nomem;
charge = len;
}
- tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+ tmp = vm_area_dup(mpnt);
if (!tmp)
goto fail_nomem;
- *tmp = *mpnt;
- INIT_LIST_HEAD(&tmp->anon_vma_chain);
retval = vma_dup_policy(mpnt, tmp);
if (retval)
goto fail_nomem_policy;
@@ -539,7 +563,7 @@ fail_uprobe_end:
fail_nomem_anon_vma_fork:
mpol_put(vma_policy(tmp));
fail_nomem_policy:
- kmem_cache_free(vm_area_cachep, tmp);
+ vm_area_free(tmp);
fail_nomem:
retval = -ENOMEM;
vm_unacct_memory(charge);
@@ -843,6 +867,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->fail_nth = 0;
#endif
+#ifdef CONFIG_BLK_CGROUP
+ tsk->throttle_queue = NULL;
+ tsk->use_memdelay = 0;
+#endif
+
+#ifdef CONFIG_MEMCG
+ tsk->active_memcg = NULL;
+#endif
return tsk;
free_stack:
@@ -1270,6 +1302,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
tsk->nvcsw = tsk->nivcsw = 0;
#ifdef CONFIG_DETECT_HUNG_TASK
tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+ tsk->last_switch_time = 0;
#endif
tsk->mm = NULL;
@@ -1394,7 +1427,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
return -ENOMEM;
atomic_set(&sig->count, 1);
+ spin_lock_irq(&current->sighand->siglock);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+ spin_unlock_irq(&current->sighand->siglock);
return 0;
}
@@ -1456,6 +1491,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
init_waitqueue_head(&sig->wait_chldexit);
sig->curr_target = tsk;
init_sigpending(&sig->shared_pending);
+ INIT_HLIST_HEAD(&sig->multiprocess);
seqlock_init(&sig->stats_lock);
prev_cputime_init(&sig->prev_cputime);
@@ -1549,10 +1585,22 @@ static void posix_cpu_timers_init(struct task_struct *tsk)
static inline void posix_cpu_timers_init(struct task_struct *tsk) { }
#endif
+static inline void init_task_pid_links(struct task_struct *task)
+{
+ enum pid_type type;
+
+ for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
+ INIT_HLIST_NODE(&task->pid_links[type]);
+ }
+}
+
static inline void
init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
{
- task->pids[type].pid = pid;
+ if (type == PIDTYPE_PID)
+ task->thread_pid = pid;
+ else
+ task->signal->pids[type] = pid;
}
static inline void rcu_copy_process(struct task_struct *p)
@@ -1590,6 +1638,7 @@ static __latent_entropy struct task_struct *copy_process(
{
int retval;
struct task_struct *p;
+ struct multiprocess_signals delayed;
/*
* Don't allow sharing the root directory with processes in a different
@@ -1637,6 +1686,24 @@ static __latent_entropy struct task_struct *copy_process(
return ERR_PTR(-EINVAL);
}
+ /*
+ * Force any signals received before this point to be delivered
+ * before the fork happens. Collect up signals sent to multiple
+ * processes that happen during the fork and delay them so that
+ * they appear to happen after the fork.
+ */
+ sigemptyset(&delayed.signal);
+ INIT_HLIST_NODE(&delayed.node);
+
+ spin_lock_irq(&current->sighand->siglock);
+ if (!(clone_flags & CLONE_THREAD))
+ hlist_add_head(&delayed.node, &current->signal->multiprocess);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+ retval = -ERESTARTNOINTR;
+ if (signal_pending(current))
+ goto fork_out;
+
retval = -ENOMEM;
p = dup_task_struct(current, node);
if (!p)
@@ -1910,29 +1977,26 @@ static __latent_entropy struct task_struct *copy_process(
rseq_fork(p, clone_flags);
- /*
- * Process group and session signals need to be delivered to just the
- * parent before the fork or both the parent and the child after the
- * fork. Restart if a signal comes in before we add the new process to
- * it's process group.
- * A fatal signal pending means that current will exit, so the new
- * thread can't slip out of an OOM kill (or normal SIGKILL).
- */
- recalc_sigpending();
- if (signal_pending(current)) {
- retval = -ERESTARTNOINTR;
- goto bad_fork_cancel_cgroup;
- }
+ /* Don't start children in a dying pid namespace */
if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) {
retval = -ENOMEM;
goto bad_fork_cancel_cgroup;
}
+ /* Let kill terminate clone/fork in the middle */
+ if (fatal_signal_pending(current)) {
+ retval = -EINTR;
+ goto bad_fork_cancel_cgroup;
+ }
+
+
+ init_task_pid_links(p);
if (likely(p->pid)) {
ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
init_task_pid(p, PIDTYPE_PID, pid);
if (thread_group_leader(p)) {
+ init_task_pid(p, PIDTYPE_TGID, pid);
init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
init_task_pid(p, PIDTYPE_SID, task_session(current));
@@ -1940,8 +2004,7 @@ static __latent_entropy struct task_struct *copy_process(
ns_of_pid(pid)->child_reaper = p;
p->signal->flags |= SIGNAL_UNKILLABLE;
}
-
- p->signal->leader_pid = pid;
+ p->signal->shared_pending.signal = delayed.signal;
p->signal->tty = tty_kref_get(current->signal->tty);
/*
* Inherit has_child_subreaper flag under the same
@@ -1952,6 +2015,7 @@ static __latent_entropy struct task_struct *copy_process(
p->real_parent->signal->is_child_subreaper;
list_add_tail(&p->sibling, &p->real_parent->children);
list_add_tail_rcu(&p->tasks, &init_task.tasks);
+ attach_pid(p, PIDTYPE_TGID);
attach_pid(p, PIDTYPE_PGID);
attach_pid(p, PIDTYPE_SID);
__this_cpu_inc(process_counts);
@@ -1959,6 +2023,7 @@ static __latent_entropy struct task_struct *copy_process(
current->signal->nr_threads++;
atomic_inc(&current->signal->live);
atomic_inc(&current->signal->sigcnt);
+ task_join_group_stop(p);
list_add_tail_rcu(&p->thread_group,
&p->group_leader->thread_group);
list_add_tail_rcu(&p->thread_node,
@@ -1967,8 +2032,8 @@ static __latent_entropy struct task_struct *copy_process(
attach_pid(p, PIDTYPE_PID);
nr_threads++;
}
-
total_forks++;
+ hlist_del_init(&delayed.node);
spin_unlock(&current->sighand->siglock);
syscall_tracepoint_update(p);
write_unlock_irq(&tasklist_lock);
@@ -2033,16 +2098,19 @@ bad_fork_free:
put_task_stack(p);
free_task(p);
fork_out:
+ spin_lock_irq(&current->sighand->siglock);
+ hlist_del_init(&delayed.node);
+ spin_unlock_irq(&current->sighand->siglock);
return ERR_PTR(retval);
}
-static inline void init_idle_pids(struct pid_link *links)
+static inline void init_idle_pids(struct task_struct *idle)
{
enum pid_type type;
for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
- INIT_HLIST_NODE(&links[type].node); /* not really needed */
- links[type].pid = &init_struct_pid;
+ INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */
+ init_task_pid(idle, type, &init_struct_pid);
}
}
@@ -2052,7 +2120,7 @@ struct task_struct *fork_idle(int cpu)
task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
cpu_to_node(cpu));
if (!IS_ERR(task)) {
- init_idle_pids(task->pids);
+ init_idle_pids(task);
init_idle(task, cpu);
}
@@ -2253,6 +2321,8 @@ static void sighand_ctor(void *data)
void __init proc_caches_init(void)
{
+ unsigned int mm_size;
+
sighand_cachep = kmem_cache_create("sighand_cache",
sizeof(struct sighand_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -2269,15 +2339,16 @@ void __init proc_caches_init(void)
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
NULL);
+
/*
- * FIXME! The "sizeof(struct mm_struct)" currently includes the
- * whole struct cpumask for the OFFSTACK case. We could change
- * this to *only* allocate as much of it as required by the
- * maximum number of CPU's we can ever have. The cpumask_allocation
- * is at the end of the structure, exactly for that reason.
+ * The mm_cpumask is located at the end of mm_struct, and is
+ * dynamically sized based on the maximum CPU number this system
+ * can have, taking hotplug into account (nr_cpu_ids).
*/
+ mm_size = sizeof(struct mm_struct) + cpumask_size();
+
mm_cachep = kmem_cache_create_usercopy("mm_struct",
- sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
+ mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
offsetof(struct mm_struct, saved_auxv),
sizeof_field(struct mm_struct, saved_auxv),