about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/exit.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/exit.c')
-rw-r--r-- kernel/exit.c 142
1 files changed, 82 insertions, 60 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index f702a6a63686..35e0a31a0315 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,7 +49,8 @@
#include <linux/audit.h> /* for audit_free() */
#include <linux/resource.h>
#include <linux/task_io_accounting_ops.h>
-#include <linux/tracehook.h>
+#include <linux/blkdev.h>
+#include <linux/task_work.h>
#include <linux/fs_struct.h>
#include <linux/init_task.h>
#include <linux/perf_event.h>
@@ -59,11 +60,13 @@
#include <linux/writeback.h>
#include <linux/shm.h>
#include <linux/kcov.h>
+#include <linux/kmsan.h>
#include <linux/random.h>
#include <linux/rcuwait.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/kprobes.h>
+#include <linux/rethook.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -116,7 +119,7 @@ static void __exit_signal(struct task_struct *tsk)
* then notify it:
*/
if (sig->notify_count > 0 && !--sig->notify_count)
- wake_up_process(sig->group_exit_task);
+ wake_up_process(sig->group_exec_task);
if (tsk == sig->curr_target)
sig->curr_target = next_thread(tsk);
@@ -169,6 +172,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
kprobe_flush_task(tsk);
+ rethook_flush_task(tsk);
perf_event_delayed_put(tsk);
trace_sched_process_free(tsk);
put_task_struct(tsk);
@@ -180,6 +184,10 @@ void put_task_struct_rcu_user(struct task_struct *task)
call_rcu(&task->rcu, delayed_put_task_struct);
}
+/*
+ * release_thread - default, empty implementation.
+ * @dead_task: the task being released; not touched here.
+ *
+ * NOTE(review): the __weak attribute suggests this is a fallback that
+ * other code (presumably architecture code) may override with a real
+ * implementation - confirm against the callers and arch trees.
+ */
+void __weak release_thread(struct task_struct *dead_task)
+{
+}
+
void release_task(struct task_struct *p)
{
struct task_struct *leader;
@@ -371,10 +379,10 @@ static void coredump_task_exit(struct task_struct *tsk)
complete(&core_state->startup);
for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
if (!self.task) /* see coredump_finish() */
break;
- freezable_schedule();
+ schedule();
}
__set_current_state(TASK_RUNNING);
}
@@ -463,6 +471,7 @@ assign_new_owner:
goto retry;
}
WRITE_ONCE(mm->owner, c);
+ lru_gen_migrate_mm(mm);
task_unlock(c);
put_task_struct(c);
}
@@ -697,7 +706,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
/* mt-exec, de_thread() is waiting for group leader */
if (unlikely(tsk->signal->notify_count < 0))
- wake_up_process(tsk->signal->group_exit_task);
+ wake_up_process(tsk->signal->group_exec_task);
write_unlock_irq(&tasklist_lock);
list_for_each_entry_safe(p, n, &dead, ptrace_entry) {
@@ -730,60 +739,39 @@ static void check_stack_usage(void)
static inline void check_stack_usage(void) {}
#endif
+/*
+ * synchronize_group_exit - bookkeeping done first thing in do_exit().
+ * @tsk:  the exiting task
+ * @code: exit code passed to do_exit()
+ *
+ * Under @tsk's siglock, drop signal->quick_threads by one.  If that
+ * brings the count to zero and the group is not already flagged with
+ * SIGNAL_GROUP_EXIT, mark the whole group as exiting: the flags word is
+ * overwritten with SIGNAL_GROUP_EXIT, @code is recorded as the group
+ * exit code and any pending group stop count is cleared.
+ */
+static void synchronize_group_exit(struct task_struct *tsk, long code)
+{
+	struct signal_struct *sig = tsk->signal;
+	struct sighand_struct *sh = tsk->sighand;
+
+	spin_lock_irq(&sh->siglock);
+
+	/* The decrement happens unconditionally, before any check. */
+	sig->quick_threads--;
+
+	/* Nothing more to do unless the count just reached zero... */
+	if (sig->quick_threads != 0)
+		goto out_unlock;
+	/* ...and nobody has already started a group exit. */
+	if (sig->flags & SIGNAL_GROUP_EXIT)
+		goto out_unlock;
+
+	sig->flags = SIGNAL_GROUP_EXIT;
+	sig->group_exit_code = code;
+	sig->group_stop_count = 0;
+
+out_unlock:
+	spin_unlock_irq(&sh->siglock);
+}
+
void __noreturn do_exit(long code)
{
struct task_struct *tsk = current;
int group_dead;
- /*
- * We can get here from a kernel oops, sometimes with preemption off.
- * Start by checking for critical errors.
- * Then fix up important state like USER_DS and preemption.
- * Then do everything else.
- */
-
- WARN_ON(blk_needs_flush_plug(tsk));
-
- if (unlikely(in_interrupt()))
- panic("Aiee, killing interrupt handler!");
- if (unlikely(!tsk->pid))
- panic("Attempted to kill the idle task!");
-
- /*
- * If do_exit is called because this processes oopsed, it's possible
- * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
- * continuing. Amongst other possible reasons, this is to prevent
- * mm_release()->clear_child_tid() from writing to a user-controlled
- * kernel address.
- */
- force_uaccess_begin();
+ synchronize_group_exit(tsk, code);
- if (unlikely(in_atomic())) {
- pr_info("note: %s[%d] exited with preempt_count %d\n",
- current->comm, task_pid_nr(current),
- preempt_count());
- preempt_count_set(PREEMPT_ENABLED);
- }
+ WARN_ON(tsk->plug);
- profile_task_exit(tsk);
kcov_task_exit(tsk);
+ kmsan_task_exit(tsk);
coredump_task_exit(tsk);
ptrace_event(PTRACE_EVENT_EXIT, code);
validate_creds_for_do_exit(tsk);
- /*
- * We're taking recursive faults here in do_exit. Safest is to just
- * leave this task alone and wait for reboot.
- */
- if (unlikely(tsk->flags & PF_EXITING)) {
- pr_alert("Fixing recursive fault but reboot is needed!\n");
- futex_exit_recursive(tsk);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule();
- }
-
io_uring_files_cancel();
exit_signals(tsk); /* sets PF_EXITING */
@@ -803,7 +791,7 @@ void __noreturn do_exit(long code)
#ifdef CONFIG_POSIX_TIMERS
hrtimer_cancel(&tsk->signal->real_timer);
- exit_itimers(tsk->signal);
+ exit_itimers(tsk);
#endif
if (tsk->mm)
setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
@@ -871,6 +859,7 @@ void __noreturn do_exit(long code)
put_page(tsk->task_frag.page);
validate_creds_for_do_exit(tsk);
+ exit_task_stack_account(tsk);
check_stack_usage();
preempt_disable();
@@ -882,16 +871,46 @@ void __noreturn do_exit(long code)
lockdep_free_task(tsk);
do_task_dead();
}
-EXPORT_SYMBOL_GPL(do_exit);
-void complete_and_exit(struct completion *comp, long code)
+void __noreturn make_task_dead(int signr)
{
- if (comp)
- complete(comp);
+ /*
+ * Take the task off the cpu after something catastrophic has
+ * happened.
+ *
+ * We can get here from a kernel oops, sometimes with preemption off.
+ * Start by checking for critical errors: being in interrupt context
+ * or killing the idle task (pid 0) both panic.
+ * Then fix up important state: if we are still atomic, log the
+ * preempt count and reset it to PREEMPT_ENABLED.
+ * Then, unless the task is already exiting (handled further down),
+ * hand over to do_exit(signr) for everything else.
+ */
+ struct task_struct *tsk = current;
+
+ if (unlikely(in_interrupt()))
+ panic("Aiee, killing interrupt handler!");
+ if (unlikely(!tsk->pid))
+ panic("Attempted to kill the idle task!");
- do_exit(code);
+ if (unlikely(in_atomic())) {
+ pr_info("note: %s[%d] exited with preempt_count %d\n",
+ current->comm, task_pid_nr(current),
+ preempt_count());
+ preempt_count_set(PREEMPT_ENABLED);
+ }
+
+ /*
+ * We're taking recursive faults here in make_task_dead. Safest is to just
+ * leave this task alone and wait for reboot.
+ *
+ * PF_EXITING already being set means the task got here again while
+ * it was exiting. Instead of calling do_exit() a second time, the
+ * task is marked EXIT_DEAD and handed to do_task_dead().
+ * NOTE(review): do_task_dead() is presumably not expected to return,
+ * and the extra rcu_users reference presumably keeps the task_struct
+ * from being freed - confirm both.
+ */
+ if (unlikely(tsk->flags & PF_EXITING)) {
+ pr_alert("Fixing recursive fault but reboot is needed!\n");
+ futex_exit_recursive(tsk);
+ tsk->exit_state = EXIT_DEAD;
+ refcount_inc(&tsk->rcu_users);
+ do_task_dead();
+ }
+
+ do_exit(signr);
}
-EXPORT_SYMBOL(complete_and_exit);
SYSCALL_DEFINE1(exit, int, error_code)
{
@@ -902,22 +921,24 @@ SYSCALL_DEFINE1(exit, int, error_code)
* Take down every thread in the group. This is called by fatal signals
* as well as by sys_exit_group (below).
*/
-void
+void __noreturn
do_group_exit(int exit_code)
{
struct signal_struct *sig = current->signal;
- BUG_ON(exit_code & 0x80); /* core dumps don't get here */
-
- if (signal_group_exit(sig))
+ if (sig->flags & SIGNAL_GROUP_EXIT)
exit_code = sig->group_exit_code;
- else if (!thread_group_empty(current)) {
+ else if (sig->group_exec_task)
+ exit_code = 0;
+ else {
struct sighand_struct *const sighand = current->sighand;
spin_lock_irq(&sighand->siglock);
- if (signal_group_exit(sig))
+ if (sig->flags & SIGNAL_GROUP_EXIT)
/* Another thread got here before we took the lock. */
exit_code = sig->group_exit_code;
+ else if (sig->group_exec_task)
+ exit_code = 0;
else {
sig->group_exit_code = exit_code;
sig->flags = SIGNAL_GROUP_EXIT;
@@ -1012,7 +1033,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
return 0;
if (unlikely(wo->wo_flags & WNOWAIT)) {
- status = p->exit_code;
+ status = (p->signal->flags & SIGNAL_GROUP_EXIT)
+ ? p->signal->group_exit_code : p->exit_code;
get_task_struct(p);
read_unlock(&tasklist_lock);
sched_annotate_sleep();
@@ -1054,7 +1076,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
* p->signal fields because the whole thread group is dead
* and nobody can change them.
*
- * psig->stats_lock also protects us from our sub-theads
+ * psig->stats_lock also protects us from our sub-threads
* which can reap other children at the same time. Until
* we change k_getrusage()-like users to rely on this lock
* we have to take ->siglock as well.