From 3b9b8ab65d8eed784b9164d03807cb2bda7b5cd6 Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Fri, 29 Sep 2006 02:00:05 -0700 Subject: [PATCH] Fix unserialized task->files changing Fixed race on put_files_struct on exec with proc. Restoring files on current on error path may lead to proc having a pointer to already kfree-d files_struct. ->files changing at exit.c and khtread.c are safe as exit_files() makes all things under lock. Found during OpenVZ stress testing. [akpm@osdl.org: add export] Signed-off-by: Pavel Emelianov Signed-off-by: Kirill Korotaev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index d891883420f7..4b6fb054b25d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -487,6 +487,18 @@ void fastcall put_files_struct(struct files_struct *files) EXPORT_SYMBOL(put_files_struct); +void reset_files_struct(struct task_struct *tsk, struct files_struct *files) +{ + struct files_struct *old; + + old = tsk->files; + task_lock(tsk); + tsk->files = files; + task_unlock(tsk); + put_files_struct(old); +} +EXPORT_SYMBOL(reset_files_struct); + static inline void __exit_files(struct task_struct *tsk) { struct files_struct * files = tsk->files; -- cgit v1.3-8-gc7d7 From f400e198b2ed26ce55b22a1412ded0896e7516ac Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Fri, 29 Sep 2006 02:00:07 -0700 Subject: [PATCH] pidspace: is_init() This is an updated version of Eric Biederman's is_init() patch. (http://lkml.org/lkml/2006/2/6/280). It applies cleanly to 2.6.18-rc3 and replaces a few more instances of ->pid == 1 with is_init(). Further, is_init() checks pid and thus removes dependency on Eric's other patches for now. Eric's original description: There are a lot of places in the kernel where we test for init because we give it special properties. Most significantly init must not die. This results in code all over the kernel test ->pid == 1. Introduce is_init to capture this case. With multiple pid spaces for all of the cases affected we are looking for only the first process on the system, not some other process that has pid == 1. Signed-off-by: Eric W. Biederman Signed-off-by: Sukadev Bhattiprolu Cc: Dave Hansen Cc: Serge Hallyn Cc: Cedric Le Goater Cc: Acked-by: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/mm/fault.c | 2 +- arch/arm/mm/fault.c | 2 +- arch/arm26/mm/fault.c | 2 +- arch/i386/lib/usercopy.c | 2 +- arch/i386/mm/fault.c | 2 +- arch/ia64/mm/fault.c | 2 +- arch/m32r/mm/fault.c | 2 +- arch/m68k/mm/fault.c | 2 +- arch/mips/mm/fault.c | 2 +- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/platforms/pseries/ras.c | 2 +- arch/ppc/kernel/traps.c | 2 +- arch/ppc/mm/fault.c | 2 +- arch/s390/mm/fault.c | 2 +- arch/sh/mm/fault.c | 2 +- arch/sh64/mm/fault.c | 6 +++--- arch/um/kernel/trap.c | 2 +- arch/x86_64/mm/fault.c | 4 ++-- arch/xtensa/mm/fault.c | 2 +- drivers/char/sysrq.c | 2 +- include/linux/sched.h | 10 ++++++++++ kernel/capability.c | 2 +- kernel/cpuset.c | 2 +- kernel/exit.c | 2 +- kernel/kexec.c | 2 +- kernel/ptrace.c | 1 + kernel/sysctl.c | 2 +- mm/oom_kill.c | 2 +- security/commoncap.c | 2 +- 29 files changed, 41 insertions(+), 30 deletions(-) (limited to 'kernel/exit.c') diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 622dabd84680..8871529a34e2 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -193,7 +193,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, /* We ran out of memory, or some other thing happened to us that made us unable to handle the page fault gracefully. */ out_of_memory: - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index a5b33ff3924e..5e658a874498 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -198,7 +198,7 @@ survive: return fault; } - if (tsk->pid != 1) + if (!is_init(tsk)) goto out; /* diff --git a/arch/arm26/mm/fault.c b/arch/arm26/mm/fault.c index a7c4cc922095..a1f6d8a9cc32 100644 --- a/arch/arm26/mm/fault.c +++ b/arch/arm26/mm/fault.c @@ -185,7 +185,7 @@ survive: } fault = -3; /* out of memory */ - if (tsk->pid != 1) + if (!is_init(tsk)) goto out; /* diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c index efc7e7d5f4d0..08502fc6d0cb 100644 --- a/arch/i386/lib/usercopy.c +++ b/arch/i386/lib/usercopy.c @@ -739,7 +739,7 @@ survive: retval = get_user_pages(current, current->mm, (unsigned long )to, 1, 1, 0, &pg, NULL); - if (retval == -ENOMEM && current->pid == 1) { + if (retval == -ENOMEM && is_init(current)) { up_read(¤t->mm->mmap_sem); blk_congestion_wait(WRITE, HZ/50); goto survive; diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 50d8617391dd..2581575786c1 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -589,7 +589,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index d8b1b4ac7f26..59f3ab937615 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -280,7 +280,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index dc18a33eefef..8d5f551b5754 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -299,7 +299,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 5e2d87c10c87..911f2ce3f53e 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -181,7 +181,7 @@ good_area: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index a4f8c45c4e8e..8423d8590779 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -171,7 +171,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 77953f41d754..e8fa50624b70 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -386,7 +386,7 @@ bad_area_nosemaphore: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 903115d67fdc..311ed1993fc0 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -337,7 +337,7 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err) err->disposition == RTAS_DISP_NOT_RECOVERED && err->target == RTAS_TARGET_MEMORY && err->type == RTAS_TYPE_ECC_UNCORR && - !(current->pid == 0 || current->pid == 1)) { + !(current->pid == 0 || is_init(current))) { /* Kill off a user process with an ECC error */ printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n", current->pid); diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c index d7a433049b48..aafc8e8893d1 100644 --- a/arch/ppc/kernel/traps.c +++ b/arch/ppc/kernel/traps.c @@ -119,7 +119,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) * generate the same exception over and over again and we get * nowhere. Better to kill it and let the kernel panic. */ - if (current->pid == 1) { + if (is_init(current)) { __sighandler_t handler; spin_lock_irq(¤t->sighand->siglock); diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c index bc776beb3136..465f451f3bc3 100644 --- a/arch/ppc/mm/fault.c +++ b/arch/ppc/mm/fault.c @@ -291,7 +291,7 @@ bad_area: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index f2b9a84dc2bf..9c3c19fe62fc 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -353,7 +353,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 507f28914706..68663b8f99ae 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -149,7 +149,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c index f08d0eaf6497..8e2f6c28b739 100644 --- a/arch/sh64/mm/fault.c +++ b/arch/sh64/mm/fault.c @@ -277,7 +277,7 @@ bad_area: show_regs(regs); #endif } - if (tsk->pid == 1) { + if (is_init(tsk)) { panic("INIT had user mode bad_area\n"); } tsk->thread.address = address; @@ -319,14 +319,14 @@ no_context: * us unable to handle the page fault gracefully. */ out_of_memory: - if (current->pid == 1) { + if (is_init(current)) { panic("INIT out of memory\n"); yield(); goto survive; } printk("fault:Out of memory\n"); up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 61a23fff4395..c7b195c7e51f 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -120,7 +120,7 @@ out_nosemaphore: * us unable to handle the page fault gracefully. */ out_of_memory: - if (current->pid == 1) { + if (is_init(current)) { up_read(&mm->mmap_sem); yield(); down_read(&mm->mmap_sem); diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 9ba54cc2b5f6..3751b4788e28 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -244,7 +244,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) int unhandled_signal(struct task_struct *tsk, int sig) { - if (tsk->pid == 1) + if (is_init(tsk)) return 1; if (tsk->ptrace & PT_PTRACED) return 0; @@ -580,7 +580,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); goto again; } diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index a945a33e85a1..dd0dbec2e57e 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -144,7 +144,7 @@ bad_area: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index ee3ca8f1768e..0ad6cb081db4 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -208,7 +208,7 @@ static void send_sig_all(int sig) struct task_struct *p; for_each_process(p) { - if (p->mm && p->pid != 1) + if (p->mm && !is_init(p)) /* Not swapper, init nor kernel thread */ force_sig(sig, p); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 3696f2f7126d..ed2af8671589 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1033,6 +1033,16 @@ static inline int pid_alive(struct task_struct *p) return p->pids[PIDTYPE_PID].pid != NULL; } +/** + * is_init - check if a task structure is the first user space + * task the kernel created. + * @p: Task structure to be checked. + */ +static inline int is_init(struct task_struct *tsk) +{ + return tsk->pid == 1; +} + extern void free_task(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) diff --git a/kernel/capability.c b/kernel/capability.c index c7685ad00a97..edb845a6e84a 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -133,7 +133,7 @@ static inline int cap_set_all(kernel_cap_t *effective, int found = 0; do_each_thread(g, target) { - if (target == current || target->pid == 1) + if (target == current || is_init(target)) continue; found = 1; if (security_capset_check(target, effective, inheritable, diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1b32c2c04c15..584bb4e6c042 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -240,7 +240,7 @@ static struct super_block *cpuset_sb; * A cpuset can only be deleted if both its 'count' of using tasks * is zero, and its list of 'children' cpusets is empty. Since all * tasks in the system use _some_ cpuset, and since there is always at - * least one task in the system (init, pid == 1), therefore, top_cpuset + * least one task in the system (init), therefore, top_cpuset * always has either children cpusets and/or using tasks. So we don't * need a special hack to ensure that top_cpuset cannot be deleted. * diff --git a/kernel/exit.c b/kernel/exit.c index 4b6fb054b25d..9961192d6055 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -219,7 +219,7 @@ static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task) do_each_task_pid(pgrp, PIDTYPE_PGID, p) { if (p == ignored_task || p->exit_state - || p->real_parent->pid == 1) + || is_init(p->real_parent)) continue; if (process_group(p->real_parent) != pgrp && p->real_parent->signal->session == p->signal->session) { diff --git a/kernel/kexec.c b/kernel/kexec.c index 50087ecf337e..37cad75cf494 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -40,7 +40,7 @@ struct resource crashk_res = { int kexec_should_crash(struct task_struct *p) { - if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops) + if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops) return 1; return 0; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8aad0331d82e..4d50e06fd745 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -440,6 +440,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid) child = find_task_by_pid(pid); if (child) get_task_struct(child); + read_unlock(&tasklist_lock); if (!child) return ERR_PTR(-ESRCH); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8bfa7d117c54..9535a3839930 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1915,7 +1915,7 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, return -EPERM; } - op = (current->pid == 1) ? OP_SET : OP_AND; + op = is_init(current) ? OP_SET : OP_AND; return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, do_proc_dointvec_bset_conv,&op); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index bada3d03119f..f3dd79c1c367 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -255,7 +255,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) */ static void __oom_kill_task(struct task_struct *p, const char *message) { - if (p->pid == 1) { + if (is_init(p)) { WARN_ON(1); printk(KERN_WARNING "tried to kill init!\n"); return; diff --git a/security/commoncap.c b/security/commoncap.c index f50fc298cf80..5a5ef5ca7ea9 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -169,7 +169,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) /* For init, we want to retain the capabilities set * in the init_task struct. Thus we skip the usual * capability rules */ - if (current->pid != 1) { + if (!is_init(current)) { current->cap_permitted = new_permitted; current->cap_effective = cap_intersect (new_permitted, bprm->cap_effective); -- cgit v1.3-8-gc7d7 From b9ecb2bd5d3ab8904752685696cb76aac1f3fef2 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 29 Sep 2006 02:00:31 -0700 Subject: [PATCH] has_stopped_jobs() cleanup This check has been obsolete since the introduction of TASK_TRACED. Now TASK_STOPPED always means job control stop. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 9961192d6055..3ec7b10eae38 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -249,17 +249,6 @@ static int has_stopped_jobs(int pgrp) do_each_task_pid(pgrp, PIDTYPE_PGID, p) { if (p->state != TASK_STOPPED) continue; - - /* If p is stopped by a debugger on a signal that won't - stop it, then don't count p as stopped. This isn't - perfect but it's a good approximation. */ - if (unlikely (p->ptrace) - && p->exit_code != SIGSTOP - && p->exit_code != SIGTSTP - && p->exit_code != SIGTTOU - && p->exit_code != SIGTTIN) - continue; - retval = 1; break; } while_each_task_pid(pgrp, PIDTYPE_PGID, p); -- cgit v1.3-8-gc7d7 From 54306cf04c0ea0a8c432603dbc657ab62a438668 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 29 Sep 2006 02:00:42 -0700 Subject: [PATCH] exit: fix crash case If we are going to BUG() not panic() here then we should cover the case of the BUG being compiled out Signed-off-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 3ec7b10eae38..a51b347ae67b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -963,7 +963,8 @@ fastcall NORET_TYPE void do_exit(long code) schedule(); BUG(); /* Avoid "noreturn function does return". */ - for (;;) ; + for (;;) + cpu_relax(); /* For when BUG is null */ } EXPORT_SYMBOL_GPL(do_exit); -- cgit v1.3-8-gc7d7 From 1c573afebc6213e4372e0d8352034c23d5262e1f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 29 Sep 2006 02:00:51 -0700 Subject: [PATCH] reparent_to_init(): use has_rt_policy() Remove open-coded has_rt_policy(), no changes in kernel/exit.o Signed-off-by: Oleg Nesterov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Steven Rostedt Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index a51b347ae67b..4a280856acd2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -281,9 +281,7 @@ static void reparent_to_init(void) /* Set the exit signal to SIGCHLD so we signal init on exit */ current->exit_signal = SIGCHLD; - if ((current->policy == SCHED_NORMAL || - current->policy == SCHED_BATCH) - && (task_nice(current) < 0)) + if (!has_rt_policy(current) && (task_nice(current) < 0)) set_user_nice(current, 0); /* cpus_allowed? */ /* rt_priority? */ -- cgit v1.3-8-gc7d7 From 29b884921634e1e01cbd276e1c9b8fc07a7e4a90 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 29 Sep 2006 02:01:09 -0700 Subject: [PATCH] set EXIT_DEAD state in do_exit(), not in schedule() schedule() checks PF_DEAD on every context switch and sets ->state = EXIT_DEAD to ensure that the exiting task will be deactivated. Note that this EXIT_DEAD is in fact a "random" value, we can use any bit except normal TASK_XXX values. It is better to set this state in do_exit() along with PF_DEAD flag and remove that check in schedule(). We are safe wrt concurrent try_to_wake_up() (for example ptrace, tkill), it can not change task's ->state: the 'state' argument of try_to_wake_up() can't have EXIT_DEAD bit. And in case when try_to_wake_up() sees a stale value of ->state == TASK_RUNNING it will do nothing. Signed-off-by: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 1 + kernel/sched.c | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 4a280856acd2..3d759c98fb11 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -957,6 +957,7 @@ fastcall NORET_TYPE void do_exit(long code) preempt_disable(); BUG_ON(tsk->flags & PF_DEAD); tsk->flags |= PF_DEAD; + tsk->state = EXIT_DEAD; schedule(); BUG(); diff --git a/kernel/sched.c b/kernel/sched.c index 155a33da7aa7..e1646b044b69 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3348,9 +3348,6 @@ need_resched_nonpreemptible: spin_lock_irq(&rq->lock); - if (unlikely(prev->flags & PF_DEAD)) - prev->state = EXIT_DEAD; - switch_count = &prev->nivcsw; if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { switch_count = &prev->nvcsw; -- cgit v1.3-8-gc7d7 From 55a101f8f71a3d3dbda7b5c77083ffe47552f831 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 29 Sep 2006 02:01:10 -0700 Subject: [PATCH] kill PF_DEAD flag After the previous change (->flags & PF_DEAD) <=> (->state == EXIT_DEAD), we don't need PF_DEAD any longer. Signed-off-by: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - kernel/exit.c | 6 ++---- kernel/sched.c | 16 ++++++++-------- mm/oom_kill.c | 4 ++-- 4 files changed, 12 insertions(+), 15 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index fbc69cc3923d..9763de334f09 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1061,7 +1061,6 @@ static inline void put_task_struct(struct task_struct *t) /* Not implemented yet, only for 486*/ #define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ -#define PF_DEAD 0x00000008 /* Dead */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ diff --git a/kernel/exit.c b/kernel/exit.c index 3d759c98fb11..9dd5f1336da2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -953,10 +953,8 @@ fastcall NORET_TYPE void do_exit(long code) if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); - /* PF_DEAD causes final put_task_struct after we schedule. */ preempt_disable(); - BUG_ON(tsk->flags & PF_DEAD); - tsk->flags |= PF_DEAD; + /* causes final put_task_struct in finish_task_switch(). */ tsk->state = EXIT_DEAD; schedule(); @@ -972,7 +970,7 @@ NORET_TYPE void complete_and_exit(struct completion *comp, long code) { if (comp) complete(comp); - + do_exit(code); } diff --git a/kernel/sched.c b/kernel/sched.c index e1646b044b69..a9405d7cc6ab 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1755,27 +1755,27 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) __releases(rq->lock) { struct mm_struct *mm = rq->prev_mm; - unsigned long prev_task_flags; + long prev_state; rq->prev_mm = NULL; /* * A task struct has one reference for the use as "current". - * If a task dies, then it sets EXIT_ZOMBIE in tsk->exit_state and - * calls schedule one last time. The schedule call will never return, - * and the scheduled task must drop that reference. - * The test for EXIT_ZOMBIE must occur while the runqueue locks are + * If a task dies, then it sets EXIT_DEAD in tsk->state and calls + * schedule one last time. The schedule call will never return, and + * the scheduled task must drop that reference. + * The test for EXIT_DEAD must occur while the runqueue locks are * still held, otherwise prev could be scheduled on another cpu, die * there before we look at prev->state, and then the reference would * be dropped twice. * Manfred Spraul */ - prev_task_flags = prev->flags; + prev_state = prev->state; finish_arch_switch(prev); finish_lock_switch(rq, prev); if (mm) mmdrop(mm); - if (unlikely(prev_task_flags & PF_DEAD)) { + if (unlikely(prev_state == EXIT_DEAD)) { /* * Remove function-return probe instances associated with this * task and put them back on the free list. @@ -5153,7 +5153,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD); /* Cannot have done final schedule yet: would have vanished. */ - BUG_ON(p->flags & PF_DEAD); + BUG_ON(p->state == EXIT_DEAD); get_task_struct(p); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f3dd79c1c367..202f186a753a 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -226,8 +226,8 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || p->flags & PF_EXITING; if (releasing) { - /* PF_DEAD tasks have already released their mm */ - if (p->flags & PF_DEAD) + /* TASK_DEAD tasks have already released their mm */ + if (p->state == EXIT_DEAD) continue; if (p->flags & PF_EXITING && p == current) { chosen = p; -- cgit v1.3-8-gc7d7 From c394cc9fbb367f87faa2228ec2eabacd2d4701c6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 29 Sep 2006 02:01:11 -0700 Subject: [PATCH] introduce TASK_DEAD state I am not sure about this patch, I am asking Ingo to take a decision. task_struct->state == EXIT_DEAD is a very special case, to avoid a confusion it makes sense to introduce a new state, TASK_DEAD, while EXIT_DEAD should live only in ->exit_state as documented in sched.h. Note that this state is not visible to user-space, get_task_state() masks off unsuitable states. Signed-off-by: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + kernel/exit.c | 2 +- kernel/sched.c | 8 ++++---- mm/oom_kill.c | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9763de334f09..a06fc89cf6e5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -148,6 +148,7 @@ extern unsigned long weighted_cpuload(const int cpu); #define EXIT_DEAD 32 /* in tsk->state again */ #define TASK_NONINTERACTIVE 64 +#define TASK_DEAD 128 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) diff --git a/kernel/exit.c b/kernel/exit.c index 9dd5f1336da2..2e4c13cba95a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -955,7 +955,7 @@ fastcall NORET_TYPE void do_exit(long code) preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ - tsk->state = EXIT_DEAD; + tsk->state = TASK_DEAD; schedule(); BUG(); diff --git a/kernel/sched.c b/kernel/sched.c index a9405d7cc6ab..74f169ac0773 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1761,10 +1761,10 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) /* * A task struct has one reference for the use as "current". - * If a task dies, then it sets EXIT_DEAD in tsk->state and calls + * If a task dies, then it sets TASK_DEAD in tsk->state and calls * schedule one last time. The schedule call will never return, and * the scheduled task must drop that reference. - * The test for EXIT_DEAD must occur while the runqueue locks are + * The test for TASK_DEAD must occur while the runqueue locks are * still held, otherwise prev could be scheduled on another cpu, die * there before we look at prev->state, and then the reference would * be dropped twice. @@ -1775,7 +1775,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) finish_lock_switch(rq, prev); if (mm) mmdrop(mm); - if (unlikely(prev_state == EXIT_DEAD)) { + if (unlikely(prev_state == TASK_DEAD)) { /* * Remove function-return probe instances associated with this * task and put them back on the free list. @@ -5153,7 +5153,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD); /* Cannot have done final schedule yet: would have vanished. */ - BUG_ON(p->state == EXIT_DEAD); + BUG_ON(p->state == TASK_DEAD); get_task_struct(p); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 202f186a753a..21f0a7e8e514 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -227,7 +227,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) p->flags & PF_EXITING; if (releasing) { /* TASK_DEAD tasks have already released their mm */ - if (p->state == EXIT_DEAD) + if (p->state == TASK_DEAD) continue; if (p->flags & PF_EXITING && p == current) { chosen = p; -- cgit v1.3-8-gc7d7 From 0d67a46df0125e20d14f12dbd3646f1f1bf23e8c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2006 19:05:56 +0100 Subject: [PATCH] BLOCK: Remove duplicate declaration of exit_io_context() [try #6] Remove the duplicate declaration of exit_io_context() from linux/sched.h. Signed-Off-By: David Howells Signed-off-by: Jens Axboe --- include/linux/sched.h | 1 - kernel/exit.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index a06fc89cf6e5..fc4a9873ec10 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -710,7 +710,6 @@ extern unsigned int max_cache_size; struct io_context; /* See blkdev.h */ -void exit_io_context(void); struct cpuset; #define NGROUPS_SMALL 32 diff --git a/kernel/exit.c b/kernel/exit.c index 2e4c13cba95a..c189de2927ab 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -38,6 +38,7 @@ #include #include /* for audit_free() */ #include +#include #include #include -- cgit v1.3-8-gc7d7 From 8f0ab5147951267134612570604cf8341901a80c Mon Sep 17 00:00:00 2001 From: Jay Lan Date: Sat, 30 Sep 2006 23:28:59 -0700 Subject: [PATCH] csa: convert CONFIG tag for extended accounting routines There were a few accounting data/macros that are used in CSA but are #ifdef'ed inside CONFIG_BSD_PROCESS_ACCT. This patch is to change those ifdef's from CONFIG_BSD_PROCESS_ACCT to CONFIG_TASK_XACCT. A few defines are moved from kernel/acct.c and include/linux/acct.h to kernel/tsacct.c and include/linux/tsacct_kern.h. Signed-off-by: Jay Lan Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jes Sorensen Cc: Chris Sturtivant Cc: Tony Ernst Cc: Guillaume Thouvenin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat.c | 2 +- fs/exec.c | 2 +- include/linux/acct.h | 4 ---- include/linux/sched.h | 2 +- include/linux/tsacct_kern.h | 6 ++++++ kernel/acct.c | 30 ------------------------------ kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sched.c | 2 +- kernel/tsacct.c | 30 ++++++++++++++++++++++++++++++ 10 files changed, 42 insertions(+), 38 deletions(-) (limited to 'kernel/exit.c') diff --git a/fs/compat.c b/fs/compat.c index 6b90bf35f61d..13fb08d096c4 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include /* siocdevprivate_ioctl */ diff --git a/fs/exec.c b/fs/exec.c index a8efe35176b0..0db3fc3c5f0f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/acct.h b/include/linux/acct.h index e86bae7324d2..0496d1f09952 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -124,16 +124,12 @@ extern void acct_auto_close(struct super_block *sb); extern void acct_init_pacct(struct pacct_struct *pacct); extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); -extern void acct_update_integrals(struct task_struct *tsk); -extern void acct_clear_integrals(struct task_struct *tsk); #else #define acct_auto_close_mnt(x) do { } while (0) #define acct_auto_close(x) do { } while (0) #define acct_init_pacct(x) do { } while (0) #define acct_collect(x,y) do { } while (0) #define acct_process() do { } while (0) -#define acct_update_integrals(x) do { } while (0) -#define acct_clear_integrals(task) do { } while (0) #endif /* diff --git a/include/linux/sched.h b/include/linux/sched.h index fc4a9873ec10..4ddeb0f982fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -981,7 +981,7 @@ struct task_struct { wait_queue_t *io_wait; /* i/o counters(bytes read/written, #syscalls */ u64 rchar, wchar, syscr, syscw; -#if defined(CONFIG_BSD_PROCESS_ACCT) +#if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ clock_t acct_stimexpd; /* clock_t-converted stime since last update */ diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h index 74102dcae67a..7e50ac795b0b 100644 --- a/include/linux/tsacct_kern.h +++ b/include/linux/tsacct_kern.h @@ -18,9 +18,15 @@ static inline void bacct_add_tsk(struct taskstats *stats, struct task_struct *ts #ifdef CONFIG_TASK_XACCT extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p); +extern void acct_update_integrals(struct task_struct *tsk); +extern void acct_clear_integrals(struct task_struct *tsk); #else static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) {} +static inline void acct_update_integrals(struct task_struct *tsk) +{} +static inline void acct_clear_integrals(struct task_struct *tsk) +{} #endif /* CONFIG_TASK_XACCT */ #endif diff --git a/kernel/acct.c b/kernel/acct.c index f4330acead46..0aad5ca36a81 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -602,33 +602,3 @@ void acct_process(void) do_acct_process(file); fput(file); } - - -/** - * acct_update_integrals - update mm integral fields in task_struct - * @tsk: task_struct for accounting - */ -void acct_update_integrals(struct task_struct *tsk) -{ - if (likely(tsk->mm)) { - long delta = - cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd; - - if (delta == 0) - return; - tsk->acct_stimexpd = tsk->stime; - tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); - tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; - } -} - -/** - * acct_clear_integrals - clear the mm integral fields in task_struct - * @tsk: task_struct whose accounting fields are cleared - */ -void acct_clear_integrals(struct task_struct *tsk) -{ - tsk->acct_stimexpd = 0; - tsk->acct_rss_mem1 = 0; - tsk->acct_vm_mem1 = 0; -} diff --git a/kernel/exit.c b/kernel/exit.c index c189de2927ab..3b47f26985f2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 1c999f3e0b47..89f666491d1f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched.c b/kernel/sched.c index 74f169ac0773..2bbd948f0169 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 410483490cf6..47c71daa416f 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -88,4 +88,34 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) stats->read_syscalls = p->syscr; stats->write_syscalls = p->syscw; } + + +/** + * acct_update_integrals - update mm integral fields in task_struct + * @tsk: task_struct for accounting + */ +void acct_update_integrals(struct task_struct *tsk) +{ + if (likely(tsk->mm)) { + long delta = + cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd; + + if (delta == 0) + return; + tsk->acct_stimexpd = tsk->stime; + tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); + tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; + } +} + +/** + * acct_clear_integrals - clear the mm integral fields in task_struct + * @tsk: task_struct whose accounting fields are cleared + */ +void acct_clear_integrals(struct task_struct *tsk) +{ + tsk->acct_stimexpd = 0; + tsk->acct_rss_mem1 = 0; + tsk->acct_vm_mem1 = 0; +} #endif -- cgit v1.3-8-gc7d7