From e73f8959af0439d114847eab5a8a5ce48f1217c4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 May 2012 10:59:07 +1000 Subject: task_work_add: generic process-context callbacks Provide a simple mechanism that allows running code in the (nonatomic) context of the arbitrary task. The caller does task_work_add(task, task_work) and this task executes task_work->func() either from do_notify_resume() or from do_exit(). The callback can rely on PF_EXITING to detect the latter case. "struct task_work" can be embedded in another struct, still it has "void *data" to handle the most common/simple case. This allows us to kill the ->replacement_session_keyring hack, and potentially this can have more users. Performance-wise, this adds 2 "unlikely(!hlist_empty())" checks into tracehook_notify_resume() and do_exit(). But at the same time we can remove the "replacement_session_keyring != NULL" checks from arch/*/signal.c and exit_creds(). Note: task_work_add/task_work_run abuses ->pi_lock. This is only because this lock is already used by lookup_pi_state() to synchronize with do_exit() setting PF_EXITING. Fortunately the scope of this lock in task_work.c is really tiny, and the code is unlikely anyway. Signed-off-by: Oleg Nesterov Acked-by: David Howells Cc: Thomas Gleixner Cc: Richard Kuo Cc: Linus Torvalds Cc: Alexander Gordeev Cc: Chris Zankel Cc: David Smith Cc: "Frank Ch. Eigler" Cc: Geert Uytterhoeven Cc: Larry Woodman Cc: Peter Zijlstra Cc: Tejun Heo Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- kernel/task_work.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 kernel/task_work.c (limited to 'kernel/task_work.c') diff --git a/kernel/task_work.c b/kernel/task_work.c new file mode 100644 index 000000000000..82d1c794066d --- /dev/null +++ b/kernel/task_work.c @@ -0,0 +1,84 @@ +#include +#include +#include + +int +task_work_add(struct task_struct *task, struct task_work *twork, bool notify) +{ + unsigned long flags; + int err = -ESRCH; + +#ifndef TIF_NOTIFY_RESUME + if (notify) + return -ENOTSUPP; +#endif + /* + * We must not insert the new work if the task has already passed + * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait() + * and check PF_EXITING under pi_lock. + */ + raw_spin_lock_irqsave(&task->pi_lock, flags); + if (likely(!(task->flags & PF_EXITING))) { + hlist_add_head(&twork->hlist, &task->task_works); + err = 0; + } + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + + /* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */ + if (likely(!err) && notify) + set_notify_resume(task); + return err; +} + +struct task_work * +task_work_cancel(struct task_struct *task, task_work_func_t func) +{ + unsigned long flags; + struct task_work *twork; + struct hlist_node *pos; + + raw_spin_lock_irqsave(&task->pi_lock, flags); + hlist_for_each_entry(twork, pos, &task->task_works, hlist) { + if (twork->func == func) { + hlist_del(&twork->hlist); + goto found; + } + } + twork = NULL; + found: + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + + return twork; +} + +void task_work_run(void) +{ + struct task_struct *task = current; + struct hlist_head task_works; + struct hlist_node *pos; + + raw_spin_lock_irq(&task->pi_lock); + hlist_move_list(&task->task_works, &task_works); + raw_spin_unlock_irq(&task->pi_lock); + + if (unlikely(hlist_empty(&task_works))) + return; + /* + * We use hlist to save the space in task_struct, but we want fifo. + * Find the last entry, the list should be short, then process them + * in reverse order. + */ + for (pos = task_works.first; pos->next; pos = pos->next) + ; + + for (;;) { + struct hlist_node **pprev = pos->pprev; + struct task_work *twork = container_of(pos, struct task_work, + hlist); + twork->func(twork); + + if (pprev == &task_works.first) + break; + pos = container_of(pprev, struct hlist_node, next); + } +} -- cgit v1.2.3-59-g8ed1b