From 534a3fbb3f37c42ec32de9876681c3195be0b658 Mon Sep 17 00:00:00 2001 From: Daeseok Youn Date: Fri, 18 Apr 2014 09:08:14 +0900 Subject: workqueue: simplify wq_update_unbound_numa() by jumping to use_dfl_pwq if the target cpumask equals wq's wq_update_unbound_numa(), when it's decided that the newly updated cpumask equals the default, looks at whether the current pwq is already the default one and skips setting pwq to the default one. This extra step is unnecessary and we can always jump to use_dfl_pwq instead. Simplify the code by removing the conditional. This doesn't make any functional difference. Signed-off-by: Daeseok Youn Signed-off-by: Tejun Heo --- kernel/workqueue.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8edc87185427..c3f076f0f8fd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4103,17 +4103,13 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, * Let's determine what needs to be done. If the target cpumask is * different from wq's, we need to compare it to @pwq's and create * a new one if they don't match. If the target cpumask equals - * wq's, the default pwq should be used. If @pwq is already the - * default one, nothing to do; otherwise, install the default one. + * wq's, the default pwq should be used. */ if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) { if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask)) goto out_unlock; } else { - if (pwq == wq->dfl_pwq) - goto out_unlock; - else - goto use_dfl_pwq; + goto use_dfl_pwq; } mutex_unlock(&wq->mutex); -- cgit v1.2.3-59-g8ed1b From 2d916033a318d7e763eb099c69600d5dcd1ccb6b Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 12 May 2014 13:59:35 -0400 Subject: kernel/workqueue.c: pr_warning/pr_warn & printk/pr_info tj: Refreshed on top of wq/for-3.16. Cc: Andrew Morton Signed-off-by: Fabian Frederick Signed-off-by: Tejun Heo --- kernel/workqueue.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c3f076f0f8fd..c8411085466f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4117,8 +4117,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, /* create a new pwq */ pwq = alloc_unbound_pwq(wq, target_attrs); if (!pwq) { - pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", - wq->name); + pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", + wq->name); mutex_lock(&wq->mutex); goto use_dfl_pwq; } @@ -4568,7 +4568,7 @@ void print_worker_info(const char *log_lvl, struct task_struct *task) probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); if (fn || name[0] || desc[0]) { - printk("%sWorkqueue: %s %pf", log_lvl, name, fn); + pr_info("%sWorkqueue: %s %pf", log_lvl, name, fn); if (desc[0]) pr_cont(" (%s)", desc); pr_cont("\n"); -- cgit v1.2.3-59-g8ed1b From 1a56f2aa4752293e5a9c0c3a2331620aa1fdb808 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 14 May 2014 13:43:37 +0900 Subject: workqueue: Remove deprecated flush[_delayed]_work_sync() flush[_delayed]_work_sync() were deprecated by 4382973 ("workqueue: deprecate flush[_delayed]_work_sync()") and have been deprecated for a long time. In addition, these are not used anymore. So, let's remove these functions. 
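Because the deprecated wrappers had long been plain aliases for the regular flush helpers, any leftover caller migrates by direct substitution. A minimal sketch with hypothetical work items (names here are illustrative, not from the tree):

#include <linux/workqueue.h>

static void example_fn(struct work_struct *work)
{
        /* work body elided */
}

static DECLARE_WORK(example_work, example_fn);
static DECLARE_DELAYED_WORK(example_dwork, example_fn);

static void example_flush_all(void)
{
        /* was: flush_work_sync(&example_work); */
        flush_work(&example_work);

        /* was: flush_delayed_work_sync(&example_dwork); */
        flush_delayed_work(&example_dwork);
}
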
Signed-off-by: Jingoo Han Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1b22c42e9c2d..aa92d0295e28 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -587,18 +587,6 @@ static inline bool keventd_up(void) return system_wq != NULL; } -/* used to be different but now identical to flush_work(), deprecated */ -static inline bool __deprecated flush_work_sync(struct work_struct *work) -{ - return flush_work(work); -} - -/* used to be different but now identical to flush_delayed_work(), deprecated */ -static inline bool __deprecated flush_delayed_work_sync(struct delayed_work *dwork) -{ - return flush_delayed_work(dwork); -} - #ifndef CONFIG_SMP static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { -- cgit v1.2.3-59-g8ed1b From cf416171e7e1d966111f53bdae82f51af05e7bf8 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 14 May 2014 13:58:06 +0900 Subject: workqueue: Remove deprecated system_nrt[_freezable]_wq system_nrt[_freezable]_wq were deprecated by 3b07e9c ("workqueue: deprecate system_nrt[_freezable]_wq") and have been deprecated for a long time. In addition, these are not used anymore. So, let's remove these functions. Signed-off-by: Jingoo Han Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index aa92d0295e28..d93d28b2ec73 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -364,20 +364,6 @@ extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; -static inline struct workqueue_struct * __deprecated __system_nrt_wq(void) -{ - return system_wq; -} - -static inline struct workqueue_struct * __deprecated __system_nrt_freezable_wq(void) -{ - return system_freezable_wq; -} - -/* equivlalent to system_wq and system_freezable_wq, deprecated */ -#define system_nrt_wq __system_nrt_wq() -#define system_nrt_freezable_wq __system_nrt_freezable_wq() - extern struct workqueue_struct * __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6); -- cgit v1.2.3-59-g8ed1b From 9625ab1727743f6a164df26b7b1eeeced7380b42 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 May 2014 17:46:27 +0800 Subject: workqueue: use manager lock only to protect worker_idr worker_idr is highly bound to managers and is always/only accessed in manager lock context. So we don't need pool->lock for it. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c8411085466f..910d963f6b76 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -124,8 +124,7 @@ enum { * cpu or grabbing pool->lock is enough for read access. If * POOL_DISASSOCIATED is set, it's identical to L. * - * MG: pool->manager_mutex and pool->lock protected. Writes require both - * locks. Reads can happen under either lock. + * M: pool->manager_mutex protected. * * PL: wq_pool_mutex protected. 
* @@ -164,7 +163,7 @@ struct worker_pool { /* see manage_workers() for details on the two manager mutexes */ struct mutex manager_arb; /* manager arbitration */ struct mutex manager_mutex; /* manager exclusion */ - struct idr worker_idr; /* MG: worker IDs and iteration */ + struct idr worker_idr; /* M: worker IDs and iteration */ struct workqueue_attrs *attrs; /* I: worker attributes */ struct hlist_node hash_node; /* PL: unbound_pool_hash node */ @@ -340,16 +339,6 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, lockdep_is_held(&wq->mutex), \ "sched RCU or wq->mutex should be held") -#ifdef CONFIG_LOCKDEP -#define assert_manager_or_pool_lock(pool) \ - WARN_ONCE(debug_locks && \ - !lockdep_is_held(&(pool)->manager_mutex) && \ - !lockdep_is_held(&(pool)->lock), \ - "pool->manager_mutex or ->lock should be held") -#else -#define assert_manager_or_pool_lock(pool) do { } while (0) -#endif - #define for_each_cpu_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ @@ -378,14 +367,14 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, * @wi: integer used for iteration * @pool: worker_pool to iterate workers of * - * This must be called with either @pool->manager_mutex or ->lock held. + * This must be called with @pool->manager_mutex. * * The if/else clause exists only for the lockdep assertion and can be * ignored. */ #define for_each_pool_worker(worker, wi, pool) \ idr_for_each_entry(&(pool)->worker_idr, (worker), (wi)) \ - if (({ assert_manager_or_pool_lock((pool)); false; })) { } \ + if (({ lockdep_assert_held(&pool->manager_mutex); false; })) { } \ else /** @@ -1725,13 +1714,7 @@ static struct worker *create_worker(struct worker_pool *pool) * ID is needed to determine kthread name. Allocate ID first * without installing the pointer. */ - idr_preload(GFP_KERNEL); - spin_lock_irq(&pool->lock); - - id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_NOWAIT); - - spin_unlock_irq(&pool->lock); - idr_preload_end(); + id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_KERNEL); if (id < 0) goto fail; @@ -1773,18 +1756,13 @@ static struct worker *create_worker(struct worker_pool *pool) worker->flags |= WORKER_UNBOUND; /* successful, commit the pointer to idr */ - spin_lock_irq(&pool->lock); idr_replace(&pool->worker_idr, worker, worker->id); - spin_unlock_irq(&pool->lock); return worker; fail: - if (id >= 0) { - spin_lock_irq(&pool->lock); + if (id >= 0) idr_remove(&pool->worker_idr, id); - spin_unlock_irq(&pool->lock); - } kfree(worker); return NULL; } -- cgit v1.2.3-59-g8ed1b From 73eb7fe73ae303996187fff38b1c162f1df0e9d1 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 May 2014 17:46:28 +0800 Subject: workqueue: destroy_worker() should destroy idle workers only We used to have the CPU online failure path where a worker is created and then destroyed without being started. A worker was created for the CPU coming online and if the online operation failed the created worker was shut down without being started. But this behavior was changed. The first worker is created and started at the same time for the CPU coming online. It means that we had already ensured in the code that destroy_worker() destroys only idle workers and we don't want to allow it to destroy any non-idle worker in the future. Otherwise, it may be buggy and it may be extremely hard to check. We should force destroy_worker() to destroy only idle workers explicitly. 
Since destroy_worker() destroys only idle workers, this patch does not change any functionality. We just need to update the comments and the sanity check code. In the sanity check code, we will refuse to destroy the worker if !(worker->flags & WORKER_IDLE). If the worker entered idle which means it is already started, so we remove the check of "worker->flags & WORKER_STARTED", after this removal, WORKER_STARTED is totally unneeded, so we remove WORKER_STARTED too. In the comments for create_worker(), "Create a new worker which is bound..." is changed to "... which is attached..." due to we change the name of this behavior to attaching. tj: Minor description / comment updates. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 910d963f6b76..189d79be8091 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -73,7 +73,6 @@ enum { POOL_FREEZING = 1 << 3, /* freeze in progress */ /* worker flags */ - WORKER_STARTED = 1 << 0, /* started */ WORKER_DIE = 1 << 1, /* die die die */ WORKER_IDLE = 1 << 2, /* is idle */ WORKER_PREP = 1 << 3, /* preparing to run works */ @@ -1692,9 +1691,8 @@ static struct worker *alloc_worker(void) * create_worker - create a new workqueue worker * @pool: pool the new worker will belong to * - * Create a new worker which is bound to @pool. The returned worker - * can be started by calling start_worker() or destroyed using - * destroy_worker(). + * Create a new worker which is attached to @pool. The new worker must be + * started by start_worker(). * * CONTEXT: * Might sleep. Does GFP_KERNEL allocations. @@ -1778,7 +1776,6 @@ fail: */ static void start_worker(struct worker *worker) { - worker->flags |= WORKER_STARTED; worker->pool->nr_workers++; worker_enter_idle(worker); wake_up_process(worker->task); @@ -1814,7 +1811,8 @@ static int create_and_start_worker(struct worker_pool *pool) * destroy_worker - destroy a workqueue worker * @worker: worker to be destroyed * - * Destroy @worker and adjust @pool stats accordingly. + * Destroy @worker and adjust @pool stats accordingly. The worker should + * be idle. * * CONTEXT: * spin_lock_irq(pool->lock) which is released and regrabbed. @@ -1828,13 +1826,12 @@ static void destroy_worker(struct worker *worker) /* sanity check frenzy */ if (WARN_ON(worker->current_work) || - WARN_ON(!list_empty(&worker->scheduled))) + WARN_ON(!list_empty(&worker->scheduled)) || + WARN_ON(!(worker->flags & WORKER_IDLE))) return; - if (worker->flags & WORKER_STARTED) - pool->nr_workers--; - if (worker->flags & WORKER_IDLE) - pool->nr_idle--; + pool->nr_workers--; + pool->nr_idle--; /* * Once WORKER_DIE is set, the kworker may destroy itself at any -- cgit v1.2.3-59-g8ed1b From 60f5a4bcf852b5dec698b08cd34efc302ea72f2b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 May 2014 17:46:29 +0800 Subject: workqueue: async worker destruction worker destruction includes these parts of code: adjust pool's stats remove the worker from idle list detach the worker from the pool kthread_stop() to wait for the worker's task exit free the worker struct We can find out that there is no essential work to do after kthread_stop(), which means destroy_worker() doesn't need to wait for the worker's task exit, so we can remove kthread_stop() and free the worker struct in the worker exiting path. 
However, put_unbound_pool() still needs to sync the all the workers' destruction before destroying the pool; otherwise, the workers may access to the invalid pool when they are exiting. So we also move the code of "detach the worker" to the exiting path and let put_unbound_pool() to sync with this code via detach_completion. The code of "detach the worker" is wrapped in a new function "worker_detach_from_pool()" although worker_detach_from_pool() is only called once (in worker_thread()) after this patch, but we need to wrap it for these reasons: 1) The code of "detach the worker" is not short enough to unfold them in worker_thread(). 2) the name of "worker_detach_from_pool()" is self-comment, and we add some comments above the function. 3) it will be shared by rescuer in later patch which allows rescuer and normal thread use the same attach/detach frameworks. The worker id is freed when detaching which happens before the worker is fully dead, but this id of the dying worker may be re-used for a new worker, so the dying worker's task name is changed to "worker/dying" to avoid two or several workers having the same name. Since "detach the worker" is moved out from destroy_worker(), destroy_worker() doesn't require manager_mutex, so the "lockdep_assert_held(&pool->manager_mutex)" in destroy_worker() is removed, and destroy_worker() is not protected by manager_mutex in put_unbound_pool(). tj: Minor description updates. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 62 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 189d79be8091..c458d73022bb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -163,6 +163,7 @@ struct worker_pool { struct mutex manager_arb; /* manager arbitration */ struct mutex manager_mutex; /* manager exclusion */ struct idr worker_idr; /* M: worker IDs and iteration */ + struct completion *detach_completion; /* all workers detached */ struct workqueue_attrs *attrs; /* I: worker attributes */ struct hlist_node hash_node; /* PL: unbound_pool_hash node */ @@ -1687,6 +1688,30 @@ static struct worker *alloc_worker(void) return worker; } +/** + * worker_detach_from_pool() - detach a worker from its pool + * @worker: worker which is attached to its pool + * @pool: the pool @worker is attached to + * + * Undo the attaching which had been done in create_worker(). The caller + * worker shouldn't access to the pool after detached except it has other + * reference to the pool. + */ +static void worker_detach_from_pool(struct worker *worker, + struct worker_pool *pool) +{ + struct completion *detach_completion = NULL; + + mutex_lock(&pool->manager_mutex); + idr_remove(&pool->worker_idr, worker->id); + if (idr_is_empty(&pool->worker_idr)) + detach_completion = pool->detach_completion; + mutex_unlock(&pool->manager_mutex); + + if (detach_completion) + complete(detach_completion); +} + /** * create_worker - create a new workqueue worker * @pool: pool the new worker will belong to @@ -1815,13 +1840,12 @@ static int create_and_start_worker(struct worker_pool *pool) * be idle. * * CONTEXT: - * spin_lock_irq(pool->lock) which is released and regrabbed. + * spin_lock_irq(pool->lock). 
*/ static void destroy_worker(struct worker *worker) { struct worker_pool *pool = worker->pool; - lockdep_assert_held(&pool->manager_mutex); lockdep_assert_held(&pool->lock); /* sanity check frenzy */ @@ -1833,24 +1857,9 @@ static void destroy_worker(struct worker *worker) pool->nr_workers--; pool->nr_idle--; - /* - * Once WORKER_DIE is set, the kworker may destroy itself at any - * point. Pin to ensure the task stays until we're done with it. - */ - get_task_struct(worker->task); - list_del_init(&worker->entry); worker->flags |= WORKER_DIE; - - idr_remove(&pool->worker_idr, worker->id); - - spin_unlock_irq(&pool->lock); - - kthread_stop(worker->task); - put_task_struct(worker->task); - kfree(worker); - - spin_lock_irq(&pool->lock); + wake_up_process(worker->task); } static void idle_worker_timeout(unsigned long __pool) @@ -2289,6 +2298,10 @@ woke_up: spin_unlock_irq(&pool->lock); WARN_ON_ONCE(!list_empty(&worker->entry)); worker->task->flags &= ~PF_WQ_WORKER; + + set_task_comm(worker->task, "kworker/dying"); + worker_detach_from_pool(worker, pool); + kfree(worker); return 0; } @@ -3560,6 +3573,7 @@ static void rcu_free_pool(struct rcu_head *rcu) */ static void put_unbound_pool(struct worker_pool *pool) { + DECLARE_COMPLETION_ONSTACK(detach_completion); struct worker *worker; lockdep_assert_held(&wq_pool_mutex); @@ -3583,15 +3597,21 @@ static void put_unbound_pool(struct worker_pool *pool) * manager_mutex. */ mutex_lock(&pool->manager_arb); - mutex_lock(&pool->manager_mutex); - spin_lock_irq(&pool->lock); + spin_lock_irq(&pool->lock); while ((worker = first_worker(pool))) destroy_worker(worker); WARN_ON(pool->nr_workers || pool->nr_idle); - spin_unlock_irq(&pool->lock); + + mutex_lock(&pool->manager_mutex); + if (!idr_is_empty(&pool->worker_idr)) + pool->detach_completion = &detach_completion; mutex_unlock(&pool->manager_mutex); + + if (pool->detach_completion) + wait_for_completion(pool->detach_completion); + mutex_unlock(&pool->manager_arb); /* shut down the timers */ -- cgit v1.2.3-59-g8ed1b From 3347fc9f36e7e5d3ebe504fc4034745b5d8971d3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 May 2014 17:46:30 +0800 Subject: workqueue: destroy worker directly in the idle timeout handler Since destroy_worker() doesn't need to sleep nor require manager_mutex, destroy_worker() can be directly called in the idle timeout handler, it helps us remove POOL_MANAGE_WORKERS and maybe_destroy_worker() and simplify the manage_workers() After POOL_MANAGE_WORKERS is removed, worker_thread() doesn't need to test whether it needs to manage after processed works. So we can remove the test branch. Signed-off-by: Lai Jiangshan --- kernel/workqueue.c | 67 ++++-------------------------------------------------- 1 file changed, 5 insertions(+), 62 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c458d73022bb..938836bc6207 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -68,7 +68,6 @@ enum { * manager_mutex to avoid changing binding state while * create_worker() is in progress. */ - POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ POOL_FREEZING = 1 << 3, /* freeze in progress */ @@ -752,13 +751,6 @@ static bool need_to_create_worker(struct worker_pool *pool) return need_more_worker(pool) && !may_start_working(pool); } -/* Do I need to be the manager? 
*/ -static bool need_to_manage_workers(struct worker_pool *pool) -{ - return need_to_create_worker(pool) || - (pool->flags & POOL_MANAGE_WORKERS); -} - /* Do we have too many workers and should some go away? */ static bool too_many_workers(struct worker_pool *pool) { @@ -1868,7 +1860,7 @@ static void idle_worker_timeout(unsigned long __pool) spin_lock_irq(&pool->lock); - if (too_many_workers(pool)) { + while (too_many_workers(pool)) { struct worker *worker; unsigned long expires; @@ -1876,13 +1868,12 @@ static void idle_worker_timeout(unsigned long __pool) worker = list_entry(pool->idle_list.prev, struct worker, entry); expires = worker->last_active + IDLE_WORKER_TIMEOUT; - if (time_before(jiffies, expires)) + if (time_before(jiffies, expires)) { mod_timer(&pool->idle_timer, expires); - else { - /* it's been idle for too long, wake up manager */ - pool->flags |= POOL_MANAGE_WORKERS; - wake_up_worker(pool); + break; } + + destroy_worker(worker); } spin_unlock_irq(&pool->lock); @@ -2000,44 +1991,6 @@ restart: return true; } -/** - * maybe_destroy_worker - destroy workers which have been idle for a while - * @pool: pool to destroy workers for - * - * Destroy @pool workers which have been idle for longer than - * IDLE_WORKER_TIMEOUT. - * - * LOCKING: - * spin_lock_irq(pool->lock) which may be released and regrabbed - * multiple times. Called only from manager. - * - * Return: - * %false if no action was taken and pool->lock stayed locked, %true - * otherwise. - */ -static bool maybe_destroy_workers(struct worker_pool *pool) -{ - bool ret = false; - - while (too_many_workers(pool)) { - struct worker *worker; - unsigned long expires; - - worker = list_entry(pool->idle_list.prev, struct worker, entry); - expires = worker->last_active + IDLE_WORKER_TIMEOUT; - - if (time_before(jiffies, expires)) { - mod_timer(&pool->idle_timer, expires); - break; - } - - destroy_worker(worker); - ret = true; - } - - return ret; -} - /** * manage_workers - manage worker pool * @worker: self @@ -2101,13 +2054,6 @@ static bool manage_workers(struct worker *worker) ret = true; } - pool->flags &= ~POOL_MANAGE_WORKERS; - - /* - * Destroy and then create so that may_start_working() is true - * on return. - */ - ret |= maybe_destroy_workers(pool); ret |= maybe_create_worker(pool); mutex_unlock(&pool->manager_mutex); @@ -2349,9 +2295,6 @@ recheck: worker_set_flags(worker, WORKER_PREP, false); sleep: - if (unlikely(need_to_manage_workers(pool)) && manage_workers(worker)) - goto recheck; - /* * pool->lock is held and there's no work to process and no need to * manage, sleep. Workers are woken up only while holding -- cgit v1.2.3-59-g8ed1b From da028469ba173e9c634b6ecf80bb0c69c7d1024d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 May 2014 17:46:31 +0800 Subject: workqueue: separate iteration role from worker_idr worker_idr has the iteration (iterating for attached workers) and worker ID duties. These two duties don't have to be tied together. We can separate them and use a list for tracking attached workers and iteration. Before this separation, it wasn't possible to add rescuer workers to worker_idr due to rescuer workers couldn't allocate ID dynamically because ID-allocation depends on memory-allocation, which rescuer can't depend on. After separation, we can easily add the rescuer workers to the list for iteration without any memory-allocation. It is required when we attach the rescuer worker to the pool in later patch. tj: Minor description updates. 
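The property the list relies on is that attaching is a plain insertion with no memory allocation, so it cannot fail even in an allocation-averse context such as a rescuer. A stripped-down sketch of the pattern, with illustrative stand-in names rather than the real workqueue structures:

#include <linux/list.h>
#include <linux/mutex.h>

struct member {                         /* stands in for struct worker */
        struct list_head node;          /* anchored at group->members */
};

struct group {                          /* stands in for struct worker_pool */
        struct mutex lock;              /* stands in for manager_mutex */
        struct list_head members;
};

/* attaching is a list insertion: no allocation, cannot fail */
static void member_attach(struct group *g, struct member *m)
{
        mutex_lock(&g->lock);
        list_add_tail(&m->node, &g->members);
        mutex_unlock(&g->lock);
}

static void member_detach(struct group *g, struct member *m)
{
        mutex_lock(&g->lock);
        list_del(&m->node);
        mutex_unlock(&g->lock);
}
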
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 28 +++++++++++++++------------- kernel/workqueue_internal.h | 2 ++ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 938836bc6207..a6c38b717957 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -161,7 +161,8 @@ struct worker_pool { /* see manage_workers() for details on the two manager mutexes */ struct mutex manager_arb; /* manager arbitration */ struct mutex manager_mutex; /* manager exclusion */ - struct idr worker_idr; /* M: worker IDs and iteration */ + struct idr worker_idr; /* M: worker IDs */ + struct list_head workers; /* M: attached workers */ struct completion *detach_completion; /* all workers detached */ struct workqueue_attrs *attrs; /* I: worker attributes */ @@ -363,7 +364,6 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, /** * for_each_pool_worker - iterate through all workers of a worker_pool * @worker: iteration cursor - * @wi: integer used for iteration * @pool: worker_pool to iterate workers of * * This must be called with @pool->manager_mutex. @@ -371,8 +371,8 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, * The if/else clause exists only for the lockdep assertion and can be * ignored. */ -#define for_each_pool_worker(worker, wi, pool) \ - idr_for_each_entry(&(pool)->worker_idr, (worker), (wi)) \ +#define for_each_pool_worker(worker, pool) \ + list_for_each_entry((worker), &(pool)->workers, node) \ if (({ lockdep_assert_held(&pool->manager_mutex); false; })) { } \ else @@ -1674,6 +1674,7 @@ static struct worker *alloc_worker(void) if (worker) { INIT_LIST_HEAD(&worker->entry); INIT_LIST_HEAD(&worker->scheduled); + INIT_LIST_HEAD(&worker->node); /* on creation a worker is in !idle && prep state */ worker->flags = WORKER_PREP; } @@ -1696,7 +1697,8 @@ static void worker_detach_from_pool(struct worker *worker, mutex_lock(&pool->manager_mutex); idr_remove(&pool->worker_idr, worker->id); - if (idr_is_empty(&pool->worker_idr)) + list_del(&worker->node); + if (list_empty(&pool->workers)) detach_completion = pool->detach_completion; mutex_unlock(&pool->manager_mutex); @@ -1772,6 +1774,8 @@ static struct worker *create_worker(struct worker_pool *pool) /* successful, commit the pointer to idr */ idr_replace(&pool->worker_idr, worker, worker->id); + /* successful, attach the worker to the pool */ + list_add_tail(&worker->node, &pool->workers); return worker; @@ -3483,6 +3487,7 @@ static int init_worker_pool(struct worker_pool *pool) mutex_init(&pool->manager_arb); mutex_init(&pool->manager_mutex); idr_init(&pool->worker_idr); + INIT_LIST_HEAD(&pool->workers); INIT_HLIST_NODE(&pool->hash_node); pool->refcnt = 1; @@ -3548,7 +3553,7 @@ static void put_unbound_pool(struct worker_pool *pool) spin_unlock_irq(&pool->lock); mutex_lock(&pool->manager_mutex); - if (!idr_is_empty(&pool->worker_idr)) + if (!list_empty(&pool->workers)) pool->detach_completion = &detach_completion; mutex_unlock(&pool->manager_mutex); @@ -4533,7 +4538,6 @@ static void wq_unbind_fn(struct work_struct *work) int cpu = smp_processor_id(); struct worker_pool *pool; struct worker *worker; - int wi; for_each_cpu_worker_pool(pool, cpu) { WARN_ON_ONCE(cpu != smp_processor_id()); @@ -4548,7 +4552,7 @@ static void wq_unbind_fn(struct work_struct *work) * before the last CPU down must be on the cpu. After * this, they may become diasporas. 
*/ - for_each_pool_worker(worker, wi, pool) + for_each_pool_worker(worker, pool) worker->flags |= WORKER_UNBOUND; pool->flags |= POOL_DISASSOCIATED; @@ -4594,7 +4598,6 @@ static void wq_unbind_fn(struct work_struct *work) static void rebind_workers(struct worker_pool *pool) { struct worker *worker; - int wi; lockdep_assert_held(&pool->manager_mutex); @@ -4605,13 +4608,13 @@ static void rebind_workers(struct worker_pool *pool) * of all workers first and then clear UNBOUND. As we're called * from CPU_ONLINE, the following shouldn't fail. */ - for_each_pool_worker(worker, wi, pool) + for_each_pool_worker(worker, pool) WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask) < 0); spin_lock_irq(&pool->lock); - for_each_pool_worker(worker, wi, pool) { + for_each_pool_worker(worker, pool) { unsigned int worker_flags = worker->flags; /* @@ -4663,7 +4666,6 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) { static cpumask_t cpumask; struct worker *worker; - int wi; lockdep_assert_held(&pool->manager_mutex); @@ -4677,7 +4679,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) return; /* as we're called from CPU_ONLINE, the following shouldn't fail */ - for_each_pool_worker(worker, wi, pool) + for_each_pool_worker(worker, pool) WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask) < 0); } diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 7e2204db0b1a..8888e0672442 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -37,6 +37,8 @@ struct worker { struct task_struct *task; /* I: worker task */ struct worker_pool *pool; /* I: the associated pool */ /* L: for rescuers */ + struct list_head node; /* M: anchored at pool->workers */ + /* M: runs through worker->node */ unsigned long last_active; /* L: last active timestamp */ unsigned int flags; /* X: flags */ -- cgit v1.2.3-59-g8ed1b From 7cda9aae0596d871a8d7a6888d7b447c60e5ab30 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 May 2014 17:46:32 +0800 Subject: workqueue: convert worker_idr to worker_ida We no longer iterate workers via worker_idr and worker_idr is used only for allocating/freeing ID, so we can convert it to worker_ida. By using ida_simple_get/remove(), worker_ida doesn't require external synchronization, so we don't need manager_mutex to protect it and the ID-removal code is allowed to be moved out from worker_detach_from_pool(). In a later patch, worker_detach_from_pool() will be used in rescuers which don't have IDs, so we move the ID-removal code out from worker_detach_from_pool() into worker_thread(). tj: Minor description updates. 
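ida_simple_get() and ida_simple_remove() serialize internally, which is what allows the external locking around ID allocation to be dropped. A self-contained sketch of the allocation pattern, using an illustrative ida rather than the pool's embedded one:

#include <linux/idr.h>
#include <linux/gfp.h>

static DEFINE_IDA(example_worker_ida);

static int example_alloc_id(void)
{
        /* internally synchronized; may sleep with GFP_KERNEL */
        return ida_simple_get(&example_worker_ida, 0, 0, GFP_KERNEL);
}

static void example_free_id(int id)
{
        ida_simple_remove(&example_worker_ida, id);
}
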
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a6c38b717957..092f2098746d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -161,10 +161,11 @@ struct worker_pool { /* see manage_workers() for details on the two manager mutexes */ struct mutex manager_arb; /* manager arbitration */ struct mutex manager_mutex; /* manager exclusion */ - struct idr worker_idr; /* M: worker IDs */ struct list_head workers; /* M: attached workers */ struct completion *detach_completion; /* all workers detached */ + struct ida worker_ida; /* worker IDs for task name */ + struct workqueue_attrs *attrs; /* I: worker attributes */ struct hlist_node hash_node; /* PL: unbound_pool_hash node */ int refcnt; /* PL: refcnt for unbound pools */ @@ -1696,7 +1697,6 @@ static void worker_detach_from_pool(struct worker *worker, struct completion *detach_completion = NULL; mutex_lock(&pool->manager_mutex); - idr_remove(&pool->worker_idr, worker->id); list_del(&worker->node); if (list_empty(&pool->workers)) detach_completion = pool->detach_completion; @@ -1727,11 +1727,8 @@ static struct worker *create_worker(struct worker_pool *pool) lockdep_assert_held(&pool->manager_mutex); - /* - * ID is needed to determine kthread name. Allocate ID first - * without installing the pointer. - */ - id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_KERNEL); + /* ID is needed to determine kthread name */ + id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL); if (id < 0) goto fail; @@ -1772,8 +1769,6 @@ static struct worker *create_worker(struct worker_pool *pool) if (pool->flags & POOL_DISASSOCIATED) worker->flags |= WORKER_UNBOUND; - /* successful, commit the pointer to idr */ - idr_replace(&pool->worker_idr, worker, worker->id); /* successful, attach the worker to the pool */ list_add_tail(&worker->node, &pool->workers); @@ -1781,7 +1776,7 @@ static struct worker *create_worker(struct worker_pool *pool) fail: if (id >= 0) - idr_remove(&pool->worker_idr, id); + ida_simple_remove(&pool->worker_ida, id); kfree(worker); return NULL; } @@ -2250,6 +2245,7 @@ woke_up: worker->task->flags &= ~PF_WQ_WORKER; set_task_comm(worker->task, "kworker/dying"); + ida_simple_remove(&pool->worker_ida, worker->id); worker_detach_from_pool(worker, pool); kfree(worker); return 0; @@ -3486,9 +3482,9 @@ static int init_worker_pool(struct worker_pool *pool) mutex_init(&pool->manager_arb); mutex_init(&pool->manager_mutex); - idr_init(&pool->worker_idr); INIT_LIST_HEAD(&pool->workers); + ida_init(&pool->worker_ida); INIT_HLIST_NODE(&pool->hash_node); pool->refcnt = 1; @@ -3503,7 +3499,7 @@ static void rcu_free_pool(struct rcu_head *rcu) { struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu); - idr_destroy(&pool->worker_idr); + ida_destroy(&pool->worker_ida); free_workqueue_attrs(pool->attrs); kfree(pool); } -- cgit v1.2.3-59-g8ed1b From 4d757c5c81edba2052aae10d5b36dfcb9902b141 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 May 2014 17:46:33 +0800 Subject: workqueue: narrow the protection range of manager_mutex In create_worker(), as pool->worker_ida now uses ida_simple_get()/ida_simple_put() and doesn't require external synchronization, it doesn't need manager_mutex. struct worker allocation and kthread allocation are not visible by any one before attached, so they don't need manager_mutex either. 
The above operations are before the attaching operation which attaches the worker to the pool. Between attaching and starting the worker, the worker is already attached to the pool, so the cpu hotplug will handle cpu-binding for the worker correctly and we don't need the manager_mutex after attaching. The conclusion is that only the attaching operation needs manager_mutex, so we narrow the protection section of manager_mutex in create_worker(). Some comments about manager_mutex are removed, because we will rename it to attach_mutex and add worker_attach_to_pool() later which will be self-explanatory. tj: Minor description updates. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 35 +++++------------------------------ 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 092f2098746d..d6b31ff60c52 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1725,8 +1725,6 @@ static struct worker *create_worker(struct worker_pool *pool) int id = -1; char id_buf[16]; - lockdep_assert_held(&pool->manager_mutex); - /* ID is needed to determine kthread name */ id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL); if (id < 0) @@ -1755,6 +1753,8 @@ static struct worker *create_worker(struct worker_pool *pool) /* prevent userland from meddling with cpumask of workqueue workers */ worker->task->flags |= PF_NO_SETAFFINITY; + mutex_lock(&pool->manager_mutex); + /* * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any * online CPUs. It'll be re-applied when any of the CPUs come up. @@ -1762,7 +1762,7 @@ static struct worker *create_worker(struct worker_pool *pool) set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); /* - * The caller is responsible for ensuring %POOL_DISASSOCIATED + * The pool->manager_mutex ensures %POOL_DISASSOCIATED * remains stable across this function. See the comments above the * flag definition for details. */ @@ -1772,6 +1772,8 @@ static struct worker *create_worker(struct worker_pool *pool) /* successful, attach the worker to the pool */ list_add_tail(&worker->node, &pool->workers); + mutex_unlock(&pool->manager_mutex); + return worker; fail: @@ -1809,8 +1811,6 @@ static int create_and_start_worker(struct worker_pool *pool) { struct worker *worker; - mutex_lock(&pool->manager_mutex); - worker = create_worker(pool); if (worker) { spin_lock_irq(&pool->lock); @@ -1818,8 +1818,6 @@ static int create_and_start_worker(struct worker_pool *pool) spin_unlock_irq(&pool->lock); } - mutex_unlock(&pool->manager_mutex); - return worker ? 0 : -ENOMEM; } @@ -2019,8 +2017,6 @@ static bool manage_workers(struct worker *worker) bool ret = false; /* - * Managership is governed by two mutexes - manager_arb and - * manager_mutex. manager_arb handles arbitration of manager role. * Anyone who successfully grabs manager_arb wins the arbitration * and becomes the manager. mutex_trylock() on pool->manager_arb * failure while holding pool->lock reliably indicates that someone @@ -2029,33 +2025,12 @@ static bool manage_workers(struct worker *worker) * grabbing manager_arb is responsible for actually performing * manager duties. If manager_arb is grabbed and released without * actual management, the pool may stall indefinitely. - * - * manager_mutex is used for exclusion of actual management - * operations. The holder of manager_mutex can be sure that none - * of management operations, including creation and destruction of - * workers, won't take place until the mutex is released. 
Because - * manager_mutex doesn't interfere with manager role arbitration, - * it is guaranteed that the pool's management, while may be - * delayed, won't be disturbed by someone else grabbing - * manager_mutex. */ if (!mutex_trylock(&pool->manager_arb)) return ret; - /* - * With manager arbitration won, manager_mutex would be free in - * most cases. trylock first without dropping @pool->lock. - */ - if (unlikely(!mutex_trylock(&pool->manager_mutex))) { - spin_unlock_irq(&pool->lock); - mutex_lock(&pool->manager_mutex); - spin_lock_irq(&pool->lock); - ret = true; - } - ret |= maybe_create_worker(pool); - mutex_unlock(&pool->manager_mutex); mutex_unlock(&pool->manager_arb); return ret; } -- cgit v1.2.3-59-g8ed1b From 92f9c5c40cc67ffcc5ac7f55fdbd6ae8afc7e0b4 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 May 2014 17:46:34 +0800 Subject: workqueue: rename manager_mutex to attach_mutex manager_mutex is only used to protect the attaching for the pool and the pool->workers list. It protects the pool->workers and operations based on this list, such as: cpu-binding for the workers in the pool->workers the operations to set/clear WORKER_UNBOUND So let's rename manager_mutex to attach_mutex to better reflect its role. This patch is a pure rename. tj: Minor command and description updates. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 44 ++++++++++++++++++++++---------------------- kernel/workqueue_internal.h | 4 ++-- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d6b31ff60c52..38b9ea7c204c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -65,7 +65,7 @@ enum { * be executing on any CPU. The pool behaves as an unbound one. * * Note that DISASSOCIATED should be flipped only while holding - * manager_mutex to avoid changing binding state while + * attach_mutex to avoid changing binding state while * create_worker() is in progress. */ POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ @@ -122,7 +122,7 @@ enum { * cpu or grabbing pool->lock is enough for read access. If * POOL_DISASSOCIATED is set, it's identical to L. * - * M: pool->manager_mutex protected. + * A: pool->attach_mutex protected. * * PL: wq_pool_mutex protected. * @@ -160,8 +160,8 @@ struct worker_pool { /* see manage_workers() for details on the two manager mutexes */ struct mutex manager_arb; /* manager arbitration */ - struct mutex manager_mutex; /* manager exclusion */ - struct list_head workers; /* M: attached workers */ + struct mutex attach_mutex; /* attach/detach exclusion */ + struct list_head workers; /* A: attached workers */ struct completion *detach_completion; /* all workers detached */ struct ida worker_ida; /* worker IDs for task name */ @@ -367,14 +367,14 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, * @worker: iteration cursor * @pool: worker_pool to iterate workers of * - * This must be called with @pool->manager_mutex. + * This must be called with @pool->attach_mutex. * * The if/else clause exists only for the lockdep assertion and can be * ignored. 
*/ #define for_each_pool_worker(worker, pool) \ list_for_each_entry((worker), &(pool)->workers, node) \ - if (({ lockdep_assert_held(&pool->manager_mutex); false; })) { } \ + if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \ else /** @@ -1696,11 +1696,11 @@ static void worker_detach_from_pool(struct worker *worker, { struct completion *detach_completion = NULL; - mutex_lock(&pool->manager_mutex); + mutex_lock(&pool->attach_mutex); list_del(&worker->node); if (list_empty(&pool->workers)) detach_completion = pool->detach_completion; - mutex_unlock(&pool->manager_mutex); + mutex_unlock(&pool->attach_mutex); if (detach_completion) complete(detach_completion); @@ -1753,7 +1753,7 @@ static struct worker *create_worker(struct worker_pool *pool) /* prevent userland from meddling with cpumask of workqueue workers */ worker->task->flags |= PF_NO_SETAFFINITY; - mutex_lock(&pool->manager_mutex); + mutex_lock(&pool->attach_mutex); /* * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any @@ -1762,7 +1762,7 @@ static struct worker *create_worker(struct worker_pool *pool) set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); /* - * The pool->manager_mutex ensures %POOL_DISASSOCIATED + * The pool->attach_mutex ensures %POOL_DISASSOCIATED * remains stable across this function. See the comments above the * flag definition for details. */ @@ -1772,7 +1772,7 @@ static struct worker *create_worker(struct worker_pool *pool) /* successful, attach the worker to the pool */ list_add_tail(&worker->node, &pool->workers); - mutex_unlock(&pool->manager_mutex); + mutex_unlock(&pool->attach_mutex); return worker; @@ -3456,7 +3456,7 @@ static int init_worker_pool(struct worker_pool *pool) (unsigned long)pool); mutex_init(&pool->manager_arb); - mutex_init(&pool->manager_mutex); + mutex_init(&pool->attach_mutex); INIT_LIST_HEAD(&pool->workers); ida_init(&pool->worker_ida); @@ -3513,7 +3513,7 @@ static void put_unbound_pool(struct worker_pool *pool) /* * Become the manager and destroy all workers. Grabbing * manager_arb prevents @pool's workers from blocking on - * manager_mutex. + * attach_mutex. */ mutex_lock(&pool->manager_arb); @@ -3523,10 +3523,10 @@ static void put_unbound_pool(struct worker_pool *pool) WARN_ON(pool->nr_workers || pool->nr_idle); spin_unlock_irq(&pool->lock); - mutex_lock(&pool->manager_mutex); + mutex_lock(&pool->attach_mutex); if (!list_empty(&pool->workers)) pool->detach_completion = &detach_completion; - mutex_unlock(&pool->manager_mutex); + mutex_unlock(&pool->attach_mutex); if (pool->detach_completion) wait_for_completion(pool->detach_completion); @@ -4513,11 +4513,11 @@ static void wq_unbind_fn(struct work_struct *work) for_each_cpu_worker_pool(pool, cpu) { WARN_ON_ONCE(cpu != smp_processor_id()); - mutex_lock(&pool->manager_mutex); + mutex_lock(&pool->attach_mutex); spin_lock_irq(&pool->lock); /* - * We've blocked all manager operations. Make all workers + * We've blocked all attach/detach operations. Make all workers * unbound and set DISASSOCIATED. Before this, all workers * except for the ones which are still executing works from * before the last CPU down must be on the cpu. 
After @@ -4529,7 +4529,7 @@ static void wq_unbind_fn(struct work_struct *work) pool->flags |= POOL_DISASSOCIATED; spin_unlock_irq(&pool->lock); - mutex_unlock(&pool->manager_mutex); + mutex_unlock(&pool->attach_mutex); /* * Call schedule() so that we cross rq->lock and thus can @@ -4570,7 +4570,7 @@ static void rebind_workers(struct worker_pool *pool) { struct worker *worker; - lockdep_assert_held(&pool->manager_mutex); + lockdep_assert_held(&pool->attach_mutex); /* * Restore CPU affinity of all workers. As all idle workers should @@ -4638,7 +4638,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) static cpumask_t cpumask; struct worker *worker; - lockdep_assert_held(&pool->manager_mutex); + lockdep_assert_held(&pool->attach_mutex); /* is @cpu allowed for @pool? */ if (!cpumask_test_cpu(cpu, pool->attrs->cpumask)) @@ -4683,7 +4683,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb, mutex_lock(&wq_pool_mutex); for_each_pool(pool, pi) { - mutex_lock(&pool->manager_mutex); + mutex_lock(&pool->attach_mutex); if (pool->cpu == cpu) { spin_lock_irq(&pool->lock); @@ -4695,7 +4695,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb, restore_unbound_workers_cpumask(pool, cpu); } - mutex_unlock(&pool->manager_mutex); + mutex_unlock(&pool->attach_mutex); } /* update NUMA affinity of unbound workqueues */ diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 8888e0672442..45215870ac6c 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -37,8 +37,8 @@ struct worker { struct task_struct *task; /* I: worker task */ struct worker_pool *pool; /* I: the associated pool */ /* L: for rescuers */ - struct list_head node; /* M: anchored at pool->workers */ - /* M: runs through worker->node */ + struct list_head node; /* A: anchored at pool->workers */ + /* A: runs through worker->node */ unsigned long last_active; /* L: last active timestamp */ unsigned int flags; /* X: flags */ -- cgit v1.2.3-59-g8ed1b From 4736cbf7a4b2a6bbb50a809a6933fb7eb29dc38f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 May 2014 17:46:35 +0800 Subject: workqueue: separate pool-attaching code out from create_worker() Currently, the code to attach a new worker to its pool is embedded in create_worker(). Separating this code out will make the codes clearer and will allow rescuers to share the code path later. tj: Description and comment updates. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 61 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 38b9ea7c204c..b1de6ac4a0e3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -66,7 +66,7 @@ enum { * * Note that DISASSOCIATED should be flipped only while holding * attach_mutex to avoid changing binding state while - * create_worker() is in progress. + * worker_attach_to_pool() is in progress. */ POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ POOL_FREEZING = 1 << 3, /* freeze in progress */ @@ -1682,14 +1682,47 @@ static struct worker *alloc_worker(void) return worker; } +/** + * worker_attach_to_pool() - attach a worker to a pool + * @worker: worker to be attached + * @pool: the target pool + * + * Attach @worker to @pool. Once attached, the %WORKER_UNBOUND flag and + * cpu-binding of @worker are kept coordinated with the pool across + * cpu-[un]hotplugs. 
+ */ +static void worker_attach_to_pool(struct worker *worker, + struct worker_pool *pool) +{ + mutex_lock(&pool->attach_mutex); + + /* + * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any + * online CPUs. It'll be re-applied when any of the CPUs come up. + */ + set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); + + /* + * The pool->attach_mutex ensures %POOL_DISASSOCIATED remains + * stable across this function. See the comments above the + * flag definition for details. + */ + if (pool->flags & POOL_DISASSOCIATED) + worker->flags |= WORKER_UNBOUND; + + list_add_tail(&worker->node, &pool->workers); + + mutex_unlock(&pool->attach_mutex); +} + /** * worker_detach_from_pool() - detach a worker from its pool * @worker: worker which is attached to its pool * @pool: the pool @worker is attached to * - * Undo the attaching which had been done in create_worker(). The caller - * worker shouldn't access to the pool after detached except it has other - * reference to the pool. + * Undo the attaching which had been done in worker_attach_to_pool(). The + * caller worker shouldn't access to the pool after detached except it has + * other reference to the pool. */ static void worker_detach_from_pool(struct worker *worker, struct worker_pool *pool) @@ -1753,26 +1786,8 @@ static struct worker *create_worker(struct worker_pool *pool) /* prevent userland from meddling with cpumask of workqueue workers */ worker->task->flags |= PF_NO_SETAFFINITY; - mutex_lock(&pool->attach_mutex); - - /* - * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any - * online CPUs. It'll be re-applied when any of the CPUs come up. - */ - set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); - - /* - * The pool->attach_mutex ensures %POOL_DISASSOCIATED - * remains stable across this function. See the comments above the - * flag definition for details. - */ - if (pool->flags & POOL_DISASSOCIATED) - worker->flags |= WORKER_UNBOUND; - /* successful, attach the worker to the pool */ - list_add_tail(&worker->node, &pool->workers); - - mutex_unlock(&pool->attach_mutex); + worker_attach_to_pool(worker, pool); return worker; -- cgit v1.2.3-59-g8ed1b From 51697d393922eb643e78ac5db86e8fa5a45b469a Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 May 2014 17:46:36 +0800 Subject: workqueue: use generic attach/detach routine for rescuers There are several problems with the code that rescuers use to bind themselve to the target pool's cpumask. 1) It is very different from how the normal workers bind to cpumask, increasing code complexity and maintenance overhead. 2) The code of cpu-binding for rescuers is complicated. 3) If one or more cpu hotplugs happen while a rescuer is processing its scheduled work items, the rescuer may not stay bound to the cpumask of the pool. This is an allowed behavior, but is still hairy. It will be better if the cpumask of the rescuer is always kept synchronized with the pool across cpu hotplugs. Using generic attach/detach routine will solve the above problems and results in much simpler code. tj: Minor description updates. 
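The shared detach path also carries the last-one-out completion handshake that put_unbound_pool() relies on, so a visiting rescuer is covered by the same teardown synchronization. A self-contained sketch of that pattern, with illustrative stand-in names for the pool and its attach mutex:

#include <linux/completion.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/types.h>

struct group {                          /* stands in for struct worker_pool */
        struct mutex attach_lock;
        struct list_head members;       /* attached workers, incl. a visiting rescuer */
        struct completion *detach_done; /* set by the teardown path */
};

/* detach side: the last one out signals whoever is waiting in teardown */
static void member_detach(struct group *g, struct list_head *node)
{
        struct completion *done = NULL;

        mutex_lock(&g->attach_lock);
        list_del(node);
        if (list_empty(&g->members))
                done = g->detach_done;
        mutex_unlock(&g->attach_lock);

        if (done)
                complete(done);
}

/* teardown side: mirrors what put_unbound_pool() does with detach_completion */
static void group_wait_for_detach(struct group *g)
{
        DECLARE_COMPLETION_ONSTACK(done);
        bool must_wait = false;

        mutex_lock(&g->attach_lock);
        if (!list_empty(&g->members)) {
                g->detach_done = &done;
                must_wait = true;
        }
        mutex_unlock(&g->attach_lock);

        if (must_wait)
                wait_for_completion(&done);
}
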
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 74 ++++++------------------------------------------------ 1 file changed, 8 insertions(+), 66 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b1de6ac4a0e3..6603923b7ede 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1603,70 +1603,6 @@ static void worker_leave_idle(struct worker *worker) list_del_init(&worker->entry); } -/** - * worker_maybe_bind_and_lock - try to bind %current to worker_pool and lock it - * @pool: target worker_pool - * - * Bind %current to the cpu of @pool if it is associated and lock @pool. - * - * Works which are scheduled while the cpu is online must at least be - * scheduled to a worker which is bound to the cpu so that if they are - * flushed from cpu callbacks while cpu is going down, they are - * guaranteed to execute on the cpu. - * - * This function is to be used by unbound workers and rescuers to bind - * themselves to the target cpu and may race with cpu going down or - * coming online. kthread_bind() can't be used because it may put the - * worker to already dead cpu and set_cpus_allowed_ptr() can't be used - * verbatim as it's best effort and blocking and pool may be - * [dis]associated in the meantime. - * - * This function tries set_cpus_allowed() and locks pool and verifies the - * binding against %POOL_DISASSOCIATED which is set during - * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker - * enters idle state or fetches works without dropping lock, it can - * guarantee the scheduling requirement described in the first paragraph. - * - * CONTEXT: - * Might sleep. Called without any lock but returns with pool->lock - * held. - * - * Return: - * %true if the associated pool is online (@worker is successfully - * bound), %false if offline. - */ -static bool worker_maybe_bind_and_lock(struct worker_pool *pool) -__acquires(&pool->lock) -{ - while (true) { - /* - * The following call may fail, succeed or succeed - * without actually migrating the task to the cpu if - * it races with cpu hotunplug operation. Verify - * against POOL_DISASSOCIATED. - */ - if (!(pool->flags & POOL_DISASSOCIATED)) - set_cpus_allowed_ptr(current, pool->attrs->cpumask); - - spin_lock_irq(&pool->lock); - if (pool->flags & POOL_DISASSOCIATED) - return false; - if (task_cpu(current) == pool->cpu && - cpumask_equal(¤t->cpus_allowed, pool->attrs->cpumask)) - return true; - spin_unlock_irq(&pool->lock); - - /* - * We've raced with CPU hot[un]plug. Give it a breather - * and retry migration. cond_resched() is required here; - * otherwise, we might deadlock against cpu_stop trying to - * bring down the CPU on non-preemptive kernel. - */ - cpu_relax(); - cond_resched(); - } -} - static struct worker *alloc_worker(void) { struct worker *worker; @@ -2361,8 +2297,9 @@ repeat: spin_unlock_irq(&wq_mayday_lock); - /* migrate to the target cpu if possible */ - worker_maybe_bind_and_lock(pool); + worker_attach_to_pool(rescuer, pool); + + spin_lock_irq(&pool->lock); rescuer->pool = pool; /* @@ -2375,6 +2312,11 @@ repeat: move_linked_works(work, scheduled, &n); process_scheduled_works(rescuer); + spin_unlock_irq(&pool->lock); + + worker_detach_from_pool(rescuer, pool); + + spin_lock_irq(&pool->lock); /* * Put the reference grabbed by send_mayday(). 
@pool won't -- cgit v1.2.3-59-g8ed1b From 73e4354444eef5251e5cdfd388ab02ef9f2e727e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 22 May 2014 16:42:41 +0800 Subject: workqueue: declare system_highpri_wq system_highpri_wq is exported to modules via EXPORT_SYMBOL_GPL(), but it was forgotten to be declared in workqueue.h. So we add the declaration and a short description for it. tj: Minor comment tweak. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index d93d28b2ec73..b263b29bd98b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -340,6 +340,9 @@ enum { * short queue flush time. Don't queue works which can run for too * long. * + * system_highpri_wq is similar to system_wq but for work items which + * require WQ_HIGHPRI. + * * system_long_wq is similar to system_wq but may host long running * works. Queue flushing might take relatively long. * @@ -358,6 +361,7 @@ enum { * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. */ extern struct workqueue_struct *system_wq; +extern struct workqueue_struct *system_highpri_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; -- cgit v1.2.3-59-g8ed1b From 79bc251f0e0aea67bc230c530f7fa57f66f9cdf3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 22 May 2014 16:43:44 +0800 Subject: workqueue: remove unused WORK_CPU_END WORK_CPU_END is totally unused since 4e8b22bd1a37 ("workqueue: fix pool ID allocation leakage and remove BUILD_BUG_ON() in init_workqueues"). It should be removed. After it is removed, the comment "special cpu IDs" is not precise due to there is only one special CPU ID (WORK_CPU_UNBOUND) left, so we also change this comment to the description for WORK_CPU_UNBOUND. tj: Minor description and comment tweaks. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b263b29bd98b..b8aee9453f22 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -56,9 +56,8 @@ enum { WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS) - 1, WORK_NO_COLOR = WORK_NR_COLORS, - /* special cpu IDs */ + /* not bound to any CPU, prefer the local CPU */ WORK_CPU_UNBOUND = NR_CPUS, - WORK_CPU_END = NR_CPUS + 1, /* * Reserve 7 bits off of pwq pointer w/ debugobjects turned off. -- cgit v1.2.3-59-g8ed1b From cafebac153ae54fd0aba5d4ad28af995532c5375 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 22 May 2014 16:43:56 +0800 Subject: workqueue: remove unused work_clear_pending() In 8930caba3dbd ("workqueue: disable irq while manipulating PENDING"), setting last CPU and clearing PENDING got merged into a single operation (set_work_cpu_and_clear_pending()), which resulted that the internal routine work_clear_pending() is not used any more. tj: Minor description tweak. 
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b8aee9453f22..a0cc2e95ed1b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -273,13 +273,6 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define delayed_work_pending(w) \ work_pending(&(w)->work) -/** - * work_clear_pending - for internal use only, mark a work item as not pending - * @work: The work item in question - */ -#define work_clear_pending(work) \ - clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) - /* * Workqueue flags and constants. For details, please refer to * Documentation/workqueue.txt. -- cgit v1.2.3-59-g8ed1b From 1037de36edae30f1ddc0a2532decd50f92ac4901 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 22 May 2014 16:44:07 +0800 Subject: workqueue: rename first_worker() to first_idle_worker() first_worker() actually returns the first idle workers, the name first_idle_worker() which is self-commnet will be better. All the callers of first_worker() expect it returns an idle worker, the name first_idle_worker() with "idle" notation makes reviewers happier. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6603923b7ede..dd107b83f45f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -773,8 +773,8 @@ static bool too_many_workers(struct worker_pool *pool) * Wake up functions. */ -/* Return the first worker. Safe with preemption disabled */ -static struct worker *first_worker(struct worker_pool *pool) +/* Return the first idle worker. Safe with preemption disabled */ +static struct worker *first_idle_worker(struct worker_pool *pool) { if (unlikely(list_empty(&pool->idle_list))) return NULL; @@ -793,7 +793,7 @@ static struct worker *first_worker(struct worker_pool *pool) */ static void wake_up_worker(struct worker_pool *pool) { - struct worker *worker = first_worker(pool); + struct worker *worker = first_idle_worker(pool); if (likely(worker)) wake_up_process(worker->task); @@ -867,7 +867,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu) */ if (atomic_dec_and_test(&pool->nr_running) && !list_empty(&pool->worklist)) - to_wakeup = first_worker(pool); + to_wakeup = first_idle_worker(pool); return to_wakeup ? to_wakeup->task : NULL; } @@ -3475,7 +3475,7 @@ static void put_unbound_pool(struct worker_pool *pool) mutex_lock(&pool->manager_arb); spin_lock_irq(&pool->lock); - while ((worker = first_worker(pool))) + while ((worker = first_idle_worker(pool))) destroy_worker(worker); WARN_ON(pool->nr_workers || pool->nr_idle); spin_unlock_irq(&pool->lock); -- cgit v1.2.3-59-g8ed1b From 74b414ead1133972817d3ce7b934356150d03a7d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 22 May 2014 19:01:16 +0800 Subject: workqueue: remove the confusing POOL_FREEZING Currently, the global freezing state is propagated to worker_pools via POOL_FREEZING and then to each workqueue; however, the middle step - propagation through worker_pools - can be skipped as long as one or more max_active adjustments happens for each workqueue after the update to the global state is visible. 
From cafebac153ae54fd0aba5d4ad28af995532c5375 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Thu, 22 May 2014 16:43:56 +0800
Subject: workqueue: remove unused work_clear_pending()

In 8930caba3dbd ("workqueue: disable irq while manipulating PENDING"),
setting the last CPU and clearing PENDING were merged into a single
operation (set_work_cpu_and_clear_pending()), which left the internal
routine work_clear_pending() unused.

tj: Minor description tweak.

Signed-off-by: Lai Jiangshan
Signed-off-by: Tejun Heo
---
 include/linux/workqueue.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b8aee9453f22..a0cc2e95ed1b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -273,13 +273,6 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 #define delayed_work_pending(w) \
 	work_pending(&(w)->work)

-/**
- * work_clear_pending - for internal use only, mark a work item as not pending
- * @work: The work item in question
- */
-#define work_clear_pending(work) \
-	clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
-
 /*
  * Workqueue flags and constants. For details, please refer to
  * Documentation/workqueue.txt.
-- cgit v1.2.3-59-g8ed1b

From 1037de36edae30f1ddc0a2532decd50f92ac4901 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Thu, 22 May 2014 16:44:07 +0800
Subject: workqueue: rename first_worker() to first_idle_worker()

first_worker() actually returns the first idle worker, so the
self-documenting name first_idle_worker() is better.  All callers of
first_worker() expect it to return an idle worker; spelling out "idle"
in the name makes the code easier to review.

Signed-off-by: Lai Jiangshan
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6603923b7ede..dd107b83f45f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -773,8 +773,8 @@ static bool too_many_workers(struct worker_pool *pool)
  * Wake up functions.
  */

-/* Return the first worker. Safe with preemption disabled */
-static struct worker *first_worker(struct worker_pool *pool)
+/* Return the first idle worker. Safe with preemption disabled */
+static struct worker *first_idle_worker(struct worker_pool *pool)
 {
 	if (unlikely(list_empty(&pool->idle_list)))
 		return NULL;
@@ -793,7 +793,7 @@ static struct worker *first_worker(struct worker_pool *pool)
  */
 static void wake_up_worker(struct worker_pool *pool)
 {
-	struct worker *worker = first_worker(pool);
+	struct worker *worker = first_idle_worker(pool);

 	if (likely(worker))
 		wake_up_process(worker->task);
@@ -867,7 +867,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
 	 */
 	if (atomic_dec_and_test(&pool->nr_running) &&
 	    !list_empty(&pool->worklist))
-		to_wakeup = first_worker(pool);
+		to_wakeup = first_idle_worker(pool);

 	return to_wakeup ? to_wakeup->task : NULL;
 }
@@ -3475,7 +3475,7 @@ static void put_unbound_pool(struct worker_pool *pool)
 	mutex_lock(&pool->manager_arb);

 	spin_lock_irq(&pool->lock);
-	while ((worker = first_worker(pool)))
+	while ((worker = first_idle_worker(pool)))
 		destroy_worker(worker);
 	WARN_ON(pool->nr_workers || pool->nr_idle);
 	spin_unlock_irq(&pool->lock);
-- cgit v1.2.3-59-g8ed1b

From 74b414ead1133972817d3ce7b934356150d03a7d Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Thu, 22 May 2014 19:01:16 +0800
Subject: workqueue: remove the confusing POOL_FREEZING

Currently, the global freezing state is propagated to worker_pools via
POOL_FREEZING and then to each workqueue; however, the middle step -
propagation through worker_pools - can be skipped as long as one or
more max_active adjustments happen for each workqueue after the update
to the global state is visible.

The global workqueue freezing state and the max_active adjustments
during workqueue creation and [un]freezing are serialized with
wq_pool_mutex, so it's trivial to guarantee that max_actives stay in
sync with the global freezing state.

POOL_FREEZING is therefore unnecessary; it makes the code more
confusing and complicates freeze_workqueues_begin() and
thaw_workqueues() by requiring them to walk through all pools.

Remove POOL_FREEZING and use workqueue_freezing directly instead.

tj: Description and comment updates.

Signed-off-by: Lai Jiangshan
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 32 +++++++-------------------------
 1 file changed, 7 insertions(+), 25 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index dd107b83f45f..bc3c18892b7d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -69,7 +69,6 @@ enum {
 	 * worker_attach_to_pool() is in progress.
 	 */
 	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */
-	POOL_FREEZING		= 1 << 3,	/* freeze in progress */

 	/* worker flags */
 	WORKER_DIE		= 1 << 1,	/* die die die */
@@ -3533,9 +3532,6 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
 	if (!pool || init_worker_pool(pool) < 0)
 		goto fail;

-	if (workqueue_freezing)
-		pool->flags |= POOL_FREEZING;
-
 	lockdep_set_subclass(&pool->lock, 1);	/* see put_pwq() */
 	copy_workqueue_attrs(pool->attrs, attrs);

@@ -3642,7 +3638,12 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)

 	spin_lock_irq(&pwq->pool->lock);

-	if (!freezable || !(pwq->pool->flags & POOL_FREEZING)) {
+	/*
+	 * During [un]freezing, the caller is responsible for ensuring that
+	 * this function is called at least once after @workqueue_freezing
+	 * is updated and visible.
+	 */
+	if (!freezable || !workqueue_freezing) {
 		pwq->max_active = wq->saved_max_active;

 		while (!list_empty(&pwq->delayed_works) &&
@@ -4751,24 +4752,14 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
  */
 void freeze_workqueues_begin(void)
 {
-	struct worker_pool *pool;
 	struct workqueue_struct *wq;
 	struct pool_workqueue *pwq;
-	int pi;

 	mutex_lock(&wq_pool_mutex);

 	WARN_ON_ONCE(workqueue_freezing);
 	workqueue_freezing = true;

-	/* set FREEZING */
-	for_each_pool(pool, pi) {
-		spin_lock_irq(&pool->lock);
-		WARN_ON_ONCE(pool->flags & POOL_FREEZING);
-		pool->flags |= POOL_FREEZING;
-		spin_unlock_irq(&pool->lock);
-	}
-
 	list_for_each_entry(wq, &workqueues, list) {
 		mutex_lock(&wq->mutex);
 		for_each_pwq(pwq, wq)
@@ -4838,21 +4829,13 @@ void thaw_workqueues(void)
 {
 	struct workqueue_struct *wq;
 	struct pool_workqueue *pwq;
-	struct worker_pool *pool;
-	int pi;

 	mutex_lock(&wq_pool_mutex);

 	if (!workqueue_freezing)
 		goto out_unlock;

-	/* clear FREEZING */
-	for_each_pool(pool, pi) {
-		spin_lock_irq(&pool->lock);
-		WARN_ON_ONCE(!(pool->flags & POOL_FREEZING));
-		pool->flags &= ~POOL_FREEZING;
-		spin_unlock_irq(&pool->lock);
-	}
+	workqueue_freezing = false;

 	/* restore max_active and repopulate worklist */
 	list_for_each_entry(wq, &workqueues, list) {
@@ -4862,7 +4845,6 @@ void thaw_workqueues(void)
 		mutex_unlock(&wq->mutex);
 	}

-	workqueue_freezing = false;
 out_unlock:
 	mutex_unlock(&wq_pool_mutex);
 }
-- cgit v1.2.3-59-g8ed1b
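The max_active handling above only matters for freezable workqueues. As a rough illustration of the user-facing side (the queue name and init function below are made up, not taken from the patch): a driver whose work must not execute while the system is being frozen allocates its queue with WQ_FREEZABLE; after freeze_workqueues_begin() runs, pwq_adjust_max_active() drops max_active to 0 so newly queued items sit on the delayed list until thaw_workqueues() restores wq->saved_max_active.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_fs_wq;	/* illustrative name */

static int __init my_driver_init(void)
{
	/* 0 = default max_active; WQ_FREEZABLE ties the queue to system freeze */
	my_fs_wq = alloc_workqueue("my_fs_wq", WQ_FREEZABLE, 0);
	if (!my_fs_wq)
		return -ENOMEM;
	return 0;
}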
From 015af06e103fa47af29ada0f564301c81d4973b2 Mon Sep 17 00:00:00 2001
From: Valdis Kletnieks
Date: Tue, 27 May 2014 14:28:59 -0400
Subject: kernel/workqueue.c: pr_warning/pr_warn & printk/pr_info

This commit did an incorrect printk->pr_info conversion.  If we were
really converting to pr_info(), we would have to drop the log_level
parameter.

The problem is that this is called (indirectly) by
show_regs_print_info(), which is called with various log_levels
(anywhere from _INFO up to _EMERG).  So leave it as a printk() call so
that the desired log_level is applied.

Not a full revert, as the other half of the patch is correct.

Signed-off-by: Valdis Kletnieks
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index bc3c18892b7d..90a0fa592b72 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4440,7 +4440,7 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
 	probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);

 	if (fn || name[0] || desc[0]) {
-		pr_info("%sWorkqueue: %s %pf", log_lvl, name, fn);
+		printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
 		if (desc[0])
 			pr_cont(" (%s)", desc);
 		pr_cont("\n");
-- cgit v1.2.3-59-g8ed1b
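For context on why the caller-supplied level matters: print_worker_info() is meant to be dropped into whatever dump is already in progress, so callers pass their own log level as the first argument and it gets prefixed onto the "Workqueue:" line. A hypothetical caller (the function name below is made up; show_regs_print_info() is the real in-tree user) might look like:

#include <linux/printk.h>
#include <linux/sched.h>

/* sketch of a caller; in-tree users forward the level of the surrounding dump */
static void my_dump_task_context(struct task_struct *task)
{
	/* an oops/panic path would typically pass KERN_EMERG here */
	print_worker_info(KERN_EMERG, task);
}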