author		2022-02-28 23:12:55 -0800
committer	2022-02-28 23:12:55 -0800
commit		1136fa0c07de570dc17858745af8be169d1440ba (patch)
tree		3221b003517dd3cb13df5ba4b85637cd9ed82692 /kernel/workqueue.c
parent		Input: samsung-keypad - properly state IOMEM dependency (diff)
parent		Linux 5.17-rc4 (diff)
Merge tag 'v5.17-rc4' into for-linus
Merge with mainline to get the Intel ASoC generic helpers header and
other changes.
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--	kernel/workqueue.c	290
1 file changed, 153 insertions(+), 137 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1b3eb1e9531f..33f1106b4f99 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -154,6 +154,9 @@ struct worker_pool {
 
 	unsigned long		watchdog_ts;	/* L: watchdog timestamp */
 
+	/* The current concurrency level. */
+	atomic_t		nr_running;
+
 	struct list_head	worklist;	/* L: list of pending works */
 
 	int			nr_workers;	/* L: total number of workers */
@@ -178,18 +181,11 @@ struct worker_pool {
 	int			refcnt;		/* PL: refcnt for unbound pools */
 
 	/*
-	 * The current concurrency level. As it's likely to be accessed
-	 * from other CPUs during try_to_wake_up(), put it in a separate
-	 * cacheline.
-	 */
-	atomic_t		nr_running ____cacheline_aligned_in_smp;
-
-	/*
 	 * Destruction of pool is RCU protected to allow dereferences
 	 * from get_work_pool().
 	 */
 	struct rcu_head		rcu;
-} ____cacheline_aligned_in_smp;
+};
 
 /*
  * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
@@ -375,6 +371,7 @@ EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
 static int worker_thread(void *__worker);
 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
 static void show_pwq(struct pool_workqueue *pwq);
+static void show_one_worker_pool(struct worker_pool *pool);
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
@@ -867,8 +864,17 @@ void wq_worker_running(struct task_struct *task)
 
 	if (!worker->sleeping)
 		return;
+
+	/*
+	 * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
+	 * and the nr_running increment below, we may ruin the nr_running reset
+	 * and leave with an unexpected pool->nr_running == 1 on the newly unbound
+	 * pool. Protect against such race.
+	 */
+	preempt_disable();
 	if (!(worker->flags & WORKER_NOT_RUNNING))
 		atomic_inc(&worker->pool->nr_running);
+	preempt_enable();
 	worker->sleeping = 0;
 }
 
@@ -877,8 +883,7 @@ void wq_worker_running(struct task_struct *task)
  * @task: task going to sleep
  *
  * This function is called from schedule() when a busy worker is
- * going to sleep. Preemption needs to be disabled to protect ->sleeping
- * assignment.
+ * going to sleep.
  */
 void wq_worker_sleeping(struct task_struct *task)
 {
@@ -903,6 +908,16 @@ void wq_worker_sleeping(struct task_struct *task)
 	raw_spin_lock_irq(&pool->lock);
 
 	/*
+	 * Recheck in case unbind_workers() preempted us. We don't
+	 * want to decrement nr_running after the worker is unbound
+	 * and nr_running has been reset.
+	 */
+	if (worker->flags & WORKER_NOT_RUNNING) {
+		raw_spin_unlock_irq(&pool->lock);
+		return;
+	}
+
+	/*
 	 * The counterpart of the following dec_and_test, implied mb,
 	 * worklist not empty test sequence is in insert_work().
 	 * Please read comment there.
@@ -1350,7 +1365,7 @@ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
 	struct worker_pool *pool = pwq->pool;
 
 	/* record the work call stack in order to print it in KASAN reports */
-	kasan_record_aux_stack(work);
+	kasan_record_aux_stack_noalloc(work);
 
 	/* we own @work, set data and link */
 	set_work_pwq(work, pwq, extra_flags);
@@ -1530,7 +1545,8 @@ out:
  * @work: work to queue
  *
  * We queue the work to a specific CPU, the caller must ensure it
- * can't go away.
+ * can't go away. Callers that fail to ensure that the specified
+ * CPU cannot go away will execute on a randomly chosen CPU.
  *
 * Return: %false if @work was already on a queue, %true otherwise.
 */
@@ -1810,14 +1826,8 @@ static void worker_enter_idle(struct worker *worker)
 	if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
 		mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
 
-	/*
-	 * Sanity check nr_running. Because unbind_workers() releases
-	 * pool->lock between setting %WORKER_UNBOUND and zapping
-	 * nr_running, the warning may trigger spuriously. Check iff
-	 * unbind is not in progress.
-	 */
-	WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
-		     pool->nr_workers == pool->nr_idle &&
+	/* Sanity check nr_running. */
+	WARN_ON_ONCE(pool->nr_workers == pool->nr_idle &&
 		     atomic_read(&pool->nr_running));
 }
 
@@ -4447,7 +4457,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 			raw_spin_unlock_irq(&pwq->pool->lock);
 			mutex_unlock(&wq->mutex);
 			mutex_unlock(&wq_pool_mutex);
-			show_workqueue_state();
+			show_one_workqueue(wq);
 			return;
 		}
 		raw_spin_unlock_irq(&pwq->pool->lock);
@@ -4797,97 +4807,116 @@ static void show_pwq(struct pool_workqueue *pwq)
 }
 
 /**
- * show_workqueue_state - dump workqueue state
- *
- * Called from a sysrq handler or try_to_freeze_tasks() and prints out
- * all busy workqueues and pools.
+ * show_one_workqueue - dump state of specified workqueue
+ * @wq: workqueue whose state will be printed
  */
-void show_workqueue_state(void)
+void show_one_workqueue(struct workqueue_struct *wq)
 {
-	struct workqueue_struct *wq;
-	struct worker_pool *pool;
+	struct pool_workqueue *pwq;
+	bool idle = true;
 	unsigned long flags;
-	int pi;
-
-	rcu_read_lock();
-
-	pr_info("Showing busy workqueues and worker pools:\n");
-
-	list_for_each_entry_rcu(wq, &workqueues, list) {
-		struct pool_workqueue *pwq;
-		bool idle = true;
 
-		for_each_pwq(pwq, wq) {
-			if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
-				idle = false;
-				break;
-			}
+	for_each_pwq(pwq, wq) {
+		if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
+			idle = false;
+			break;
 		}
-		if (idle)
-			continue;
+	}
+	if (idle) /* Nothing to print for idle workqueue */
+		return;
 
-		pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
+	pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
 
-		for_each_pwq(pwq, wq) {
-			raw_spin_lock_irqsave(&pwq->pool->lock, flags);
-			if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
-				/*
-				 * Defer printing to avoid deadlocks in console
-				 * drivers that queue work while holding locks
-				 * also taken in their write paths.
-				 */
-				printk_deferred_enter();
-				show_pwq(pwq);
-				printk_deferred_exit();
-			}
-			raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
+	for_each_pwq(pwq, wq) {
+		raw_spin_lock_irqsave(&pwq->pool->lock, flags);
+		if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
 			/*
-			 * We could be printing a lot from atomic context, e.g.
-			 * sysrq-t -> show_workqueue_state(). Avoid triggering
-			 * hard lockup.
+			 * Defer printing to avoid deadlocks in console
+			 * drivers that queue work while holding locks
+			 * also taken in their write paths.
 			 */
-			touch_nmi_watchdog();
-		}
-	}
-
-	for_each_pool(pool, pi) {
-		struct worker *worker;
-		bool first = true;
-
-		raw_spin_lock_irqsave(&pool->lock, flags);
-		if (pool->nr_workers == pool->nr_idle)
-			goto next_pool;
-		/*
-		 * Defer printing to avoid deadlocks in console drivers that
-		 * queue work while holding locks also taken in their write
-		 * paths.
-		 */
-		printk_deferred_enter();
-		pr_info("pool %d:", pool->id);
-		pr_cont_pool_info(pool);
-		pr_cont(" hung=%us workers=%d",
-			jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
-			pool->nr_workers);
-		if (pool->manager)
-			pr_cont(" manager: %d",
-				task_pid_nr(pool->manager->task));
-		list_for_each_entry(worker, &pool->idle_list, entry) {
-			pr_cont(" %s%d", first ? "idle: " : "",
-				task_pid_nr(worker->task));
-			first = false;
+			printk_deferred_enter();
+			show_pwq(pwq);
+			printk_deferred_exit();
 		}
-		pr_cont("\n");
-		printk_deferred_exit();
-	next_pool:
-		raw_spin_unlock_irqrestore(&pool->lock, flags);
+		raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
 		/*
 		 * We could be printing a lot from atomic context, e.g.
-		 * sysrq-t -> show_workqueue_state(). Avoid triggering
+		 * sysrq-t -> show_all_workqueues(). Avoid triggering
 		 * hard lockup.
 		 */
 		touch_nmi_watchdog();
 	}
+}
+
+/**
+ * show_one_worker_pool - dump state of specified worker pool
+ * @pool: worker pool whose state will be printed
+ */
+static void show_one_worker_pool(struct worker_pool *pool)
+{
+	struct worker *worker;
+	bool first = true;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pool->lock, flags);
+	if (pool->nr_workers == pool->nr_idle)
+		goto next_pool;
+	/*
+	 * Defer printing to avoid deadlocks in console drivers that
+	 * queue work while holding locks also taken in their write
+	 * paths.
+	 */
+	printk_deferred_enter();
+	pr_info("pool %d:", pool->id);
+	pr_cont_pool_info(pool);
+	pr_cont(" hung=%us workers=%d",
+		jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
+		pool->nr_workers);
+	if (pool->manager)
+		pr_cont(" manager: %d",
+			task_pid_nr(pool->manager->task));
+	list_for_each_entry(worker, &pool->idle_list, entry) {
+		pr_cont(" %s%d", first ? "idle: " : "",
+			task_pid_nr(worker->task));
+		first = false;
+	}
+	pr_cont("\n");
+	printk_deferred_exit();
+next_pool:
+	raw_spin_unlock_irqrestore(&pool->lock, flags);
+	/*
+	 * We could be printing a lot from atomic context, e.g.
+	 * sysrq-t -> show_all_workqueues(). Avoid triggering
+	 * hard lockup.
+	 */
+	touch_nmi_watchdog();
+
+}
+
+/**
+ * show_all_workqueues - dump workqueue state
+ *
+ * Called from a sysrq handler or try_to_freeze_tasks() and prints out
+ * all busy workqueues and pools.
+ */
+void show_all_workqueues(void)
+{
+	struct workqueue_struct *wq;
+	struct worker_pool *pool;
+	int pi;
+
+	rcu_read_lock();
+
+	pr_info("Showing busy workqueues and worker pools:\n");
+
+	list_for_each_entry_rcu(wq, &workqueues, list)
+		show_one_workqueue(wq);
+
+	for_each_pool(pool, pi)
+		show_one_worker_pool(pool);
 
 	rcu_read_unlock();
 }
@@ -4959,38 +4988,22 @@ static void unbind_workers(int cpu)
 		/*
 		 * We've blocked all attach/detach operations. Make all workers
 		 * unbound and set DISASSOCIATED. Before this, all workers
-		 * except for the ones which are still executing works from
-		 * before the last CPU down must be on the cpu. After
-		 * this, they may become diasporas.
+		 * must be on the cpu. After this, they may become diasporas.
+		 * And the preemption disabled section in their sched callbacks
+		 * are guaranteed to see WORKER_UNBOUND since the code here
+		 * is on the same cpu.
 		 */
 		for_each_pool_worker(worker, pool)
 			worker->flags |= WORKER_UNBOUND;
 
 		pool->flags |= POOL_DISASSOCIATED;
 
-		raw_spin_unlock_irq(&pool->lock);
-
-		for_each_pool_worker(worker, pool) {
-			kthread_set_per_cpu(worker->task, -1);
-			WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
-		}
-
-		mutex_unlock(&wq_pool_attach_mutex);
-
 		/*
-		 * Call schedule() so that we cross rq->lock and thus can
-		 * guarantee sched callbacks see the %WORKER_UNBOUND flag.
-		 * This is necessary as scheduler callbacks may be invoked
-		 * from other cpus.
-		 */
-		schedule();
-
-		/*
-		 * Sched callbacks are disabled now. Zap nr_running.
-		 * After this, nr_running stays zero and need_more_worker()
-		 * and keep_working() are always true as long as the
-		 * worklist is not empty. This pool now behaves as an
-		 * unbound (in terms of concurrency management) pool which
+		 * The handling of nr_running in sched callbacks are disabled
+		 * now. Zap nr_running. After this, nr_running stays zero and
+		 * need_more_worker() and keep_working() are always true as
+		 * long as the worklist is not empty. This pool now behaves as
+		 * an unbound (in terms of concurrency management) pool which
 		 * are served by workers tied to the pool.
 		 */
 		atomic_set(&pool->nr_running, 0);
@@ -5000,9 +5013,16 @@ static void unbind_workers(int cpu)
 		 * worker blocking could lead to lengthy stalls. Kick off
 		 * unbound chain execution of currently pending work items.
 		 */
-		raw_spin_lock_irq(&pool->lock);
 		wake_up_worker(pool);
+
 		raw_spin_unlock_irq(&pool->lock);
+
+		for_each_pool_worker(worker, pool) {
+			kthread_set_per_cpu(worker->task, -1);
+			WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
+		}
+
+		mutex_unlock(&wq_pool_attach_mutex);
 	}
 }
 
@@ -5039,17 +5059,6 @@ static void rebind_workers(struct worker_pool *pool)
 		unsigned int worker_flags = worker->flags;
 
 		/*
-		 * A bound idle worker should actually be on the runqueue
-		 * of the associated CPU for local wake-ups targeting it to
-		 * work. Kick all idle workers so that they migrate to the
-		 * associated CPU. Doing this in the same loop as
-		 * replacing UNBOUND with REBOUND is safe as no worker will
-		 * be bound before @pool->lock is released.
-		 */
-		if (worker_flags & WORKER_IDLE)
-			wake_up_process(worker->task);
-
-		/*
 		 * We want to clear UNBOUND but can't directly call
 		 * worker_clr_flags() or adjust nr_running. Atomically
 		 * replace UNBOUND with another NOT_RUNNING flag REBOUND.
@@ -5384,9 +5393,6 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
 	int ret = -EINVAL;
 	cpumask_var_t saved_cpumask;
 
-	if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
-		return -ENOMEM;
-
 	/*
 	 * Not excluding isolated cpus on purpose.
 	 * If the user wishes to include them, we allow that.
@@ -5394,6 +5400,15 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
 	cpumask_and(cpumask, cpumask, cpu_possible_mask);
 	if (!cpumask_empty(cpumask)) {
 		apply_wqattrs_lock();
+		if (cpumask_equal(cpumask, wq_unbound_cpumask)) {
+			ret = 0;
+			goto out_unlock;
+		}
+
+		if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
 
 		/* save the old wq_unbound_cpumask. */
 		cpumask_copy(saved_cpumask, wq_unbound_cpumask);
@@ -5406,10 +5421,11 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
 		if (ret < 0)
 			cpumask_copy(wq_unbound_cpumask, saved_cpumask);
 
+		free_cpumask_var(saved_cpumask);
+out_unlock:
 		apply_wqattrs_unlock();
 	}
 
-	free_cpumask_var(saved_cpumask);
 	return ret;
 }
 
@@ -5869,7 +5885,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
 	rcu_read_unlock();
 
 	if (lockup_detected)
-		show_workqueue_state();
+		show_all_workqueues();
 
 	wq_watchdog_reset_touched();
 	mod_timer(&wq_watchdog_timer, jiffies + thresh);
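The wq_worker_running() hunk above closes a race by wrapping the WORKER_NOT_RUNNING test and the nr_running increment in a preempt_disable()/preempt_enable() pair, so that unbind_workers(), which resets nr_running on the same CPU, cannot run between the two steps. Below is a minimal userspace sketch of that pattern, assuming only POSIX threads; every name in it is hypothetical, and a mutex stands in for the kernel's preemption control:

/*
 * Userspace analogue (illustration only, not kernel code) of the race
 * closed in wq_worker_running(): if the flag test and the counter
 * increment are not atomic with respect to the resetter, the counter
 * can be left at 1 after the reset. The mutex stands in for
 * preempt_disable()/preempt_enable().
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool not_running;	/* analogue of WORKER_NOT_RUNNING */
static int nr_running;		/* analogue of pool->nr_running */

/* analogue of wq_worker_running(): test and increment as one unit */
static void worker_running(void)
{
	pthread_mutex_lock(&lock);	/* kernel: preempt_disable() */
	if (!not_running)
		nr_running++;
	pthread_mutex_unlock(&lock);	/* kernel: preempt_enable() */
}

/* analogue of unbind_workers(): mark unbound, then zap the counter */
static void *unbind(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	not_running = true;
	nr_running = 0;
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, unbind, NULL);
	worker_running();
	pthread_join(t, NULL);
	/* after the join the counter is always 0: the reset is never half-missed */
	printf("nr_running=%d not_running=%d\n", nr_running, (int)not_running);
	return 0;
}

Whichever side takes the lock first, the invariant holds; without it (or, in the kernel, with preemption enabled between the test and the increment) the unbind could land between the two steps and leave nr_running at 1 on an unbound pool, which is exactly what the added comment warns about. The matching recheck under pool->lock in wq_worker_sleeping() guards the decrement side the same way.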
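The largest hunk splits the old show_workqueue_state() into show_one_workqueue() and show_one_worker_pool(), with show_all_workqueues() reduced to a loop over both, so that destroy_workqueue() can dump only the workqueue that still has pending work instead of system-wide state. The following compilable sketch shows just the shape of that refactor, with simplified stand-in types rather than the real kernel structures:

#include <stddef.h>
#include <stdio.h>

/* simplified stand-ins for struct workqueue_struct / struct worker_pool */
struct wq   { const char *name; int busy; };
struct pool { int id; int busy; };

static struct wq   wqs[]   = { { "events", 1 }, { "mm_percpu_wq", 0 } };
static struct pool pools[] = { { 0, 1 }, { 1, 0 } };

/* per-object helper: print one workqueue, stay silent when it is idle */
static void show_one_workqueue(const struct wq *wq)
{
	if (wq->busy)
		printf("workqueue %s: busy\n", wq->name);
}

/* per-object helper: print one worker pool, stay silent when it is idle */
static void show_one_worker_pool(const struct pool *pool)
{
	if (pool->busy)
		printf("pool %d: busy\n", pool->id);
}

/* the old all-in-one dumper becomes a thin loop over the helpers */
static void show_all_workqueues(void)
{
	size_t i;

	printf("Showing busy workqueues and worker pools:\n");
	for (i = 0; i < sizeof(wqs) / sizeof(wqs[0]); i++)
		show_one_workqueue(&wqs[i]);
	for (i = 0; i < sizeof(pools) / sizeof(pools[0]); i++)
		show_one_worker_pool(&pools[i]);
}

int main(void)
{
	show_all_workqueues();		/* sysrq / watchdog path */
	show_one_workqueue(&wqs[0]);	/* destroy_workqueue() error path */
	return 0;
}

The per-object printers stay silent for idle objects, so the dump-everything path (sysrq, the watchdog timer) and the dump-one-object error path share the same code.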
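The final three hunks rework workqueue_set_unbound_cpumask() so the scratch cpumask is allocated only after apply_wqattrs_lock() and only when the new mask actually differs from wq_unbound_cpumask, with both early exits funneled through an out_unlock label. A hedged sketch of that control flow under the same assumptions as above (hypothetical names, a plain mutex and strings instead of cpumask_var_t):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t conf_lock = PTHREAD_MUTEX_INITIALIZER;
static char current_conf[64] = "0-3";

/* stand-in for applying the new setting; fails on an empty string */
static int apply_conf(const char *conf)
{
	return conf[0] ? 0 : -EINVAL;
}

static int set_conf(const char *conf)
{
	char *saved;
	int ret;

	pthread_mutex_lock(&conf_lock);
	if (!strcmp(conf, current_conf)) {	/* short-circuit: nothing to do */
		ret = 0;
		goto out_unlock;
	}

	saved = strdup(current_conf);		/* allocate only when needed */
	if (!saved) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	snprintf(current_conf, sizeof(current_conf), "%s", conf);
	ret = apply_conf(current_conf);
	if (ret < 0)				/* roll back on failure */
		snprintf(current_conf, sizeof(current_conf), "%s", saved);

	free(saved);
out_unlock:
	pthread_mutex_unlock(&conf_lock);
	return ret;
}

int main(void)
{
	/* the second call takes the equality short-circuit */
	return set_conf("0-1") || set_conf("0-1");
}

Allocating after the equality check avoids both a pointless allocation on no-op updates and the old pattern of allocating the buffer unconditionally before the lock and freeing it on every exit path.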