From bfde23ce200e6d33291d29b9b8b60cc2f30f0805 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Thu, 5 Sep 2019 21:40:28 -0400 Subject: padata: unbind parallel jobs from specific CPUs Padata binds the parallel part of a job to a single CPU and round-robins over all CPUs in the system for each successive job. Though the serial parts rely on per-CPU queues for correct ordering, they're not necessary for parallel work, and it improves performance to run the job locally on NUMA machines and let the scheduler pick the CPU within a node on a busy system. So, make the parallel workqueue unbound. Update the parallel workqueue's cpumask when the instance's parallel cpumask changes. Now that parallel jobs no longer run on max_active=1 workqueues, two or more parallel works that hash to the same CPU may run simultaneously, finish out of order, and so be serialized out of order. Prevent this by keeping the works sorted on the reorder list by sequence number and checking that in the reordering logic. padata_get_next becomes padata_find_next so it can be reused for the end of padata_reorder, where it's used to avoid uselessly queueing work when the next job by sequence number isn't finished yet but a later job that hashed to the same CPU has. The ENODATA case in padata_find_next no longer makes sense because parallel jobs aren't bound to specific CPUs. The EINPROGRESS case takes care of the scenario where a parallel job is potentially running on the same CPU as padata_find_next, and with only one error code left, just use NULL instead. Signed-off-by: Daniel Jordan Cc: Herbert Xu Cc: Lai Jiangshan Cc: Peter Zijlstra Cc: Steffen Klassert Cc: Tejun Heo Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- include/linux/padata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/padata.h') diff --git a/include/linux/padata.h b/include/linux/padata.h index e7978f8942ca..43d3fd9d17fc 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -35,6 +35,7 @@ struct padata_priv { struct parallel_data *pd; int cb_cpu; int cpu; + unsigned int seq_nr; int info; void (*parallel)(struct padata_priv *padata); void (*serial)(struct padata_priv *padata); @@ -105,6 +106,7 @@ struct padata_cpumask { * @reorder_objects: Number of objects waiting in the reorder queues. * @refcnt: Number of objects holding a reference on this parallel_data. * @max_seq_nr: Maximal used sequence number. + * @processed: Number of already processed objects. * @cpu: Next CPU to be processed. * @cpumask: The cpumasks in use for parallel and serial workers. * @reorder_work: work struct for reordering. @@ -117,6 +119,7 @@ struct parallel_data { atomic_t reorder_objects; atomic_t refcnt; atomic_t seq_nr; + unsigned int processed; int cpu; struct padata_cpumask cpumask; struct work_struct reorder_work; -- cgit v1.2.3-59-g8ed1b