aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/crypto
diff options
context:
space:
mode:
authorYury Norov <yury.norov@gmail.com>2025-05-08 15:59:50 -0400
committerHerbert Xu <herbert@gondor.apana.org.au>2025-05-14 17:45:22 +0800
commit714ca27e9bf4608fcb1f627cd5599441f448771e (patch)
treea6d186d3d7dc44b19fa81b4db15ef4b52f010d7d /drivers/crypto
parentcrypto: qat/qat_6xxx - Fix NULL vs IS_ERR() check in adf_probe() (diff)
downloadlinux-rng-714ca27e9bf4608fcb1f627cd5599441f448771e.tar.xz
linux-rng-714ca27e9bf4608fcb1f627cd5599441f448771e.zip
crypto: iaa - Optimize rebalance_wq_table()
The function opencodes for_each_cpu() by using a plain for-loop. The loop calls cpumask_weight() inside the conditional section. Because cpumask_weight() is O(1), the overall complexity of the function is O(node * node_cpus^2). Also, cpumask_nth() internally calls hweight(), which, if not hardware accelerated, is slower than cpumask_next() in for_each_cpu(). If switched to the dedicated for_each_cpu(), the rebalance_wq_table() can drop calling cpumask_weight(), together with some housekeeping code. This makes the overall complexity O(node * node_cpus), or simply speaking O(nr_cpu_ids). While there, fix opencoded for_each_possible_cpu() too. Signed-off-by: Yury Norov <yury.norov@gmail.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'drivers/crypto')
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto_main.c35
1 files changed, 14 insertions, 21 deletions
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index e9c762124f11..23f585219fb4 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -894,7 +894,7 @@ out:
static void rebalance_wq_table(void)
{
const struct cpumask *node_cpus;
- int node, cpu, iaa = -1;
+ int node_cpu, node, cpu, iaa = 0;
if (nr_iaa == 0)
return;
@@ -905,36 +905,29 @@ static void rebalance_wq_table(void)
clear_wq_table();
if (nr_iaa == 1) {
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- if (WARN_ON(wq_table_add_wqs(0, cpu))) {
- pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
- return;
- }
+ for_each_possible_cpu(cpu) {
+ if (WARN_ON(wq_table_add_wqs(0, cpu)))
+ goto err;
}
return;
}
for_each_node_with_cpus(node) {
+ cpu = 0;
node_cpus = cpumask_of_node(node);
- for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) {
- int node_cpu = cpumask_nth(cpu, node_cpus);
-
- if (WARN_ON(node_cpu >= nr_cpu_ids)) {
- pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
- return;
- }
-
- if ((cpu % cpus_per_iaa) == 0)
- iaa++;
-
- if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
- pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
- return;
- }
+ for_each_cpu(node_cpu, node_cpus) {
+ iaa = cpu / cpus_per_iaa;
+ if (WARN_ON(wq_table_add_wqs(iaa, node_cpu)))
+ goto err;
+ cpu++;
}
}
+
+ return;
+err:
+ pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
}
static inline int check_completion(struct device *dev,