diff options
author | 2025-05-08 15:59:50 -0400 | |
---|---|---|
committer | 2025-05-14 17:45:22 +0800 | |
commit | 714ca27e9bf4608fcb1f627cd5599441f448771e (patch) | |
tree | a6d186d3d7dc44b19fa81b4db15ef4b52f010d7d /drivers/crypto | |
parent | crypto: qat/qat_6xxx - Fix NULL vs IS_ERR() check in adf_probe() (diff) | |
download | linux-rng-714ca27e9bf4608fcb1f627cd5599441f448771e.tar.xz linux-rng-714ca27e9bf4608fcb1f627cd5599441f448771e.zip |
crypto: iaa - Optimize rebalance_wq_table()
The function opencodes for_each_cpu() by using a plain for-loop. The
loop calls cpumask_weight() inside the conditional section. Because
cpumask_weight() is O(1), the overall complexity of the function is
O(node * node_cpus^2). Also, cpumask_nth() internally calls hweight(),
which, if not hardware accelerated, is slower than cpumask_next() in
for_each_cpu().
If switched to the dedicated for_each_cpu(), the rebalance_wq_table()
can drop calling cpumask_weight(), together with some housekeeping code.
This makes the overall complexity O(node * node_cpus), or simply speaking
O(nr_cpu_ids).
While there, fix opencoded for_each_possible_cpu() too.
Signed-off-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'drivers/crypto')
-rw-r--r-- | drivers/crypto/intel/iaa/iaa_crypto_main.c | 35 |
1 files changed, 14 insertions, 21 deletions
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index e9c762124f11..23f585219fb4 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -894,7 +894,7 @@ out: static void rebalance_wq_table(void) { const struct cpumask *node_cpus; - int node, cpu, iaa = -1; + int node_cpu, node, cpu, iaa = 0; if (nr_iaa == 0) return; @@ -905,36 +905,29 @@ static void rebalance_wq_table(void) clear_wq_table(); if (nr_iaa == 1) { - for (cpu = 0; cpu < nr_cpus; cpu++) { - if (WARN_ON(wq_table_add_wqs(0, cpu))) { - pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu); - return; - } + for_each_possible_cpu(cpu) { + if (WARN_ON(wq_table_add_wqs(0, cpu))) + goto err; } return; } for_each_node_with_cpus(node) { + cpu = 0; node_cpus = cpumask_of_node(node); - for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) { - int node_cpu = cpumask_nth(cpu, node_cpus); - - if (WARN_ON(node_cpu >= nr_cpu_ids)) { - pr_debug("node_cpu %d doesn't exist!\n", node_cpu); - return; - } - - if ((cpu % cpus_per_iaa) == 0) - iaa++; - - if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) { - pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu); - return; - } + for_each_cpu(node_cpu, node_cpus) { + iaa = cpu / cpus_per_iaa; + if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) + goto err; + cpu++; } } + + return; +err: + pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu); } static inline int check_completion(struct device *dev, |