mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 14:53:58 -04:00
crypto: iaa - Optimize rebalance_wq_table()
The function open-codes for_each_cpu() by using a plain for-loop. The loop calls cpumask_weight() inside the loop condition, so it is re-evaluated on every iteration. Because cpumask_weight() is O(N) in the size of the mask, the overall complexity of the function is O(node * node_cpus^2). Also, cpumask_nth() internally calls hweight(), which, if not hardware accelerated, is slower than cpumask_next() in for_each_cpu().

If switched to the dedicated for_each_cpu(), rebalance_wq_table() can drop the call to cpumask_weight(), together with some housekeeping code. This makes the overall complexity O(node * node_cpus), or simply speaking O(nr_cpu_ids).

While there, fix the open-coded for_each_possible_cpu() too.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
@@ -894,7 +894,7 @@ out:
|
||||
static void rebalance_wq_table(void)
|
||||
{
|
||||
const struct cpumask *node_cpus;
|
||||
int node, cpu, iaa = -1;
|
||||
int node_cpu, node, cpu, iaa = 0;
|
||||
|
||||
if (nr_iaa == 0)
|
||||
return;
|
||||
@@ -905,36 +905,29 @@ static void rebalance_wq_table(void)
|
||||
clear_wq_table();
|
||||
|
||||
if (nr_iaa == 1) {
|
||||
for (cpu = 0; cpu < nr_cpus; cpu++) {
|
||||
if (WARN_ON(wq_table_add_wqs(0, cpu))) {
|
||||
pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
|
||||
return;
|
||||
}
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (WARN_ON(wq_table_add_wqs(0, cpu)))
|
||||
goto err;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
for_each_node_with_cpus(node) {
|
||||
cpu = 0;
|
||||
node_cpus = cpumask_of_node(node);
|
||||
|
||||
for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) {
|
||||
int node_cpu = cpumask_nth(cpu, node_cpus);
|
||||
|
||||
if (WARN_ON(node_cpu >= nr_cpu_ids)) {
|
||||
pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
|
||||
return;
|
||||
}
|
||||
|
||||
if ((cpu % cpus_per_iaa) == 0)
|
||||
iaa++;
|
||||
|
||||
if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
|
||||
pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
|
||||
return;
|
||||
}
|
||||
for_each_cpu(node_cpu, node_cpus) {
|
||||
iaa = cpu / cpus_per_iaa;
|
||||
if (WARN_ON(wq_table_add_wqs(iaa, node_cpu)))
|
||||
goto err;
|
||||
cpu++;
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
err:
|
||||
pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
|
||||
}
|
||||
|
||||
static inline int check_completion(struct device *dev,
|
||||
|
||||
Reference in New Issue
Block a user