From 77222b0d12e8ae6f082261842174cc2e981bf99c Mon Sep 17 00:00:00 2001
From: Qais Yousef
Date: Sun, 24 Mar 2024 00:45:49 +0000
Subject: sched/topology: Export asym_cap_list

So that we can use it to iterate through available capacities in the
system. Sort asym_cap_list in descending order as expected users are
likely to be interested in the highest capacity first.

Make the list RCU protected to allow for cheap access in hot paths.

Signed-off-by: Qais Yousef
Signed-off-by: Ingo Molnar
Reviewed-by: Vincent Guittot
Link: https://lore.kernel.org/r/20240324004552.999936-2-qyousef@layalina.io
---
 kernel/sched/topology.c | 43 +++++++++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 18 deletions(-)

(limited to 'kernel/sched/topology.c')

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 99ea5986038c..44ed3d0812ab 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1329,24 +1329,13 @@ next:
 	update_group_capacity(sd, cpu);
 }
 
-/*
- * Asymmetric CPU capacity bits
- */
-struct asym_cap_data {
-	struct list_head link;
-	unsigned long capacity;
-	unsigned long cpus[];
-};
-
 /*
  * Set of available CPUs grouped by their corresponding capacities
  * Each list entry contains a CPU mask reflecting CPUs that share the same
  * capacity.
  * The lifespan of data is unlimited.
  */
-static LIST_HEAD(asym_cap_list);
-
-#define cpu_capacity_span(asym_data) to_cpumask((asym_data)->cpus)
+LIST_HEAD(asym_cap_list);
 
 /*
  * Verify whether there is any CPU capacity asymmetry in a given sched domain.
@@ -1386,21 +1375,39 @@ asym_cpu_capacity_classify(const struct cpumask *sd_span,
 
 }
 
+static void free_asym_cap_entry(struct rcu_head *head)
+{
+	struct asym_cap_data *entry = container_of(head, struct asym_cap_data, rcu);
+	kfree(entry);
+}
+
 static inline void asym_cpu_capacity_update_data(int cpu)
 {
 	unsigned long capacity = arch_scale_cpu_capacity(cpu);
-	struct asym_cap_data *entry = NULL;
+	struct asym_cap_data *insert_entry = NULL;
+	struct asym_cap_data *entry;
 
+	/*
+	 * Search if capacity already exists. If not, track the entry after
+	 * which we should insert, to keep the list sorted in descending order.
+	 */
 	list_for_each_entry(entry, &asym_cap_list, link) {
 		if (capacity == entry->capacity)
 			goto done;
+		else if (!insert_entry && capacity > entry->capacity)
+			insert_entry = list_prev_entry(entry, link);
 	}
 
 	entry = kzalloc(sizeof(*entry) + cpumask_size(), GFP_KERNEL);
 	if (WARN_ONCE(!entry, "Failed to allocate memory for asymmetry data\n"))
 		return;
 	entry->capacity = capacity;
-	list_add(&entry->link, &asym_cap_list);
+
+	/* If NULL then the new capacity is the smallest, add last. */
+	if (!insert_entry)
+		list_add_tail_rcu(&entry->link, &asym_cap_list);
+	else
+		list_add_rcu(&entry->link, &insert_entry->link);
 done:
 	__cpumask_set_cpu(cpu, cpu_capacity_span(entry));
 }
@@ -1423,8 +1430,8 @@ static void asym_cpu_capacity_scan(void)
 
 	list_for_each_entry_safe(entry, next, &asym_cap_list, link) {
 		if (cpumask_empty(cpu_capacity_span(entry))) {
-			list_del(&entry->link);
-			kfree(entry);
+			list_del_rcu(&entry->link);
+			call_rcu(&entry->rcu, free_asym_cap_entry);
 		}
 	}
 
@@ -1434,8 +1441,8 @@ static void asym_cpu_capacity_scan(void)
 	 */
	if (list_is_singular(&asym_cap_list)) {
 		entry = list_first_entry(&asym_cap_list, typeof(*entry), link);
-		list_del(&entry->link);
-		kfree(entry);
+		list_del_rcu(&entry->link);
+		call_rcu(&entry->rcu, free_asym_cap_entry);
 	}
 }
-- cgit v1.2.3-59-g8ed1b
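With asym_cap_list exported, RCU-protected, and kept in descending capacity
order, other scheduler code can walk it cheaply from hot paths. A minimal
sketch of what such a consumer might look like (the helper name and the use
of p->cpus_ptr are illustrative only, not part of this patch; the in-tree
users are added by later patches in this series):

	/*
	 * Illustrative only: highest CPU capacity reachable by a task,
	 * relying on asym_cap_list being sorted in descending order.
	 */
	static unsigned long task_max_reachable_capacity(struct task_struct *p)
	{
		unsigned long max_cap = SCHED_CAPACITY_SCALE;
		struct asym_cap_data *entry;

		rcu_read_lock();
		list_for_each_entry_rcu(entry, &asym_cap_list, link) {
			/* The first intersecting entry has the largest capacity. */
			if (cpumask_intersects(p->cpus_ptr, cpu_capacity_span(entry))) {
				max_cap = entry->capacity;
				break;
			}
		}
		rcu_read_unlock();

		return max_cap;
	}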
From fa427e8e53d8db15090af7e952a55870dc2a453f Mon Sep 17 00:00:00 2001
From: Qais Yousef
Date: Sun, 24 Mar 2024 00:45:51 +0000
Subject: sched/topology: Remove root_domain::max_cpu_capacity

The value is no longer used as we now keep track of max_allowed_capacity
for each task instead.

Signed-off-by: Qais Yousef
Signed-off-by: Ingo Molnar
Reviewed-by: Vincent Guittot
Link: https://lore.kernel.org/r/20240324004552.999936-4-qyousef@layalina.io
---
 kernel/sched/sched.h    |  2 --
 kernel/sched/topology.c | 13 ++-----------
 2 files changed, 2 insertions(+), 13 deletions(-)

(limited to 'kernel/sched/topology.c')

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f77c00dddfe1..4f9e952d4fad 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -917,8 +917,6 @@ struct root_domain {
 	cpumask_var_t		rto_mask;
 	struct cpupri		cpupri;
 
-	unsigned long		max_cpu_capacity;
-
 	/*
 	 * NULL-terminated list of performance domains intersecting with the
 	 * CPUs of the rd. Protected by RCU.
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 44ed3d0812ab..63aecd2a7a9f 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2514,16 +2514,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 	/* Attach the domains */
 	rcu_read_lock();
 	for_each_cpu(i, cpu_map) {
-		unsigned long capacity;
-
 		rq = cpu_rq(i);
 		sd = *per_cpu_ptr(d.sd, i);
 
-		capacity = arch_scale_cpu_capacity(i);
-		/* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
-		if (capacity > READ_ONCE(d.rd->max_cpu_capacity))
-			WRITE_ONCE(d.rd->max_cpu_capacity, capacity);
-
 		cpu_attach_domain(sd, d.rd, i);
 
 		if (lowest_flag_domain(i, SD_CLUSTER))
@@ -2537,10 +2530,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 	if (has_cluster)
 		static_branch_inc_cpuslocked(&sched_cluster_active);
 
-	if (rq && sched_debug_verbose) {
-		pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
-			cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
-	}
+	if (rq && sched_debug_verbose)
+		pr_info("root domain span: %*pbl\n", cpumask_pr_args(cpu_map));
 
 	ret = 0;
 error:
-- cgit v1.2.3-59-g8ed1b
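The per-task max_allowed_capacity mentioned above takes over the role of the
removed field. If the system-wide maximum capacity were ever needed again, it
could be read from the head of the descending-sorted asym_cap_list rather
than cached in the root domain; a hypothetical sketch, not part of this
patch:

	static unsigned long system_max_cpu_capacity(void)
	{
		struct asym_cap_data *entry;
		unsigned long max_cap = SCHED_CAPACITY_SCALE;

		rcu_read_lock();
		/* The first entry, if any, holds the largest capacity. */
		entry = list_first_or_null_rcu(&asym_cap_list,
					       struct asym_cap_data, link);
		if (entry)
			max_cap = entry->capacity;
		rcu_read_unlock();

		return max_cap;
	}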
From f532376e881fb370bc2901a9e1cf0b7e461638ad Mon Sep 17 00:00:00 2001
From: Joel Granados
Date: Tue, 27 Jun 2023 15:30:21 +0200
Subject: scheduler: Remove the now superfluous sentinel elements from ctl_table array

This commit comes at the tail end of a greater effort to remove the empty
elements at the end of the ctl_table arrays (sentinels). Dropping the
sentinels reduces the overall build-time size of the kernel and its run-time
memory bloat by ~64 bytes per sentinel (for further information, see
Link: https://lore.kernel.org/all/ZO5Yx5JFogGi%2FcBo@bombadil.infradead.org/).

Remove the sentinel element from the scheduler's ctl_table arrays.

Acked-by: Peter Zijlstra (Intel)
Tested-by: Valentin Schneider
Reviewed-by: Valentin Schneider
Signed-off-by: Joel Granados
---
 kernel/sched/autogroup.c | 1 -
 kernel/sched/core.c      | 1 -
 kernel/sched/deadline.c  | 1 -
 kernel/sched/fair.c      | 1 -
 kernel/sched/rt.c        | 1 -
 kernel/sched/topology.c  | 1 -
 6 files changed, 6 deletions(-)

(limited to 'kernel/sched/topology.c')

diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 991fc9002535..db68a964e34e 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -19,7 +19,6 @@ static struct ctl_table sched_autogroup_sysctls[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
-	{}
 };
 
 static void __init sched_autogroup_sysctl_init(void)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7019a40457a6..7ce76620a308 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4741,7 +4741,6 @@ static struct ctl_table sched_core_sysctls[] = {
 		.extra2		= SYSCTL_FOUR,
 	},
 #endif /* CONFIG_NUMA_BALANCING */
-	{}
 };
 static int __init sched_core_sysctl_init(void)
 {
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a04a436af8cc..c75d1307d86d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -43,7 +43,6 @@ static struct ctl_table sched_dl_sysctls[] = {
 		.proc_handler	= proc_douintvec_minmax,
 		.extra2		= (void *)&sysctl_sched_dl_period_max,
 	},
-	{}
 };
 
 static int __init sched_dl_sysctl_init(void)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 03be0d1330a6..4ac2cf7a918e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -157,7 +157,6 @@ static struct ctl_table sched_fair_sysctls[] = {
 		.extra1		= SYSCTL_ZERO,
 	},
 #endif /* CONFIG_NUMA_BALANCING */
-	{}
 };
 
 static int __init sched_fair_sysctl_init(void)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3261b067b67e..aa4c1c874fa4 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -56,7 +56,6 @@ static struct ctl_table sched_rt_sysctls[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_rr_handler,
 	},
-	{}
 };
 
 static int __init sched_rt_sysctl_init(void)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 99ea5986038c..42c22648d124 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -322,7 +322,6 @@ static struct ctl_table sched_energy_aware_sysctls[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
-	{}
 };
 
 static int __init sched_energy_aware_sysctl_init(void)
-- cgit v1.2.3-59-g8ed1b
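Dropping the sentinel is only safe because sysctl registration now takes the
table length from ARRAY_SIZE() instead of scanning for an empty terminating
entry. A before/after sketch with a made-up knob (example_knob and both array
names are hypothetical, not existing sysctls):

	static unsigned int example_knob;

	/* Before: the array had to end with an empty sentinel entry. */
	static struct ctl_table example_sysctls_old[] = {
		{
			.procname	= "example_knob",
			.data		= &example_knob,
			.maxlen		= sizeof(unsigned int),
			.mode		= 0644,
			.proc_handler	= proc_douintvec,
		},
		{}	/* sentinel: ~64 bytes spent only to mark the end */
	};

	/* After: register_sysctl() passes ARRAY_SIZE() of the table, so the
	 * terminating element can simply be dropped. */
	static struct ctl_table example_sysctls[] = {
		{
			.procname	= "example_knob",
			.data		= &example_knob,
			.maxlen		= sizeof(unsigned int),
			.mode		= 0644,
			.proc_handler	= proc_douintvec,
		},
	};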
From 05037e5f0f17935a86861f9610f941ebf346a95e Mon Sep 17 00:00:00 2001
From: Kyle Meyer
Date: Wed, 10 Apr 2024 16:33:11 -0500
Subject: sched/topology: Optimize topology_span_sane()

Optimize topology_span_sane() by removing duplicate comparisons.

Since topology_span_sane() is called inside of for_each_cpu(), each
previous CPU has already been compared against every other CPU. The
current CPU only needs to be compared against higher-numbered CPUs.

The total number of comparisons is reduced from N * (N - 1) to
N * (N - 1) / 2 on each non-NUMA scheduling domain level.

Signed-off-by: Kyle Meyer
Reviewed-by: Yury Norov
Acked-by: Vincent Guittot
Signed-off-by: Yury Norov
---
 kernel/sched/topology.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'kernel/sched/topology.c')

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 99ea5986038c..b6bcafc09969 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2347,7 +2347,7 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
 static bool topology_span_sane(struct sched_domain_topology_level *tl,
 			      const struct cpumask *cpu_map, int cpu)
 {
-	int i;
+	int i = cpu + 1;
 
 	/* NUMA levels are allowed to overlap */
 	if (tl->flags & SDTL_OVERLAP)
@@ -2359,9 +2359,7 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl,
 	 * breaking the sched_group lists - i.e. a later get_group() pass
 	 * breaks the linking done for an earlier span.
 	 */
-	for_each_cpu(i, cpu_map) {
-		if (i == cpu)
-			continue;
+	for_each_cpu_from(i, cpu_map) {
 		/*
 		 * We should 'and' all those masks with 'cpu_map' to exactly
 		 * match the topology we're about to build, but that can only
-- cgit v1.2.3-59-g8ed1b
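The same counting argument holds for any symmetric pairwise check: visiting
only the pairs (i, j) with j > i still covers every pair exactly once, so N
CPUs need N * (N - 1) / 2 checks instead of N * (N - 1). A generic sketch of
the pattern, where cpus_pair_ok() is a placeholder rather than a kernel
function:

	/* Placeholder for a symmetric property such as the span comparison. */
	static bool cpus_pair_ok(int i, int j)
	{
		return true;
	}

	static bool all_pairs_ok(const struct cpumask *cpu_map)
	{
		int i, j;

		for_each_cpu(i, cpu_map) {
			j = i + 1;
			/* Pair (i, j) with i < j is visited exactly once, when
			 * the outer loop sits on i; lower-numbered partners
			 * were already handled in earlier outer iterations. */
			for_each_cpu_from(j, cpu_map) {
				if (!cpus_pair_ok(i, j))
					return false;
			}
		}

		return true;
	}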
From a1fd0b9d751f840df23ef0e75b691fc00cfd4743 Mon Sep 17 00:00:00 2001
From: Vitalii Bursov
Date: Tue, 30 Apr 2024 18:05:23 +0300
Subject: sched/fair: Allow disabling sched_balance_newidle with sched_relax_domain_level

Change relax_domain_level checks so that it would be possible to include
or exclude all domains from newidle balancing.

This matches the behavior described in the documentation:

  -1	no request. use system default or follow request of others.
   0	no search.
   1	search siblings (hyperthreads in a core).

"2" enables levels 0 and 1, level_max excludes the last (level_max)
level, and level_max+1 includes all levels.

Fixes: 1d3504fcf560 ("sched, cpuset: customize sched domains, core")
Signed-off-by: Vitalii Bursov
Signed-off-by: Ingo Molnar
Tested-by: Dietmar Eggemann
Reviewed-by: Vincent Guittot
Reviewed-by: Valentin Schneider
Link: https://lore.kernel.org/r/bd6de28e80073c79466ec6401cdeae78f0d4423d.1714488502.git.vitaly@bursov.com
---
 kernel/cgroup/cpuset.c  | 2 +-
 kernel/sched/topology.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/sched/topology.c')

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 4237c8748715..da24187c4e02 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2948,7 +2948,7 @@ bool current_cpuset_is_being_rebound(void)
 static int update_relax_domain_level(struct cpuset *cs, s64 val)
 {
 #ifdef CONFIG_SMP
-	if (val < -1 || val >= sched_domain_level_max)
+	if (val < -1 || val > sched_domain_level_max + 1)
 		return -EINVAL;
 #endif
 
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 63aecd2a7a9f..67a777b31743 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1475,7 +1475,7 @@ static void set_domain_attribute(struct sched_domain *sd,
 	} else
 		request = attr->relax_domain_level;
 
-	if (sd->level > request) {
+	if (sd->level >= request) {
 		/* Turn off idle balance on this domain: */
 		sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
 	}
-- cgit v1.2.3-59-g8ed1b
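With the corrected comparison, a request of R clears SD_BALANCE_WAKE and
SD_BALANCE_NEWIDLE on every domain whose level is at least R: 0 disables
newidle balancing on all levels, while level_max + 1 (now accepted by the
cpuset check) keeps it enabled everywhere. A hypothetical helper expressing
the resulting rule, illustrative rather than kernel code:

	/*
	 * Illustrative only: for a non-negative relax_domain_level request,
	 * does a domain at @sd_level keep SD_BALANCE_NEWIDLE/SD_BALANCE_WAKE?
	 */
	static bool keeps_newidle_balance(int sd_level, int request)
	{
		/* Domains at or above the requested level lose the flags. */
		return sd_level < request;
	}

Under the old '>' comparison, a domain whose level equalled the request kept
its flags, so a request of 0 could never switch newidle balancing off on
level 0.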