Diffstat (limited to 'kernel/sched/topology.c')
-rw-r--r--  kernel/sched/topology.c | 111
1 file changed, 95 insertions(+), 16 deletions(-)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 56a0fed30c0a..9d74371e4aad 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -7,8 +7,8 @@
DEFINE_MUTEX(sched_domains_mutex);
/* Protected by sched_domains_mutex: */
-cpumask_var_t sched_domains_tmpmask;
-cpumask_var_t sched_domains_tmpmask2;
+static cpumask_var_t sched_domains_tmpmask;
+static cpumask_var_t sched_domains_tmpmask2;
#ifdef CONFIG_SCHED_DEBUG
@@ -398,6 +398,7 @@ DEFINE_PER_CPU(int, sd_llc_id);
DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
DEFINE_PER_CPU(struct sched_domain *, sd_numa);
DEFINE_PER_CPU(struct sched_domain *, sd_asym);
+DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
static void update_top_cache_domain(int cpu)
{
@@ -692,6 +693,7 @@ static void init_overlap_sched_group(struct sched_domain *sd,
sg_span = sched_group_span(sg);
sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+ sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
}
static int
@@ -851,6 +853,7 @@ static struct sched_group *get_group(int cpu, struct sd_data *sdd)
sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+ sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
return sg;
}
@@ -1061,7 +1064,6 @@ static struct cpumask ***sched_domains_numa_masks;
* SD_SHARE_PKG_RESOURCES - describes shared caches
* SD_NUMA - describes NUMA topologies
* SD_SHARE_POWERDOMAIN - describes shared power domain
- * SD_ASYM_CPUCAPACITY - describes mixed capacity topologies
*
* Odd one out, which besides describing the topology also prescribes
* the desired behaviour that goes along with it:
@@ -1073,13 +1075,12 @@ static struct cpumask ***sched_domains_numa_masks;
SD_SHARE_PKG_RESOURCES | \
SD_NUMA | \
SD_ASYM_PACKING | \
- SD_ASYM_CPUCAPACITY | \
SD_SHARE_POWERDOMAIN)
static struct sched_domain *
sd_init(struct sched_domain_topology_level *tl,
const struct cpumask *cpu_map,
- struct sched_domain *child, int cpu)
+ struct sched_domain *child, int dflags, int cpu)
{
struct sd_data *sdd = &tl->data;
struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
@@ -1100,6 +1101,9 @@ sd_init(struct sched_domain_topology_level *tl,
"wrong sd_flags in topology description\n"))
sd_flags &= ~TOPOLOGY_SD_FLAGS;
+ /* Apply detected topology flags */
+ sd_flags |= dflags;
+
*sd = (struct sched_domain){
.min_interval = sd_weight,
.max_interval = 2*sd_weight,
@@ -1122,7 +1126,7 @@ sd_init(struct sched_domain_topology_level *tl,
| 0*SD_SHARE_CPUCAPACITY
| 0*SD_SHARE_PKG_RESOURCES
| 0*SD_SERIALIZE
- | 0*SD_PREFER_SIBLING
+ | 1*SD_PREFER_SIBLING
| 0*SD_NUMA
| sd_flags
,
@@ -1148,17 +1152,21 @@ sd_init(struct sched_domain_topology_level *tl,
if (sd->flags & SD_ASYM_CPUCAPACITY) {
struct sched_domain *t = sd;
+ /*
+ * Don't attempt to spread across CPUs of different capacities.
+ */
+ if (sd->child)
+ sd->child->flags &= ~SD_PREFER_SIBLING;
+
for_each_lower_domain(t)
t->flags |= SD_BALANCE_WAKE;
}
if (sd->flags & SD_SHARE_CPUCAPACITY) {
- sd->flags |= SD_PREFER_SIBLING;
sd->imbalance_pct = 110;
sd->smt_gain = 1178; /* ~15% */
} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
- sd->flags |= SD_PREFER_SIBLING;
sd->imbalance_pct = 117;
sd->cache_nice_tries = 1;
sd->busy_idx = 2;
@@ -1169,6 +1177,7 @@ sd_init(struct sched_domain_topology_level *tl,
sd->busy_idx = 3;
sd->idle_idx = 2;
+ sd->flags &= ~SD_PREFER_SIBLING;
sd->flags |= SD_SERIALIZE;
if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
sd->flags &= ~(SD_BALANCE_EXEC |
@@ -1178,7 +1187,6 @@ sd_init(struct sched_domain_topology_level *tl,
#endif
} else {
- sd->flags |= SD_PREFER_SIBLING;
sd->cache_nice_tries = 1;
sd->busy_idx = 2;
sd->idle_idx = 1;
@@ -1295,7 +1303,7 @@ static void init_numa_topology_type(void)
n = sched_max_numa_distance;
- if (sched_domains_numa_levels <= 1) {
+ if (sched_domains_numa_levels <= 2) {
sched_numa_topology_type = NUMA_DIRECT;
return;
}
@@ -1380,9 +1388,6 @@ void sched_init_numa(void)
break;
}
- if (!level)
- return;
-
/*
* 'level' contains the number of unique distances
*
@@ -1607,9 +1612,9 @@ static void __sdt_free(const struct cpumask *cpu_map)
static struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
const struct cpumask *cpu_map, struct sched_domain_attr *attr,
- struct sched_domain *child, int cpu)
+ struct sched_domain *child, int dflags, int cpu)
{
- struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu);
+ struct sched_domain *sd = sd_init(tl, cpu_map, child, dflags, cpu);
if (child) {
sd->level = child->level + 1;
@@ -1636,6 +1641,65 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
}
/*
+ * Find the sched_domain_topology_level where all CPU capacities are visible
+ * for all CPUs.
+ */
+static struct sched_domain_topology_level
+*asym_cpu_capacity_level(const struct cpumask *cpu_map)
+{
+ int i, j, asym_level = 0;
+ bool asym = false;
+ struct sched_domain_topology_level *tl, *asym_tl = NULL;
+ unsigned long cap;
+
+ /* Is there any asymmetry? */
+ cap = arch_scale_cpu_capacity(NULL, cpumask_first(cpu_map));
+
+ for_each_cpu(i, cpu_map) {
+ if (arch_scale_cpu_capacity(NULL, i) != cap) {
+ asym = true;
+ break;
+ }
+ }
+
+ if (!asym)
+ return NULL;
+
+ /*
+ * Examine the topology from each CPU's point of view to detect the
+ * lowest sched_domain_topology_level at which a highest-capacity CPU
+ * is visible to everyone.
+ */
+ for_each_cpu(i, cpu_map) {
+ unsigned long max_capacity = arch_scale_cpu_capacity(NULL, i);
+ int tl_id = 0;
+
+ for_each_sd_topology(tl) {
+ if (tl_id < asym_level)
+ goto next_level;
+
+ for_each_cpu_and(j, tl->mask(i), cpu_map) {
+ unsigned long capacity;
+
+ capacity = arch_scale_cpu_capacity(NULL, j);
+
+ if (capacity <= max_capacity)
+ continue;
+
+ max_capacity = capacity;
+ asym_level = tl_id;
+ asym_tl = tl;
+ }
+next_level:
+ tl_id++;
+ }
+ }
+
+ return asym_tl;
+}
+
+
+/*
* Build sched domains for a given set of CPUs and attach the sched domains
* to the individual CPUs
*/
@@ -1647,18 +1711,30 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
struct s_data d;
struct rq *rq = NULL;
int i, ret = -ENOMEM;
+ struct sched_domain_topology_level *tl_asym;
+ bool has_asym = false;
alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
if (alloc_state != sa_rootdomain)
goto error;
+ tl_asym = asym_cpu_capacity_level(cpu_map);
+
/* Set up domains for CPUs specified by the cpu_map: */
for_each_cpu(i, cpu_map) {
struct sched_domain_topology_level *tl;
sd = NULL;
for_each_sd_topology(tl) {
- sd = build_sched_domain(tl, cpu_map, attr, sd, i);
+ int dflags = 0;
+
+ if (tl == tl_asym) {
+ dflags |= SD_ASYM_CPUCAPACITY;
+ has_asym = true;
+ }
+
+ sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);
+
if (tl == sched_domain_topology)
*per_cpu_ptr(d.sd, i) = sd;
if (tl->flags & SDTL_OVERLAP)
@@ -1707,6 +1783,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
}
rcu_read_unlock();
+ if (has_asym)
+ static_branch_enable_cpuslocked(&sched_asym_cpucapacity);
+
if (rq && sched_debug_enabled) {
pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
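
To make the new detection pass concrete, consider a hypothetical 4-CPU
big.LITTLE system: two LITTLE CPUs of capacity 512 in one cluster, two big
CPUs of capacity 1024 in another, with an MC topology level spanning each
cluster and a DIE level spanning the package. The userspace model below
replays the same two-pass scan as asym_cpu_capacity_level(); the capacities
and per-level masks are made up for illustration and stand in for
arch_scale_cpu_capacity() and tl->mask():

#include <stdio.h>

#define NR_CPUS		4
#define NR_LEVELS	2	/* 0 = MC (cluster), 1 = DIE (package) */

/* Stand-in for arch_scale_cpu_capacity(): 2 LITTLE + 2 big CPUs. */
static const unsigned long capacity[NR_CPUS] = { 512, 512, 1024, 1024 };

/* mask[level][cpu]: bitmask of the CPUs that @cpu sees at @level. */
static const unsigned int mask[NR_LEVELS][NR_CPUS] = {
	{ 0x3, 0x3, 0xc, 0xc },	/* MC: own cluster only */
	{ 0xf, 0xf, 0xf, 0xf },	/* DIE: whole package */
};

int main(void)
{
	int i, j, level, asym_level = 0, asym_tl = -1;

	for (i = 0; i < NR_CPUS; i++) {
		unsigned long max_capacity = capacity[i];

		for (level = 0; level < NR_LEVELS; level++) {
			if (level < asym_level)
				continue;	/* mirrors the goto next_level */

			for (j = 0; j < NR_CPUS; j++) {
				if (!(mask[level][i] & (1u << j)))
					continue;
				if (capacity[j] <= max_capacity)
					continue;
				/* A bigger CPU became visible at this level. */
				max_capacity = capacity[j];
				asym_level = level;
				asym_tl = level;
			}
		}
	}

	/* Prints 1 (DIE): the LITTLEs only see a big CPU at package level. */
	printf("SD_ASYM_CPUCAPACITY level: %d\n", asym_tl);
	return 0;
}

The scan settles on the DIE level because that is the lowest level at which
the LITTLE CPUs can see a big CPU; build_sched_domains() then passes
SD_ASYM_CPUCAPACITY via dflags for exactly that level, so only the DIE
domains carry the flag.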
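
The sched_asym_cpucapacity static key enabled at the end of
build_sched_domains() is read through the kernel's jump-label API. A minimal
sketch of a consumer, assuming the usual DECLARE_STATIC_KEY_FALSE() /
static_branch_unlikely() pattern (the helper name below is illustrative, not
part of this diff):

/* Illustrative consumer of the new static key; not part of this diff. */
#include <linux/jump_label.h>

DECLARE_STATIC_KEY_FALSE(sched_asym_cpucapacity);

static inline bool sched_asym_cpucap_active(void)
{
	/*
	 * Compiles to a straight-line NOP until build_sched_domains()
	 * calls static_branch_enable_cpuslocked(), at which point the
	 * site is patched to a jump.
	 */
	return static_branch_unlikely(&sched_asym_cpucapacity);
}

Because the key defaults to false, fully symmetric systems pay no runtime
cost for such checks; the branch is only patched in once capacity asymmetry
has actually been detected.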