aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/kernel/sched
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2019-11-11 08:34:59 +0100
committerIngo Molnar <mingo@kernel.org>2019-11-11 08:34:59 +0100
commit6d5a763c303bc9d78b17361d30b692ba2facf9b4 (patch)
tree1ba7059e8b1c47eb075acc906f8ef01f9c490ba1 /kernel/sched
parentleds: Use vtime aware kcpustat accessor to fetch CPUTIME_SYSTEM (diff)
parentLinux 5.4-rc7 (diff)
downloadwireguard-linux-6d5a763c303bc9d78b17361d30b692ba2facf9b4.tar.xz
wireguard-linux-6d5a763c303bc9d78b17361d30b692ba2facf9b4.zip
Merge tag 'v5.4-rc7' into sched/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/core.c66
-rw-r--r--kernel/sched/deadline.c40
-rw-r--r--kernel/sched/fair.c15
-rw-r--r--kernel/sched/idle.c9
-rw-r--r--kernel/sched/rt.c37
-rw-r--r--kernel/sched/sched.h30
-rw-r--r--kernel/sched/stop_task.c18
-rw-r--r--kernel/sched/topology.c11
8 files changed, 129 insertions, 97 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7880f4f64d0e..0f2eb3629070 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1073,6 +1073,7 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
task_rq_unlock(rq, p, &rf);
}
+#ifdef CONFIG_UCLAMP_TASK_GROUP
static inline void
uclamp_update_active_tasks(struct cgroup_subsys_state *css,
unsigned int clamps)
@@ -1091,7 +1092,6 @@ uclamp_update_active_tasks(struct cgroup_subsys_state *css,
css_task_iter_end(&it);
}
-#ifdef CONFIG_UCLAMP_TASK_GROUP
static void cpu_util_update_eff(struct cgroup_subsys_state *css);
static void uclamp_update_root_tg(void)
{
@@ -3929,13 +3929,22 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
restart:
+#ifdef CONFIG_SMP
/*
- * Ensure that we put DL/RT tasks before the pick loop, such that they
- * can PULL higher prio tasks when we lower the RQ 'priority'.
+ * We must do the balancing pass before put_next_task(), such
+ * that when we release the rq->lock the task is in the same
+ * state as before we took rq->lock.
+ *
+ * We can terminate the balance pass as soon as we know there is
+ * a runnable task of @class priority or higher.
*/
- prev->sched_class->put_prev_task(rq, prev, rf);
- if (!rq->nr_running)
- newidle_balance(rq, rf);
+ for_class_range(class, prev->sched_class, &idle_sched_class) {
+ if (class->balance(rq, prev, rf))
+ break;
+ }
+#endif
+
+ put_prev_task(rq, prev);
for_each_class(class) {
p = class->pick_next_task(rq, NULL, NULL);
@@ -5106,9 +5115,6 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
u32 size;
int ret;
- if (!access_ok(uattr, SCHED_ATTR_SIZE_VER0))
- return -EFAULT;
-
/* Zero the full structure, so that a short copy will be nice: */
memset(attr, 0, sizeof(*attr));
@@ -5116,45 +5122,19 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
if (ret)
return ret;
- /* Bail out on silly large: */
- if (size > PAGE_SIZE)
- goto err_size;
-
/* ABI compatibility quirk: */
if (!size)
size = SCHED_ATTR_SIZE_VER0;
-
- if (size < SCHED_ATTR_SIZE_VER0)
+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE)
goto err_size;
- /*
- * If we're handed a bigger struct than we know of,
- * ensure all the unknown bits are 0 - i.e. new
- * user-space does not rely on any kernel feature
- * extensions we dont know about yet.
- */
- if (size > sizeof(*attr)) {
- unsigned char __user *addr;
- unsigned char __user *end;
- unsigned char val;
-
- addr = (void __user *)uattr + sizeof(*attr);
- end = (void __user *)uattr + size;
-
- for (; addr < end; addr++) {
- ret = get_user(val, addr);
- if (ret)
- return ret;
- if (val)
- goto err_size;
- }
- size = sizeof(*attr);
+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size);
+ if (ret) {
+ if (ret == -E2BIG)
+ goto err_size;
+ return ret;
}
- ret = copy_from_user(attr, uattr, size);
- if (ret)
- return -EFAULT;
-
if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) &&
size < SCHED_ATTR_SIZE_VER1)
return -EINVAL;
@@ -5354,7 +5334,7 @@ sched_attr_copy_to_user(struct sched_attr __user *uattr,
* sys_sched_getattr - similar to sched_getparam, but with sched_attr
* @pid: the pid in question.
* @uattr: structure containing the extended parameters.
- * @usize: sizeof(attr) that user-space knows about, for forwards and backwards compatibility.
+ * @usize: sizeof(attr) for fwd/bwd comp.
* @flags: for future extension.
*/
SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
@@ -6230,7 +6210,7 @@ static struct task_struct *__pick_migrate_task(struct rq *rq)
for_each_class(class) {
next = class->pick_next_task(rq, NULL, NULL);
if (next) {
- next->sched_class->put_prev_task(rq, next, NULL);
+ next->sched_class->put_prev_task(rq, next);
return next;
}
}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 2dc48720f189..a8a08030a8f7 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1691,6 +1691,22 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
resched_curr(rq);
}
+static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+ if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
+ /*
+ * This is OK, because current is on_cpu, which avoids it being
+ * picked for load-balance and preemption/IRQs are still
+ * disabled avoiding further scheduler activity on it and we've
+ * not yet started the picking loop.
+ */
+ rq_unpin_lock(rq, rf);
+ pull_dl_task(rq);
+ rq_repin_lock(rq, rf);
+ }
+
+ return sched_stop_runnable(rq) || sched_dl_runnable(rq);
+}
#endif /* CONFIG_SMP */
/*
@@ -1758,45 +1774,28 @@ static struct task_struct *
pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
struct sched_dl_entity *dl_se;
+ struct dl_rq *dl_rq = &rq->dl;
struct task_struct *p;
- struct dl_rq *dl_rq;
WARN_ON_ONCE(prev || rf);
- dl_rq = &rq->dl;
-
- if (unlikely(!dl_rq->dl_nr_running))
+ if (!sched_dl_runnable(rq))
return NULL;
dl_se = pick_next_dl_entity(rq, dl_rq);
BUG_ON(!dl_se);
-
p = dl_task_of(dl_se);
-
set_next_task_dl(rq, p);
-
return p;
}
-static void put_prev_task_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
{
update_curr_dl(rq);
update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
enqueue_pushable_dl_task(rq, p);
-
- if (rf && !on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
- /*
- * This is OK, because current is on_cpu, which avoids it being
- * picked for load-balance and preemption/IRQs are still
- * disabled avoiding further scheduler activity on it and we've
- * not yet started the picking loop.
- */
- rq_unpin_lock(rq, rf);
- pull_dl_task(rq);
- rq_repin_lock(rq, rf);
- }
}
/*
@@ -2442,6 +2441,7 @@ const struct sched_class dl_sched_class = {
.set_next_task = set_next_task_dl,
#ifdef CONFIG_SMP
+ .balance = balance_dl,
.select_task_rq = select_task_rq_dl,
.migrate_task_rq = migrate_task_rq_dl,
.set_cpus_allowed = set_cpus_allowed_dl,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a14487462b6c..6e622f48c8de 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6435,6 +6435,15 @@ static void task_dead_fair(struct task_struct *p)
{
remove_entity_load_avg(&p->se);
}
+
+static int
+balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+ if (rq->nr_running)
+ return 1;
+
+ return newidle_balance(rq, rf) != 0;
+}
#endif /* CONFIG_SMP */
static unsigned long wakeup_gran(struct sched_entity *se)
@@ -6611,7 +6620,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
int new_tasks;
again:
- if (!cfs_rq->nr_running)
+ if (!sched_fair_runnable(rq))
goto idle;
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -6749,7 +6758,7 @@ idle:
/*
* Account for a descheduled task:
*/
-static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
{
struct sched_entity *se = &prev->se;
struct cfs_rq *cfs_rq;
@@ -10609,11 +10618,11 @@ const struct sched_class fair_sched_class = {
.check_preempt_curr = check_preempt_wakeup,
.pick_next_task = pick_next_task_fair,
-
.put_prev_task = put_prev_task_fair,
.set_next_task = set_next_task_fair,
#ifdef CONFIG_SMP
+ .balance = balance_fair,
.select_task_rq = select_task_rq_fair,
.migrate_task_rq = migrate_task_rq_fair,
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 8dad5aa600ea..f65ef1e2f204 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -365,6 +365,12 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
{
return task_cpu(p); /* IDLE tasks as never migrated */
}
+
+static int
+balance_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+ return WARN_ON_ONCE(1);
+}
#endif
/*
@@ -375,7 +381,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
resched_curr(rq);
}
-static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
}
@@ -460,6 +466,7 @@ const struct sched_class idle_sched_class = {
.set_next_task = set_next_task_idle,
#ifdef CONFIG_SMP
+ .balance = balance_idle,
.select_task_rq = select_task_rq_idle,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index ebaa4e619684..9b8adc01be3d 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1469,6 +1469,22 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
resched_curr(rq);
}
+static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+ if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
+ /*
+ * This is OK, because current is on_cpu, which avoids it being
+ * picked for load-balance and preemption/IRQs are still
+ * disabled avoiding further scheduler activity on it and we've
+ * not yet started the picking loop.
+ */
+ rq_unpin_lock(rq, rf);
+ pull_rt_task(rq);
+ rq_repin_lock(rq, rf);
+ }
+
+ return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
+}
#endif /* CONFIG_SMP */
/*
@@ -1552,21 +1568,18 @@ static struct task_struct *
pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
struct task_struct *p;
- struct rt_rq *rt_rq = &rq->rt;
WARN_ON_ONCE(prev || rf);
- if (!rt_rq->rt_queued)
+ if (!sched_rt_runnable(rq))
return NULL;
p = _pick_next_task_rt(rq);
-
set_next_task_rt(rq, p);
-
return p;
}
-static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
update_curr_rt(rq);
@@ -1578,18 +1591,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_fla
*/
if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
-
- if (rf && !on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
- /*
- * This is OK, because current is on_cpu, which avoids it being
- * picked for load-balance and preemption/IRQs are still
- * disabled avoiding further scheduler activity on it and we've
- * not yet started the picking loop.
- */
- rq_unpin_lock(rq, rf);
- pull_rt_task(rq);
- rq_repin_lock(rq, rf);
- }
}
#ifdef CONFIG_SMP
@@ -2366,8 +2367,8 @@ const struct sched_class rt_sched_class = {
.set_next_task = set_next_task_rt,
#ifdef CONFIG_SMP
+ .balance = balance_rt,
.select_task_rq = select_task_rq_rt,
-
.set_cpus_allowed = set_cpus_allowed_common,
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0db2c1b3361e..c8870c5bd7df 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1727,10 +1727,11 @@ struct sched_class {
struct task_struct * (*pick_next_task)(struct rq *rq,
struct task_struct *prev,
struct rq_flags *rf);
- void (*put_prev_task)(struct rq *rq, struct task_struct *p, struct rq_flags *rf);
+ void (*put_prev_task)(struct rq *rq, struct task_struct *p);
void (*set_next_task)(struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
+ int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
@@ -1773,7 +1774,7 @@ struct sched_class {
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
{
WARN_ON_ONCE(rq->curr != prev);
- prev->sched_class->put_prev_task(rq, prev, NULL);
+ prev->sched_class->put_prev_task(rq, prev);
}
static inline void set_next_task(struct rq *rq, struct task_struct *next)
@@ -1787,8 +1788,12 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
#else
#define sched_class_highest (&dl_sched_class)
#endif
+
+#define for_class_range(class, _from, _to) \
+ for (class = (_from); class != (_to); class = class->next)
+
#define for_each_class(class) \
- for (class = sched_class_highest; class; class = class->next)
+ for_class_range(class, sched_class_highest, NULL)
extern const struct sched_class stop_sched_class;
extern const struct sched_class dl_sched_class;
@@ -1796,6 +1801,25 @@ extern const struct sched_class rt_sched_class;
extern const struct sched_class fair_sched_class;
extern const struct sched_class idle_sched_class;
+static inline bool sched_stop_runnable(struct rq *rq)
+{
+ return rq->stop && task_on_rq_queued(rq->stop);
+}
+
+static inline bool sched_dl_runnable(struct rq *rq)
+{
+ return rq->dl.dl_nr_running > 0;
+}
+
+static inline bool sched_rt_runnable(struct rq *rq)
+{
+ return rq->rt.rt_queued > 0;
+}
+
+static inline bool sched_fair_runnable(struct rq *rq)
+{
+ return rq->cfs.nr_running > 0;
+}
#ifdef CONFIG_SMP
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 7e1cee4e65b2..c0640739e05e 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -15,6 +15,12 @@ select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags)
{
return task_cpu(p); /* stop tasks as never migrate */
}
+
+static int
+balance_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+ return sched_stop_runnable(rq);
+}
#endif /* CONFIG_SMP */
static void
@@ -31,16 +37,13 @@ static void set_next_task_stop(struct rq *rq, struct task_struct *stop)
static struct task_struct *
pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
- struct task_struct *stop = rq->stop;
-
WARN_ON_ONCE(prev || rf);
- if (!stop || !task_on_rq_queued(stop))
+ if (!sched_stop_runnable(rq))
return NULL;
- set_next_task_stop(rq, stop);
-
- return stop;
+ set_next_task_stop(rq, rq->stop);
+ return rq->stop;
}
static void
@@ -60,7 +63,7 @@ static void yield_task_stop(struct rq *rq)
BUG(); /* the stop task should never yield, its pointless. */
}
-static void put_prev_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
{
struct task_struct *curr = rq->curr;
u64 delta_exec;
@@ -129,6 +132,7 @@ const struct sched_class stop_sched_class = {
.set_next_task = set_next_task_stop,
#ifdef CONFIG_SMP
+ .balance = balance_stop,
.select_task_rq = select_task_rq_stop,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 3623ffe85d18..6ec1e595b1d4 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1945,7 +1945,7 @@ next_level:
static int
build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
{
- enum s_alloc alloc_state;
+ enum s_alloc alloc_state = sa_none;
struct sched_domain *sd;
struct s_data d;
struct rq *rq = NULL;
@@ -1953,6 +1953,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
struct sched_domain_topology_level *tl_asym;
bool has_asym = false;
+ if (WARN_ON(cpumask_empty(cpu_map)))
+ goto error;
+
alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
if (alloc_state != sa_rootdomain)
goto error;
@@ -2023,7 +2026,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
rcu_read_unlock();
if (has_asym)
- static_branch_enable_cpuslocked(&sched_asym_cpucapacity);
+ static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
if (rq && sched_debug_enabled) {
pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
@@ -2118,8 +2121,12 @@ int sched_init_domains(const struct cpumask *cpu_map)
*/
static void detach_destroy_domains(const struct cpumask *cpu_map)
{
+ unsigned int cpu = cpumask_any(cpu_map);
int i;
+ if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu)))
+ static_branch_dec_cpuslocked(&sched_asym_cpucapacity);
+
rcu_read_lock();
for_each_cpu(i, cpu_map)
cpu_attach_domain(NULL, &def_root_domain, i);