aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/sched.h9
-rw-r--r--include/uapi/linux/sched.h12
-rw-r--r--include/uapi/linux/sched/types.h66
-rw-r--r--kernel/sched/core.c91
4 files changed, 161 insertions, 17 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5485f411e8e1..1113dd4706ae 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -587,6 +587,7 @@ struct sched_dl_entity {
* @value: clamp value "assigned" to a se
* @bucket_id: bucket index corresponding to the "assigned" value
* @active: the se is currently refcounted in a rq's bucket
+ * @user_defined: the requested clamp value comes from user-space
*
* The bucket_id is the index of the clamp bucket matching the clamp value
* which is pre-computed and stored to avoid expensive integer divisions from
@@ -596,11 +597,19 @@ struct sched_dl_entity {
* which can be different from the clamp value "requested" from user-space.
* This allows to know a task is refcounted in the rq's bucket corresponding
* to the "effective" bucket_id.
+ *
+ * The user_defined bit is set whenever a task has got a task-specific clamp
+ * value requested from userspace, i.e. the system defaults apply to this task
+ * just as a restriction. This allows to relax default clamps when a less
+ * restrictive task-specific value has been requested, thus allowing to
+ * implement a "nice" semantic. For example, a task running with a 20%
+ * default boost can still drop its own boosting to 0%.
*/
struct uclamp_se {
unsigned int value : bits_per(SCHED_CAPACITY_SCALE);
unsigned int bucket_id : bits_per(UCLAMP_BUCKETS);
unsigned int active : 1;
+ unsigned int user_defined : 1;
};
#endif /* CONFIG_UCLAMP_TASK */
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 58b2368d3634..617bb59aa8ba 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -52,10 +52,20 @@
#define SCHED_FLAG_RECLAIM 0x02
#define SCHED_FLAG_DL_OVERRUN 0x04
#define SCHED_FLAG_KEEP_POLICY 0x08
+#define SCHED_FLAG_KEEP_PARAMS 0x10
+#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
+#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
+
+#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
+ SCHED_FLAG_KEEP_PARAMS)
+
+#define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
+ SCHED_FLAG_UTIL_CLAMP_MAX)
#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
SCHED_FLAG_RECLAIM | \
SCHED_FLAG_DL_OVERRUN | \
- SCHED_FLAG_KEEP_POLICY)
+ SCHED_FLAG_KEEP_ALL | \
+ SCHED_FLAG_UTIL_CLAMP)
#endif /* _UAPI_LINUX_SCHED_H */
diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h
index 10fbb8031930..c852153ddb0d 100644
--- a/include/uapi/linux/sched/types.h
+++ b/include/uapi/linux/sched/types.h
@@ -9,6 +9,7 @@ struct sched_param {
};
#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */
+#define SCHED_ATTR_SIZE_VER1 56 /* add: util_{min,max} */
/*
* Extended scheduling parameters data structure.
@@ -21,8 +22,33 @@ struct sched_param {
* the tasks may be useful for a wide variety of application fields, e.g.,
* multimedia, streaming, automation and control, and many others.
*
- * This variant (sched_attr) is meant at describing a so-called
- * sporadic time-constrained task. In such model a task is specified by:
+ * This variant (sched_attr) allows to define additional attributes to
+ * improve the scheduler knowledge about task requirements.
+ *
+ * Scheduling Class Attributes
+ * ===========================
+ *
+ * A subset of sched_attr attributes specifies the
+ * scheduling policy and relative POSIX attributes:
+ *
+ * @size size of the structure, for fwd/bwd compat.
+ *
+ * @sched_policy task's scheduling policy
+ * @sched_nice task's nice value (SCHED_NORMAL/BATCH)
+ * @sched_priority task's static priority (SCHED_FIFO/RR)
+ *
+ * Certain more advanced scheduling features can be controlled by a
+ * predefined set of flags via the attribute:
+ *
+ * @sched_flags for customizing the scheduler behaviour
+ *
+ * Sporadic Time-Constrained Task Attributes
+ * =========================================
+ *
+ * A subset of sched_attr attributes allows to describe a so-called
+ * sporadic time-constrained task.
+ *
+ * In such a model a task is specified by:
* - the activation period or minimum instance inter-arrival time;
* - the maximum (or average, depending on the actual scheduling
* discipline) computation time of all instances, a.k.a. runtime;
@@ -34,14 +60,8 @@ struct sched_param {
* than the runtime and must be completed by time instant t equal to
* the instance activation time + the deadline.
*
- * This is reflected by the actual fields of the sched_attr structure:
+ * This is reflected by the following fields of the sched_attr structure:
*
- * @size size of the structure, for fwd/bwd compat.
- *
- * @sched_policy task's scheduling policy
- * @sched_flags for customizing the scheduler behaviour
- * @sched_nice task's nice value (SCHED_NORMAL/BATCH)
- * @sched_priority task's static priority (SCHED_FIFO/RR)
* @sched_deadline representative of the task's deadline
* @sched_runtime representative of the task's runtime
* @sched_period representative of the task's period
@@ -53,6 +73,29 @@ struct sched_param {
* As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
* only user of this new interface. More information about the algorithm
* available in the scheduling class file or in Documentation/.
+ *
+ * Task Utilization Attributes
+ * ===========================
+ *
+ * A subset of sched_attr attributes allows to specify the utilization
+ * expected for a task. These attributes allow to inform the scheduler about
+ * the utilization boundaries within which it should schedule the task. These
+ * boundaries are valuable hints to support scheduler decisions on both task
+ * placement and frequency selection.
+ *
+ * @sched_util_min represents the minimum utilization
+ * @sched_util_max represents the maximum utilization
+ *
+ * Utilization is a value in the range [0..SCHED_CAPACITY_SCALE]. It
+ * represents the percentage of CPU time used by a task when running at the
+ * maximum frequency on the highest capacity CPU of the system. For example, a
+ * 20% utilization task is a task running for 2ms every 10ms at maximum
+ * frequency.
+ *
+ * A task with a min utilization value bigger than 0 is more likely scheduled
+ * on a CPU with a capacity big enough to fit the specified value.
+ * A task with a max utilization value smaller than 1024 is more likely
+ * scheduled on a CPU with no more capacity than the specified value.
*/
struct sched_attr {
__u32 size;
@@ -70,6 +113,11 @@ struct sched_attr {
__u64 sched_runtime;
__u64 sched_deadline;
__u64 sched_period;
+
+ /* Utilization hints */
+ __u32 sched_util_min;
+ __u32 sched_util_max;
+
};
#endif /* _UAPI_LINUX_SCHED_TYPES_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6d519f3f9789..e9a669266fa9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -805,10 +805,12 @@ static inline unsigned int uclamp_none(int clamp_id)
return SCHED_CAPACITY_SCALE;
}
-static inline void uclamp_se_set(struct uclamp_se *uc_se, unsigned int value)
+static inline void uclamp_se_set(struct uclamp_se *uc_se,
+ unsigned int value, bool user_defined)
{
uc_se->value = value;
uc_se->bucket_id = uclamp_bucket_id(value);
+ uc_se->user_defined = user_defined;
}
static inline unsigned int
@@ -1016,11 +1018,11 @@ int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
if (old_min != sysctl_sched_uclamp_util_min) {
uclamp_se_set(&uclamp_default[UCLAMP_MIN],
- sysctl_sched_uclamp_util_min);
+ sysctl_sched_uclamp_util_min, false);
}
if (old_max != sysctl_sched_uclamp_util_max) {
uclamp_se_set(&uclamp_default[UCLAMP_MAX],
- sysctl_sched_uclamp_util_max);
+ sysctl_sched_uclamp_util_max, false);
}
/*
@@ -1038,6 +1040,42 @@ done:
return result;
}
+static int uclamp_validate(struct task_struct *p,
+ const struct sched_attr *attr)
+{
+ unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value;
+ unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value;
+
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN)
+ lower_bound = attr->sched_util_min;
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX)
+ upper_bound = attr->sched_util_max;
+
+ if (lower_bound > upper_bound)
+ return -EINVAL;
+ if (upper_bound > SCHED_CAPACITY_SCALE)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void __setscheduler_uclamp(struct task_struct *p,
+ const struct sched_attr *attr)
+{
+ if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
+ return;
+
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
+ uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
+ attr->sched_util_min, true);
+ }
+
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
+ uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
+ attr->sched_util_max, true);
+ }
+}
+
static void uclamp_fork(struct task_struct *p)
{
unsigned int clamp_id;
@@ -1059,11 +1097,11 @@ static void __init init_uclamp(void)
for_each_clamp_id(clamp_id) {
uclamp_se_set(&init_task.uclamp_req[clamp_id],
- uclamp_none(clamp_id));
+ uclamp_none(clamp_id), false);
}
/* System defaults allow max clamp values for both indexes */
- uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX));
+ uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX), false);
for_each_clamp_id(clamp_id)
uclamp_default[clamp_id] = uc_max;
}
@@ -1071,6 +1109,13 @@ static void __init init_uclamp(void)
#else /* CONFIG_UCLAMP_TASK */
static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
+static inline int uclamp_validate(struct task_struct *p,
+ const struct sched_attr *attr)
+{
+ return -EOPNOTSUPP;
+}
+static void __setscheduler_uclamp(struct task_struct *p,
+ const struct sched_attr *attr) { }
static inline void uclamp_fork(struct task_struct *p) { }
static inline void init_uclamp(void) { }
#endif /* CONFIG_UCLAMP_TASK */
@@ -4412,6 +4457,13 @@ static void __setscheduler_params(struct task_struct *p,
static void __setscheduler(struct rq *rq, struct task_struct *p,
const struct sched_attr *attr, bool keep_boost)
{
+ /*
+ * If params can't change scheduling class changes aren't allowed
+ * either.
+ */
+ if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)
+ return;
+
__setscheduler_params(p, attr);
/*
@@ -4549,6 +4601,13 @@ recheck:
return retval;
}
+ /* Update task specific "requested" clamps */
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
+ retval = uclamp_validate(p, attr);
+ if (retval)
+ return retval;
+ }
+
/*
* Make sure no PI-waiters arrive (or leave) while we are
* changing the priority of the task:
@@ -4578,6 +4637,8 @@ recheck:
goto change;
if (dl_policy(policy) && dl_param_changed(p, attr))
goto change;
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
+ goto change;
p->sched_reset_on_fork = reset_on_fork;
task_rq_unlock(rq, p, &rf);
@@ -4658,7 +4719,9 @@ change:
put_prev_task(rq, p);
prev_class = p->sched_class;
+
__setscheduler(rq, p, attr, pi);
+ __setscheduler_uclamp(p, attr);
if (queued) {
/*
@@ -4834,6 +4897,10 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
if (ret)
return -EFAULT;
+ if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) &&
+ size < SCHED_ATTR_SIZE_VER1)
+ return -EINVAL;
+
/*
* XXX: Do we want to be lenient like existing syscalls; or do we want
* to be strict and return an error on out-of-bounds values?
@@ -4903,10 +4970,15 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
rcu_read_lock();
retval = -ESRCH;
p = find_process_by_pid(pid);
- if (p != NULL)
- retval = sched_setattr(p, &attr);
+ if (likely(p))
+ get_task_struct(p);
rcu_read_unlock();
+ if (likely(p)) {
+ retval = sched_setattr(p, &attr);
+ put_task_struct(p);
+ }
+
return retval;
}
@@ -5057,6 +5129,11 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
else
attr.sched_nice = task_nice(p);
+#ifdef CONFIG_UCLAMP_TASK
+ attr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value;
+ attr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value;
+#endif
+
rcu_read_unlock();
retval = sched_read_attr(uattr, &attr, size);