From b90ca8badbd11488e5f762346b028666808164e7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Jul 2021 12:24:33 +0100 Subject: sched: Introduce task_struct::user_cpus_ptr to track requested affinity In preparation for saving and restoring the user-requested CPU affinity mask of a task, add a new cpumask_t pointer to 'struct task_struct'. If the pointer is non-NULL, then the mask is copied across fork() and freed on task exit. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lore.kernel.org/r/20210730112443.23245-7-will@kernel.org --- include/linux/sched.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 50db9496c99d..2c5d638daaad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -748,6 +748,7 @@ struct task_struct { unsigned int policy; int nr_cpus_allowed; const cpumask_t *cpus_ptr; + cpumask_t *user_cpus_ptr; cpumask_t cpus_mask; void *migration_pending; #ifdef CONFIG_SMP @@ -1706,6 +1707,8 @@ extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_ #ifdef CONFIG_SMP extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); +extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node); +extern void release_user_cpus_ptr(struct task_struct *p); #else static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { @@ -1716,6 +1719,16 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma return -EINVAL; return 0; } +static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) +{ + if (src->user_cpus_ptr) + return -EINVAL; + return 0; +} +static inline void release_user_cpus_ptr(struct task_struct *p) +{ + 
WARN_ON(p->user_cpus_ptr); +} #endif extern int yield_to(struct task_struct *p, bool preempt); -- cgit v1.2.3-59-g8ed1b From 07ec77a1d4e82526e1588979fff2f024f8e96df2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Jul 2021 12:24:35 +0100 Subject: sched: Allow task CPU affinity to be restricted on asymmetric systems Asymmetric systems may not offer the same level of userspace ISA support across all CPUs, meaning that some applications cannot be executed by some CPUs. As a concrete example, upcoming arm64 big.LITTLE designs do not feature support for 32-bit applications on both clusters. Although userspace can carefully manage the affinity masks for such tasks, one place where it is particularly problematic is execve() because the CPU on which the execve() is occurring may be incompatible with the new application image. In such a situation, it is desirable to restrict the affinity mask of the task and ensure that the new image is entered on a compatible CPU. From userspace's point of view, this looks the same as if the incompatible CPUs have been hotplugged off in the task's affinity mask. Similarly, if a subsequent execve() reverts to a compatible image, then the old affinity is restored if it is still valid. In preparation for restricting the affinity mask for compat tasks on arm64 systems without uniform support for 32-bit applications, introduce {force,relax}_compatible_cpus_allowed_ptr(), which respectively restrict and restore the affinity mask for a task based on the compatible CPUs. 
Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Quentin Perret Link: https://lore.kernel.org/r/20210730112443.23245-9-will@kernel.org --- include/linux/sched.h | 2 + kernel/sched/core.c | 198 +++++++++++++++++++++++++++++++++++++++++++++----- kernel/sched/sched.h | 1 + 3 files changed, 183 insertions(+), 18 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c5d638daaad..ce2d5cfc331e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1709,6 +1709,8 @@ extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node); extern void release_user_cpus_ptr(struct task_struct *p); +extern void force_compatible_cpus_allowed_ptr(struct task_struct *p); +extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p); #else static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 672d0fcbf2ef..6ee197049c9c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2494,10 +2494,18 @@ int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, return 0; } +static inline struct cpumask *clear_user_cpus_ptr(struct task_struct *p) +{ + struct cpumask *user_mask = NULL; + + swap(p->user_cpus_ptr, user_mask); + + return user_mask; +} + void release_user_cpus_ptr(struct task_struct *p) { - kfree(p->user_cpus_ptr); - p->user_cpus_ptr = NULL; + kfree(clear_user_cpus_ptr(p)); } /* @@ -2717,27 +2725,23 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag } /* - * Change a given task's CPU affinity. 
Migrate the thread to a - * proper CPU and schedule it away if the CPU it's executing on - * is removed from the allowed bitmask. - * - * NOTE: the caller must have a valid reference to the task, the - * task must not exit() & deallocate itself prematurely. The - * call is not atomic; no spinlocks may be held. + * Called with both p->pi_lock and rq->lock held; drops both before returning. */ -static int __set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask, - u32 flags) +static int __set_cpus_allowed_ptr_locked(struct task_struct *p, + const struct cpumask *new_mask, + u32 flags, + struct rq *rq, + struct rq_flags *rf) + __releases(rq->lock) + __releases(p->pi_lock) { const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p); const struct cpumask *cpu_valid_mask = cpu_active_mask; bool kthread = p->flags & PF_KTHREAD; + struct cpumask *user_mask = NULL; unsigned int dest_cpu; - struct rq_flags rf; - struct rq *rq; int ret = 0; - rq = task_rq_lock(p, &rf); update_rq_clock(rq); if (kthread || is_migration_disabled(p)) { @@ -2793,20 +2797,178 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, __do_set_cpus_allowed(p, new_mask, flags); - return affine_move_task(rq, p, &rf, dest_cpu, flags); + if (flags & SCA_USER) + user_mask = clear_user_cpus_ptr(p); + + ret = affine_move_task(rq, p, rf, dest_cpu, flags); + + kfree(user_mask); + + return ret; out: - task_rq_unlock(rq, p, &rf); + task_rq_unlock(rq, p, rf); return ret; } +/* + * Change a given task's CPU affinity. Migrate the thread to a + * proper CPU and schedule it away if the CPU it's executing on + * is removed from the allowed bitmask. + * + * NOTE: the caller must have a valid reference to the task, the + * task must not exit() & deallocate itself prematurely. The + * call is not atomic; no spinlocks may be held. 
+ */ +static int __set_cpus_allowed_ptr(struct task_struct *p, + const struct cpumask *new_mask, u32 flags) +{ + struct rq_flags rf; + struct rq *rq; + + rq = task_rq_lock(p, &rf); + return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf); +} + int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { return __set_cpus_allowed_ptr(p, new_mask, 0); } EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); +/* + * Change a given task's CPU affinity to the intersection of its current + * affinity mask and @subset_mask, writing the resulting mask to @new_mask + * and pointing @p->user_cpus_ptr to a copy of the old mask. + * If the resulting mask is empty, leave the affinity unchanged and return + * -EINVAL. + */ +static int restrict_cpus_allowed_ptr(struct task_struct *p, + struct cpumask *new_mask, + const struct cpumask *subset_mask) +{ + struct cpumask *user_mask = NULL; + struct rq_flags rf; + struct rq *rq; + int err; + + if (!p->user_cpus_ptr) { + user_mask = kmalloc(cpumask_size(), GFP_KERNEL); + if (!user_mask) + return -ENOMEM; + } + + rq = task_rq_lock(p, &rf); + + /* + * Forcefully restricting the affinity of a deadline task is + * likely to cause problems, so fail and noisily override the + * mask entirely. + */ + if (task_has_dl_policy(p) && dl_bandwidth_enabled()) { + err = -EPERM; + goto err_unlock; + } + + if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) { + err = -EINVAL; + goto err_unlock; + } + + /* + * We're about to butcher the task affinity, so keep track of what + * the user asked for in case we're able to restore it later on. 
+ */ + if (user_mask) { + cpumask_copy(user_mask, p->cpus_ptr); + p->user_cpus_ptr = user_mask; + } + + return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf); + +err_unlock: + task_rq_unlock(rq, p, &rf); + kfree(user_mask); + return err; +} + +/* + * Restrict the CPU affinity of task @p so that it is a subset of + * task_cpu_possible_mask() and point @p->user_cpus_ptr to a copy of the + * old affinity mask. If the resulting mask is empty, we warn and walk + * up the cpuset hierarchy until we find a suitable mask. + */ +void force_compatible_cpus_allowed_ptr(struct task_struct *p) +{ + cpumask_var_t new_mask; + const struct cpumask *override_mask = task_cpu_possible_mask(p); + + alloc_cpumask_var(&new_mask, GFP_KERNEL); + + /* + * __migrate_task() can fail silently in the face of concurrent + * offlining of the chosen destination CPU, so take the hotplug + * lock to ensure that the migration succeeds. + */ + cpus_read_lock(); + if (!cpumask_available(new_mask)) + goto out_set_mask; + + if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask)) + goto out_free_mask; + + /* + * We failed to find a valid subset of the affinity mask for the + * task, so override it based on its cpuset hierarchy. + */ + cpuset_cpus_allowed(p, new_mask); + override_mask = new_mask; + +out_set_mask: + if (printk_ratelimit()) { + printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n", + task_pid_nr(p), p->comm, + cpumask_pr_args(override_mask)); + } + + WARN_ON(set_cpus_allowed_ptr(p, override_mask)); +out_free_mask: + cpus_read_unlock(); + free_cpumask_var(new_mask); +} + +static int +__sched_setaffinity(struct task_struct *p, const struct cpumask *mask); + +/* + * Restore the affinity of a task @p which was previously restricted by a + * call to force_compatible_cpus_allowed_ptr(). This will clear (and free) + * @p->user_cpus_ptr. + * + * It is the caller's responsibility to serialise this with any calls to + * force_compatible_cpus_allowed_ptr(@p). 
+ */ +void relax_compatible_cpus_allowed_ptr(struct task_struct *p) +{ + struct cpumask *user_mask = p->user_cpus_ptr; + unsigned long flags; + + /* + * Try to restore the old affinity mask. If this fails, then + * we free the mask explicitly to avoid it being inherited across + * a subsequent fork(). + */ + if (!user_mask || !__sched_setaffinity(p, user_mask)) + return; + + raw_spin_lock_irqsave(&p->pi_lock, flags); + user_mask = clear_user_cpus_ptr(p); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + + kfree(user_mask); +} + void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { #ifdef CONFIG_SCHED_DEBUG @@ -7629,7 +7791,7 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask) } #endif again: - retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); + retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | SCA_USER); if (retval) goto out_free_new_mask; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 5fa02902c143..e7e2bba5b520 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2244,6 +2244,7 @@ extern struct task_struct *pick_next_task_idle(struct rq *rq); #define SCA_CHECK 0x01 #define SCA_MIGRATE_DISABLE 0x02 #define SCA_MIGRATE_ENABLE 0x04 +#define SCA_USER 0x08 #ifdef CONFIG_SMP -- cgit v1.2.3-59-g8ed1b From 234b8ab6476c5edd5262e2ff563de9498d60044a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Jul 2021 12:24:36 +0100 Subject: sched: Introduce dl_task_check_affinity() to check proposed affinity In preparation for restricting the affinity of a task during execve() on arm64, introduce a new dl_task_check_affinity() helper function to give an indication as to whether the restricted mask is admissible for a deadline task. 
Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Link: https://lore.kernel.org/r/20210730112443.23245-10-will@kernel.org --- include/linux/sched.h | 6 ++++++ kernel/sched/core.c | 46 +++++++++++++++++++++++++++++----------------- 2 files changed, 35 insertions(+), 17 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index ce2d5cfc331e..3bb9fecfdaa1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1709,6 +1709,7 @@ extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node); extern void release_user_cpus_ptr(struct task_struct *p); +extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask); extern void force_compatible_cpus_allowed_ptr(struct task_struct *p); extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p); #else @@ -1731,6 +1732,11 @@ static inline void release_user_cpus_ptr(struct task_struct *p) { WARN_ON(p->user_cpus_ptr); } + +static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask) +{ + return 0; +} #endif extern int yield_to(struct task_struct *p, bool preempt); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6ee197049c9c..a22cc3c156ce 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7756,6 +7756,32 @@ out_unlock: return retval; } +#ifdef CONFIG_SMP +int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask) +{ + int ret = 0; + + /* + * If the task isn't a deadline task or admission control is + * disabled then we don't care about affinity changes. 
+ */ + if (!task_has_dl_policy(p) || !dl_bandwidth_enabled()) + return 0; + + /* + * Since bandwidth control happens on root_domain basis, + * if admission test is enabled, we only admit -deadline + * tasks allowed to run on all the CPUs in the task's + * root_domain. + */ + rcu_read_lock(); + if (!cpumask_subset(task_rq(p)->rd->span, mask)) + ret = -EBUSY; + rcu_read_unlock(); + return ret; +} +#endif + static int __sched_setaffinity(struct task_struct *p, const struct cpumask *mask) { @@ -7773,23 +7799,9 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask) cpuset_cpus_allowed(p, cpus_allowed); cpumask_and(new_mask, mask, cpus_allowed); - /* - * Since bandwidth control happens on root_domain basis, - * if admission test is enabled, we only admit -deadline - * tasks allowed to run on all the CPUs in the task's - * root_domain. - */ -#ifdef CONFIG_SMP - if (task_has_dl_policy(p) && dl_bandwidth_enabled()) { - rcu_read_lock(); - if (!cpumask_subset(task_rq(p)->rd->span, new_mask)) { - retval = -EBUSY; - rcu_read_unlock(); - goto out_free_new_mask; - } - rcu_read_unlock(); - } -#endif + retval = dl_task_check_affinity(p, new_mask); + if (retval) + goto out_free_new_mask; again: retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | SCA_USER); if (retval) -- cgit v1.2.3-59-g8ed1b