 Documentation/scheduler/sched-ext.rst                              |   2
 kernel/sched/core.c                                                |  21
 kernel/sched/ext.c                                                 | 249
 kernel/sched/sched.h                                               |   3
 tools/sched_ext/include/scx/common.bpf.h                           |   8
 tools/sched_ext/scx_qmap.bpf.c                                     |   4
 tools/sched_ext/scx_show_state.py                                  |   2
 tools/testing/selftests/Makefile                                   |   9
 tools/testing/selftests/sched_ext/Makefile                         |  73
 tools/testing/selftests/sched_ext/create_dsq.bpf.c                 |   6
 tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c        |   4
 tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c       |   4
 tools/testing/selftests/sched_ext/dsp_local_on.bpf.c               |   8
 tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c      |   8
 tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c          |  10
 tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c       |   4
 tools/testing/selftests/sched_ext/exit.bpf.c                       |  22
 tools/testing/selftests/sched_ext/hotplug.bpf.c                    |   8
 tools/testing/selftests/sched_ext/init_enable_count.bpf.c          |   8
 tools/testing/selftests/sched_ext/maximal.bpf.c                    |  58
 tools/testing/selftests/sched_ext/maybe_null.bpf.c                 |   6
 tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c        |   4
 tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c        |   4
 tools/testing/selftests/sched_ext/prog_run.bpf.c                   |   2
 tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c             |   2
 tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c  |   6
 tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c        |   2
 tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c|   4
 tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c|   4
 tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c           |  12
 30 files changed, 302 insertions(+), 255 deletions(-)
diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst
index 6c0d70e2e27d..7b59bbd2e564 100644
--- a/Documentation/scheduler/sched-ext.rst
+++ b/Documentation/scheduler/sched-ext.rst
@@ -66,7 +66,7 @@ BPF scheduler and reverts all tasks back to CFS.
.. code-block:: none
# make -j16 -C tools/sched_ext
- # tools/sched_ext/scx_simple
+ # tools/sched_ext/build/bin/scx_simple
local=0 global=3
local=5 global=24
local=9 global=44
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 43e453ab7e20..aeb595514461 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3518,14 +3518,16 @@ out:
* The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
*/
static inline
-int select_task_rq(struct task_struct *p, int cpu, int wake_flags)
+int select_task_rq(struct task_struct *p, int cpu, int *wake_flags)
{
lockdep_assert_held(&p->pi_lock);
- if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
- cpu = p->sched_class->select_task_rq(p, cpu, wake_flags);
- else
+ if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) {
+ cpu = p->sched_class->select_task_rq(p, cpu, *wake_flags);
+ *wake_flags |= WF_RQ_SELECTED;
+ } else {
cpu = cpumask_any(p->cpus_ptr);
+ }
/*
* In order not to call set_task_cpu() on a blocking task we need
@@ -3659,6 +3661,8 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
rq->nr_uninterruptible--;
#ifdef CONFIG_SMP
+ if (wake_flags & WF_RQ_SELECTED)
+ en_flags |= ENQUEUE_RQ_SELECTED;
if (wake_flags & WF_MIGRATED)
en_flags |= ENQUEUE_MIGRATED;
else
@@ -4120,6 +4124,8 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
guard(preempt)();
int cpu, success = 0;
+ wake_flags |= WF_TTWU;
+
if (p == current) {
/*
* We're waking current, this means 'p->on_rq' and 'task_cpu(p)
@@ -4252,7 +4258,7 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
smp_cond_load_acquire(&p->on_cpu, !VAL);
- cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
+ cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
if (task_cpu(p) != cpu) {
if (p->in_iowait) {
delayacct_blkio_end(p);
@@ -4793,6 +4799,7 @@ void wake_up_new_task(struct task_struct *p)
{
struct rq_flags rf;
struct rq *rq;
+ int wake_flags = WF_FORK;
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
WRITE_ONCE(p->__state, TASK_RUNNING);
@@ -4807,7 +4814,7 @@ void wake_up_new_task(struct task_struct *p)
*/
p->recent_used_cpu = task_cpu(p);
rseq_migrate(p);
- __set_task_cpu(p, select_task_rq(p, task_cpu(p), WF_FORK));
+ __set_task_cpu(p, select_task_rq(p, task_cpu(p), &wake_flags));
#endif
rq = __task_rq_lock(p, &rf);
update_rq_clock(rq);
@@ -4815,7 +4822,7 @@ void wake_up_new_task(struct task_struct *p)
activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
trace_sched_wakeup_new(p);
- wakeup_preempt(rq, p, WF_FORK);
+ wakeup_preempt(rq, p, wake_flags);
#ifdef CONFIG_SMP
if (p->sched_class->task_woken) {
/*
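
Passing wake_flags by pointer lets select_task_rq() record, via the new
WF_RQ_SELECTED flag, whether the class's ->select_task_rq() actually ran for
this wakeup. ttwu_do_activate() translates that into ENQUEUE_RQ_SELECTED,
which sched_ext re-exports to BPF as SCX_ENQ_CPU_SELECTED. A minimal sketch
of the BPF-side consumer (hypothetical names, same pattern as the scx_qmap
change further down):

	void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
	{
		/*
		 * If ops.select_cpu() never ran for this enqueue (e.g. the
		 * task is migration-disabled or pinned to a single CPU),
		 * the scheduler may want to pick a target CPU here instead.
		 */
		if (!(enq_flags & SCX_ENQ_CPU_SELECTED)) {
			/* choose a CPU and direct-dispatch, or fall through */
		}
		scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
	}
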
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 012a7fc77263..c074a64c20f0 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -9,7 +9,6 @@
#define SCX_OP_IDX(op) (offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
enum scx_consts {
- SCX_SLICE_BYPASS = SCX_SLICE_DFL / 4,
SCX_DSP_DFL_MAX_BATCH = 32,
SCX_DSP_MAX_LOOPS = 32,
SCX_WATCHDOG_MAX_TIMEOUT = 30 * HZ,
@@ -19,6 +18,12 @@ enum scx_consts {
SCX_EXIT_DUMP_DFL_LEN = 32768,
SCX_CPUPERF_ONE = SCHED_CAPACITY_SCALE,
+
+ /*
+ * Iterating all tasks may take a while. Periodically drop
+ * scx_tasks_lock to avoid causing e.g. CSD and RCU stalls.
+ */
+ SCX_OPS_TASK_ITER_BATCH = 32,
};
enum scx_exit_kind {
@@ -630,6 +635,10 @@ struct sched_ext_ops {
/**
* exit - Clean up after the BPF scheduler
* @info: Exit info
+ *
+ * ops.exit() is also called on ops.init() failure, which is a bit
+ * unusual. This is to allow rich reporting through @info on how
+ * ops.init() failed.
*/
void (*exit)(struct scx_exit_info *info);
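
Since ops.exit() now also fires when ops.init() fails, a scheduler can
capture the failure details in one place. A minimal sketch using the
user_exit_info helpers from tools/sched_ext (the same pattern the selftests
below rely on):

	UEI_DEFINE(uei);

	void BPF_STRUCT_OPS(example_exit, struct scx_exit_info *ei)
	{
		/*
		 * Records ei->kind plus the exit code and message for
		 * userspace, including exits caused by ops.init() failing.
		 */
		UEI_RECORD(uei, ei);
	}
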
@@ -697,6 +706,7 @@ enum scx_enq_flags {
/* expose select ENQUEUE_* flags as enums */
SCX_ENQ_WAKEUP = ENQUEUE_WAKEUP,
SCX_ENQ_HEAD = ENQUEUE_HEAD,
+ SCX_ENQ_CPU_SELECTED = ENQUEUE_RQ_SELECTED,
/* high 32bits are SCX specific */
@@ -857,7 +867,8 @@ static DEFINE_MUTEX(scx_ops_enable_mutex);
DEFINE_STATIC_KEY_FALSE(__scx_ops_enabled);
DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem);
static atomic_t scx_ops_enable_state_var = ATOMIC_INIT(SCX_OPS_DISABLED);
-static atomic_t scx_ops_bypass_depth = ATOMIC_INIT(0);
+static int scx_ops_bypass_depth;
+static DEFINE_RAW_SPINLOCK(__scx_ops_bypass_lock);
static bool scx_ops_init_task_enabled;
static bool scx_switching_all;
DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
@@ -1279,86 +1290,104 @@ struct scx_task_iter {
struct task_struct *locked;
struct rq *rq;
struct rq_flags rf;
+ u32 cnt;
};
/**
- * scx_task_iter_init - Initialize a task iterator
+ * scx_task_iter_start - Lock scx_tasks_lock and start a task iteration
* @iter: iterator to init
*
- * Initialize @iter. Must be called with scx_tasks_lock held. Once initialized,
- * @iter must eventually be exited with scx_task_iter_exit().
+ * Initialize @iter and return with scx_tasks_lock held. Once initialized, @iter
+ * must eventually be stopped with scx_task_iter_stop().
*
- * scx_tasks_lock may be released between this and the first next() call or
- * between any two next() calls. If scx_tasks_lock is released between two
- * next() calls, the caller is responsible for ensuring that the task being
- * iterated remains accessible either through RCU read lock or obtaining a
- * reference count.
+ * scx_tasks_lock and the rq lock may be released using scx_task_iter_unlock()
+ * between this and the first next() call or between any two next() calls. If
+ * the locks are released between two next() calls, the caller is responsible
+ * for ensuring that the task being iterated remains accessible either through
+ * RCU read lock or obtaining a reference count.
*
* All tasks which existed when the iteration started are guaranteed to be
* visited as long as they still exist.
*/
-static void scx_task_iter_init(struct scx_task_iter *iter)
+static void scx_task_iter_start(struct scx_task_iter *iter)
{
- lockdep_assert_held(&scx_tasks_lock);
-
BUILD_BUG_ON(__SCX_DSQ_ITER_ALL_FLAGS &
((1U << __SCX_DSQ_LNODE_PRIV_SHIFT) - 1));
+ spin_lock_irq(&scx_tasks_lock);
+
iter->cursor = (struct sched_ext_entity){ .flags = SCX_TASK_CURSOR };
list_add(&iter->cursor.tasks_node, &scx_tasks);
iter->locked = NULL;
+ iter->cnt = 0;
+}
+
+static void __scx_task_iter_rq_unlock(struct scx_task_iter *iter)
+{
+ if (iter->locked) {
+ task_rq_unlock(iter->rq, iter->locked, &iter->rf);
+ iter->locked = NULL;
+ }
}
/**
- * scx_task_iter_rq_unlock - Unlock rq locked by a task iterator
- * @iter: iterator to unlock rq for
+ * scx_task_iter_unlock - Unlock rq and scx_tasks_lock held by a task iterator
+ * @iter: iterator to unlock
*
* If @iter is in the middle of a locked iteration, it may be locking the rq of
- * the task currently being visited. Unlock the rq if so. This function can be
- * safely called anytime during an iteration.
+ * the task currently being visited in addition to scx_tasks_lock. Unlock both.
+ * This function can be safely called anytime during an iteration.
+ */
+static void scx_task_iter_unlock(struct scx_task_iter *iter)
+{
+ __scx_task_iter_rq_unlock(iter);
+ spin_unlock_irq(&scx_tasks_lock);
+}
+
+/**
+ * scx_task_iter_relock - Lock scx_tasks_lock released by scx_task_iter_unlock()
+ * @iter: iterator to re-lock
*
- * Returns %true if the rq @iter was locking is unlocked. %false if @iter was
- * not locking an rq.
+ * Re-lock scx_tasks_lock unlocked by scx_task_iter_unlock(). Note that it
+ * doesn't re-lock the rq lock. Must be called before other iterator operations.
*/
-static bool scx_task_iter_rq_unlock(struct scx_task_iter *iter)
+static void scx_task_iter_relock(struct scx_task_iter *iter)
{
- if (iter->locked) {
- task_rq_unlock(iter->rq, iter->locked, &iter->rf);
- iter->locked = NULL;
- return true;
- } else {
- return false;
- }
+ spin_lock_irq(&scx_tasks_lock);
}
/**
- * scx_task_iter_exit - Exit a task iterator
+ * scx_task_iter_stop - Stop a task iteration and unlock scx_tasks_lock
* @iter: iterator to exit
*
- * Exit a previously initialized @iter. Must be called with scx_tasks_lock held.
- * If the iterator holds a task's rq lock, that rq lock is released. See
- * scx_task_iter_init() for details.
+ * Exit a previously initialized @iter. Must be called with scx_tasks_lock held
+ * which is released on return. If the iterator holds a task's rq lock, that rq
+ * lock is also released. See scx_task_iter_start() for details.
*/
-static void scx_task_iter_exit(struct scx_task_iter *iter)
+static void scx_task_iter_stop(struct scx_task_iter *iter)
{
- lockdep_assert_held(&scx_tasks_lock);
-
- scx_task_iter_rq_unlock(iter);
list_del_init(&iter->cursor.tasks_node);
+ scx_task_iter_unlock(iter);
}
/**
* scx_task_iter_next - Next task
* @iter: iterator to walk
*
- * Visit the next task. See scx_task_iter_init() for details.
+ * Visit the next task. See scx_task_iter_start() for details. Locks are dropped
+ * and re-acquired every %SCX_OPS_TASK_ITER_BATCH iterations to avoid causing
+ * stalls by holding scx_tasks_lock for too long.
*/
static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
{
struct list_head *cursor = &iter->cursor.tasks_node;
struct sched_ext_entity *pos;
- lockdep_assert_held(&scx_tasks_lock);
+ if (!(++iter->cnt % SCX_OPS_TASK_ITER_BATCH)) {
+ scx_task_iter_unlock(iter);
+ cond_resched();
+ scx_task_iter_relock(iter);
+ }
list_for_each_entry(pos, cursor, tasks_node) {
if (&pos->tasks_node == &scx_tasks)
@@ -1379,14 +1408,14 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
* @include_dead: Whether we should include dead tasks in the iteration
*
* Visit the non-idle task with its rq lock held. Allows callers to specify
- * whether they would like to filter out dead tasks. See scx_task_iter_init()
+ * whether they would like to filter out dead tasks. See scx_task_iter_start()
* for details.
*/
static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
{
struct task_struct *p;
- scx_task_iter_rq_unlock(iter);
+ __scx_task_iter_rq_unlock(iter);
while ((p = scx_task_iter_next(iter))) {
/*
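
For reference, the reworked calling convention: scx_task_iter_start() and
scx_task_iter_stop() now take and release scx_tasks_lock themselves, next()
transparently drops and re-takes the locks every SCX_OPS_TASK_ITER_BATCH
tasks to avoid stalls, and unlock()/relock() bracket any work that must
sleep. A minimal usage sketch:

	static void example_walk_all_tasks(void)
	{
		struct scx_task_iter sti;
		struct task_struct *p;

		scx_task_iter_start(&sti);	/* takes scx_tasks_lock */
		while ((p = scx_task_iter_next_locked(&sti))) {
			/*
			 * @p's rq lock is held here; both locks may have
			 * been dropped and re-taken between iterations.
			 */
		}
		scx_task_iter_stop(&sti);	/* releases scx_tasks_lock */
	}
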
@@ -1954,7 +1983,6 @@ static bool scx_rq_online(struct rq *rq)
static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
int sticky_cpu)
{
- bool bypassing = scx_rq_bypassing(rq);
struct task_struct **ddsp_taskp;
unsigned long qseq;
@@ -1972,7 +2000,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
if (!scx_rq_online(rq))
goto local;
- if (bypassing)
+ if (scx_rq_bypassing(rq))
goto global;
if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
@@ -2027,7 +2055,7 @@ local_norefill:
global:
touch_core_sched(rq, p); /* see the comment in local: */
- p->scx.slice = bypassing ? SCX_SLICE_BYPASS : SCX_SLICE_DFL;
+ p->scx.slice = SCX_SLICE_DFL;
dispatch_enqueue(find_global_dsq(p), p, enq_flags);
}
@@ -3030,8 +3058,8 @@ static struct task_struct *pick_task_scx(struct rq *rq)
if (unlikely(!p->scx.slice)) {
if (!scx_rq_bypassing(rq) && !scx_warned_zero_slice) {
- printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in pick_next_task_scx()\n",
- p->comm, p->pid);
+ printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in %s()\n",
+ p->comm, p->pid, __func__);
scx_warned_zero_slice = true;
}
p->scx.slice = SCX_SLICE_DFL;
@@ -3274,11 +3302,6 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
*found = false;
- if (!static_branch_likely(&scx_builtin_idle_enabled)) {
- scx_ops_error("built-in idle tracking is disabled");
- return prev_cpu;
- }
-
/*
* Determine the scheduling domain only if the task is allowed to run
* on all CPUs.
@@ -3435,7 +3458,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
if (unlikely(wake_flags & WF_EXEC))
return prev_cpu;
- if (SCX_HAS_OP(select_cpu)) {
+ if (SCX_HAS_OP(select_cpu) && !scx_rq_bypassing(task_rq(p))) {
s32 cpu;
struct task_struct **ddsp_taskp;
@@ -3500,7 +3523,7 @@ void __scx_update_idle(struct rq *rq, bool idle)
{
int cpu = cpu_of(rq);
- if (SCX_HAS_OP(update_idle)) {
+ if (SCX_HAS_OP(update_idle) && !scx_rq_bypassing(rq)) {
SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
if (!static_branch_unlikely(&scx_builtin_idle_enabled))
return;
@@ -4358,7 +4381,6 @@ static void scx_cgroup_exit(void)
percpu_rwsem_assert_held(&scx_cgroup_rwsem);
- WARN_ON_ONCE(!scx_cgroup_enabled);
scx_cgroup_enabled = false;
/*
@@ -4427,6 +4449,7 @@ static int scx_cgroup_init(void)
css->cgroup, &args);
if (ret) {
css_put(css);
+ scx_ops_error("ops.cgroup_init() failed (%d)", ret);
return ret;
}
tg->scx_flags |= SCX_TG_INITED;
@@ -4566,36 +4589,40 @@ bool task_should_scx(struct task_struct *p)
* the DISABLING state and then cycling the queued tasks through dequeue/enqueue
* to force global FIFO scheduling.
*
- * a. ops.enqueue() is ignored and tasks are queued in simple global FIFO order.
- * %SCX_OPS_ENQ_LAST is also ignored.
+ * - ops.select_cpu() is ignored and the default select_cpu() is used.
+ *
+ * - ops.enqueue() is ignored and tasks are queued in simple global FIFO order.
+ * %SCX_OPS_ENQ_LAST is also ignored.
*
- * b. ops.dispatch() is ignored.
+ * - ops.dispatch() is ignored.
*
- * c. balance_scx() does not set %SCX_RQ_BAL_KEEP on non-zero slice as slice
- * can't be trusted. Whenever a tick triggers, the running task is rotated to
- * the tail of the queue with core_sched_at touched.
+ * - balance_scx() does not set %SCX_RQ_BAL_KEEP on non-zero slice as slice
+ * can't be trusted. Whenever a tick triggers, the running task is rotated to
+ * the tail of the queue with core_sched_at touched.
*
- * d. pick_next_task() suppresses zero slice warning.
+ * - pick_next_task() suppresses zero slice warning.
*
- * e. scx_bpf_kick_cpu() is disabled to avoid irq_work malfunction during PM
- * operations.
+ * - scx_bpf_kick_cpu() is disabled to avoid irq_work malfunction during PM
+ * operations.
*
- * f. scx_prio_less() reverts to the default core_sched_at order.
+ * - scx_prio_less() reverts to the default core_sched_at order.
*/
static void scx_ops_bypass(bool bypass)
{
- int depth, cpu;
+ int cpu;
+ unsigned long flags;
+ raw_spin_lock_irqsave(&__scx_ops_bypass_lock, flags);
if (bypass) {
- depth = atomic_inc_return(&scx_ops_bypass_depth);
- WARN_ON_ONCE(depth <= 0);
- if (depth != 1)
- return;
+ scx_ops_bypass_depth++;
+ WARN_ON_ONCE(scx_ops_bypass_depth <= 0);
+ if (scx_ops_bypass_depth != 1)
+ goto unlock;
} else {
- depth = atomic_dec_return(&scx_ops_bypass_depth);
- WARN_ON_ONCE(depth < 0);
- if (depth != 0)
- return;
+ scx_ops_bypass_depth--;
+ WARN_ON_ONCE(scx_ops_bypass_depth < 0);
+ if (scx_ops_bypass_depth != 0)
+ goto unlock;
}
/*
@@ -4612,7 +4639,7 @@ static void scx_ops_bypass(bool bypass)
struct rq_flags rf;
struct task_struct *p, *n;
- rq_lock_irqsave(rq, &rf);
+ rq_lock(rq, &rf);
if (bypass) {
WARN_ON_ONCE(rq->scx.flags & SCX_RQ_BYPASSING);
@@ -4648,11 +4675,13 @@ static void scx_ops_bypass(bool bypass)
sched_enq_and_set_task(&ctx);
}
- rq_unlock_irqrestore(rq, &rf);
+ rq_unlock(rq, &rf);
- /* kick to restore ticks */
+ /* resched to restore ticks and idle state */
resched_cpu(cpu);
}
+unlock:
+ raw_spin_unlock_irqrestore(&__scx_ops_bypass_lock, flags);
}
static void free_exit_info(struct scx_exit_info *ei)
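
With the depth counter now a plain int serialized by __scx_ops_bypass_lock,
the depth check and the per-rq state flips happen atomically with respect to
concurrent togglers, which the bare atomic_t could not guarantee. Nesting
behaves as before; illustrated:

	scx_ops_bypass(true);	/* depth 0 -> 1: flips rqs into bypass */
	scx_ops_bypass(true);	/* depth 1 -> 2: counting only */
	scx_ops_bypass(false);	/* depth 2 -> 1: still bypassing */
	scx_ops_bypass(false);	/* depth 1 -> 0: restores normal operation */
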
@@ -4772,15 +4801,13 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
scx_ops_init_task_enabled = false;
- spin_lock_irq(&scx_tasks_lock);
- scx_task_iter_init(&sti);
+ scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
const struct sched_class *old_class = p->sched_class;
struct sched_enq_and_set_ctx ctx;
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
- p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL);
__setscheduler_prio(p, p->prio);
check_class_changing(task_rq(p), p, old_class);
@@ -4789,8 +4816,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
check_class_changed(task_rq(p), p, old_class, p->prio);
scx_ops_exit_task(p);
}
- scx_task_iter_exit(&sti);
- spin_unlock_irq(&scx_tasks_lock);
+ scx_task_iter_stop(&sti);
percpu_up_write(&scx_fork_rwsem);
/* no task is on scx, turn off all the switches and flush in-progress calls */
@@ -5258,7 +5284,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
cpu_possible_mask)) {
- pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation");
+ pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n");
return -EINVAL;
}
@@ -5351,6 +5377,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
if (ret) {
ret = ops_sanitize_err("init", ret);
cpus_read_unlock();
+ scx_ops_error("ops.init() failed (%d)", ret);
goto err_disable;
}
}
@@ -5443,8 +5470,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
if (ret)
goto err_disable_unlock_all;
- spin_lock_irq(&scx_tasks_lock);
- scx_task_iter_init(&sti);
+ scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
/*
* @p may already be dead, have lost all its usages counts and
@@ -5454,27 +5480,24 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
if (!tryget_task_struct(p))
continue;
- scx_task_iter_rq_unlock(&sti);
- spin_unlock_irq(&scx_tasks_lock);
+ scx_task_iter_unlock(&sti);
ret = scx_ops_init_task(p, task_group(p), false);
if (ret) {
put_task_struct(p);
- spin_lock_irq(&scx_tasks_lock);
- scx_task_iter_exit(&sti);
- spin_unlock_irq(&scx_tasks_lock);
- pr_err("sched_ext: ops.init_task() failed (%d) for %s[%d] while loading\n",
- ret, p->comm, p->pid);
+ scx_task_iter_relock(&sti);
+ scx_task_iter_stop(&sti);
+ scx_ops_error("ops.init_task() failed (%d) for %s[%d]",
+ ret, p->comm, p->pid);
goto err_disable_unlock_all;
}
scx_set_task_state(p, SCX_TASK_READY);
put_task_struct(p);
- spin_lock_irq(&scx_tasks_lock);
+ scx_task_iter_relock(&sti);
}
- scx_task_iter_exit(&sti);
- spin_unlock_irq(&scx_tasks_lock);
+ scx_task_iter_stop(&sti);
scx_cgroup_unlock();
percpu_up_write(&scx_fork_rwsem);
@@ -5491,14 +5514,14 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
* scx_tasks_lock.
*/
percpu_down_write(&scx_fork_rwsem);
- spin_lock_irq(&scx_tasks_lock);
- scx_task_iter_init(&sti);
+ scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
const struct sched_class *old_class = p->sched_class;
struct sched_enq_and_set_ctx ctx;
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
+ p->scx.slice = SCX_SLICE_DFL;
__setscheduler_prio(p, p->prio);
check_class_changing(task_rq(p), p, old_class);
@@ -5506,20 +5529,13 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
check_class_changed(task_rq(p), p, old_class, p->prio);
}
- scx_task_iter_exit(&sti);
- spin_unlock_irq(&scx_tasks_lock);
+ scx_task_iter_stop(&sti);
percpu_up_write(&scx_fork_rwsem);
scx_ops_bypass(false);
- /*
- * Returning an error code here would lose the recorded error
- * information. Exit indicating success so that the error is notified
- * through ops.exit() with all the details.
- */
if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE);
- ret = 0;
goto err_disable;
}
@@ -5554,10 +5570,18 @@ err_disable_unlock_all:
scx_ops_bypass(false);
err_disable:
mutex_unlock(&scx_ops_enable_mutex);
- /* must be fully disabled before returning */
- scx_ops_disable(SCX_EXIT_ERROR);
+ /*
+ * Returning an error code here would not pass all the error information
+ * to userspace. Record errno using scx_ops_error() for cases
+	 * to userspace. Record errno using scx_ops_error() for cases where
+ * that the error is notified through ops.exit() with all the details.
+ *
+ * Flush scx_ops_disable_work to ensure that error is reported before
+ * init completion.
+ */
+ scx_ops_error("scx_ops_enable() failed (%d)", ret);
kthread_flush_work(&scx_ops_disable_work);
- return ret;
+ return 0;
}
@@ -6108,16 +6132,21 @@ __bpf_kfunc_start_defs();
__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
u64 wake_flags, bool *is_idle)
{
- if (!scx_kf_allowed(SCX_KF_SELECT_CPU)) {
- *is_idle = false;
- return prev_cpu;
+ if (!static_branch_likely(&scx_builtin_idle_enabled)) {
+ scx_ops_error("built-in idle tracking is disabled");
+ goto prev_cpu;
}
+
+ if (!scx_kf_allowed(SCX_KF_SELECT_CPU))
+ goto prev_cpu;
+
#ifdef CONFIG_SMP
return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle);
-#else
+#endif
+
+prev_cpu:
*is_idle = false;
return prev_cpu;
-#endif
}
__bpf_kfunc_end_defs();
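
With the built-in-idle check moved from scx_select_cpu_dfl() into the kfunc
itself, the error is reported regardless of which path the caller takes. The
usual BPF caller looks like this (a sketch, same shape as
select_cpu_dfl_nodispatch in the selftests):

	s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p,
			   s32 prev_cpu, u64 wake_flags)
	{
		bool is_idle;
		s32 cpu;

		/* errors out the scheduler if built-in idle tracking is off */
		cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
		if (is_idle)
			scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
		return cpu;
	}
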
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b1c3588a8f00..6085ef50febf 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2292,6 +2292,7 @@ static inline int task_on_rq_migrating(struct task_struct *p)
#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
#define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CPU. */
+#define WF_RQ_SELECTED 0x80 /* ->select_task_rq() was called */
#ifdef CONFIG_SMP
static_assert(WF_EXEC == SD_BALANCE_EXEC);
@@ -2334,6 +2335,7 @@ extern const u32 sched_prio_to_wmult[40];
* ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
* ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
* ENQUEUE_MIGRATED - the task was migrated during wakeup
+ * ENQUEUE_RQ_SELECTED - ->select_task_rq() was called
*
*/
@@ -2360,6 +2362,7 @@ extern const u32 sched_prio_to_wmult[40];
#define ENQUEUE_INITIAL 0x80
#define ENQUEUE_MIGRATING 0x100
#define ENQUEUE_DELAYED 0x200
+#define ENQUEUE_RQ_SELECTED 0x400
#define RETRY_TASK ((void *)-1UL)
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index 225f61f9bfca..248ab790d143 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -41,8 +41,8 @@ void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vt
u32 scx_bpf_dispatch_nr_slots(void) __ksym;
void scx_bpf_dispatch_cancel(void) __ksym;
bool scx_bpf_consume(u64 dsq_id) __ksym;
-void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym;
-void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym;
+void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
+void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
u32 scx_bpf_reenqueue_local(void) __ksym;
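
Marking these kfunc declarations __weak (and scx_bpf_task_cgroup below) lets
a program load on older kernels that don't export them; availability can
then be checked at runtime. A sketch, assuming bpf_ksym_exists() from
bpf_helpers.h:

	/* inside a bpf_for_each(scx_dsq, p, dsq_id, 0) loop body: */
	if (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_slice))
		scx_bpf_dispatch_from_dsq_set_slice(BPF_FOR_EACH_ITER,
						    SCX_SLICE_DFL);
	/* else: kernel predates the kfunc, leave the slice unchanged */
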
@@ -71,7 +71,7 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
bool scx_bpf_task_running(const struct task_struct *p) __ksym;
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
-struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym;
+struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
/*
* Use the following as @it__iter when calling
@@ -320,7 +320,7 @@ u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym;
/*
* Access a cpumask in read-only mode (typically to check bits).
*/
-const struct cpumask *cast_mask(struct bpf_cpumask *mask)
+static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask)
{
return (const struct cpumask *)mask;
}
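
Making cast_mask() static __always_inline gives each includer its own copy,
avoiding duplicate-symbol problems now that the header is pulled into
multiple objects; behavior is unchanged. Typical use (a sketch):

	struct bpf_cpumask *mask = bpf_cpumask_create();

	if (mask) {
		bpf_cpumask_set_cpu(0, mask);
		/* read-only helpers take const struct cpumask * */
		if (bpf_cpumask_test_cpu(0, cast_mask(mask)))
			bpf_printk("cpu0 is set");
		bpf_cpumask_release(mask);
	}
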
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index 67e2a7968cc9..5d1f880d1149 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -230,8 +230,8 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
return;
}
- /* if !WAKEUP, select_cpu() wasn't called, try direct dispatch */
- if (!(enq_flags & SCX_ENQ_WAKEUP) &&
+ /* if select_cpu() wasn't called, try direct dispatch */
+ if (!(enq_flags & SCX_ENQ_CPU_SELECTED) &&
(cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
__sync_fetch_and_add(&nr_ddsp_from_enq, 1);
scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
diff --git a/tools/sched_ext/scx_show_state.py b/tools/sched_ext/scx_show_state.py
index 8bc626ede1c4..c4b3fdda9a0b 100644
--- a/tools/sched_ext/scx_show_state.py
+++ b/tools/sched_ext/scx_show_state.py
@@ -35,6 +35,6 @@ print(f'enabled : {read_static_key("__scx_ops_enabled")}')
print(f'switching_all : {read_int("scx_switching_all")}')
print(f'switched_all : {read_static_key("__scx_switched_all")}')
print(f'enable_state : {ops_state_str(enable_state)} ({enable_state})')
-print(f'bypass_depth : {read_atomic("scx_ops_bypass_depth")}')
+print(f'bypass_depth : {prog["scx_ops_bypass_depth"].value_()}')
print(f'nr_rejected : {read_atomic("scx_nr_rejected")}')
print(f'enable_seq : {read_atomic("scx_enable_seq")}')
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index b38199965f99..363d031a16f7 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -88,6 +88,7 @@ TARGETS += rlimits
TARGETS += rseq
TARGETS += rtc
TARGETS += rust
+TARGETS += sched_ext
TARGETS += seccomp
TARGETS += sgx
TARGETS += sigaltstack
@@ -129,10 +130,10 @@ ifeq ($(filter net/lib,$(TARGETS)),)
endif
endif
-# User can optionally provide a TARGETS skiplist. By default we skip
-# BPF since it has cutting edge build time dependencies which require
-# more effort to install.
-SKIP_TARGETS ?= bpf
+# User can optionally provide a TARGETS skiplist. By default we skip
+# targets using BPF since it has cutting edge build time dependencies
+# which require more effort to install.
+SKIP_TARGETS ?= bpf sched_ext
ifneq ($(SKIP_TARGETS),)
TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS))
override TARGETS := $(TMP)
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index 0754a2c110a1..011762224600 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -3,23 +3,12 @@
include ../../../build/Build.include
include ../../../scripts/Makefile.arch
include ../../../scripts/Makefile.include
-include ../lib.mk
-ifneq ($(LLVM),)
-ifneq ($(filter %/,$(LLVM)),)
-LLVM_PREFIX := $(LLVM)
-else ifneq ($(filter -%,$(LLVM)),)
-LLVM_SUFFIX := $(LLVM)
-endif
-
-CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as
-else
-CC := gcc
-endif # LLVM
+TEST_GEN_PROGS := runner
-ifneq ($(CROSS_COMPILE),)
-$(error CROSS_COMPILE not supported for scx selftests)
-endif # CROSS_COMPILE
+# override lib.mk's default rules
+OVERRIDE_TARGETS := 1
+include ../lib.mk
CURDIR := $(abspath .)
REPOROOT := $(abspath ../../../..)
@@ -34,18 +23,23 @@ GENHDR := $(GENDIR)/autoconf.h
SCXTOOLSDIR := $(TOOLSDIR)/sched_ext
SCXTOOLSINCDIR := $(TOOLSDIR)/sched_ext/include
-OUTPUT_DIR := $(CURDIR)/build
+OUTPUT_DIR := $(OUTPUT)/build
OBJ_DIR := $(OUTPUT_DIR)/obj
INCLUDE_DIR := $(OUTPUT_DIR)/include
BPFOBJ_DIR := $(OBJ_DIR)/libbpf
SCXOBJ_DIR := $(OBJ_DIR)/sched_ext
BPFOBJ := $(BPFOBJ_DIR)/libbpf.a
LIBBPF_OUTPUT := $(OBJ_DIR)/libbpf/libbpf.a
-DEFAULT_BPFTOOL := $(OUTPUT_DIR)/sbin/bpftool
-HOST_BUILD_DIR := $(OBJ_DIR)
-HOST_OUTPUT_DIR := $(OUTPUT_DIR)
-VMLINUX_BTF_PATHS ?= ../../../../vmlinux \
+DEFAULT_BPFTOOL := $(OUTPUT_DIR)/host/sbin/bpftool
+HOST_OBJ_DIR := $(OBJ_DIR)/host/bpftool
+HOST_LIBBPF_OUTPUT := $(OBJ_DIR)/host/libbpf/
+HOST_LIBBPF_DESTDIR := $(OUTPUT_DIR)/host/
+HOST_DESTDIR := $(OUTPUT_DIR)/host/
+
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../../vmlinux \
/sys/kernel/btf/vmlinux \
/boot/vmlinux-$(shell uname -r)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
@@ -80,17 +74,23 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
define get_sys_includes
-$(shell $(1) -v -E - </dev/null 2>&1 \
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
-$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
endef
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+
BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \
$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) \
-I$(CURDIR)/include -I$(CURDIR)/include/bpf-compat \
-I$(INCLUDE_DIR) -I$(APIDIR) -I$(SCXTOOLSINCDIR) \
-I$(REPOROOT)/include \
- $(call get_sys_includes,$(CLANG)) \
+ $(CLANG_SYS_INCLUDES) \
-Wall -Wno-compare-distinct-pointer-types \
-Wno-incompatible-function-pointer-types \
-O2 -mcpu=v3
@@ -98,7 +98,7 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \
# sort removes libbpf duplicates when not cross-building
MAKE_DIRS := $(sort $(OBJ_DIR)/libbpf $(OBJ_DIR)/libbpf \
$(OBJ_DIR)/bpftool $(OBJ_DIR)/resolve_btfids \
- $(INCLUDE_DIR) $(SCXOBJ_DIR))
+ $(HOST_OBJ_DIR) $(INCLUDE_DIR) $(SCXOBJ_DIR))
$(MAKE_DIRS):
$(call msg,MKDIR,,$@)
@@ -108,18 +108,19 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
$(APIDIR)/linux/bpf.h \
| $(OBJ_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(OBJ_DIR)/libbpf/ \
+ ARCH=$(ARCH) CC="$(CC)" CROSS_COMPILE=$(CROSS_COMPILE) \
EXTRA_CFLAGS='-g -O0 -fPIC' \
DESTDIR=$(OUTPUT_DIR) prefix= all install_headers
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
- $(LIBBPF_OUTPUT) | $(OBJ_DIR)/bpftool
+ $(LIBBPF_OUTPUT) | $(HOST_OBJ_DIR)
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \
EXTRA_CFLAGS='-g -O0' \
- OUTPUT=$(OBJ_DIR)/bpftool/ \
- LIBBPF_OUTPUT=$(OBJ_DIR)/libbpf/ \
- LIBBPF_DESTDIR=$(OUTPUT_DIR)/ \
- prefix= DESTDIR=$(OUTPUT_DIR)/ install-bin
+ OUTPUT=$(HOST_OBJ_DIR)/ \
+ LIBBPF_OUTPUT=$(HOST_LIBBPF_OUTPUT) \
+ LIBBPF_DESTDIR=$(HOST_LIBBPF_DESTDIR) \
+ prefix= DESTDIR=$(HOST_DESTDIR) install-bin
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
ifeq ($(VMLINUX_H),)
@@ -150,9 +151,7 @@ $(INCLUDE_DIR)/%.bpf.skel.h: $(SCXOBJ_DIR)/%.bpf.o $(INCLUDE_DIR)/vmlinux.h $(BP
override define CLEAN
rm -rf $(OUTPUT_DIR)
- rm -f *.o *.bpf.o *.bpf.skel.h *.bpf.subskel.h
rm -f $(TEST_GEN_PROGS)
- rm -f runner
endef
# Every testcase takes all of the BPF progs as dependencies by default. This
@@ -185,7 +184,7 @@ auto-test-targets := \
testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets)))
-$(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR)
+$(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR) $(BPFOBJ)
$(CC) $(CFLAGS) -c $< -o $@
# Create all of the test targets object files, whose testcase objects will be
@@ -196,21 +195,15 @@ $(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR)
# function doesn't support using implicit rules otherwise.
$(testcase-targets): $(SCXOBJ_DIR)/%.o: %.c $(SCXOBJ_DIR)/runner.o $(all_test_bpfprogs) | $(SCXOBJ_DIR)
$(eval test=$(patsubst %.o,%.c,$(notdir $@)))
- $(CC) $(CFLAGS) -c $< -o $@ $(SCXOBJ_DIR)/runner.o
+ $(CC) $(CFLAGS) -c $< -o $@
$(SCXOBJ_DIR)/util.o: util.c | $(SCXOBJ_DIR)
$(CC) $(CFLAGS) -c $< -o $@
-runner: $(SCXOBJ_DIR)/runner.o $(SCXOBJ_DIR)/util.o $(BPFOBJ) $(testcase-targets)
+$(OUTPUT)/runner: $(SCXOBJ_DIR)/runner.o $(SCXOBJ_DIR)/util.o $(BPFOBJ) $(testcase-targets)
@echo "$(testcase-targets)"
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
-TEST_GEN_PROGS := runner
-
-all: runner
-
-.PHONY: all clean help
-
.DEFAULT_GOAL := all
.DELETE_ON_ERROR:
diff --git a/tools/testing/selftests/sched_ext/create_dsq.bpf.c b/tools/testing/selftests/sched_ext/create_dsq.bpf.c
index 23f79ed343f0..2cfc4ffd60e2 100644
--- a/tools/testing/selftests/sched_ext/create_dsq.bpf.c
+++ b/tools/testing/selftests/sched_ext/create_dsq.bpf.c
@@ -51,8 +51,8 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(create_dsq_init)
SEC(".struct_ops.link")
struct sched_ext_ops create_dsq_ops = {
- .init_task = create_dsq_init_task,
- .exit_task = create_dsq_exit_task,
- .init = create_dsq_init,
+ .init_task = (void *) create_dsq_init_task,
+ .exit_task = (void *) create_dsq_exit_task,
+ .init = (void *) create_dsq_init,
.name = "create_dsq",
};
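
The (void *) casts repeated throughout the selftests below all follow one
pattern: BPF_STRUCT_OPS() emits a function whose prototype doesn't textually
match the struct_ops member type, so the assignment is cast to silence the
compiler's incompatible-function-pointer-type warnings, matching what
tools/sched_ext already does. The shape of every such hunk:

	/* before: prototype-mismatch warning with newer compilers */
	.init_task = create_dsq_init_task,
	/* after */
	.init_task = (void *) create_dsq_init_task,
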
diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
index e97ad41d354a..37d9bf6fb745 100644
--- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
+++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
@@ -35,8 +35,8 @@ void BPF_STRUCT_OPS(ddsp_bogus_dsq_fail_exit, struct scx_exit_info *ei)
SEC(".struct_ops.link")
struct sched_ext_ops ddsp_bogus_dsq_fail_ops = {
- .select_cpu = ddsp_bogus_dsq_fail_select_cpu,
- .exit = ddsp_bogus_dsq_fail_exit,
+ .select_cpu = (void *) ddsp_bogus_dsq_fail_select_cpu,
+ .exit = (void *) ddsp_bogus_dsq_fail_exit,
.name = "ddsp_bogus_dsq_fail",
.timeout_ms = 1000U,
};
diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
index dde7e7dafbfb..dffc97d9cdf1 100644
--- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
+++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
@@ -32,8 +32,8 @@ void BPF_STRUCT_OPS(ddsp_vtimelocal_fail_exit, struct scx_exit_info *ei)
SEC(".struct_ops.link")
struct sched_ext_ops ddsp_vtimelocal_fail_ops = {
- .select_cpu = ddsp_vtimelocal_fail_select_cpu,
- .exit = ddsp_vtimelocal_fail_exit,
+ .select_cpu = (void *) ddsp_vtimelocal_fail_select_cpu,
+ .exit = (void *) ddsp_vtimelocal_fail_exit,
.name = "ddsp_vtimelocal_fail",
.timeout_ms = 1000U,
};
diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
index efb4672decb4..6a7db1502c29 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
@@ -56,10 +56,10 @@ void BPF_STRUCT_OPS(dsp_local_on_exit, struct scx_exit_info *ei)
SEC(".struct_ops.link")
struct sched_ext_ops dsp_local_on_ops = {
- .select_cpu = dsp_local_on_select_cpu,
- .enqueue = dsp_local_on_enqueue,
- .dispatch = dsp_local_on_dispatch,
- .exit = dsp_local_on_exit,
+ .select_cpu = (void *) dsp_local_on_select_cpu,
+ .enqueue = (void *) dsp_local_on_enqueue,
+ .dispatch = (void *) dsp_local_on_dispatch,
+ .exit = (void *) dsp_local_on_exit,
.name = "dsp_local_on",
.timeout_ms = 1000U,
};
diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c
index b0b99531d5d5..e1bd13e48889 100644
--- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c
+++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c
@@ -12,10 +12,18 @@
char _license[] SEC("license") = "GPL";
+u32 exit_kind;
+
+void BPF_STRUCT_OPS_SLEEPABLE(enq_last_no_enq_fails_exit, struct scx_exit_info *info)
+{
+ exit_kind = info->kind;
+}
+
SEC(".struct_ops.link")
struct sched_ext_ops enq_last_no_enq_fails_ops = {
.name = "enq_last_no_enq_fails",
/* Need to define ops.enqueue() with SCX_OPS_ENQ_LAST */
.flags = SCX_OPS_ENQ_LAST,
+ .exit = (void *) enq_last_no_enq_fails_exit,
.timeout_ms = 1000U,
};
diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
index 2a3eda5e2c0b..73e679953e27 100644
--- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
+++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
@@ -31,8 +31,12 @@ static enum scx_test_status run(void *ctx)
struct bpf_link *link;
link = bpf_map__attach_struct_ops(skel->maps.enq_last_no_enq_fails_ops);
- if (link) {
- SCX_ERR("Incorrectly succeeded in to attaching scheduler");
+ if (!link) {
+ SCX_ERR("Incorrectly failed at attaching scheduler");
+ return SCX_TEST_FAIL;
+ }
+ if (!skel->bss->exit_kind) {
+ SCX_ERR("Incorrectly stayed loaded");
return SCX_TEST_FAIL;
}
@@ -50,7 +54,7 @@ static void cleanup(void *ctx)
struct scx_test enq_last_no_enq_fails = {
.name = "enq_last_no_enq_fails",
- .description = "Verify we fail to load a scheduler if we specify "
+ .description = "Verify we eject a scheduler if we specify "
"the SCX_OPS_ENQ_LAST flag without defining "
"ops.enqueue()",
.setup = setup,
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
index b3dfc1033cd6..1efb50d61040 100644
--- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
+++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
@@ -36,8 +36,8 @@ void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p,
SEC(".struct_ops.link")
struct sched_ext_ops enq_select_cpu_fails_ops = {
- .select_cpu = enq_select_cpu_fails_select_cpu,
- .enqueue = enq_select_cpu_fails_enqueue,
+ .select_cpu = (void *) enq_select_cpu_fails_select_cpu,
+ .enqueue = (void *) enq_select_cpu_fails_enqueue,
.name = "enq_select_cpu_fails",
.timeout_ms = 1000U,
};
diff --git a/tools/testing/selftests/sched_ext/exit.bpf.c b/tools/testing/selftests/sched_ext/exit.bpf.c
index ae12ddaac921..d75d4faf07f6 100644
--- a/tools/testing/selftests/sched_ext/exit.bpf.c
+++ b/tools/testing/selftests/sched_ext/exit.bpf.c
@@ -15,6 +15,8 @@ UEI_DEFINE(uei);
#define EXIT_CLEANLY() scx_bpf_exit(exit_point, "%d", exit_point)
+#define DSQ_ID 0
+
s32 BPF_STRUCT_OPS(exit_select_cpu, struct task_struct *p,
s32 prev_cpu, u64 wake_flags)
{
@@ -31,7 +33,7 @@ void BPF_STRUCT_OPS(exit_enqueue, struct task_struct *p, u64 enq_flags)
if (exit_point == EXIT_ENQUEUE)
EXIT_CLEANLY();
- scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+ scx_bpf_dispatch(p, DSQ_ID, SCX_SLICE_DFL, enq_flags);
}
void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p)
@@ -39,7 +41,7 @@ void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p)
if (exit_point == EXIT_DISPATCH)
EXIT_CLEANLY();
- scx_bpf_consume(SCX_DSQ_GLOBAL);
+ scx_bpf_consume(DSQ_ID);
}
void BPF_STRUCT_OPS(exit_enable, struct task_struct *p)
@@ -67,18 +69,18 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(exit_init)
if (exit_point == EXIT_INIT)
EXIT_CLEANLY();
- return 0;
+ return scx_bpf_create_dsq(DSQ_ID, -1);
}
SEC(".struct_ops.link")
struct sched_ext_ops exit_ops = {
- .select_cpu = exit_select_cpu,
- .enqueue = exit_enqueue,
- .dispatch = exit_dispatch,
- .init_task = exit_init_task,
- .enable = exit_enable,
- .exit = exit_exit,
- .init = exit_init,
+ .select_cpu = (void *) exit_select_cpu,
+ .enqueue = (void *) exit_enqueue,
+ .dispatch = (void *) exit_dispatch,
+ .init_task = (void *) exit_init_task,
+ .enable = (void *) exit_enable,
+ .exit = (void *) exit_exit,
+ .init = (void *) exit_init,
.name = "exit",
.timeout_ms = 1000U,
};
diff --git a/tools/testing/selftests/sched_ext/hotplug.bpf.c b/tools/testing/selftests/sched_ext/hotplug.bpf.c
index 8f2601db39f3..6c9f25c9bf53 100644
--- a/tools/testing/selftests/sched_ext/hotplug.bpf.c
+++ b/tools/testing/selftests/sched_ext/hotplug.bpf.c
@@ -46,16 +46,16 @@ void BPF_STRUCT_OPS_SLEEPABLE(hotplug_cpu_offline, s32 cpu)
SEC(".struct_ops.link")
struct sched_ext_ops hotplug_cb_ops = {
- .cpu_online = hotplug_cpu_online,
- .cpu_offline = hotplug_cpu_offline,
- .exit = hotplug_exit,
+ .cpu_online = (void *) hotplug_cpu_online,
+ .cpu_offline = (void *) hotplug_cpu_offline,
+ .exit = (void *) hotplug_exit,
.name = "hotplug_cbs",
.timeout_ms = 1000U,
};
SEC(".struct_ops.link")
struct sched_ext_ops hotplug_nocb_ops = {
- .exit = hotplug_exit,
+ .exit = (void *) hotplug_exit,
.name = "hotplug_nocbs",
.timeout_ms = 1000U,
};
diff --git a/tools/testing/selftests/sched_ext/init_enable_count.bpf.c b/tools/testing/selftests/sched_ext/init_enable_count.bpf.c
index 47ea89a626c3..5eb9edb1837d 100644
--- a/tools/testing/selftests/sched_ext/init_enable_count.bpf.c
+++ b/tools/testing/selftests/sched_ext/init_enable_count.bpf.c
@@ -45,9 +45,9 @@ void BPF_STRUCT_OPS(cnt_disable, struct task_struct *p)
SEC(".struct_ops.link")
struct sched_ext_ops init_enable_count_ops = {
- .init_task = cnt_init_task,
- .exit_task = cnt_exit_task,
- .enable = cnt_enable,
- .disable = cnt_disable,
+ .init_task = (void *) cnt_init_task,
+ .exit_task = (void *) cnt_exit_task,
+ .enable = (void *) cnt_enable,
+ .disable = (void *) cnt_disable,
.name = "init_enable_count",
};
diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c
index 00bfa9cb95d3..4d4cd8d966db 100644
--- a/tools/testing/selftests/sched_ext/maximal.bpf.c
+++ b/tools/testing/selftests/sched_ext/maximal.bpf.c
@@ -131,34 +131,34 @@ void BPF_STRUCT_OPS(maximal_exit, struct scx_exit_info *info)
SEC(".struct_ops.link")
struct sched_ext_ops maximal_ops = {
- .select_cpu = maximal_select_cpu,
- .enqueue = maximal_enqueue,
- .dequeue = maximal_dequeue,
- .dispatch = maximal_dispatch,
- .runnable = maximal_runnable,
- .running = maximal_running,
- .stopping = maximal_stopping,
- .quiescent = maximal_quiescent,
- .yield = maximal_yield,
- .core_sched_before = maximal_core_sched_before,
- .set_weight = maximal_set_weight,
- .set_cpumask = maximal_set_cpumask,
- .update_idle = maximal_update_idle,
- .cpu_acquire = maximal_cpu_acquire,
- .cpu_release = maximal_cpu_release,
- .cpu_online = maximal_cpu_online,
- .cpu_offline = maximal_cpu_offline,
- .init_task = maximal_init_task,
- .enable = maximal_enable,
- .exit_task = maximal_exit_task,
- .disable = maximal_disable,
- .cgroup_init = maximal_cgroup_init,
- .cgroup_exit = maximal_cgroup_exit,
- .cgroup_prep_move = maximal_cgroup_prep_move,
- .cgroup_move = maximal_cgroup_move,
- .cgroup_cancel_move = maximal_cgroup_cancel_move,
- .cgroup_set_weight = maximal_cgroup_set_weight,
- .init = maximal_init,
- .exit = maximal_exit,
+ .select_cpu = (void *) maximal_select_cpu,
+ .enqueue = (void *) maximal_enqueue,
+ .dequeue = (void *) maximal_dequeue,
+ .dispatch = (void *) maximal_dispatch,
+ .runnable = (void *) maximal_runnable,
+ .running = (void *) maximal_running,
+ .stopping = (void *) maximal_stopping,
+ .quiescent = (void *) maximal_quiescent,
+ .yield = (void *) maximal_yield,
+ .core_sched_before = (void *) maximal_core_sched_before,
+ .set_weight = (void *) maximal_set_weight,
+ .set_cpumask = (void *) maximal_set_cpumask,
+ .update_idle = (void *) maximal_update_idle,
+ .cpu_acquire = (void *) maximal_cpu_acquire,
+ .cpu_release = (void *) maximal_cpu_release,
+ .cpu_online = (void *) maximal_cpu_online,
+ .cpu_offline = (void *) maximal_cpu_offline,
+ .init_task = (void *) maximal_init_task,
+ .enable = (void *) maximal_enable,
+ .exit_task = (void *) maximal_exit_task,
+ .disable = (void *) maximal_disable,
+ .cgroup_init = (void *) maximal_cgroup_init,
+ .cgroup_exit = (void *) maximal_cgroup_exit,
+ .cgroup_prep_move = (void *) maximal_cgroup_prep_move,
+ .cgroup_move = (void *) maximal_cgroup_move,
+ .cgroup_cancel_move = (void *) maximal_cgroup_cancel_move,
+ .cgroup_set_weight = (void *) maximal_cgroup_set_weight,
+ .init = (void *) maximal_init,
+ .exit = (void *) maximal_exit,
.name = "maximal",
};
diff --git a/tools/testing/selftests/sched_ext/maybe_null.bpf.c b/tools/testing/selftests/sched_ext/maybe_null.bpf.c
index 27d0f386acfb..cf4ae870cd4e 100644
--- a/tools/testing/selftests/sched_ext/maybe_null.bpf.c
+++ b/tools/testing/selftests/sched_ext/maybe_null.bpf.c
@@ -29,8 +29,8 @@ bool BPF_STRUCT_OPS(maybe_null_success_yield, struct task_struct *from,
SEC(".struct_ops.link")
struct sched_ext_ops maybe_null_success = {
- .dispatch = maybe_null_success_dispatch,
- .yield = maybe_null_success_yield,
- .enable = maybe_null_running,
+ .dispatch = (void *) maybe_null_success_dispatch,
+ .yield = (void *) maybe_null_success_yield,
+ .enable = (void *) maybe_null_running,
.name = "minimal",
};
diff --git a/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c b/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c
index c0641050271d..ec724d7b33d1 100644
--- a/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c
+++ b/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c
@@ -19,7 +19,7 @@ void BPF_STRUCT_OPS(maybe_null_fail_dispatch, s32 cpu, struct task_struct *p)
SEC(".struct_ops.link")
struct sched_ext_ops maybe_null_fail = {
- .dispatch = maybe_null_fail_dispatch,
- .enable = maybe_null_running,
+ .dispatch = (void *) maybe_null_fail_dispatch,
+ .enable = (void *) maybe_null_running,
.name = "maybe_null_fail_dispatch",
};
diff --git a/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c b/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c
index 3c1740028e3b..e6552cace020 100644
--- a/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c
+++ b/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c
@@ -22,7 +22,7 @@ bool BPF_STRUCT_OPS(maybe_null_fail_yield, struct task_struct *from,
SEC(".struct_ops.link")
struct sched_ext_ops maybe_null_fail = {
- .yield = maybe_null_fail_yield,
- .enable = maybe_null_running,
+ .yield = (void *) maybe_null_fail_yield,
+ .enable = (void *) maybe_null_running,
.name = "maybe_null_fail_yield",
};
diff --git a/tools/testing/selftests/sched_ext/prog_run.bpf.c b/tools/testing/selftests/sched_ext/prog_run.bpf.c
index 6a4d7c48e3f2..00c267626a68 100644
--- a/tools/testing/selftests/sched_ext/prog_run.bpf.c
+++ b/tools/testing/selftests/sched_ext/prog_run.bpf.c
@@ -28,6 +28,6 @@ void BPF_STRUCT_OPS(prog_run_exit, struct scx_exit_info *ei)
SEC(".struct_ops.link")
struct sched_ext_ops prog_run_ops = {
- .exit = prog_run_exit,
+ .exit = (void *) prog_run_exit,
.name = "prog_run",
};
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c
index 2ed2991afafe..f171ac470970 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c
@@ -35,6 +35,6 @@ void BPF_STRUCT_OPS(select_cpu_dfl_enqueue, struct task_struct *p,
SEC(".struct_ops.link")
struct sched_ext_ops select_cpu_dfl_ops = {
- .enqueue = select_cpu_dfl_enqueue,
+ .enqueue = (void *) select_cpu_dfl_enqueue,
.name = "select_cpu_dfl",
};
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c
index 4bb5abb2d369..9efdbb7da928 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c
@@ -82,8 +82,8 @@ s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_init_task,
SEC(".struct_ops.link")
struct sched_ext_ops select_cpu_dfl_nodispatch_ops = {
- .select_cpu = select_cpu_dfl_nodispatch_select_cpu,
- .enqueue = select_cpu_dfl_nodispatch_enqueue,
- .init_task = select_cpu_dfl_nodispatch_init_task,
+ .select_cpu = (void *) select_cpu_dfl_nodispatch_select_cpu,
+ .enqueue = (void *) select_cpu_dfl_nodispatch_enqueue,
+ .init_task = (void *) select_cpu_dfl_nodispatch_init_task,
.name = "select_cpu_dfl_nodispatch",
};
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c
index f0b96a4a04b2..59bfc4f36167 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c
@@ -35,7 +35,7 @@ dispatch:
SEC(".struct_ops.link")
struct sched_ext_ops select_cpu_dispatch_ops = {
- .select_cpu = select_cpu_dispatch_select_cpu,
+ .select_cpu = (void *) select_cpu_dispatch_select_cpu,
.name = "select_cpu_dispatch",
.timeout_ms = 1000U,
};
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c
index 7b42ddce0f56..3bbd5fcdfb18 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c
@@ -30,8 +30,8 @@ void BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_exit, struct scx_exit_info *ei)
SEC(".struct_ops.link")
struct sched_ext_ops select_cpu_dispatch_bad_dsq_ops = {
- .select_cpu = select_cpu_dispatch_bad_dsq_select_cpu,
- .exit = select_cpu_dispatch_bad_dsq_exit,
+ .select_cpu = (void *) select_cpu_dispatch_bad_dsq_select_cpu,
+ .exit = (void *) select_cpu_dispatch_bad_dsq_exit,
.name = "select_cpu_dispatch_bad_dsq",
.timeout_ms = 1000U,
};
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c
index 653e3dc0b4dc..0fda57fe0ecf 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c
@@ -31,8 +31,8 @@ void BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_exit, struct scx_exit_info *ei)
SEC(".struct_ops.link")
struct sched_ext_ops select_cpu_dispatch_dbl_dsp_ops = {
- .select_cpu = select_cpu_dispatch_dbl_dsp_select_cpu,
- .exit = select_cpu_dispatch_dbl_dsp_exit,
+ .select_cpu = (void *) select_cpu_dispatch_dbl_dsp_select_cpu,
+ .exit = (void *) select_cpu_dispatch_dbl_dsp_exit,
.name = "select_cpu_dispatch_dbl_dsp",
.timeout_ms = 1000U,
};
diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c
index 7f3ebf4fc2ea..e6c67bcf5e6e 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c
@@ -81,12 +81,12 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(select_cpu_vtime_init)
SEC(".struct_ops.link")
struct sched_ext_ops select_cpu_vtime_ops = {
- .select_cpu = select_cpu_vtime_select_cpu,
- .dispatch = select_cpu_vtime_dispatch,
- .running = select_cpu_vtime_running,
- .stopping = select_cpu_vtime_stopping,
- .enable = select_cpu_vtime_enable,
- .init = select_cpu_vtime_init,
+ .select_cpu = (void *) select_cpu_vtime_select_cpu,
+ .dispatch = (void *) select_cpu_vtime_dispatch,
+ .running = (void *) select_cpu_vtime_running,
+ .stopping = (void *) select_cpu_vtime_stopping,
+ .enable = (void *) select_cpu_vtime_enable,
+ .init = (void *) select_cpu_vtime_init,
.name = "select_cpu_vtime",
.timeout_ms = 1000U,
};