30 files changed, 302 insertions, 255 deletions
diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst
index 6c0d70e2e27d..7b59bbd2e564 100644
--- a/Documentation/scheduler/sched-ext.rst
+++ b/Documentation/scheduler/sched-ext.rst
@@ -66,7 +66,7 @@ BPF scheduler and reverts all tasks back to CFS.
 .. code-block:: none

     # make -j16 -C tools/sched_ext
-    # tools/sched_ext/scx_simple
+    # tools/sched_ext/build/bin/scx_simple
     local=0 global=3
     local=5 global=24
     local=9 global=44
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 43e453ab7e20..aeb595514461 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3518,14 +3518,16 @@ out:
  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
  */
 static inline
-int select_task_rq(struct task_struct *p, int cpu, int wake_flags)
+int select_task_rq(struct task_struct *p, int cpu, int *wake_flags)
 {
         lockdep_assert_held(&p->pi_lock);

-        if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
-                cpu = p->sched_class->select_task_rq(p, cpu, wake_flags);
-        else
+        if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) {
+                cpu = p->sched_class->select_task_rq(p, cpu, *wake_flags);
+                *wake_flags |= WF_RQ_SELECTED;
+        } else {
                 cpu = cpumask_any(p->cpus_ptr);
+        }

         /*
          * In order not to call set_task_cpu() on a blocking task we need
@@ -3659,6 +3661,8 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
                 rq->nr_uninterruptible--;

 #ifdef CONFIG_SMP
+        if (wake_flags & WF_RQ_SELECTED)
+                en_flags |= ENQUEUE_RQ_SELECTED;
         if (wake_flags & WF_MIGRATED)
                 en_flags |= ENQUEUE_MIGRATED;
         else
@@ -4120,6 +4124,8 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         guard(preempt)();
         int cpu, success = 0;

+        wake_flags |= WF_TTWU;
+
         if (p == current) {
                 /*
                  * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
@@ -4252,7 +4258,7 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
          */
         smp_cond_load_acquire(&p->on_cpu, !VAL);

-        cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
+        cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
         if (task_cpu(p) != cpu) {
                 if (p->in_iowait) {
                         delayacct_blkio_end(p);
@@ -4793,6 +4799,7 @@ void wake_up_new_task(struct task_struct *p)
 {
         struct rq_flags rf;
         struct rq *rq;
+        int wake_flags = WF_FORK;

         raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
         WRITE_ONCE(p->__state, TASK_RUNNING);
@@ -4807,7 +4814,7 @@ void wake_up_new_task(struct task_struct *p)
          */
         p->recent_used_cpu = task_cpu(p);
         rseq_migrate(p);
-        __set_task_cpu(p, select_task_rq(p, task_cpu(p), WF_FORK));
+        __set_task_cpu(p, select_task_rq(p, task_cpu(p), &wake_flags));
 #endif
         rq = __task_rq_lock(p, &rf);
         update_rq_clock(rq);
@@ -4815,7 +4822,7 @@ void wake_up_new_task(struct task_struct *p)

         activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
         trace_sched_wakeup_new(p);
-        wakeup_preempt(rq, p, WF_FORK);
+        wakeup_preempt(rq, p, wake_flags);
 #ifdef CONFIG_SMP
         if (p->sched_class->task_woken) {
                 /*
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 012a7fc77263..c074a64c20f0 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -9,7 +9,6 @@
 #define SCX_OP_IDX(op)		(offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))

 enum scx_consts {
-        SCX_SLICE_BYPASS		= SCX_SLICE_DFL / 4,
         SCX_DSP_DFL_MAX_BATCH		= 32,
         SCX_DSP_MAX_LOOPS		= 32,
         SCX_WATCHDOG_MAX_TIMEOUT	= 30 * HZ,
@@ -19,6 +18,12 @@ enum scx_consts {
         SCX_EXIT_DUMP_DFL_LEN		= 32768,

         SCX_CPUPERF_ONE			= SCHED_CAPACITY_SCALE,
+
+        /*
+         * Iterating all tasks may take a while. Periodically drop
+         * scx_tasks_lock to avoid causing e.g. CSD and RCU stalls.
+         */
+        SCX_OPS_TASK_ITER_BATCH		= 32,
 };

 enum scx_exit_kind {
@@ -630,6 +635,10 @@ struct sched_ext_ops {
         /**
          * exit - Clean up after the BPF scheduler
          * @info: Exit info
+         *
+         * ops.exit() is also called on ops.init() failure, which is a bit
+         * unusual. This is to allow rich reporting through @info on how
+         * ops.init() failed.
          */
         void (*exit)(struct scx_exit_info *info);

@@ -697,6 +706,7 @@ enum scx_enq_flags {
         /* expose select ENQUEUE_* flags as enums */
         SCX_ENQ_WAKEUP		= ENQUEUE_WAKEUP,
         SCX_ENQ_HEAD		= ENQUEUE_HEAD,
+        SCX_ENQ_CPU_SELECTED	= ENQUEUE_RQ_SELECTED,

         /* high 32bits are SCX specific */
@@ -857,7 +867,8 @@ static DEFINE_MUTEX(scx_ops_enable_mutex);
 DEFINE_STATIC_KEY_FALSE(__scx_ops_enabled);
 DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem);
 static atomic_t scx_ops_enable_state_var = ATOMIC_INIT(SCX_OPS_DISABLED);
-static atomic_t scx_ops_bypass_depth = ATOMIC_INIT(0);
+static int scx_ops_bypass_depth;
+static DEFINE_RAW_SPINLOCK(__scx_ops_bypass_lock);
 static bool scx_ops_init_task_enabled;
 static bool scx_switching_all;
 DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
@@ -1279,86 +1290,104 @@ struct scx_task_iter {
         struct task_struct		*locked;
         struct rq			*rq;
         struct rq_flags			rf;
+        u32				cnt;
 };

 /**
- * scx_task_iter_init - Initialize a task iterator
+ * scx_task_iter_start - Lock scx_tasks_lock and start a task iteration
  * @iter: iterator to init
  *
- * Initialize @iter. Must be called with scx_tasks_lock held. Once initialized,
- * @iter must eventually be exited with scx_task_iter_exit().
+ * Initialize @iter and return with scx_tasks_lock held. Once initialized, @iter
+ * must eventually be stopped with scx_task_iter_stop().
  *
- * scx_tasks_lock may be released between this and the first next() call or
- * between any two next() calls. If scx_tasks_lock is released between two
- * next() calls, the caller is responsible for ensuring that the task being
- * iterated remains accessible either through RCU read lock or obtaining a
- * reference count.
+ * scx_tasks_lock and the rq lock may be released using scx_task_iter_unlock()
+ * between this and the first next() call or between any two next() calls. If
+ * the locks are released between two next() calls, the caller is responsible
+ * for ensuring that the task being iterated remains accessible either through
+ * RCU read lock or obtaining a reference count.
  *
  * All tasks which existed when the iteration started are guaranteed to be
  * visited as long as they still exist.
  */
-static void scx_task_iter_init(struct scx_task_iter *iter)
+static void scx_task_iter_start(struct scx_task_iter *iter)
 {
-        lockdep_assert_held(&scx_tasks_lock);
-
         BUILD_BUG_ON(__SCX_DSQ_ITER_ALL_FLAGS &
                      ((1U << __SCX_DSQ_LNODE_PRIV_SHIFT) - 1));

+        spin_lock_irq(&scx_tasks_lock);
+
         iter->cursor = (struct sched_ext_entity){ .flags = SCX_TASK_CURSOR };
         list_add(&iter->cursor.tasks_node, &scx_tasks);
         iter->locked = NULL;
+        iter->cnt = 0;
+}
+
+static void __scx_task_iter_rq_unlock(struct scx_task_iter *iter)
+{
+        if (iter->locked) {
+                task_rq_unlock(iter->rq, iter->locked, &iter->rf);
+                iter->locked = NULL;
+        }
 }

 /**
- * scx_task_iter_rq_unlock - Unlock rq locked by a task iterator
- * @iter: iterator to unlock rq for
+ * scx_task_iter_unlock - Unlock rq and scx_tasks_lock held by a task iterator
+ * @iter: iterator to unlock
  *
  * If @iter is in the middle of a locked iteration, it may be locking the rq of
- * the task currently being visited. Unlock the rq if so. This function can be
- * safely called anytime during an iteration.
+ * the task currently being visited in addition to scx_tasks_lock. Unlock both.
+ * This function can be safely called anytime during an iteration.
+ */
+static void scx_task_iter_unlock(struct scx_task_iter *iter)
+{
+        __scx_task_iter_rq_unlock(iter);
+        spin_unlock_irq(&scx_tasks_lock);
+}
+
+/**
+ * scx_task_iter_relock - Lock scx_tasks_lock released by scx_task_iter_unlock()
+ * @iter: iterator to re-lock
  *
- * Returns %true if the rq @iter was locking is unlocked. %false if @iter was
- * not locking an rq.
+ * Re-lock scx_tasks_lock unlocked by scx_task_iter_unlock(). Note that it
+ * doesn't re-lock the rq lock. Must be called before other iterator operations.
  */
-static bool scx_task_iter_rq_unlock(struct scx_task_iter *iter)
+static void scx_task_iter_relock(struct scx_task_iter *iter)
 {
-        if (iter->locked) {
-                task_rq_unlock(iter->rq, iter->locked, &iter->rf);
-                iter->locked = NULL;
-                return true;
-        } else {
-                return false;
-        }
+        spin_lock_irq(&scx_tasks_lock);
 }

 /**
- * scx_task_iter_exit - Exit a task iterator
+ * scx_task_iter_stop - Stop a task iteration and unlock scx_tasks_lock
  * @iter: iterator to exit
  *
- * Exit a previously initialized @iter. Must be called with scx_tasks_lock held.
- * If the iterator holds a task's rq lock, that rq lock is released. See
- * scx_task_iter_init() for details.
+ * Exit a previously initialized @iter. Must be called with scx_tasks_lock held
+ * which is released on return. If the iterator holds a task's rq lock, that rq
+ * lock is also released. See scx_task_iter_start() for details.
  */
-static void scx_task_iter_exit(struct scx_task_iter *iter)
+static void scx_task_iter_stop(struct scx_task_iter *iter)
 {
-        lockdep_assert_held(&scx_tasks_lock);
-
-        scx_task_iter_rq_unlock(iter);
         list_del_init(&iter->cursor.tasks_node);
+        scx_task_iter_unlock(iter);
 }

 /**
  * scx_task_iter_next - Next task
  * @iter: iterator to walk
  *
- * Visit the next task. See scx_task_iter_init() for details.
+ * Visit the next task. See scx_task_iter_start() for details. Locks are dropped
+ * and re-acquired every %SCX_OPS_TASK_ITER_BATCH iterations to avoid causing
+ * stalls by holding scx_tasks_lock for too long.
  */
 static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
 {
         struct list_head *cursor = &iter->cursor.tasks_node;
         struct sched_ext_entity *pos;

-        lockdep_assert_held(&scx_tasks_lock);
+        if (!(++iter->cnt % SCX_OPS_TASK_ITER_BATCH)) {
+                scx_task_iter_unlock(iter);
+                cond_resched();
+                scx_task_iter_relock(iter);
+        }

         list_for_each_entry(pos, cursor, tasks_node) {
                 if (&pos->tasks_node == &scx_tasks)
@@ -1379,14 +1408,14 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
  * @include_dead: Whether we should include dead tasks in the iteration
  *
  * Visit the non-idle task with its rq lock held. Allows callers to specify
- * whether they would like to filter out dead tasks. See scx_task_iter_init()
+ * whether they would like to filter out dead tasks. See scx_task_iter_start()
  * for details.
  */
 static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
 {
         struct task_struct *p;

-        scx_task_iter_rq_unlock(iter);
+        __scx_task_iter_rq_unlock(iter);

         while ((p = scx_task_iter_next(iter))) {
                 /*
@@ -1954,7 +1983,6 @@ static bool scx_rq_online(struct rq *rq)
 static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
                             int sticky_cpu)
 {
-        bool bypassing = scx_rq_bypassing(rq);
         struct task_struct **ddsp_taskp;
         unsigned long qseq;

@@ -1972,7 +2000,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
         if (!scx_rq_online(rq))
                 goto local;

-        if (bypassing)
+        if (scx_rq_bypassing(rq))
                 goto global;

         if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
@@ -2027,7 +2055,7 @@ local_norefill:

 global:
         touch_core_sched(rq, p);	/* see the comment in local: */
-        p->scx.slice = bypassing ? SCX_SLICE_BYPASS : SCX_SLICE_DFL;
+        p->scx.slice = SCX_SLICE_DFL;
         dispatch_enqueue(find_global_dsq(p), p, enq_flags);
 }

@@ -3030,8 +3058,8 @@ static struct task_struct *pick_task_scx(struct rq *rq)

         if (unlikely(!p->scx.slice)) {
                 if (!scx_rq_bypassing(rq) && !scx_warned_zero_slice) {
-                        printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in pick_next_task_scx()\n",
-                                        p->comm, p->pid);
+                        printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in %s()\n",
+                                        p->comm, p->pid, __func__);
                         scx_warned_zero_slice = true;
                 }
                 p->scx.slice = SCX_SLICE_DFL;
@@ -3274,11 +3302,6 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,

         *found = false;

-        if (!static_branch_likely(&scx_builtin_idle_enabled)) {
-                scx_ops_error("built-in idle tracking is disabled");
-                return prev_cpu;
-        }
-
         /*
          * Determine the scheduling domain only if the task is allowed to run
          * on all CPUs.
@@ -3435,7 +3458,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flags)
         if (unlikely(wake_flags & WF_EXEC))
                 return prev_cpu;

-        if (SCX_HAS_OP(select_cpu)) {
+        if (SCX_HAS_OP(select_cpu) && !scx_rq_bypassing(task_rq(p))) {
                 s32 cpu;
                 struct task_struct **ddsp_taskp;

@@ -3500,7 +3523,7 @@ void __scx_update_idle(struct rq *rq, bool idle)
 {
         int cpu = cpu_of(rq);

-        if (SCX_HAS_OP(update_idle)) {
+        if (SCX_HAS_OP(update_idle) && !scx_rq_bypassing(rq)) {
                 SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
                 if (!static_branch_unlikely(&scx_builtin_idle_enabled))
                         return;
@@ -4358,7 +4381,6 @@ static void scx_cgroup_exit(void)

         percpu_rwsem_assert_held(&scx_cgroup_rwsem);

-        WARN_ON_ONCE(!scx_cgroup_enabled);
         scx_cgroup_enabled = false;

         /*
@@ -4427,6 +4449,7 @@ static int scx_cgroup_init(void)
                                       css->cgroup, &args);
                 if (ret) {
                         css_put(css);
+                        scx_ops_error("ops.cgroup_init() failed (%d)", ret);
                         return ret;
                 }
                 tg->scx_flags |= SCX_TG_INITED;
@@ -4566,36 +4589,40 @@ bool task_should_scx(struct task_struct *p)
  * the DISABLING state and then cycling the queued tasks through dequeue/enqueue
  * to force global FIFO scheduling.
  *
- * a. ops.enqueue() is ignored and tasks are queued in simple global FIFO order.
- *    %SCX_OPS_ENQ_LAST is also ignored.
+ * - ops.select_cpu() is ignored and the default select_cpu() is used.
+ *
+ * - ops.enqueue() is ignored and tasks are queued in simple global FIFO order.
+ *   %SCX_OPS_ENQ_LAST is also ignored.
  *
- * b. ops.dispatch() is ignored.
+ * - ops.dispatch() is ignored.
  *
- * c. balance_scx() does not set %SCX_RQ_BAL_KEEP on non-zero slice as slice
- *    can't be trusted. Whenever a tick triggers, the running task is rotated to
- *    the tail of the queue with core_sched_at touched.
+ * - balance_scx() does not set %SCX_RQ_BAL_KEEP on non-zero slice as slice
+ *   can't be trusted. Whenever a tick triggers, the running task is rotated to
+ *   the tail of the queue with core_sched_at touched.
  *
- * d. pick_next_task() suppresses zero slice warning.
+ * - pick_next_task() suppresses zero slice warning.
  *
- * e. scx_bpf_kick_cpu() is disabled to avoid irq_work malfunction during PM
- *    operations.
+ * - scx_bpf_kick_cpu() is disabled to avoid irq_work malfunction during PM
+ *   operations.
  *
- * f. scx_prio_less() reverts to the default core_sched_at order.
+ * - scx_prio_less() reverts to the default core_sched_at order.
  */
 static void scx_ops_bypass(bool bypass)
 {
-        int depth, cpu;
+        int cpu;
+        unsigned long flags;

+        raw_spin_lock_irqsave(&__scx_ops_bypass_lock, flags);
         if (bypass) {
-                depth = atomic_inc_return(&scx_ops_bypass_depth);
-                WARN_ON_ONCE(depth <= 0);
-                if (depth != 1)
-                        return;
+                scx_ops_bypass_depth++;
+                WARN_ON_ONCE(scx_ops_bypass_depth <= 0);
+                if (scx_ops_bypass_depth != 1)
+                        goto unlock;
         } else {
-                depth = atomic_dec_return(&scx_ops_bypass_depth);
-                WARN_ON_ONCE(depth < 0);
-                if (depth != 0)
-                        return;
+                scx_ops_bypass_depth--;
+                WARN_ON_ONCE(scx_ops_bypass_depth < 0);
+                if (scx_ops_bypass_depth != 0)
+                        goto unlock;
         }

         /*
@@ -4612,7 +4639,7 @@ static void scx_ops_bypass(bool bypass)
                 struct rq_flags rf;
                 struct task_struct *p, *n;

-                rq_lock_irqsave(rq, &rf);
+                rq_lock(rq, &rf);

                 if (bypass) {
                         WARN_ON_ONCE(rq->scx.flags & SCX_RQ_BYPASSING);
@@ -4648,11 +4675,13 @@ static void scx_ops_bypass(bool bypass)
                         sched_enq_and_set_task(&ctx);
                 }

-                rq_unlock_irqrestore(rq, &rf);
+                rq_unlock(rq, &rf);

-                /* kick to restore ticks */
+                /* resched to restore ticks and idle state */
                 resched_cpu(cpu);
         }
+unlock:
+        raw_spin_unlock_irqrestore(&__scx_ops_bypass_lock, flags);
 }

 static void free_exit_info(struct scx_exit_info *ei)
@@ -4772,15 +4801,13 @@ static void scx_ops_disable_workfn(struct kthread_work *work)

         scx_ops_init_task_enabled = false;

-        spin_lock_irq(&scx_tasks_lock);
-        scx_task_iter_init(&sti);
+        scx_task_iter_start(&sti);
         while ((p = scx_task_iter_next_locked(&sti))) {
                 const struct sched_class *old_class = p->sched_class;
                 struct sched_enq_and_set_ctx ctx;

                 sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);

-                p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL);
                 __setscheduler_prio(p, p->prio);
                 check_class_changing(task_rq(p), p, old_class);
@@ -4789,8 +4816,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
                 check_class_changed(task_rq(p), p, old_class, p->prio);
                 scx_ops_exit_task(p);
         }
-        scx_task_iter_exit(&sti);
-        spin_unlock_irq(&scx_tasks_lock);
+        scx_task_iter_stop(&sti);
         percpu_up_write(&scx_fork_rwsem);

         /* no task is on scx, turn off all the switches and flush in-progress calls */
@@ -5258,7 +5284,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)

         if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
                            cpu_possible_mask)) {
-                pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation");
+                pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n");
                 return -EINVAL;
         }

@@ -5351,6 +5377,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
                 if (ret) {
                         ret = ops_sanitize_err("init", ret);
                         cpus_read_unlock();
+                        scx_ops_error("ops.init() failed (%d)", ret);
                         goto err_disable;
                 }
         }
@@ -5443,8 +5470,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
         if (ret)
                 goto err_disable_unlock_all;

-        spin_lock_irq(&scx_tasks_lock);
-        scx_task_iter_init(&sti);
+        scx_task_iter_start(&sti);
         while ((p = scx_task_iter_next_locked(&sti))) {
                 /*
                  * @p may already be dead, have lost all its usages counts and
@@ -5454,27 +5480,24 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
                 if (!tryget_task_struct(p))
                         continue;

-                scx_task_iter_rq_unlock(&sti);
-                spin_unlock_irq(&scx_tasks_lock);
+                scx_task_iter_unlock(&sti);

                 ret = scx_ops_init_task(p, task_group(p), false);
                 if (ret) {
                         put_task_struct(p);
-                        spin_lock_irq(&scx_tasks_lock);
-                        scx_task_iter_exit(&sti);
-                        spin_unlock_irq(&scx_tasks_lock);
-                        pr_err("sched_ext: ops.init_task() failed (%d) for %s[%d] while loading\n",
-                               ret, p->comm, p->pid);
+                        scx_task_iter_relock(&sti);
+                        scx_task_iter_stop(&sti);
+                        scx_ops_error("ops.init_task() failed (%d) for %s[%d]",
+                                      ret, p->comm, p->pid);
                         goto err_disable_unlock_all;
                 }

                 scx_set_task_state(p, SCX_TASK_READY);

                 put_task_struct(p);
-                spin_lock_irq(&scx_tasks_lock);
+                scx_task_iter_relock(&sti);
         }
-        scx_task_iter_exit(&sti);
-        spin_unlock_irq(&scx_tasks_lock);
+        scx_task_iter_stop(&sti);
         scx_cgroup_unlock();
         percpu_up_write(&scx_fork_rwsem);
@@ -5491,14 +5514,14 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
          * scx_tasks_lock.
          */
         percpu_down_write(&scx_fork_rwsem);
-        spin_lock_irq(&scx_tasks_lock);
-        scx_task_iter_init(&sti);
+        scx_task_iter_start(&sti);
         while ((p = scx_task_iter_next_locked(&sti))) {
                 const struct sched_class *old_class = p->sched_class;
                 struct sched_enq_and_set_ctx ctx;

                 sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);

+                p->scx.slice = SCX_SLICE_DFL;
                 __setscheduler_prio(p, p->prio);
                 check_class_changing(task_rq(p), p, old_class);
@@ -5506,20 +5529,13 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)

                 check_class_changed(task_rq(p), p, old_class, p->prio);
         }
-        scx_task_iter_exit(&sti);
-        spin_unlock_irq(&scx_tasks_lock);
+        scx_task_iter_stop(&sti);
         percpu_up_write(&scx_fork_rwsem);

         scx_ops_bypass(false);

-        /*
-         * Returning an error code here would lose the recorded error
-         * information. Exit indicating success so that the error is notified
-         * through ops.exit() with all the details.
-         */
         if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
                 WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE);
-                ret = 0;
                 goto err_disable;
         }
@@ -5554,10 +5570,18 @@ err_disable_unlock_all:
         scx_ops_bypass(false);
 err_disable:
         mutex_unlock(&scx_ops_enable_mutex);
-        /* must be fully disabled before returning */
-        scx_ops_disable(SCX_EXIT_ERROR);
+        /*
+         * Returning an error code here would not pass all the error information
+         * to userspace. Record errno using scx_ops_error() for cases
+         * scx_ops_error() wasn't already invoked and exit indicating success so
+         * that the error is notified through ops.exit() with all the details.
+         *
+         * Flush scx_ops_disable_work to ensure that error is reported before
+         * init completion.
+         */
+        scx_ops_error("scx_ops_enable() failed (%d)", ret);
         kthread_flush_work(&scx_ops_disable_work);
-        return ret;
+        return 0;
 }
@@ -6108,16 +6132,21 @@ __bpf_kfunc_start_defs();
 __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
                                        u64 wake_flags, bool *is_idle)
 {
-        if (!scx_kf_allowed(SCX_KF_SELECT_CPU)) {
-                *is_idle = false;
-                return prev_cpu;
+        if (!static_branch_likely(&scx_builtin_idle_enabled)) {
+                scx_ops_error("built-in idle tracking is disabled");
+                goto prev_cpu;
         }
+
+        if (!scx_kf_allowed(SCX_KF_SELECT_CPU))
+                goto prev_cpu;
+
 #ifdef CONFIG_SMP
         return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle);
-#else
+#endif
+
+prev_cpu:
         *is_idle = false;
         return prev_cpu;
-#endif
 }

 __bpf_kfunc_end_defs();
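With scx_task_iter_start() now taking scx_tasks_lock itself and scx_task_iter_next() transparently dropping and re-acquiring it every %SCX_OPS_TASK_ITER_BATCH tasks, callers no longer manage the lock directly. A condensed sketch of the resulting usage pattern, modeled on the scx_ops_disable_workfn() hunk above (per-task work reduced to the exit call; not a complete function):

        struct scx_task_iter sti;
        struct task_struct *p;

        scx_task_iter_start(&sti);              /* acquires scx_tasks_lock */
        while ((p = scx_task_iter_next_locked(&sti))) {
                /*
                 * @p's rq lock is held here. Both locks are dropped and
                 * re-acquired every SCX_OPS_TASK_ITER_BATCH tasks, bounding
                 * how long a huge task list can stall CSD and RCU processing.
                 */
                scx_ops_exit_task(p);
        }
        scx_task_iter_stop(&sti);               /* releases scx_tasks_lock */
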
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b1c3588a8f00..6085ef50febf 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2292,6 +2292,7 @@ static inline int task_on_rq_migrating(struct task_struct *p)
 #define WF_SYNC			0x10 /* Waker goes to sleep after wakeup */
 #define WF_MIGRATED		0x20 /* Internal use, task got migrated */
 #define WF_CURRENT_CPU		0x40 /* Prefer to move the wakee to the current CPU. */
+#define WF_RQ_SELECTED		0x80 /* ->select_task_rq() was called */

 #ifdef CONFIG_SMP
 static_assert(WF_EXEC == SD_BALANCE_EXEC);
@@ -2334,6 +2335,7 @@ extern const u32 sched_prio_to_wmult[40];
  * ENQUEUE_HEAD      - place at front of runqueue (tail if not specified)
  * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
  * ENQUEUE_MIGRATED  - the task was migrated during wakeup
+ * ENQUEUE_RQ_SELECTED - ->select_task_rq() was called
  *
  */

@@ -2360,6 +2362,7 @@ extern const u32 sched_prio_to_wmult[40];
 #define ENQUEUE_INITIAL		0x80
 #define ENQUEUE_MIGRATING	0x100
 #define ENQUEUE_DELAYED		0x200
+#define ENQUEUE_RQ_SELECTED	0x400

 #define RETRY_TASK		((void *)-1UL)
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index 225f61f9bfca..248ab790d143 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -41,8 +41,8 @@ void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym;
 u32 scx_bpf_dispatch_nr_slots(void) __ksym;
 void scx_bpf_dispatch_cancel(void) __ksym;
 bool scx_bpf_consume(u64 dsq_id) __ksym;
-void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym;
-void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym;
+void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
+void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
 bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
 bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
 u32 scx_bpf_reenqueue_local(void) __ksym;
@@ -71,7 +71,7 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
 bool scx_bpf_task_running(const struct task_struct *p) __ksym;
 s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
 struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
-struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym;
+struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;

 /*
  * Use the following as @it__iter when calling
@@ -320,7 +320,7 @@ u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym;
 /*
  * Access a cpumask in read-only mode (typically to check bits).
  */
-const struct cpumask *cast_mask(struct bpf_cpumask *mask)
+static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask)
 {
         return (const struct cpumask *)mask;
 }
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index 67e2a7968cc9..5d1f880d1149 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -230,8 +230,8 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
                 return;
         }

-        /* if !WAKEUP, select_cpu() wasn't called, try direct dispatch */
-        if (!(enq_flags & SCX_ENQ_WAKEUP) &&
+        /* if select_cpu() wasn't called, try direct dispatch */
+        if (!(enq_flags & SCX_ENQ_CPU_SELECTED) &&
             (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
                 __sync_fetch_and_add(&nr_ddsp_from_enq, 1);
                 scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
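Because select_task_rq_scx() now skips ops.select_cpu() while bypassing, %SCX_ENQ_WAKEUP no longer implies that ops.select_cpu() ran, which is what scx_qmap's old !WAKEUP test assumed. The new %SCX_ENQ_CPU_SELECTED flag carries that information explicitly. A minimal sketch of how an ops.enqueue() can use it (example_enqueue and its dispatch policy are illustrative, not part of the patch):

        void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
        {
                /*
                 * SCX_ENQ_CPU_SELECTED is set only when ops.select_cpu() was
                 * invoked for this enqueue, so per-task state prepared there
                 * can only be trusted under this flag.
                 */
                if (enq_flags & SCX_ENQ_CPU_SELECTED)
                        scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, enq_flags);
                else
                        scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
        }
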
diff --git a/tools/sched_ext/scx_show_state.py b/tools/sched_ext/scx_show_state.py
index 8bc626ede1c4..c4b3fdda9a0b 100644
--- a/tools/sched_ext/scx_show_state.py
+++ b/tools/sched_ext/scx_show_state.py
@@ -35,6 +35,6 @@ print(f'enabled       : {read_static_key("__scx_ops_enabled")}')
 print(f'switching_all : {read_int("scx_switching_all")}')
 print(f'switched_all  : {read_static_key("__scx_switched_all")}')
 print(f'enable_state  : {ops_state_str(enable_state)} ({enable_state})')
-print(f'bypass_depth  : {read_atomic("scx_ops_bypass_depth")}')
+print(f'bypass_depth  : {prog["scx_ops_bypass_depth"].value_()}')
 print(f'nr_rejected   : {read_atomic("scx_nr_rejected")}')
 print(f'enable_seq    : {read_atomic("scx_enable_seq")}')
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index b38199965f99..363d031a16f7 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -88,6 +88,7 @@ TARGETS += rlimits
 TARGETS += rseq
 TARGETS += rtc
 TARGETS += rust
+TARGETS += sched_ext
 TARGETS += seccomp
 TARGETS += sgx
 TARGETS += sigaltstack
@@ -129,10 +130,10 @@ ifeq ($(filter net/lib,$(TARGETS)),)
 endif
 endif

-# User can optionally provide a TARGETS skiplist. By default we skip
-# BPF since it has cutting edge build time dependencies which require
-# more effort to install.
-SKIP_TARGETS ?= bpf
+# User can optionally provide a TARGETS skiplist. By default we skip
+# targets using BPF since it has cutting edge build time dependencies
+# which require more effort to install.
+SKIP_TARGETS ?= bpf sched_ext
 ifneq ($(SKIP_TARGETS),)
         TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS))
         override TARGETS := $(TMP)
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index 0754a2c110a1..011762224600 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -3,23 +3,12 @@
 include ../../../build/Build.include
 include ../../../scripts/Makefile.arch
 include ../../../scripts/Makefile.include
-include ../lib.mk

-ifneq ($(LLVM),)
-ifneq ($(filter %/,$(LLVM)),)
-LLVM_PREFIX := $(LLVM)
-else ifneq ($(filter -%,$(LLVM)),)
-LLVM_SUFFIX := $(LLVM)
-endif
-
-CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as
-else
-CC := gcc
-endif # LLVM
+TEST_GEN_PROGS := runner

-ifneq ($(CROSS_COMPILE),)
-$(error CROSS_COMPILE not supported for scx selftests)
-endif # CROSS_COMPILE
+# override lib.mk's default rules
+OVERRIDE_TARGETS := 1
+include ../lib.mk

 CURDIR := $(abspath .)
 REPOROOT := $(abspath ../../../..)
@@ -34,18 +23,23 @@ GENHDR := $(GENDIR)/autoconf.h
 SCXTOOLSDIR := $(TOOLSDIR)/sched_ext
 SCXTOOLSINCDIR := $(TOOLSDIR)/sched_ext/include

-OUTPUT_DIR := $(CURDIR)/build
+OUTPUT_DIR := $(OUTPUT)/build
 OBJ_DIR := $(OUTPUT_DIR)/obj
 INCLUDE_DIR := $(OUTPUT_DIR)/include
 BPFOBJ_DIR := $(OBJ_DIR)/libbpf
 SCXOBJ_DIR := $(OBJ_DIR)/sched_ext
 BPFOBJ := $(BPFOBJ_DIR)/libbpf.a
 LIBBPF_OUTPUT := $(OBJ_DIR)/libbpf/libbpf.a
-DEFAULT_BPFTOOL := $(OUTPUT_DIR)/sbin/bpftool
-HOST_BUILD_DIR := $(OBJ_DIR)
-HOST_OUTPUT_DIR := $(OUTPUT_DIR)

-VMLINUX_BTF_PATHS ?= ../../../../vmlinux \
+DEFAULT_BPFTOOL := $(OUTPUT_DIR)/host/sbin/bpftool
+HOST_OBJ_DIR := $(OBJ_DIR)/host/bpftool
+HOST_LIBBPF_OUTPUT := $(OBJ_DIR)/host/libbpf/
+HOST_LIBBPF_DESTDIR := $(OUTPUT_DIR)/host/
+HOST_DESTDIR := $(OUTPUT_DIR)/host/
+
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
+                     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+                     ../../../../vmlinux \
                      /sys/kernel/btf/vmlinux \
                      /boot/vmlinux-$(shell uname -r)
 VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
@@ -80,17 +74,23 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
 # Use '-idirafter': Don't interfere with include mechanics except where the
 # build would have failed anyways.
 define get_sys_includes
-$(shell $(1) -v -E - </dev/null 2>&1 \
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
         | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
-$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
 endef

+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+
 BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \
              $(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) \
              -I$(CURDIR)/include -I$(CURDIR)/include/bpf-compat \
              -I$(INCLUDE_DIR) -I$(APIDIR) -I$(SCXTOOLSINCDIR) \
              -I$(REPOROOT)/include \
-             $(call get_sys_includes,$(CLANG)) \
+             $(CLANG_SYS_INCLUDES) \
              -Wall -Wno-compare-distinct-pointer-types \
              -Wno-incompatible-function-pointer-types \
              -O2 -mcpu=v3
@@ -98,7 +98,7 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \
 # sort removes libbpf duplicates when not cross-building
 MAKE_DIRS := $(sort $(OBJ_DIR)/libbpf $(OBJ_DIR)/libbpf \
                $(OBJ_DIR)/bpftool $(OBJ_DIR)/resolve_btfids \
-               $(INCLUDE_DIR) $(SCXOBJ_DIR))
+               $(HOST_OBJ_DIR) $(INCLUDE_DIR) $(SCXOBJ_DIR))

 $(MAKE_DIRS):
         $(call msg,MKDIR,,$@)
@@ -108,18 +108,19 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		\
            $(APIDIR)/linux/bpf.h					\
            | $(OBJ_DIR)/libbpf
         $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(OBJ_DIR)/libbpf/ \
+                    ARCH=$(ARCH) CC="$(CC)" CROSS_COMPILE=$(CROSS_COMPILE)	\
                     EXTRA_CFLAGS='-g -O0 -fPIC'				\
                     DESTDIR=$(OUTPUT_DIR) prefix= all install_headers

 $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)	\
-                    $(LIBBPF_OUTPUT) | $(OBJ_DIR)/bpftool
+                    $(LIBBPF_OUTPUT) | $(HOST_OBJ_DIR)
         $(Q)$(MAKE) $(submake_extras)  -C $(BPFTOOLDIR)			\
                     ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD)	\
                     EXTRA_CFLAGS='-g -O0'				\
-                    OUTPUT=$(OBJ_DIR)/bpftool/				\
-                    LIBBPF_OUTPUT=$(OBJ_DIR)/libbpf/			\
-                    LIBBPF_DESTDIR=$(OUTPUT_DIR)/			\
-                    prefix= DESTDIR=$(OUTPUT_DIR)/ install-bin
+                    OUTPUT=$(HOST_OBJ_DIR)/				\
+                    LIBBPF_OUTPUT=$(HOST_LIBBPF_OUTPUT)			\
+                    LIBBPF_DESTDIR=$(HOST_LIBBPF_DESTDIR)		\
+                    prefix= DESTDIR=$(HOST_DESTDIR) install-bin

 $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
 ifeq ($(VMLINUX_H),)
@@ -150,9 +151,7 @@ $(INCLUDE_DIR)/%.bpf.skel.h: $(SCXOBJ_DIR)/%.bpf.o $(INCLUDE_DIR)/vmlinux.h $(BPFTOOL)

 override define CLEAN
         rm -rf $(OUTPUT_DIR)
-        rm -f *.o *.bpf.o *.bpf.skel.h *.bpf.subskel.h
         rm -f $(TEST_GEN_PROGS)
-        rm -f runner
 endef

 # Every testcase takes all of the BPF progs are dependencies by default. This
@@ -185,7 +184,7 @@ auto-test-targets :=			\

 testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets)))

-$(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR)
+$(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR) $(BPFOBJ)
         $(CC) $(CFLAGS) -c $< -o $@

 # Create all of the test targets object files, whose testcase objects will be
@@ -196,21 +195,15 @@ $(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR)
 # function doesn't support using implicit rules otherwise.
 $(testcase-targets): $(SCXOBJ_DIR)/%.o: %.c $(SCXOBJ_DIR)/runner.o $(all_test_bpfprogs) | $(SCXOBJ_DIR)
         $(eval test=$(patsubst %.o,%.c,$(notdir $@)))
-        $(CC) $(CFLAGS) -c $< -o $@ $(SCXOBJ_DIR)/runner.o
+        $(CC) $(CFLAGS) -c $< -o $@

 $(SCXOBJ_DIR)/util.o: util.c | $(SCXOBJ_DIR)
         $(CC) $(CFLAGS) -c $< -o $@

-runner: $(SCXOBJ_DIR)/runner.o $(SCXOBJ_DIR)/util.o $(BPFOBJ) $(testcase-targets)
+$(OUTPUT)/runner: $(SCXOBJ_DIR)/runner.o $(SCXOBJ_DIR)/util.o $(BPFOBJ) $(testcase-targets)
         @echo "$(testcase-targets)"
         $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

-TEST_GEN_PROGS := runner
-
-all: runner
-
-.PHONY: all clean help
-
 .DEFAULT_GOAL := all
 .DELETE_ON_ERROR:
diff --git a/tools/testing/selftests/sched_ext/create_dsq.bpf.c b/tools/testing/selftests/sched_ext/create_dsq.bpf.c
index 23f79ed343f0..2cfc4ffd60e2 100644
--- a/tools/testing/selftests/sched_ext/create_dsq.bpf.c
+++ b/tools/testing/selftests/sched_ext/create_dsq.bpf.c
@@ -51,8 +51,8 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(create_dsq_init)

 SEC(".struct_ops.link")
 struct sched_ext_ops create_dsq_ops = {
-        .init_task		= create_dsq_init_task,
-        .exit_task		= create_dsq_exit_task,
-        .init			= create_dsq_init,
+        .init_task		= (void *) create_dsq_init_task,
+        .exit_task		= (void *) create_dsq_exit_task,
+        .init			= (void *) create_dsq_init,
         .name			= "create_dsq",
 };
diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
index e97ad41d354a..37d9bf6fb745 100644
--- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
+++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
@@ -35,8 +35,8 @@ void BPF_STRUCT_OPS(ddsp_bogus_dsq_fail_exit, struct scx_exit_info *ei)

 SEC(".struct_ops.link")
 struct sched_ext_ops ddsp_bogus_dsq_fail_ops = {
-        .select_cpu		= ddsp_bogus_dsq_fail_select_cpu,
-        .exit			= ddsp_bogus_dsq_fail_exit,
+        .select_cpu		= (void *) ddsp_bogus_dsq_fail_select_cpu,
+        .exit			= (void *) ddsp_bogus_dsq_fail_exit,
         .name			= "ddsp_bogus_dsq_fail",
         .timeout_ms		= 1000U,
 };
diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
index dde7e7dafbfb..dffc97d9cdf1 100644
--- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
+++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
@@ -32,8 +32,8 @@ void BPF_STRUCT_OPS(ddsp_vtimelocal_fail_exit, struct scx_exit_info *ei)

 SEC(".struct_ops.link")
 struct sched_ext_ops ddsp_vtimelocal_fail_ops = {
-        .select_cpu		= ddsp_vtimelocal_fail_select_cpu,
-        .exit			= ddsp_vtimelocal_fail_exit,
+        .select_cpu		= (void *) ddsp_vtimelocal_fail_select_cpu,
+        .exit			= (void *) ddsp_vtimelocal_fail_exit,
         .name			= "ddsp_vtimelocal_fail",
         .timeout_ms		= 1000U,
 };
diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
index efb4672decb4..6a7db1502c29 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
@@ -56,10 +56,10 @@ void BPF_STRUCT_OPS(dsp_local_on_exit, struct scx_exit_info *ei)

 SEC(".struct_ops.link")
 struct sched_ext_ops dsp_local_on_ops = {
-        .select_cpu		= dsp_local_on_select_cpu,
-        .enqueue		= dsp_local_on_enqueue,
-        .dispatch		= dsp_local_on_dispatch,
-        .exit			= dsp_local_on_exit,
+        .select_cpu		= (void *) dsp_local_on_select_cpu,
+        .enqueue		= (void *) dsp_local_on_enqueue,
+        .dispatch		= (void *) dsp_local_on_dispatch,
+        .exit			= (void *) dsp_local_on_exit,
         .name			= "dsp_local_on",
         .timeout_ms		= 1000U,
 };
diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c
index b0b99531d5d5..e1bd13e48889 100644
--- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c
+++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c
@@ -12,10 +12,18 @@

 char _license[] SEC("license") = "GPL";

+u32 exit_kind;
+
+void BPF_STRUCT_OPS_SLEEPABLE(enq_last_no_enq_fails_exit, struct scx_exit_info *info)
+{
+        exit_kind = info->kind;
+}
+
 SEC(".struct_ops.link")
 struct sched_ext_ops enq_last_no_enq_fails_ops = {
         .name			= "enq_last_no_enq_fails",
         /* Need to define ops.enqueue() with SCX_OPS_ENQ_LAST */
         .flags			= SCX_OPS_ENQ_LAST,
+        .exit			= (void *) enq_last_no_enq_fails_exit,
         .timeout_ms		= 1000U,
 };
diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
index 2a3eda5e2c0b..73e679953e27 100644
--- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
+++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
@@ -31,8 +31,12 @@ static enum scx_test_status run(void *ctx)
         struct bpf_link *link;

         link = bpf_map__attach_struct_ops(skel->maps.enq_last_no_enq_fails_ops);
-        if (link) {
-                SCX_ERR("Incorrectly succeeded in to attaching scheduler");
+        if (!link) {
+                SCX_ERR("Incorrectly failed at attaching scheduler");
+                return SCX_TEST_FAIL;
+        }
+        if (!skel->bss->exit_kind) {
+                SCX_ERR("Incorrectly stayed loaded");
                 return SCX_TEST_FAIL;
         }

@@ -50,7 +54,7 @@ static void cleanup(void *ctx)

 struct scx_test enq_last_no_enq_fails = {
         .name = "enq_last_no_enq_fails",
-        .description = "Verify we fail to load a scheduler if we specify "
+        .description = "Verify we eject a scheduler if we specify "
                        "the SCX_OPS_ENQ_LAST flag without defining "
                        "ops.enqueue()",
         .setup = setup,
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
index b3dfc1033cd6..1efb50d61040 100644
--- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
+++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
@@ -36,8 +36,8 @@ void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p,

 SEC(".struct_ops.link")
 struct sched_ext_ops enq_select_cpu_fails_ops = {
-        .select_cpu		= enq_select_cpu_fails_select_cpu,
-        .enqueue		= enq_select_cpu_fails_enqueue,
+        .select_cpu		= (void *) enq_select_cpu_fails_select_cpu,
+        .enqueue		= (void *) enq_select_cpu_fails_enqueue,
         .name			= "enq_select_cpu_fails",
         .timeout_ms		= 1000U,
 };
diff --git a/tools/testing/selftests/sched_ext/exit.bpf.c b/tools/testing/selftests/sched_ext/exit.bpf.c
index ae12ddaac921..d75d4faf07f6 100644
--- a/tools/testing/selftests/sched_ext/exit.bpf.c
+++ b/tools/testing/selftests/sched_ext/exit.bpf.c
@@ -15,6 +15,8 @@ UEI_DEFINE(uei);

 #define EXIT_CLEANLY() scx_bpf_exit(exit_point, "%d", exit_point)

+#define DSQ_ID 0
+
 s32 BPF_STRUCT_OPS(exit_select_cpu, struct task_struct *p,
                    s32 prev_cpu, u64 wake_flags)
 {
@@ -31,7 +33,7 @@ void BPF_STRUCT_OPS(exit_enqueue, struct task_struct *p, u64 enq_flags)
         if (exit_point == EXIT_ENQUEUE)
                 EXIT_CLEANLY();

-        scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+        scx_bpf_dispatch(p, DSQ_ID, SCX_SLICE_DFL, enq_flags);
 }

 void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p)
@@ -39,7 +41,7 @@ void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p)
         if (exit_point == EXIT_DISPATCH)
                 EXIT_CLEANLY();

-        scx_bpf_consume(SCX_DSQ_GLOBAL);
+        scx_bpf_consume(DSQ_ID);
 }

 void BPF_STRUCT_OPS(exit_enable, struct task_struct *p)
@@ -67,18 +69,18 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(exit_init)
         if (exit_point == EXIT_INIT)
                 EXIT_CLEANLY();

-        return 0;
+        return scx_bpf_create_dsq(DSQ_ID, -1);
 }

 SEC(".struct_ops.link")
 struct sched_ext_ops exit_ops = {
-        .select_cpu		= exit_select_cpu,
-        .enqueue		= exit_enqueue,
-        .dispatch		= exit_dispatch,
-        .init_task		= exit_init_task,
-        .enable			= exit_enable,
-        .exit			= exit_exit,
-        .init			= exit_init,
+        .select_cpu		= (void *) exit_select_cpu,
+        .enqueue		= (void *) exit_enqueue,
+        .dispatch		= (void *) exit_dispatch,
+        .init_task		= (void *) exit_init_task,
+        .enable			= (void *) exit_enable,
+        .exit			= (void *) exit_exit,
+        .init			= (void *) exit_init,
         .name			= "exit",
         .timeout_ms		= 1000U,
 };
diff --git a/tools/testing/selftests/sched_ext/hotplug.bpf.c b/tools/testing/selftests/sched_ext/hotplug.bpf.c
index 8f2601db39f3..6c9f25c9bf53 100644
--- a/tools/testing/selftests/sched_ext/hotplug.bpf.c
+++ b/tools/testing/selftests/sched_ext/hotplug.bpf.c
@@ -46,16 +46,16 @@ void BPF_STRUCT_OPS_SLEEPABLE(hotplug_cpu_offline, s32 cpu)

 SEC(".struct_ops.link")
 struct sched_ext_ops hotplug_cb_ops = {
-        .cpu_online		= hotplug_cpu_online,
-        .cpu_offline		= hotplug_cpu_offline,
-        .exit			= hotplug_exit,
+        .cpu_online		= (void *) hotplug_cpu_online,
+        .cpu_offline		= (void *) hotplug_cpu_offline,
+        .exit			= (void *) hotplug_exit,
         .name			= "hotplug_cbs",
         .timeout_ms		= 1000U,
 };

 SEC(".struct_ops.link")
 struct sched_ext_ops hotplug_nocb_ops = {
-        .exit			= hotplug_exit,
+        .exit			= (void *) hotplug_exit,
         .name			= "hotplug_nocbs",
         .timeout_ms		= 1000U,
 };
diff --git a/tools/testing/selftests/sched_ext/init_enable_count.bpf.c b/tools/testing/selftests/sched_ext/init_enable_count.bpf.c
index 47ea89a626c3..5eb9edb1837d 100644
--- a/tools/testing/selftests/sched_ext/init_enable_count.bpf.c
+++ b/tools/testing/selftests/sched_ext/init_enable_count.bpf.c
@@ -45,9 +45,9 @@ void BPF_STRUCT_OPS(cnt_disable, struct task_struct *p)

 SEC(".struct_ops.link")
 struct sched_ext_ops init_enable_count_ops = {
-        .init_task	= cnt_init_task,
-        .exit_task	= cnt_exit_task,
-        .enable		= cnt_enable,
-        .disable	= cnt_disable,
+        .init_task	= (void *) cnt_init_task,
+        .exit_task	= (void *) cnt_exit_task,
+        .enable		= (void *) cnt_enable,
+        .disable	= (void *) cnt_disable,
         .name		= "init_enable_count",
 };
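Every selftest struct_ops table in this series gains (void *) casts on its callbacks. A plausible reading: functions defined through BPF_STRUCT_OPS() and BPF_STRUCT_OPS_SLEEPABLE() don't carry exactly the member function-pointer types of struct sched_ext_ops, so direct assignment can trip -Wincompatible-function-pointer-types on newer compilers; the casts keep the initializers compiling while libbpf still wires the programs up. The shape of the pattern, with hypothetical names (example_init_task and example_ops are illustrative only):

        s32 BPF_STRUCT_OPS(example_init_task, struct task_struct *p,
                           struct scx_init_task_args *args)
        {
                return 0;       /* accept every task */
        }

        SEC(".struct_ops.link")
        struct sched_ext_ops example_ops = {
                /* (void *) bridges the function-pointer type mismatch */
                .init_task      = (void *) example_init_task,
                .name           = "example",
        };
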
SEC(".struct_ops.link") struct sched_ext_ops maximal_ops = { - .select_cpu = maximal_select_cpu, - .enqueue = maximal_enqueue, - .dequeue = maximal_dequeue, - .dispatch = maximal_dispatch, - .runnable = maximal_runnable, - .running = maximal_running, - .stopping = maximal_stopping, - .quiescent = maximal_quiescent, - .yield = maximal_yield, - .core_sched_before = maximal_core_sched_before, - .set_weight = maximal_set_weight, - .set_cpumask = maximal_set_cpumask, - .update_idle = maximal_update_idle, - .cpu_acquire = maximal_cpu_acquire, - .cpu_release = maximal_cpu_release, - .cpu_online = maximal_cpu_online, - .cpu_offline = maximal_cpu_offline, - .init_task = maximal_init_task, - .enable = maximal_enable, - .exit_task = maximal_exit_task, - .disable = maximal_disable, - .cgroup_init = maximal_cgroup_init, - .cgroup_exit = maximal_cgroup_exit, - .cgroup_prep_move = maximal_cgroup_prep_move, - .cgroup_move = maximal_cgroup_move, - .cgroup_cancel_move = maximal_cgroup_cancel_move, - .cgroup_set_weight = maximal_cgroup_set_weight, - .init = maximal_init, - .exit = maximal_exit, + .select_cpu = (void *) maximal_select_cpu, + .enqueue = (void *) maximal_enqueue, + .dequeue = (void *) maximal_dequeue, + .dispatch = (void *) maximal_dispatch, + .runnable = (void *) maximal_runnable, + .running = (void *) maximal_running, + .stopping = (void *) maximal_stopping, + .quiescent = (void *) maximal_quiescent, + .yield = (void *) maximal_yield, + .core_sched_before = (void *) maximal_core_sched_before, + .set_weight = (void *) maximal_set_weight, + .set_cpumask = (void *) maximal_set_cpumask, + .update_idle = (void *) maximal_update_idle, + .cpu_acquire = (void *) maximal_cpu_acquire, + .cpu_release = (void *) maximal_cpu_release, + .cpu_online = (void *) maximal_cpu_online, + .cpu_offline = (void *) maximal_cpu_offline, + .init_task = (void *) maximal_init_task, + .enable = (void *) maximal_enable, + .exit_task = (void *) maximal_exit_task, + .disable = (void *) maximal_disable, + .cgroup_init = (void *) maximal_cgroup_init, + .cgroup_exit = (void *) maximal_cgroup_exit, + .cgroup_prep_move = (void *) maximal_cgroup_prep_move, + .cgroup_move = (void *) maximal_cgroup_move, + .cgroup_cancel_move = (void *) maximal_cgroup_cancel_move, + .cgroup_set_weight = (void *) maximal_cgroup_set_weight, + .init = (void *) maximal_init, + .exit = (void *) maximal_exit, .name = "maximal", }; diff --git a/tools/testing/selftests/sched_ext/maybe_null.bpf.c b/tools/testing/selftests/sched_ext/maybe_null.bpf.c index 27d0f386acfb..cf4ae870cd4e 100644 --- a/tools/testing/selftests/sched_ext/maybe_null.bpf.c +++ b/tools/testing/selftests/sched_ext/maybe_null.bpf.c @@ -29,8 +29,8 @@ bool BPF_STRUCT_OPS(maybe_null_success_yield, struct task_struct *from, SEC(".struct_ops.link") struct sched_ext_ops maybe_null_success = { - .dispatch = maybe_null_success_dispatch, - .yield = maybe_null_success_yield, - .enable = maybe_null_running, + .dispatch = (void *) maybe_null_success_dispatch, + .yield = (void *) maybe_null_success_yield, + .enable = (void *) maybe_null_running, .name = "minimal", }; diff --git a/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c b/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c index c0641050271d..ec724d7b33d1 100644 --- a/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c +++ b/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c @@ -19,7 +19,7 @@ void BPF_STRUCT_OPS(maybe_null_fail_dispatch, s32 cpu, struct task_struct *p) SEC(".struct_ops.link") struct 
sched_ext_ops maybe_null_fail = { - .dispatch = maybe_null_fail_dispatch, - .enable = maybe_null_running, + .dispatch = (void *) maybe_null_fail_dispatch, + .enable = (void *) maybe_null_running, .name = "maybe_null_fail_dispatch", }; diff --git a/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c b/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c index 3c1740028e3b..e6552cace020 100644 --- a/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c +++ b/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c @@ -22,7 +22,7 @@ bool BPF_STRUCT_OPS(maybe_null_fail_yield, struct task_struct *from, SEC(".struct_ops.link") struct sched_ext_ops maybe_null_fail = { - .yield = maybe_null_fail_yield, - .enable = maybe_null_running, + .yield = (void *) maybe_null_fail_yield, + .enable = (void *) maybe_null_running, .name = "maybe_null_fail_yield", }; diff --git a/tools/testing/selftests/sched_ext/prog_run.bpf.c b/tools/testing/selftests/sched_ext/prog_run.bpf.c index 6a4d7c48e3f2..00c267626a68 100644 --- a/tools/testing/selftests/sched_ext/prog_run.bpf.c +++ b/tools/testing/selftests/sched_ext/prog_run.bpf.c @@ -28,6 +28,6 @@ void BPF_STRUCT_OPS(prog_run_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops prog_run_ops = { - .exit = prog_run_exit, + .exit = (void *) prog_run_exit, .name = "prog_run", }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c index 2ed2991afafe..f171ac470970 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c @@ -35,6 +35,6 @@ void BPF_STRUCT_OPS(select_cpu_dfl_enqueue, struct task_struct *p, SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dfl_ops = { - .enqueue = select_cpu_dfl_enqueue, + .enqueue = (void *) select_cpu_dfl_enqueue, .name = "select_cpu_dfl", }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c index 4bb5abb2d369..9efdbb7da928 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c @@ -82,8 +82,8 @@ s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_init_task, SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dfl_nodispatch_ops = { - .select_cpu = select_cpu_dfl_nodispatch_select_cpu, - .enqueue = select_cpu_dfl_nodispatch_enqueue, - .init_task = select_cpu_dfl_nodispatch_init_task, + .select_cpu = (void *) select_cpu_dfl_nodispatch_select_cpu, + .enqueue = (void *) select_cpu_dfl_nodispatch_enqueue, + .init_task = (void *) select_cpu_dfl_nodispatch_init_task, .name = "select_cpu_dfl_nodispatch", }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c index f0b96a4a04b2..59bfc4f36167 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c @@ -35,7 +35,7 @@ dispatch: SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dispatch_ops = { - .select_cpu = select_cpu_dispatch_select_cpu, + .select_cpu = (void *) select_cpu_dispatch_select_cpu, .name = "select_cpu_dispatch", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c index 7b42ddce0f56..3bbd5fcdfb18 100644 --- 
a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c @@ -30,8 +30,8 @@ void BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dispatch_bad_dsq_ops = { - .select_cpu = select_cpu_dispatch_bad_dsq_select_cpu, - .exit = select_cpu_dispatch_bad_dsq_exit, + .select_cpu = (void *) select_cpu_dispatch_bad_dsq_select_cpu, + .exit = (void *) select_cpu_dispatch_bad_dsq_exit, .name = "select_cpu_dispatch_bad_dsq", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c index 653e3dc0b4dc..0fda57fe0ecf 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c @@ -31,8 +31,8 @@ void BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dispatch_dbl_dsp_ops = { - .select_cpu = select_cpu_dispatch_dbl_dsp_select_cpu, - .exit = select_cpu_dispatch_dbl_dsp_exit, + .select_cpu = (void *) select_cpu_dispatch_dbl_dsp_select_cpu, + .exit = (void *) select_cpu_dispatch_dbl_dsp_exit, .name = "select_cpu_dispatch_dbl_dsp", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c index 7f3ebf4fc2ea..e6c67bcf5e6e 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c @@ -81,12 +81,12 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(select_cpu_vtime_init) SEC(".struct_ops.link") struct sched_ext_ops select_cpu_vtime_ops = { - .select_cpu = select_cpu_vtime_select_cpu, - .dispatch = select_cpu_vtime_dispatch, - .running = select_cpu_vtime_running, - .stopping = select_cpu_vtime_stopping, - .enable = select_cpu_vtime_enable, - .init = select_cpu_vtime_init, + .select_cpu = (void *) select_cpu_vtime_select_cpu, + .dispatch = (void *) select_cpu_vtime_dispatch, + .running = (void *) select_cpu_vtime_running, + .stopping = (void *) select_cpu_vtime_stopping, + .enable = (void *) select_cpu_vtime_enable, + .init = (void *) select_cpu_vtime_init, .name = "select_cpu_vtime", .timeout_ms = 1000U, }; |
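Finally, note how the scx_bpf_select_cpu_dfl() hunk in kernel/sched/ext.c moves the error path: the "built-in idle tracking is disabled" check now lives in the kfunc itself, which falls back to prev_cpu with *is_idle cleared instead of erroring out deep in scx_select_cpu_dfl(). The caller-side idiom stays the standard one; a minimal sketch for a hypothetical ops.select_cpu() (example_select_cpu is illustrative, not from the patch):

        s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p,
                           s32 prev_cpu, u64 wake_flags)
        {
                bool is_idle = false;
                s32 cpu;

                /* on error, returns prev_cpu and leaves is_idle false */
                cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
                if (is_idle)
                        scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);

                return cpu;
        }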