mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
After commit 860683763e ("sched_ext: Add enq_flags to scx_bpf_dsq_move_to_local()"), some of the kselftests fail to build: exit.bpf.c:44:34: error: too few arguments provided to function-like macro invocation 44 | scx_bpf_dsq_move_to_local(DSQ_ID); Update the kselftests by adding the new argument to scx_bpf_dsq_move_to_local(). Fixes: 860683763e ("sched_ext: Add enq_flags to scx_bpf_dsq_move_to_local()") Signed-off-by: Andrea Righi <arighi@nvidia.com> Signed-off-by: Tejun Heo <tj@kernel.org>
390 lines
11 KiB
C
390 lines
11 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
 * A scheduler that validates ops.dequeue() is called correctly:
 * - Tasks dispatched to terminal DSQs (local, global) bypass the BPF
 *   scheduler entirely: no ops.dequeue() should be called
 * - Tasks dispatched to user DSQs from ops.enqueue() enter BPF custody:
 *   ops.dequeue() must be called when they leave custody
 * - Every ops.enqueue() dispatch to non-terminal DSQs is followed by
 *   exactly one ops.dequeue() (validate 1:1 pairing and state machine)
 *
 * Copyright (c) 2026 NVIDIA Corporation.
 */
|
|
|
|
#include <scx/common.bpf.h>
|
|
|
|
/* ID of the user DSQ created in dequeue_init() and used by scenarios 2 and 5. */
#define SHARED_DSQ 0
|
|
|
|
/*
 * BPF internal queue.
 *
 * Tasks are stored here and consumed from ops.dispatch(), validating that
 * tasks on BPF internal structures still get ops.dequeue() when they
 * leave.
 */
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_QUEUE);
|
|
__uint(max_entries, 32768);
|
|
__type(value, s32);
|
|
} global_queue SEC(".maps");
|
|
|
|
/* License string placed in the "license" section of the object. */
char _license[] SEC("license") = "GPL";
|
|
|
|
UEI_DEFINE(uei);
|
|
|
|
/*
 * Counters to track the lifecycle of tasks:
 * - enqueue_cnt: Number of times a task entered BPF custody (inserted into
 *   the shared user DSQ or pushed to the BPF internal queue)
 * - dequeue_cnt: Number of times ops.dequeue() was called (any type)
 * - dispatch_dequeue_cnt: Number of regular dispatch dequeues (no flag)
 * - change_dequeue_cnt: Number of property change dequeues
 * - bpf_queue_full: Number of times a push to the BPF internal queue failed
 *   (queue full)
 */
|
|
u64 enqueue_cnt;		/* bumped when a task goes to SHARED_DSQ or global_queue */
u64 dequeue_cnt;		/* bumped on every dequeue_dequeue() invocation */
u64 dispatch_dequeue_cnt;	/* dequeues without SCX_DEQ_SCHED_CHANGE */
u64 change_dequeue_cnt;		/* dequeues with SCX_DEQ_SCHED_CHANGE */
u64 bpf_queue_full;		/* failed pushes to global_queue */
|
|
|
|
/*
 * Test scenarios:
 * 0) Dispatch to local DSQ from ops.select_cpu() (terminal DSQ, bypasses BPF
 *    scheduler, no dequeue callbacks)
 * 1) Dispatch to global DSQ from ops.select_cpu() (terminal DSQ, bypasses BPF
 *    scheduler, no dequeue callbacks)
 * 2) Dispatch to shared user DSQ from ops.select_cpu() (enters BPF scheduler,
 *    dequeue callbacks expected)
 * 3) Dispatch to local DSQ from ops.enqueue() (terminal DSQ, bypasses BPF
 *    scheduler, no dequeue callbacks)
 * 4) Dispatch to global DSQ from ops.enqueue() (terminal DSQ, bypasses BPF
 *    scheduler, no dequeue callbacks)
 * 5) Dispatch to shared user DSQ from ops.enqueue() (enters BPF scheduler,
 *    dequeue callbacks expected)
 * 6) BPF internal queue from ops.enqueue(): store task PIDs in ops.enqueue(),
 *    consume in ops.dispatch() and dispatch to local DSQ (validates dequeue
 *    for tasks stored in internal BPF data structures)
 */
|
|
/*
 * Scenario selector read by the select_cpu, enqueue, dequeue and dispatch
 * callbacks. Nothing in this file assigns it; presumably the userspace side
 * of the test sets it before attaching (TODO confirm).
 */
u32 test_scenario;
|
|
|
|
/*
 * Per-task state to track lifecycle and validate workflow semantics.
 * State transitions:
 *   NONE -> ENQUEUED (on enqueue)
 *   NONE -> DISPATCHED (on direct dispatch to terminal DSQ)
 *   ENQUEUED -> DISPATCHED (on dispatch dequeue)
 *   DISPATCHED -> NONE (on property change dequeue or re-enqueue)
 *   ENQUEUED -> NONE (on property change dequeue before dispatch)
 */
|
|
enum task_state {
	TASK_NONE	= 0,	/* not tracked: initial state, or after a property-change dequeue */
	TASK_ENQUEUED	= 1,	/* placed in the shared user DSQ or the BPF internal queue */
	TASK_DISPATCHED	= 2,	/* sent to a terminal DSQ, or dispatch-dequeued from ENQUEUED */
};
|
|
|
|
struct task_ctx {
|
|
enum task_state state; /* Current state in the workflow */
|
|
u64 enqueue_seq; /* Sequence number for debugging */
|
|
};
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
|
|
__uint(map_flags, BPF_F_NO_PREALLOC);
|
|
__type(key, int);
|
|
__type(value, struct task_ctx);
|
|
} task_ctx_stor SEC(".maps");
|
|
|
|
static struct task_ctx *try_lookup_task_ctx(struct task_struct *p)
|
|
{
|
|
return bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
|
|
}
|
|
|
|
/*
 * ops.select_cpu(): implements scenarios 0-2 by inserting @p into a DSQ
 * straight from the CPU selection path. For any other scenario the task is
 * left untouched. @prev_cpu is always returned; @wake_flags is unused.
 */
s32 BPF_STRUCT_OPS(dequeue_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	struct task_ctx *tctx = try_lookup_task_ctx(p);
	u32 scenario;

	/* Without a per-task context there is nothing to track. */
	if (!tctx)
		return prev_cpu;

	scenario = test_scenario;

	if (scenario == 0 || scenario == 1) {
		/*
		 * Direct dispatch to a terminal DSQ: local for scenario 0,
		 * global for scenario 1.
		 *
		 * The task bypasses the BPF scheduler entirely: no enqueue
		 * tracking, no ops.dequeue() callbacks.
		 */
		u64 dsq_id = SCX_DSQ_GLOBAL;

		if (scenario == 0)
			dsq_id = SCX_DSQ_LOCAL;

		scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, 0);
		tctx->state = TASK_DISPATCHED;
	} else if (scenario == 2) {
		/*
		 * Dispatch to the shared user DSQ.
		 *
		 * The task enters BPF scheduler management, so its
		 * enqueue/dequeue lifecycle is tracked and the state
		 * transition is validated first.
		 */
		if (tctx->state == TASK_ENQUEUED)
			scx_bpf_error("%d (%s): enqueue while in ENQUEUED state seq=%llu",
				      p->pid, p->comm, tctx->enqueue_seq);

		scx_bpf_dsq_insert(p, SHARED_DSQ, SCX_SLICE_DFL, 0);
		__sync_fetch_and_add(&enqueue_cnt, 1);

		tctx->enqueue_seq += 1;
		tctx->state = TASK_ENQUEUED;
	}

	return prev_cpu;
}
|
|
|
|
/*
 * ops.enqueue(): implements scenarios 3-6 and falls back to the global DSQ
 * for every other scenario. After placing @p it kicks the task's CPU with
 * SCX_KICK_IDLE. Does nothing when @p has no task_ctx.
 */
void BPF_STRUCT_OPS(dequeue_enqueue, struct task_struct *p, u64 enq_flags)
{
	struct task_ctx *tctx;
	s32 task_pid = p->pid;
	s32 task_cpu;

	tctx = try_lookup_task_ctx(p);
	if (!tctx)
		return;

	switch (test_scenario) {
	case 3:
		/*
		 * Direct dispatch to the local DSQ.
		 *
		 * The task bypasses the BPF scheduler entirely: no enqueue
		 * tracking, no ops.dequeue() callbacks.
		 */
		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, enq_flags);
		tctx->state = TASK_DISPATCHED;
		break;
	case 5:
		/*
		 * Dispatch to the shared user DSQ.
		 *
		 * The task enters BPF scheduler management: track the
		 * enqueue/dequeue lifecycle and validate the transition.
		 */
		if (tctx->state == TASK_ENQUEUED)
			scx_bpf_error("%d (%s): enqueue while in ENQUEUED state seq=%llu",
				      p->pid, p->comm, tctx->enqueue_seq);

		scx_bpf_dsq_insert(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags);
		__sync_fetch_and_add(&enqueue_cnt, 1);

		tctx->enqueue_seq += 1;
		tctx->state = TASK_ENQUEUED;
		break;
	case 6:
		/*
		 * Store the task's PID in the BPF internal queue.
		 *
		 * The task enters BPF scheduler management: track the
		 * enqueue/dequeue lifecycle and validate the transition.
		 */
		if (tctx->state == TASK_ENQUEUED)
			scx_bpf_error("%d (%s): enqueue while in ENQUEUED state seq=%llu",
				      p->pid, p->comm, tctx->enqueue_seq);

		if (!bpf_map_push_elem(&global_queue, &task_pid, 0)) {
			/* PID queued: the task is now held by the BPF side. */
			__sync_fetch_and_add(&enqueue_cnt, 1);

			tctx->enqueue_seq += 1;
			tctx->state = TASK_ENQUEUED;
		} else {
			/* Push failed: fall back to the global DSQ and count it. */
			scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
			__sync_fetch_and_add(&bpf_queue_full, 1);

			tctx->state = TASK_DISPATCHED;
		}
		break;
	case 4:
		/*
		 * Scenario 4: direct dispatch to the global DSQ (terminal
		 * DSQ, no enqueue tracking, no ops.dequeue() callbacks).
		 * Every other scenario is handled the same way.
		 */
	default:
		scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
		tctx->state = TASK_DISPATCHED;
		break;
	}

	task_cpu = scx_bpf_task_cpu(p);
	scx_bpf_kick_cpu(task_cpu, SCX_KICK_IDLE);
}
|
|
|
|
/*
 * ops.dequeue(): counts every invocation, then validates the per-task state
 * machine. Calls made in the terminal-DSQ scenarios (0, 1, 3, 4) are reported
 * as errors. Otherwise the dequeue is classified by SCX_DEQ_SCHED_CHANGE in
 * @deq_flags and the task state is advanced accordingly.
 */
void BPF_STRUCT_OPS(dequeue_dequeue, struct task_struct *p, u64 deq_flags)
{
	struct task_ctx *tctx;

	/* Counted even when the task has no context. */
	__sync_fetch_and_add(&dequeue_cnt, 1);

	tctx = try_lookup_task_ctx(p);
	if (!tctx)
		return;

	/*
	 * Scenarios 0, 1, 3 and 4 only use terminal DSQs (local and global),
	 * where tasks bypass the BPF scheduler entirely and ops.dequeue()
	 * should never run. Getting here means a kernel bug.
	 */
	switch (test_scenario) {
	case 0:
	case 3:
		scx_bpf_error("%d (%s): dequeue called for local DSQ scenario",
			      p->pid, p->comm);
		return;
	case 1:
	case 4:
		scx_bpf_error("%d (%s): dequeue called for global DSQ scenario",
			      p->pid, p->comm);
		return;
	default:
		break;
	}

	if (!(deq_flags & SCX_DEQ_SCHED_CHANGE)) {
		/*
		 * Regular dispatch dequeue: the kernel is moving the task
		 * from BPF custody to a terminal DSQ.
		 */
		__sync_fetch_and_add(&dispatch_dequeue_cnt, 1);

		switch (tctx->state) {
		case TASK_ENQUEUED:
			/* Normal path. */
			tctx->state = TASK_DISPATCHED;
			break;
		case TASK_NONE:
			/*
			 * Already dequeued by a property change while it was
			 * on a DSQ (dispatched but not yet consumed): leave
			 * the state as NONE.
			 */
			break;
		default:
			scx_bpf_error("%d (%s): dispatch dequeue from state %d seq=%llu",
				      p->pid, p->comm, tctx->state, tctx->enqueue_seq);
			break;
		}

		return;
	}

	/*
	 * Property change interrupting the workflow. Valid from both the
	 * ENQUEUED and DISPATCHED states; anything else is reported.
	 */
	__sync_fetch_and_add(&change_dequeue_cnt, 1);

	if (!(tctx->state == TASK_ENQUEUED || tctx->state == TASK_DISPATCHED))
		scx_bpf_error("%d (%s): invalid property change dequeue state=%d seq=%llu",
			      p->pid, p->comm, tctx->state, tctx->enqueue_seq);

	/*
	 * Back to NONE: the task is outside scheduler control.
	 *
	 * Scenario 6 relies on this: dispatch() checks tctx->state after
	 * popping a PID and skips tasks found in NONE, because they were
	 * dequeued by a property change and must not be dispatched (this
	 * prevents "target CPU not allowed").
	 */
	tctx->state = TASK_NONE;
}
|
|
|
|
/*
 * ops.dispatch(): in scenario 6, pops one PID from global_queue and inserts
 * that task into @cpu's local DSQ (or the global DSQ when @cpu is not in the
 * task's cpus_ptr). In every other scenario it moves a task from SHARED_DSQ
 * to the local DSQ. @prev is unused.
 */
void BPF_STRUCT_OPS(dequeue_dispatch, s32 cpu, struct task_struct *prev)
{
	struct task_struct *task;
	struct task_ctx *tctx;
	u64 dsq_id;
	s32 pid;

	if (test_scenario != 6) {
		scx_bpf_dsq_move_to_local(SHARED_DSQ, 0);
		return;
	}

	/* Nothing queued. */
	if (bpf_map_pop_elem(&global_queue, &pid))
		return;

	/* No task could be resolved for the PID (lookup returned NULL). */
	task = bpf_task_from_pid(pid);
	if (!task)
		return;

	/*
	 * Skip the dispatch when the task was dequeued by a property change
	 * (ops.dequeue() set tctx->state to TASK_NONE) or has no context.
	 */
	tctx = try_lookup_task_ctx(task);
	if (!tctx || tctx->state == TASK_NONE)
		goto release;

	/*
	 * Use this CPU's local DSQ if the task is allowed to run here,
	 * otherwise fall back to the global DSQ.
	 */
	dsq_id = SCX_DSQ_GLOBAL;
	if (bpf_cpumask_test_cpu(cpu, task->cpus_ptr))
		dsq_id = SCX_DSQ_LOCAL_ON | cpu;

	scx_bpf_dsq_insert(task, dsq_id, SCX_SLICE_DFL, 0);

release:
	/* Drop the reference taken by bpf_task_from_pid(). */
	bpf_task_release(task);
}
|
|
|
|
/*
 * ops.init_task(): creates @p's task_ctx in task_ctx_stor.
 * Returns 0 on success or -ENOMEM when the storage could not be obtained.
 * @args is unused.
 */
s32 BPF_STRUCT_OPS(dequeue_init_task, struct task_struct *p,
		   struct scx_init_task_args *args)
{
	struct task_ctx *created;

	created = bpf_task_storage_get(&task_ctx_stor, p, 0,
				       BPF_LOCAL_STORAGE_GET_F_CREATE);

	return created ? 0 : -ENOMEM;
}
|
|
|
|
s32 BPF_STRUCT_OPS_SLEEPABLE(dequeue_init)
|
|
{
|
|
s32 ret;
|
|
|
|
ret = scx_bpf_create_dsq(SHARED_DSQ, -1);
|
|
if (ret)
|
|
return ret;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* ops.exit(): records the exit info @ei into "uei" through UEI_RECORD(). */
void BPF_STRUCT_OPS(dequeue_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}
|
|
|
|
/* struct_ops table registering the callbacks above as the "dequeue_test" scheduler. */
SEC(".struct_ops.link")
struct sched_ext_ops dequeue_ops = {
	.name		= "dequeue_test",
	.flags		= SCX_OPS_ENQ_LAST,

	/* Scheduler lifetime */
	.init		= (void *)dequeue_init,
	.exit		= (void *)dequeue_exit,

	/* Per-task setup */
	.init_task	= (void *)dequeue_init_task,

	/* Scheduling path */
	.select_cpu	= (void *)dequeue_select_cpu,
	.enqueue	= (void *)dequeue_enqueue,
	.dequeue	= (void *)dequeue_dequeue,
	.dispatch	= (void *)dequeue_dispatch,
};
|