Files
linux/tools/sched_ext/scx_central.c
Zhao Mengmeng d6edb15ad9 scx_central: Defer timer start to central dispatch to fix init error
scx_central currently assumes that ops.init() runs on the selected
central CPU and aborts otherwise. This is no longer true, as ops.init()
is invoked from the scx_enable_helper thread, which can run on any
CPU.

As a result, sched_setaffinity() from userspace doesn't work, causing
scx_central to fail when loading with:

[ 1985.319942] sched_ext: central: scx_central.bpf.c:314: init from non-central CPU
[ 1985.320317]    scx_exit+0xa3/0xd0
[ 1985.320535]    scx_bpf_error_bstr+0xbd/0x220
[ 1985.320840]    bpf_prog_3a445a8163fa8149_central_init+0x103/0x1ba
[ 1985.321073]    bpf__sched_ext_ops_init+0x40/0xa8
[ 1985.321286]    scx_root_enable_workfn+0x507/0x1650
[ 1985.321461]    kthread_worker_fn+0x260/0x940
[ 1985.321745]    kthread+0x303/0x3e0
[ 1985.321901]    ret_from_fork+0x589/0x7d0
[ 1985.322065]    ret_from_fork_asm+0x1a/0x30

DEBUG DUMP
===================================================================

central: root
scx_enable_help[134] triggered exit kind 1025:
  scx_bpf_error (scx_central.bpf.c:314: init from non-central CPU)

Fix this by:
- Defer bpf_timer_start() to the first dispatch on the central CPU.
- Initialize the BPF timer in central_init() and kick the central CPU
to guarantee entering the dispatch path on the central CPU immediately.
- Remove the unnecessary sched_setaffinity() call in userspace.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
Signed-off-by: Tejun Heo <tj@kernel.org>
2026-03-27 07:33:00 -10:00

127 lines
3.4 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
* Copyright (c) 2022 Tejun Heo <tj@kernel.org>
* Copyright (c) 2022 David Vernet <dvernet@meta.com>
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <inttypes.h>
#include <signal.h>
#include <assert.h>
#include <libgen.h>
#include <bpf/bpf.h>
#include <scx/common.h>
#include "scx_central.bpf.skel.h"
const char help_fmt[] =
"A central FIFO sched_ext scheduler.\n"
"\n"
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-s SLICE_US] [-c CPU] [-v]\n"
"\n"
" -s SLICE_US Override slice duration\n"
" -c CPU Override the central CPU (default: 0)\n"
" -v Print libbpf debug messages\n"
" -h Display this help and exit\n";
static bool verbose;
static volatile int exit_req;
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
if (level == LIBBPF_DEBUG && !verbose)
return 0;
return vfprintf(stderr, format, args);
}
static void sigint_handler(int dummy)
{
exit_req = 1;
}
int main(int argc, char **argv)
{
struct scx_central *skel;
struct bpf_link *link;
__u64 seq = 0, ecode;
__s32 opt;
libbpf_set_print(libbpf_print_fn);
signal(SIGINT, sigint_handler);
signal(SIGTERM, sigint_handler);
restart:
optind = 1;
skel = SCX_OPS_OPEN(central_ops, scx_central);
skel->rodata->central_cpu = 0;
skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
assert(skel->rodata->nr_cpu_ids > 0);
assert(skel->rodata->nr_cpu_ids <= INT32_MAX);
while ((opt = getopt(argc, argv, "s:c:vh")) != -1) {
switch (opt) {
case 's':
skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
break;
case 'c': {
u32 central_cpu = strtoul(optarg, NULL, 0);
if (central_cpu >= skel->rodata->nr_cpu_ids) {
fprintf(stderr, "invalid central CPU id value, %u given (%u max)\n", central_cpu, skel->rodata->nr_cpu_ids);
scx_central__destroy(skel);
return -1;
}
skel->rodata->central_cpu = (s32)central_cpu;
break;
}
case 'v':
verbose = true;
break;
default:
fprintf(stderr, help_fmt, basename(argv[0]));
return opt != 'h';
}
}
/* Resize arrays so their element count is equal to cpu count. */
RESIZE_ARRAY(skel, data, cpu_gimme_task, skel->rodata->nr_cpu_ids);
RESIZE_ARRAY(skel, data, cpu_started_at, skel->rodata->nr_cpu_ids);
SCX_OPS_LOAD(skel, central_ops, scx_central, uei);
link = SCX_OPS_ATTACH(skel, central_ops, scx_central);
if (!skel->data->timer_pinned)
printf("WARNING : BPF_F_TIMER_CPU_PIN not available, timer not pinned to central\n");
while (!exit_req && !UEI_EXITED(skel, uei)) {
printf("[SEQ %llu]\n", seq++);
printf("total :%10" PRIu64 " local:%10" PRIu64 " queued:%10" PRIu64 " lost:%10" PRIu64 "\n",
skel->bss->nr_total,
skel->bss->nr_locals,
skel->bss->nr_queued,
skel->bss->nr_lost_pids);
printf("timer :%10" PRIu64 " dispatch:%10" PRIu64 " mismatch:%10" PRIu64 " retry:%10" PRIu64 "\n",
skel->bss->nr_timers,
skel->bss->nr_dispatches,
skel->bss->nr_mismatches,
skel->bss->nr_retries);
printf("overflow:%10" PRIu64 "\n",
skel->bss->nr_overflows);
fflush(stdout);
sleep(1);
}
bpf_link__destroy(link);
ecode = UEI_REPORT(skel, uei);
scx_central__destroy(skel);
if (UEI_ECODE_RESTART(ecode))
goto restart;
return 0;
}