Files
linux/tools/testing/selftests/bpf/progs/ringbuf_bench.c
Xu Kuohai f9db3a3822 selftests/bpf/benchs: Add overwrite mode benchmark for BPF ring buffer
Add --rb-overwrite option to benchmark BPF ring buffer in overwrite mode.
Since overwrite mode is not yet supported by libbpf for consumer, also add
--rb-bench-producer option to benchmark producer directly without a consumer.

Benchmarks on an x86_64 and an arm64 CPU are shown below for reference.

- AMD EPYC 9654 (x86_64)

Ringbuf, multi-producer contention in overwrite mode, no consumer
=================================================================
rb-prod nr_prod 1    32.180 ± 0.033M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 2    9.617 ± 0.003M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 3    8.810 ± 0.002M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 4    9.272 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 8    9.173 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 12   3.086 ± 0.032M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 16   2.945 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 20   2.519 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 24   2.545 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 28   2.363 ± 0.024M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 32   2.357 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 36   2.267 ± 0.011M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 40   2.284 ± 0.020M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 44   2.215 ± 0.025M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 48   2.193 ± 0.023M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 52   2.208 ± 0.024M/s (drops 0.000 ± 0.000M/s)

- HiSilicon Kunpeng 920 (arm64)

Ringbuf, multi-producer contention in overwrite mode, no consumer
=================================================================
rb-prod nr_prod 1    14.478 ± 0.006M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 2    21.787 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 3    6.045 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 4    5.352 ± 0.003M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 8    4.850 ± 0.002M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 12   3.542 ± 0.016M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 16   3.509 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 20   3.171 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 24   3.154 ± 0.014M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 28   2.974 ± 0.015M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 32   3.167 ± 0.014M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 36   2.903 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 40   2.866 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 44   2.914 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 48   2.806 ± 0.012M/s (drops 0.000 ± 0.000M/s)
Rb-prod nr_prod 52   2.840 ± 0.012M/s (drops 0.000 ± 0.000M/s)

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20251018035738.4039621-4-xukuohai@huaweicloud.com
2025-10-27 19:47:32 -07:00

73 lines
1.6 KiB
C

// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
#include <stdbool.h>
#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
} ringbuf SEC(".maps");
const volatile int batch_cnt = 0;
const volatile long use_output = 0;
const volatile bool bench_producer = false;
long sample_val = 42;
long dropped __attribute__((aligned(128))) = 0;
long hits __attribute__((aligned(128))) = 0;
const volatile long wakeup_data_size = 0;
static __always_inline long get_flags()
{
long sz;
if (bench_producer)
return BPF_RB_NO_WAKEUP;
if (!wakeup_data_size)
return 0;
sz = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP;
}
SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_ringbuf(void *ctx)
{
long *sample, flags;
int i;
if (!use_output) {
for (i = 0; i < batch_cnt; i++) {
sample = bpf_ringbuf_reserve(&ringbuf,
sizeof(sample_val), 0);
if (!sample) {
__sync_add_and_fetch(&dropped, 1);
} else {
*sample = sample_val;
flags = get_flags();
bpf_ringbuf_submit(sample, flags);
if (bench_producer)
__sync_add_and_fetch(&hits, 1);
}
}
} else {
for (i = 0; i < batch_cnt; i++) {
flags = get_flags();
if (bpf_ringbuf_output(&ringbuf, &sample_val,
sizeof(sample_val), flags))
__sync_add_and_fetch(&dropped, 1);
else if (bench_producer)
__sync_add_and_fetch(&hits, 1);
}
}
return 0;
}