mirror of
https://github.com/torvalds/linux.git
synced 2026-04-23 00:55:48 -04:00
Add --rb-overwrite option to benchmark BPF ring buffer in overwrite mode. Since overwrite mode is not yet supported by libbpf for consumer, also add --rb-bench-producer option to benchmark producer directly without a consumer. Benchmarks on an x86_64 and an arm64 CPU are shown below for reference. - AMD EPYC 9654 (x86_64) Ringbuf, multi-producer contention in overwrite mode, no consumer ================================================================= rb-prod nr_prod 1 32.180 ± 0.033M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 2 9.617 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 3 8.810 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 4 9.272 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 8 9.173 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 12 3.086 ± 0.032M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 16 2.945 ± 0.021M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 20 2.519 ± 0.021M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 24 2.545 ± 0.021M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 28 2.363 ± 0.024M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 32 2.357 ± 0.021M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 36 2.267 ± 0.011M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 40 2.284 ± 0.020M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 44 2.215 ± 0.025M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 48 2.193 ± 0.023M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 52 2.208 ± 0.024M/s (drops 0.000 ± 0.000M/s) - HiSilicon Kunpeng 920 (arm64) Ringbuf, multi-producer contention in overwrite mode, no consumer ================================================================= rb-prod nr_prod 1 14.478 ± 0.006M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 2 21.787 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 3 6.045 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 4 5.352 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 8 4.850 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 12 3.542 ± 0.016M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 16 3.509 ± 0.021M/s (drops 
0.000 ± 0.000M/s) rb-prod nr_prod 20 3.171 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 24 3.154 ± 0.014M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 28 2.974 ± 0.015M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 32 3.167 ± 0.014M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 36 2.903 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 40 2.866 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 44 2.914 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 48 2.806 ± 0.012M/s (drops 0.000 ± 0.000M/s) rb-prod nr_prod 52 2.840 ± 0.012M/s (drops 0.000 ± 0.000M/s) Signed-off-by: Xu Kuohai <xukuohai@huawei.com> Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/bpf/20251018035738.4039621-4-xukuohai@huaweicloud.com
73 lines
1.6 KiB
C
73 lines
1.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
// Copyright (c) 2020 Facebook
|
|
|
|
#include <stdbool.h>
|
|
#include <linux/bpf.h>
|
|
#include <stdint.h>
|
|
#include <bpf/bpf_helpers.h>
|
|
#include "bpf_misc.h"
|
|
|
|
/* License string for this BPF object, emitted into the "license" section. */
char _license[] SEC("license") = "GPL";
|
|
|
|
/*
 * The ring buffer map under test. Only the map type is declared here;
 * no size is given in this definition (presumably configured by the
 * userspace side of the benchmark before load -- not visible in this file).
 */
struct {
|
|
__uint(type, BPF_MAP_TYPE_RINGBUF);
|
|
} ringbuf SEC(".maps");
|
|
|
|
/*
 * Benchmark configuration and counters.
 *
 * The const volatile variables are read-only from this program's point of
 * view (presumably filled in by the userspace loader -- TODO confirm against
 * the bench driver). The plain globals are written by this program.
 */

/* Number of samples bench_ringbuf() attempts to produce per invocation. */
const volatile int batch_cnt = 0;
|
|
/* Non-zero: use bpf_ringbuf_output(); zero: use reserve + submit. */
const volatile long use_output = 0;
|
|
/*
 * When true, get_flags() always returns BPF_RB_NO_WAKEUP and every
 * successfully produced sample is counted in `hits`.
 */
const volatile bool bench_producer = false;
|
|
|
|
/* Payload written into every sample. */
long sample_val = 42;
|
|
/*
 * Number of samples that could not be reserved/output. Updated with
 * __sync_add_and_fetch(). The 128-byte alignment presumably keeps this
 * counter away from `hits` to avoid false sharing -- assumption.
 */
long dropped __attribute__((aligned(128))) = 0;
|
|
/*
 * Number of samples successfully produced; only incremented when
 * bench_producer is set.
 */
long hits __attribute__((aligned(128))) = 0;
|
|
|
|
/*
 * Threshold used by get_flags(): zero means "pass no flags"; otherwise the
 * value is compared against bpf_ringbuf_query(BPF_RB_AVAIL_DATA) to choose
 * between BPF_RB_FORCE_WAKEUP and BPF_RB_NO_WAKEUP.
 */
const volatile long wakeup_data_size = 0;
|
|
|
|
static __always_inline long get_flags()
|
|
{
|
|
long sz;
|
|
|
|
if (bench_producer)
|
|
return BPF_RB_NO_WAKEUP;
|
|
|
|
if (!wakeup_data_size)
|
|
return 0;
|
|
|
|
sz = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
|
|
return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP;
|
|
}
|
|
|
|
/*
 * Benchmark body, placed in the "fentry/" SYS_PREFIX "sys_getpgid" section.
 *
 * Each invocation tries to push batch_cnt copies of sample_val into
 * `ringbuf`, using either bpf_ringbuf_output() (use_output != 0) or the
 * bpf_ringbuf_reserve() + bpf_ringbuf_submit() pair (use_output == 0).
 * Failed attempts bump `dropped`; successful ones bump `hits`, but only
 * when bench_producer is set. Always returns 0.
 */
SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_ringbuf(void *ctx)
{
	long *slot, wakeup;
	int n;

	if (use_output) {
		/* Copy-based path: flags are chosen before each output call. */
		for (n = 0; n < batch_cnt; n++) {
			wakeup = get_flags();
			if (bpf_ringbuf_output(&ringbuf, &sample_val,
					       sizeof(sample_val), wakeup)) {
				__sync_add_and_fetch(&dropped, 1);
				continue;
			}
			if (bench_producer)
				__sync_add_and_fetch(&hits, 1);
		}
		return 0;
	}

	/* Reserve/submit path: flags are chosen after the slot is filled. */
	for (n = 0; n < batch_cnt; n++) {
		slot = bpf_ringbuf_reserve(&ringbuf, sizeof(sample_val), 0);
		if (!slot) {
			__sync_add_and_fetch(&dropped, 1);
			continue;
		}

		*slot = sample_val;
		wakeup = get_flags();
		bpf_ringbuf_submit(slot, wakeup);
		if (bench_producer)
			__sync_add_and_fetch(&hits, 1);
	}
	return 0;
}
|