Files
linux/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c
Kuniyuki Iwashima 5f941dd87b selftests/bpf: Add test for sk->sk_bypass_prot_mem.
The test does the following for IPv4/IPv6 x TCP/UDP sockets
with/without sk->sk_bypass_prot_mem, which can be turned on by
net.core.bypass_prot_mem or bpf_setsockopt(SK_BPF_BYPASS_PROT_MEM).

  1. Create socket pairs
  2. Send NR_PAGES (32) of data (TCP consumes around 35 pages,
     and UDP consuems 66 pages due to skb overhead)
  3. Read memory_allocated from sk->sk_prot->memory_allocated and
     sk->sk_prot->memory_per_cpu_fw_alloc
  4. Check if unread data is charged to memory_allocated

If sk->sk_bypass_prot_mem is set, memory_allocated should not be
changed, but we allow a small error (up to 10 pages) in case
other processes on the host use some amounts of TCP/UDP memory.

The amount of allocated pages is buffered in the per-cpu variable
{tcp,udp}_memory_per_cpu_fw_alloc up to +/- net.core.mem_pcpu_rsv
before being reported to {tcp,udp}_memory_allocated.

At 3., memory_allocated is calculated from the 2 variables at
fentry of socket create function.

We drain the receive queue only for UDP before close() because UDP
recv queue is destroyed after RCU grace period.  When I printed
memory_allocated, UDP bypass cases sometimes saw the no-bypass
case's leftover, but it's still in the small error range (<10 pages).

  bpf_trace_printk: memory_allocated: 0   <-- TCP no-bypass
  bpf_trace_printk: memory_allocated: 35
  bpf_trace_printk: memory_allocated: 0   <-- TCP w/ sysctl
  bpf_trace_printk: memory_allocated: 0
  bpf_trace_printk: memory_allocated: 0   <-- TCP w/ bpf
  bpf_trace_printk: memory_allocated: 0
  bpf_trace_printk: memory_allocated: 0   <-- UDP no-bypass
  bpf_trace_printk: memory_allocated: 66
  bpf_trace_printk: memory_allocated: 2   <-- UDP w/ sysctl (2 pages leftover)
  bpf_trace_printk: memory_allocated: 2
  bpf_trace_printk: memory_allocated: 2   <-- UDP w/ bpf (2 pages leftover)
  bpf_trace_printk: memory_allocated: 2

We prefer finishing tests faster than oversleeping for call_rcu()
 + sk_destruct().

The test completes within 2s on QEMU (64 CPUs) w/ KVM.

  # time ./test_progs -t sk_bypass
  #371/1   sk_bypass_prot_mem/TCP  :OK
  #371/2   sk_bypass_prot_mem/UDP  :OK
  #371/3   sk_bypass_prot_mem/TCPv6:OK
  #371/4   sk_bypass_prot_mem/UDPv6:OK
  #371     sk_bypass_prot_mem:OK
  Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED

  real	0m1.481s
  user	0m0.181s
  sys	0m0.441s

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Link: https://patch.msgid.link/20251014235604.3057003-7-kuniyu@google.com
2025-10-16 12:04:47 -07:00

105 lines
2.3 KiB
C

// SPDX-License-Identifier: GPL-2.0
/* Copyright 2025 Google LLC */
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <errno.h>
/* Kernel's per-cpu forward-alloc counters, resolved by libbpf via ksym. */
extern int tcp_memory_per_cpu_fw_alloc __ksym;
extern int udp_memory_per_cpu_fw_alloc __ksym;
/* Number of CPUs to scan in bpf_loop(); presumably set by the userspace
 * loader before attach — TODO confirm against the test harness.
 */
int nr_cpus;
/* Arming flags: while true, the matching fentry prog takes one snapshot
 * and then clears the flag (see fentry_init_sock()).
 */
bool tcp_activated, udp_activated;
/* Snapshots of total allocated pages, read back by userspace. */
long tcp_memory_allocated, udp_memory_allocated;
/* Context handed to the bpf_loop() callback that sums per-cpu counters
 * into *memory_allocated.
 */
struct sk_prot {
long *memory_allocated;
int *memory_per_cpu_fw_alloc;
};
/* bpf_loop() callback: fold CPU @cpu's not-yet-flushed forward-alloc
 * delta into the running total.  Always returns 0 to continue the loop.
 */
static int drain_memory_per_cpu_fw_alloc(__u32 cpu, struct sk_prot *ctx)
{
	int *fw_alloc = bpf_per_cpu_ptr(ctx->memory_per_cpu_fw_alloc, cpu);

	if (!fw_alloc)
		return 0;

	*ctx->memory_allocated += *fw_alloc;
	return 0;
}
static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc)
{
struct sock *sk = bpf_core_cast(_sk, struct sock);
struct sk_prot sk_prot_ctx;
long memory_allocated;
/* net_aligned_data.{tcp,udp}_memory_allocated was not available. */
memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter;
sk_prot_ctx.memory_allocated = &memory_allocated;
sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc;
bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0);
return memory_allocated;
}
/* Shared fentry body: while the protocol's flag is armed, snapshot its
 * current memory_allocated and disarm so only the first socket records it.
 */
static void fentry_init_sock(struct sock *sk, bool *activated,
			     long *memory_allocated, int *memory_per_cpu_fw_alloc)
{
	if (*activated) {
		*memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc);
		*activated = false;
	}
}
/* Snapshot TCP memory accounting when an armed TCP socket is initialized. */
SEC("fentry/tcp_init_sock")
int BPF_PROG(fentry_tcp_init_sock, struct sock *sk)
{
fentry_init_sock(sk, &tcp_activated,
&tcp_memory_allocated, &tcp_memory_per_cpu_fw_alloc);
return 0;
}
/* Snapshot UDP memory accounting when an armed UDP socket is initialized. */
SEC("fentry/udp_init_sock")
int BPF_PROG(fentry_udp_init_sock, struct sock *sk)
{
fentry_init_sock(sk, &udp_activated,
&udp_memory_allocated, &udp_memory_per_cpu_fw_alloc);
return 0;
}
/* cgroup/sock_create hook: turn on SK_BPF_BYPASS_PROT_MEM for every new
 * socket in the cgroup, then read the option back to verify it stuck.
 * Returns 1 to allow the socket; on any failure, records the error via
 * bpf_set_retval() and returns 0 to reject it.
 */
SEC("cgroup/sock_create")
int sock_create(struct bpf_sock *ctx)
{
	int val = 1;
	int err;

	err = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM,
			     &val, sizeof(val));
	if (!err) {
		val = 0;
		err = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM,
				     &val, sizeof(val));
		if (!err && val != 1)
			err = -EINVAL;
	}

	if (err) {
		bpf_set_retval(err);
		return 0;
	}

	return 1;
}
char LICENSE[] SEC("license") = "GPL";