io_uring/bpf-ops: implement bpf ops registration

Implement BPF struct ops registration. It's registered off the BPF
path, and can be removed by BPF as well as io_uring. To protect it,
introduce a global lock synchronising registration. ctx->uring_lock can
be nested under it. ctx->bpf_ops is write protected by both locks and
so it's safe to read it under either of them.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://patch.msgid.link/1f46bffd76008de49cbafa2ad77d348810a4f69e.1772109579.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Pavel Begunkov
2026-02-26 12:48:41 +00:00
committed by Jens Axboe
parent 890819248a
commit 98f37634b1
4 changed files with 104 additions and 2 deletions

View File

@@ -8,6 +8,9 @@
#include <linux/llist.h>
#include <uapi/linux/io_uring.h>
struct iou_loop_params;
struct io_uring_bpf_ops;
enum {
/*
* A hint to not wake right away but delay until there are enough of
@@ -488,6 +491,8 @@ struct io_ring_ctx {
DECLARE_HASHTABLE(napi_ht, 4);
#endif
struct io_uring_bpf_ops *bpf_ops;
/*
* Protection for resize vs mmap races - both the mmap and resize
* side will need to grab this lock, to prevent either side from

View File

@@ -5,10 +5,11 @@
#include "io_uring.h"
#include "register.h"
#include "loop.h"
#include "memmap.h"
#include "bpf-ops.h"
#include "loop.h"
static DEFINE_MUTEX(io_bpf_ctrl_mutex);
static const struct btf_type *loop_params_type;
__bpf_kfunc_start_defs();
@@ -143,16 +144,103 @@ static int bpf_io_init_member(const struct btf_type *t,
const struct btf_member *member,
void *kdata, const void *udata)
{
u32 moff = __btf_member_bit_offset(t, member) / 8;
const struct io_uring_bpf_ops *uops = udata;
struct io_uring_bpf_ops *ops = kdata;
switch (moff) {
case offsetof(struct io_uring_bpf_ops, ring_fd):
ops->ring_fd = uops->ring_fd;
return 1;
}
return 0;
}
/*
 * Attach @ops to @ctx. Callers hold both io_bpf_ctrl_mutex and
 * ctx->uring_lock (see bpf_io_reg()).
 */
static int io_install_bpf(struct io_ring_ctx *ctx, struct io_uring_bpf_ops *ops)
{
	/*
	 * BPF-driven waiting is only supported for DEFER_TASKRUN rings
	 * without SQPOLL / IOPOLL; both unsupported cases report
	 * -EOPNOTSUPP.
	 */
	if (ctx->flags & (IORING_SETUP_SQPOLL | IORING_SETUP_IOPOLL))
		return -EOPNOTSUPP;
	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
		return -EOPNOTSUPP;
	/* Only one set of ops can be attached to a ring at a time. */
	if (ctx->bpf_ops)
		return -EBUSY;
	if (WARN_ON_ONCE(!ops->loop_step))
		return -EINVAL;

	/* Link the two objects together; these stores are lock protected. */
	ctx->loop_step = ops->loop_step;
	ctx->bpf_ops = ops;
	ops->priv = ctx;
	return 0;
}
/*
 * struct_ops registration callback: resolve the ring from ops->ring_fd
 * and install the ops under io_bpf_ctrl_mutex + uring_lock.
 *
 * Returns 0 on success, -EBADF/-EOPNOTSUPP/-EBUSY/-EINVAL style errors
 * propagated from file lookup or io_install_bpf().
 */
static int bpf_io_reg(void *kdata, struct bpf_link *link)
{
	struct io_uring_bpf_ops *ops = kdata;
	struct io_ring_ctx *ctx;
	struct file *file;
	int ret = -EBUSY;

	/*
	 * BUG FIX: removed a stray "return -EOPNOTSUPP;" left over from the
	 * pre-implementation stub — it made everything below unreachable,
	 * so registration could never succeed.
	 */
	file = io_uring_register_get_file(ops->ring_fd, false);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	/*
	 * Lock ordering: the global io_bpf_ctrl_mutex first, with
	 * ctx->uring_lock nested under it.
	 */
	scoped_guard(mutex, &io_bpf_ctrl_mutex) {
		guard(mutex)(&ctx->uring_lock);
		ret = io_install_bpf(ctx, ops);
	}
	fput(file);
	return ret;
}
/*
 * Detach the currently installed bpf ops from @ctx. Callers hold both
 * io_bpf_ctrl_mutex and ctx->uring_lock (see bpf_io_unreg() and
 * io_unregister_bpf_ops()).
 */
static void io_eject_bpf(struct io_ring_ctx *ctx)
{
	struct io_uring_bpf_ops *bops = ctx->bpf_ops;

	/* Sanity: ops must be installed and must point back at this ctx. */
	if (WARN_ON_ONCE(!bops) || WARN_ON_ONCE(bops->priv != ctx))
		return;

	/* Sever both directions of the ctx <-> ops link. */
	ctx->loop_step = NULL;
	ctx->bpf_ops = NULL;
	bops->priv = NULL;
}
static void bpf_io_unreg(void *kdata, struct bpf_link *link)
{
struct io_uring_bpf_ops *ops = kdata;
struct io_ring_ctx *ctx;
guard(mutex)(&io_bpf_ctrl_mutex);
ctx = ops->priv;
if (ctx) {
guard(mutex)(&ctx->uring_lock);
if (WARN_ON_ONCE(ctx->bpf_ops != ops))
return;
io_eject_bpf(ctx);
}
}
/*
 * Eject any installed bpf ops as part of ring teardown. Safe to call on
 * rings that never had ops attached.
 */
void io_unregister_bpf_ops(struct io_ring_ctx *ctx)
{
	/*
	 * ->bpf_ops is write protected by io_bpf_ctrl_mutex and uring_lock,
	 * and read protected by either. Try to avoid taking the global lock
	 * for rings that never had any bpf installed.
	 */
	scoped_guard(mutex, &ctx->uring_lock) {
		if (!ctx->bpf_ops)
			return;
	}
	/*
	 * uring_lock was dropped above because the lock order requires
	 * io_bpf_ctrl_mutex to be taken first, with uring_lock nested under
	 * it. ->bpf_ops may have been cleared (e.g. by bpf_io_unreg()) in
	 * the window between dropping and retaking, hence the re-check.
	 */
	guard(mutex)(&io_bpf_ctrl_mutex);
	guard(mutex)(&ctx->uring_lock);
	if (ctx->bpf_ops)
		io_eject_bpf(ctx);
}
static struct bpf_struct_ops bpf_ring_ops = {

View File

@@ -17,4 +17,12 @@ struct io_uring_bpf_ops {
void *priv;
};
#ifdef CONFIG_IO_URING_BPF_OPS
void io_unregister_bpf_ops(struct io_ring_ctx *ctx);
#else
/* No-op stub when BPF ops support is compiled out. */
static inline void io_unregister_bpf_ops(struct io_ring_ctx *ctx)
{
}
#endif

#endif /* IOU_BPF_OPS_H */

View File

@@ -2148,6 +2148,7 @@ static __cold void io_req_caches_free(struct io_ring_ctx *ctx)
static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
{
io_unregister_bpf_ops(ctx);
io_sq_thread_finish(ctx);
mutex_lock(&ctx->uring_lock);