mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
The io_uring_enter() has a fixed order of execution: it submits requests, waits for completions, and returns to the user. Allow it to optionally be replaced with a custom loop driven by a callback called loop_step. The basic requirement for the callback is that it should be able to submit requests, wait for completions, parse them and repeat. Most of the communication including parameter passing can be implemented via shared memory. The callback should return IOU_LOOP_CONTINUE to continue execution or IOU_LOOP_STOP to return to the user space. Note that the kernel may decide to prematurely terminate it as well, e.g. in case the process was signalled or killed. The hook takes a structure with parameters. It can be used to ask the kernel to wait for CQEs by setting cq_wait_idx to the CQE index it wants to wait for. Spurious wake ups are possible and even likely; the callback is expected to handle them. There will be more parameters in the future, like a timeout. It can be used with kernel callbacks, for example, as a slow path deprecation mechanism overwriting SQEs and emulating the wanted behaviour; however, it's more useful together with BPF programs implemented in following patches. Note that keeping it separately from the normal io_uring wait loop makes things much simpler and cleaner. It keeps it in one place instead of spreading a bunch of checks in different places, including disabling the submission path. It holds the lock by default, which is a better fit for BPF synchronisation and the loop execution model. It nicely avoids existing quirks like forced wake ups on timeout request completion. And it should be easier to implement new features. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Link: https://patch.msgid.link/a2d369aa1c9dd23ad7edac9220cffc563abcaed6.1772109579.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
92 lines
1.9 KiB
C
92 lines
1.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#include "io_uring.h"
|
|
#include "wait.h"
|
|
#include "loop.h"
|
|
|
|
static inline int io_loop_nr_cqes(const struct io_ring_ctx *ctx,
|
|
const struct iou_loop_params *lp)
|
|
{
|
|
return lp->cq_wait_idx - READ_ONCE(ctx->rings->cq.tail);
|
|
}
|
|
|
|
/*
 * Arm the task for a CQ wait: publish the number of CQEs we are waiting
 * for, then move the task into TASK_INTERRUPTIBLE.
 *
 * NOTE(review): the ->cq_wait_nr store before the state change presumably
 * pairs with the completion-side wakeup check — do not reorder these two
 * statements; confirm against the io_uring wake path.
 */
static inline void io_loop_wait_start(struct io_ring_ctx *ctx, unsigned nr_wait)
{
	atomic_set(&ctx->cq_wait_nr, nr_wait);
	set_current_state(TASK_INTERRUPTIBLE);
}
|
|
|
|
/*
 * Undo io_loop_wait_start(): put the task back to TASK_RUNNING and reset
 * ->cq_wait_nr to its idle value so completions stop trying to wake us.
 *
 * __set_current_state() (no barrier variant) is used here; presumably safe
 * because we are either past schedule() or never slept — TODO confirm.
 */
static inline void io_loop_wait_finish(struct io_ring_ctx *ctx)
{
	__set_current_state(TASK_RUNNING);
	atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
}
|
|
|
|
static void io_loop_wait(struct io_ring_ctx *ctx, struct iou_loop_params *lp,
|
|
unsigned nr_wait)
|
|
{
|
|
io_loop_wait_start(ctx, nr_wait);
|
|
|
|
if (unlikely(io_local_work_pending(ctx) ||
|
|
io_loop_nr_cqes(ctx, lp) <= 0) ||
|
|
READ_ONCE(ctx->check_cq)) {
|
|
io_loop_wait_finish(ctx);
|
|
return;
|
|
}
|
|
|
|
mutex_unlock(&ctx->uring_lock);
|
|
schedule();
|
|
io_loop_wait_finish(ctx);
|
|
mutex_lock(&ctx->uring_lock);
|
|
}
|
|
|
|
/*
 * Core of the custom execution loop: repeatedly invoke the registered
 * ->loop_step callback, then wait for / process completions, until the
 * callback returns IOU_LOOP_STOP or the task is interrupted.
 *
 * Called with ->uring_lock held. io_loop_wait() and the task_work section
 * below drop and retake the lock internally.
 *
 * Returns:
 *   0        - the callback asked to stop (IOU_LOOP_STOP)
 *   -EFAULT  - no ->loop_step callback is set (re-checked every iteration)
 *   -EINVAL  - the callback returned an unknown value
 *   -EINTR   - a signal is pending for the current task
 */
static int __io_run_loop(struct io_ring_ctx *ctx)
{
	struct iou_loop_params lp = {};

	while (true) {
		int nr_wait, step_res;

		/* NOTE(review): re-checked each pass — presumably the
		 * callback can be torn down while the loop runs; confirm. */
		if (unlikely(!ctx->loop_step))
			return -EFAULT;

		step_res = ctx->loop_step(ctx, &lp);
		if (step_res == IOU_LOOP_STOP)
			break;
		if (step_res != IOU_LOOP_CONTINUE)
			return -EINVAL;

		/* Positive iff the callback set cq_wait_idx ahead of the
		 * current CQ tail, i.e. it wants to wait for completions. */
		nr_wait = io_loop_nr_cqes(ctx, &lp);
		if (nr_wait > 0)
			io_loop_wait(ctx, &lp, nr_wait);
		else
			nr_wait = 0;

		if (task_work_pending(current)) {
			/* Generic task_work runs without the ring lock. */
			mutex_unlock(&ctx->uring_lock);
			io_run_task_work();
			mutex_lock(&ctx->uring_lock);
		}
		/* Bail out to userspace so the signal can be delivered. */
		if (unlikely(task_sigpending(current)))
			return -EINTR;
		io_run_local_work_locked(ctx, nr_wait);

		/* Flush overflowed CQEs so the next step can observe them. */
		if (READ_ONCE(ctx->check_cq) & BIT(IO_CHECK_CQ_OVERFLOW_BIT))
			io_cqring_overflow_flush_locked(ctx);
	}

	return 0;
}
|
|
|
|
int io_run_loop(struct io_ring_ctx *ctx)
|
|
{
|
|
int ret;
|
|
|
|
if (!io_allowed_run_tw(ctx))
|
|
return -EEXIST;
|
|
|
|
mutex_lock(&ctx->uring_lock);
|
|
ret = __io_run_loop(ctx);
|
|
mutex_unlock(&ctx->uring_lock);
|
|
return ret;
|
|
}
|