Merge tag 'io_uring-6.15-20250403' of git://git.kernel.dk/linux

Pull more io_uring updates from Jens Axboe:
 "Set of fixes/updates for io_uring that should go into this release.

  The ublk bits could've gone via either tree - usually I put them in
  block, but they got a bit mixed this series with the zero-copy
  supported that ended up dipping into both trees.

  This contains:

   - Fix for sendmsg zc, include in pinned pages accounting like we do
     for the other zc types

   - Series for ublk fixing request aborting, doing various little
     cleanups, fixing some zc issues, and adding queue_rqs support

   - Another ublk series doing some code cleanups

   - Series cleaning up the io_uring send path, mostly in preparation
     for registered buffers

   - Series doing little MSG_RING cleanups

   - Fix for the newly added zc rx, fixing len being 0 for the last
     invocation of the callback

   - Add vectored registered buffer support for ublk. With that, then
     ublk also supports this feature in the kernel revision where it
     could generically introduced for rw/net

   - A bunch of selftest additions for ublk. This is the majority of the
     diffstat

   - Silence a KCSAN data race warning for io-wq

   - Various little cleanups and fixes"

* tag 'io_uring-6.15-20250403' of git://git.kernel.dk/linux: (44 commits)
  io_uring: always do atomic put from iowq
  selftests: ublk: enable zero copy for stripe target
  io_uring: support vectored kernel fixed buffer
  block: add for_each_mp_bvec()
  io_uring: add validate_fixed_range() for validate fixed buffer
  selftests: ublk: kublk: fix an error log line
  selftests: ublk: kublk: use ioctl-encoded opcodes
  io_uring/zcrx: return early from io_zcrx_recv_skb if readlen is 0
  io_uring/net: avoid import_ubuf for regvec send
  io_uring/rsrc: check size when importing reg buffer
  io_uring: cleanup {g,s]etsockopt sqe reading
  io_uring: hide caches sqes from drivers
  io_uring: make zcrx depend on CONFIG_IO_URING
  io_uring: add req flag invariant build assertion
  Documentation: ublk: remove dead footnote
  selftests: ublk: specify io_cmd_buf pointer type
  ublk: specify io_cmd_buf pointer type
  io_uring: don't pass ctx to tw add remote helper
  io_uring/msg: initialise msg request opcode
  io_uring/msg: rename io_double_lock_ctx()
  ...
This commit is contained in:
Linus Torvalds
2025-04-03 15:48:58 -07:00
30 changed files with 673 additions and 238 deletions

View File

@@ -4,6 +4,8 @@ CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)
LDLIBS += -lpthread -lm -luring
TEST_PROGS := test_generic_01.sh
TEST_PROGS += test_generic_02.sh
TEST_PROGS += test_generic_03.sh
TEST_PROGS += test_null_01.sh
TEST_PROGS += test_null_02.sh
@@ -11,8 +13,11 @@ TEST_PROGS += test_loop_01.sh
TEST_PROGS += test_loop_02.sh
TEST_PROGS += test_loop_03.sh
TEST_PROGS += test_loop_04.sh
TEST_PROGS += test_loop_05.sh
TEST_PROGS += test_stripe_01.sh
TEST_PROGS += test_stripe_02.sh
TEST_PROGS += test_stripe_03.sh
TEST_PROGS += test_stripe_04.sh
TEST_PROGS += test_stress_01.sh
TEST_PROGS += test_stress_02.sh

View File

@@ -99,7 +99,7 @@ static int __ublk_ctrl_cmd(struct ublk_dev *dev,
static int ublk_ctrl_stop_dev(struct ublk_dev *dev)
{
struct ublk_ctrl_cmd_data data = {
.cmd_op = UBLK_CMD_STOP_DEV,
.cmd_op = UBLK_U_CMD_STOP_DEV,
};
return __ublk_ctrl_cmd(dev, &data);
@@ -169,7 +169,7 @@ static int ublk_ctrl_get_params(struct ublk_dev *dev,
struct ublk_params *params)
{
struct ublk_ctrl_cmd_data data = {
.cmd_op = UBLK_CMD_GET_PARAMS,
.cmd_op = UBLK_U_CMD_GET_PARAMS,
.flags = CTRL_CMD_HAS_BUF,
.addr = (__u64)params,
.len = sizeof(*params),
@@ -215,7 +215,7 @@ static void ublk_ctrl_dump(struct ublk_dev *dev)
ret = ublk_ctrl_get_params(dev, &p);
if (ret < 0) {
ublk_err("failed to get params %m\n");
ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
return;
}
@@ -322,7 +322,7 @@ static int ublk_queue_init(struct ublk_queue *q)
cmd_buf_size = ublk_queue_cmd_buf_sz(q);
off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ,
MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
if (q->io_cmd_buf == MAP_FAILED) {
ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",

View File

@@ -128,7 +128,7 @@ struct ublk_queue {
unsigned int io_inflight;
struct ublk_dev *dev;
const struct ublk_tgt_ops *tgt_ops;
char *io_cmd_buf;
struct ublksrv_io_desc *io_cmd_buf;
struct io_uring ring;
struct ublk_io ios[UBLK_QUEUE_DEPTH];
#define UBLKSRV_QUEUE_STOPPING (1U << 0)
@@ -302,7 +302,7 @@ static inline void ublk_mark_io_done(struct ublk_io *io, int res)
static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
{
return (struct ublksrv_io_desc *)&(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
return &q->io_cmd_buf[tag];
}
static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)

View File

@@ -17,7 +17,8 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
dev->tgt.dev_size = dev_size;
dev->tgt.params = (struct ublk_params) {
.types = UBLK_PARAM_TYPE_BASIC,
.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN |
UBLK_PARAM_TYPE_SEGMENT,
.basic = {
.logical_bs_shift = 9,
.physical_bs_shift = 12,
@@ -26,6 +27,14 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
.max_sectors = info->max_io_buf_bytes >> 9,
.dev_sectors = dev_size >> 9,
},
.dma = {
.alignment = 4095,
},
.seg = {
.seg_boundary_mask = 4095,
.max_segment_size = 32 << 10,
.max_segments = 32,
},
};
if (info->flags & UBLK_F_SUPPORT_ZERO_COPY)

View File

@@ -111,43 +111,67 @@ static void calculate_stripe_array(const struct stripe_conf *conf,
}
}
static inline enum io_uring_op stripe_to_uring_op(const struct ublksrv_io_desc *iod)
static inline enum io_uring_op stripe_to_uring_op(
const struct ublksrv_io_desc *iod, int zc)
{
unsigned ublk_op = ublksrv_get_op(iod);
if (ublk_op == UBLK_IO_OP_READ)
return IORING_OP_READV;
return zc ? IORING_OP_READV_FIXED : IORING_OP_READV;
else if (ublk_op == UBLK_IO_OP_WRITE)
return IORING_OP_WRITEV;
return zc ? IORING_OP_WRITEV_FIXED : IORING_OP_WRITEV;
assert(0);
}
static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
{
const struct stripe_conf *conf = get_chunk_shift(q);
enum io_uring_op op = stripe_to_uring_op(iod);
int zc = !!(ublk_queue_use_zc(q) != 0);
enum io_uring_op op = stripe_to_uring_op(iod, zc);
struct io_uring_sqe *sqe[NR_STRIPE];
struct stripe_array *s = alloc_stripe_array(conf, iod);
struct ublk_io *io = ublk_get_io(q, tag);
int i;
int i, extra = zc ? 2 : 0;
io->private_data = s;
calculate_stripe_array(conf, iod, s);
ublk_queue_alloc_sqes(q, sqe, s->nr);
for (i = 0; i < s->nr; i++) {
struct stripe *t = &s->s[i];
ublk_queue_alloc_sqes(q, sqe, s->nr + extra);
if (zc) {
io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
sqe[0]->user_data = build_user_data(tag,
ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
}
for (i = zc; i < s->nr + extra - zc; i++) {
struct stripe *t = &s->s[i - zc];
io_uring_prep_rw(op, sqe[i],
t->seq + 1,
(void *)t->vec,
t->nr_vec,
t->start << 9);
io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
if (zc) {
sqe[i]->buf_index = tag;
io_uring_sqe_set_flags(sqe[i],
IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK);
} else {
io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
}
/* bit63 marks us as tgt io */
sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i, 1);
sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1);
}
return s->nr;
if (zc) {
struct io_uring_sqe *unreg = sqe[s->nr + 1];
io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, tag);
unreg->user_data = build_user_data(tag, ublk_cmd_op_nr(unreg->cmd_op), 0, 1);
}
/* register buffer is skip_success */
return s->nr + zc;
}
static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
@@ -208,19 +232,27 @@ static void ublk_stripe_io_done(struct ublk_queue *q, int tag,
struct ublk_io *io = ublk_get_io(q, tag);
int res = cqe->res;
if (res < 0) {
if (res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
if (!io->result)
io->result = res;
ublk_err("%s: io failure %d tag %u\n", __func__, res, tag);
if (res < 0)
ublk_err("%s: io failure %d tag %u\n", __func__, res, tag);
}
/* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
io->tgt_ios += 1;
/* fail short READ/WRITE simply */
if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) {
unsigned seq = user_data_to_tgt_data(cqe->user_data);
struct stripe_array *s = io->private_data;
if (res < s->s[seq].vec->iov_len)
if (res < s->s[seq].nr_sects << 9) {
io->result = -EIO;
ublk_err("%s: short rw op %u res %d exp %u tag %u\n",
__func__, op, res, s->s[seq].vec->iov_len, tag);
}
}
if (ublk_completed_tgt_io(q, tag)) {
@@ -253,7 +285,7 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
struct stripe_conf *conf;
unsigned chunk_shift;
loff_t bytes = 0;
int ret, i;
int ret, i, mul = 1;
if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
ublk_err("invalid chunk size %u\n", chunk_size);
@@ -295,8 +327,11 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
dev->tgt.dev_size = bytes;
p.basic.dev_sectors = bytes >> 9;
dev->tgt.params = p;
dev->tgt.sq_depth = dev->dev_info.queue_depth * conf->nr_files;
dev->tgt.cq_depth = dev->dev_info.queue_depth * conf->nr_files;
if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
mul = 2;
dev->tgt.sq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;
dev->tgt.cq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;
printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files);

View File

@@ -23,6 +23,12 @@ _get_disk_dev_t() {
echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) ))
}
_run_fio_verify_io() {
fio --name=verify --rw=randwrite --direct=1 --ioengine=libaio \
--bs=8k --iodepth=32 --verify=crc32c --do_verify=1 \
--verify_state_save=0 "$@" > /dev/null
}
_create_backfile() {
local my_size=$1
local my_file

View File

@@ -0,0 +1,44 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="generic_02"
ERR_CODE=0
if ! _have_program bpftrace; then
exit "$UBLK_SKIP_CODE"
fi
_prep_test "null" "sequential io order for MQ"
dev_id=$(_add_ublk_dev -t null -q 2)
_check_add_dev $TID $?
dev_t=$(_get_disk_dev_t "$dev_id")
bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
btrace_pid=$!
sleep 2
if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
_cleanup_test "null"
exit "$UBLK_SKIP_CODE"
fi
# run fio over this ublk disk
fio --name=write_seq \
--filename=/dev/ublkb"${dev_id}" \
--ioengine=libaio --iodepth=16 \
--rw=write \
--size=512M \
--direct=1 \
--bs=4k > /dev/null 2>&1
ERR_CODE=$?
kill "$btrace_pid"
wait
if grep -q "io_out_of_order" "$UBLK_TMP"; then
cat "$UBLK_TMP"
ERR_CODE=255
fi
_cleanup_test "null"
_show_result $TID $ERR_CODE

View File

@@ -0,0 +1,28 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="generic_03"
ERR_CODE=0
_prep_test "null" "check dma & segment limits for zero copy"
dev_id=$(_add_ublk_dev -t null -z)
_check_add_dev $TID $?
sysfs_path=/sys/block/ublkb"${dev_id}"
dma_align=$(cat "$sysfs_path"/queue/dma_alignment)
max_segments=$(cat "$sysfs_path"/queue/max_segments)
max_segment_size=$(cat "$sysfs_path"/queue/max_segment_size)
if [ "$dma_align" != "4095" ]; then
ERR_CODE=255
fi
if [ "$max_segments" != "32" ]; then
ERR_CODE=255
fi
if [ "$max_segment_size" != "32768" ]; then
ERR_CODE=255
fi
_cleanup_test "null"
_show_result $TID $ERR_CODE

View File

@@ -6,6 +6,10 @@
TID="loop_01"
ERR_CODE=0
if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
_prep_test "loop" "write and verify test"
backfile_0=$(_create_backfile 256M)
@@ -14,15 +18,7 @@ dev_id=$(_add_ublk_dev -t loop "$backfile_0")
_check_add_dev $TID $? "${backfile_0}"
# run fio over the ublk disk
fio --name=write_and_verify \
--filename=/dev/ublkb"${dev_id}" \
--ioengine=libaio --iodepth=16 \
--rw=write \
--size=256M \
--direct=1 \
--verify=crc32c \
--do_verify=1 \
--bs=4k > /dev/null 2>&1
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
ERR_CODE=$?
_cleanup_test "loop"

View File

@@ -6,6 +6,10 @@
TID="loop_03"
ERR_CODE=0
if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
_prep_test "loop" "write and verify over zero copy"
backfile_0=$(_create_backfile 256M)
@@ -13,15 +17,7 @@ dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
_check_add_dev $TID $? "$backfile_0"
# run fio over the ublk disk
fio --name=write_and_verify \
--filename=/dev/ublkb"${dev_id}" \
--ioengine=libaio --iodepth=64 \
--rw=write \
--size=256M \
--direct=1 \
--verify=crc32c \
--do_verify=1 \
--bs=4k > /dev/null 2>&1
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
ERR_CODE=$?
_cleanup_test "loop"

View File

@@ -0,0 +1,28 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="loop_05"
ERR_CODE=0
if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
_prep_test "loop" "write and verify test"
backfile_0=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0")
_check_add_dev $TID $? "${backfile_0}"
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
ERR_CODE=$?
_cleanup_test "loop"
_remove_backfile "$backfile_0"
_show_result $TID $ERR_CODE

View File

@@ -27,20 +27,20 @@ ublk_io_and_remove()
_prep_test "stress" "run IO and remove device"
ublk_io_and_remove 8G -t null
ublk_io_and_remove 8G -t null -q 4
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
_show_result $TID $ERR_CODE
fi
BACK_FILE=$(_create_backfile 256M)
ublk_io_and_remove 256M -t loop "${BACK_FILE}"
ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
_show_result $TID $ERR_CODE
fi
ublk_io_and_remove 256M -t loop -z "${BACK_FILE}"
ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}"
ERR_CODE=$?
_cleanup_test "stress"
_remove_backfile "${BACK_FILE}"

View File

@@ -27,20 +27,20 @@ ublk_io_and_kill_daemon()
_prep_test "stress" "run IO and kill ublk server"
ublk_io_and_kill_daemon 8G -t null
ublk_io_and_kill_daemon 8G -t null -q 4
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
_show_result $TID $ERR_CODE
fi
BACK_FILE=$(_create_backfile 256M)
ublk_io_and_kill_daemon 256M -t loop "${BACK_FILE}"
ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
_show_result $TID $ERR_CODE
fi
ublk_io_and_kill_daemon 256M -t loop -z "${BACK_FILE}"
ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}"
ERR_CODE=$?
_cleanup_test "stress"
_remove_backfile "${BACK_FILE}"

View File

@@ -6,6 +6,10 @@
TID="stripe_01"
ERR_CODE=0
if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
_prep_test "stripe" "write and verify test"
backfile_0=$(_create_backfile 256M)
@@ -15,15 +19,7 @@ dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
_check_add_dev $TID $? "${backfile_0}"
# run fio over the ublk disk
fio --name=write_and_verify \
--filename=/dev/ublkb"${dev_id}" \
--ioengine=libaio --iodepth=32 \
--rw=write \
--size=512M \
--direct=1 \
--verify=crc32c \
--do_verify=1 \
--bs=4k > /dev/null 2>&1
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
ERR_CODE=$?
_cleanup_test "stripe"

View File

@@ -0,0 +1,30 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stripe_03"
ERR_CODE=0
if ! _have_program fio; then
exit "$UBLK_SKIP_CODE"
fi
_prep_test "stripe" "write and verify test"
backfile_0=$(_create_backfile 256M)
backfile_1=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1")
_check_add_dev $TID $? "${backfile_0}"
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
ERR_CODE=$?
_cleanup_test "stripe"
_remove_backfile "$backfile_0"
_remove_backfile "$backfile_1"
_show_result $TID $ERR_CODE