mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Cancelling the I/O and admin tagsets during nvme-loop controller reset or shutdown is unnecessary. The subsequent destruction of the I/O and admin queues already waits for all in-flight target operations to complete. Cancelling the tagsets first also opens a race window. After a request tag has been cancelled, a late completion from the target may still arrive before the queues are destroyed. In that case the completion path may access a request whose tag has already been cancelled or freed, which can lead to a kernel crash. Please see below the kernel crash encountered while running blktests nvme/040: run blktests nvme/040 at 2026-03-08 06:34:27 loop0: detected capacity change from 0 to 2097152 nvmet: adding nsid 1 to subsystem blktests-subsystem-1 nvmet: Created nvm controller 1 for subsystem blktests-subsystem-1 for NQN nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349. nvme nvme6: creating 96 I/O queues. nvme nvme6: new ctrl: "blktests-subsystem-1" nvme_log_error: 1 callbacks suppressed block nvme6n1: no usable path - requeuing I/O nvme6c6n1: Read(0x2) @ LBA 2096384, 128 blocks, Host Aborted Command (sct 0x3 / sc 0x71) blk_print_req_error: 1 callbacks suppressed I/O error, dev nvme6c6n1, sector 2096384 op 0x0:(READ) flags 0x2880700 phys_seg 1 prio class 2 block nvme6n1: no usable path - requeuing I/O Kernel attempted to read user page (236) - exploit attempt? 
(uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000236 Faulting instruction address: 0xc000000000961274 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: nvme_loop nvme_fabrics loop nvmet null_blk rpadlpar_io rpaphp xsk_diag bonding rfkill nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink pseries_rng dax_pmem vmx_crypto drm drm_panel_orientation_quirks xfs mlx5_core nvme bnx2x sd_mod nd_pmem nd_btt nvme_core sg papr_scm tls libnvdimm ibmvscsi ibmveth scsi_transport_srp nvme_keyring nvme_auth mdio hkdf pseries_wdt dm_mirror dm_region_hash dm_log dm_mod fuse [last unloaded: loop] CPU: 25 UID: 0 PID: 0 Comm: swapper/25 Kdump: loaded Not tainted 7.0.0-rc3+ #14 PREEMPT Hardware name: IBM,9043-MRX Power11 (architected) 0x820200 0xf000007 of:IBM,FW1120.00 (RF1120_128) hv:phyp pSeries NIP: c000000000961274 LR: c008000009af1808 CTR: c00000000096124c REGS: c0000007ffc0f910 TRAP: 0300 Not tainted (7.0.0-rc3+) MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 22222222 XER: 00000000 CFAR: c008000009af232c DAR: 0000000000000236 DSISR: 40000000 IRQMASK: 0 GPR00: c008000009af17fc c0000007ffc0fbb0 c000000001c78100 c0000000be05cc00 GPR04: 0000000000000001 0000000000000000 0000000000000007 0000000000000000 GPR08: 0000000000000000 0000000000000000 0000000000000002 c008000009af2318 GPR12: c00000000096124c c0000007ffdab880 0000000000000000 0000000000000000 GPR16: 0000000000000010 0000000000000000 0000000000000004 0000000000000000 GPR20: 0000000000000001 c000000002ca2b00 0000000100043bb2 000000000000000a GPR24: 000000000000000a 0000000000000000 0000000000000000 0000000000000000 GPR28: c000000084021d40 c000000084021d50 c0000000be05cd60 c0000000be05cc00 NIP [c000000000961274] blk_mq_complete_request_remote+0x28/0x2d4 LR [c008000009af1808] 
nvme_loop_queue_response+0x110/0x290 [nvme_loop] Call Trace: 0xc00000000502c640 (unreliable) nvme_loop_queue_response+0x104/0x290 [nvme_loop] __nvmet_req_complete+0x80/0x498 [nvmet] nvmet_req_complete+0x24/0xf8 [nvmet] nvmet_bio_done+0x58/0xcc [nvmet] bio_endio+0x250/0x390 blk_update_request+0x2e8/0x68c blk_mq_end_request+0x30/0x5c lo_complete_rq+0x94/0x110 [loop] blk_complete_reqs+0x78/0x98 handle_softirqs+0x148/0x454 do_softirq_own_stack+0x3c/0x50 __irq_exit_rcu+0x18c/0x1b4 irq_exit+0x1c/0x34 do_IRQ+0x114/0x278 hardware_interrupt_common_virt+0x28c/0x290 Since the queue teardown path already guarantees that all target-side operations have completed, cancelling the tagsets is redundant and unsafe. So avoid cancelling the I/O and admin tagsets during controller reset and shutdown. Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Nilay Shroff <nilay@linux.ibm.com> Signed-off-by: Keith Busch <kbusch@kernel.org>
723 lines
18 KiB
C
723 lines
18 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* NVMe over Fabrics loopback device.
|
|
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
|
|
*/
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
#include <linux/scatterlist.h>
|
|
#include <linux/blk-mq.h>
|
|
#include <linux/nvme.h>
|
|
#include <linux/module.h>
|
|
#include <linux/parser.h>
|
|
#include "nvmet.h"
|
|
#include "../host/nvme.h"
|
|
#include "../host/fabrics.h"
|
|
|
|
/*
 * Segment limit of the loop transport. The admin queue setup in this file
 * derives ctrl.max_hw_sectors from it as
 * (NVME_LOOP_MAX_SEGMENTS - 1) << PAGE_SECTORS_SHIFT.
 */
#define NVME_LOOP_MAX_SEGMENTS 256
|
|
|
|
/*
 * Per-command private data of the loop transport. For regular commands it is
 * the blk-mq PDU of the request (obtained with blk_mq_rq_to_pdu()); the
 * controller additionally embeds one instance for the async event command.
 */
struct nvme_loop_iod {
|
|
/* NOTE(review): presumably has to stay first so nvme_req() can find it -- confirm */
struct nvme_request nvme_req;
|
|
/* command buffer; both nvme_req(rq)->cmd and req.cmd point here */
struct nvme_command cmd;
|
|
/* completion entry; req.cqe points here */
struct nvme_completion cqe;
|
|
/* target-side request, set up with nvmet_req_init() and run via req.execute() */
struct nvmet_req req;
|
|
/* loop queue this iod was initialised for, see nvme_loop_init_iod() */
struct nvme_loop_queue *queue;
|
|
/* work item that runs nvme_loop_execute_work(); queued on nvmet_wq */
struct work_struct work;
|
|
/* scatter/gather table for the data; its sgl starts at first_sgl */
struct sg_table sg_table;
|
|
/* inline scatterlist; the tag sets reserve NVME_INLINE_SG_CNT entries for it */
struct scatterlist first_sgl[];
|
|
};
|
|
|
|
/*
 * Loop transport controller: wraps the host-side struct nvme_ctrl together
 * with the queues and tag sets that back it.
 */
struct nvme_loop_ctrl {
|
|
/* queue array of nr_io_queues + 1 entries: [0] is the admin queue, I/O queues start at [1] */
struct nvme_loop_queue *queues;
|
|
|
|
/* tag set passed to nvme_alloc_admin_tag_set() */
struct blk_mq_tag_set admin_tag_set;
|
|
|
|
/* link in nvme_loop_ctrl_list; stays empty until the controller went live */
struct list_head list;
|
|
/* tag set passed to nvme_alloc_io_tag_set() */
struct blk_mq_tag_set tag_set;
|
|
/* embedded host controller; to_loop_ctrl() maps it back to this struct */
struct nvme_ctrl ctrl;
|
|
|
|
/* result of nvme_loop_find_port(); NULL when no registered port matched */
struct nvmet_port *port;
|
|
|
|
/*
 * iod used for the async event command, which has no struct request.
 * Must be last -- ends in a flexible-array member.
 */
|
|
struct nvme_loop_iod async_event_iod;
|
|
};
|
|
|
|
static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
|
|
{
|
|
return container_of(ctrl, struct nvme_loop_ctrl, ctrl);
|
|
}
|
|
|
|
/* Bit numbers used in nvme_loop_queue.flags. */
enum nvme_loop_queue_flags {
|
|
/* set once the queue has been connected, cleared again on teardown */
NVME_LOOP_Q_LIVE = 0,
|
|
};
|
|
|
|
/*
 * One loop queue: a target-side completion/submission queue pair plus the
 * back pointer to the owning controller.
 */
struct nvme_loop_queue {
|
|
/* target completion queue, set up with nvmet_cq_init() */
struct nvmet_cq nvme_cq;
|
|
/* target submission queue, set up with nvmet_sq_init() */
struct nvmet_sq nvme_sq;
|
|
/* owning controller; also used to compute the queue index */
struct nvme_loop_ctrl *ctrl;
|
|
/* bitmask indexed by enum nvme_loop_queue_flags */
unsigned long flags;
|
|
};
|
|
|
|
/* Ports registered through nvme_loop_add_port(); guarded by nvme_loop_ports_mutex. */
static LIST_HEAD(nvme_loop_ports);
|
|
static DEFINE_MUTEX(nvme_loop_ports_mutex);
|
|
|
|
/* Controllers that reached the live state; guarded by nvme_loop_ctrl_mutex. */
static LIST_HEAD(nvme_loop_ctrl_list);
|
|
static DEFINE_MUTEX(nvme_loop_ctrl_mutex);
|
|
|
|
/* Forward declarations for the callbacks wired into nvme_loop_ops below. */
static void nvme_loop_queue_response(struct nvmet_req *nvme_req);
|
|
static void nvme_loop_delete_ctrl(struct nvmet_ctrl *ctrl);
|
|
|
|
/* Defined near the end of the file; needed earlier by nvmet_req_init() callers. */
static const struct nvmet_fabrics_ops nvme_loop_ops;
|
|
|
|
static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue)
|
|
{
|
|
return queue - queue->ctrl->queues;
|
|
}
|
|
|
|
static void nvme_loop_complete_rq(struct request *req)
|
|
{
|
|
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
|
|
|
|
sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT);
|
|
nvme_complete_rq(req);
|
|
}
|
|
|
|
static struct blk_mq_tags *nvme_loop_tagset(struct nvme_loop_queue *queue)
|
|
{
|
|
u32 queue_idx = nvme_loop_queue_idx(queue);
|
|
|
|
if (queue_idx == 0)
|
|
return queue->ctrl->admin_tag_set.tags[queue_idx];
|
|
return queue->ctrl->tag_set.tags[queue_idx - 1];
|
|
}
|
|
|
|
static void nvme_loop_queue_response(struct nvmet_req *req)
|
|
{
|
|
struct nvme_loop_queue *queue =
|
|
container_of(req->sq, struct nvme_loop_queue, nvme_sq);
|
|
struct nvme_completion *cqe = req->cqe;
|
|
|
|
/*
|
|
* AEN requests are special as they don't time out and can
|
|
* survive any kind of queue freeze and often don't respond to
|
|
* aborts. We don't even bother to allocate a struct request
|
|
* for them but rather special case them here.
|
|
*/
|
|
if (unlikely(nvme_is_aen_req(nvme_loop_queue_idx(queue),
|
|
cqe->command_id))) {
|
|
nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
|
|
&cqe->result);
|
|
} else {
|
|
struct request *rq;
|
|
|
|
rq = nvme_find_rq(nvme_loop_tagset(queue), cqe->command_id);
|
|
if (!rq) {
|
|
dev_err(queue->ctrl->ctrl.device,
|
|
"got bad command_id %#x on queue %d\n",
|
|
cqe->command_id, nvme_loop_queue_idx(queue));
|
|
return;
|
|
}
|
|
|
|
if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
|
|
nvme_loop_complete_rq(rq);
|
|
}
|
|
}
|
|
|
|
static void nvme_loop_execute_work(struct work_struct *work)
|
|
{
|
|
struct nvme_loop_iod *iod =
|
|
container_of(work, struct nvme_loop_iod, work);
|
|
|
|
iod->req.execute(&iod->req);
|
|
}
|
|
|
|
static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
|
|
const struct blk_mq_queue_data *bd)
|
|
{
|
|
struct nvme_ns *ns = hctx->queue->queuedata;
|
|
struct nvme_loop_queue *queue = hctx->driver_data;
|
|
struct request *req = bd->rq;
|
|
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
|
|
bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags);
|
|
blk_status_t ret;
|
|
|
|
if (!nvme_check_ready(&queue->ctrl->ctrl, req, queue_ready))
|
|
return nvme_fail_nonready_command(&queue->ctrl->ctrl, req);
|
|
|
|
ret = nvme_setup_cmd(ns, req);
|
|
if (ret)
|
|
return ret;
|
|
|
|
nvme_start_request(req);
|
|
iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
|
|
iod->req.port = queue->ctrl->port;
|
|
if (!nvmet_req_init(&iod->req, &queue->nvme_sq, &nvme_loop_ops))
|
|
return BLK_STS_OK;
|
|
|
|
if (blk_rq_nr_phys_segments(req)) {
|
|
iod->sg_table.sgl = iod->first_sgl;
|
|
if (sg_alloc_table_chained(&iod->sg_table,
|
|
blk_rq_nr_phys_segments(req),
|
|
iod->sg_table.sgl, NVME_INLINE_SG_CNT)) {
|
|
nvme_cleanup_cmd(req);
|
|
return BLK_STS_RESOURCE;
|
|
}
|
|
|
|
iod->req.sg = iod->sg_table.sgl;
|
|
iod->req.sg_cnt = blk_rq_map_sg(req, iod->sg_table.sgl);
|
|
iod->req.transfer_len = blk_rq_payload_bytes(req);
|
|
}
|
|
|
|
queue_work(nvmet_wq, &iod->work);
|
|
return BLK_STS_OK;
|
|
}
|
|
|
|
static void nvme_loop_submit_async_event(struct nvme_ctrl *arg)
|
|
{
|
|
struct nvme_loop_ctrl *ctrl = to_loop_ctrl(arg);
|
|
struct nvme_loop_queue *queue = &ctrl->queues[0];
|
|
struct nvme_loop_iod *iod = &ctrl->async_event_iod;
|
|
|
|
memset(&iod->cmd, 0, sizeof(iod->cmd));
|
|
iod->cmd.common.opcode = nvme_admin_async_event;
|
|
iod->cmd.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
|
|
iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
|
|
|
|
if (!nvmet_req_init(&iod->req, &queue->nvme_sq, &nvme_loop_ops)) {
|
|
dev_err(ctrl->ctrl.device, "failed async event work\n");
|
|
return;
|
|
}
|
|
|
|
queue_work(nvmet_wq, &iod->work);
|
|
}
|
|
|
|
static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl,
|
|
struct nvme_loop_iod *iod, unsigned int queue_idx)
|
|
{
|
|
iod->req.cmd = &iod->cmd;
|
|
iod->req.cqe = &iod->cqe;
|
|
iod->queue = &ctrl->queues[queue_idx];
|
|
INIT_WORK(&iod->work, nvme_loop_execute_work);
|
|
return 0;
|
|
}
|
|
|
|
static int nvme_loop_init_request(struct blk_mq_tag_set *set,
|
|
struct request *req, unsigned int hctx_idx,
|
|
unsigned int numa_node)
|
|
{
|
|
struct nvme_loop_ctrl *ctrl = to_loop_ctrl(set->driver_data);
|
|
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
|
|
|
|
nvme_req(req)->ctrl = &ctrl->ctrl;
|
|
nvme_req(req)->cmd = &iod->cmd;
|
|
return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req),
|
|
(set == &ctrl->tag_set) ? hctx_idx + 1 : 0);
|
|
}
|
|
|
|
static struct lock_class_key loop_hctx_fq_lock_key;
|
|
|
|
static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
|
|
unsigned int hctx_idx)
|
|
{
|
|
struct nvme_loop_ctrl *ctrl = to_loop_ctrl(data);
|
|
struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1];
|
|
|
|
BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
|
|
|
|
/*
|
|
* flush_end_io() can be called recursively for us, so use our own
|
|
* lock class key for avoiding lockdep possible recursive locking,
|
|
* then we can remove the dynamically allocated lock class for each
|
|
* flush queue, that way may cause horrible boot delay.
|
|
*/
|
|
blk_mq_hctx_set_fq_lock_class(hctx, &loop_hctx_fq_lock_key);
|
|
|
|
hctx->driver_data = queue;
|
|
return 0;
|
|
}
|
|
|
|
static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
|
|
unsigned int hctx_idx)
|
|
{
|
|
struct nvme_loop_ctrl *ctrl = to_loop_ctrl(data);
|
|
struct nvme_loop_queue *queue = &ctrl->queues[0];
|
|
|
|
BUG_ON(hctx_idx != 0);
|
|
|
|
hctx->driver_data = queue;
|
|
return 0;
|
|
}
|
|
|
|
static const struct blk_mq_ops nvme_loop_mq_ops = {
|
|
.queue_rq = nvme_loop_queue_rq,
|
|
.complete = nvme_loop_complete_rq,
|
|
.init_request = nvme_loop_init_request,
|
|
.init_hctx = nvme_loop_init_hctx,
|
|
};
|
|
|
|
static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
|
|
.queue_rq = nvme_loop_queue_rq,
|
|
.complete = nvme_loop_complete_rq,
|
|
.init_request = nvme_loop_init_request,
|
|
.init_hctx = nvme_loop_init_admin_hctx,
|
|
};
|
|
|
|
static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
|
|
{
|
|
if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags))
|
|
return;
|
|
/*
|
|
* It's possible that some requests might have been added
|
|
* after admin queue is stopped/quiesced. So now start the
|
|
* queue to flush these requests to the completion.
|
|
*/
|
|
nvme_unquiesce_admin_queue(&ctrl->ctrl);
|
|
|
|
nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
|
|
nvmet_cq_put(&ctrl->queues[0].nvme_cq);
|
|
nvme_remove_admin_tag_set(&ctrl->ctrl);
|
|
}
|
|
|
|
static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl)
|
|
{
|
|
struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
|
|
|
|
if (list_empty(&ctrl->list))
|
|
goto free_ctrl;
|
|
|
|
mutex_lock(&nvme_loop_ctrl_mutex);
|
|
list_del(&ctrl->list);
|
|
mutex_unlock(&nvme_loop_ctrl_mutex);
|
|
|
|
if (nctrl->tagset)
|
|
nvme_remove_io_tag_set(nctrl);
|
|
kfree(ctrl->queues);
|
|
nvmf_free_options(nctrl->opts);
|
|
free_ctrl:
|
|
kfree(ctrl);
|
|
}
|
|
|
|
static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
|
|
{
|
|
int i;
|
|
|
|
for (i = 1; i < ctrl->ctrl.queue_count; i++) {
|
|
clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags);
|
|
nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
|
|
nvmet_cq_put(&ctrl->queues[i].nvme_cq);
|
|
}
|
|
ctrl->ctrl.queue_count = 1;
|
|
/*
|
|
* It's possible that some requests might have been added
|
|
* after io queue is stopped/quiesced. So now start the
|
|
* queue to flush these requests to the completion.
|
|
*/
|
|
nvme_unquiesce_io_queues(&ctrl->ctrl);
|
|
}
|
|
|
|
static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
|
|
{
|
|
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
|
|
unsigned int nr_io_queues;
|
|
int ret, i;
|
|
|
|
nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
|
|
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
|
|
if (ret || !nr_io_queues)
|
|
return ret;
|
|
|
|
dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues);
|
|
|
|
for (i = 1; i <= nr_io_queues; i++) {
|
|
ctrl->queues[i].ctrl = ctrl;
|
|
nvmet_cq_init(&ctrl->queues[i].nvme_cq);
|
|
ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq,
|
|
&ctrl->queues[i].nvme_cq);
|
|
if (ret) {
|
|
nvmet_cq_put(&ctrl->queues[i].nvme_cq);
|
|
goto out_destroy_queues;
|
|
}
|
|
|
|
ctrl->ctrl.queue_count++;
|
|
}
|
|
|
|
return 0;
|
|
|
|
out_destroy_queues:
|
|
nvme_loop_destroy_io_queues(ctrl);
|
|
return ret;
|
|
}
|
|
|
|
static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl)
|
|
{
|
|
int i, ret;
|
|
|
|
for (i = 1; i < ctrl->ctrl.queue_count; i++) {
|
|
ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
|
|
if (ret)
|
|
return ret;
|
|
set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
|
|
{
|
|
int error;
|
|
|
|
ctrl->queues[0].ctrl = ctrl;
|
|
nvmet_cq_init(&ctrl->queues[0].nvme_cq);
|
|
error = nvmet_sq_init(&ctrl->queues[0].nvme_sq,
|
|
&ctrl->queues[0].nvme_cq);
|
|
if (error) {
|
|
nvmet_cq_put(&ctrl->queues[0].nvme_cq);
|
|
return error;
|
|
}
|
|
ctrl->ctrl.queue_count = 1;
|
|
|
|
error = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
|
|
&nvme_loop_admin_mq_ops,
|
|
sizeof(struct nvme_loop_iod) +
|
|
NVME_INLINE_SG_CNT * sizeof(struct scatterlist));
|
|
if (error)
|
|
goto out_free_sq;
|
|
|
|
/* reset stopped state for the fresh admin queue */
|
|
clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->ctrl.flags);
|
|
|
|
error = nvmf_connect_admin_queue(&ctrl->ctrl);
|
|
if (error)
|
|
goto out_cleanup_tagset;
|
|
|
|
set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
|
|
|
|
error = nvme_enable_ctrl(&ctrl->ctrl);
|
|
if (error)
|
|
goto out_cleanup_tagset;
|
|
|
|
ctrl->ctrl.max_hw_sectors =
|
|
(NVME_LOOP_MAX_SEGMENTS - 1) << PAGE_SECTORS_SHIFT;
|
|
|
|
nvme_unquiesce_admin_queue(&ctrl->ctrl);
|
|
|
|
error = nvme_init_ctrl_finish(&ctrl->ctrl, false);
|
|
if (error)
|
|
goto out_cleanup_tagset;
|
|
|
|
return 0;
|
|
|
|
out_cleanup_tagset:
|
|
clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
|
|
nvme_remove_admin_tag_set(&ctrl->ctrl);
|
|
out_free_sq:
|
|
nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
|
|
nvmet_cq_put(&ctrl->queues[0].nvme_cq);
|
|
return error;
|
|
}
|
|
|
|
static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
|
|
{
|
|
if (ctrl->ctrl.queue_count > 1) {
|
|
nvme_quiesce_io_queues(&ctrl->ctrl);
|
|
nvme_loop_destroy_io_queues(ctrl);
|
|
}
|
|
|
|
nvme_quiesce_admin_queue(&ctrl->ctrl);
|
|
if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE)
|
|
nvme_disable_ctrl(&ctrl->ctrl, true);
|
|
|
|
nvme_loop_destroy_admin_queue(ctrl);
|
|
}
|
|
|
|
/* Host-side ->delete_ctrl handler: shut the loop controller down. */
static void nvme_loop_delete_ctrl_host(struct nvme_ctrl *ctrl)
{
	struct nvme_loop_ctrl *loop_ctrl = to_loop_ctrl(ctrl);

	nvme_loop_shutdown_ctrl(loop_ctrl);
}
|
|
|
|
static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl)
|
|
{
|
|
struct nvme_loop_ctrl *ctrl;
|
|
|
|
mutex_lock(&nvme_loop_ctrl_mutex);
|
|
list_for_each_entry(ctrl, &nvme_loop_ctrl_list, list) {
|
|
if (ctrl->ctrl.cntlid == nctrl->cntlid)
|
|
nvme_delete_ctrl(&ctrl->ctrl);
|
|
}
|
|
mutex_unlock(&nvme_loop_ctrl_mutex);
|
|
}
|
|
|
|
static void nvme_loop_reset_ctrl_work(struct work_struct *work)
|
|
{
|
|
struct nvme_loop_ctrl *ctrl =
|
|
container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
|
|
int ret;
|
|
|
|
nvme_stop_ctrl(&ctrl->ctrl);
|
|
nvme_loop_shutdown_ctrl(ctrl);
|
|
|
|
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
|
|
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
|
|
|
|
if (state != NVME_CTRL_DELETING &&
|
|
state != NVME_CTRL_DELETING_NOIO)
|
|
/* state change failure for non-deleted ctrl? */
|
|
WARN_ON_ONCE(1);
|
|
return;
|
|
}
|
|
|
|
ret = nvme_loop_configure_admin_queue(ctrl);
|
|
if (ret)
|
|
goto out_disable;
|
|
|
|
ret = nvme_loop_init_io_queues(ctrl);
|
|
if (ret)
|
|
goto out_destroy_admin;
|
|
|
|
ret = nvme_loop_connect_io_queues(ctrl);
|
|
if (ret)
|
|
goto out_destroy_io;
|
|
|
|
blk_mq_update_nr_hw_queues(&ctrl->tag_set,
|
|
ctrl->ctrl.queue_count - 1);
|
|
|
|
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE))
|
|
WARN_ON_ONCE(1);
|
|
|
|
nvme_start_ctrl(&ctrl->ctrl);
|
|
|
|
return;
|
|
|
|
out_destroy_io:
|
|
nvme_loop_destroy_io_queues(ctrl);
|
|
out_destroy_admin:
|
|
nvme_quiesce_admin_queue(&ctrl->ctrl);
|
|
nvme_cancel_admin_tagset(&ctrl->ctrl);
|
|
nvme_loop_destroy_admin_queue(ctrl);
|
|
out_disable:
|
|
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
|
|
nvme_uninit_ctrl(&ctrl->ctrl);
|
|
}
|
|
|
|
static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
|
|
.name = "loop",
|
|
.module = THIS_MODULE,
|
|
.flags = NVME_F_FABRICS,
|
|
.reg_read32 = nvmf_reg_read32,
|
|
.reg_read64 = nvmf_reg_read64,
|
|
.reg_write32 = nvmf_reg_write32,
|
|
.free_ctrl = nvme_loop_free_ctrl,
|
|
.submit_async_event = nvme_loop_submit_async_event,
|
|
.delete_ctrl = nvme_loop_delete_ctrl_host,
|
|
.get_address = nvmf_get_address,
|
|
.get_virt_boundary = nvme_get_virt_boundary,
|
|
};
|
|
|
|
static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
|
|
{
|
|
int ret;
|
|
|
|
ret = nvme_loop_init_io_queues(ctrl);
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set,
|
|
&nvme_loop_mq_ops, 1,
|
|
sizeof(struct nvme_loop_iod) +
|
|
NVME_INLINE_SG_CNT * sizeof(struct scatterlist));
|
|
if (ret)
|
|
goto out_destroy_queues;
|
|
|
|
ret = nvme_loop_connect_io_queues(ctrl);
|
|
if (ret)
|
|
goto out_cleanup_tagset;
|
|
|
|
return 0;
|
|
|
|
out_cleanup_tagset:
|
|
nvme_remove_io_tag_set(&ctrl->ctrl);
|
|
out_destroy_queues:
|
|
nvme_loop_destroy_io_queues(ctrl);
|
|
return ret;
|
|
}
|
|
|
|
static struct nvmet_port *nvme_loop_find_port(struct nvme_ctrl *ctrl)
|
|
{
|
|
struct nvmet_port *p, *found = NULL;
|
|
|
|
mutex_lock(&nvme_loop_ports_mutex);
|
|
list_for_each_entry(p, &nvme_loop_ports, entry) {
|
|
/* if no transport address is specified use the first port */
|
|
if ((ctrl->opts->mask & NVMF_OPT_TRADDR) &&
|
|
strcmp(ctrl->opts->traddr, p->disc_addr.traddr))
|
|
continue;
|
|
found = p;
|
|
break;
|
|
}
|
|
mutex_unlock(&nvme_loop_ports_mutex);
|
|
return found;
|
|
}
|
|
|
|
static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
|
|
struct nvmf_ctrl_options *opts)
|
|
{
|
|
struct nvme_loop_ctrl *ctrl;
|
|
int ret;
|
|
|
|
ctrl = kzalloc_obj(*ctrl);
|
|
if (!ctrl)
|
|
return ERR_PTR(-ENOMEM);
|
|
ctrl->ctrl.opts = opts;
|
|
INIT_LIST_HEAD(&ctrl->list);
|
|
|
|
INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work);
|
|
|
|
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
|
|
0 /* no quirks, we're perfect! */);
|
|
if (ret) {
|
|
kfree(ctrl);
|
|
goto out;
|
|
}
|
|
|
|
ret = nvme_add_ctrl(&ctrl->ctrl);
|
|
if (ret)
|
|
goto out_put_ctrl;
|
|
|
|
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
|
|
WARN_ON_ONCE(1);
|
|
|
|
ret = -ENOMEM;
|
|
|
|
ctrl->ctrl.kato = opts->kato;
|
|
ctrl->port = nvme_loop_find_port(&ctrl->ctrl);
|
|
|
|
ctrl->queues = kzalloc_objs(*ctrl->queues, opts->nr_io_queues + 1);
|
|
if (!ctrl->queues)
|
|
goto out_uninit_ctrl;
|
|
|
|
ret = nvme_loop_configure_admin_queue(ctrl);
|
|
if (ret)
|
|
goto out_free_queues;
|
|
|
|
if (opts->queue_size > ctrl->ctrl.maxcmd) {
|
|
/* warn if maxcmd is lower than queue_size */
|
|
dev_warn(ctrl->ctrl.device,
|
|
"queue_size %zu > ctrl maxcmd %u, clamping down\n",
|
|
opts->queue_size, ctrl->ctrl.maxcmd);
|
|
opts->queue_size = ctrl->ctrl.maxcmd;
|
|
}
|
|
ctrl->ctrl.sqsize = opts->queue_size - 1;
|
|
|
|
if (opts->nr_io_queues) {
|
|
ret = nvme_loop_create_io_queues(ctrl);
|
|
if (ret)
|
|
goto out_remove_admin_queue;
|
|
}
|
|
|
|
nvme_loop_init_iod(ctrl, &ctrl->async_event_iod, 0);
|
|
|
|
dev_info(ctrl->ctrl.device,
|
|
"new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn);
|
|
|
|
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE))
|
|
WARN_ON_ONCE(1);
|
|
|
|
mutex_lock(&nvme_loop_ctrl_mutex);
|
|
list_add_tail(&ctrl->list, &nvme_loop_ctrl_list);
|
|
mutex_unlock(&nvme_loop_ctrl_mutex);
|
|
|
|
nvme_start_ctrl(&ctrl->ctrl);
|
|
|
|
return &ctrl->ctrl;
|
|
|
|
out_remove_admin_queue:
|
|
nvme_quiesce_admin_queue(&ctrl->ctrl);
|
|
nvme_cancel_admin_tagset(&ctrl->ctrl);
|
|
nvme_loop_destroy_admin_queue(ctrl);
|
|
out_free_queues:
|
|
kfree(ctrl->queues);
|
|
out_uninit_ctrl:
|
|
nvme_uninit_ctrl(&ctrl->ctrl);
|
|
out_put_ctrl:
|
|
nvme_put_ctrl(&ctrl->ctrl);
|
|
out:
|
|
if (ret > 0)
|
|
ret = -EIO;
|
|
return ERR_PTR(ret);
|
|
}
|
|
|
|
static int nvme_loop_add_port(struct nvmet_port *port)
|
|
{
|
|
mutex_lock(&nvme_loop_ports_mutex);
|
|
list_add_tail(&port->entry, &nvme_loop_ports);
|
|
mutex_unlock(&nvme_loop_ports_mutex);
|
|
return 0;
|
|
}
|
|
|
|
static void nvme_loop_remove_port(struct nvmet_port *port)
|
|
{
|
|
mutex_lock(&nvme_loop_ports_mutex);
|
|
list_del_init(&port->entry);
|
|
mutex_unlock(&nvme_loop_ports_mutex);
|
|
|
|
/*
|
|
* Ensure any ctrls that are in the process of being
|
|
* deleted are in fact deleted before we return
|
|
* and free the port. This is to prevent active
|
|
* ctrls from using a port after it's freed.
|
|
*/
|
|
flush_workqueue(nvme_delete_wq);
|
|
}
|
|
|
|
static const struct nvmet_fabrics_ops nvme_loop_ops = {
|
|
.owner = THIS_MODULE,
|
|
.type = NVMF_TRTYPE_LOOP,
|
|
.add_port = nvme_loop_add_port,
|
|
.remove_port = nvme_loop_remove_port,
|
|
.queue_response = nvme_loop_queue_response,
|
|
.delete_ctrl = nvme_loop_delete_ctrl,
|
|
};
|
|
|
|
static struct nvmf_transport_ops nvme_loop_transport = {
|
|
.name = "loop",
|
|
.module = THIS_MODULE,
|
|
.create_ctrl = nvme_loop_create_ctrl,
|
|
.allowed_opts = NVMF_OPT_TRADDR,
|
|
};
|
|
|
|
/*
 * Module init: register the target-side transport first, then the host-side
 * one. If the latter fails the target-side registration is rolled back.
 */
static int __init nvme_loop_init_module(void)
{
	int ret = nvmet_register_transport(&nvme_loop_ops);

	if (ret)
		return ret;

	ret = nvmf_register_transport(&nvme_loop_transport);
	if (!ret)
		return 0;

	nvmet_unregister_transport(&nvme_loop_ops);
	return ret;
}
|
|
|
|
/*
 * Module exit: unregister both transports, request deletion of every
 * remaining loop controller and wait for the deletion work to finish.
 */
static void __exit nvme_loop_cleanup_module(void)
{
	struct nvme_loop_ctrl *pos, *tmp;

	nvmf_unregister_transport(&nvme_loop_transport);
	nvmet_unregister_transport(&nvme_loop_ops);

	mutex_lock(&nvme_loop_ctrl_mutex);
	list_for_each_entry_safe(pos, tmp, &nvme_loop_ctrl_list, list)
		nvme_delete_ctrl(&pos->ctrl);
	mutex_unlock(&nvme_loop_ctrl_mutex);

	flush_workqueue(nvme_delete_wq);
}
|
|
|
|
/* Module entry and exit points. */
module_init(nvme_loop_init_module);
|
|
module_exit(nvme_loop_cleanup_module);
|
|
|
|
/* Module metadata. */
MODULE_DESCRIPTION("NVMe target loop transport driver");
|
|
MODULE_LICENSE("GPL v2");
|
|
MODULE_ALIAS("nvmet-transport-254"); /* 254 == NVMF_TRTYPE_LOOP */
|