mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'for-7.1/block-20260411' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block updates from Jens Axboe:
- Add shared memory zero-copy I/O support for ublk, bypassing per-I/O
copies between kernel and userspace by matching registered buffer
PFNs at I/O time. Includes selftests.
- Refactor bio integrity to support filesystem initiated integrity
operations and arbitrary buffer alignment.
- Clean up bio allocation, splitting bio_alloc_bioset() into clear fast
and slow paths. Add bio_await() and bio_submit_or_kill() helpers,
unify synchronous bi_end_io callbacks.
- Fix zone write plug refcount handling and plug removal races. Add
support for serializing zone writes at QD=1 for rotational zoned
devices, yielding significant throughput improvements.
- Add SED-OPAL ioctls for Single User Mode management and a STACK_RESET
command.
- Add io_uring passthrough (uring_cmd) support to the BSG layer.
- Replace pp_buf in partition scanning with struct seq_buf.
- zloop improvements and cleanups.
- drbd genl cleanup, switching to pre_doit/post_doit.
- NVMe pull request via Keith:
- Fabrics authentication updates
- Enhanced block queue limits support
- Workqueue usage updates
- A new write zeroes device quirk
- Tagset cleanup fix for loop device
- MD pull requests via Yu Kuai:
- Fix raid5 soft lockup in retry_aligned_read()
- Fix raid10 deadlock with check operation and nowait requests
- Fix raid1 overlapping writes on writemostly disks
- Fix sysfs deadlock on array_state=clear
- Proactive RAID-5 parity building with llbitmap, with
write_zeroes_unmap optimization for initial sync
- Fix llbitmap barrier ordering, rdev skipping, and bitmap_ops
version mismatch fallback
- Fix bcache use-after-free and uninitialized closure
- Validate raid5 journal metadata payload size
- Various cleanups
- Various other fixes, improvements, and cleanups
* tag 'for-7.1/block-20260411' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (146 commits)
ublk: fix tautological comparison warning in ublk_ctrl_reg_buf
scsi: bsg: fix buffer overflow in scsi_bsg_uring_cmd()
block: refactor blkdev_zone_mgmt_ioctl
MAINTAINERS: update ublk driver maintainer email
Documentation: ublk: address review comments for SHMEM_ZC docs
ublk: allow buffer registration before device is started
ublk: replace xarray with IDA for shmem buffer index allocation
ublk: simplify PFN range loop in __ublk_ctrl_reg_buf
ublk: verify all pages in multi-page bvec fall within registered range
ublk: widen ublk_shmem_buf_reg.len to __u64 for 4GB buffer support
xfs: use bio_await in xfs_zone_gc_reset_sync
block: add a bio_submit_or_kill helper
block: factor out a bio_await helper
block: unify the synchronous bi_end_io callbacks
xfs: fix number of GC bvecs
selftests/ublk: add read-only buffer registration test
selftests/ublk: add filesystem fio verify test for shmem_zc
selftests/ublk: add hugetlbfs shmem_zc test for loop target
selftests/ublk: add shared memory zero-copy test
selftests/ublk: add UBLK_F_SHMEM_ZC support for loop target
...
This commit is contained in:
@@ -886,6 +886,21 @@ Description:
|
||||
zone commands, they will be treated as regular block devices and
|
||||
zoned will report "none".
|
||||
|
||||
What: /sys/block/<disk>/queue/zoned_qd1_writes
|
||||
Date: January 2026
|
||||
Contact: Damien Le Moal <dlemoal@kernel.org>
|
||||
Description:
|
||||
[RW] zoned_qd1_writes indicates if write operations to a zoned
|
||||
block device are being handled using a single issuer context (a
|
||||
kernel thread) operating at a maximum queue depth of 1. This
|
||||
attribute is visible only for zoned block devices. The default
|
||||
value for zoned block devices that are not rotational devices
|
||||
(e.g. ZNS SSDs or zoned UFS devices) is 0. For rotational zoned
|
||||
block devices (e.g. SMR HDDs) the default value is 1. Since
|
||||
this default may not be appropriate for some devices, e.g.
|
||||
remotely connected devices over high latency networks, the user
|
||||
can disable this feature by setting this attribute to 0.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/hidden
|
||||
Date: March 2023
|
||||
|
||||
13
Documentation/ABI/testing/sysfs-nvme
Normal file
13
Documentation/ABI/testing/sysfs-nvme
Normal file
@@ -0,0 +1,13 @@
|
||||
What: /sys/devices/virtual/nvme-fabrics/ctl/.../tls_configured_key
|
||||
Date: November 2025
|
||||
KernelVersion: 6.19
|
||||
Contact: Linux NVMe mailing list <linux-nvme@lists.infradead.org>
|
||||
Description:
|
||||
The file is available when using a secure concatenation
|
||||
connection to an NVMe target. Reading the file will return
|
||||
the serial of the currently negotiated key.
|
||||
|
||||
Writing 0 to the file will trigger a PSK reauthentication
|
||||
(REPLACETLSPSK) with the target. After a reauthentication
|
||||
the value returned by tls_configured_key will be the new
|
||||
serial.
|
||||
@@ -62,7 +62,7 @@ The options available for the add command can be listed by reading the
|
||||
/dev/zloop-control device::
|
||||
|
||||
$ cat /dev/zloop-control
|
||||
add id=%d,capacity_mb=%u,zone_size_mb=%u,zone_capacity_mb=%u,conv_zones=%u,base_dir=%s,nr_queues=%u,queue_depth=%u,buffered_io
|
||||
add id=%d,capacity_mb=%u,zone_size_mb=%u,zone_capacity_mb=%u,conv_zones=%u,max_open_zones=%u,base_dir=%s,nr_queues=%u,queue_depth=%u,buffered_io,zone_append=%u,ordered_zone_append,discard_write_cache
|
||||
remove id=%d
|
||||
|
||||
In more details, the options that can be used with the "add" command are as
|
||||
@@ -80,6 +80,9 @@ zone_capacity_mb Device zone capacity (must always be equal to or lower
|
||||
conv_zones Total number of conventional zones starting from
|
||||
sector 0
|
||||
Default: 8
|
||||
max_open_zones Maximum number of open sequential write required zones
|
||||
(0 for no limit).
|
||||
Default: 0
|
||||
base_dir Path to the base directory where to create the directory
|
||||
containing the zone files of the device.
|
||||
Default=/var/local/zloop.
|
||||
@@ -104,6 +107,11 @@ ordered_zone_append Enable zloop mitigation of zone append reordering.
|
||||
(extents), as when enabled, this can significantly reduce
|
||||
the number of data extents needed for a file data
|
||||
mapping.
|
||||
discard_write_cache Discard all data that was not explicitly persisted using a
|
||||
flush operation when the device is removed by truncating
|
||||
each zone file to the size recorded during the last flush
|
||||
operation. This simulates power fail events where
|
||||
uncommitted data is lost.
|
||||
=================== =========================================================
|
||||
|
||||
3) Deleting a Zoned Device
|
||||
|
||||
@@ -153,7 +153,7 @@ blk-crypto-fallback completes the original bio. If the original bio is too
|
||||
large, multiple bounce bios may be required; see the code for details.
|
||||
|
||||
For decryption, blk-crypto-fallback "wraps" the bio's completion callback
|
||||
(``bi_complete``) and private data (``bi_private``) with its own, unsets the
|
||||
(``bi_end_io``) and private data (``bi_private``) with its own, unsets the
|
||||
bio's encryption context, then submits the bio. If the read completes
|
||||
successfully, blk-crypto-fallback restores the bio's original completion
|
||||
callback and private data, then decrypts the bio's data in-place using the
|
||||
|
||||
@@ -485,6 +485,125 @@ Limitations
|
||||
in case that too many ublk devices are handled by this single io_ring_ctx
|
||||
and each one has very large queue depth
|
||||
|
||||
Shared Memory Zero Copy (UBLK_F_SHMEM_ZC)
|
||||
------------------------------------------
|
||||
|
||||
The ``UBLK_F_SHMEM_ZC`` feature provides an alternative zero-copy path
|
||||
that works by sharing physical memory pages between the client application
|
||||
and the ublk server. Unlike the io_uring fixed buffer approach above,
|
||||
shared memory zero copy does not require io_uring buffer registration
|
||||
per I/O — instead, it relies on the kernel matching physical pages
|
||||
at I/O time. This allows the ublk server to access the shared
|
||||
buffer directly, which is unlikely for the io_uring fixed buffer
|
||||
approach.
|
||||
|
||||
Motivation
|
||||
~~~~~~~~~~
|
||||
|
||||
Shared memory zero copy takes a different approach: if the client
|
||||
application and the ublk server both map the same physical memory, there is
|
||||
nothing to copy. The kernel detects the shared pages automatically and
|
||||
tells the server where the data already lives.
|
||||
|
||||
``UBLK_F_SHMEM_ZC`` can be thought of as a supplement for optimized client
|
||||
applications — when the client is willing to allocate I/O buffers from
|
||||
shared memory, the entire data path becomes zero-copy.
|
||||
|
||||
Use Cases
|
||||
~~~~~~~~~
|
||||
|
||||
This feature is useful when the client application can be configured to
|
||||
use a specific shared memory region for its I/O buffers:
|
||||
|
||||
- **Custom storage clients** that allocate I/O buffers from shared memory
|
||||
(memfd, hugetlbfs) and issue direct I/O to the ublk device
|
||||
- **Database engines** that use pre-allocated buffer pools with O_DIRECT
|
||||
|
||||
How It Works
|
||||
~~~~~~~~~~~~
|
||||
|
||||
1. The ublk server and client both ``mmap()`` the same file (memfd or
|
||||
hugetlbfs) with ``MAP_SHARED``. This gives both processes access to the
|
||||
same physical pages.
|
||||
|
||||
2. The ublk server registers its mapping with the kernel::
|
||||
|
||||
struct ublk_shmem_buf_reg buf = { .addr = mmap_va, .len = size };
|
||||
ublk_ctrl_cmd(UBLK_U_CMD_REG_BUF, .addr = &buf);
|
||||
|
||||
The kernel pins the pages and builds a PFN lookup tree.
|
||||
|
||||
3. When the client issues direct I/O (``O_DIRECT``) to ``/dev/ublkb*``,
|
||||
the kernel checks whether the I/O buffer pages match any registered
|
||||
pages by comparing PFNs.
|
||||
|
||||
4. On a match, the kernel sets ``UBLK_IO_F_SHMEM_ZC`` in the I/O
|
||||
descriptor and encodes the buffer index and offset in ``addr``::
|
||||
|
||||
if (iod->op_flags & UBLK_IO_F_SHMEM_ZC) {
|
||||
/* Data is already in our shared mapping — zero copy */
|
||||
index = ublk_shmem_zc_index(iod->addr);
|
||||
offset = ublk_shmem_zc_offset(iod->addr);
|
||||
buf = shmem_table[index].mmap_base + offset;
|
||||
}
|
||||
|
||||
5. If pages do not match (e.g., the client used a non-shared buffer),
|
||||
the I/O falls back to the normal copy path silently.
|
||||
|
||||
The shared memory can be set up via two methods:
|
||||
|
||||
- **Socket-based**: the client sends a memfd to the ublk server via
|
||||
``SCM_RIGHTS`` on a unix socket. The server mmaps and registers it.
|
||||
- **Hugetlbfs-based**: both processes ``mmap(MAP_SHARED)`` the same
|
||||
hugetlbfs file. No IPC needed — same file gives same physical pages.
|
||||
|
||||
Advantages
|
||||
~~~~~~~~~~
|
||||
|
||||
- **Simple**: no per-I/O buffer registration or unregistration commands.
|
||||
Once the shared buffer is registered, all matching I/O is zero-copy
|
||||
automatically.
|
||||
- **Direct buffer access**: the ublk server can read and write the shared
|
||||
buffer directly via its own mmap, without going through io_uring fixed
|
||||
buffer operations. This is more friendly for server implementations.
|
||||
- **Fast**: PFN matching is a single maple tree lookup per bvec. No
|
||||
io_uring command round-trips for buffer management.
|
||||
- **Compatible**: non-matching I/O silently falls back to the copy path.
|
||||
The device works normally for any client, with zero-copy as an
|
||||
optimization when shared memory is available.
|
||||
|
||||
Limitations
|
||||
~~~~~~~~~~~
|
||||
|
||||
- **Requires client cooperation**: the client must allocate its I/O
|
||||
buffers from the shared memory region. This requires a custom or
|
||||
configured client — standard applications using their own buffers
|
||||
will not benefit.
|
||||
- **Direct I/O only**: buffered I/O (without ``O_DIRECT``) goes through
|
||||
the page cache, which allocates its own pages. These kernel-allocated
|
||||
pages will never match the registered shared buffer. Only ``O_DIRECT``
|
||||
puts the client's buffer pages directly into the block I/O.
|
||||
- **Contiguous data only**: each I/O request's data must be contiguous
|
||||
within a single registered buffer. Scatter/gather I/O that spans
|
||||
multiple non-adjacent registered buffers cannot use the zero-copy path.
|
||||
|
||||
Control Commands
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
- ``UBLK_U_CMD_REG_BUF``
|
||||
|
||||
Register a shared memory buffer. ``ctrl_cmd.addr`` points to a
|
||||
``struct ublk_shmem_buf_reg`` containing the buffer virtual address and size.
|
||||
Returns the assigned buffer index (>= 0) on success. The kernel pins
|
||||
pages and builds the PFN lookup tree. Queue freeze is handled
|
||||
internally.
|
||||
|
||||
- ``UBLK_U_CMD_UNREG_BUF``
|
||||
|
||||
Unregister a previously registered buffer. ``ctrl_cmd.data[0]`` is the
|
||||
buffer index. Unpins pages and removes PFN entries from the lookup
|
||||
tree.
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
|
||||
@@ -27015,7 +27015,7 @@ F: Documentation/filesystems/ubifs.rst
|
||||
F: fs/ubifs/
|
||||
|
||||
UBLK USERSPACE BLOCK DRIVER
|
||||
M: Ming Lei <ming.lei@redhat.com>
|
||||
M: Ming Lei <tom.leiming@gmail.com>
|
||||
L: linux-block@vger.kernel.org
|
||||
S: Maintained
|
||||
F: Documentation/block/ublk.rst
|
||||
|
||||
319
block/bio.c
319
block/bio.c
@@ -18,6 +18,7 @@
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/blk-crypto.h>
|
||||
#include <linux/xarray.h>
|
||||
#include <linux/kmemleak.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
#include "blk.h"
|
||||
@@ -34,6 +35,8 @@ struct bio_alloc_cache {
|
||||
unsigned int nr_irq;
|
||||
};
|
||||
|
||||
#define BIO_INLINE_VECS 4
|
||||
|
||||
static struct biovec_slab {
|
||||
int nr_vecs;
|
||||
char *name;
|
||||
@@ -114,6 +117,11 @@ static inline unsigned int bs_bio_slab_size(struct bio_set *bs)
|
||||
return bs->front_pad + sizeof(struct bio) + bs->back_pad;
|
||||
}
|
||||
|
||||
static inline void *bio_slab_addr(struct bio *bio)
|
||||
{
|
||||
return (void *)bio - bio->bi_pool->front_pad;
|
||||
}
|
||||
|
||||
static struct kmem_cache *bio_find_or_create_slab(struct bio_set *bs)
|
||||
{
|
||||
unsigned int size = bs_bio_slab_size(bs);
|
||||
@@ -159,57 +167,16 @@ out:
|
||||
mutex_unlock(&bio_slab_lock);
|
||||
}
|
||||
|
||||
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs)
|
||||
{
|
||||
BUG_ON(nr_vecs > BIO_MAX_VECS);
|
||||
|
||||
if (nr_vecs == BIO_MAX_VECS)
|
||||
mempool_free(bv, pool);
|
||||
else if (nr_vecs > BIO_INLINE_VECS)
|
||||
kmem_cache_free(biovec_slab(nr_vecs)->slab, bv);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make the first allocation restricted and don't dump info on allocation
|
||||
* failures, since we'll fall back to the mempool in case of failure.
|
||||
*/
|
||||
static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
|
||||
static inline gfp_t try_alloc_gfp(gfp_t gfp)
|
||||
{
|
||||
return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) |
|
||||
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
|
||||
}
|
||||
|
||||
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct biovec_slab *bvs = biovec_slab(*nr_vecs);
|
||||
|
||||
if (WARN_ON_ONCE(!bvs))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Upgrade the nr_vecs request to take full advantage of the allocation.
|
||||
* We also rely on this in the bvec_free path.
|
||||
*/
|
||||
*nr_vecs = bvs->nr_vecs;
|
||||
|
||||
/*
|
||||
* Try a slab allocation first for all smaller allocations. If that
|
||||
* fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
|
||||
* The mempool is sized to handle up to BIO_MAX_VECS entries.
|
||||
*/
|
||||
if (*nr_vecs < BIO_MAX_VECS) {
|
||||
struct bio_vec *bvl;
|
||||
|
||||
bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
|
||||
if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
|
||||
return bvl;
|
||||
*nr_vecs = BIO_MAX_VECS;
|
||||
}
|
||||
|
||||
return mempool_alloc(pool, gfp_mask);
|
||||
}
|
||||
|
||||
void bio_uninit(struct bio *bio)
|
||||
{
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
@@ -231,9 +198,14 @@ static void bio_free(struct bio *bio)
|
||||
void *p = bio;
|
||||
|
||||
WARN_ON_ONCE(!bs);
|
||||
WARN_ON_ONCE(bio->bi_max_vecs > BIO_MAX_VECS);
|
||||
|
||||
bio_uninit(bio);
|
||||
bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
|
||||
if (bio->bi_max_vecs == BIO_MAX_VECS)
|
||||
mempool_free(bio->bi_io_vec, &bs->bvec_pool);
|
||||
else if (bio->bi_max_vecs > BIO_INLINE_VECS)
|
||||
kmem_cache_free(biovec_slab(bio->bi_max_vecs)->slab,
|
||||
bio->bi_io_vec);
|
||||
mempool_free(p - bs->front_pad, &bs->bio_pool);
|
||||
}
|
||||
|
||||
@@ -430,13 +402,31 @@ static void bio_alloc_rescue(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* submit_bio_noacct() converts recursion to iteration; this means if we're
|
||||
* running beneath it, any bios we allocate and submit will not be submitted
|
||||
* (and thus freed) until after we return.
|
||||
*
|
||||
* This exposes us to a potential deadlock if we allocate multiple bios from the
|
||||
* same bio_set while running underneath submit_bio_noacct(). If we were to
|
||||
* allocate multiple bios (say a stacking block driver that was splitting bios),
|
||||
* we would deadlock if we exhausted the mempool's reserve.
|
||||
*
|
||||
* We solve this, and guarantee forward progress by punting the bios on
|
||||
* current->bio_list to a per bio_set rescuer workqueue before blocking to wait
|
||||
* for elements being returned to the mempool.
|
||||
*/
|
||||
static void punt_bios_to_rescuer(struct bio_set *bs)
|
||||
{
|
||||
struct bio_list punt, nopunt;
|
||||
struct bio *bio;
|
||||
|
||||
if (WARN_ON_ONCE(!bs->rescue_workqueue))
|
||||
if (!current->bio_list || !bs->rescue_workqueue)
|
||||
return;
|
||||
if (bio_list_empty(¤t->bio_list[0]) &&
|
||||
bio_list_empty(¤t->bio_list[1]))
|
||||
return;
|
||||
|
||||
/*
|
||||
* In order to guarantee forward progress we must punt only bios that
|
||||
* were allocated from this bio_set; otherwise, if there was a bio on
|
||||
@@ -483,9 +473,7 @@ static void bio_alloc_irq_cache_splice(struct bio_alloc_cache *cache)
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
|
||||
unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp,
|
||||
struct bio_set *bs)
|
||||
static struct bio *bio_alloc_percpu_cache(struct bio_set *bs)
|
||||
{
|
||||
struct bio_alloc_cache *cache;
|
||||
struct bio *bio;
|
||||
@@ -503,12 +491,10 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
|
||||
cache->free_list = bio->bi_next;
|
||||
cache->nr--;
|
||||
put_cpu();
|
||||
|
||||
if (nr_vecs)
|
||||
bio_init_inline(bio, bdev, nr_vecs, opf);
|
||||
else
|
||||
bio_init(bio, bdev, NULL, nr_vecs, opf);
|
||||
bio->bi_pool = bs;
|
||||
|
||||
kmemleak_alloc(bio_slab_addr(bio),
|
||||
kmem_cache_size(bs->bio_slab), 1, GFP_NOIO);
|
||||
return bio;
|
||||
}
|
||||
|
||||
@@ -517,7 +503,7 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
|
||||
* @bdev: block device to allocate the bio for (can be %NULL)
|
||||
* @nr_vecs: number of bvecs to pre-allocate
|
||||
* @opf: operation and flags for bio
|
||||
* @gfp_mask: the GFP_* mask given to the slab allocator
|
||||
* @gfp: the GFP_* mask given to the slab allocator
|
||||
* @bs: the bio_set to allocate from.
|
||||
*
|
||||
* Allocate a bio from the mempools in @bs.
|
||||
@@ -547,91 +533,77 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
|
||||
* Returns: Pointer to new bio on success, NULL on failure.
|
||||
*/
|
||||
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
|
||||
blk_opf_t opf, gfp_t gfp_mask,
|
||||
struct bio_set *bs)
|
||||
blk_opf_t opf, gfp_t gfp, struct bio_set *bs)
|
||||
{
|
||||
gfp_t saved_gfp = gfp_mask;
|
||||
struct bio *bio;
|
||||
struct bio_vec *bvecs = NULL;
|
||||
struct bio *bio = NULL;
|
||||
gfp_t saved_gfp = gfp;
|
||||
void *p;
|
||||
|
||||
/* should not use nobvec bioset for nr_vecs > 0 */
|
||||
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
|
||||
return NULL;
|
||||
|
||||
gfp = try_alloc_gfp(gfp);
|
||||
if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
|
||||
opf |= REQ_ALLOC_CACHE;
|
||||
bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
|
||||
gfp_mask, bs);
|
||||
if (bio)
|
||||
return bio;
|
||||
/*
|
||||
* No cached bio available, bio returned below marked with
|
||||
* REQ_ALLOC_CACHE to participate in per-cpu alloc cache.
|
||||
* Set REQ_ALLOC_CACHE even if no cached bio is available to
|
||||
* return the allocated bio to the percpu cache when done.
|
||||
*/
|
||||
} else
|
||||
opf &= ~REQ_ALLOC_CACHE;
|
||||
|
||||
/*
|
||||
* submit_bio_noacct() converts recursion to iteration; this means if
|
||||
* we're running beneath it, any bios we allocate and submit will not be
|
||||
* submitted (and thus freed) until after we return.
|
||||
*
|
||||
* This exposes us to a potential deadlock if we allocate multiple bios
|
||||
* from the same bio_set() while running underneath submit_bio_noacct().
|
||||
* If we were to allocate multiple bios (say a stacking block driver
|
||||
* that was splitting bios), we would deadlock if we exhausted the
|
||||
* mempool's reserve.
|
||||
*
|
||||
* We solve this, and guarantee forward progress, with a rescuer
|
||||
* workqueue per bio_set. If we go to allocate and there are bios on
|
||||
* current->bio_list, we first try the allocation without
|
||||
* __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would be
|
||||
* blocking to the rescuer workqueue before we retry with the original
|
||||
* gfp_flags.
|
||||
*/
|
||||
if (current->bio_list &&
|
||||
(!bio_list_empty(¤t->bio_list[0]) ||
|
||||
!bio_list_empty(¤t->bio_list[1])) &&
|
||||
bs->rescue_workqueue)
|
||||
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
|
||||
|
||||
p = mempool_alloc(&bs->bio_pool, gfp_mask);
|
||||
if (!p && gfp_mask != saved_gfp) {
|
||||
punt_bios_to_rescuer(bs);
|
||||
gfp_mask = saved_gfp;
|
||||
p = mempool_alloc(&bs->bio_pool, gfp_mask);
|
||||
}
|
||||
if (unlikely(!p))
|
||||
return NULL;
|
||||
if (!mempool_is_saturated(&bs->bio_pool))
|
||||
opf &= ~REQ_ALLOC_CACHE;
|
||||
|
||||
bio = p + bs->front_pad;
|
||||
if (nr_vecs > BIO_INLINE_VECS) {
|
||||
struct bio_vec *bvl = NULL;
|
||||
|
||||
bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
|
||||
if (!bvl && gfp_mask != saved_gfp) {
|
||||
punt_bios_to_rescuer(bs);
|
||||
gfp_mask = saved_gfp;
|
||||
bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
|
||||
}
|
||||
if (unlikely(!bvl))
|
||||
goto err_free;
|
||||
|
||||
bio_init(bio, bdev, bvl, nr_vecs, opf);
|
||||
} else if (nr_vecs) {
|
||||
bio_init_inline(bio, bdev, BIO_INLINE_VECS, opf);
|
||||
opf |= REQ_ALLOC_CACHE;
|
||||
bio = bio_alloc_percpu_cache(bs);
|
||||
} else {
|
||||
bio_init(bio, bdev, NULL, 0, opf);
|
||||
opf &= ~REQ_ALLOC_CACHE;
|
||||
p = kmem_cache_alloc(bs->bio_slab, gfp);
|
||||
if (p)
|
||||
bio = p + bs->front_pad;
|
||||
}
|
||||
|
||||
if (bio && nr_vecs > BIO_INLINE_VECS) {
|
||||
struct biovec_slab *bvs = biovec_slab(nr_vecs);
|
||||
|
||||
/*
|
||||
* Upgrade nr_vecs to take full advantage of the allocation.
|
||||
* We also rely on this in bio_free().
|
||||
*/
|
||||
nr_vecs = bvs->nr_vecs;
|
||||
bvecs = kmem_cache_alloc(bvs->slab, gfp);
|
||||
if (unlikely(!bvecs)) {
|
||||
kmem_cache_free(bs->bio_slab, p);
|
||||
bio = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(!bio)) {
|
||||
/*
|
||||
* Give up if we are not allow to sleep as non-blocking mempool
|
||||
* allocations just go back to the slab allocation.
|
||||
*/
|
||||
if (!(saved_gfp & __GFP_DIRECT_RECLAIM))
|
||||
return NULL;
|
||||
|
||||
punt_bios_to_rescuer(bs);
|
||||
|
||||
/*
|
||||
* Don't rob the mempools by returning to the per-CPU cache if
|
||||
* we're tight on memory.
|
||||
*/
|
||||
opf &= ~REQ_ALLOC_CACHE;
|
||||
|
||||
p = mempool_alloc(&bs->bio_pool, saved_gfp);
|
||||
bio = p + bs->front_pad;
|
||||
if (nr_vecs > BIO_INLINE_VECS) {
|
||||
nr_vecs = BIO_MAX_VECS;
|
||||
bvecs = mempool_alloc(&bs->bvec_pool, saved_gfp);
|
||||
}
|
||||
}
|
||||
|
||||
if (nr_vecs && nr_vecs <= BIO_INLINE_VECS)
|
||||
bio_init_inline(bio, bdev, nr_vecs, opf);
|
||||
else
|
||||
bio_init(bio, bdev, bvecs, nr_vecs, opf);
|
||||
bio->bi_pool = bs;
|
||||
return bio;
|
||||
|
||||
err_free:
|
||||
mempool_free(p, &bs->bio_pool);
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(bio_alloc_bioset);
|
||||
|
||||
@@ -765,6 +737,9 @@ static int __bio_alloc_cache_prune(struct bio_alloc_cache *cache,
|
||||
while ((bio = cache->free_list) != NULL) {
|
||||
cache->free_list = bio->bi_next;
|
||||
cache->nr--;
|
||||
kmemleak_alloc(bio_slab_addr(bio),
|
||||
kmem_cache_size(bio->bi_pool->bio_slab),
|
||||
1, GFP_KERNEL);
|
||||
bio_free(bio);
|
||||
if (++i == nr)
|
||||
break;
|
||||
@@ -828,6 +803,7 @@ static inline void bio_put_percpu_cache(struct bio *bio)
|
||||
bio->bi_bdev = NULL;
|
||||
cache->free_list = bio;
|
||||
cache->nr++;
|
||||
kmemleak_free(bio_slab_addr(bio));
|
||||
} else if (in_hardirq()) {
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
@@ -835,6 +811,7 @@ static inline void bio_put_percpu_cache(struct bio *bio)
|
||||
bio->bi_next = cache->free_list_irq;
|
||||
cache->free_list_irq = bio;
|
||||
cache->nr_irq++;
|
||||
kmemleak_free(bio_slab_addr(bio));
|
||||
} else {
|
||||
goto out_free;
|
||||
}
|
||||
@@ -897,10 +874,11 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
|
||||
* @gfp: allocation priority
|
||||
* @bs: bio_set to allocate from
|
||||
*
|
||||
* Allocate a new bio that is a clone of @bio_src. The caller owns the returned
|
||||
* bio, but not the actual data it points to.
|
||||
*
|
||||
* The caller must ensure that the return bio is not freed before @bio_src.
|
||||
* Allocate a new bio that is a clone of @bio_src. This reuses the bio_vecs
|
||||
* pointed to by @bio_src->bi_io_vec, and clones the iterator pointing to
|
||||
* the current position in it. The caller owns the returned bio, but not
|
||||
* the bio_vecs, and must ensure the bio is freed before the memory
|
||||
* pointed to by @bio_src->bi_io_vec.
|
||||
*/
|
||||
struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
|
||||
gfp_t gfp, struct bio_set *bs)
|
||||
@@ -929,9 +907,7 @@ EXPORT_SYMBOL(bio_alloc_clone);
|
||||
* @gfp: allocation priority
|
||||
*
|
||||
* Initialize a new bio in caller provided memory that is a clone of @bio_src.
|
||||
* The caller owns the returned bio, but not the actual data it points to.
|
||||
*
|
||||
* The caller must ensure that @bio_src is not freed before @bio.
|
||||
* The same bio_vecs reuse and bio lifetime rules as bio_alloc_clone() apply.
|
||||
*/
|
||||
int bio_init_clone(struct block_device *bdev, struct bio *bio,
|
||||
struct bio *bio_src, gfp_t gfp)
|
||||
@@ -1064,6 +1040,8 @@ int bio_add_page(struct bio *bio, struct page *page,
|
||||
{
|
||||
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
|
||||
return 0;
|
||||
if (WARN_ON_ONCE(len == 0))
|
||||
return 0;
|
||||
if (bio->bi_iter.bi_size > BIO_MAX_SIZE - len)
|
||||
return 0;
|
||||
|
||||
@@ -1484,11 +1462,41 @@ void bio_iov_iter_unbounce(struct bio *bio, bool is_error, bool mark_dirty)
|
||||
bio_iov_iter_unbounce_read(bio, is_error, mark_dirty);
|
||||
}
|
||||
|
||||
static void submit_bio_wait_endio(struct bio *bio)
|
||||
static void bio_wait_end_io(struct bio *bio)
|
||||
{
|
||||
complete(bio->bi_private);
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_await - call a function on a bio, and wait until it completes
|
||||
* @bio: the bio which describes the I/O
|
||||
* @submit: function called to submit the bio
|
||||
* @priv: private data passed to @submit
|
||||
*
|
||||
* Wait for the bio as well as any bio chained off it after executing the
|
||||
* passed in callback @submit. The wait for the bio is set up before calling
|
||||
* @submit to ensure that the completion is captured. If @submit is %NULL,
|
||||
* submit_bio() is used instead to submit the bio.
|
||||
*
|
||||
* Note: this overrides the bi_private and bi_end_io fields in the bio.
|
||||
*/
|
||||
void bio_await(struct bio *bio, void *priv,
|
||||
void (*submit)(struct bio *bio, void *priv))
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK_MAP(done,
|
||||
bio->bi_bdev->bd_disk->lockdep_map);
|
||||
|
||||
bio->bi_private = &done;
|
||||
bio->bi_end_io = bio_wait_end_io;
|
||||
bio->bi_opf |= REQ_SYNC;
|
||||
if (submit)
|
||||
submit(bio, priv);
|
||||
else
|
||||
submit_bio(bio);
|
||||
blk_wait_io(&done);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_await);
|
||||
|
||||
/**
|
||||
* submit_bio_wait - submit a bio, and wait until it completes
|
||||
* @bio: The &struct bio which describes the I/O
|
||||
@@ -1502,19 +1510,30 @@ static void submit_bio_wait_endio(struct bio *bio)
|
||||
*/
|
||||
int submit_bio_wait(struct bio *bio)
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK_MAP(done,
|
||||
bio->bi_bdev->bd_disk->lockdep_map);
|
||||
|
||||
bio->bi_private = &done;
|
||||
bio->bi_end_io = submit_bio_wait_endio;
|
||||
bio->bi_opf |= REQ_SYNC;
|
||||
submit_bio(bio);
|
||||
blk_wait_io(&done);
|
||||
|
||||
bio_await(bio, NULL, NULL);
|
||||
return blk_status_to_errno(bio->bi_status);
|
||||
}
|
||||
EXPORT_SYMBOL(submit_bio_wait);
|
||||
|
||||
static void bio_endio_cb(struct bio *bio, void *priv)
|
||||
{
|
||||
bio_endio(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* Submit @bio synchronously, or call bio_endio on it if the current process
|
||||
* is being killed.
|
||||
*/
|
||||
int bio_submit_or_kill(struct bio *bio, unsigned int flags)
|
||||
{
|
||||
if ((flags & BLKDEV_ZERO_KILLABLE) && fatal_signal_pending(current)) {
|
||||
bio_await(bio, NULL, bio_endio_cb);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
return submit_bio_wait(bio);
|
||||
}
|
||||
|
||||
/**
|
||||
* bdev_rw_virt - synchronously read into / write from kernel mapping
|
||||
* @bdev: block device to access
|
||||
@@ -1545,26 +1564,6 @@ int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bdev_rw_virt);
|
||||
|
||||
static void bio_wait_end_io(struct bio *bio)
|
||||
{
|
||||
complete(bio->bi_private);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* bio_await_chain - ends @bio and waits for every chained bio to complete
|
||||
*/
|
||||
void bio_await_chain(struct bio *bio)
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK_MAP(done,
|
||||
bio->bi_bdev->bd_disk->lockdep_map);
|
||||
|
||||
bio->bi_private = &done;
|
||||
bio->bi_end_io = bio_wait_end_io;
|
||||
bio_endio(bio);
|
||||
blk_wait_io(&done);
|
||||
}
|
||||
|
||||
void __bio_advance(struct bio *bio, unsigned bytes)
|
||||
{
|
||||
if (bio_integrity(bio))
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/wait_bit.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/resume_user_mode.h>
|
||||
@@ -611,6 +612,8 @@ restart:
|
||||
|
||||
q->root_blkg = NULL;
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
|
||||
wake_up_var(&q->root_blkg);
|
||||
}
|
||||
|
||||
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
@@ -1498,6 +1501,18 @@ int blkcg_init_disk(struct gendisk *disk)
|
||||
struct blkcg_gq *new_blkg, *blkg;
|
||||
bool preloaded;
|
||||
|
||||
/*
|
||||
* If the queue is shared across disk rebind (e.g., SCSI), the
|
||||
* previous disk's blkcg state is cleaned up asynchronously via
|
||||
* disk_release() -> blkcg_exit_disk(). Wait for that cleanup to
|
||||
* finish (indicated by root_blkg becoming NULL) before setting up
|
||||
* new blkcg state. Otherwise, we may overwrite q->root_blkg while
|
||||
* the old one is still alive, and radix_tree_insert() in
|
||||
* blkg_create() will fail with -EEXIST because the old entries
|
||||
* still occupy the same queue id slot in blkcg->blkg_tree.
|
||||
*/
|
||||
wait_var_event(&q->root_blkg, !READ_ONCE(q->root_blkg));
|
||||
|
||||
new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
|
||||
if (!new_blkg)
|
||||
return -ENOMEM;
|
||||
@@ -2022,6 +2037,7 @@ void blkcg_maybe_throttle_current(void)
|
||||
return;
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
put_disk(disk);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -18,7 +18,7 @@ struct blk_crypto_kobj {
|
||||
struct blk_crypto_attr {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct blk_crypto_profile *profile,
|
||||
struct blk_crypto_attr *attr, char *page);
|
||||
const struct blk_crypto_attr *attr, char *page);
|
||||
};
|
||||
|
||||
static struct blk_crypto_profile *kobj_to_crypto_profile(struct kobject *kobj)
|
||||
@@ -26,39 +26,39 @@ static struct blk_crypto_profile *kobj_to_crypto_profile(struct kobject *kobj)
|
||||
return container_of(kobj, struct blk_crypto_kobj, kobj)->profile;
|
||||
}
|
||||
|
||||
static struct blk_crypto_attr *attr_to_crypto_attr(struct attribute *attr)
|
||||
static const struct blk_crypto_attr *attr_to_crypto_attr(const struct attribute *attr)
|
||||
{
|
||||
return container_of(attr, struct blk_crypto_attr, attr);
|
||||
return container_of_const(attr, struct blk_crypto_attr, attr);
|
||||
}
|
||||
|
||||
static ssize_t hw_wrapped_keys_show(struct blk_crypto_profile *profile,
|
||||
struct blk_crypto_attr *attr, char *page)
|
||||
const struct blk_crypto_attr *attr, char *page)
|
||||
{
|
||||
/* Always show supported, since the file doesn't exist otherwise. */
|
||||
return sysfs_emit(page, "supported\n");
|
||||
}
|
||||
|
||||
static ssize_t max_dun_bits_show(struct blk_crypto_profile *profile,
|
||||
struct blk_crypto_attr *attr, char *page)
|
||||
const struct blk_crypto_attr *attr, char *page)
|
||||
{
|
||||
return sysfs_emit(page, "%u\n", 8 * profile->max_dun_bytes_supported);
|
||||
}
|
||||
|
||||
static ssize_t num_keyslots_show(struct blk_crypto_profile *profile,
|
||||
struct blk_crypto_attr *attr, char *page)
|
||||
const struct blk_crypto_attr *attr, char *page)
|
||||
{
|
||||
return sysfs_emit(page, "%u\n", profile->num_slots);
|
||||
}
|
||||
|
||||
static ssize_t raw_keys_show(struct blk_crypto_profile *profile,
|
||||
struct blk_crypto_attr *attr, char *page)
|
||||
const struct blk_crypto_attr *attr, char *page)
|
||||
{
|
||||
/* Always show supported, since the file doesn't exist otherwise. */
|
||||
return sysfs_emit(page, "supported\n");
|
||||
}
|
||||
|
||||
#define BLK_CRYPTO_RO_ATTR(_name) \
|
||||
static struct blk_crypto_attr _name##_attr = __ATTR_RO(_name)
|
||||
static const struct blk_crypto_attr _name##_attr = __ATTR_RO(_name)
|
||||
|
||||
BLK_CRYPTO_RO_ATTR(hw_wrapped_keys);
|
||||
BLK_CRYPTO_RO_ATTR(max_dun_bits);
|
||||
@@ -66,10 +66,10 @@ BLK_CRYPTO_RO_ATTR(num_keyslots);
|
||||
BLK_CRYPTO_RO_ATTR(raw_keys);
|
||||
|
||||
static umode_t blk_crypto_is_visible(struct kobject *kobj,
|
||||
struct attribute *attr, int n)
|
||||
const struct attribute *attr, int n)
|
||||
{
|
||||
struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj);
|
||||
struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
|
||||
const struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
|
||||
|
||||
if (a == &hw_wrapped_keys_attr &&
|
||||
!(profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED))
|
||||
@@ -81,7 +81,7 @@ static umode_t blk_crypto_is_visible(struct kobject *kobj,
|
||||
return 0444;
|
||||
}
|
||||
|
||||
static struct attribute *blk_crypto_attrs[] = {
|
||||
static const struct attribute *const blk_crypto_attrs[] = {
|
||||
&hw_wrapped_keys_attr.attr,
|
||||
&max_dun_bits_attr.attr,
|
||||
&num_keyslots_attr.attr,
|
||||
@@ -90,8 +90,8 @@ static struct attribute *blk_crypto_attrs[] = {
|
||||
};
|
||||
|
||||
static const struct attribute_group blk_crypto_attr_group = {
|
||||
.attrs = blk_crypto_attrs,
|
||||
.is_visible = blk_crypto_is_visible,
|
||||
.attrs_const = blk_crypto_attrs,
|
||||
.is_visible_const = blk_crypto_is_visible,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -99,13 +99,13 @@ static const struct attribute_group blk_crypto_attr_group = {
|
||||
* modes, these are initialized at boot time by blk_crypto_sysfs_init().
|
||||
*/
|
||||
static struct blk_crypto_attr __blk_crypto_mode_attrs[BLK_ENCRYPTION_MODE_MAX];
|
||||
static struct attribute *blk_crypto_mode_attrs[BLK_ENCRYPTION_MODE_MAX + 1];
|
||||
static const struct attribute *blk_crypto_mode_attrs[BLK_ENCRYPTION_MODE_MAX + 1];
|
||||
|
||||
static umode_t blk_crypto_mode_is_visible(struct kobject *kobj,
|
||||
struct attribute *attr, int n)
|
||||
const struct attribute *attr, int n)
|
||||
{
|
||||
struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj);
|
||||
struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
|
||||
const struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
|
||||
int mode_num = a - __blk_crypto_mode_attrs;
|
||||
|
||||
if (profile->modes_supported[mode_num])
|
||||
@@ -114,7 +114,7 @@ static umode_t blk_crypto_mode_is_visible(struct kobject *kobj,
|
||||
}
|
||||
|
||||
static ssize_t blk_crypto_mode_show(struct blk_crypto_profile *profile,
|
||||
struct blk_crypto_attr *attr, char *page)
|
||||
const struct blk_crypto_attr *attr, char *page)
|
||||
{
|
||||
int mode_num = attr - __blk_crypto_mode_attrs;
|
||||
|
||||
@@ -123,8 +123,8 @@ static ssize_t blk_crypto_mode_show(struct blk_crypto_profile *profile,
|
||||
|
||||
static const struct attribute_group blk_crypto_modes_attr_group = {
|
||||
.name = "modes",
|
||||
.attrs = blk_crypto_mode_attrs,
|
||||
.is_visible = blk_crypto_mode_is_visible,
|
||||
.attrs_const = blk_crypto_mode_attrs,
|
||||
.is_visible_const = blk_crypto_mode_is_visible,
|
||||
};
|
||||
|
||||
static const struct attribute_group *blk_crypto_attr_groups[] = {
|
||||
@@ -137,7 +137,7 @@ static ssize_t blk_crypto_attr_show(struct kobject *kobj,
|
||||
struct attribute *attr, char *page)
|
||||
{
|
||||
struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj);
|
||||
struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
|
||||
const struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
|
||||
|
||||
return a->show(profile, a, page);
|
||||
}
|
||||
|
||||
@@ -30,17 +30,17 @@ struct blk_ia_range_sysfs_entry {
|
||||
ssize_t (*show)(struct blk_independent_access_range *iar, char *buf);
|
||||
};
|
||||
|
||||
static struct blk_ia_range_sysfs_entry blk_ia_range_sector_entry = {
|
||||
static const struct blk_ia_range_sysfs_entry blk_ia_range_sector_entry = {
|
||||
.attr = { .name = "sector", .mode = 0444 },
|
||||
.show = blk_ia_range_sector_show,
|
||||
};
|
||||
|
||||
static struct blk_ia_range_sysfs_entry blk_ia_range_nr_sectors_entry = {
|
||||
static const struct blk_ia_range_sysfs_entry blk_ia_range_nr_sectors_entry = {
|
||||
.attr = { .name = "nr_sectors", .mode = 0444 },
|
||||
.show = blk_ia_range_nr_sectors_show,
|
||||
};
|
||||
|
||||
static struct attribute *blk_ia_range_attrs[] = {
|
||||
static const struct attribute *const blk_ia_range_attrs[] = {
|
||||
&blk_ia_range_sector_entry.attr,
|
||||
&blk_ia_range_nr_sectors_entry.attr,
|
||||
NULL,
|
||||
|
||||
@@ -1596,7 +1596,8 @@ static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer)
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p)
|
||||
static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p,
|
||||
u32 *nr_done)
|
||||
{
|
||||
u32 nr_met[2] = { };
|
||||
u32 nr_missed[2] = { };
|
||||
@@ -1633,6 +1634,8 @@ static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p
|
||||
|
||||
*rq_wait_pct_p = div64_u64(rq_wait_ns * 100,
|
||||
ioc->period_us * NSEC_PER_USEC);
|
||||
|
||||
*nr_done = nr_met[READ] + nr_met[WRITE] + nr_missed[READ] + nr_missed[WRITE];
|
||||
}
|
||||
|
||||
/* was iocg idle this period? */
|
||||
@@ -2250,12 +2253,12 @@ static void ioc_timer_fn(struct timer_list *timer)
|
||||
u64 usage_us_sum = 0;
|
||||
u32 ppm_rthr;
|
||||
u32 ppm_wthr;
|
||||
u32 missed_ppm[2], rq_wait_pct;
|
||||
u32 missed_ppm[2], rq_wait_pct, nr_done;
|
||||
u64 period_vtime;
|
||||
int prev_busy_level;
|
||||
|
||||
/* how were the latencies during the period? */
|
||||
ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct);
|
||||
ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct, &nr_done);
|
||||
|
||||
/* take care of active iocgs */
|
||||
spin_lock_irq(&ioc->lock);
|
||||
@@ -2397,9 +2400,17 @@ static void ioc_timer_fn(struct timer_list *timer)
|
||||
* and should increase vtime rate.
|
||||
*/
|
||||
prev_busy_level = ioc->busy_level;
|
||||
if (rq_wait_pct > RQ_WAIT_BUSY_PCT ||
|
||||
missed_ppm[READ] > ppm_rthr ||
|
||||
missed_ppm[WRITE] > ppm_wthr) {
|
||||
if (!nr_done && nr_lagging) {
|
||||
/*
|
||||
* When there are lagging IOs but no completions, we don't
|
||||
* know if the IO latency will meet the QoS targets. The
|
||||
* disk might be saturated or not. We should not reset
|
||||
* busy_level to 0 (which would prevent vrate from scaling
|
||||
* up or down), but rather to keep it unchanged.
|
||||
*/
|
||||
} else if (rq_wait_pct > RQ_WAIT_BUSY_PCT ||
|
||||
missed_ppm[READ] > ppm_rthr ||
|
||||
missed_ppm[WRITE] > ppm_wthr) {
|
||||
/* clearly missing QoS targets, slow down vrate */
|
||||
ioc->busy_level = max(ioc->busy_level, 0);
|
||||
ioc->busy_level++;
|
||||
|
||||
@@ -155,13 +155,7 @@ static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
|
||||
__blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio,
|
||||
flags, limit);
|
||||
if (bio) {
|
||||
if ((flags & BLKDEV_ZERO_KILLABLE) &&
|
||||
fatal_signal_pending(current)) {
|
||||
bio_await_chain(bio);
|
||||
blk_finish_plug(&plug);
|
||||
return -EINTR;
|
||||
}
|
||||
ret = submit_bio_wait(bio);
|
||||
ret = bio_submit_or_kill(bio, flags);
|
||||
bio_put(bio);
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
@@ -236,13 +230,7 @@ static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector,
|
||||
blk_start_plug(&plug);
|
||||
__blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags);
|
||||
if (bio) {
|
||||
if ((flags & BLKDEV_ZERO_KILLABLE) &&
|
||||
fatal_signal_pending(current)) {
|
||||
bio_await_chain(bio);
|
||||
blk_finish_plug(&plug);
|
||||
return -EINTR;
|
||||
}
|
||||
ret = submit_bio_wait(bio);
|
||||
ret = bio_submit_or_kill(bio, flags);
|
||||
bio_put(bio);
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
@@ -97,6 +97,7 @@ static const char *const blk_queue_flag_name[] = {
|
||||
QUEUE_FLAG_NAME(NO_ELV_SWITCH),
|
||||
QUEUE_FLAG_NAME(QOS_ENABLED),
|
||||
QUEUE_FLAG_NAME(BIO_ISSUE_TIME),
|
||||
QUEUE_FLAG_NAME(ZONED_QD1_WRITES),
|
||||
};
|
||||
#undef QUEUE_FLAG_NAME
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
|
||||
struct request_queue *q;
|
||||
ssize_t res;
|
||||
|
||||
entry = container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
|
||||
entry = container_of_const(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
|
||||
hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj);
|
||||
q = hctx->queue;
|
||||
|
||||
@@ -101,20 +101,20 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
|
||||
return pos + ret;
|
||||
}
|
||||
|
||||
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
|
||||
static const struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
|
||||
.attr = {.name = "nr_tags", .mode = 0444 },
|
||||
.show = blk_mq_hw_sysfs_nr_tags_show,
|
||||
};
|
||||
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
|
||||
static const struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
|
||||
.attr = {.name = "nr_reserved_tags", .mode = 0444 },
|
||||
.show = blk_mq_hw_sysfs_nr_reserved_tags_show,
|
||||
};
|
||||
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
|
||||
static const struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
|
||||
.attr = {.name = "cpu_list", .mode = 0444 },
|
||||
.show = blk_mq_hw_sysfs_cpus_show,
|
||||
};
|
||||
|
||||
static struct attribute *default_hw_ctx_attrs[] = {
|
||||
static const struct attribute *const default_hw_ctx_attrs[] = {
|
||||
&blk_mq_hw_sysfs_nr_tags.attr,
|
||||
&blk_mq_hw_sysfs_nr_reserved_tags.attr,
|
||||
&blk_mq_hw_sysfs_cpus.attr,
|
||||
|
||||
@@ -3424,6 +3424,25 @@ EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
|
||||
*/
|
||||
void blk_steal_bios(struct bio_list *list, struct request *rq)
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
for (bio = rq->bio; bio; bio = bio->bi_next) {
|
||||
if (bio->bi_opf & REQ_POLLED) {
|
||||
bio->bi_opf &= ~REQ_POLLED;
|
||||
bio->bi_cookie = BLK_QC_T_NONE;
|
||||
}
|
||||
/*
|
||||
* The alternate request queue that we may end up submitting
|
||||
* the bio to may be frozen temporarily, in this case REQ_NOWAIT
|
||||
* will fail the I/O immediately with EAGAIN to the issuer.
|
||||
* We are not in the issuer context which cannot block. Clear
|
||||
* the flag to avoid spurious EAGAIN I/O failures.
|
||||
*/
|
||||
bio->bi_opf &= ~REQ_NOWAIT;
|
||||
bio_clear_flag(bio, BIO_QOS_THROTTLED);
|
||||
bio_clear_flag(bio, BIO_QOS_MERGED);
|
||||
}
|
||||
|
||||
if (rq->bio) {
|
||||
if (list->tail)
|
||||
list->tail->bi_next = rq->bio;
|
||||
|
||||
@@ -189,11 +189,11 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
|
||||
}
|
||||
|
||||
/*
|
||||
* The PI generation / validation helpers do not expect intervals to
|
||||
* straddle multiple bio_vecs. Enforce alignment so that those are
|
||||
* Some IO controllers can not handle data intervals straddling
|
||||
* multiple bio_vecs. For those, enforce alignment so that those are
|
||||
* never generated, and that each buffer is aligned as expected.
|
||||
*/
|
||||
if (bi->csum_type) {
|
||||
if (!(bi->flags & BLK_SPLIT_INTERVAL_CAPABLE) && bi->csum_type) {
|
||||
lim->dma_alignment = max(lim->dma_alignment,
|
||||
(1U << bi->interval_exp) - 1);
|
||||
}
|
||||
@@ -992,10 +992,14 @@ bool queue_limits_stack_integrity(struct queue_limits *t,
|
||||
if ((ti->flags & BLK_INTEGRITY_REF_TAG) !=
|
||||
(bi->flags & BLK_INTEGRITY_REF_TAG))
|
||||
goto incompatible;
|
||||
if ((ti->flags & BLK_SPLIT_INTERVAL_CAPABLE) &&
|
||||
!(bi->flags & BLK_SPLIT_INTERVAL_CAPABLE))
|
||||
ti->flags &= ~BLK_SPLIT_INTERVAL_CAPABLE;
|
||||
} else {
|
||||
ti->flags = BLK_INTEGRITY_STACKED;
|
||||
ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) |
|
||||
(bi->flags & BLK_INTEGRITY_REF_TAG);
|
||||
(bi->flags & BLK_INTEGRITY_REF_TAG) |
|
||||
(bi->flags & BLK_SPLIT_INTERVAL_CAPABLE);
|
||||
ti->csum_type = bi->csum_type;
|
||||
ti->pi_tuple_size = bi->pi_tuple_size;
|
||||
ti->metadata_size = bi->metadata_size;
|
||||
|
||||
@@ -390,6 +390,36 @@ static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
|
||||
return queue_var_show(disk_nr_zones(disk), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_zoned_qd1_writes_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
return queue_var_show(!!blk_queue_zoned_qd1_writes(disk->queue),
|
||||
page);
|
||||
}
|
||||
|
||||
static ssize_t queue_zoned_qd1_writes_store(struct gendisk *disk,
|
||||
const char *page, size_t count)
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
unsigned long qd1_writes;
|
||||
unsigned int memflags;
|
||||
ssize_t ret;
|
||||
|
||||
ret = queue_var_store(&qd1_writes, page, count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
blk_mq_quiesce_queue(q);
|
||||
if (qd1_writes)
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONED_QD1_WRITES, q);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ZONED_QD1_WRITES, q);
|
||||
blk_mq_unquiesce_queue(q);
|
||||
blk_mq_unfreeze_queue(q, memflags);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page);
|
||||
@@ -551,27 +581,27 @@ static int queue_wc_store(struct gendisk *disk, const char *page,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define QUEUE_RO_ENTRY(_prefix, _name) \
|
||||
static struct queue_sysfs_entry _prefix##_entry = { \
|
||||
.attr = { .name = _name, .mode = 0444 }, \
|
||||
.show = _prefix##_show, \
|
||||
#define QUEUE_RO_ENTRY(_prefix, _name) \
|
||||
static const struct queue_sysfs_entry _prefix##_entry = { \
|
||||
.attr = { .name = _name, .mode = 0444 }, \
|
||||
.show = _prefix##_show, \
|
||||
};
|
||||
|
||||
#define QUEUE_RW_ENTRY(_prefix, _name) \
|
||||
static struct queue_sysfs_entry _prefix##_entry = { \
|
||||
.attr = { .name = _name, .mode = 0644 }, \
|
||||
.show = _prefix##_show, \
|
||||
.store = _prefix##_store, \
|
||||
#define QUEUE_RW_ENTRY(_prefix, _name) \
|
||||
static const struct queue_sysfs_entry _prefix##_entry = { \
|
||||
.attr = { .name = _name, .mode = 0644 }, \
|
||||
.show = _prefix##_show, \
|
||||
.store = _prefix##_store, \
|
||||
};
|
||||
|
||||
#define QUEUE_LIM_RO_ENTRY(_prefix, _name) \
|
||||
static struct queue_sysfs_entry _prefix##_entry = { \
|
||||
static const struct queue_sysfs_entry _prefix##_entry = { \
|
||||
.attr = { .name = _name, .mode = 0444 }, \
|
||||
.show_limit = _prefix##_show, \
|
||||
}
|
||||
|
||||
#define QUEUE_LIM_RW_ENTRY(_prefix, _name) \
|
||||
static struct queue_sysfs_entry _prefix##_entry = { \
|
||||
static const struct queue_sysfs_entry _prefix##_entry = { \
|
||||
.attr = { .name = _name, .mode = 0644 }, \
|
||||
.show_limit = _prefix##_show, \
|
||||
.store_limit = _prefix##_store, \
|
||||
@@ -617,6 +647,7 @@ QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
|
||||
QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
|
||||
|
||||
QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned");
|
||||
QUEUE_RW_ENTRY(queue_zoned_qd1_writes, "zoned_qd1_writes");
|
||||
QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
|
||||
QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones");
|
||||
QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones");
|
||||
@@ -634,7 +665,7 @@ QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
|
||||
QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment");
|
||||
|
||||
/* legacy alias for logical_block_size: */
|
||||
static struct queue_sysfs_entry queue_hw_sector_size_entry = {
|
||||
static const struct queue_sysfs_entry queue_hw_sector_size_entry = {
|
||||
.attr = {.name = "hw_sector_size", .mode = 0444 },
|
||||
.show_limit = queue_logical_block_size_show,
|
||||
};
|
||||
@@ -700,7 +731,7 @@ QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
|
||||
#endif
|
||||
|
||||
/* Common attributes for bio-based and request-based queues. */
|
||||
static struct attribute *queue_attrs[] = {
|
||||
static const struct attribute *const queue_attrs[] = {
|
||||
/*
|
||||
* Attributes which are protected with q->limits_lock.
|
||||
*/
|
||||
@@ -754,12 +785,13 @@ static struct attribute *queue_attrs[] = {
|
||||
&queue_nomerges_entry.attr,
|
||||
&queue_poll_entry.attr,
|
||||
&queue_poll_delay_entry.attr,
|
||||
&queue_zoned_qd1_writes_entry.attr,
|
||||
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* Request-based queue attributes that are not relevant for bio-based queues. */
|
||||
static struct attribute *blk_mq_queue_attrs[] = {
|
||||
static const struct attribute *const blk_mq_queue_attrs[] = {
|
||||
/*
|
||||
* Attributes which require some form of locking other than
|
||||
* q->sysfs_lock.
|
||||
@@ -779,14 +811,15 @@ static struct attribute *blk_mq_queue_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
|
||||
static umode_t queue_attr_visible(struct kobject *kobj, const struct attribute *attr,
|
||||
int n)
|
||||
{
|
||||
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
if ((attr == &queue_max_open_zones_entry.attr ||
|
||||
attr == &queue_max_active_zones_entry.attr) &&
|
||||
attr == &queue_max_active_zones_entry.attr ||
|
||||
attr == &queue_zoned_qd1_writes_entry.attr) &&
|
||||
!blk_queue_is_zoned(q))
|
||||
return 0;
|
||||
|
||||
@@ -794,7 +827,7 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
|
||||
}
|
||||
|
||||
static umode_t blk_mq_queue_attr_visible(struct kobject *kobj,
|
||||
struct attribute *attr, int n)
|
||||
const struct attribute *attr, int n)
|
||||
{
|
||||
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
|
||||
struct request_queue *q = disk->queue;
|
||||
@@ -808,17 +841,17 @@ static umode_t blk_mq_queue_attr_visible(struct kobject *kobj,
|
||||
return attr->mode;
|
||||
}
|
||||
|
||||
static struct attribute_group queue_attr_group = {
|
||||
.attrs = queue_attrs,
|
||||
.is_visible = queue_attr_visible,
|
||||
static const struct attribute_group queue_attr_group = {
|
||||
.attrs_const = queue_attrs,
|
||||
.is_visible_const = queue_attr_visible,
|
||||
};
|
||||
|
||||
static struct attribute_group blk_mq_queue_attr_group = {
|
||||
.attrs = blk_mq_queue_attrs,
|
||||
.is_visible = blk_mq_queue_attr_visible,
|
||||
static const struct attribute_group blk_mq_queue_attr_group = {
|
||||
.attrs_const = blk_mq_queue_attrs,
|
||||
.is_visible_const = blk_mq_queue_attr_visible,
|
||||
};
|
||||
|
||||
#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
|
||||
#define to_queue(atr) container_of_const((atr), struct queue_sysfs_entry, attr)
|
||||
|
||||
static ssize_t
|
||||
queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
|
||||
@@ -934,6 +967,14 @@ int blk_register_queue(struct gendisk *disk)
|
||||
blk_mq_debugfs_register(q);
|
||||
blk_debugfs_unlock(q, memflags);
|
||||
|
||||
/*
|
||||
* For blk-mq rotational zoned devices, default to using QD=1
|
||||
* writes. For non-mq rotational zoned devices, the device driver can
|
||||
* set an appropriate default.
|
||||
*/
|
||||
if (queue_is_mq(q) && blk_queue_rot(q) && blk_queue_is_zoned(q))
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONED_QD1_WRITES, q);
|
||||
|
||||
ret = disk_register_independent_access_ranges(disk);
|
||||
if (ret)
|
||||
goto out_debugfs_remove;
|
||||
|
||||
@@ -782,10 +782,11 @@ void wbt_init_enable_default(struct gendisk *disk)
|
||||
return;
|
||||
|
||||
rwb = wbt_alloc();
|
||||
if (WARN_ON_ONCE(!rwb))
|
||||
if (!rwb)
|
||||
return;
|
||||
|
||||
if (WARN_ON_ONCE(wbt_init(disk, rwb))) {
|
||||
if (wbt_init(disk, rwb)) {
|
||||
pr_warn("%s: failed to enable wbt\n", disk->disk_name);
|
||||
wbt_free(rwb);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/mempool.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/freezer.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
@@ -40,6 +42,8 @@ static const char *const zone_cond_name[] = {
|
||||
/*
|
||||
* Per-zone write plug.
|
||||
* @node: hlist_node structure for managing the plug using a hash table.
|
||||
* @entry: list_head structure for listing the plug in the disk list of active
|
||||
* zone write plugs.
|
||||
* @bio_list: The list of BIOs that are currently plugged.
|
||||
* @bio_work: Work struct to handle issuing of plugged BIOs
|
||||
* @rcu_head: RCU head to free zone write plugs with an RCU grace period.
|
||||
@@ -62,6 +66,7 @@ static const char *const zone_cond_name[] = {
|
||||
*/
|
||||
struct blk_zone_wplug {
|
||||
struct hlist_node node;
|
||||
struct list_head entry;
|
||||
struct bio_list bio_list;
|
||||
struct work_struct bio_work;
|
||||
struct rcu_head rcu_head;
|
||||
@@ -99,17 +104,17 @@ static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk)
|
||||
* being executed or the zone write plug bio list is not empty.
|
||||
* - BLK_ZONE_WPLUG_NEED_WP_UPDATE: Indicates that we lost track of a zone
|
||||
* write pointer offset and need to update it.
|
||||
* - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed
|
||||
* from the disk hash table and that the initial reference to the zone
|
||||
* write plug set when the plug was first added to the hash table has been
|
||||
* dropped. This flag is set when a zone is reset, finished or become full,
|
||||
* to prevent new references to the zone write plug to be taken for
|
||||
* newly incoming BIOs. A zone write plug flagged with this flag will be
|
||||
* freed once all remaining references from BIOs or functions are dropped.
|
||||
* - BLK_ZONE_WPLUG_DEAD: Indicates that the zone write plug will be
|
||||
* removed from the disk hash table of zone write plugs when the last
|
||||
* reference on the zone write plug is dropped. If set, this flag also
|
||||
* indicates that the initial extra reference on the zone write plug was
|
||||
* dropped, meaning that the reference count indicates the current number of
|
||||
* active users (code context or BIOs and requests in flight). This flag is
|
||||
* set when a zone is reset, finished or becomes full.
|
||||
*/
|
||||
#define BLK_ZONE_WPLUG_PLUGGED (1U << 0)
|
||||
#define BLK_ZONE_WPLUG_NEED_WP_UPDATE (1U << 1)
|
||||
#define BLK_ZONE_WPLUG_UNHASHED (1U << 2)
|
||||
#define BLK_ZONE_WPLUG_DEAD (1U << 2)
|
||||
|
||||
/**
|
||||
* blk_zone_cond_str - Return a zone condition name string
|
||||
@@ -412,20 +417,32 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int blkdev_truncate_zone_range(struct block_device *bdev,
|
||||
blk_mode_t mode, const struct blk_zone_range *zrange)
|
||||
static int blkdev_reset_zone(struct block_device *bdev, blk_mode_t mode,
|
||||
struct blk_zone_range *zrange)
|
||||
{
|
||||
loff_t start, end;
|
||||
int ret = -EINVAL;
|
||||
|
||||
inode_lock(bdev->bd_mapping->host);
|
||||
filemap_invalidate_lock(bdev->bd_mapping);
|
||||
if (zrange->sector + zrange->nr_sectors <= zrange->sector ||
|
||||
zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk))
|
||||
/* Out of range */
|
||||
return -EINVAL;
|
||||
goto out_unlock;
|
||||
|
||||
start = zrange->sector << SECTOR_SHIFT;
|
||||
end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1;
|
||||
|
||||
return truncate_bdev_range(bdev, mode, start, end);
|
||||
ret = truncate_bdev_range(bdev, mode, start, end);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, zrange->sector,
|
||||
zrange->nr_sectors);
|
||||
out_unlock:
|
||||
filemap_invalidate_unlock(bdev->bd_mapping);
|
||||
inode_unlock(bdev->bd_mapping->host);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -438,7 +455,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
void __user *argp = (void __user *)arg;
|
||||
struct blk_zone_range zrange;
|
||||
enum req_op op;
|
||||
int ret;
|
||||
|
||||
if (!argp)
|
||||
return -EINVAL;
|
||||
@@ -454,15 +470,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
|
||||
switch (cmd) {
|
||||
case BLKRESETZONE:
|
||||
op = REQ_OP_ZONE_RESET;
|
||||
|
||||
/* Invalidate the page cache, including dirty pages. */
|
||||
inode_lock(bdev->bd_mapping->host);
|
||||
filemap_invalidate_lock(bdev->bd_mapping);
|
||||
ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
|
||||
if (ret)
|
||||
goto fail;
|
||||
break;
|
||||
return blkdev_reset_zone(bdev, mode, &zrange);
|
||||
case BLKOPENZONE:
|
||||
op = REQ_OP_ZONE_OPEN;
|
||||
break;
|
||||
@@ -476,15 +484,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
||||
ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);
|
||||
|
||||
fail:
|
||||
if (cmd == BLKRESETZONE) {
|
||||
filemap_invalidate_unlock(bdev->bd_mapping);
|
||||
inode_unlock(bdev->bd_mapping->host);
|
||||
}
|
||||
|
||||
return ret;
|
||||
return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);
|
||||
}
|
||||
|
||||
static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone)
|
||||
@@ -492,18 +492,12 @@ static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone)
|
||||
return zone->start + zone->len >= get_capacity(disk);
|
||||
}
|
||||
|
||||
static bool disk_zone_is_full(struct gendisk *disk,
|
||||
unsigned int zno, unsigned int offset_in_zone)
|
||||
{
|
||||
if (zno < disk->nr_zones - 1)
|
||||
return offset_in_zone >= disk->zone_capacity;
|
||||
return offset_in_zone >= disk->last_zone_capacity;
|
||||
}
|
||||
|
||||
static bool disk_zone_wplug_is_full(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
return disk_zone_is_full(disk, zwplug->zone_no, zwplug->wp_offset);
|
||||
if (zwplug->zone_no < disk->nr_zones - 1)
|
||||
return zwplug->wp_offset >= disk->zone_capacity;
|
||||
return zwplug->wp_offset >= disk->last_zone_capacity;
|
||||
}
|
||||
|
||||
static bool disk_insert_zone_wplug(struct gendisk *disk,
|
||||
@@ -520,10 +514,11 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
|
||||
* are racing with other submission context, so we may already have a
|
||||
* zone write plug for the same zone.
|
||||
*/
|
||||
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
|
||||
spin_lock_irqsave(&disk->zone_wplugs_hash_lock, flags);
|
||||
hlist_for_each_entry_rcu(zwplg, &disk->zone_wplugs_hash[idx], node) {
|
||||
if (zwplg->zone_no == zwplug->zone_no) {
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock,
|
||||
flags);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -535,7 +530,7 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
|
||||
* necessarilly in the active condition.
|
||||
*/
|
||||
zones_cond = rcu_dereference_check(disk->zones_cond,
|
||||
lockdep_is_held(&disk->zone_wplugs_lock));
|
||||
lockdep_is_held(&disk->zone_wplugs_hash_lock));
|
||||
if (zones_cond)
|
||||
zwplug->cond = zones_cond[zwplug->zone_no];
|
||||
else
|
||||
@@ -543,7 +538,7 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
|
||||
|
||||
hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]);
|
||||
atomic_inc(&disk->nr_zone_wplugs);
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock, flags);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -587,105 +582,76 @@ static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head)
|
||||
mempool_free(zwplug, zwplug->disk->zone_wplugs_pool);
|
||||
}
|
||||
|
||||
static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
if (refcount_dec_and_test(&zwplug->ref)) {
|
||||
WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
|
||||
WARN_ON_ONCE(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED);
|
||||
WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_UNHASHED));
|
||||
|
||||
call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu);
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
lockdep_assert_held(&zwplug->lock);
|
||||
|
||||
/* If the zone write plug was already removed, we are done. */
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)
|
||||
return false;
|
||||
|
||||
/* If the zone write plug is still plugged, it cannot be removed. */
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Completions of BIOs with blk_zone_write_plug_bio_endio() may
|
||||
* happen after handling a request completion with
|
||||
* blk_zone_write_plug_finish_request() (e.g. with split BIOs
|
||||
* that are chained). In such case, disk_zone_wplug_unplug_bio()
|
||||
* should not attempt to remove the zone write plug until all BIO
|
||||
* completions are seen. Check by looking at the zone write plug
|
||||
* reference count, which is 2 when the plug is unused (one reference
|
||||
* taken when the plug was allocated and another reference taken by the
|
||||
* caller context).
|
||||
*/
|
||||
if (refcount_read(&zwplug->ref) > 2)
|
||||
return false;
|
||||
|
||||
/* We can remove zone write plugs for zones that are empty or full. */
|
||||
return !zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug);
|
||||
}
|
||||
|
||||
static void disk_remove_zone_wplug(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
static void disk_free_zone_wplug(struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
struct gendisk *disk = zwplug->disk;
|
||||
unsigned long flags;
|
||||
|
||||
/* If the zone write plug was already removed, we have nothing to do. */
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)
|
||||
return;
|
||||
WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_DEAD));
|
||||
WARN_ON_ONCE(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED);
|
||||
WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
|
||||
|
||||
/*
|
||||
* Mark the zone write plug as unhashed and drop the extra reference we
|
||||
* took when the plug was inserted in the hash table. Also update the
|
||||
* disk zone condition array with the current condition of the zone
|
||||
* write plug.
|
||||
*/
|
||||
zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED;
|
||||
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
|
||||
spin_lock_irqsave(&disk->zone_wplugs_hash_lock, flags);
|
||||
blk_zone_set_cond(rcu_dereference_check(disk->zones_cond,
|
||||
lockdep_is_held(&disk->zone_wplugs_lock)),
|
||||
lockdep_is_held(&disk->zone_wplugs_hash_lock)),
|
||||
zwplug->zone_no, zwplug->cond);
|
||||
hlist_del_init_rcu(&zwplug->node);
|
||||
atomic_dec(&disk->nr_zone_wplugs);
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock, flags);
|
||||
|
||||
call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu);
|
||||
}
|
||||
|
||||
static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
if (refcount_dec_and_test(&zwplug->ref))
|
||||
disk_free_zone_wplug(zwplug);
|
||||
}
|
||||
|
||||
/*
|
||||
* Flag the zone write plug as dead and drop the initial reference we got when
|
||||
* the zone write plug was added to the hash table. The zone write plug will be
|
||||
* unhashed when its last reference is dropped.
|
||||
*/
|
||||
static void disk_mark_zone_wplug_dead(struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
lockdep_assert_held(&zwplug->lock);
|
||||
|
||||
if (!(zwplug->flags & BLK_ZONE_WPLUG_DEAD)) {
|
||||
zwplug->flags |= BLK_ZONE_WPLUG_DEAD;
|
||||
disk_put_zone_wplug(zwplug);
|
||||
}
|
||||
}
|
||||
|
||||
static bool disk_zone_wplug_submit_bio(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug);
|
||||
|
||||
static void blk_zone_wplug_bio_work(struct work_struct *work)
|
||||
{
|
||||
struct blk_zone_wplug *zwplug =
|
||||
container_of(work, struct blk_zone_wplug, bio_work);
|
||||
|
||||
disk_zone_wplug_submit_bio(zwplug->disk, zwplug);
|
||||
|
||||
/* Drop the reference we took in disk_zone_wplug_schedule_work(). */
|
||||
disk_put_zone_wplug(zwplug);
|
||||
}
|
||||
|
||||
static void blk_zone_wplug_bio_work(struct work_struct *work);
|
||||
|
||||
/*
|
||||
* Get a reference on the write plug for the zone containing @sector.
|
||||
* If the plug does not exist, it is allocated and hashed.
|
||||
* Return a pointer to the zone write plug with the plug spinlock held.
|
||||
* Get a zone write plug for the zone containing @sector.
|
||||
* If the plug does not exist, it is allocated and inserted in the disk hash
|
||||
* table.
|
||||
*/
|
||||
static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
|
||||
sector_t sector, gfp_t gfp_mask,
|
||||
unsigned long *flags)
|
||||
static struct blk_zone_wplug *disk_get_or_alloc_zone_wplug(struct gendisk *disk,
|
||||
sector_t sector, gfp_t gfp_mask)
|
||||
{
|
||||
unsigned int zno = disk_zone_no(disk, sector);
|
||||
struct blk_zone_wplug *zwplug;
|
||||
|
||||
again:
|
||||
zwplug = disk_get_zone_wplug(disk, sector);
|
||||
if (zwplug) {
|
||||
/*
|
||||
* Check that a BIO completion or a zone reset or finish
|
||||
* operation has not already removed the zone write plug from
|
||||
* the hash table and dropped its reference count. In such case,
|
||||
* we need to get a new plug so start over from the beginning.
|
||||
*/
|
||||
spin_lock_irqsave(&zwplug->lock, *flags);
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) {
|
||||
spin_unlock_irqrestore(&zwplug->lock, *flags);
|
||||
disk_put_zone_wplug(zwplug);
|
||||
goto again;
|
||||
}
|
||||
if (zwplug)
|
||||
return zwplug;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate and initialize a zone write plug with an extra reference
|
||||
@@ -704,17 +670,15 @@ again:
|
||||
zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0, sector);
|
||||
bio_list_init(&zwplug->bio_list);
|
||||
INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
|
||||
INIT_LIST_HEAD(&zwplug->entry);
|
||||
zwplug->disk = disk;
|
||||
|
||||
spin_lock_irqsave(&zwplug->lock, *flags);
|
||||
|
||||
/*
|
||||
* Insert the new zone write plug in the hash table. This can fail only
|
||||
* if another context already inserted a plug. Retry from the beginning
|
||||
* in such case.
|
||||
*/
|
||||
if (!disk_insert_zone_wplug(disk, zwplug)) {
|
||||
spin_unlock_irqrestore(&zwplug->lock, *flags);
|
||||
mempool_free(zwplug, disk->zone_wplugs_pool);
|
||||
goto again;
|
||||
}
|
||||
@@ -739,6 +703,7 @@ static inline void blk_zone_wplug_bio_io_error(struct blk_zone_wplug *zwplug,
|
||||
*/
|
||||
static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
struct gendisk *disk = zwplug->disk;
|
||||
struct bio *bio;
|
||||
|
||||
lockdep_assert_held(&zwplug->lock);
|
||||
@@ -752,6 +717,20 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
|
||||
blk_zone_wplug_bio_io_error(zwplug, bio);
|
||||
|
||||
zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
|
||||
|
||||
/*
|
||||
* If we are using the per disk zone write plugs worker thread, remove
|
||||
* the zone write plug from the work list and drop the reference we
|
||||
* took when the zone write plug was added to that list.
|
||||
*/
|
||||
if (blk_queue_zoned_qd1_writes(disk->queue)) {
|
||||
spin_lock(&disk->zone_wplugs_list_lock);
|
||||
if (!list_empty(&zwplug->entry)) {
|
||||
list_del_init(&zwplug->entry);
|
||||
disk_put_zone_wplug(zwplug);
|
||||
}
|
||||
spin_unlock(&disk->zone_wplugs_list_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -788,14 +767,8 @@ static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
|
||||
disk_zone_wplug_update_cond(disk, zwplug);
|
||||
|
||||
disk_zone_wplug_abort(zwplug);
|
||||
|
||||
/*
|
||||
* The zone write plug now has no BIO plugged: remove it from the
|
||||
* hash table so that it cannot be seen. The plug will be freed
|
||||
* when the last reference is dropped.
|
||||
*/
|
||||
if (disk_should_remove_zone_wplug(disk, zwplug))
|
||||
disk_remove_zone_wplug(disk, zwplug);
|
||||
if (!zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug))
|
||||
disk_mark_zone_wplug_dead(zwplug);
|
||||
}
|
||||
|
||||
static unsigned int blk_zone_wp_offset(struct blk_zone *zone)
|
||||
@@ -1192,19 +1165,24 @@ void blk_zone_mgmt_bio_endio(struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
static void disk_zone_wplug_schedule_work(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
lockdep_assert_held(&zwplug->lock);
|
||||
|
||||
/*
|
||||
* Take a reference on the zone write plug and schedule the submission
|
||||
* of the next plugged BIO. blk_zone_wplug_bio_work() will release the
|
||||
* reference we take here.
|
||||
* Schedule the submission of the next plugged BIO. Taking a reference
|
||||
* to the zone write plug is required as the bio_work belongs to the
|
||||
* plug, and thus we must ensure that the write plug does not go away
|
||||
* while the work is being scheduled but has not run yet.
|
||||
* blk_zone_wplug_bio_work() will release the reference we take here,
|
||||
* and we also drop this reference if the work is already scheduled.
|
||||
*/
|
||||
WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED));
|
||||
WARN_ON_ONCE(blk_queue_zoned_qd1_writes(disk->queue));
|
||||
refcount_inc(&zwplug->ref);
|
||||
queue_work(disk->zone_wplugs_wq, &zwplug->bio_work);
|
||||
if (!queue_work(disk->zone_wplugs_wq, &zwplug->bio_work))
|
||||
disk_put_zone_wplug(zwplug);
|
||||
}
|
||||
|
||||
static inline void disk_zone_wplug_add_bio(struct gendisk *disk,
|
||||
@@ -1241,6 +1219,22 @@ static inline void disk_zone_wplug_add_bio(struct gendisk *disk,
|
||||
bio_list_add(&zwplug->bio_list, bio);
|
||||
trace_disk_zone_wplug_add_bio(zwplug->disk->queue, zwplug->zone_no,
|
||||
bio->bi_iter.bi_sector, bio_sectors(bio));
|
||||
|
||||
/*
|
||||
* If we are using the disk zone write plugs worker instead of the per
|
||||
* zone write plug BIO work, add the zone write plug to the work list
|
||||
* if it is not already there. Make sure to also get an extra reference
|
||||
* on the zone write plug so that it does not go away until it is
|
||||
* removed from the work list.
|
||||
*/
|
||||
if (blk_queue_zoned_qd1_writes(disk->queue)) {
|
||||
spin_lock(&disk->zone_wplugs_list_lock);
|
||||
if (list_empty(&zwplug->entry)) {
|
||||
list_add_tail(&zwplug->entry, &disk->zone_wplugs_list);
|
||||
refcount_inc(&zwplug->ref);
|
||||
}
|
||||
spin_unlock(&disk->zone_wplugs_list_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1438,7 +1432,7 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
|
||||
if (bio->bi_opf & REQ_NOWAIT)
|
||||
gfp_mask = GFP_NOWAIT;
|
||||
|
||||
zwplug = disk_get_and_lock_zone_wplug(disk, sector, gfp_mask, &flags);
|
||||
zwplug = disk_get_or_alloc_zone_wplug(disk, sector, gfp_mask);
|
||||
if (!zwplug) {
|
||||
if (bio->bi_opf & REQ_NOWAIT)
|
||||
bio_wouldblock_error(bio);
|
||||
@@ -1447,6 +1441,21 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
|
||||
return true;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&zwplug->lock, flags);
|
||||
|
||||
/*
|
||||
* If we got a zone write plug marked as dead, then the user is issuing
|
||||
* writes to a full zone, or without synchronizing with zone reset or
|
||||
* zone finish operations. In such case, fail the BIO to signal this
|
||||
* invalid usage.
|
||||
*/
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_DEAD) {
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
disk_put_zone_wplug(zwplug);
|
||||
bio_io_error(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Indicate that this BIO is being handled using zone write plugging. */
|
||||
bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING);
|
||||
|
||||
@@ -1459,6 +1468,13 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
|
||||
goto queue_bio;
|
||||
}
|
||||
|
||||
/*
|
||||
* For rotational devices, we will use the gendisk zone write plugs
|
||||
* work instead of the per zone write plug BIO work, so queue the BIO.
|
||||
*/
|
||||
if (blk_queue_zoned_qd1_writes(disk->queue))
|
||||
goto queue_bio;
|
||||
|
||||
/* If the zone is already plugged, add the BIO to the BIO plug list. */
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)
|
||||
goto queue_bio;
|
||||
@@ -1481,7 +1497,10 @@ queue_bio:
|
||||
|
||||
if (!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)) {
|
||||
zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
|
||||
disk_zone_wplug_schedule_bio_work(disk, zwplug);
|
||||
if (blk_queue_zoned_qd1_writes(disk->queue))
|
||||
wake_up_process(disk->zone_wplugs_worker);
|
||||
else
|
||||
disk_zone_wplug_schedule_work(disk, zwplug);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
@@ -1527,7 +1546,7 @@ static void blk_zone_wplug_handle_native_zone_append(struct bio *bio)
|
||||
disk->disk_name, zwplug->zone_no);
|
||||
disk_zone_wplug_abort(zwplug);
|
||||
}
|
||||
disk_remove_zone_wplug(disk, zwplug);
|
||||
disk_mark_zone_wplug_dead(zwplug);
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
|
||||
disk_put_zone_wplug(zwplug);
|
||||
@@ -1622,21 +1641,21 @@ static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
|
||||
|
||||
spin_lock_irqsave(&zwplug->lock, flags);
|
||||
|
||||
/* Schedule submission of the next plugged BIO if we have one. */
|
||||
if (!bio_list_empty(&zwplug->bio_list)) {
|
||||
disk_zone_wplug_schedule_bio_work(disk, zwplug);
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
|
||||
|
||||
/*
|
||||
* If the zone is full (it was fully written or finished, or empty
|
||||
* (it was reset), remove its zone write plug from the hash table.
|
||||
* For rotational devices, signal the BIO completion to the zone write
|
||||
* plug work. Otherwise, schedule submission of the next plugged BIO
|
||||
* if we have one.
|
||||
*/
|
||||
if (disk_should_remove_zone_wplug(disk, zwplug))
|
||||
disk_remove_zone_wplug(disk, zwplug);
|
||||
if (bio_list_empty(&zwplug->bio_list))
|
||||
zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
|
||||
|
||||
if (blk_queue_zoned_qd1_writes(disk->queue))
|
||||
complete(&disk->zone_wplugs_worker_bio_done);
|
||||
else if (!bio_list_empty(&zwplug->bio_list))
|
||||
disk_zone_wplug_schedule_work(disk, zwplug);
|
||||
|
||||
if (!zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug))
|
||||
disk_mark_zone_wplug_dead(zwplug);
|
||||
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
}
|
||||
@@ -1727,10 +1746,9 @@ void blk_zone_write_plug_finish_request(struct request *req)
|
||||
disk_put_zone_wplug(zwplug);
|
||||
}
|
||||
|
||||
static void blk_zone_wplug_bio_work(struct work_struct *work)
|
||||
static bool disk_zone_wplug_submit_bio(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
struct blk_zone_wplug *zwplug =
|
||||
container_of(work, struct blk_zone_wplug, bio_work);
|
||||
struct block_device *bdev;
|
||||
unsigned long flags;
|
||||
struct bio *bio;
|
||||
@@ -1746,7 +1764,7 @@ again:
|
||||
if (!bio) {
|
||||
zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
goto put_zwplug;
|
||||
return false;
|
||||
}
|
||||
|
||||
trace_blk_zone_wplug_bio(zwplug->disk->queue, zwplug->zone_no,
|
||||
@@ -1760,14 +1778,15 @@ again:
|
||||
goto again;
|
||||
}
|
||||
|
||||
bdev = bio->bi_bdev;
|
||||
|
||||
/*
|
||||
* blk-mq devices will reuse the extra reference on the request queue
|
||||
* usage counter we took when the BIO was plugged, but the submission
|
||||
* path for BIO-based devices will not do that. So drop this extra
|
||||
* reference here.
|
||||
*/
|
||||
if (blk_queue_zoned_qd1_writes(disk->queue))
|
||||
reinit_completion(&disk->zone_wplugs_worker_bio_done);
|
||||
bdev = bio->bi_bdev;
|
||||
if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) {
|
||||
bdev->bd_disk->fops->submit_bio(bio);
|
||||
blk_queue_exit(bdev->bd_disk->queue);
|
||||
@@ -1775,14 +1794,78 @@ again:
|
||||
blk_mq_submit_bio(bio);
|
||||
}
|
||||
|
||||
put_zwplug:
|
||||
/* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */
|
||||
disk_put_zone_wplug(zwplug);
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct blk_zone_wplug *disk_get_zone_wplugs_work(struct gendisk *disk)
|
||||
{
|
||||
struct blk_zone_wplug *zwplug;
|
||||
|
||||
spin_lock_irq(&disk->zone_wplugs_list_lock);
|
||||
zwplug = list_first_entry_or_null(&disk->zone_wplugs_list,
|
||||
struct blk_zone_wplug, entry);
|
||||
if (zwplug)
|
||||
list_del_init(&zwplug->entry);
|
||||
spin_unlock_irq(&disk->zone_wplugs_list_lock);
|
||||
|
||||
return zwplug;
|
||||
}
|
||||
|
||||
static int disk_zone_wplugs_worker(void *data)
|
||||
{
|
||||
struct gendisk *disk = data;
|
||||
struct blk_zone_wplug *zwplug;
|
||||
unsigned int noio_flag;
|
||||
|
||||
noio_flag = memalloc_noio_save();
|
||||
set_user_nice(current, MIN_NICE);
|
||||
set_freezable();
|
||||
|
||||
for (;;) {
|
||||
set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);
|
||||
|
||||
zwplug = disk_get_zone_wplugs_work(disk);
|
||||
if (zwplug) {
|
||||
/*
|
||||
* Process all BIOs of this zone write plug and then
|
||||
* drop the reference we took when adding the zone write
|
||||
* plug to the active list.
|
||||
*/
|
||||
set_current_state(TASK_RUNNING);
|
||||
while (disk_zone_wplug_submit_bio(disk, zwplug))
|
||||
blk_wait_io(&disk->zone_wplugs_worker_bio_done);
|
||||
disk_put_zone_wplug(zwplug);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Only sleep if nothing sets the state to running. Else check
|
||||
* for zone write plugs work again as a newly submitted BIO
|
||||
* might have added a zone write plug to the work list.
|
||||
*/
|
||||
if (get_current_state() == TASK_RUNNING) {
|
||||
try_to_freeze();
|
||||
} else {
|
||||
if (kthread_should_stop()) {
|
||||
set_current_state(TASK_RUNNING);
|
||||
break;
|
||||
}
|
||||
schedule();
|
||||
}
|
||||
}
|
||||
|
||||
WARN_ON_ONCE(!list_empty(&disk->zone_wplugs_list));
|
||||
memalloc_noio_restore(noio_flag);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void disk_init_zone_resources(struct gendisk *disk)
|
||||
{
|
||||
spin_lock_init(&disk->zone_wplugs_lock);
|
||||
spin_lock_init(&disk->zone_wplugs_hash_lock);
|
||||
spin_lock_init(&disk->zone_wplugs_list_lock);
|
||||
INIT_LIST_HEAD(&disk->zone_wplugs_list);
|
||||
init_completion(&disk->zone_wplugs_worker_bio_done);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1798,6 +1881,7 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
|
||||
unsigned int pool_size)
|
||||
{
|
||||
unsigned int i;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
atomic_set(&disk->nr_zone_wplugs, 0);
|
||||
disk->zone_wplugs_hash_bits =
|
||||
@@ -1823,8 +1907,21 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
|
||||
if (!disk->zone_wplugs_wq)
|
||||
goto destroy_pool;
|
||||
|
||||
disk->zone_wplugs_worker =
|
||||
kthread_create(disk_zone_wplugs_worker, disk,
|
||||
"%s_zwplugs_worker", disk->disk_name);
|
||||
if (IS_ERR(disk->zone_wplugs_worker)) {
|
||||
ret = PTR_ERR(disk->zone_wplugs_worker);
|
||||
disk->zone_wplugs_worker = NULL;
|
||||
goto destroy_wq;
|
||||
}
|
||||
wake_up_process(disk->zone_wplugs_worker);
|
||||
|
||||
return 0;
|
||||
|
||||
destroy_wq:
|
||||
destroy_workqueue(disk->zone_wplugs_wq);
|
||||
disk->zone_wplugs_wq = NULL;
|
||||
destroy_pool:
|
||||
mempool_destroy(disk->zone_wplugs_pool);
|
||||
disk->zone_wplugs_pool = NULL;
|
||||
@@ -1832,7 +1929,7 @@ free_hash:
|
||||
kfree(disk->zone_wplugs_hash);
|
||||
disk->zone_wplugs_hash = NULL;
|
||||
disk->zone_wplugs_hash_bits = 0;
|
||||
return -ENOMEM;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
|
||||
@@ -1848,9 +1945,9 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
|
||||
while (!hlist_empty(&disk->zone_wplugs_hash[i])) {
|
||||
zwplug = hlist_entry(disk->zone_wplugs_hash[i].first,
|
||||
struct blk_zone_wplug, node);
|
||||
refcount_inc(&zwplug->ref);
|
||||
disk_remove_zone_wplug(disk, zwplug);
|
||||
disk_put_zone_wplug(zwplug);
|
||||
spin_lock_irq(&zwplug->lock);
|
||||
disk_mark_zone_wplug_dead(zwplug);
|
||||
spin_unlock_irq(&zwplug->lock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1872,16 +1969,20 @@ static void disk_set_zones_cond_array(struct gendisk *disk, u8 *zones_cond)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
|
||||
spin_lock_irqsave(&disk->zone_wplugs_hash_lock, flags);
|
||||
zones_cond = rcu_replace_pointer(disk->zones_cond, zones_cond,
|
||||
lockdep_is_held(&disk->zone_wplugs_lock));
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
|
||||
lockdep_is_held(&disk->zone_wplugs_hash_lock));
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock, flags);
|
||||
|
||||
kfree_rcu_mightsleep(zones_cond);
|
||||
}
|
||||
|
||||
void disk_free_zone_resources(struct gendisk *disk)
|
||||
{
|
||||
if (disk->zone_wplugs_worker)
|
||||
kthread_stop(disk->zone_wplugs_worker);
|
||||
WARN_ON_ONCE(!list_empty(&disk->zone_wplugs_list));
|
||||
|
||||
if (disk->zone_wplugs_wq) {
|
||||
destroy_workqueue(disk->zone_wplugs_wq);
|
||||
disk->zone_wplugs_wq = NULL;
|
||||
@@ -1910,6 +2011,7 @@ static int disk_revalidate_zone_resources(struct gendisk *disk,
|
||||
{
|
||||
struct queue_limits *lim = &disk->queue->limits;
|
||||
unsigned int pool_size;
|
||||
int ret = 0;
|
||||
|
||||
args->disk = disk;
|
||||
args->nr_zones =
|
||||
@@ -1932,10 +2034,13 @@ static int disk_revalidate_zone_resources(struct gendisk *disk,
|
||||
pool_size =
|
||||
min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, args->nr_zones);
|
||||
|
||||
if (!disk->zone_wplugs_hash)
|
||||
return disk_alloc_zone_resources(disk, pool_size);
|
||||
if (!disk->zone_wplugs_hash) {
|
||||
ret = disk_alloc_zone_resources(disk, pool_size);
|
||||
if (ret)
|
||||
kfree(args->zones_cond);
|
||||
}
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1967,6 +2072,7 @@ static int disk_update_zone_resources(struct gendisk *disk,
|
||||
disk->zone_capacity = args->zone_capacity;
|
||||
disk->last_zone_capacity = args->last_zone_capacity;
|
||||
disk_set_zones_cond_array(disk, args->zones_cond);
|
||||
args->zones_cond = NULL;
|
||||
|
||||
/*
|
||||
* Some devices can advertise zone resource limits that are larger than
|
||||
@@ -2078,7 +2184,6 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
|
||||
struct gendisk *disk = args->disk;
|
||||
struct blk_zone_wplug *zwplug;
|
||||
unsigned int wp_offset;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Remember the capacity of the first sequential zone and check
|
||||
@@ -2108,10 +2213,9 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
|
||||
if (!wp_offset || wp_offset >= zone->capacity)
|
||||
return 0;
|
||||
|
||||
zwplug = disk_get_and_lock_zone_wplug(disk, zone->wp, GFP_NOIO, &flags);
|
||||
zwplug = disk_get_or_alloc_zone_wplug(disk, zone->wp, GFP_NOIO);
|
||||
if (!zwplug)
|
||||
return -ENOMEM;
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
disk_put_zone_wplug(zwplug);
|
||||
|
||||
return 0;
|
||||
@@ -2249,21 +2353,30 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
}
|
||||
memalloc_noio_restore(noio_flag);
|
||||
|
||||
if (ret <= 0)
|
||||
goto free_resources;
|
||||
|
||||
/*
|
||||
* If zones where reported, make sure that the entire disk capacity
|
||||
* has been checked.
|
||||
*/
|
||||
if (ret > 0 && args.sector != capacity) {
|
||||
if (args.sector != capacity) {
|
||||
pr_warn("%s: Missing zones from sector %llu\n",
|
||||
disk->disk_name, args.sector);
|
||||
ret = -ENODEV;
|
||||
goto free_resources;
|
||||
}
|
||||
|
||||
if (ret > 0)
|
||||
return disk_update_zone_resources(disk, &args);
|
||||
ret = disk_update_zone_resources(disk, &args);
|
||||
if (ret)
|
||||
goto free_resources;
|
||||
|
||||
return 0;
|
||||
|
||||
free_resources:
|
||||
pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
|
||||
|
||||
kfree(args.zones_cond);
|
||||
memflags = blk_mq_freeze_queue(q);
|
||||
disk_free_zone_resources(disk);
|
||||
blk_mq_unfreeze_queue(q, memflags);
|
||||
|
||||
@@ -55,7 +55,7 @@ bool __blk_freeze_queue_start(struct request_queue *q,
|
||||
struct task_struct *owner);
|
||||
int __bio_queue_enter(struct request_queue *q, struct bio *bio);
|
||||
void submit_bio_noacct_nocheck(struct bio *bio, bool split);
|
||||
void bio_await_chain(struct bio *bio);
|
||||
int bio_submit_or_kill(struct bio *bio, unsigned int flags);
|
||||
|
||||
static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
|
||||
{
|
||||
@@ -108,11 +108,6 @@ static inline void blk_wait_io(struct completion *done)
|
||||
struct block_device *blkdev_get_no_open(dev_t dev, bool autoload);
|
||||
void blkdev_put_no_open(struct block_device *bdev);
|
||||
|
||||
#define BIO_INLINE_VECS 4
|
||||
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
|
||||
gfp_t gfp_mask);
|
||||
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs);
|
||||
|
||||
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
|
||||
struct page *page, unsigned len, unsigned offset);
|
||||
|
||||
|
||||
@@ -393,7 +393,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
|
||||
|
||||
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
|
||||
|
||||
bset->bd = bsg_register_queue(q, dev, name, bsg_transport_sg_io_fn);
|
||||
bset->bd = bsg_register_queue(q, dev, name, bsg_transport_sg_io_fn, NULL);
|
||||
if (IS_ERR(bset->bd)) {
|
||||
ret = PTR_ERR(bset->bd);
|
||||
goto out_cleanup_queue;
|
||||
|
||||
33
block/bsg.c
33
block/bsg.c
@@ -12,6 +12,7 @@
|
||||
#include <linux/idr.h>
|
||||
#include <linux/bsg.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/io_uring/cmd.h>
|
||||
|
||||
#include <scsi/scsi.h>
|
||||
#include <scsi/scsi_ioctl.h>
|
||||
@@ -28,6 +29,7 @@ struct bsg_device {
|
||||
unsigned int timeout;
|
||||
unsigned int reserved_size;
|
||||
bsg_sg_io_fn *sg_io_fn;
|
||||
bsg_uring_cmd_fn *uring_cmd_fn;
|
||||
};
|
||||
|
||||
static inline struct bsg_device *to_bsg_device(struct inode *inode)
|
||||
@@ -158,11 +160,38 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
}
|
||||
}
|
||||
|
||||
static int bsg_check_uring_features(unsigned int issue_flags)
|
||||
{
|
||||
/* BSG passthrough requires big SQE/CQE support */
|
||||
if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
|
||||
(IO_URING_F_SQE128|IO_URING_F_CQE32))
|
||||
return -EOPNOTSUPP;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bsg_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
|
||||
{
|
||||
struct bsg_device *bd = to_bsg_device(file_inode(ioucmd->file));
|
||||
bool open_for_write = ioucmd->file->f_mode & FMODE_WRITE;
|
||||
struct request_queue *q = bd->queue;
|
||||
int ret;
|
||||
|
||||
ret = bsg_check_uring_features(issue_flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!bd->uring_cmd_fn)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return bd->uring_cmd_fn(q, ioucmd, issue_flags, open_for_write);
|
||||
}
|
||||
|
||||
static const struct file_operations bsg_fops = {
|
||||
.open = bsg_open,
|
||||
.release = bsg_release,
|
||||
.unlocked_ioctl = bsg_ioctl,
|
||||
.compat_ioctl = compat_ptr_ioctl,
|
||||
.uring_cmd = bsg_uring_cmd,
|
||||
.owner = THIS_MODULE,
|
||||
.llseek = default_llseek,
|
||||
};
|
||||
@@ -187,7 +216,8 @@ void bsg_unregister_queue(struct bsg_device *bd)
|
||||
EXPORT_SYMBOL_GPL(bsg_unregister_queue);
|
||||
|
||||
struct bsg_device *bsg_register_queue(struct request_queue *q,
|
||||
struct device *parent, const char *name, bsg_sg_io_fn *sg_io_fn)
|
||||
struct device *parent, const char *name, bsg_sg_io_fn *sg_io_fn,
|
||||
bsg_uring_cmd_fn *uring_cmd_fn)
|
||||
{
|
||||
struct bsg_device *bd;
|
||||
int ret;
|
||||
@@ -199,6 +229,7 @@ struct bsg_device *bsg_register_queue(struct request_queue *q,
|
||||
bd->reserved_size = INT_MAX;
|
||||
bd->queue = q;
|
||||
bd->sg_io_fn = sg_io_fn;
|
||||
bd->uring_cmd_fn = uring_cmd_fn;
|
||||
|
||||
ret = ida_alloc_max(&bsg_minor_ida, BSG_MAX_DEVS - 1, GFP_KERNEL);
|
||||
if (ret < 0) {
|
||||
|
||||
@@ -290,13 +290,14 @@ EXPORT_SYMBOL(disk_check_media_change);
|
||||
* Should be called when the media changes for @disk. Generates a uevent
|
||||
* and attempts to free all dentries and inodes and invalidates all block
|
||||
* device page cache entries in that case.
|
||||
*
|
||||
* Callers that need a partition re-scan should arrange for one explicitly.
|
||||
*/
|
||||
void disk_force_media_change(struct gendisk *disk)
|
||||
{
|
||||
disk_event_uevent(disk, DISK_EVENT_MEDIA_CHANGE);
|
||||
inc_diskseq(disk);
|
||||
bdev_mark_dead(disk->part0, true);
|
||||
set_bit(GD_NEED_PART_SCAN, &disk->state);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(disk_force_media_change);
|
||||
|
||||
|
||||
@@ -153,13 +153,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
|
||||
nr_sects = len >> SECTOR_SHIFT;
|
||||
|
||||
blk_start_plug(&plug);
|
||||
while (1) {
|
||||
if (fatal_signal_pending(current)) {
|
||||
if (prev)
|
||||
bio_await_chain(prev);
|
||||
err = -EINTR;
|
||||
goto out_unplug;
|
||||
}
|
||||
while (!fatal_signal_pending(current)) {
|
||||
bio = blk_alloc_discard_bio(bdev, §or, &nr_sects,
|
||||
GFP_KERNEL);
|
||||
if (!bio)
|
||||
@@ -167,12 +161,11 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
|
||||
prev = bio_chain_and_submit(prev, bio);
|
||||
}
|
||||
if (prev) {
|
||||
err = submit_bio_wait(prev);
|
||||
err = bio_submit_or_kill(prev, BLKDEV_ZERO_KILLABLE);
|
||||
if (err == -EOPNOTSUPP)
|
||||
err = 0;
|
||||
bio_put(prev);
|
||||
}
|
||||
out_unplug:
|
||||
blk_finish_plug(&plug);
|
||||
fail:
|
||||
filemap_invalidate_unlock(bdev->bd_mapping);
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
enum {
|
||||
TCG_SECP_00 = 0,
|
||||
TCG_SECP_01,
|
||||
TCG_SECP_02,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -125,6 +126,7 @@ enum opal_uid {
|
||||
OPAL_LOCKING_INFO_TABLE,
|
||||
OPAL_ENTERPRISE_LOCKING_INFO_TABLE,
|
||||
OPAL_DATASTORE,
|
||||
OPAL_LOCKING_TABLE,
|
||||
/* C_PIN_TABLE object ID's */
|
||||
OPAL_C_PIN_MSID,
|
||||
OPAL_C_PIN_SID,
|
||||
@@ -154,6 +156,7 @@ enum opal_method {
|
||||
OPAL_AUTHENTICATE,
|
||||
OPAL_RANDOM,
|
||||
OPAL_ERASE,
|
||||
OPAL_REACTIVATE,
|
||||
};
|
||||
|
||||
enum opal_token {
|
||||
@@ -224,6 +227,8 @@ enum opal_lockingstate {
|
||||
|
||||
enum opal_parameter {
|
||||
OPAL_SUM_SET_LIST = 0x060000,
|
||||
OPAL_SUM_RANGE_POLICY = 0x060001,
|
||||
OPAL_SUM_ADMIN1_PIN = 0x060002,
|
||||
};
|
||||
|
||||
enum opal_revertlsp {
|
||||
@@ -269,6 +274,25 @@ struct opal_header {
|
||||
struct opal_data_subpacket subpkt;
|
||||
};
|
||||
|
||||
/*
|
||||
* TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
|
||||
* Section: 3.3.4.7.5 STACK_RESET
|
||||
*/
|
||||
#define OPAL_STACK_RESET 0x0002
|
||||
|
||||
struct opal_stack_reset {
|
||||
u8 extendedComID[4];
|
||||
__be32 request_code;
|
||||
};
|
||||
|
||||
struct opal_stack_reset_response {
|
||||
u8 extendedComID[4];
|
||||
__be32 request_code;
|
||||
u8 reserved0[2];
|
||||
__be16 data_length;
|
||||
__be32 response;
|
||||
};
|
||||
|
||||
#define FC_TPER 0x0001
|
||||
#define FC_LOCKING 0x0002
|
||||
#define FC_GEOMETRY 0x0003
|
||||
|
||||
@@ -40,9 +40,7 @@ adfs_partition(struct parsed_partitions *state, char *name, char *data,
|
||||
(le32_to_cpu(dr->disc_size) >> 9);
|
||||
|
||||
if (name) {
|
||||
strlcat(state->pp_buf, " [", PAGE_SIZE);
|
||||
strlcat(state->pp_buf, name, PAGE_SIZE);
|
||||
strlcat(state->pp_buf, "]", PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, " [%s]", name);
|
||||
}
|
||||
put_partition(state, slot, first_sector, nr_sects);
|
||||
return dr;
|
||||
@@ -78,14 +76,14 @@ static int riscix_partition(struct parsed_partitions *state,
|
||||
if (!rr)
|
||||
return -1;
|
||||
|
||||
strlcat(state->pp_buf, " [RISCiX]", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " [RISCiX]");
|
||||
|
||||
|
||||
if (rr->magic == RISCIX_MAGIC) {
|
||||
unsigned long size = nr_sects > 2 ? 2 : nr_sects;
|
||||
int part;
|
||||
|
||||
strlcat(state->pp_buf, " <", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " <");
|
||||
|
||||
put_partition(state, slot++, first_sect, size);
|
||||
for (part = 0; part < 8; part++) {
|
||||
@@ -94,13 +92,11 @@ static int riscix_partition(struct parsed_partitions *state,
|
||||
put_partition(state, slot++,
|
||||
le32_to_cpu(rr->part[part].start),
|
||||
le32_to_cpu(rr->part[part].length));
|
||||
strlcat(state->pp_buf, "(", PAGE_SIZE);
|
||||
strlcat(state->pp_buf, rr->part[part].name, PAGE_SIZE);
|
||||
strlcat(state->pp_buf, ")", PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, "(%s)", rr->part[part].name);
|
||||
}
|
||||
}
|
||||
|
||||
strlcat(state->pp_buf, " >\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " >\n");
|
||||
} else {
|
||||
put_partition(state, slot++, first_sect, nr_sects);
|
||||
}
|
||||
@@ -130,7 +126,7 @@ static int linux_partition(struct parsed_partitions *state,
|
||||
struct linux_part *linuxp;
|
||||
unsigned long size = nr_sects > 2 ? 2 : nr_sects;
|
||||
|
||||
strlcat(state->pp_buf, " [Linux]", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " [Linux]");
|
||||
|
||||
put_partition(state, slot++, first_sect, size);
|
||||
|
||||
@@ -138,7 +134,7 @@ static int linux_partition(struct parsed_partitions *state,
|
||||
if (!linuxp)
|
||||
return -1;
|
||||
|
||||
strlcat(state->pp_buf, " <", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " <");
|
||||
while (linuxp->magic == cpu_to_le32(LINUX_NATIVE_MAGIC) ||
|
||||
linuxp->magic == cpu_to_le32(LINUX_SWAP_MAGIC)) {
|
||||
if (slot == state->limit)
|
||||
@@ -148,7 +144,7 @@ static int linux_partition(struct parsed_partitions *state,
|
||||
le32_to_cpu(linuxp->nr_sects));
|
||||
linuxp ++;
|
||||
}
|
||||
strlcat(state->pp_buf, " >", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " >");
|
||||
|
||||
put_dev_sector(sect);
|
||||
return slot;
|
||||
@@ -293,7 +289,7 @@ int adfspart_check_ADFS(struct parsed_partitions *state)
|
||||
break;
|
||||
}
|
||||
}
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
@@ -366,7 +362,7 @@ int adfspart_check_ICS(struct parsed_partitions *state)
|
||||
return 0;
|
||||
}
|
||||
|
||||
strlcat(state->pp_buf, " [ICS]", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " [ICS]");
|
||||
|
||||
for (slot = 1, p = (const struct ics_part *)data; p->size; p++) {
|
||||
u32 start = le32_to_cpu(p->start);
|
||||
@@ -400,7 +396,7 @@ int adfspart_check_ICS(struct parsed_partitions *state)
|
||||
}
|
||||
|
||||
put_dev_sector(sect);
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
@@ -460,7 +456,7 @@ int adfspart_check_POWERTEC(struct parsed_partitions *state)
|
||||
return 0;
|
||||
}
|
||||
|
||||
strlcat(state->pp_buf, " [POWERTEC]", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " [POWERTEC]");
|
||||
|
||||
for (i = 0, p = (const struct ptec_part *)data; i < 12; i++, p++) {
|
||||
u32 start = le32_to_cpu(p->start);
|
||||
@@ -471,7 +467,7 @@ int adfspart_check_POWERTEC(struct parsed_partitions *state)
|
||||
}
|
||||
|
||||
put_dev_sector(sect);
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
@@ -542,7 +538,7 @@ int adfspart_check_EESOX(struct parsed_partitions *state)
|
||||
|
||||
size = get_capacity(state->disk);
|
||||
put_partition(state, slot++, start, size - start);
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
}
|
||||
|
||||
return i ? 1 : 0;
|
||||
|
||||
@@ -173,24 +173,22 @@ int aix_partition(struct parsed_partitions *state)
|
||||
if (d) {
|
||||
struct lvm_rec *p = (struct lvm_rec *)d;
|
||||
u16 lvm_version = be16_to_cpu(p->version);
|
||||
char tmp[64];
|
||||
|
||||
if (lvm_version == 1) {
|
||||
int pp_size_log2 = be16_to_cpu(p->pp_size);
|
||||
|
||||
pp_bytes_size = 1 << pp_size_log2;
|
||||
pp_blocks_size = pp_bytes_size / 512;
|
||||
snprintf(tmp, sizeof(tmp),
|
||||
" AIX LVM header version %u found\n",
|
||||
lvm_version);
|
||||
seq_buf_printf(&state->pp_buf,
|
||||
" AIX LVM header version %u found\n",
|
||||
lvm_version);
|
||||
vgda_len = be32_to_cpu(p->vgda_len);
|
||||
vgda_sector = be32_to_cpu(p->vgda_psn[0]);
|
||||
} else {
|
||||
snprintf(tmp, sizeof(tmp),
|
||||
" unsupported AIX LVM version %d found\n",
|
||||
lvm_version);
|
||||
seq_buf_printf(&state->pp_buf,
|
||||
" unsupported AIX LVM version %d found\n",
|
||||
lvm_version);
|
||||
}
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
put_dev_sector(sect);
|
||||
}
|
||||
if (vgda_sector && (d = read_part_sector(state, vgda_sector, §))) {
|
||||
@@ -251,14 +249,11 @@ int aix_partition(struct parsed_partitions *state)
|
||||
continue;
|
||||
}
|
||||
if (lp_ix == lvip[lv_ix].pps_per_lv) {
|
||||
char tmp[70];
|
||||
|
||||
put_partition(state, lv_ix + 1,
|
||||
(i + 1 - lp_ix) * pp_blocks_size + psn_part1,
|
||||
lvip[lv_ix].pps_per_lv * pp_blocks_size);
|
||||
snprintf(tmp, sizeof(tmp), " <%s>\n",
|
||||
n[lv_ix].name);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, " <%s>\n",
|
||||
n[lv_ix].name);
|
||||
lvip[lv_ix].lv_is_contiguous = 1;
|
||||
ret = 1;
|
||||
next_lp_ix = 1;
|
||||
|
||||
@@ -81,13 +81,8 @@ int amiga_partition(struct parsed_partitions *state)
|
||||
/* blksize is blocks per 512 byte standard block */
|
||||
blksize = be32_to_cpu( rdb->rdb_BlockBytes ) / 512;
|
||||
|
||||
{
|
||||
char tmp[7 + 10 + 1 + 1];
|
||||
|
||||
/* Be more informative */
|
||||
snprintf(tmp, sizeof(tmp), " RDSK (%d)", blksize * 512);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
}
|
||||
/* Be more informative */
|
||||
seq_buf_printf(&state->pp_buf, " RDSK (%d)", blksize * 512);
|
||||
blk = be32_to_cpu(rdb->rdb_PartitionList);
|
||||
put_dev_sector(sect);
|
||||
for (part = 1; (s32) blk>0 && part<=16; part++, put_dev_sector(sect)) {
|
||||
@@ -179,27 +174,27 @@ int amiga_partition(struct parsed_partitions *state)
|
||||
{
|
||||
/* Be even more informative to aid mounting */
|
||||
char dostype[4];
|
||||
char tmp[42];
|
||||
|
||||
__be32 *dt = (__be32 *)dostype;
|
||||
*dt = pb->pb_Environment[16];
|
||||
if (dostype[3] < ' ')
|
||||
snprintf(tmp, sizeof(tmp), " (%c%c%c^%c)",
|
||||
dostype[0], dostype[1],
|
||||
dostype[2], dostype[3] + '@' );
|
||||
seq_buf_printf(&state->pp_buf,
|
||||
" (%c%c%c^%c)",
|
||||
dostype[0], dostype[1],
|
||||
dostype[2],
|
||||
dostype[3] + '@');
|
||||
else
|
||||
snprintf(tmp, sizeof(tmp), " (%c%c%c%c)",
|
||||
dostype[0], dostype[1],
|
||||
dostype[2], dostype[3]);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
snprintf(tmp, sizeof(tmp), "(res %d spb %d)",
|
||||
be32_to_cpu(pb->pb_Environment[6]),
|
||||
be32_to_cpu(pb->pb_Environment[4]));
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf,
|
||||
" (%c%c%c%c)",
|
||||
dostype[0], dostype[1],
|
||||
dostype[2], dostype[3]);
|
||||
seq_buf_printf(&state->pp_buf, "(res %d spb %d)",
|
||||
be32_to_cpu(pb->pb_Environment[6]),
|
||||
be32_to_cpu(pb->pb_Environment[4]));
|
||||
}
|
||||
res = 1;
|
||||
}
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
|
||||
rdb_done:
|
||||
return res;
|
||||
|
||||
@@ -70,7 +70,7 @@ int atari_partition(struct parsed_partitions *state)
|
||||
}
|
||||
|
||||
pi = &rs->part[0];
|
||||
strlcat(state->pp_buf, " AHDI", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " AHDI");
|
||||
for (slot = 1; pi < &rs->part[4] && slot < state->limit; slot++, pi++) {
|
||||
struct rootsector *xrs;
|
||||
Sector sect2;
|
||||
@@ -89,7 +89,7 @@ int atari_partition(struct parsed_partitions *state)
|
||||
#ifdef ICD_PARTS
|
||||
part_fmt = 1;
|
||||
#endif
|
||||
strlcat(state->pp_buf, " XGM<", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " XGM<");
|
||||
partsect = extensect = be32_to_cpu(pi->st);
|
||||
while (1) {
|
||||
xrs = read_part_sector(state, partsect, §2);
|
||||
@@ -128,14 +128,14 @@ int atari_partition(struct parsed_partitions *state)
|
||||
break;
|
||||
}
|
||||
}
|
||||
strlcat(state->pp_buf, " >", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " >");
|
||||
}
|
||||
#ifdef ICD_PARTS
|
||||
if ( part_fmt!=1 ) { /* no extended partitions -> test ICD-format */
|
||||
pi = &rs->icdpart[0];
|
||||
/* sanity check: no ICD format if first partition invalid */
|
||||
if (OK_id(pi->id)) {
|
||||
strlcat(state->pp_buf, " ICD<", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " ICD<");
|
||||
for (; pi < &rs->icdpart[8] && slot < state->limit; slot++, pi++) {
|
||||
/* accept only GEM,BGM,RAW,LNX,SWP partitions */
|
||||
if (!((pi->flg & 1) && OK_id(pi->id)))
|
||||
@@ -144,13 +144,13 @@ int atari_partition(struct parsed_partitions *state)
|
||||
be32_to_cpu(pi->st),
|
||||
be32_to_cpu(pi->siz));
|
||||
}
|
||||
strlcat(state->pp_buf, " >", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " >");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
put_dev_sector(sect);
|
||||
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/seq_buf.h>
|
||||
#include "../blk.h"
|
||||
|
||||
/*
|
||||
@@ -20,7 +21,7 @@ struct parsed_partitions {
|
||||
int next;
|
||||
int limit;
|
||||
bool access_beyond_eod;
|
||||
char *pp_buf;
|
||||
struct seq_buf pp_buf;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@@ -37,12 +38,9 @@ static inline void
|
||||
put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size)
|
||||
{
|
||||
if (n < p->limit) {
|
||||
char tmp[1 + BDEVNAME_SIZE + 10 + 1];
|
||||
|
||||
p->parts[n].from = from;
|
||||
p->parts[n].size = size;
|
||||
snprintf(tmp, sizeof(tmp), " %s%d", p->name, n);
|
||||
strlcat(p->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&p->pp_buf, " %s%d", p->name, n);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -229,7 +229,6 @@ static int add_part(int slot, struct cmdline_subpart *subpart,
|
||||
struct parsed_partitions *state)
|
||||
{
|
||||
struct partition_meta_info *info;
|
||||
char tmp[sizeof(info->volname) + 4];
|
||||
|
||||
if (slot >= state->limit)
|
||||
return 1;
|
||||
@@ -244,8 +243,7 @@ static int add_part(int slot, struct cmdline_subpart *subpart,
|
||||
|
||||
strscpy(info->volname, subpart->name, sizeof(info->volname));
|
||||
|
||||
snprintf(tmp, sizeof(tmp), "(%s)", info->volname);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, "(%s)", info->volname);
|
||||
|
||||
state->parts[slot].has_info = true;
|
||||
|
||||
@@ -379,7 +377,7 @@ int cmdline_partition(struct parsed_partitions *state)
|
||||
cmdline_parts_set(parts, disk_size, state);
|
||||
cmdline_parts_verifier(1, state);
|
||||
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <linux/major.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/raid/detect.h>
|
||||
@@ -123,16 +124,16 @@ static struct parsed_partitions *check_partition(struct gendisk *hd)
|
||||
state = allocate_partitions(hd);
|
||||
if (!state)
|
||||
return NULL;
|
||||
state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
|
||||
if (!state->pp_buf) {
|
||||
state->pp_buf.buffer = (char *)__get_free_page(GFP_KERNEL);
|
||||
if (!state->pp_buf.buffer) {
|
||||
free_partitions(state);
|
||||
return NULL;
|
||||
}
|
||||
state->pp_buf[0] = '\0';
|
||||
seq_buf_init(&state->pp_buf, state->pp_buf.buffer, PAGE_SIZE);
|
||||
|
||||
state->disk = hd;
|
||||
strscpy(state->name, hd->disk_name);
|
||||
snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
|
||||
seq_buf_printf(&state->pp_buf, " %s:", state->name);
|
||||
if (isdigit(state->name[strlen(state->name)-1]))
|
||||
sprintf(state->name, "p");
|
||||
|
||||
@@ -151,9 +152,9 @@ static struct parsed_partitions *check_partition(struct gendisk *hd)
|
||||
|
||||
}
|
||||
if (res > 0) {
|
||||
printk(KERN_INFO "%s", state->pp_buf);
|
||||
printk(KERN_INFO "%s", seq_buf_str(&state->pp_buf));
|
||||
|
||||
free_page((unsigned long)state->pp_buf);
|
||||
free_page((unsigned long)state->pp_buf.buffer);
|
||||
return state;
|
||||
}
|
||||
if (state->access_beyond_eod)
|
||||
@@ -164,12 +165,12 @@ static struct parsed_partitions *check_partition(struct gendisk *hd)
|
||||
if (err)
|
||||
res = err;
|
||||
if (res) {
|
||||
strlcat(state->pp_buf,
|
||||
" unable to read partition table\n", PAGE_SIZE);
|
||||
printk(KERN_INFO "%s", state->pp_buf);
|
||||
seq_buf_puts(&state->pp_buf,
|
||||
" unable to read partition table\n");
|
||||
printk(KERN_INFO "%s", seq_buf_str(&state->pp_buf));
|
||||
}
|
||||
|
||||
free_page((unsigned long)state->pp_buf);
|
||||
free_page((unsigned long)state->pp_buf.buffer);
|
||||
free_partitions(state);
|
||||
return ERR_PTR(res);
|
||||
}
|
||||
@@ -177,31 +178,31 @@ static struct parsed_partitions *check_partition(struct gendisk *hd)
|
||||
static ssize_t part_partition_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%d\n", bdev_partno(dev_to_bdev(dev)));
|
||||
return sysfs_emit(buf, "%d\n", bdev_partno(dev_to_bdev(dev)));
|
||||
}
|
||||
|
||||
static ssize_t part_start_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%llu\n", dev_to_bdev(dev)->bd_start_sect);
|
||||
return sysfs_emit(buf, "%llu\n", dev_to_bdev(dev)->bd_start_sect);
|
||||
}
|
||||
|
||||
static ssize_t part_ro_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%d\n", bdev_read_only(dev_to_bdev(dev)));
|
||||
return sysfs_emit(buf, "%d\n", bdev_read_only(dev_to_bdev(dev)));
|
||||
}
|
||||
|
||||
static ssize_t part_alignment_offset_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%u\n", bdev_alignment_offset(dev_to_bdev(dev)));
|
||||
return sysfs_emit(buf, "%u\n", bdev_alignment_offset(dev_to_bdev(dev)));
|
||||
}
|
||||
|
||||
static ssize_t part_discard_alignment_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%u\n", bdev_discard_alignment(dev_to_bdev(dev)));
|
||||
return sysfs_emit(buf, "%u\n", bdev_discard_alignment(dev_to_bdev(dev)));
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
|
||||
|
||||
@@ -751,6 +751,6 @@ int efi_partition(struct parsed_partitions *state)
|
||||
}
|
||||
kfree(ptes);
|
||||
kfree(gpt);
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -173,15 +173,13 @@ static int find_vol1_partitions(struct parsed_partitions *state,
|
||||
{
|
||||
sector_t blk;
|
||||
int counter;
|
||||
char tmp[64];
|
||||
Sector sect;
|
||||
unsigned char *data;
|
||||
loff_t offset, size;
|
||||
struct vtoc_format1_label f1;
|
||||
int secperblk;
|
||||
|
||||
snprintf(tmp, sizeof(tmp), "VOL1/%8s:", name);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, "VOL1/%8s:", name);
|
||||
/*
|
||||
* get start of VTOC from the disk label and then search for format1
|
||||
* and format8 labels
|
||||
@@ -219,7 +217,7 @@ static int find_vol1_partitions(struct parsed_partitions *state,
|
||||
blk++;
|
||||
data = read_part_sector(state, blk * secperblk, §);
|
||||
}
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
|
||||
if (!data)
|
||||
return -1;
|
||||
@@ -237,11 +235,9 @@ static int find_lnx1_partitions(struct parsed_partitions *state,
|
||||
dasd_information2_t *info)
|
||||
{
|
||||
loff_t offset, geo_size, size;
|
||||
char tmp[64];
|
||||
int secperblk;
|
||||
|
||||
snprintf(tmp, sizeof(tmp), "LNX1/%8s:", name);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, "LNX1/%8s:", name);
|
||||
secperblk = blocksize >> 9;
|
||||
if (label->lnx.ldl_version == 0xf2) {
|
||||
size = label->lnx.formatted_blocks * secperblk;
|
||||
@@ -258,7 +254,7 @@ static int find_lnx1_partitions(struct parsed_partitions *state,
|
||||
size = nr_sectors;
|
||||
if (size != geo_size) {
|
||||
if (!info) {
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
return 1;
|
||||
}
|
||||
if (!strcmp(info->type, "ECKD"))
|
||||
@@ -270,7 +266,7 @@ static int find_lnx1_partitions(struct parsed_partitions *state,
|
||||
/* first and only partition starts in the first block after the label */
|
||||
offset = labelsect + secperblk;
|
||||
put_partition(state, 1, offset, size - offset);
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -282,7 +278,6 @@ static int find_cms1_partitions(struct parsed_partitions *state,
|
||||
sector_t labelsect)
|
||||
{
|
||||
loff_t offset, size;
|
||||
char tmp[64];
|
||||
int secperblk;
|
||||
|
||||
/*
|
||||
@@ -291,14 +286,12 @@ static int find_cms1_partitions(struct parsed_partitions *state,
|
||||
blocksize = label->cms.block_size;
|
||||
secperblk = blocksize >> 9;
|
||||
if (label->cms.disk_offset != 0) {
|
||||
snprintf(tmp, sizeof(tmp), "CMS1/%8s(MDSK):", name);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, "CMS1/%8s(MDSK):", name);
|
||||
/* disk is reserved minidisk */
|
||||
offset = label->cms.disk_offset * secperblk;
|
||||
size = (label->cms.block_count - 1) * secperblk;
|
||||
} else {
|
||||
snprintf(tmp, sizeof(tmp), "CMS1/%8s:", name);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, "CMS1/%8s:", name);
|
||||
/*
|
||||
* Special case for FBA devices:
|
||||
* If an FBA device is CMS formatted with blocksize > 512 byte
|
||||
@@ -314,7 +307,7 @@ static int find_cms1_partitions(struct parsed_partitions *state,
|
||||
}
|
||||
|
||||
put_partition(state, 1, offset, size-offset);
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -391,11 +384,11 @@ int ibm_partition(struct parsed_partitions *state)
|
||||
*/
|
||||
res = 1;
|
||||
if (info->format == DASD_FORMAT_LDL) {
|
||||
strlcat(state->pp_buf, "(nonl)", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "(nonl)");
|
||||
size = nr_sectors;
|
||||
offset = (info->label_block + 1) * (blocksize >> 9);
|
||||
put_partition(state, 1, offset, size-offset);
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
}
|
||||
} else
|
||||
res = 0;
|
||||
|
||||
@@ -53,7 +53,7 @@ int karma_partition(struct parsed_partitions *state)
|
||||
}
|
||||
slot++;
|
||||
}
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
put_dev_sector(sect);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -582,7 +582,7 @@ static bool ldm_create_data_partitions (struct parsed_partitions *pp,
|
||||
return false;
|
||||
}
|
||||
|
||||
strlcat(pp->pp_buf, " [LDM]", PAGE_SIZE);
|
||||
seq_buf_puts(&pp->pp_buf, " [LDM]");
|
||||
|
||||
/* Create the data partitions */
|
||||
list_for_each (item, &ldb->v_part) {
|
||||
@@ -597,7 +597,7 @@ static bool ldm_create_data_partitions (struct parsed_partitions *pp,
|
||||
part_num++;
|
||||
}
|
||||
|
||||
strlcat(pp->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&pp->pp_buf, "\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -86,7 +86,7 @@ int mac_partition(struct parsed_partitions *state)
|
||||
if (blocks_in_map >= state->limit)
|
||||
blocks_in_map = state->limit - 1;
|
||||
|
||||
strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " [mac]");
|
||||
for (slot = 1; slot <= blocks_in_map; ++slot) {
|
||||
int pos = slot * secsize;
|
||||
put_dev_sector(sect);
|
||||
@@ -152,6 +152,6 @@ int mac_partition(struct parsed_partitions *state)
|
||||
#endif
|
||||
|
||||
put_dev_sector(sect);
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -263,18 +263,11 @@ static void parse_solaris_x86(struct parsed_partitions *state,
|
||||
put_dev_sector(sect);
|
||||
return;
|
||||
}
|
||||
{
|
||||
char tmp[1 + BDEVNAME_SIZE + 10 + 11 + 1];
|
||||
|
||||
snprintf(tmp, sizeof(tmp), " %s%d: <solaris:", state->name, origin);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
}
|
||||
seq_buf_printf(&state->pp_buf, " %s%d: <solaris:", state->name, origin);
|
||||
if (le32_to_cpu(v->v_version) != 1) {
|
||||
char tmp[64];
|
||||
|
||||
snprintf(tmp, sizeof(tmp), " cannot handle version %d vtoc>\n",
|
||||
le32_to_cpu(v->v_version));
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf,
|
||||
" cannot handle version %d vtoc>\n",
|
||||
le32_to_cpu(v->v_version));
|
||||
put_dev_sector(sect);
|
||||
return;
|
||||
}
|
||||
@@ -282,12 +275,10 @@ static void parse_solaris_x86(struct parsed_partitions *state,
|
||||
max_nparts = le16_to_cpu(v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8;
|
||||
for (i = 0; i < max_nparts && state->next < state->limit; i++) {
|
||||
struct solaris_x86_slice *s = &v->v_slice[i];
|
||||
char tmp[3 + 10 + 1 + 1];
|
||||
|
||||
if (s->s_size == 0)
|
||||
continue;
|
||||
snprintf(tmp, sizeof(tmp), " [s%d]", i);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, " [s%d]", i);
|
||||
/* solaris partitions are relative to current MS-DOS
|
||||
* one; must add the offset of the current partition */
|
||||
put_partition(state, state->next++,
|
||||
@@ -295,7 +286,7 @@ static void parse_solaris_x86(struct parsed_partitions *state,
|
||||
le32_to_cpu(s->s_size));
|
||||
}
|
||||
put_dev_sector(sect);
|
||||
strlcat(state->pp_buf, " >\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " >\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -359,7 +350,6 @@ static void parse_bsd(struct parsed_partitions *state,
|
||||
Sector sect;
|
||||
struct bsd_disklabel *l;
|
||||
struct bsd_partition *p;
|
||||
char tmp[64];
|
||||
|
||||
l = read_part_sector(state, offset + 1, §);
|
||||
if (!l)
|
||||
@@ -369,8 +359,7 @@ static void parse_bsd(struct parsed_partitions *state,
|
||||
return;
|
||||
}
|
||||
|
||||
snprintf(tmp, sizeof(tmp), " %s%d: <%s:", state->name, origin, flavour);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, " %s%d: <%s:", state->name, origin, flavour);
|
||||
|
||||
if (le16_to_cpu(l->d_npartitions) < max_partitions)
|
||||
max_partitions = le16_to_cpu(l->d_npartitions);
|
||||
@@ -391,18 +380,16 @@ static void parse_bsd(struct parsed_partitions *state,
|
||||
/* full parent partition, we have it already */
|
||||
continue;
|
||||
if (offset > bsd_start || offset+size < bsd_start+bsd_size) {
|
||||
strlcat(state->pp_buf, "bad subpartition - ignored\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "bad subpartition - ignored\n");
|
||||
continue;
|
||||
}
|
||||
put_partition(state, state->next++, bsd_start, bsd_size);
|
||||
}
|
||||
put_dev_sector(sect);
|
||||
if (le16_to_cpu(l->d_npartitions) > max_partitions) {
|
||||
snprintf(tmp, sizeof(tmp), " (ignored %d more)",
|
||||
le16_to_cpu(l->d_npartitions) - max_partitions);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
}
|
||||
strlcat(state->pp_buf, " >\n", PAGE_SIZE);
|
||||
if (le16_to_cpu(l->d_npartitions) > max_partitions)
|
||||
seq_buf_printf(&state->pp_buf, " (ignored %d more)",
|
||||
le16_to_cpu(l->d_npartitions) - max_partitions);
|
||||
seq_buf_puts(&state->pp_buf, " >\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -496,12 +483,7 @@ static void parse_unixware(struct parsed_partitions *state,
|
||||
put_dev_sector(sect);
|
||||
return;
|
||||
}
|
||||
{
|
||||
char tmp[1 + BDEVNAME_SIZE + 10 + 12 + 1];
|
||||
|
||||
snprintf(tmp, sizeof(tmp), " %s%d: <unixware:", state->name, origin);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
}
|
||||
seq_buf_printf(&state->pp_buf, " %s%d: <unixware:", state->name, origin);
|
||||
p = &l->vtoc.v_slice[1];
|
||||
/* I omit the 0th slice as it is the same as whole disk. */
|
||||
while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) {
|
||||
@@ -515,7 +497,7 @@ static void parse_unixware(struct parsed_partitions *state,
|
||||
p++;
|
||||
}
|
||||
put_dev_sector(sect);
|
||||
strlcat(state->pp_buf, " >\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " >\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -546,10 +528,7 @@ static void parse_minix(struct parsed_partitions *state,
|
||||
* the normal boot sector. */
|
||||
if (msdos_magic_present(data + 510) &&
|
||||
p->sys_ind == MINIX_PARTITION) { /* subpartition table present */
|
||||
char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1];
|
||||
|
||||
snprintf(tmp, sizeof(tmp), " %s%d: <minix:", state->name, origin);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, " %s%d: <minix:", state->name, origin);
|
||||
for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) {
|
||||
if (state->next == state->limit)
|
||||
break;
|
||||
@@ -558,7 +537,7 @@ static void parse_minix(struct parsed_partitions *state,
|
||||
put_partition(state, state->next++,
|
||||
start_sect(p), nr_sects(p));
|
||||
}
|
||||
strlcat(state->pp_buf, " >\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " >\n");
|
||||
}
|
||||
put_dev_sector(sect);
|
||||
#endif /* CONFIG_MINIX_SUBPARTITION */
|
||||
@@ -602,7 +581,7 @@ int msdos_partition(struct parsed_partitions *state)
|
||||
#ifdef CONFIG_AIX_PARTITION
|
||||
return aix_partition(state);
|
||||
#else
|
||||
strlcat(state->pp_buf, " [AIX]", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " [AIX]");
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
@@ -629,7 +608,7 @@ int msdos_partition(struct parsed_partitions *state)
|
||||
fb = (struct fat_boot_sector *) data;
|
||||
if (slot == 1 && fb->reserved && fb->fats
|
||||
&& fat_valid_media(fb->media)) {
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
put_dev_sector(sect);
|
||||
return 1;
|
||||
} else {
|
||||
@@ -678,9 +657,9 @@ int msdos_partition(struct parsed_partitions *state)
|
||||
n = min(size, max(sector_size, n));
|
||||
put_partition(state, slot, start, n);
|
||||
|
||||
strlcat(state->pp_buf, " <", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " <");
|
||||
parse_extended(state, start, size, disksig);
|
||||
strlcat(state->pp_buf, " >", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, " >");
|
||||
continue;
|
||||
}
|
||||
put_partition(state, slot, start, size);
|
||||
@@ -688,12 +667,12 @@ int msdos_partition(struct parsed_partitions *state)
|
||||
if (p->sys_ind == LINUX_RAID_PARTITION)
|
||||
state->parts[slot].flags = ADDPART_FLAG_RAID;
|
||||
if (p->sys_ind == DM6_PARTITION)
|
||||
strlcat(state->pp_buf, "[DM]", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "[DM]");
|
||||
if (p->sys_ind == EZD_PARTITION)
|
||||
strlcat(state->pp_buf, "[EZD]", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "[EZD]");
|
||||
}
|
||||
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
|
||||
/* second pass - output for each on a separate line */
|
||||
p = (struct msdos_partition *) (0x1be + data);
|
||||
|
||||
@@ -36,7 +36,6 @@ static void add_of_partition(struct parsed_partitions *state, int slot,
|
||||
struct device_node *np)
|
||||
{
|
||||
struct partition_meta_info *info;
|
||||
char tmp[sizeof(info->volname) + 4];
|
||||
const char *partname;
|
||||
int len;
|
||||
|
||||
@@ -63,8 +62,7 @@ static void add_of_partition(struct parsed_partitions *state, int slot,
|
||||
partname = of_get_property(np, "name", &len);
|
||||
strscpy(info->volname, partname, sizeof(info->volname));
|
||||
|
||||
snprintf(tmp, sizeof(tmp), "(%s)", info->volname);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, "(%s)", info->volname);
|
||||
}
|
||||
|
||||
int of_partition(struct parsed_partitions *state)
|
||||
@@ -104,7 +102,7 @@ int of_partition(struct parsed_partitions *state)
|
||||
slot++;
|
||||
}
|
||||
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -81,7 +81,7 @@ int osf_partition(struct parsed_partitions *state)
|
||||
le32_to_cpu(partition->p_size));
|
||||
slot++;
|
||||
}
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
put_dev_sector(sect);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -79,7 +79,7 @@ int sgi_partition(struct parsed_partitions *state)
|
||||
}
|
||||
slot++;
|
||||
}
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
put_dev_sector(sect);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -121,7 +121,7 @@ int sun_partition(struct parsed_partitions *state)
|
||||
}
|
||||
slot++;
|
||||
}
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
put_dev_sector(sect);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -54,7 +54,6 @@ int sysv68_partition(struct parsed_partitions *state)
|
||||
unsigned char *data;
|
||||
struct dkblk0 *b;
|
||||
struct slice *slice;
|
||||
char tmp[64];
|
||||
|
||||
data = read_part_sector(state, 0, §);
|
||||
if (!data)
|
||||
@@ -74,8 +73,7 @@ int sysv68_partition(struct parsed_partitions *state)
|
||||
return -1;
|
||||
|
||||
slices -= 1; /* last slice is the whole disk */
|
||||
snprintf(tmp, sizeof(tmp), "sysV68: %s(s%u)", state->name, slices);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, "sysV68: %s(s%u)", state->name, slices);
|
||||
slice = (struct slice *)data;
|
||||
for (i = 0; i < slices; i++, slice++) {
|
||||
if (slot == state->limit)
|
||||
@@ -84,12 +82,11 @@ int sysv68_partition(struct parsed_partitions *state)
|
||||
put_partition(state, slot,
|
||||
be32_to_cpu(slice->blkoff),
|
||||
be32_to_cpu(slice->nblocks));
|
||||
snprintf(tmp, sizeof(tmp), "(s%u)", i);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
seq_buf_printf(&state->pp_buf, "(s%u)", i);
|
||||
}
|
||||
slot++;
|
||||
}
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
put_dev_sector(sect);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -39,7 +39,7 @@ int ultrix_partition(struct parsed_partitions *state)
|
||||
label->pt_part[i].pi_blkoff,
|
||||
label->pt_part[i].pi_nblocks);
|
||||
put_dev_sector(sect);
|
||||
strlcat(state->pp_buf, "\n", PAGE_SIZE);
|
||||
seq_buf_puts(&state->pp_buf, "\n");
|
||||
return 1;
|
||||
} else {
|
||||
put_dev_sector(sect);
|
||||
|
||||
446
block/sed-opal.c
446
block/sed-opal.c
@@ -160,6 +160,8 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
|
||||
{ 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00 },
|
||||
[OPAL_DATASTORE] =
|
||||
{ 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00 },
|
||||
[OPAL_LOCKING_TABLE] =
|
||||
{ 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x00 },
|
||||
|
||||
/* C_PIN_TABLE object ID's */
|
||||
[OPAL_C_PIN_MSID] =
|
||||
@@ -218,6 +220,8 @@ static const u8 opalmethod[][OPAL_METHOD_LENGTH] = {
|
||||
{ 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x06, 0x01 },
|
||||
[OPAL_ERASE] =
|
||||
{ 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x08, 0x03 },
|
||||
[OPAL_REACTIVATE] =
|
||||
{ 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x08, 0x01 },
|
||||
};
|
||||
|
||||
static int end_opal_session_error(struct opal_dev *dev);
|
||||
@@ -1514,7 +1518,7 @@ static inline int enable_global_lr(struct opal_dev *dev, u8 *uid,
|
||||
return err;
|
||||
}
|
||||
|
||||
static int setup_locking_range(struct opal_dev *dev, void *data)
|
||||
static int setup_enable_range(struct opal_dev *dev, void *data)
|
||||
{
|
||||
u8 uid[OPAL_UID_LENGTH];
|
||||
struct opal_user_lr_setup *setup = data;
|
||||
@@ -1528,38 +1532,47 @@ static int setup_locking_range(struct opal_dev *dev, void *data)
|
||||
|
||||
if (lr == 0)
|
||||
err = enable_global_lr(dev, uid, setup);
|
||||
else {
|
||||
err = cmd_start(dev, uid, opalmethod[OPAL_SET]);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u8(&err, dev, OPAL_VALUES);
|
||||
add_token_u8(&err, dev, OPAL_STARTLIST);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u8(&err, dev, OPAL_RANGESTART);
|
||||
add_token_u64(&err, dev, setup->range_start);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u8(&err, dev, OPAL_RANGELENGTH);
|
||||
add_token_u64(&err, dev, setup->range_length);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u8(&err, dev, OPAL_READLOCKENABLED);
|
||||
add_token_u64(&err, dev, !!setup->RLE);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u8(&err, dev, OPAL_WRITELOCKENABLED);
|
||||
add_token_u64(&err, dev, !!setup->WLE);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_ENDLIST);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
}
|
||||
else
|
||||
err = generic_lr_enable_disable(dev, uid, !!setup->RLE, !!setup->WLE, 0, 0);
|
||||
if (err) {
|
||||
pr_debug("Error building Setup Locking range command.\n");
|
||||
pr_debug("Failed to create enable lr command.\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
return finalize_and_send(dev, parse_and_check_status);
|
||||
}
|
||||
|
||||
static int setup_locking_range_start_length(struct opal_dev *dev, void *data)
|
||||
{
|
||||
int err;
|
||||
u8 uid[OPAL_UID_LENGTH];
|
||||
struct opal_user_lr_setup *setup = data;
|
||||
|
||||
err = build_locking_range(uid, sizeof(uid), setup->session.opal_key.lr);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = cmd_start(dev, uid, opalmethod[OPAL_SET]);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u8(&err, dev, OPAL_VALUES);
|
||||
add_token_u8(&err, dev, OPAL_STARTLIST);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u8(&err, dev, OPAL_RANGESTART);
|
||||
add_token_u64(&err, dev, setup->range_start);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u8(&err, dev, OPAL_RANGELENGTH);
|
||||
add_token_u64(&err, dev, setup->range_length);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_ENDLIST);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
|
||||
if (err) {
|
||||
pr_debug("Error building Setup Locking RangeStartLength command.\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -1568,7 +1581,7 @@ static int setup_locking_range(struct opal_dev *dev, void *data)
|
||||
|
||||
static int response_get_column(const struct parsed_resp *resp,
|
||||
int *iter,
|
||||
u8 column,
|
||||
u64 column,
|
||||
u64 *value)
|
||||
{
|
||||
const struct opal_resp_tok *tok;
|
||||
@@ -1586,7 +1599,7 @@ static int response_get_column(const struct parsed_resp *resp,
|
||||
n++;
|
||||
|
||||
if (response_get_u64(resp, n) != column) {
|
||||
pr_debug("Token %d does not match expected column %u.\n",
|
||||
pr_debug("Token %d does not match expected column %llu.\n",
|
||||
n, column);
|
||||
return OPAL_INVAL_PARAM;
|
||||
}
|
||||
@@ -1744,6 +1757,12 @@ static int start_anybodyASP_opal_session(struct opal_dev *dev, void *data)
|
||||
OPAL_ADMINSP_UID, NULL, 0);
|
||||
}
|
||||
|
||||
static int start_anybodyLSP_opal_session(struct opal_dev *dev, void *data)
|
||||
{
|
||||
return start_generic_opal_session(dev, OPAL_ANYBODY_UID,
|
||||
OPAL_LOCKINGSP_UID, NULL, 0);
|
||||
}
|
||||
|
||||
static int start_SIDASP_opal_session(struct opal_dev *dev, void *data)
|
||||
{
|
||||
int ret;
|
||||
@@ -2285,6 +2304,74 @@ static int activate_lsp(struct opal_dev *dev, void *data)
|
||||
return finalize_and_send(dev, parse_and_check_status);
|
||||
}
|
||||
|
||||
static int reactivate_lsp(struct opal_dev *dev, void *data)
|
||||
{
|
||||
struct opal_lr_react *opal_react = data;
|
||||
u8 user_lr[OPAL_UID_LENGTH];
|
||||
int err, i;
|
||||
|
||||
err = cmd_start(dev, opaluid[OPAL_THISSP_UID],
|
||||
opalmethod[OPAL_REACTIVATE]);
|
||||
|
||||
if (err) {
|
||||
pr_debug("Error building Reactivate LockingSP command.\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* If neither 'entire_table' nor 'num_lrs' is set, the device
|
||||
* gets reactivated with SUM disabled. Only Admin1PIN will change
|
||||
* if set.
|
||||
*/
|
||||
if (opal_react->entire_table) {
|
||||
/* Entire Locking table (all locking ranges) will be put in SUM. */
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u64(&err, dev, OPAL_SUM_SET_LIST);
|
||||
add_token_bytestring(&err, dev, opaluid[OPAL_LOCKING_TABLE], OPAL_UID_LENGTH);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
} else if (opal_react->num_lrs) {
|
||||
/* Subset of Locking table (selected locking range(s)) to be put in SUM */
|
||||
err = build_locking_range(user_lr, sizeof(user_lr),
|
||||
opal_react->lr[0]);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u64(&err, dev, OPAL_SUM_SET_LIST);
|
||||
|
||||
add_token_u8(&err, dev, OPAL_STARTLIST);
|
||||
add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH);
|
||||
for (i = 1; i < opal_react->num_lrs; i++) {
|
||||
user_lr[7] = opal_react->lr[i];
|
||||
add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH);
|
||||
}
|
||||
add_token_u8(&err, dev, OPAL_ENDLIST);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
}
|
||||
|
||||
/* Skipping the rangle policy parameter is same as setting its value to zero */
|
||||
if (opal_react->range_policy && (opal_react->num_lrs || opal_react->entire_table)) {
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u64(&err, dev, OPAL_SUM_RANGE_POLICY);
|
||||
add_token_u8(&err, dev, 1);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
}
|
||||
|
||||
/*
|
||||
* Optional parameter. If set, it changes the Admin1 PIN even when SUM
|
||||
* is being disabled.
|
||||
*/
|
||||
if (opal_react->new_admin_key.key_len) {
|
||||
add_token_u8(&err, dev, OPAL_STARTNAME);
|
||||
add_token_u64(&err, dev, OPAL_SUM_ADMIN1_PIN);
|
||||
add_token_bytestring(&err, dev, opal_react->new_admin_key.key,
|
||||
opal_react->new_admin_key.key_len);
|
||||
add_token_u8(&err, dev, OPAL_ENDNAME);
|
||||
}
|
||||
|
||||
return finalize_and_send(dev, parse_and_check_status);
|
||||
}
|
||||
|
||||
/* Determine if we're in the Manufactured Inactive or Active state */
|
||||
static int get_lsp_lifecycle(struct opal_dev *dev, void *data)
|
||||
{
|
||||
@@ -2955,12 +3042,92 @@ static int opal_activate_lsp(struct opal_dev *dev,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int opal_reactivate_lsp(struct opal_dev *dev,
|
||||
struct opal_lr_react *opal_lr_react)
|
||||
{
|
||||
const struct opal_step active_steps[] = {
|
||||
{ start_admin1LSP_opal_session, &opal_lr_react->key },
|
||||
{ reactivate_lsp, opal_lr_react },
|
||||
/* No end_opal_session. The controller terminates the session */
|
||||
};
|
||||
int ret;
|
||||
|
||||
/* use either 'entire_table' parameter or set of locking ranges */
|
||||
if (opal_lr_react->num_lrs > OPAL_MAX_LRS ||
|
||||
(opal_lr_react->num_lrs && opal_lr_react->entire_table))
|
||||
return -EINVAL;
|
||||
|
||||
ret = opal_get_key(dev, &opal_lr_react->key);
|
||||
if (ret)
|
||||
return ret;
|
||||
mutex_lock(&dev->dev_lock);
|
||||
setup_opal_dev(dev);
|
||||
ret = execute_steps(dev, active_steps, ARRAY_SIZE(active_steps));
|
||||
mutex_unlock(&dev->dev_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int opal_setup_locking_range(struct opal_dev *dev,
|
||||
struct opal_user_lr_setup *opal_lrs)
|
||||
{
|
||||
const struct opal_step lr_steps[] = {
|
||||
{ start_auth_opal_session, &opal_lrs->session },
|
||||
{ setup_locking_range, opal_lrs },
|
||||
{ setup_locking_range_start_length, opal_lrs },
|
||||
{ setup_enable_range, opal_lrs },
|
||||
{ end_opal_session, }
|
||||
}, lr_global_steps[] = {
|
||||
{ start_auth_opal_session, &opal_lrs->session },
|
||||
{ setup_enable_range, opal_lrs },
|
||||
{ end_opal_session, }
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = opal_get_key(dev, &opal_lrs->session.opal_key);
|
||||
if (ret)
|
||||
return ret;
|
||||
mutex_lock(&dev->dev_lock);
|
||||
setup_opal_dev(dev);
|
||||
if (opal_lrs->session.opal_key.lr == 0)
|
||||
ret = execute_steps(dev, lr_global_steps, ARRAY_SIZE(lr_global_steps));
|
||||
else
|
||||
ret = execute_steps(dev, lr_steps, ARRAY_SIZE(lr_steps));
|
||||
mutex_unlock(&dev->dev_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int opal_setup_locking_range_start_length(struct opal_dev *dev,
|
||||
struct opal_user_lr_setup *opal_lrs)
|
||||
{
|
||||
const struct opal_step lr_steps[] = {
|
||||
{ start_auth_opal_session, &opal_lrs->session },
|
||||
{ setup_locking_range_start_length, opal_lrs },
|
||||
{ end_opal_session, }
|
||||
};
|
||||
int ret;
|
||||
|
||||
/* we can not set global locking range offset or length */
|
||||
if (opal_lrs->session.opal_key.lr == 0)
|
||||
return -EINVAL;
|
||||
|
||||
ret = opal_get_key(dev, &opal_lrs->session.opal_key);
|
||||
if (ret)
|
||||
return ret;
|
||||
mutex_lock(&dev->dev_lock);
|
||||
setup_opal_dev(dev);
|
||||
ret = execute_steps(dev, lr_steps, ARRAY_SIZE(lr_steps));
|
||||
mutex_unlock(&dev->dev_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int opal_enable_disable_range(struct opal_dev *dev,
|
||||
struct opal_user_lr_setup *opal_lrs)
|
||||
{
|
||||
const struct opal_step lr_steps[] = {
|
||||
{ start_auth_opal_session, &opal_lrs->session },
|
||||
{ setup_enable_range, opal_lrs },
|
||||
{ end_opal_session, }
|
||||
};
|
||||
int ret;
|
||||
@@ -3228,6 +3395,200 @@ static int opal_get_geometry(struct opal_dev *dev, void __user *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_sum_ranges(struct opal_dev *dev, void *data)
|
||||
{
|
||||
const char *lr_uid;
|
||||
size_t lr_uid_len;
|
||||
u64 val;
|
||||
const struct opal_resp_tok *tok;
|
||||
int err, tok_n = 2;
|
||||
struct opal_sum_ranges *sranges = data;
|
||||
const __u8 lr_all[OPAL_MAX_LRS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
|
||||
|
||||
err = generic_get_columns(dev, opaluid[OPAL_LOCKING_INFO_TABLE], OPAL_SUM_SET_LIST,
|
||||
OPAL_SUM_RANGE_POLICY);
|
||||
if (err) {
|
||||
pr_debug("Couldn't get locking info table columns %d to %d.\n",
|
||||
OPAL_SUM_SET_LIST, OPAL_SUM_RANGE_POLICY);
|
||||
return err;
|
||||
}
|
||||
|
||||
tok = response_get_token(&dev->parsed, tok_n);
|
||||
if (IS_ERR(tok))
|
||||
return PTR_ERR(tok);
|
||||
|
||||
if (!response_token_matches(tok, OPAL_STARTNAME)) {
|
||||
pr_debug("Unexpected response token type %d.\n", tok_n);
|
||||
return OPAL_INVAL_PARAM;
|
||||
}
|
||||
tok_n++;
|
||||
|
||||
if (response_get_u64(&dev->parsed, tok_n) != OPAL_SUM_SET_LIST) {
|
||||
pr_debug("Token %d does not match expected column %u.\n",
|
||||
tok_n, OPAL_SUM_SET_LIST);
|
||||
return OPAL_INVAL_PARAM;
|
||||
}
|
||||
tok_n++;
|
||||
|
||||
tok = response_get_token(&dev->parsed, tok_n);
|
||||
if (IS_ERR(tok))
|
||||
return PTR_ERR(tok);
|
||||
|
||||
/*
|
||||
* The OPAL_SUM_SET_LIST response contains two distinct values:
|
||||
*
|
||||
* - the list of individual locking ranges (UIDs) put in SUM. The list
|
||||
* may also be empty signaling the SUM is disabled.
|
||||
*
|
||||
* - the Locking table UID if the entire Locking table is put in SUM.
|
||||
*/
|
||||
if (response_token_matches(tok, OPAL_STARTLIST)) {
|
||||
sranges->num_lrs = 0;
|
||||
|
||||
tok_n++;
|
||||
tok = response_get_token(&dev->parsed, tok_n);
|
||||
if (IS_ERR(tok))
|
||||
return PTR_ERR(tok);
|
||||
|
||||
while (!response_token_matches(tok, OPAL_ENDLIST)) {
|
||||
lr_uid_len = response_get_string(&dev->parsed, tok_n, &lr_uid);
|
||||
if (lr_uid_len != OPAL_UID_LENGTH) {
|
||||
pr_debug("Unexpected response token type %d.\n", tok_n);
|
||||
return OPAL_INVAL_PARAM;
|
||||
}
|
||||
|
||||
if (memcmp(lr_uid, opaluid[OPAL_LOCKINGRANGE_GLOBAL], OPAL_UID_LENGTH)) {
|
||||
if (lr_uid[5] != LOCKING_RANGE_NON_GLOBAL) {
|
||||
pr_debug("Unexpected byte %d at LR UUID position 5.\n",
|
||||
lr_uid[5]);
|
||||
return OPAL_INVAL_PARAM;
|
||||
}
|
||||
sranges->lr[sranges->num_lrs++] = lr_uid[7];
|
||||
} else
|
||||
sranges->lr[sranges->num_lrs++] = 0;
|
||||
|
||||
tok_n++;
|
||||
tok = response_get_token(&dev->parsed, tok_n);
|
||||
if (IS_ERR(tok))
|
||||
return PTR_ERR(tok);
|
||||
}
|
||||
} else {
|
||||
/* Only OPAL_LOCKING_TABLE UID is an alternative to OPAL_STARTLIST here. */
|
||||
lr_uid_len = response_get_string(&dev->parsed, tok_n, &lr_uid);
|
||||
if (lr_uid_len != OPAL_UID_LENGTH) {
|
||||
pr_debug("Unexpected response token type %d.\n", tok_n);
|
||||
return OPAL_INVAL_PARAM;
|
||||
}
|
||||
|
||||
if (memcmp(lr_uid, opaluid[OPAL_LOCKING_TABLE], OPAL_UID_LENGTH)) {
|
||||
pr_debug("Unexpected response UID.\n");
|
||||
return OPAL_INVAL_PARAM;
|
||||
}
|
||||
|
||||
/* sed-opal kernel API already provides following limit in Activate command */
|
||||
sranges->num_lrs = OPAL_MAX_LRS;
|
||||
memcpy(sranges->lr, lr_all, OPAL_MAX_LRS);
|
||||
}
|
||||
tok_n++;
|
||||
|
||||
tok = response_get_token(&dev->parsed, tok_n);
|
||||
if (IS_ERR(tok))
|
||||
return PTR_ERR(tok);
|
||||
|
||||
if (!response_token_matches(tok, OPAL_ENDNAME)) {
|
||||
pr_debug("Unexpected response token type %d.\n", tok_n);
|
||||
return OPAL_INVAL_PARAM;
|
||||
}
|
||||
tok_n++;
|
||||
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_SUM_RANGE_POLICY, &val);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
sranges->range_policy = val ? 1 : 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int opal_get_sum_ranges(struct opal_dev *dev, struct opal_sum_ranges *opal_sum_rngs,
|
||||
void __user *data)
|
||||
{
|
||||
const struct opal_step admin_steps[] = {
|
||||
{ start_admin1LSP_opal_session, &opal_sum_rngs->key },
|
||||
{ get_sum_ranges, opal_sum_rngs },
|
||||
{ end_opal_session, }
|
||||
}, anybody_steps[] = {
|
||||
{ start_anybodyLSP_opal_session, NULL },
|
||||
{ get_sum_ranges, opal_sum_rngs },
|
||||
{ end_opal_session, }
|
||||
};
|
||||
int ret;
|
||||
|
||||
mutex_lock(&dev->dev_lock);
|
||||
setup_opal_dev(dev);
|
||||
if (opal_sum_rngs->key.key_len)
|
||||
/* Use Admin1 session (authenticated by PIN) to retrieve LockingInfo columns */
|
||||
ret = execute_steps(dev, admin_steps, ARRAY_SIZE(admin_steps));
|
||||
else
|
||||
/* Use Anybody session (no key) to retrieve LockingInfo columns */
|
||||
ret = execute_steps(dev, anybody_steps, ARRAY_SIZE(anybody_steps));
|
||||
mutex_unlock(&dev->dev_lock);
|
||||
|
||||
/* skip session info when copying back to uspace */
|
||||
if (!ret && copy_to_user(data + offsetof(struct opal_sum_ranges, num_lrs),
|
||||
(void *)opal_sum_rngs + offsetof(struct opal_sum_ranges, num_lrs),
|
||||
sizeof(*opal_sum_rngs) - offsetof(struct opal_sum_ranges, num_lrs))) {
|
||||
pr_debug("Error copying SUM ranges info to userspace\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int opal_stack_reset(struct opal_dev *dev)
|
||||
{
|
||||
struct opal_stack_reset *req;
|
||||
struct opal_stack_reset_response *resp;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&dev->dev_lock);
|
||||
|
||||
memset(dev->cmd, 0, IO_BUFFER_LENGTH);
|
||||
req = (struct opal_stack_reset *)dev->cmd;
|
||||
req->extendedComID[0] = dev->comid >> 8;
|
||||
req->extendedComID[1] = dev->comid & 0xFF;
|
||||
req->request_code = cpu_to_be32(OPAL_STACK_RESET);
|
||||
|
||||
ret = dev->send_recv(dev->data, dev->comid, TCG_SECP_02,
|
||||
dev->cmd, IO_BUFFER_LENGTH, true);
|
||||
if (ret) {
|
||||
pr_debug("Error sending stack reset: %d\n", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
memset(dev->resp, 0, IO_BUFFER_LENGTH);
|
||||
ret = dev->send_recv(dev->data, dev->comid, TCG_SECP_02,
|
||||
dev->resp, IO_BUFFER_LENGTH, false);
|
||||
if (ret) {
|
||||
pr_debug("Error receiving stack reset response: %d\n", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
resp = (struct opal_stack_reset_response *)dev->resp;
|
||||
if (be16_to_cpu(resp->data_length) != 4) {
|
||||
pr_debug("Stack reset pending\n");
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
if (be32_to_cpu(resp->response) != 0) {
|
||||
pr_debug("Stack reset failed: %u\n", be32_to_cpu(resp->response));
|
||||
ret = -EIO;
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&dev->dev_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
|
||||
{
|
||||
void *p;
|
||||
@@ -3313,6 +3674,21 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
|
||||
case IOC_OPAL_SET_SID_PW:
|
||||
ret = opal_set_new_sid_pw(dev, p);
|
||||
break;
|
||||
case IOC_OPAL_REACTIVATE_LSP:
|
||||
ret = opal_reactivate_lsp(dev, p);
|
||||
break;
|
||||
case IOC_OPAL_LR_SET_START_LEN:
|
||||
ret = opal_setup_locking_range_start_length(dev, p);
|
||||
break;
|
||||
case IOC_OPAL_ENABLE_DISABLE_LR:
|
||||
ret = opal_enable_disable_range(dev, p);
|
||||
break;
|
||||
case IOC_OPAL_GET_SUM_STATUS:
|
||||
ret = opal_get_sum_ranges(dev, p, arg);
|
||||
break;
|
||||
case IOC_OPAL_STACK_RESET:
|
||||
ret = opal_stack_reset(dev);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
|
||||
854
block/t10-pi.c
854
block/t10-pi.c
@@ -12,230 +12,115 @@
|
||||
#include <linux/unaligned.h>
|
||||
#include "blk.h"
|
||||
|
||||
struct blk_integrity_iter {
|
||||
void *prot_buf;
|
||||
void *data_buf;
|
||||
sector_t seed;
|
||||
unsigned int data_size;
|
||||
unsigned short interval;
|
||||
const char *disk_name;
|
||||
#define APP_TAG_ESCAPE 0xffff
|
||||
#define REF_TAG_ESCAPE 0xffffffff
|
||||
|
||||
/*
|
||||
* This union is used for onstack allocations when the pi field is split across
|
||||
* segments. blk_validate_integrity_limits() guarantees pi_tuple_size matches
|
||||
* the sizeof one of these two types.
|
||||
*/
|
||||
union pi_tuple {
|
||||
struct crc64_pi_tuple crc64_pi;
|
||||
struct t10_pi_tuple t10_pi;
|
||||
};
|
||||
|
||||
static __be16 t10_pi_csum(__be16 csum, void *data, unsigned int len,
|
||||
unsigned char csum_type)
|
||||
struct blk_integrity_iter {
|
||||
struct bio *bio;
|
||||
struct bio_integrity_payload *bip;
|
||||
struct blk_integrity *bi;
|
||||
struct bvec_iter data_iter;
|
||||
struct bvec_iter prot_iter;
|
||||
unsigned int interval_remaining;
|
||||
u64 seed;
|
||||
u64 csum;
|
||||
};
|
||||
|
||||
static void blk_calculate_guard(struct blk_integrity_iter *iter, void *data,
|
||||
unsigned int len)
|
||||
{
|
||||
if (csum_type == BLK_INTEGRITY_CSUM_IP)
|
||||
return (__force __be16)ip_compute_csum(data, len);
|
||||
return cpu_to_be16(crc_t10dif_update(be16_to_cpu(csum), data, len));
|
||||
switch (iter->bi->csum_type) {
|
||||
case BLK_INTEGRITY_CSUM_CRC64:
|
||||
iter->csum = crc64_nvme(iter->csum, data, len);
|
||||
break;
|
||||
case BLK_INTEGRITY_CSUM_CRC:
|
||||
iter->csum = crc_t10dif_update(iter->csum, data, len);
|
||||
break;
|
||||
case BLK_INTEGRITY_CSUM_IP:
|
||||
iter->csum = (__force u32)csum_partial(data, len,
|
||||
(__force __wsum)iter->csum);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
iter->csum = U64_MAX;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void blk_integrity_csum_finish(struct blk_integrity_iter *iter)
|
||||
{
|
||||
switch (iter->bi->csum_type) {
|
||||
case BLK_INTEGRITY_CSUM_IP:
|
||||
iter->csum = (__force u16)csum_fold((__force __wsum)iter->csum);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Type 1 and Type 2 protection use the same format: 16 bit guard tag,
|
||||
* 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
|
||||
* tag.
|
||||
* Update the csum for formats that have metadata padding in front of the data
|
||||
* integrity field
|
||||
*/
|
||||
static void t10_pi_generate(struct blk_integrity_iter *iter,
|
||||
struct blk_integrity *bi)
|
||||
static void blk_integrity_csum_offset(struct blk_integrity_iter *iter)
|
||||
{
|
||||
u8 offset = bi->pi_offset;
|
||||
unsigned int i;
|
||||
unsigned int offset = iter->bi->pi_offset;
|
||||
struct bio_vec *bvec = iter->bip->bip_vec;
|
||||
|
||||
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
|
||||
struct t10_pi_tuple *pi = iter->prot_buf + offset;
|
||||
while (offset > 0) {
|
||||
struct bio_vec pbv = bvec_iter_bvec(bvec, iter->prot_iter);
|
||||
unsigned int len = min(pbv.bv_len, offset);
|
||||
void *prot_buf = bvec_kmap_local(&pbv);
|
||||
|
||||
pi->guard_tag = t10_pi_csum(0, iter->data_buf, iter->interval,
|
||||
bi->csum_type);
|
||||
if (offset)
|
||||
pi->guard_tag = t10_pi_csum(pi->guard_tag,
|
||||
iter->prot_buf, offset, bi->csum_type);
|
||||
pi->app_tag = 0;
|
||||
blk_calculate_guard(iter, prot_buf, len);
|
||||
kunmap_local(prot_buf);
|
||||
offset -= len;
|
||||
bvec_iter_advance_single(bvec, &iter->prot_iter, len);
|
||||
}
|
||||
blk_integrity_csum_finish(iter);
|
||||
}
|
||||
|
||||
if (bi->flags & BLK_INTEGRITY_REF_TAG)
|
||||
pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed));
|
||||
else
|
||||
pi->ref_tag = 0;
|
||||
static void blk_integrity_copy_from_tuple(struct bio_integrity_payload *bip,
|
||||
struct bvec_iter *iter, void *tuple,
|
||||
unsigned int tuple_size)
|
||||
{
|
||||
while (tuple_size) {
|
||||
struct bio_vec pbv = bvec_iter_bvec(bip->bip_vec, *iter);
|
||||
unsigned int len = min(tuple_size, pbv.bv_len);
|
||||
void *prot_buf = bvec_kmap_local(&pbv);
|
||||
|
||||
iter->data_buf += iter->interval;
|
||||
iter->prot_buf += bi->metadata_size;
|
||||
iter->seed++;
|
||||
memcpy(prot_buf, tuple, len);
|
||||
kunmap_local(prot_buf);
|
||||
bvec_iter_advance_single(bip->bip_vec, iter, len);
|
||||
tuple_size -= len;
|
||||
tuple += len;
|
||||
}
|
||||
}
|
||||
|
||||
static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
|
||||
struct blk_integrity *bi)
|
||||
static void blk_integrity_copy_to_tuple(struct bio_integrity_payload *bip,
|
||||
struct bvec_iter *iter, void *tuple,
|
||||
unsigned int tuple_size)
|
||||
{
|
||||
u8 offset = bi->pi_offset;
|
||||
unsigned int i;
|
||||
while (tuple_size) {
|
||||
struct bio_vec pbv = bvec_iter_bvec(bip->bip_vec, *iter);
|
||||
unsigned int len = min(tuple_size, pbv.bv_len);
|
||||
void *prot_buf = bvec_kmap_local(&pbv);
|
||||
|
||||
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
|
||||
struct t10_pi_tuple *pi = iter->prot_buf + offset;
|
||||
__be16 csum;
|
||||
|
||||
if (bi->flags & BLK_INTEGRITY_REF_TAG) {
|
||||
if (pi->app_tag == T10_PI_APP_ESCAPE)
|
||||
goto next;
|
||||
|
||||
if (be32_to_cpu(pi->ref_tag) !=
|
||||
lower_32_bits(iter->seed)) {
|
||||
pr_err("%s: ref tag error at location %llu " \
|
||||
"(rcvd %u)\n", iter->disk_name,
|
||||
(unsigned long long)
|
||||
iter->seed, be32_to_cpu(pi->ref_tag));
|
||||
return BLK_STS_PROTECTION;
|
||||
}
|
||||
} else {
|
||||
if (pi->app_tag == T10_PI_APP_ESCAPE &&
|
||||
pi->ref_tag == T10_PI_REF_ESCAPE)
|
||||
goto next;
|
||||
}
|
||||
|
||||
csum = t10_pi_csum(0, iter->data_buf, iter->interval,
|
||||
bi->csum_type);
|
||||
if (offset)
|
||||
csum = t10_pi_csum(csum, iter->prot_buf, offset,
|
||||
bi->csum_type);
|
||||
|
||||
if (pi->guard_tag != csum) {
|
||||
pr_err("%s: guard tag error at sector %llu " \
|
||||
"(rcvd %04x, want %04x)\n", iter->disk_name,
|
||||
(unsigned long long)iter->seed,
|
||||
be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
|
||||
return BLK_STS_PROTECTION;
|
||||
}
|
||||
|
||||
next:
|
||||
iter->data_buf += iter->interval;
|
||||
iter->prot_buf += bi->metadata_size;
|
||||
iter->seed++;
|
||||
}
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
/**
|
||||
* t10_pi_type1_prepare - prepare PI prior submitting request to device
|
||||
* @rq: request with PI that should be prepared
|
||||
*
|
||||
* For Type 1/Type 2, the virtual start sector is the one that was
|
||||
* originally submitted by the block layer for the ref_tag usage. Due to
|
||||
* partitioning, MD/DM cloning, etc. the actual physical start sector is
|
||||
* likely to be different. Remap protection information to match the
|
||||
* physical LBA.
|
||||
*/
|
||||
static void t10_pi_type1_prepare(struct request *rq)
|
||||
{
|
||||
struct blk_integrity *bi = &rq->q->limits.integrity;
|
||||
const int tuple_sz = bi->metadata_size;
|
||||
u32 ref_tag = t10_pi_ref_tag(rq);
|
||||
u8 offset = bi->pi_offset;
|
||||
struct bio *bio;
|
||||
|
||||
__rq_for_each_bio(bio, rq) {
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
u32 virt = bip_get_seed(bip) & 0xffffffff;
|
||||
struct bio_vec iv;
|
||||
struct bvec_iter iter;
|
||||
|
||||
/* Already remapped? */
|
||||
if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
|
||||
break;
|
||||
|
||||
bip_for_each_vec(iv, bip, iter) {
|
||||
unsigned int j;
|
||||
void *p;
|
||||
|
||||
p = bvec_kmap_local(&iv);
|
||||
for (j = 0; j < iv.bv_len; j += tuple_sz) {
|
||||
struct t10_pi_tuple *pi = p + offset;
|
||||
|
||||
if (be32_to_cpu(pi->ref_tag) == virt)
|
||||
pi->ref_tag = cpu_to_be32(ref_tag);
|
||||
virt++;
|
||||
ref_tag++;
|
||||
p += tuple_sz;
|
||||
}
|
||||
kunmap_local(p);
|
||||
}
|
||||
|
||||
bip->bip_flags |= BIP_MAPPED_INTEGRITY;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* t10_pi_type1_complete - prepare PI prior returning request to the blk layer
|
||||
* @rq: request with PI that should be prepared
|
||||
* @nr_bytes: total bytes to prepare
|
||||
*
|
||||
* For Type 1/Type 2, the virtual start sector is the one that was
|
||||
* originally submitted by the block layer for the ref_tag usage. Due to
|
||||
* partitioning, MD/DM cloning, etc. the actual physical start sector is
|
||||
* likely to be different. Since the physical start sector was submitted
|
||||
* to the device, we should remap it back to virtual values expected by the
|
||||
* block layer.
|
||||
*/
|
||||
static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
|
||||
{
|
||||
struct blk_integrity *bi = &rq->q->limits.integrity;
|
||||
unsigned intervals = nr_bytes >> bi->interval_exp;
|
||||
const int tuple_sz = bi->metadata_size;
|
||||
u32 ref_tag = t10_pi_ref_tag(rq);
|
||||
u8 offset = bi->pi_offset;
|
||||
struct bio *bio;
|
||||
|
||||
__rq_for_each_bio(bio, rq) {
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
u32 virt = bip_get_seed(bip) & 0xffffffff;
|
||||
struct bio_vec iv;
|
||||
struct bvec_iter iter;
|
||||
|
||||
bip_for_each_vec(iv, bip, iter) {
|
||||
unsigned int j;
|
||||
void *p;
|
||||
|
||||
p = bvec_kmap_local(&iv);
|
||||
for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
|
||||
struct t10_pi_tuple *pi = p + offset;
|
||||
|
||||
if (be32_to_cpu(pi->ref_tag) == ref_tag)
|
||||
pi->ref_tag = cpu_to_be32(virt);
|
||||
virt++;
|
||||
ref_tag++;
|
||||
intervals--;
|
||||
p += tuple_sz;
|
||||
}
|
||||
kunmap_local(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len)
|
||||
{
|
||||
return cpu_to_be64(crc64_nvme(crc, data, len));
|
||||
}
|
||||
|
||||
static void ext_pi_crc64_generate(struct blk_integrity_iter *iter,
|
||||
struct blk_integrity *bi)
|
||||
{
|
||||
u8 offset = bi->pi_offset;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
|
||||
struct crc64_pi_tuple *pi = iter->prot_buf + offset;
|
||||
|
||||
pi->guard_tag = ext_pi_crc64(0, iter->data_buf, iter->interval);
|
||||
if (offset)
|
||||
pi->guard_tag = ext_pi_crc64(be64_to_cpu(pi->guard_tag),
|
||||
iter->prot_buf, offset);
|
||||
pi->app_tag = 0;
|
||||
|
||||
if (bi->flags & BLK_INTEGRITY_REF_TAG)
|
||||
put_unaligned_be48(iter->seed, pi->ref_tag);
|
||||
else
|
||||
put_unaligned_be48(0ULL, pi->ref_tag);
|
||||
|
||||
iter->data_buf += iter->interval;
|
||||
iter->prot_buf += bi->metadata_size;
|
||||
iter->seed++;
|
||||
memcpy(tuple, prot_buf, len);
|
||||
kunmap_local(prot_buf);
|
||||
bvec_iter_advance_single(bip->bip_vec, iter, len);
|
||||
tuple_size -= len;
|
||||
tuple += len;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -246,228 +131,437 @@ static bool ext_pi_ref_escape(const u8 ref_tag[6])
|
||||
return memcmp(ref_tag, ref_escape, sizeof(ref_escape)) == 0;
|
||||
}
|
||||
|
||||
static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
|
||||
struct blk_integrity *bi)
|
||||
static blk_status_t blk_verify_ext_pi(struct blk_integrity_iter *iter,
|
||||
struct crc64_pi_tuple *pi)
|
||||
{
|
||||
u8 offset = bi->pi_offset;
|
||||
unsigned int i;
|
||||
u64 seed = lower_48_bits(iter->seed);
|
||||
u64 guard = get_unaligned_be64(&pi->guard_tag);
|
||||
u64 ref = get_unaligned_be48(pi->ref_tag);
|
||||
u16 app = get_unaligned_be16(&pi->app_tag);
|
||||
|
||||
for (i = 0; i < iter->data_size; i += iter->interval) {
|
||||
struct crc64_pi_tuple *pi = iter->prot_buf + offset;
|
||||
u64 ref, seed;
|
||||
__be64 csum;
|
||||
|
||||
if (bi->flags & BLK_INTEGRITY_REF_TAG) {
|
||||
if (pi->app_tag == T10_PI_APP_ESCAPE)
|
||||
goto next;
|
||||
|
||||
ref = get_unaligned_be48(pi->ref_tag);
|
||||
seed = lower_48_bits(iter->seed);
|
||||
if (ref != seed) {
|
||||
pr_err("%s: ref tag error at location %llu (rcvd %llu)\n",
|
||||
iter->disk_name, seed, ref);
|
||||
return BLK_STS_PROTECTION;
|
||||
}
|
||||
} else {
|
||||
if (pi->app_tag == T10_PI_APP_ESCAPE &&
|
||||
ext_pi_ref_escape(pi->ref_tag))
|
||||
goto next;
|
||||
}
|
||||
|
||||
csum = ext_pi_crc64(0, iter->data_buf, iter->interval);
|
||||
if (offset)
|
||||
csum = ext_pi_crc64(be64_to_cpu(csum), iter->prot_buf,
|
||||
offset);
|
||||
|
||||
if (pi->guard_tag != csum) {
|
||||
pr_err("%s: guard tag error at sector %llu " \
|
||||
"(rcvd %016llx, want %016llx)\n",
|
||||
iter->disk_name, (unsigned long long)iter->seed,
|
||||
be64_to_cpu(pi->guard_tag), be64_to_cpu(csum));
|
||||
if (iter->bi->flags & BLK_INTEGRITY_REF_TAG) {
|
||||
if (app == APP_TAG_ESCAPE)
|
||||
return BLK_STS_OK;
|
||||
if (ref != seed) {
|
||||
pr_err("%s: ref tag error at location %llu (rcvd %llu)\n",
|
||||
iter->bio->bi_bdev->bd_disk->disk_name, seed,
|
||||
ref);
|
||||
return BLK_STS_PROTECTION;
|
||||
}
|
||||
} else if (app == APP_TAG_ESCAPE && ext_pi_ref_escape(pi->ref_tag)) {
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
next:
|
||||
iter->data_buf += iter->interval;
|
||||
iter->prot_buf += bi->metadata_size;
|
||||
iter->seed++;
|
||||
if (guard != iter->csum) {
|
||||
pr_err("%s: guard tag error at sector %llu (rcvd %016llx, want %016llx)\n",
|
||||
iter->bio->bi_bdev->bd_disk->disk_name, iter->seed,
|
||||
guard, iter->csum);
|
||||
return BLK_STS_PROTECTION;
|
||||
}
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void ext_pi_type1_prepare(struct request *rq)
|
||||
static blk_status_t blk_verify_pi(struct blk_integrity_iter *iter,
|
||||
struct t10_pi_tuple *pi, u16 guard)
|
||||
{
|
||||
struct blk_integrity *bi = &rq->q->limits.integrity;
|
||||
const int tuple_sz = bi->metadata_size;
|
||||
u64 ref_tag = ext_pi_ref_tag(rq);
|
||||
u8 offset = bi->pi_offset;
|
||||
struct bio *bio;
|
||||
u32 seed = lower_32_bits(iter->seed);
|
||||
u32 ref = get_unaligned_be32(&pi->ref_tag);
|
||||
u16 app = get_unaligned_be16(&pi->app_tag);
|
||||
|
||||
__rq_for_each_bio(bio, rq) {
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
u64 virt = lower_48_bits(bip_get_seed(bip));
|
||||
struct bio_vec iv;
|
||||
struct bvec_iter iter;
|
||||
|
||||
/* Already remapped? */
|
||||
if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
|
||||
break;
|
||||
|
||||
bip_for_each_vec(iv, bip, iter) {
|
||||
unsigned int j;
|
||||
void *p;
|
||||
|
||||
p = bvec_kmap_local(&iv);
|
||||
for (j = 0; j < iv.bv_len; j += tuple_sz) {
|
||||
struct crc64_pi_tuple *pi = p + offset;
|
||||
u64 ref = get_unaligned_be48(pi->ref_tag);
|
||||
|
||||
if (ref == virt)
|
||||
put_unaligned_be48(ref_tag, pi->ref_tag);
|
||||
virt++;
|
||||
ref_tag++;
|
||||
p += tuple_sz;
|
||||
}
|
||||
kunmap_local(p);
|
||||
if (iter->bi->flags & BLK_INTEGRITY_REF_TAG) {
|
||||
if (app == APP_TAG_ESCAPE)
|
||||
return BLK_STS_OK;
|
||||
if (ref != seed) {
|
||||
pr_err("%s: ref tag error at location %u (rcvd %u)\n",
|
||||
iter->bio->bi_bdev->bd_disk->disk_name, seed,
|
||||
ref);
|
||||
return BLK_STS_PROTECTION;
|
||||
}
|
||||
} else if (app == APP_TAG_ESCAPE && ref == REF_TAG_ESCAPE) {
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
bip->bip_flags |= BIP_MAPPED_INTEGRITY;
|
||||
if (guard != (u16)iter->csum) {
|
||||
pr_err("%s: guard tag error at sector %llu (rcvd %04x, want %04x)\n",
|
||||
iter->bio->bi_bdev->bd_disk->disk_name, iter->seed,
|
||||
guard, (u16)iter->csum);
|
||||
return BLK_STS_PROTECTION;
|
||||
}
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static blk_status_t blk_verify_t10_pi(struct blk_integrity_iter *iter,
|
||||
struct t10_pi_tuple *pi)
|
||||
{
|
||||
u16 guard = get_unaligned_be16(&pi->guard_tag);
|
||||
|
||||
return blk_verify_pi(iter, pi, guard);
|
||||
}
|
||||
|
||||
static blk_status_t blk_verify_ip_pi(struct blk_integrity_iter *iter,
|
||||
struct t10_pi_tuple *pi)
|
||||
{
|
||||
u16 guard = get_unaligned((u16 *)&pi->guard_tag);
|
||||
|
||||
return blk_verify_pi(iter, pi, guard);
|
||||
}
|
||||
|
||||
/*
 * Verify the PI tuple for the current interval, dispatched on the
 * device's checksum type.  Checksum types without PI tuples are
 * trivially accepted.
 */
static blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter,
					 union pi_tuple *tuple)
{
	switch (iter->bi->csum_type) {
	case BLK_INTEGRITY_CSUM_CRC64:
		return blk_verify_ext_pi(iter, &tuple->crc64_pi);
	case BLK_INTEGRITY_CSUM_CRC:
		return blk_verify_t10_pi(iter, &tuple->t10_pi);
	case BLK_INTEGRITY_CSUM_IP:
		return blk_verify_ip_pi(iter, &tuple->t10_pi);
	default:
		return BLK_STS_OK;
	}
}
|
||||
|
||||
static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
|
||||
static void blk_set_ext_pi(struct blk_integrity_iter *iter,
|
||||
struct crc64_pi_tuple *pi)
|
||||
{
|
||||
struct blk_integrity *bi = &rq->q->limits.integrity;
|
||||
unsigned intervals = nr_bytes >> bi->interval_exp;
|
||||
const int tuple_sz = bi->metadata_size;
|
||||
u64 ref_tag = ext_pi_ref_tag(rq);
|
||||
u8 offset = bi->pi_offset;
|
||||
struct bio *bio;
|
||||
put_unaligned_be64(iter->csum, &pi->guard_tag);
|
||||
put_unaligned_be16(0, &pi->app_tag);
|
||||
put_unaligned_be48(iter->seed, &pi->ref_tag);
|
||||
}
|
||||
|
||||
__rq_for_each_bio(bio, rq) {
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
u64 virt = lower_48_bits(bip_get_seed(bip));
|
||||
struct bio_vec iv;
|
||||
struct bvec_iter iter;
|
||||
/*
 * Common 8-byte PI tuple writer: guard as supplied by the caller, app
 * tag zeroed, and the low 32 bits of the seed as the reference tag.
 */
static void blk_set_pi(struct blk_integrity_iter *iter,
		       struct t10_pi_tuple *pi, __be16 csum)
{
	put_unaligned(csum, &pi->guard_tag);
	put_unaligned_be16(0, &pi->app_tag);
	put_unaligned_be32(iter->seed, &pi->ref_tag);
}
|
||||
|
||||
bip_for_each_vec(iv, bip, iter) {
|
||||
unsigned int j;
|
||||
void *p;
|
||||
/* CRC guard is stored big-endian on the wire. */
static void blk_set_t10_pi(struct blk_integrity_iter *iter,
			   struct t10_pi_tuple *pi)
{
	blk_set_pi(iter, pi, cpu_to_be16((u16)iter->csum));
}
|
||||
|
||||
p = bvec_kmap_local(&iv);
|
||||
for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
|
||||
struct crc64_pi_tuple *pi = p + offset;
|
||||
u64 ref = get_unaligned_be48(pi->ref_tag);
|
||||
/*
 * IP checksum guard is used as-is (no byte swap); the __force cast only
 * satisfies sparse's endianness checking.
 */
static void blk_set_ip_pi(struct blk_integrity_iter *iter,
			  struct t10_pi_tuple *pi)
{
	blk_set_pi(iter, pi, (__force __be16)(u16)iter->csum);
}
|
||||
|
||||
if (ref == ref_tag)
|
||||
put_unaligned_be48(virt, pi->ref_tag);
|
||||
virt++;
|
||||
ref_tag++;
|
||||
intervals--;
|
||||
p += tuple_sz;
|
||||
}
|
||||
kunmap_local(p);
|
||||
}
|
||||
static void blk_integrity_set(struct blk_integrity_iter *iter,
|
||||
union pi_tuple *tuple)
|
||||
{
|
||||
switch (iter->bi->csum_type) {
|
||||
case BLK_INTEGRITY_CSUM_CRC64:
|
||||
return blk_set_ext_pi(iter, &tuple->crc64_pi);
|
||||
case BLK_INTEGRITY_CSUM_CRC:
|
||||
return blk_set_t10_pi(iter, &tuple->t10_pi);
|
||||
case BLK_INTEGRITY_CSUM_IP:
|
||||
return blk_set_ip_pi(iter, &tuple->t10_pi);
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Finish one protection interval: map (or bounce-copy) the PI tuple for
 * this interval out of the integrity payload, verify or generate it,
 * then reset the per-interval iterator state for the next interval.
 */
static blk_status_t blk_integrity_interval(struct blk_integrity_iter *iter,
					   bool verify)
{
	blk_status_t ret = BLK_STS_OK;
	union pi_tuple tuple;
	void *ptuple = &tuple;
	struct bio_vec pbv;

	blk_integrity_csum_offset(iter);
	pbv = bvec_iter_bvec(iter->bip->bip_vec, iter->prot_iter);
	if (pbv.bv_len >= iter->bi->pi_tuple_size) {
		/* Tuple is contiguous within one segment: access in place. */
		ptuple = bvec_kmap_local(&pbv);
		bvec_iter_advance_single(iter->bip->bip_vec, &iter->prot_iter,
				iter->bi->metadata_size - iter->bi->pi_offset);
	} else if (verify) {
		/* Tuple straddles segments: bounce it through the stack copy. */
		blk_integrity_copy_to_tuple(iter->bip, &iter->prot_iter,
				ptuple, iter->bi->pi_tuple_size);
	}

	if (verify)
		ret = blk_integrity_verify(iter, ptuple);
	else
		blk_integrity_set(iter, ptuple);

	if (likely(ptuple != &tuple)) {
		kunmap_local(ptuple);
	} else if (!verify) {
		/* Write the freshly generated stack copy back to the payload. */
		blk_integrity_copy_from_tuple(iter->bip, &iter->prot_iter,
				ptuple, iter->bi->pi_tuple_size);
	}

	/* Rearm per-interval state for the next interval. */
	iter->interval_remaining = 1 << iter->bi->interval_exp;
	iter->csum = 0;
	iter->seed++;
	return ret;
}
|
||||
|
||||
/*
 * Walk the data described by @data_iter one protection interval at a
 * time, folding each data chunk into the running guard checksum and
 * handing every completed interval to blk_integrity_interval() to
 * verify (@verify == true) or generate the PI tuple.  Stops at the
 * first non-OK status.
 */
static blk_status_t blk_integrity_iterate(struct bio *bio,
					  struct bvec_iter *data_iter,
					  bool verify)
{
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct blk_integrity_iter iter = {
		.bio = bio,
		.bip = bip,
		.bi = bi,
		.data_iter = *data_iter,
		.prot_iter = bip->bip_iter,
		.interval_remaining = 1 << bi->interval_exp,
		.seed = data_iter->bi_sector,
		.csum = 0,
	};
	blk_status_t ret = BLK_STS_OK;

	while (iter.data_iter.bi_size && ret == BLK_STS_OK) {
		struct bio_vec bv = bvec_iter_bvec(iter.bio->bi_io_vec,
						   iter.data_iter);
		void *kaddr = bvec_kmap_local(&bv);
		void *data = kaddr;
		unsigned int len;

		bvec_iter_advance_single(iter.bio->bi_io_vec, &iter.data_iter,
					 bv.bv_len);
		/* A segment may span interval boundaries in either direction. */
		while (bv.bv_len && ret == BLK_STS_OK) {
			len = min(iter.interval_remaining, bv.bv_len);
			blk_calculate_guard(&iter, data, len);
			bv.bv_len -= len;
			data += len;
			iter.interval_remaining -= len;
			if (!iter.interval_remaining)
				ret = blk_integrity_interval(&iter, verify);
		}
		kunmap_local(kaddr);
	}

	return ret;
}
|
||||
|
||||
void bio_integrity_generate(struct bio *bio)
|
||||
{
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
struct blk_integrity_iter iter;
|
||||
struct bvec_iter bviter;
|
||||
struct bio_vec bv;
|
||||
|
||||
iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
|
||||
iter.interval = 1 << bi->interval_exp;
|
||||
iter.seed = bio->bi_iter.bi_sector;
|
||||
iter.prot_buf = bvec_virt(bip->bip_vec);
|
||||
bio_for_each_segment(bv, bio, bviter) {
|
||||
void *kaddr = bvec_kmap_local(&bv);
|
||||
|
||||
iter.data_buf = kaddr;
|
||||
iter.data_size = bv.bv_len;
|
||||
switch (bi->csum_type) {
|
||||
case BLK_INTEGRITY_CSUM_CRC64:
|
||||
ext_pi_crc64_generate(&iter, bi);
|
||||
break;
|
||||
case BLK_INTEGRITY_CSUM_CRC:
|
||||
case BLK_INTEGRITY_CSUM_IP:
|
||||
t10_pi_generate(&iter, bi);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
kunmap_local(kaddr);
|
||||
switch (bi->csum_type) {
|
||||
case BLK_INTEGRITY_CSUM_CRC64:
|
||||
case BLK_INTEGRITY_CSUM_CRC:
|
||||
case BLK_INTEGRITY_CSUM_IP:
|
||||
blk_integrity_iterate(bio, &bio->bi_iter, false);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
blk_status_t bio_integrity_verify(struct bio *bio, struct bvec_iter *saved_iter)
|
||||
{
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
struct blk_integrity_iter iter;
|
||||
struct bvec_iter bviter;
|
||||
struct bio_vec bv;
|
||||
|
||||
/*
|
||||
* At the moment verify is called bi_iter has been advanced during split
|
||||
* and completion, so use the copy created during submission here.
|
||||
*/
|
||||
iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
|
||||
iter.interval = 1 << bi->interval_exp;
|
||||
iter.seed = saved_iter->bi_sector;
|
||||
iter.prot_buf = bvec_virt(bip->bip_vec);
|
||||
__bio_for_each_segment(bv, bio, bviter, *saved_iter) {
|
||||
void *kaddr = bvec_kmap_local(&bv);
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
|
||||
iter.data_buf = kaddr;
|
||||
iter.data_size = bv.bv_len;
|
||||
switch (bi->csum_type) {
|
||||
case BLK_INTEGRITY_CSUM_CRC64:
|
||||
ret = ext_pi_crc64_verify(&iter, bi);
|
||||
break;
|
||||
case BLK_INTEGRITY_CSUM_CRC:
|
||||
case BLK_INTEGRITY_CSUM_IP:
|
||||
ret = t10_pi_verify(&iter, bi);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
kunmap_local(kaddr);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
switch (bi->csum_type) {
|
||||
case BLK_INTEGRITY_CSUM_CRC64:
|
||||
case BLK_INTEGRITY_CSUM_CRC:
|
||||
case BLK_INTEGRITY_CSUM_IP:
|
||||
return blk_integrity_iterate(bio, saved_iter, true);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
void blk_integrity_prepare(struct request *rq)
|
||||
/*
 * Advance @iter past the protection offset for protection formats that
 * contain front padding on the metadata region.
 */
static void blk_pi_advance_offset(struct blk_integrity *bi,
				  struct bio_integrity_payload *bip,
				  struct bvec_iter *iter)
{
	unsigned int offset = bi->pi_offset;

	while (offset > 0) {
		struct bio_vec bv = mp_bvec_iter_bvec(bip->bip_vec, *iter);
		unsigned int len = min(bv.bv_len, offset);

		/* The pad itself may be split across multiple segments. */
		bvec_iter_advance_single(bip->bip_vec, iter, len);
		offset -= len;
	}
}
|
||||
|
||||
/*
 * Return a kernel mapping of the next PI tuple: directly into the
 * integrity payload when the tuple is contiguous within one segment,
 * otherwise via the caller-provided bounce buffer @tuple.
 */
static void *blk_tuple_remap_begin(union pi_tuple *tuple,
				   struct blk_integrity *bi,
				   struct bio_integrity_payload *bip,
				   struct bvec_iter *iter)
{
	struct bvec_iter titer;
	struct bio_vec pbv;

	blk_pi_advance_offset(bi, bip, iter);
	pbv = bvec_iter_bvec(bip->bip_vec, *iter);
	if (likely(pbv.bv_len >= bi->pi_tuple_size))
		return bvec_kmap_local(&pbv);

	/*
	 * We need to preserve the state of the original iter for the
	 * copy_from_tuple at the end, so make a temp iter for here.
	 */
	titer = *iter;
	blk_integrity_copy_to_tuple(bip, &titer, tuple, bi->pi_tuple_size);
	return tuple;
}
|
||||
|
||||
/*
 * Counterpart of blk_tuple_remap_begin(): release the mapping (writing
 * a bounced tuple back into the payload) and advance @iter over the
 * rest of this interval's metadata.
 */
static void blk_tuple_remap_end(union pi_tuple *tuple, void *ptuple,
				struct blk_integrity *bi,
				struct bio_integrity_payload *bip,
				struct bvec_iter *iter)
{
	unsigned int len = bi->metadata_size - bi->pi_offset;

	if (likely(ptuple != tuple)) {
		kunmap_local(ptuple);
	} else {
		/*
		 * NOTE(review): copy_from_tuple presumably advances @iter
		 * over the tuple itself (hence the shorter advance below)
		 * — confirm against the helper's definition.
		 */
		blk_integrity_copy_from_tuple(bip, iter, ptuple,
					      bi->pi_tuple_size);
		len -= bi->pi_tuple_size;
	}

	bvec_iter_advance(bip->bip_vec, iter, len);
}
|
||||
|
||||
/*
 * Completion-side 48-bit ref tag restore: if the tuple still carries
 * the device value @ref_tag, put back the submitter's value @virt.
 */
static void blk_set_ext_unmap_ref(struct crc64_pi_tuple *pi, u64 virt,
				  u64 ref_tag)
{
	u64 ref = get_unaligned_be48(&pi->ref_tag);

	if (ref == lower_48_bits(ref_tag) && ref != lower_48_bits(virt))
		put_unaligned_be48(virt, pi->ref_tag);
}
|
||||
|
||||
/*
 * Completion-side 32-bit ref tag restore: if the tuple still carries
 * the device value @ref_tag, put back the submitter's value @virt.
 */
static void blk_set_t10_unmap_ref(struct t10_pi_tuple *pi, u32 virt,
				  u32 ref_tag)
{
	u32 cur = get_unaligned_be32(&pi->ref_tag);

	if (cur != ref_tag || cur == virt)
		return;
	put_unaligned_be32(virt, &pi->ref_tag);
}
|
||||
|
||||
/*
 * Completion-side ref tag restore, dispatched on the device's checksum
 * type (48-bit tags for CRC64 PI, 32-bit otherwise).
 */
static void blk_reftag_remap_complete(struct blk_integrity *bi,
				      union pi_tuple *tuple, u64 virt, u64 ref)
{
	switch (bi->csum_type) {
	case BLK_INTEGRITY_CSUM_CRC64:
		blk_set_ext_unmap_ref(&tuple->crc64_pi, virt, ref);
		break;
	case BLK_INTEGRITY_CSUM_CRC:
	case BLK_INTEGRITY_CSUM_IP:
		blk_set_t10_unmap_ref(&tuple->t10_pi, virt, ref);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}
}
|
||||
|
||||
/*
 * Prepare-side 48-bit ref tag remap: if the tuple still carries the
 * submitter's value @virt, substitute the device value @ref_tag.
 */
static void blk_set_ext_map_ref(struct crc64_pi_tuple *pi, u64 virt,
				u64 ref_tag)
{
	u64 ref = get_unaligned_be48(&pi->ref_tag);

	if (ref == lower_48_bits(virt) && ref != ref_tag)
		put_unaligned_be48(ref_tag, pi->ref_tag);
}
|
||||
|
||||
/* 32-bit (T10) variant of the prepare-side ref tag remap. */
static void blk_set_t10_map_ref(struct t10_pi_tuple *pi, u32 virt, u32 ref_tag)
{
	u32 ref = get_unaligned_be32(&pi->ref_tag);

	if (ref == virt && ref != ref_tag)
		put_unaligned_be32(ref_tag, &pi->ref_tag);
}
|
||||
|
||||
/*
 * Prepare-side ref tag remap, dispatched on the device's checksum type
 * (48-bit tags for CRC64 PI, 32-bit otherwise).
 */
static void blk_reftag_remap_prepare(struct blk_integrity *bi,
				     union pi_tuple *tuple,
				     u64 virt, u64 ref)
{
	switch (bi->csum_type) {
	case BLK_INTEGRITY_CSUM_CRC64:
		blk_set_ext_map_ref(&tuple->crc64_pi, virt, ref);
		break;
	case BLK_INTEGRITY_CSUM_CRC:
	case BLK_INTEGRITY_CSUM_IP:
		blk_set_t10_map_ref(&tuple->t10_pi, virt, ref);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}
}
|
||||
|
||||
/*
 * Remap (@prep == true) or restore (@prep == false) the reference tags
 * of up to *intervals PI tuples in @bio's integrity payload.  @virt
 * tracks the submitter-visible value from the bip seed, *ref the
 * device-side value; both advance one per interval.
 */
static void __blk_reftag_remap(struct bio *bio, struct blk_integrity *bi,
			       unsigned *intervals, u64 *ref, bool prep)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct bvec_iter iter = bip->bip_iter;
	u64 virt = bip_get_seed(bip);
	union pi_tuple *ptuple;
	union pi_tuple tuple;

	if (prep && bip->bip_flags & BIP_MAPPED_INTEGRITY) {
		/* Already remapped; just account for the covered span. */
		*ref += bio->bi_iter.bi_size >> bi->interval_exp;
		return;
	}

	while (iter.bi_size && *intervals) {
		ptuple = blk_tuple_remap_begin(&tuple, bi, bip, &iter);

		if (prep)
			blk_reftag_remap_prepare(bi, ptuple, virt, *ref);
		else
			blk_reftag_remap_complete(bi, ptuple, virt, *ref);

		blk_tuple_remap_end(&tuple, ptuple, bi, bip, &iter);
		(*intervals)--;
		(*ref)++;
		virt++;
	}

	if (prep)
		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
}
|
||||
|
||||
static void blk_integrity_remap(struct request *rq, unsigned int nr_bytes,
|
||||
bool prep)
|
||||
{
|
||||
struct blk_integrity *bi = &rq->q->limits.integrity;
|
||||
u64 ref = blk_rq_pos(rq) >> (bi->interval_exp - SECTOR_SHIFT);
|
||||
unsigned intervals = nr_bytes >> bi->interval_exp;
|
||||
struct bio *bio;
|
||||
|
||||
if (!(bi->flags & BLK_INTEGRITY_REF_TAG))
|
||||
return;
|
||||
|
||||
if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64)
|
||||
ext_pi_type1_prepare(rq);
|
||||
else
|
||||
t10_pi_type1_prepare(rq);
|
||||
__rq_for_each_bio(bio, rq) {
|
||||
__blk_reftag_remap(bio, bi, &intervals, &ref, prep);
|
||||
if (!intervals)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Remap Type 1 reference tags from the submitter's values to the
 * device values before the request is issued.
 */
void blk_integrity_prepare(struct request *rq)
{
	blk_integrity_remap(rq, blk_rq_bytes(rq), true);
}
|
||||
|
||||
void blk_integrity_complete(struct request *rq, unsigned int nr_bytes)
|
||||
{
|
||||
struct blk_integrity *bi = &rq->q->limits.integrity;
|
||||
|
||||
if (!(bi->flags & BLK_INTEGRITY_REF_TAG))
|
||||
return;
|
||||
|
||||
if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64)
|
||||
ext_pi_type1_complete(rq, nr_bytes);
|
||||
else
|
||||
t10_pi_type1_complete(rq, nr_bytes);
|
||||
blk_integrity_remap(rq, nr_bytes, false);
|
||||
}
|
||||
|
||||
@@ -141,12 +141,6 @@ config CRYPTO_ACOMP
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_ACOMP2
|
||||
|
||||
config CRYPTO_HKDF
|
||||
tristate
|
||||
select CRYPTO_SHA256 if CRYPTO_SELFTESTS
|
||||
select CRYPTO_SHA512 if CRYPTO_SELFTESTS
|
||||
select CRYPTO_HASH2
|
||||
|
||||
config CRYPTO_MANAGER
|
||||
tristate
|
||||
default CRYPTO_ALGAPI if CRYPTO_SELFTESTS
|
||||
|
||||
@@ -36,7 +36,6 @@ obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o
|
||||
obj-$(CONFIG_CRYPTO_AKCIPHER2) += akcipher.o
|
||||
obj-$(CONFIG_CRYPTO_SIG2) += sig.o
|
||||
obj-$(CONFIG_CRYPTO_KPP2) += kpp.o
|
||||
obj-$(CONFIG_CRYPTO_HKDF) += hkdf.o
|
||||
|
||||
dh_generic-y := dh.o
|
||||
dh_generic-y += dh_helper.o
|
||||
|
||||
573
crypto/hkdf.c
573
crypto/hkdf.c
@@ -1,573 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Implementation of HKDF ("HMAC-based Extract-and-Expand Key Derivation
|
||||
* Function"), aka RFC 5869. See also the original paper (Krawczyk 2010):
|
||||
* "Cryptographic Extraction and Key Derivation: The HKDF Scheme".
|
||||
*
|
||||
* Copyright 2019 Google LLC
|
||||
*/
|
||||
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/sha2.h>
|
||||
#include <crypto/hkdf.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/*
|
||||
* HKDF consists of two steps:
|
||||
*
|
||||
* 1. HKDF-Extract: extract a pseudorandom key from the input keying material
|
||||
* and optional salt.
|
||||
* 2. HKDF-Expand: expand the pseudorandom key into output keying material of
|
||||
* any length, parameterized by an application-specific info string.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* hkdf_extract - HKDF-Extract (RFC 5869 section 2.2)
|
||||
* @hmac_tfm: an HMAC transform using the hash function desired for HKDF. The
|
||||
* caller is responsible for setting the @prk afterwards.
|
||||
* @ikm: input keying material
|
||||
* @ikmlen: length of @ikm
|
||||
* @salt: input salt value
|
||||
* @saltlen: length of @salt
|
||||
* @prk: resulting pseudorandom key
|
||||
*
|
||||
* Extracts a pseudorandom key @prk from the input keying material
|
||||
* @ikm with length @ikmlen and salt @salt with length @saltlen.
|
||||
* The length of @prk is given by the digest size of @hmac_tfm.
|
||||
* For an 'unsalted' version of HKDF-Extract @salt must be set
|
||||
* to all zeroes and @saltlen must be set to the length of @prk.
|
||||
*
|
||||
* Returns 0 on success with the pseudorandom key stored in @prk,
|
||||
* or a negative errno value otherwise.
|
||||
*/
|
||||
int hkdf_extract(struct crypto_shash *hmac_tfm, const u8 *ikm,
		 unsigned int ikmlen, const u8 *salt, unsigned int saltlen,
		 u8 *prk)
{
	/* PRK = HMAC-Hash(salt, IKM); the salt acts as the HMAC key. */
	int err = crypto_shash_setkey(hmac_tfm, salt, saltlen);

	if (err)
		return err;
	return crypto_shash_tfm_digest(hmac_tfm, ikm, ikmlen, prk);
}
EXPORT_SYMBOL_GPL(hkdf_extract);
|
||||
|
||||
/**
|
||||
* hkdf_expand - HKDF-Expand (RFC 5869 section 2.3)
|
||||
* @hmac_tfm: hash context keyed with pseudorandom key
|
||||
* @info: application-specific information
|
||||
* @infolen: length of @info
|
||||
* @okm: output keying material
|
||||
* @okmlen: length of @okm
|
||||
*
|
||||
* This expands the pseudorandom key, which was already keyed into @hmac_tfm,
|
||||
* into @okmlen bytes of output keying material parameterized by the
|
||||
* application-specific @info of length @infolen bytes.
|
||||
* This is thread-safe and may be called by multiple threads in parallel.
|
||||
*
|
||||
* Returns 0 on success with output keying material stored in @okm,
|
||||
* or a negative errno value otherwise.
|
||||
*/
|
||||
int hkdf_expand(struct crypto_shash *hmac_tfm,
		const u8 *info, unsigned int infolen,
		u8 *okm, unsigned int okmlen)
{
	SHASH_DESC_ON_STACK(desc, hmac_tfm);
	unsigned int i, hashlen = crypto_shash_digestsize(hmac_tfm);
	int err;
	const u8 *prev = NULL;	/* previous block T(i-1), NULL for T(0) = empty */
	u8 counter = 1;
	u8 tmp[HASH_MAX_DIGESTSIZE] = {};

	/* RFC 5869 limits the output to 255 hash-length blocks. */
	if (WARN_ON(okmlen > 255 * hashlen))
		return -EINVAL;

	desc->tfm = hmac_tfm;

	/* T(i) = HMAC-Hash(PRK, T(i-1) | info | counter), counter = 1, 2, ... */
	for (i = 0; i < okmlen; i += hashlen) {
		err = crypto_shash_init(desc);
		if (err)
			goto out;

		if (prev) {
			err = crypto_shash_update(desc, prev, hashlen);
			if (err)
				goto out;
		}

		if (infolen) {
			err = crypto_shash_update(desc, info, infolen);
			if (err)
				goto out;
		}

		BUILD_BUG_ON(sizeof(counter) != 1);
		if (okmlen - i < hashlen) {
			/* Final partial block: hash into tmp, copy the tail. */
			err = crypto_shash_finup(desc, &counter, 1, tmp);
			if (err)
				goto out;
			memcpy(&okm[i], tmp, okmlen - i);
			memzero_explicit(tmp, sizeof(tmp));
		} else {
			err = crypto_shash_finup(desc, &counter, 1, &okm[i]);
			if (err)
				goto out;
		}
		counter++;
		prev = &okm[i];
	}
	err = 0;
out:
	if (unlikely(err))
		memzero_explicit(okm, okmlen); /* so caller doesn't need to */
	/* Scrub key-derived intermediate state before returning. */
	shash_desc_zero(desc);
	memzero_explicit(tmp, HASH_MAX_DIGESTSIZE);
	return err;
}
EXPORT_SYMBOL_GPL(hkdf_expand);
|
||||
|
||||
/* One RFC 5869 test case: inputs plus the expected PRK and OKM. */
struct hkdf_testvec {
	const char *test;	/* human-readable test description */
	const u8 *ikm;		/* input keying material */
	const u8 *salt;		/* optional salt (NULL for none) */
	const u8 *info;		/* optional context/application info */
	const u8 *prk;		/* expected pseudorandom key (extract step) */
	const u8 *okm;		/* expected output keying material (expand step) */
	u16 ikm_size;
	u16 salt_size;
	u16 info_size;
	u16 prk_size;
	u16 okm_size;
};
|
||||
|
||||
/*
|
||||
* HKDF test vectors from RFC5869
|
||||
*
|
||||
* Additional HKDF test vectors from
|
||||
* https://github.com/brycx/Test-Vector-Generation/blob/master/HKDF/hkdf-hmac-sha2-test-vectors.md
|
||||
*/
|
||||
static const struct hkdf_testvec hkdf_sha256_tv[] = {
|
||||
{
|
||||
.test = "basic hdkf test",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
|
||||
"\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 22,
|
||||
.salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
|
||||
.salt_size = 13,
|
||||
.info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
|
||||
.info_size = 10,
|
||||
.prk = "\x07\x77\x09\x36\x2c\x2e\x32\xdf\x0d\xdc\x3f\x0d\xc4\x7b\xba\x63"
|
||||
"\x90\xb6\xc7\x3b\xb5\x0f\x9c\x31\x22\xec\x84\x4a\xd7\xc2\xb3\xe5",
|
||||
.prk_size = 32,
|
||||
.okm = "\x3c\xb2\x5f\x25\xfa\xac\xd5\x7a\x90\x43\x4f\x64\xd0\x36\x2f\x2a"
|
||||
"\x2d\x2d\x0a\x90\xcf\x1a\x5a\x4c\x5d\xb0\x2d\x56\xec\xc4\xc5\xbf"
|
||||
"\x34\x00\x72\x08\xd5\xb8\x87\x18\x58\x65",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "hkdf test with long input",
|
||||
.ikm = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
|
||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
|
||||
"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
|
||||
"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
|
||||
"\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f",
|
||||
.ikm_size = 80,
|
||||
.salt = "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
|
||||
"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
|
||||
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
|
||||
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
|
||||
"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf",
|
||||
.salt_size = 80,
|
||||
.info = "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
|
||||
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
|
||||
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
|
||||
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
|
||||
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
|
||||
.info_size = 80,
|
||||
.prk = "\x06\xa6\xb8\x8c\x58\x53\x36\x1a\x06\x10\x4c\x9c\xeb\x35\xb4\x5c"
|
||||
"\xef\x76\x00\x14\x90\x46\x71\x01\x4a\x19\x3f\x40\xc1\x5f\xc2\x44",
|
||||
.prk_size = 32,
|
||||
.okm = "\xb1\x1e\x39\x8d\xc8\x03\x27\xa1\xc8\xe7\xf7\x8c\x59\x6a\x49\x34"
|
||||
"\x4f\x01\x2e\xda\x2d\x4e\xfa\xd8\xa0\x50\xcc\x4c\x19\xaf\xa9\x7c"
|
||||
"\x59\x04\x5a\x99\xca\xc7\x82\x72\x71\xcb\x41\xc6\x5e\x59\x0e\x09"
|
||||
"\xda\x32\x75\x60\x0c\x2f\x09\xb8\x36\x77\x93\xa9\xac\xa3\xdb\x71"
|
||||
"\xcc\x30\xc5\x81\x79\xec\x3e\x87\xc1\x4c\x01\xd5\xc1\xf3\x43\x4f"
|
||||
"\x1d\x87",
|
||||
.okm_size = 82,
|
||||
}, {
|
||||
.test = "hkdf test with zero salt and info",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
|
||||
"\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 22,
|
||||
.salt = NULL,
|
||||
.salt_size = 0,
|
||||
.info = NULL,
|
||||
.info_size = 0,
|
||||
.prk = "\x19\xef\x24\xa3\x2c\x71\x7b\x16\x7f\x33\xa9\x1d\x6f\x64\x8b\xdf"
|
||||
"\x96\x59\x67\x76\xaf\xdb\x63\x77\xac\x43\x4c\x1c\x29\x3c\xcb\x04",
|
||||
.prk_size = 32,
|
||||
.okm = "\x8d\xa4\xe7\x75\xa5\x63\xc1\x8f\x71\x5f\x80\x2a\x06\x3c\x5a\x31"
|
||||
"\xb8\xa1\x1f\x5c\x5e\xe1\x87\x9e\xc3\x45\x4e\x5f\x3c\x73\x8d\x2d"
|
||||
"\x9d\x20\x13\x95\xfa\xa4\xb6\x1a\x96\xc8",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "hkdf test with short input",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 11,
|
||||
.salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
|
||||
.salt_size = 13,
|
||||
.info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
|
||||
.info_size = 10,
|
||||
.prk = "\x82\x65\xf6\x9d\x7f\xf7\xe5\x01\x37\x93\x01\x5c\xa0\xef\x92\x0c"
|
||||
"\xb1\x68\x21\x99\xc8\xbc\x3a\x00\xda\x0c\xab\x47\xb7\xb0\x0f\xdf",
|
||||
.prk_size = 32,
|
||||
.okm = "\x58\xdc\xe1\x0d\x58\x01\xcd\xfd\xa8\x31\x72\x6b\xfe\xbc\xb7\x43"
|
||||
"\xd1\x4a\x7e\xe8\x3a\xa0\x57\xa9\x3d\x59\xb0\xa1\x31\x7f\xf0\x9d"
|
||||
"\x10\x5c\xce\xcf\x53\x56\x92\xb1\x4d\xd5",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "unsalted hkdf test with zero info",
|
||||
.ikm = "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c"
|
||||
"\x0c\x0c\x0c\x0c\x0c\x0c",
|
||||
.ikm_size = 22,
|
||||
.salt = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
|
||||
.salt_size = 32,
|
||||
.info = NULL,
|
||||
.info_size = 0,
|
||||
.prk = "\xaa\x84\x1e\x1f\x35\x74\xf3\x2d\x13\xfb\xa8\x00\x5f\xcd\x9b\x8d"
|
||||
"\x77\x67\x82\xa5\xdf\xa1\x92\x38\x92\xfd\x8b\x63\x5d\x3a\x89\xdf",
|
||||
.prk_size = 32,
|
||||
.okm = "\x59\x68\x99\x17\x9a\xb1\xbc\x00\xa7\xc0\x37\x86\xff\x43\xee\x53"
|
||||
"\x50\x04\xbe\x2b\xb9\xbe\x68\xbc\x14\x06\x63\x6f\x54\xbd\x33\x8a"
|
||||
"\x66\xa2\x37\xba\x2a\xcb\xce\xe3\xc9\xa7",
|
||||
.okm_size = 42,
|
||||
}
|
||||
};
|
||||
|
||||
static const struct hkdf_testvec hkdf_sha384_tv[] = {
|
||||
{
|
||||
.test = "basic hkdf test",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
|
||||
"\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 22,
|
||||
.salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
|
||||
.salt_size = 13,
|
||||
.info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
|
||||
.info_size = 10,
|
||||
.prk = "\x70\x4b\x39\x99\x07\x79\xce\x1d\xc5\x48\x05\x2c\x7d\xc3\x9f\x30"
|
||||
"\x35\x70\xdd\x13\xfb\x39\xf7\xac\xc5\x64\x68\x0b\xef\x80\xe8\xde"
|
||||
"\xc7\x0e\xe9\xa7\xe1\xf3\xe2\x93\xef\x68\xec\xeb\x07\x2a\x5a\xde",
|
||||
.prk_size = 48,
|
||||
.okm = "\x9b\x50\x97\xa8\x60\x38\xb8\x05\x30\x90\x76\xa4\x4b\x3a\x9f\x38"
|
||||
"\x06\x3e\x25\xb5\x16\xdc\xbf\x36\x9f\x39\x4c\xfa\xb4\x36\x85\xf7"
|
||||
"\x48\xb6\x45\x77\x63\xe4\xf0\x20\x4f\xc5",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "hkdf test with long input",
|
||||
.ikm = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
|
||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
|
||||
"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
|
||||
"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
|
||||
"\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f",
|
||||
.ikm_size = 80,
|
||||
.salt = "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
|
||||
"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
|
||||
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
|
||||
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
|
||||
"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf",
|
||||
.salt_size = 80,
|
||||
.info = "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
|
||||
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
|
||||
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
|
||||
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
|
||||
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
|
||||
.info_size = 80,
|
||||
.prk = "\xb3\x19\xf6\x83\x1d\xff\x93\x14\xef\xb6\x43\xba\xa2\x92\x63\xb3"
|
||||
"\x0e\x4a\x8d\x77\x9f\xe3\x1e\x9c\x90\x1e\xfd\x7d\xe7\x37\xc8\x5b"
|
||||
"\x62\xe6\x76\xd4\xdc\x87\xb0\x89\x5c\x6a\x7d\xc9\x7b\x52\xce\xbb",
|
||||
.prk_size = 48,
|
||||
.okm = "\x48\x4c\xa0\x52\xb8\xcc\x72\x4f\xd1\xc4\xec\x64\xd5\x7b\x4e\x81"
|
||||
"\x8c\x7e\x25\xa8\xe0\xf4\x56\x9e\xd7\x2a\x6a\x05\xfe\x06\x49\xee"
|
||||
"\xbf\x69\xf8\xd5\xc8\x32\x85\x6b\xf4\xe4\xfb\xc1\x79\x67\xd5\x49"
|
||||
"\x75\x32\x4a\x94\x98\x7f\x7f\x41\x83\x58\x17\xd8\x99\x4f\xdb\xd6"
|
||||
"\xf4\xc0\x9c\x55\x00\xdc\xa2\x4a\x56\x22\x2f\xea\x53\xd8\x96\x7a"
|
||||
"\x8b\x2e",
|
||||
.okm_size = 82,
|
||||
}, {
|
||||
.test = "hkdf test with zero salt and info",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
|
||||
"\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 22,
|
||||
.salt = NULL,
|
||||
.salt_size = 0,
|
||||
.info = NULL,
|
||||
.info_size = 0,
|
||||
.prk = "\x10\xe4\x0c\xf0\x72\xa4\xc5\x62\x6e\x43\xdd\x22\xc1\xcf\x72\x7d"
|
||||
"\x4b\xb1\x40\x97\x5c\x9a\xd0\xcb\xc8\xe4\x5b\x40\x06\x8f\x8f\x0b"
|
||||
"\xa5\x7c\xdb\x59\x8a\xf9\xdf\xa6\x96\x3a\x96\x89\x9a\xf0\x47\xe5",
|
||||
.prk_size = 48,
|
||||
.okm = "\xc8\xc9\x6e\x71\x0f\x89\xb0\xd7\x99\x0b\xca\x68\xbc\xde\xc8\xcf"
|
||||
"\x85\x40\x62\xe5\x4c\x73\xa7\xab\xc7\x43\xfa\xde\x9b\x24\x2d\xaa"
|
||||
"\xcc\x1c\xea\x56\x70\x41\x5b\x52\x84\x9c",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "hkdf test with short input",
|
||||
.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b",
|
||||
.ikm_size = 11,
|
||||
.salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
|
||||
.salt_size = 13,
|
||||
.info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
|
||||
.info_size = 10,
|
||||
.prk = "\x6d\x31\x69\x98\x28\x79\x80\x88\xb3\x59\xda\xd5\x0b\x8f\x01\xb0"
|
||||
"\x15\xf1\x7a\xa3\xbd\x4e\x27\xa6\xe9\xf8\x73\xb7\x15\x85\xca\x6a"
|
||||
"\x00\xd1\xf0\x82\x12\x8a\xdb\x3c\xf0\x53\x0b\x57\xc0\xf9\xac\x72",
|
||||
.prk_size = 48,
|
||||
.okm = "\xfb\x7e\x67\x43\xeb\x42\xcd\xe9\x6f\x1b\x70\x77\x89\x52\xab\x75"
|
||||
"\x48\xca\xfe\x53\x24\x9f\x7f\xfe\x14\x97\xa1\x63\x5b\x20\x1f\xf1"
|
||||
"\x85\xb9\x3e\x95\x19\x92\xd8\x58\xf1\x1a",
|
||||
.okm_size = 42,
|
||||
}, {
|
||||
.test = "unsalted hkdf test with zero info",
|
||||
.ikm = "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c"
|
||||
"\x0c\x0c\x0c\x0c\x0c\x0c",
|
||||
.ikm_size = 22,
|
||||
.salt = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
|
||||
.salt_size = 48,
|
||||
.info = NULL,
|
||||
.info_size = 0,
|
||||
.prk = "\x9d\x2d\xa5\x06\x6f\x05\xd1\x6c\x59\xfe\xdf\x6c\x5f\x32\xc7\x5e"
|
||||
"\xda\x9a\x47\xa7\x9c\x93\x6a\xa4\x4c\xb7\x63\xa8\xe2\x2f\xfb\xfc"
|
||||
"\xd8\xfe\x55\x43\x58\x53\x47\x21\x90\x39\xd1\x68\x28\x36\x33\xf5",
|
||||
.prk_size = 48,
|
||||
.okm = "\x6a\xd7\xc7\x26\xc8\x40\x09\x54\x6a\x76\xe0\x54\x5d\xf2\x66\x78"
|
||||
"\x7e\x2b\x2c\xd6\xca\x43\x73\xa1\xf3\x14\x50\xa7\xbd\xf9\x48\x2b"
|
||||
"\xfa\xb8\x11\xf5\x54\x20\x0e\xad\x8f\x53",
|
||||
.okm_size = 42,
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * HKDF-SHA512 self-test vectors: (ikm, salt, info) inputs together with
 * the expected extract output (prk) and expand output (okm).
 */
static const struct hkdf_testvec hkdf_sha512_tv[] = {
	{
		.test = "basic hkdf test",
		.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
		       "\x0b\x0b\x0b\x0b\x0b\x0b",
		.ikm_size = 22,
		.salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
		.salt_size = 13,
		.info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
		.info_size = 10,
		.prk = "\x66\x57\x99\x82\x37\x37\xde\xd0\x4a\x88\xe4\x7e\x54\xa5\x89\x0b"
		       "\xb2\xc3\xd2\x47\xc7\xa4\x25\x4a\x8e\x61\x35\x07\x23\x59\x0a\x26"
		       "\xc3\x62\x38\x12\x7d\x86\x61\xb8\x8c\xf8\x0e\xf8\x02\xd5\x7e\x2f"
		       "\x7c\xeb\xcf\x1e\x00\xe0\x83\x84\x8b\xe1\x99\x29\xc6\x1b\x42\x37",
		.prk_size = 64,
		.okm = "\x83\x23\x90\x08\x6c\xda\x71\xfb\x47\x62\x5b\xb5\xce\xb1\x68\xe4"
		       "\xc8\xe2\x6a\x1a\x16\xed\x34\xd9\xfc\x7f\xe9\x2c\x14\x81\x57\x93"
		       "\x38\xda\x36\x2c\xb8\xd9\xf9\x25\xd7\xcb",
		.okm_size = 42,
	}, {
		.test = "hkdf test with long input",
		.ikm = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
		       "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
		       "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
		       "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
		       "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f",
		.ikm_size = 80,
		.salt = "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
			"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
			"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
			"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
			"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf",
		.salt_size = 80,
		.info = "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
			"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
			"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
			"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
			"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
		.info_size = 80,
		.prk = "\x35\x67\x25\x42\x90\x7d\x4e\x14\x2c\x00\xe8\x44\x99\xe7\x4e\x1d"
		       "\xe0\x8b\xe8\x65\x35\xf9\x24\xe0\x22\x80\x4a\xd7\x75\xdd\xe2\x7e"
		       "\xc8\x6c\xd1\xe5\xb7\xd1\x78\xc7\x44\x89\xbd\xbe\xb3\x07\x12\xbe"
		       "\xb8\x2d\x4f\x97\x41\x6c\x5a\x94\xea\x81\xeb\xdf\x3e\x62\x9e\x4a",
		.prk_size = 64,
		.okm = "\xce\x6c\x97\x19\x28\x05\xb3\x46\xe6\x16\x1e\x82\x1e\xd1\x65\x67"
		       "\x3b\x84\xf4\x00\xa2\xb5\x14\xb2\xfe\x23\xd8\x4c\xd1\x89\xdd\xf1"
		       "\xb6\x95\xb4\x8c\xbd\x1c\x83\x88\x44\x11\x37\xb3\xce\x28\xf1\x6a"
		       "\xa6\x4b\xa3\x3b\xa4\x66\xb2\x4d\xf6\xcf\xcb\x02\x1e\xcf\xf2\x35"
		       "\xf6\xa2\x05\x6c\xe3\xaf\x1d\xe4\x4d\x57\x20\x97\xa8\x50\x5d\x9e"
		       "\x7a\x93",
		.okm_size = 82,
	}, {
		.test = "hkdf test with zero salt and info",
		.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
		       "\x0b\x0b\x0b\x0b\x0b\x0b",
		.ikm_size = 22,
		.salt = NULL,
		.salt_size = 0,
		.info = NULL,
		.info_size = 0,
		.prk = "\xfd\x20\x0c\x49\x87\xac\x49\x13\x13\xbd\x4a\x2a\x13\x28\x71\x21"
		       "\x24\x72\x39\xe1\x1c\x9e\xf8\x28\x02\x04\x4b\x66\xef\x35\x7e\x5b"
		       "\x19\x44\x98\xd0\x68\x26\x11\x38\x23\x48\x57\x2a\x7b\x16\x11\xde"
		       "\x54\x76\x40\x94\x28\x63\x20\x57\x8a\x86\x3f\x36\x56\x2b\x0d\xf6",
		.prk_size = 64,
		.okm = "\xf5\xfa\x02\xb1\x82\x98\xa7\x2a\x8c\x23\x89\x8a\x87\x03\x47\x2c"
		       "\x6e\xb1\x79\xdc\x20\x4c\x03\x42\x5c\x97\x0e\x3b\x16\x4b\xf9\x0f"
		       "\xff\x22\xd0\x48\x36\xd0\xe2\x34\x3b\xac",
		.okm_size = 42,
	}, {
		.test = "hkdf test with short input",
		.ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b",
		.ikm_size = 11,
		.salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c",
		.salt_size = 13,
		.info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
		.info_size = 10,
		.prk = "\x67\x40\x9c\x9c\xac\x28\xb5\x2e\xe9\xfa\xd9\x1c\x2f\xda\x99\x9f"
		       "\x7c\xa2\x2e\x34\x34\xf0\xae\x77\x28\x63\x83\x65\x68\xad\x6a\x7f"
		       "\x10\xcf\x11\x3b\xfd\xdd\x56\x01\x29\xa5\x94\xa8\xf5\x23\x85\xc2"
		       "\xd6\x61\xd7\x85\xd2\x9c\xe9\x3a\x11\x40\x0c\x92\x06\x83\x18\x1d",
		.prk_size = 64,
		.okm = "\x74\x13\xe8\x99\x7e\x02\x06\x10\xfb\xf6\x82\x3f\x2c\xe1\x4b\xff"
		       "\x01\x87\x5d\xb1\xca\x55\xf6\x8c\xfc\xf3\x95\x4d\xc8\xaf\xf5\x35"
		       "\x59\xbd\x5e\x30\x28\xb0\x80\xf7\xc0\x68",
		.okm_size = 42,
	}, {
		.test = "unsalted hkdf test with zero info",
		.ikm = "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c"
		       "\x0c\x0c\x0c\x0c\x0c\x0c",
		.ikm_size = 22,
		.salt = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
			"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
			"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
			"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
		.salt_size = 64,
		.info = NULL,
		.info_size = 0,
		.prk = "\x53\x46\xb3\x76\xbf\x3a\xa9\xf8\x4f\x8f\x6e\xd5\xb1\xc4\xf4\x89"
		       "\x17\x2e\x24\x4d\xac\x30\x3d\x12\xf6\x8e\xcc\x76\x6e\xa6\x00\xaa"
		       "\x88\x49\x5e\x7f\xb6\x05\x80\x31\x22\xfa\x13\x69\x24\xa8\x40\xb1"
		       "\xf0\x71\x9d\x2d\x5f\x68\xe2\x9b\x24\x22\x99\xd7\x58\xed\x68\x0c",
		.prk_size = 64,
		.okm = "\x14\x07\xd4\x60\x13\xd9\x8b\xc6\xde\xce\xfc\xfe\xe5\x5f\x0f\x90"
		       "\xb0\xc7\xf6\x3d\x68\xeb\x1a\x80\xea\xf0\x7e\x95\x3c\xfc\x0a\x3a"
		       "\x52\x40\xa1\x55\xd6\xe4\xda\xa9\x65\xbb",
		.okm_size = 42,
	}
};
|
||||
|
||||
static int hkdf_test(const char *shash, const struct hkdf_testvec *tv)
|
||||
{ struct crypto_shash *tfm = NULL;
|
||||
u8 *prk = NULL, *okm = NULL;
|
||||
unsigned int prk_size;
|
||||
const char *driver;
|
||||
int err;
|
||||
|
||||
tfm = crypto_alloc_shash(shash, 0, 0);
|
||||
if (IS_ERR(tfm)) {
|
||||
pr_err("%s(%s): failed to allocate transform: %ld\n",
|
||||
tv->test, shash, PTR_ERR(tfm));
|
||||
return PTR_ERR(tfm);
|
||||
}
|
||||
driver = crypto_shash_driver_name(tfm);
|
||||
|
||||
prk_size = crypto_shash_digestsize(tfm);
|
||||
prk = kzalloc(prk_size, GFP_KERNEL);
|
||||
if (!prk) {
|
||||
err = -ENOMEM;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (tv->prk_size != prk_size) {
|
||||
pr_err("%s(%s): prk size mismatch (vec %u, digest %u\n",
|
||||
tv->test, driver, tv->prk_size, prk_size);
|
||||
err = -EINVAL;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
err = hkdf_extract(tfm, tv->ikm, tv->ikm_size,
|
||||
tv->salt, tv->salt_size, prk);
|
||||
if (err) {
|
||||
pr_err("%s(%s): hkdf_extract failed with %d\n",
|
||||
tv->test, driver, err);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (memcmp(prk, tv->prk, tv->prk_size)) {
|
||||
pr_err("%s(%s): hkdf_extract prk mismatch\n",
|
||||
tv->test, driver);
|
||||
print_hex_dump(KERN_ERR, "prk: ", DUMP_PREFIX_NONE,
|
||||
16, 1, prk, tv->prk_size, false);
|
||||
err = -EINVAL;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
okm = kzalloc(tv->okm_size, GFP_KERNEL);
|
||||
if (!okm) {
|
||||
err = -ENOMEM;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
err = crypto_shash_setkey(tfm, tv->prk, tv->prk_size);
|
||||
if (err) {
|
||||
pr_err("%s(%s): failed to set prk, error %d\n",
|
||||
tv->test, driver, err);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
err = hkdf_expand(tfm, tv->info, tv->info_size,
|
||||
okm, tv->okm_size);
|
||||
if (err) {
|
||||
pr_err("%s(%s): hkdf_expand() failed with %d\n",
|
||||
tv->test, driver, err);
|
||||
} else if (memcmp(okm, tv->okm, tv->okm_size)) {
|
||||
pr_err("%s(%s): hkdf_expand() okm mismatch\n",
|
||||
tv->test, driver);
|
||||
print_hex_dump(KERN_ERR, "okm: ", DUMP_PREFIX_NONE,
|
||||
16, 1, okm, tv->okm_size, false);
|
||||
err = -EINVAL;
|
||||
}
|
||||
out_free:
|
||||
kfree(okm);
|
||||
kfree(prk);
|
||||
crypto_free_shash(tfm);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Module init: when CONFIG_CRYPTO_SELFTESTS is enabled, run every HKDF
 * test vector for each supported HMAC digest, stopping at the first
 * failure. Returns 0 when the self-tests are disabled or all pass.
 */
static int __init crypto_hkdf_module_init(void)
{
	static const struct {
		const char *hmac;
		const struct hkdf_testvec *tv;
		size_t count;
	} suites[] = {
		{ "hmac(sha256)", hkdf_sha256_tv, ARRAY_SIZE(hkdf_sha256_tv) },
		{ "hmac(sha384)", hkdf_sha384_tv, ARRAY_SIZE(hkdf_sha384_tv) },
		{ "hmac(sha512)", hkdf_sha512_tv, ARRAY_SIZE(hkdf_sha512_tv) },
	};
	size_t s, v;
	int ret;

	if (!IS_ENABLED(CONFIG_CRYPTO_SELFTESTS))
		return 0;

	for (s = 0; s < ARRAY_SIZE(suites); s++) {
		for (v = 0; v < suites[s].count; v++) {
			ret = hkdf_test(suites[s].hmac, &suites[s].tv[v]);
			if (ret)
				return ret;
		}
	}
	return 0;
}
|
||||
|
||||
/* Nothing to tear down: the self-tests run entirely at init time. */
static void __exit crypto_hkdf_module_exit(void) {}
|
||||
|
||||
late_initcall(crypto_hkdf_module_init);
|
||||
module_exit(crypto_hkdf_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("HMAC-based Key Derivation Function (HKDF)");
|
||||
@@ -3,7 +3,6 @@ drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o
|
||||
drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
|
||||
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
|
||||
drbd-y += drbd_interval.o drbd_state.o
|
||||
drbd-y += drbd_nla.o
|
||||
drbd-$(CONFIG_DEBUG_FS) += drbd_debugfs.o
|
||||
|
||||
obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o
|
||||
|
||||
@@ -874,7 +874,7 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
|
||||
if (uuid && uuid != UUID_JUST_CREATED)
|
||||
uuid = uuid + UUID_NEW_BM_OFFSET;
|
||||
else
|
||||
get_random_bytes(&uuid, sizeof(u64));
|
||||
uuid = get_random_u64();
|
||||
drbd_uuid_set(device, UI_BITMAP, uuid);
|
||||
drbd_print_uuids(device, "updated sync UUID");
|
||||
drbd_md_sync(device);
|
||||
@@ -3337,7 +3337,7 @@ void drbd_uuid_new_current(struct drbd_device *device) __must_hold(local)
|
||||
u64 val;
|
||||
unsigned long long bm_uuid;
|
||||
|
||||
get_random_bytes(&val, sizeof(u64));
|
||||
val = get_random_u64();
|
||||
|
||||
spin_lock_irq(&device->ldev->md.uuid_lock);
|
||||
bm_uuid = device->ldev->md.uuid[UI_BITMAP];
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,56 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
#include <linux/kernel.h>
|
||||
#include <net/netlink.h>
|
||||
#include <linux/drbd_genl_api.h>
|
||||
#include "drbd_nla.h"
|
||||
|
||||
static int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
|
||||
{
|
||||
struct nlattr *head = nla_data(nla);
|
||||
int len = nla_len(nla);
|
||||
int rem;
|
||||
|
||||
/*
|
||||
* validate_nla (called from nla_parse_nested) ignores attributes
|
||||
* beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
|
||||
* In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
|
||||
* flag set also, check and remove that flag before calling
|
||||
* nla_parse_nested.
|
||||
*/
|
||||
|
||||
nla_for_each_attr(nla, head, len, rem) {
|
||||
if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
|
||||
nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
|
||||
if (nla_type(nla) > maxtype)
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
|
||||
const struct nla_policy *policy)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = drbd_nla_check_mandatory(maxtype, nla);
|
||||
if (!err)
|
||||
err = nla_parse_nested_deprecated(tb, maxtype, nla, policy,
|
||||
NULL);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype)
|
||||
{
|
||||
int err;
|
||||
/*
|
||||
* If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and
|
||||
* we don't know about that attribute, reject all the nested
|
||||
* attributes.
|
||||
*/
|
||||
err = drbd_nla_check_mandatory(maxtype, nla);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
return nla_find_nested(nla, attrtype);
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef __DRBD_NLA_H
|
||||
#define __DRBD_NLA_H
|
||||
|
||||
extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
|
||||
const struct nla_policy *policy);
|
||||
extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype);
|
||||
|
||||
#endif /* __DRBD_NLA_H */
|
||||
@@ -46,6 +46,8 @@
|
||||
#include <linux/kref.h>
|
||||
#include <linux/kfifo.h>
|
||||
#include <linux/blk-integrity.h>
|
||||
#include <linux/maple_tree.h>
|
||||
#include <linux/xarray.h>
|
||||
#include <uapi/linux/fs.h>
|
||||
#include <uapi/linux/ublk_cmd.h>
|
||||
|
||||
@@ -58,6 +60,11 @@
|
||||
#define UBLK_CMD_UPDATE_SIZE _IOC_NR(UBLK_U_CMD_UPDATE_SIZE)
|
||||
#define UBLK_CMD_QUIESCE_DEV _IOC_NR(UBLK_U_CMD_QUIESCE_DEV)
|
||||
#define UBLK_CMD_TRY_STOP_DEV _IOC_NR(UBLK_U_CMD_TRY_STOP_DEV)
|
||||
#define UBLK_CMD_REG_BUF _IOC_NR(UBLK_U_CMD_REG_BUF)
|
||||
#define UBLK_CMD_UNREG_BUF _IOC_NR(UBLK_U_CMD_UNREG_BUF)
|
||||
|
||||
/* Default max shmem buffer size: 4GB (may be increased in future) */
|
||||
#define UBLK_SHMEM_BUF_SIZE_MAX (1ULL << 32)
|
||||
|
||||
#define UBLK_IO_REGISTER_IO_BUF _IOC_NR(UBLK_U_IO_REGISTER_IO_BUF)
|
||||
#define UBLK_IO_UNREGISTER_IO_BUF _IOC_NR(UBLK_U_IO_UNREGISTER_IO_BUF)
|
||||
@@ -81,7 +88,8 @@
|
||||
| (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) ? UBLK_F_INTEGRITY : 0) \
|
||||
| UBLK_F_SAFE_STOP_DEV \
|
||||
| UBLK_F_BATCH_IO \
|
||||
| UBLK_F_NO_AUTO_PART_SCAN)
|
||||
| UBLK_F_NO_AUTO_PART_SCAN \
|
||||
| UBLK_F_SHMEM_ZC)
|
||||
|
||||
#define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
|
||||
| UBLK_F_USER_RECOVERY_REISSUE \
|
||||
@@ -289,6 +297,13 @@ struct ublk_queue {
|
||||
struct ublk_io ios[] __counted_by(q_depth);
|
||||
};
|
||||
|
||||
/* Maple tree value: maps a PFN range to buffer location */
|
||||
struct ublk_buf_range {
|
||||
unsigned short buf_index;
|
||||
unsigned short flags;
|
||||
unsigned int base_offset; /* byte offset within buffer */
|
||||
};
|
||||
|
||||
struct ublk_device {
|
||||
struct gendisk *ub_disk;
|
||||
|
||||
@@ -323,6 +338,10 @@ struct ublk_device {
|
||||
|
||||
bool block_open; /* protected by open_mutex */
|
||||
|
||||
/* shared memory zero copy */
|
||||
struct maple_tree buf_tree;
|
||||
struct ida buf_ida;
|
||||
|
||||
struct ublk_queue *queues[];
|
||||
};
|
||||
|
||||
@@ -334,6 +353,9 @@ struct ublk_params_header {
|
||||
|
||||
static void ublk_io_release(void *priv);
|
||||
static void ublk_stop_dev_unlocked(struct ublk_device *ub);
|
||||
static bool ublk_try_buf_match(struct ublk_device *ub, struct request *rq,
|
||||
u32 *buf_idx, u32 *buf_off);
|
||||
static void ublk_buf_cleanup(struct ublk_device *ub);
|
||||
static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq);
|
||||
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
|
||||
u16 q_id, u16 tag, struct ublk_io *io);
|
||||
@@ -398,6 +420,22 @@ static inline bool ublk_dev_support_zero_copy(const struct ublk_device *ub)
|
||||
return ub->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY;
|
||||
}
|
||||
|
||||
static inline bool ublk_support_shmem_zc(const struct ublk_queue *ubq)
|
||||
{
|
||||
return ubq->flags & UBLK_F_SHMEM_ZC;
|
||||
}
|
||||
|
||||
static inline bool ublk_iod_is_shmem_zc(const struct ublk_queue *ubq,
|
||||
unsigned int tag)
|
||||
{
|
||||
return ublk_get_iod(ubq, tag)->op_flags & UBLK_IO_F_SHMEM_ZC;
|
||||
}
|
||||
|
||||
static inline bool ublk_dev_support_shmem_zc(const struct ublk_device *ub)
|
||||
{
|
||||
return ub->dev_info.flags & UBLK_F_SHMEM_ZC;
|
||||
}
|
||||
|
||||
static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
|
||||
{
|
||||
return ubq->flags & UBLK_F_AUTO_BUF_REG;
|
||||
@@ -808,7 +846,7 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub)
|
||||
|
||||
static int ublk_integrity_flags(u32 flags)
|
||||
{
|
||||
int ret_flags = 0;
|
||||
int ret_flags = BLK_SPLIT_INTERVAL_CAPABLE;
|
||||
|
||||
if (flags & LBMD_PI_CAP_INTEGRITY) {
|
||||
flags &= ~LBMD_PI_CAP_INTEGRITY;
|
||||
@@ -1460,6 +1498,19 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
|
||||
iod->op_flags = ublk_op | ublk_req_build_flags(req);
|
||||
iod->nr_sectors = blk_rq_sectors(req);
|
||||
iod->start_sector = blk_rq_pos(req);
|
||||
|
||||
/* Try shmem zero-copy match before setting addr */
|
||||
if (ublk_support_shmem_zc(ubq) && ublk_rq_has_data(req)) {
|
||||
u32 buf_idx, buf_off;
|
||||
|
||||
if (ublk_try_buf_match(ubq->dev, req,
|
||||
&buf_idx, &buf_off)) {
|
||||
iod->op_flags |= UBLK_IO_F_SHMEM_ZC;
|
||||
iod->addr = ublk_shmem_zc_addr(buf_idx, buf_off);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
}
|
||||
|
||||
iod->addr = io->buf.addr;
|
||||
|
||||
return BLK_STS_OK;
|
||||
@@ -1505,6 +1556,10 @@ static inline void __ublk_complete_rq(struct request *req, struct ublk_io *io,
|
||||
req_op(req) != REQ_OP_DRV_IN)
|
||||
goto exit;
|
||||
|
||||
/* shmem zero copy: no data to unmap, pages already shared */
|
||||
if (ublk_iod_is_shmem_zc(req->mq_hctx->driver_data, req->tag))
|
||||
goto exit;
|
||||
|
||||
/* for READ request, writing data in iod->addr to rq buffers */
|
||||
unmapped_bytes = ublk_unmap_io(need_map, req, io);
|
||||
|
||||
@@ -1663,7 +1718,13 @@ static void ublk_auto_buf_dispatch(const struct ublk_queue *ubq,
|
||||
static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
|
||||
struct ublk_io *io)
|
||||
{
|
||||
unsigned mapped_bytes = ublk_map_io(ubq, req, io);
|
||||
unsigned mapped_bytes;
|
||||
|
||||
/* shmem zero copy: skip data copy, pages already shared */
|
||||
if (ublk_iod_is_shmem_zc(ubq, req->tag))
|
||||
return true;
|
||||
|
||||
mapped_bytes = ublk_map_io(ubq, req, io);
|
||||
|
||||
/* partially mapped, update io descriptor */
|
||||
if (unlikely(mapped_bytes != blk_rq_bytes(req))) {
|
||||
@@ -1789,7 +1850,7 @@ static bool ublk_batch_prep_dispatch(struct ublk_queue *ubq,
|
||||
* Filter out UBLK_BATCH_IO_UNUSED_TAG entries from tag_buf.
|
||||
* Returns the new length after filtering.
|
||||
*/
|
||||
static unsigned int ublk_filter_unused_tags(unsigned short *tag_buf,
|
||||
static noinline unsigned int ublk_filter_unused_tags(unsigned short *tag_buf,
|
||||
unsigned int len)
|
||||
{
|
||||
unsigned int i, j;
|
||||
@@ -1805,6 +1866,41 @@ static unsigned int ublk_filter_unused_tags(unsigned short *tag_buf,
|
||||
return j;
|
||||
}
|
||||
|
||||
static noinline void ublk_batch_dispatch_fail(struct ublk_queue *ubq,
|
||||
const struct ublk_batch_io_data *data,
|
||||
unsigned short *tag_buf, size_t len, int ret)
|
||||
{
|
||||
int i, res;
|
||||
|
||||
/*
|
||||
* Undo prep state for all IOs since userspace never received them.
|
||||
* This restores IOs to pre-prepared state so they can be cleanly
|
||||
* re-prepared when tags are pulled from FIFO again.
|
||||
*/
|
||||
for (i = 0; i < len; i++) {
|
||||
struct ublk_io *io = &ubq->ios[tag_buf[i]];
|
||||
int index = -1;
|
||||
|
||||
ublk_io_lock(io);
|
||||
if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG)
|
||||
index = io->buf.auto_reg.index;
|
||||
io->flags &= ~(UBLK_IO_FLAG_OWNED_BY_SRV | UBLK_IO_FLAG_AUTO_BUF_REG);
|
||||
io->flags |= UBLK_IO_FLAG_ACTIVE;
|
||||
ublk_io_unlock(io);
|
||||
|
||||
if (index != -1)
|
||||
io_buffer_unregister_bvec(data->cmd, index,
|
||||
data->issue_flags);
|
||||
}
|
||||
|
||||
res = kfifo_in_spinlocked_noirqsave(&ubq->evts_fifo,
|
||||
tag_buf, len, &ubq->evts_lock);
|
||||
|
||||
pr_warn_ratelimited("%s: copy tags or post CQE failure, move back "
|
||||
"tags(%d %zu) ret %d\n", __func__, res, len,
|
||||
ret);
|
||||
}
|
||||
|
||||
#define MAX_NR_TAG 128
|
||||
static int __ublk_batch_dispatch(struct ublk_queue *ubq,
|
||||
const struct ublk_batch_io_data *data,
|
||||
@@ -1848,37 +1944,8 @@ static int __ublk_batch_dispatch(struct ublk_queue *ubq,
|
||||
|
||||
sel.val = ublk_batch_copy_io_tags(fcmd, sel.addr, tag_buf, len * tag_sz);
|
||||
ret = ublk_batch_fetch_post_cqe(fcmd, &sel, data->issue_flags);
|
||||
if (unlikely(ret < 0)) {
|
||||
int i, res;
|
||||
|
||||
/*
|
||||
* Undo prep state for all IOs since userspace never received them.
|
||||
* This restores IOs to pre-prepared state so they can be cleanly
|
||||
* re-prepared when tags are pulled from FIFO again.
|
||||
*/
|
||||
for (i = 0; i < len; i++) {
|
||||
struct ublk_io *io = &ubq->ios[tag_buf[i]];
|
||||
int index = -1;
|
||||
|
||||
ublk_io_lock(io);
|
||||
if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG)
|
||||
index = io->buf.auto_reg.index;
|
||||
io->flags &= ~(UBLK_IO_FLAG_OWNED_BY_SRV | UBLK_IO_FLAG_AUTO_BUF_REG);
|
||||
io->flags |= UBLK_IO_FLAG_ACTIVE;
|
||||
ublk_io_unlock(io);
|
||||
|
||||
if (index != -1)
|
||||
io_buffer_unregister_bvec(data->cmd, index,
|
||||
data->issue_flags);
|
||||
}
|
||||
|
||||
res = kfifo_in_spinlocked_noirqsave(&ubq->evts_fifo,
|
||||
tag_buf, len, &ubq->evts_lock);
|
||||
|
||||
pr_warn_ratelimited("%s: copy tags or post CQE failure, move back "
|
||||
"tags(%d %zu) ret %d\n", __func__, res, len,
|
||||
ret);
|
||||
}
|
||||
if (unlikely(ret < 0))
|
||||
ublk_batch_dispatch_fail(ubq, data, tag_buf, len, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2910,22 +2977,26 @@ static void ublk_stop_dev(struct ublk_device *ub)
|
||||
ublk_cancel_dev(ub);
|
||||
}
|
||||
|
||||
static void ublk_reset_io_flags(struct ublk_queue *ubq, struct ublk_io *io)
|
||||
{
|
||||
/* UBLK_IO_FLAG_CANCELED can be cleared now */
|
||||
spin_lock(&ubq->cancel_lock);
|
||||
io->flags &= ~UBLK_IO_FLAG_CANCELED;
|
||||
spin_unlock(&ubq->cancel_lock);
|
||||
}
|
||||
|
||||
/* reset per-queue io flags */
|
||||
static void ublk_queue_reset_io_flags(struct ublk_queue *ubq)
|
||||
{
|
||||
int j;
|
||||
|
||||
/* UBLK_IO_FLAG_CANCELED can be cleared now */
|
||||
spin_lock(&ubq->cancel_lock);
|
||||
for (j = 0; j < ubq->q_depth; j++)
|
||||
ubq->ios[j].flags &= ~UBLK_IO_FLAG_CANCELED;
|
||||
ubq->canceling = false;
|
||||
spin_unlock(&ubq->cancel_lock);
|
||||
ubq->fail_io = false;
|
||||
}
|
||||
|
||||
/* device can only be started after all IOs are ready */
|
||||
static void ublk_mark_io_ready(struct ublk_device *ub, u16 q_id)
|
||||
static void ublk_mark_io_ready(struct ublk_device *ub, u16 q_id,
|
||||
struct ublk_io *io)
|
||||
__must_hold(&ub->mutex)
|
||||
{
|
||||
struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
|
||||
@@ -2934,6 +3005,7 @@ static void ublk_mark_io_ready(struct ublk_device *ub, u16 q_id)
|
||||
ub->unprivileged_daemons = true;
|
||||
|
||||
ubq->nr_io_ready++;
|
||||
ublk_reset_io_flags(ubq, io);
|
||||
|
||||
/* Check if this specific queue is now fully ready */
|
||||
if (ublk_queue_ready(ubq)) {
|
||||
@@ -3196,7 +3268,7 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_device *ub,
|
||||
if (!ret)
|
||||
ret = ublk_config_io_buf(ub, io, cmd, buf_addr, NULL);
|
||||
if (!ret)
|
||||
ublk_mark_io_ready(ub, q_id);
|
||||
ublk_mark_io_ready(ub, q_id, io);
|
||||
mutex_unlock(&ub->mutex);
|
||||
return ret;
|
||||
}
|
||||
@@ -3604,7 +3676,7 @@ static int ublk_batch_prep_io(struct ublk_queue *ubq,
|
||||
ublk_io_unlock(io);
|
||||
|
||||
if (!ret)
|
||||
ublk_mark_io_ready(data->ub, ubq->q_id);
|
||||
ublk_mark_io_ready(data->ub, ubq->q_id, io);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -4200,6 +4272,7 @@ static void ublk_cdev_rel(struct device *dev)
|
||||
{
|
||||
struct ublk_device *ub = container_of(dev, struct ublk_device, cdev_dev);
|
||||
|
||||
ublk_buf_cleanup(ub);
|
||||
blk_mq_free_tag_set(&ub->tag_set);
|
||||
ublk_deinit_queues(ub);
|
||||
ublk_free_dev_number(ub);
|
||||
@@ -4621,6 +4694,8 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
|
||||
mutex_init(&ub->mutex);
|
||||
spin_lock_init(&ub->lock);
|
||||
mutex_init(&ub->cancel_mutex);
|
||||
mt_init(&ub->buf_tree);
|
||||
ida_init(&ub->buf_ida);
|
||||
INIT_WORK(&ub->partition_scan_work, ublk_partition_scan_work);
|
||||
|
||||
ret = ublk_alloc_dev_number(ub, header->dev_id);
|
||||
@@ -5171,6 +5246,314 @@ exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock for maple tree modification: acquire ub->mutex, then freeze queue
|
||||
* if device is started. If device is not yet started, only mutex is
|
||||
* needed since no I/O path can access the tree.
|
||||
*
|
||||
* This ordering (mutex -> freeze) is safe because ublk_stop_dev_unlocked()
|
||||
* already holds ub->mutex when calling del_gendisk() which freezes the queue.
|
||||
*/
|
||||
static unsigned int ublk_lock_buf_tree(struct ublk_device *ub)
|
||||
{
|
||||
unsigned int memflags = 0;
|
||||
|
||||
mutex_lock(&ub->mutex);
|
||||
if (ub->ub_disk)
|
||||
memflags = blk_mq_freeze_queue(ub->ub_disk->queue);
|
||||
|
||||
return memflags;
|
||||
}
|
||||
|
||||
static void ublk_unlock_buf_tree(struct ublk_device *ub, unsigned int memflags)
|
||||
{
|
||||
if (ub->ub_disk)
|
||||
blk_mq_unfreeze_queue(ub->ub_disk->queue, memflags);
|
||||
mutex_unlock(&ub->mutex);
|
||||
}
|
||||
|
||||
/* Erase coalesced PFN ranges from the maple tree matching buf_index */
|
||||
static void ublk_buf_erase_ranges(struct ublk_device *ub, int buf_index)
|
||||
{
|
||||
MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
|
||||
struct ublk_buf_range *range;
|
||||
|
||||
mas_lock(&mas);
|
||||
mas_for_each(&mas, range, ULONG_MAX) {
|
||||
if (range->buf_index == buf_index) {
|
||||
mas_erase(&mas);
|
||||
kfree(range);
|
||||
}
|
||||
}
|
||||
mas_unlock(&mas);
|
||||
}
|
||||
|
||||
static int __ublk_ctrl_reg_buf(struct ublk_device *ub,
|
||||
struct page **pages, unsigned long nr_pages,
|
||||
int index, unsigned short flags)
|
||||
{
|
||||
unsigned long i;
|
||||
int ret;
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
unsigned long pfn = page_to_pfn(pages[i]);
|
||||
unsigned long start = i;
|
||||
struct ublk_buf_range *range;
|
||||
|
||||
/* Find run of consecutive PFNs */
|
||||
while (i + 1 < nr_pages &&
|
||||
page_to_pfn(pages[i + 1]) == pfn + (i - start) + 1)
|
||||
i++;
|
||||
|
||||
range = kzalloc(sizeof(*range), GFP_KERNEL);
|
||||
if (!range) {
|
||||
ret = -ENOMEM;
|
||||
goto unwind;
|
||||
}
|
||||
range->buf_index = index;
|
||||
range->flags = flags;
|
||||
range->base_offset = start << PAGE_SHIFT;
|
||||
|
||||
ret = mtree_insert_range(&ub->buf_tree, pfn,
|
||||
pfn + (i - start),
|
||||
range, GFP_KERNEL);
|
||||
if (ret) {
|
||||
kfree(range);
|
||||
goto unwind;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
unwind:
|
||||
ublk_buf_erase_ranges(ub, index);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Register a shared memory buffer for zero-copy I/O.
|
||||
* Pins pages, builds PFN maple tree, freezes/unfreezes the queue
|
||||
* internally. Returns buffer index (>= 0) on success.
|
||||
*/
|
||||
static int ublk_ctrl_reg_buf(struct ublk_device *ub,
|
||||
struct ublksrv_ctrl_cmd *header)
|
||||
{
|
||||
void __user *argp = (void __user *)(unsigned long)header->addr;
|
||||
struct ublk_shmem_buf_reg buf_reg;
|
||||
unsigned long nr_pages;
|
||||
struct page **pages = NULL;
|
||||
unsigned int gup_flags;
|
||||
unsigned int memflags;
|
||||
long pinned;
|
||||
int index;
|
||||
int ret;
|
||||
|
||||
if (!ublk_dev_support_shmem_zc(ub))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
memset(&buf_reg, 0, sizeof(buf_reg));
|
||||
if (copy_from_user(&buf_reg, argp,
|
||||
min_t(size_t, header->len, sizeof(buf_reg))))
|
||||
return -EFAULT;
|
||||
|
||||
if (buf_reg.flags & ~UBLK_SHMEM_BUF_READ_ONLY)
|
||||
return -EINVAL;
|
||||
|
||||
if (buf_reg.reserved)
|
||||
return -EINVAL;
|
||||
|
||||
if (!buf_reg.len || buf_reg.len > UBLK_SHMEM_BUF_SIZE_MAX ||
|
||||
!PAGE_ALIGNED(buf_reg.len) || !PAGE_ALIGNED(buf_reg.addr))
|
||||
return -EINVAL;
|
||||
|
||||
nr_pages = buf_reg.len >> PAGE_SHIFT;
|
||||
|
||||
/* Pin pages before any locks (may sleep) */
|
||||
pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
|
||||
if (!pages)
|
||||
return -ENOMEM;
|
||||
|
||||
gup_flags = FOLL_LONGTERM;
|
||||
if (!(buf_reg.flags & UBLK_SHMEM_BUF_READ_ONLY))
|
||||
gup_flags |= FOLL_WRITE;
|
||||
|
||||
pinned = pin_user_pages_fast(buf_reg.addr, nr_pages, gup_flags, pages);
|
||||
if (pinned < 0) {
|
||||
ret = pinned;
|
||||
goto err_free_pages;
|
||||
}
|
||||
if (pinned != nr_pages) {
|
||||
ret = -EFAULT;
|
||||
goto err_unpin;
|
||||
}
|
||||
|
||||
memflags = ublk_lock_buf_tree(ub);
|
||||
|
||||
index = ida_alloc_max(&ub->buf_ida, USHRT_MAX, GFP_KERNEL);
|
||||
if (index < 0) {
|
||||
ret = index;
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
ret = __ublk_ctrl_reg_buf(ub, pages, nr_pages, index, buf_reg.flags);
|
||||
if (ret) {
|
||||
ida_free(&ub->buf_ida, index);
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
ublk_unlock_buf_tree(ub, memflags);
|
||||
kvfree(pages);
|
||||
return index;
|
||||
|
||||
err_unlock:
|
||||
ublk_unlock_buf_tree(ub, memflags);
|
||||
err_unpin:
|
||||
unpin_user_pages(pages, pinned);
|
||||
err_free_pages:
|
||||
kvfree(pages);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __ublk_ctrl_unreg_buf(struct ublk_device *ub, int buf_index)
|
||||
{
|
||||
MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
|
||||
struct ublk_buf_range *range;
|
||||
struct page *pages[32];
|
||||
int ret = -ENOENT;
|
||||
|
||||
mas_lock(&mas);
|
||||
mas_for_each(&mas, range, ULONG_MAX) {
|
||||
unsigned long base, nr, off;
|
||||
|
||||
if (range->buf_index != buf_index)
|
||||
continue;
|
||||
|
||||
ret = 0;
|
||||
base = mas.index;
|
||||
nr = mas.last - base + 1;
|
||||
mas_erase(&mas);
|
||||
|
||||
for (off = 0; off < nr; ) {
|
||||
unsigned int batch = min_t(unsigned long,
|
||||
nr - off, 32);
|
||||
unsigned int j;
|
||||
|
||||
for (j = 0; j < batch; j++)
|
||||
pages[j] = pfn_to_page(base + off + j);
|
||||
unpin_user_pages(pages, batch);
|
||||
off += batch;
|
||||
}
|
||||
kfree(range);
|
||||
}
|
||||
mas_unlock(&mas);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_unreg_buf(struct ublk_device *ub,
|
||||
struct ublksrv_ctrl_cmd *header)
|
||||
{
|
||||
int index = (int)header->data[0];
|
||||
unsigned int memflags;
|
||||
int ret;
|
||||
|
||||
if (!ublk_dev_support_shmem_zc(ub))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (index < 0 || index > USHRT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
memflags = ublk_lock_buf_tree(ub);
|
||||
|
||||
ret = __ublk_ctrl_unreg_buf(ub, index);
|
||||
if (!ret)
|
||||
ida_free(&ub->buf_ida, index);
|
||||
|
||||
ublk_unlock_buf_tree(ub, memflags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ublk_buf_cleanup(struct ublk_device *ub)
|
||||
{
|
||||
MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
|
||||
struct ublk_buf_range *range;
|
||||
struct page *pages[32];
|
||||
|
||||
mas_for_each(&mas, range, ULONG_MAX) {
|
||||
unsigned long base = mas.index;
|
||||
unsigned long nr = mas.last - base + 1;
|
||||
unsigned long off;
|
||||
|
||||
for (off = 0; off < nr; ) {
|
||||
unsigned int batch = min_t(unsigned long,
|
||||
nr - off, 32);
|
||||
unsigned int j;
|
||||
|
||||
for (j = 0; j < batch; j++)
|
||||
pages[j] = pfn_to_page(base + off + j);
|
||||
unpin_user_pages(pages, batch);
|
||||
off += batch;
|
||||
}
|
||||
kfree(range);
|
||||
}
|
||||
mtree_destroy(&ub->buf_tree);
|
||||
ida_destroy(&ub->buf_ida);
|
||||
}
|
||||
|
||||
/* Check if request pages match a registered shared memory buffer */
|
||||
static bool ublk_try_buf_match(struct ublk_device *ub,
|
||||
struct request *rq,
|
||||
u32 *buf_idx, u32 *buf_off)
|
||||
{
|
||||
struct req_iterator iter;
|
||||
struct bio_vec bv;
|
||||
int index = -1;
|
||||
unsigned long expected_offset = 0;
|
||||
bool first = true;
|
||||
|
||||
rq_for_each_bvec(bv, rq, iter) {
|
||||
unsigned long pfn = page_to_pfn(bv.bv_page);
|
||||
unsigned long end_pfn = pfn +
|
||||
((bv.bv_offset + bv.bv_len - 1) >> PAGE_SHIFT);
|
||||
struct ublk_buf_range *range;
|
||||
unsigned long off;
|
||||
MA_STATE(mas, &ub->buf_tree, pfn, pfn);
|
||||
|
||||
range = mas_walk(&mas);
|
||||
if (!range)
|
||||
return false;
|
||||
|
||||
/* verify all pages in this bvec fall within the range */
|
||||
if (end_pfn > mas.last)
|
||||
return false;
|
||||
|
||||
off = range->base_offset +
|
||||
(pfn - mas.index) * PAGE_SIZE + bv.bv_offset;
|
||||
|
||||
if (first) {
|
||||
/* Read-only buffer can't serve READ (kernel writes) */
|
||||
if ((range->flags & UBLK_SHMEM_BUF_READ_ONLY) &&
|
||||
req_op(rq) != REQ_OP_WRITE)
|
||||
return false;
|
||||
index = range->buf_index;
|
||||
expected_offset = off;
|
||||
*buf_off = off;
|
||||
first = false;
|
||||
} else {
|
||||
if (range->buf_index != index)
|
||||
return false;
|
||||
if (off != expected_offset)
|
||||
return false;
|
||||
}
|
||||
expected_offset += bv.bv_len;
|
||||
}
|
||||
|
||||
if (first)
|
||||
return false;
|
||||
|
||||
*buf_idx = index;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
|
||||
u32 cmd_op, struct ublksrv_ctrl_cmd *header)
|
||||
{
|
||||
@@ -5228,6 +5611,8 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
|
||||
case UBLK_CMD_UPDATE_SIZE:
|
||||
case UBLK_CMD_QUIESCE_DEV:
|
||||
case UBLK_CMD_TRY_STOP_DEV:
|
||||
case UBLK_CMD_REG_BUF:
|
||||
case UBLK_CMD_UNREG_BUF:
|
||||
mask = MAY_READ | MAY_WRITE;
|
||||
break;
|
||||
default:
|
||||
@@ -5352,6 +5737,12 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
|
||||
case UBLK_CMD_TRY_STOP_DEV:
|
||||
ret = ublk_ctrl_try_stop_dev(ub);
|
||||
break;
|
||||
case UBLK_CMD_REG_BUF:
|
||||
ret = ublk_ctrl_reg_buf(ub, &header);
|
||||
break;
|
||||
case UBLK_CMD_UNREG_BUF:
|
||||
ret = ublk_ctrl_unreg_buf(ub, &header);
|
||||
break;
|
||||
default:
|
||||
ret = -EOPNOTSUPP;
|
||||
break;
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/parser.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/xattr.h>
|
||||
|
||||
/*
|
||||
* Options for adding (and removing) a device.
|
||||
@@ -34,6 +35,8 @@ enum {
|
||||
ZLOOP_OPT_BUFFERED_IO = (1 << 8),
|
||||
ZLOOP_OPT_ZONE_APPEND = (1 << 9),
|
||||
ZLOOP_OPT_ORDERED_ZONE_APPEND = (1 << 10),
|
||||
ZLOOP_OPT_DISCARD_WRITE_CACHE = (1 << 11),
|
||||
ZLOOP_OPT_MAX_OPEN_ZONES = (1 << 12),
|
||||
};
|
||||
|
||||
static const match_table_t zloop_opt_tokens = {
|
||||
@@ -48,6 +51,8 @@ static const match_table_t zloop_opt_tokens = {
|
||||
{ ZLOOP_OPT_BUFFERED_IO, "buffered_io" },
|
||||
{ ZLOOP_OPT_ZONE_APPEND, "zone_append=%u" },
|
||||
{ ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append" },
|
||||
{ ZLOOP_OPT_DISCARD_WRITE_CACHE, "discard_write_cache" },
|
||||
{ ZLOOP_OPT_MAX_OPEN_ZONES, "max_open_zones=%u" },
|
||||
{ ZLOOP_OPT_ERR, NULL }
|
||||
};
|
||||
|
||||
@@ -56,6 +61,7 @@ static const match_table_t zloop_opt_tokens = {
|
||||
#define ZLOOP_DEF_ZONE_SIZE ((256ULL * SZ_1M) >> SECTOR_SHIFT)
|
||||
#define ZLOOP_DEF_NR_ZONES 64
|
||||
#define ZLOOP_DEF_NR_CONV_ZONES 8
|
||||
#define ZLOOP_DEF_MAX_OPEN_ZONES 0
|
||||
#define ZLOOP_DEF_BASE_DIR "/var/local/zloop"
|
||||
#define ZLOOP_DEF_NR_QUEUES 1
|
||||
#define ZLOOP_DEF_QUEUE_DEPTH 128
|
||||
@@ -73,12 +79,14 @@ struct zloop_options {
|
||||
sector_t zone_size;
|
||||
sector_t zone_capacity;
|
||||
unsigned int nr_conv_zones;
|
||||
unsigned int max_open_zones;
|
||||
char *base_dir;
|
||||
unsigned int nr_queues;
|
||||
unsigned int queue_depth;
|
||||
bool buffered_io;
|
||||
bool zone_append;
|
||||
bool ordered_zone_append;
|
||||
bool discard_write_cache;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -95,7 +103,12 @@ enum zloop_zone_flags {
|
||||
ZLOOP_ZONE_SEQ_ERROR,
|
||||
};
|
||||
|
||||
/*
|
||||
* Zone descriptor.
|
||||
* Locking order: z.lock -> z.wp_lock -> zlo.open_zones_lock
|
||||
*/
|
||||
struct zloop_zone {
|
||||
struct list_head open_zone_entry;
|
||||
struct file *file;
|
||||
|
||||
unsigned long flags;
|
||||
@@ -119,6 +132,7 @@ struct zloop_device {
|
||||
bool buffered_io;
|
||||
bool zone_append;
|
||||
bool ordered_zone_append;
|
||||
bool discard_write_cache;
|
||||
|
||||
const char *base_dir;
|
||||
struct file *data_dir;
|
||||
@@ -128,8 +142,13 @@ struct zloop_device {
|
||||
sector_t zone_capacity;
|
||||
unsigned int nr_zones;
|
||||
unsigned int nr_conv_zones;
|
||||
unsigned int max_open_zones;
|
||||
unsigned int block_size;
|
||||
|
||||
spinlock_t open_zones_lock;
|
||||
struct list_head open_zones_lru_list;
|
||||
unsigned int nr_open_zones;
|
||||
|
||||
struct zloop_zone zones[] __counted_by(nr_zones);
|
||||
};
|
||||
|
||||
@@ -153,6 +172,122 @@ static unsigned int rq_zone_no(struct request *rq)
|
||||
return blk_rq_pos(rq) >> zlo->zone_shift;
|
||||
}
|
||||
|
||||
/*
|
||||
* Open an already open zone. This is mostly a no-op, except for the imp open ->
|
||||
* exp open condition change that may happen. We also move a zone at the tail of
|
||||
* the list of open zones so that if we need to
|
||||
* implicitly close one open zone, we can do so in LRU order.
|
||||
*/
|
||||
static inline void zloop_lru_rotate_open_zone(struct zloop_device *zlo,
|
||||
struct zloop_zone *zone)
|
||||
{
|
||||
if (zlo->max_open_zones) {
|
||||
spin_lock(&zlo->open_zones_lock);
|
||||
list_move_tail(&zone->open_zone_entry,
|
||||
&zlo->open_zones_lru_list);
|
||||
spin_unlock(&zlo->open_zones_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void zloop_lru_remove_open_zone(struct zloop_device *zlo,
|
||||
struct zloop_zone *zone)
|
||||
{
|
||||
if (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
|
||||
zone->cond == BLK_ZONE_COND_EXP_OPEN) {
|
||||
spin_lock(&zlo->open_zones_lock);
|
||||
list_del_init(&zone->open_zone_entry);
|
||||
zlo->nr_open_zones--;
|
||||
spin_unlock(&zlo->open_zones_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool zloop_can_open_zone(struct zloop_device *zlo)
|
||||
{
|
||||
return !zlo->max_open_zones || zlo->nr_open_zones < zlo->max_open_zones;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have reached the maximum open zones limit, attempt to close an
|
||||
* implicitly open zone (if we have any) so that we can implicitly open another
|
||||
* zone without exceeding the maximum number of open zones.
|
||||
*/
|
||||
static bool zloop_close_imp_open_zone(struct zloop_device *zlo)
|
||||
{
|
||||
struct zloop_zone *zone;
|
||||
|
||||
lockdep_assert_held(&zlo->open_zones_lock);
|
||||
|
||||
if (zloop_can_open_zone(zlo))
|
||||
return true;
|
||||
|
||||
list_for_each_entry(zone, &zlo->open_zones_lru_list, open_zone_entry) {
|
||||
if (zone->cond == BLK_ZONE_COND_IMP_OPEN) {
|
||||
zone->cond = BLK_ZONE_COND_CLOSED;
|
||||
list_del_init(&zone->open_zone_entry);
|
||||
zlo->nr_open_zones--;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool zloop_open_closed_or_empty_zone(struct zloop_device *zlo,
|
||||
struct zloop_zone *zone,
|
||||
bool explicit)
|
||||
{
|
||||
spin_lock(&zlo->open_zones_lock);
|
||||
|
||||
if (explicit) {
|
||||
/*
|
||||
* Explicit open: we cannot allow this if we have reached the
|
||||
* maximum open zones limit.
|
||||
*/
|
||||
if (!zloop_can_open_zone(zlo))
|
||||
goto fail;
|
||||
zone->cond = BLK_ZONE_COND_EXP_OPEN;
|
||||
} else {
|
||||
/*
|
||||
* Implicit open case: if we have reached the maximum open zones
|
||||
* limit, try to close an implicitly open zone first.
|
||||
*/
|
||||
if (!zloop_close_imp_open_zone(zlo))
|
||||
goto fail;
|
||||
zone->cond = BLK_ZONE_COND_IMP_OPEN;
|
||||
}
|
||||
|
||||
zlo->nr_open_zones++;
|
||||
list_add_tail(&zone->open_zone_entry,
|
||||
&zlo->open_zones_lru_list);
|
||||
|
||||
spin_unlock(&zlo->open_zones_lock);
|
||||
|
||||
return true;
|
||||
|
||||
fail:
|
||||
spin_unlock(&zlo->open_zones_lock);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool zloop_do_open_zone(struct zloop_device *zlo,
|
||||
struct zloop_zone *zone, bool explicit)
|
||||
{
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
if (explicit)
|
||||
zone->cond = BLK_ZONE_COND_EXP_OPEN;
|
||||
zloop_lru_rotate_open_zone(zlo, zone);
|
||||
return true;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
return zloop_open_closed_or_empty_zone(zlo, zone, explicit);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
|
||||
{
|
||||
struct zloop_zone *zone = &zlo->zones[zone_no];
|
||||
@@ -186,13 +321,17 @@ static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
|
||||
|
||||
spin_lock_irqsave(&zone->wp_lock, flags);
|
||||
if (!file_sectors) {
|
||||
zloop_lru_remove_open_zone(zlo, zone);
|
||||
zone->cond = BLK_ZONE_COND_EMPTY;
|
||||
zone->wp = zone->start;
|
||||
} else if (file_sectors == zlo->zone_capacity) {
|
||||
zloop_lru_remove_open_zone(zlo, zone);
|
||||
zone->cond = BLK_ZONE_COND_FULL;
|
||||
zone->wp = ULLONG_MAX;
|
||||
} else {
|
||||
zone->cond = BLK_ZONE_COND_CLOSED;
|
||||
if (zone->cond != BLK_ZONE_COND_IMP_OPEN &&
|
||||
zone->cond != BLK_ZONE_COND_EXP_OPEN)
|
||||
zone->cond = BLK_ZONE_COND_CLOSED;
|
||||
zone->wp = zone->start + file_sectors;
|
||||
}
|
||||
spin_unlock_irqrestore(&zone->wp_lock, flags);
|
||||
@@ -216,19 +355,8 @@ static int zloop_open_zone(struct zloop_device *zlo, unsigned int zone_no)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
break;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
zone->cond = BLK_ZONE_COND_EXP_OPEN;
|
||||
break;
|
||||
case BLK_ZONE_COND_FULL:
|
||||
default:
|
||||
if (!zloop_do_open_zone(zlo, zone, true))
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&zone->lock);
|
||||
@@ -259,6 +387,7 @@ static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
spin_lock_irqsave(&zone->wp_lock, flags);
|
||||
zloop_lru_remove_open_zone(zlo, zone);
|
||||
if (zone->wp == zone->start)
|
||||
zone->cond = BLK_ZONE_COND_EMPTY;
|
||||
else
|
||||
@@ -300,6 +429,7 @@ static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&zone->wp_lock, flags);
|
||||
zloop_lru_remove_open_zone(zlo, zone);
|
||||
zone->cond = BLK_ZONE_COND_EMPTY;
|
||||
zone->wp = zone->start;
|
||||
clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
|
||||
@@ -347,6 +477,7 @@ static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&zone->wp_lock, flags);
|
||||
zloop_lru_remove_open_zone(zlo, zone);
|
||||
zone->cond = BLK_ZONE_COND_FULL;
|
||||
zone->wp = ULLONG_MAX;
|
||||
clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
|
||||
@@ -378,125 +509,22 @@ static void zloop_rw_complete(struct kiocb *iocb, long ret)
|
||||
zloop_put_cmd(cmd);
|
||||
}
|
||||
|
||||
static void zloop_rw(struct zloop_cmd *cmd)
|
||||
static int zloop_do_rw(struct zloop_cmd *cmd)
|
||||
{
|
||||
struct request *rq = blk_mq_rq_from_pdu(cmd);
|
||||
int rw = req_op(rq) == REQ_OP_READ ? ITER_DEST : ITER_SOURCE;
|
||||
unsigned int nr_bvec = blk_rq_nr_bvec(rq);
|
||||
struct zloop_device *zlo = rq->q->queuedata;
|
||||
unsigned int zone_no = rq_zone_no(rq);
|
||||
sector_t sector = blk_rq_pos(rq);
|
||||
sector_t nr_sectors = blk_rq_sectors(rq);
|
||||
bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
|
||||
bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
|
||||
int rw = is_write ? ITER_SOURCE : ITER_DEST;
|
||||
struct zloop_zone *zone = &zlo->zones[rq_zone_no(rq)];
|
||||
struct req_iterator rq_iter;
|
||||
struct zloop_zone *zone;
|
||||
struct iov_iter iter;
|
||||
struct bio_vec tmp;
|
||||
unsigned long flags;
|
||||
sector_t zone_end;
|
||||
unsigned int nr_bvec;
|
||||
int ret;
|
||||
|
||||
atomic_set(&cmd->ref, 2);
|
||||
cmd->sector = sector;
|
||||
cmd->nr_sectors = nr_sectors;
|
||||
cmd->ret = 0;
|
||||
|
||||
if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* We should never get an I/O beyond the device capacity. */
|
||||
if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
zone = &zlo->zones[zone_no];
|
||||
zone_end = zone->start + zlo->zone_capacity;
|
||||
|
||||
/*
|
||||
* The block layer should never send requests that are not fully
|
||||
* contained within the zone.
|
||||
*/
|
||||
if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
|
||||
mutex_lock(&zone->lock);
|
||||
ret = zloop_update_seq_zone(zlo, zone_no);
|
||||
mutex_unlock(&zone->lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
|
||||
mutex_lock(&zone->lock);
|
||||
|
||||
spin_lock_irqsave(&zone->wp_lock, flags);
|
||||
|
||||
/*
|
||||
* Zone append operations always go at the current write
|
||||
* pointer, but regular write operations must already be
|
||||
* aligned to the write pointer when submitted.
|
||||
*/
|
||||
if (is_append) {
|
||||
/*
|
||||
* If ordered zone append is in use, we already checked
|
||||
* and set the target sector in zloop_queue_rq().
|
||||
*/
|
||||
if (!zlo->ordered_zone_append) {
|
||||
if (zone->cond == BLK_ZONE_COND_FULL ||
|
||||
zone->wp + nr_sectors > zone_end) {
|
||||
spin_unlock_irqrestore(&zone->wp_lock,
|
||||
flags);
|
||||
ret = -EIO;
|
||||
goto unlock;
|
||||
}
|
||||
sector = zone->wp;
|
||||
}
|
||||
cmd->sector = sector;
|
||||
} else if (sector != zone->wp) {
|
||||
spin_unlock_irqrestore(&zone->wp_lock, flags);
|
||||
pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
|
||||
zone_no, sector, zone->wp);
|
||||
ret = -EIO;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* Implicitly open the target zone. */
|
||||
if (zone->cond == BLK_ZONE_COND_CLOSED ||
|
||||
zone->cond == BLK_ZONE_COND_EMPTY)
|
||||
zone->cond = BLK_ZONE_COND_IMP_OPEN;
|
||||
|
||||
/*
|
||||
* Advance the write pointer, unless ordered zone append is in
|
||||
* use. If the write fails, the write pointer position will be
|
||||
* corrected when the next I/O starts execution.
|
||||
*/
|
||||
if (!is_append || !zlo->ordered_zone_append) {
|
||||
zone->wp += nr_sectors;
|
||||
if (zone->wp == zone_end) {
|
||||
zone->cond = BLK_ZONE_COND_FULL;
|
||||
zone->wp = ULLONG_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&zone->wp_lock, flags);
|
||||
}
|
||||
|
||||
nr_bvec = blk_rq_nr_bvec(rq);
|
||||
|
||||
if (rq->bio != rq->biotail) {
|
||||
struct bio_vec *bvec;
|
||||
struct bio_vec tmp, *bvec;
|
||||
|
||||
cmd->bvec = kmalloc_objs(*cmd->bvec, nr_bvec, GFP_NOIO);
|
||||
if (!cmd->bvec) {
|
||||
ret = -EIO;
|
||||
goto unlock;
|
||||
}
|
||||
if (!cmd->bvec)
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
* The bios of the request may be started from the middle of
|
||||
@@ -522,7 +550,7 @@ static void zloop_rw(struct zloop_cmd *cmd)
|
||||
iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
|
||||
}
|
||||
|
||||
cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
|
||||
cmd->iocb.ki_pos = (cmd->sector - zone->start) << SECTOR_SHIFT;
|
||||
cmd->iocb.ki_filp = zone->file;
|
||||
cmd->iocb.ki_complete = zloop_rw_complete;
|
||||
if (!zlo->buffered_io)
|
||||
@@ -530,18 +558,166 @@ static void zloop_rw(struct zloop_cmd *cmd)
|
||||
cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
|
||||
|
||||
if (rw == ITER_SOURCE)
|
||||
ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
|
||||
else
|
||||
ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
|
||||
unlock:
|
||||
if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
|
||||
return zone->file->f_op->write_iter(&cmd->iocb, &iter);
|
||||
return zone->file->f_op->read_iter(&cmd->iocb, &iter);
|
||||
}
|
||||
|
||||
static int zloop_seq_write_prep(struct zloop_cmd *cmd)
|
||||
{
|
||||
struct request *rq = blk_mq_rq_from_pdu(cmd);
|
||||
struct zloop_device *zlo = rq->q->queuedata;
|
||||
unsigned int zone_no = rq_zone_no(rq);
|
||||
sector_t nr_sectors = blk_rq_sectors(rq);
|
||||
bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
|
||||
struct zloop_zone *zone = &zlo->zones[zone_no];
|
||||
sector_t zone_end = zone->start + zlo->zone_capacity;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
spin_lock_irqsave(&zone->wp_lock, flags);
|
||||
|
||||
/*
|
||||
* Zone append operations always go at the current write pointer, but
|
||||
* regular write operations must already be aligned to the write pointer
|
||||
* when submitted.
|
||||
*/
|
||||
if (is_append) {
|
||||
/*
|
||||
* If ordered zone append is in use, we already checked and set
|
||||
* the target sector in zloop_queue_rq().
|
||||
*/
|
||||
if (!zlo->ordered_zone_append) {
|
||||
if (zone->cond == BLK_ZONE_COND_FULL ||
|
||||
zone->wp + nr_sectors > zone_end) {
|
||||
ret = -EIO;
|
||||
goto out_unlock;
|
||||
}
|
||||
cmd->sector = zone->wp;
|
||||
}
|
||||
} else {
|
||||
if (cmd->sector != zone->wp) {
|
||||
pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
|
||||
zone_no, cmd->sector, zone->wp);
|
||||
ret = -EIO;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
/* Implicitly open the target zone. */
|
||||
if (!zloop_do_open_zone(zlo, zone, false)) {
|
||||
ret = -EIO;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance the write pointer, unless ordered zone append is in use. If
|
||||
* the write fails, the write pointer position will be corrected when
|
||||
* the next I/O starts execution.
|
||||
*/
|
||||
if (!is_append || !zlo->ordered_zone_append) {
|
||||
zone->wp += nr_sectors;
|
||||
if (zone->wp == zone_end) {
|
||||
zloop_lru_remove_open_zone(zlo, zone);
|
||||
zone->cond = BLK_ZONE_COND_FULL;
|
||||
zone->wp = ULLONG_MAX;
|
||||
}
|
||||
}
|
||||
out_unlock:
|
||||
spin_unlock_irqrestore(&zone->wp_lock, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void zloop_rw(struct zloop_cmd *cmd)
|
||||
{
|
||||
struct request *rq = blk_mq_rq_from_pdu(cmd);
|
||||
struct zloop_device *zlo = rq->q->queuedata;
|
||||
unsigned int zone_no = rq_zone_no(rq);
|
||||
sector_t nr_sectors = blk_rq_sectors(rq);
|
||||
bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
|
||||
bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
|
||||
struct zloop_zone *zone;
|
||||
int ret = -EIO;
|
||||
|
||||
atomic_set(&cmd->ref, 2);
|
||||
cmd->sector = blk_rq_pos(rq);
|
||||
cmd->nr_sectors = nr_sectors;
|
||||
cmd->ret = 0;
|
||||
|
||||
if (WARN_ON_ONCE(is_append && !zlo->zone_append))
|
||||
goto out;
|
||||
|
||||
/* We should never get an I/O beyond the device capacity. */
|
||||
if (WARN_ON_ONCE(zone_no >= zlo->nr_zones))
|
||||
goto out;
|
||||
|
||||
zone = &zlo->zones[zone_no];
|
||||
|
||||
/*
|
||||
* The block layer should never send requests that are not fully
|
||||
* contained within the zone.
|
||||
*/
|
||||
if (WARN_ON_ONCE(cmd->sector + nr_sectors >
|
||||
zone->start + zlo->zone_size))
|
||||
goto out;
|
||||
|
||||
if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
|
||||
mutex_lock(&zone->lock);
|
||||
ret = zloop_update_seq_zone(zlo, zone_no);
|
||||
mutex_unlock(&zone->lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
|
||||
mutex_lock(&zone->lock);
|
||||
ret = zloop_seq_write_prep(cmd);
|
||||
if (!ret)
|
||||
ret = zloop_do_rw(cmd);
|
||||
mutex_unlock(&zone->lock);
|
||||
} else {
|
||||
ret = zloop_do_rw(cmd);
|
||||
}
|
||||
out:
|
||||
if (ret != -EIOCBQUEUED)
|
||||
zloop_rw_complete(&cmd->iocb, ret);
|
||||
zloop_put_cmd(cmd);
|
||||
}
|
||||
|
||||
static inline bool zloop_zone_is_active(struct zloop_zone *zone)
|
||||
{
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static int zloop_record_safe_wps(struct zloop_device *zlo)
|
||||
{
|
||||
unsigned int i;
|
||||
int ret;
|
||||
|
||||
for (i = 0; i < zlo->nr_zones; i++) {
|
||||
struct zloop_zone *zone = &zlo->zones[i];
|
||||
struct file *file = zone->file;
|
||||
|
||||
if (!zloop_zone_is_active(zone))
|
||||
continue;
|
||||
ret = vfs_setxattr(file_mnt_idmap(file), file_dentry(file),
|
||||
"user.zloop.wp", &zone->wp, sizeof(zone->wp), 0);
|
||||
if (ret) {
|
||||
pr_err("%pg: failed to record write pointer (%d)\n",
|
||||
zlo->disk->part0, ret);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sync the entire FS containing the zone files instead of walking all files.
|
||||
*/
|
||||
@@ -550,6 +726,12 @@ static int zloop_flush(struct zloop_device *zlo)
|
||||
struct super_block *sb = file_inode(zlo->data_dir)->i_sb;
|
||||
int ret;
|
||||
|
||||
if (zlo->discard_write_cache) {
|
||||
ret = zloop_record_safe_wps(zlo);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
down_read(&sb->s_umount);
|
||||
ret = sync_filesystem(sb);
|
||||
up_read(&sb->s_umount);
|
||||
@@ -692,6 +874,7 @@ static bool zloop_set_zone_append_sector(struct request *rq)
|
||||
rq->__sector = zone->wp;
|
||||
zone->wp += blk_rq_sectors(rq);
|
||||
if (zone->wp >= zone_end) {
|
||||
zloop_lru_remove_open_zone(zlo, zone);
|
||||
zone->cond = BLK_ZONE_COND_FULL;
|
||||
zone->wp = ULLONG_MAX;
|
||||
}
|
||||
@@ -889,6 +1072,7 @@ static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts,
|
||||
int ret;
|
||||
|
||||
mutex_init(&zone->lock);
|
||||
INIT_LIST_HEAD(&zone->open_zone_entry);
|
||||
spin_lock_init(&zone->wp_lock);
|
||||
zone->start = (sector_t)zone_no << zlo->zone_shift;
|
||||
|
||||
@@ -1009,12 +1193,20 @@ static int zloop_ctl_add(struct zloop_options *opts)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (opts->max_open_zones > nr_zones - opts->nr_conv_zones) {
|
||||
pr_err("Invalid maximum number of open zones %u\n",
|
||||
opts->max_open_zones);
|
||||
goto out;
|
||||
}
|
||||
|
||||
zlo = kvzalloc_flex(*zlo, zones, nr_zones);
|
||||
if (!zlo) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
WRITE_ONCE(zlo->state, Zlo_creating);
|
||||
spin_lock_init(&zlo->open_zones_lock);
|
||||
INIT_LIST_HEAD(&zlo->open_zones_lru_list);
|
||||
|
||||
ret = mutex_lock_killable(&zloop_ctl_mutex);
|
||||
if (ret)
|
||||
@@ -1042,10 +1234,12 @@ static int zloop_ctl_add(struct zloop_options *opts)
|
||||
zlo->zone_capacity = zlo->zone_size;
|
||||
zlo->nr_zones = nr_zones;
|
||||
zlo->nr_conv_zones = opts->nr_conv_zones;
|
||||
zlo->max_open_zones = opts->max_open_zones;
|
||||
zlo->buffered_io = opts->buffered_io;
|
||||
zlo->zone_append = opts->zone_append;
|
||||
if (zlo->zone_append)
|
||||
zlo->ordered_zone_append = opts->ordered_zone_append;
|
||||
zlo->discard_write_cache = opts->discard_write_cache;
|
||||
|
||||
zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
|
||||
opts->nr_queues * opts->queue_depth, zlo->id);
|
||||
@@ -1088,6 +1282,7 @@ static int zloop_ctl_add(struct zloop_options *opts)
|
||||
lim.logical_block_size = zlo->block_size;
|
||||
if (zlo->zone_append)
|
||||
lim.max_hw_zone_append_sectors = lim.max_hw_sectors;
|
||||
lim.max_open_zones = zlo->max_open_zones;
|
||||
|
||||
zlo->tag_set.ops = &zloop_mq_ops;
|
||||
zlo->tag_set.nr_hw_queues = opts->nr_queues;
|
||||
@@ -1168,6 +1363,49 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void zloop_truncate(struct file *file, loff_t pos)
|
||||
{
|
||||
struct mnt_idmap *idmap = file_mnt_idmap(file);
|
||||
struct dentry *dentry = file_dentry(file);
|
||||
struct iattr newattrs;
|
||||
|
||||
newattrs.ia_size = pos;
|
||||
newattrs.ia_valid = ATTR_SIZE;
|
||||
|
||||
inode_lock(dentry->d_inode);
|
||||
notify_change(idmap, dentry, &newattrs, NULL);
|
||||
inode_unlock(dentry->d_inode);
|
||||
}
|
||||
|
||||
static void zloop_forget_cache(struct zloop_device *zlo)
|
||||
{
|
||||
unsigned int i;
|
||||
int ret;
|
||||
|
||||
pr_info("%pg: discarding volatile write cache\n", zlo->disk->part0);
|
||||
|
||||
for (i = 0; i < zlo->nr_zones; i++) {
|
||||
struct zloop_zone *zone = &zlo->zones[i];
|
||||
struct file *file = zone->file;
|
||||
sector_t old_wp;
|
||||
|
||||
if (!zloop_zone_is_active(zone))
|
||||
continue;
|
||||
|
||||
ret = vfs_getxattr(file_mnt_idmap(file), file_dentry(file),
|
||||
"user.zloop.wp", &old_wp, sizeof(old_wp));
|
||||
if (ret == -ENODATA) {
|
||||
old_wp = 0;
|
||||
} else if (ret != sizeof(old_wp)) {
|
||||
pr_err("%pg: failed to retrieve write pointer (%d)\n",
|
||||
zlo->disk->part0, ret);
|
||||
continue;
|
||||
}
|
||||
if (old_wp < zone->wp)
|
||||
zloop_truncate(file, old_wp);
|
||||
}
|
||||
}
|
||||
|
||||
static int zloop_ctl_remove(struct zloop_options *opts)
|
||||
{
|
||||
struct zloop_device *zlo;
|
||||
@@ -1202,6 +1440,10 @@ static int zloop_ctl_remove(struct zloop_options *opts)
|
||||
return ret;
|
||||
|
||||
del_gendisk(zlo->disk);
|
||||
|
||||
if (zlo->discard_write_cache)
|
||||
zloop_forget_cache(zlo);
|
||||
|
||||
put_disk(zlo->disk);
|
||||
|
||||
pr_info("Removed device %d\n", opts->id);
|
||||
@@ -1224,6 +1466,7 @@ static int zloop_parse_options(struct zloop_options *opts, const char *buf)
|
||||
opts->capacity = ZLOOP_DEF_ZONE_SIZE * ZLOOP_DEF_NR_ZONES;
|
||||
opts->zone_size = ZLOOP_DEF_ZONE_SIZE;
|
||||
opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES;
|
||||
opts->max_open_zones = ZLOOP_DEF_MAX_OPEN_ZONES;
|
||||
opts->nr_queues = ZLOOP_DEF_NR_QUEUES;
|
||||
opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH;
|
||||
opts->buffered_io = ZLOOP_DEF_BUFFERED_IO;
|
||||
@@ -1302,6 +1545,13 @@ static int zloop_parse_options(struct zloop_options *opts, const char *buf)
|
||||
}
|
||||
opts->nr_conv_zones = token;
|
||||
break;
|
||||
case ZLOOP_OPT_MAX_OPEN_ZONES:
|
||||
if (match_uint(args, &token)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
opts->max_open_zones = token;
|
||||
break;
|
||||
case ZLOOP_OPT_BASE_DIR:
|
||||
p = match_strdup(args);
|
||||
if (!p) {
|
||||
@@ -1353,6 +1603,9 @@ static int zloop_parse_options(struct zloop_options *opts, const char *buf)
|
||||
case ZLOOP_OPT_ORDERED_ZONE_APPEND:
|
||||
opts->ordered_zone_append = true;
|
||||
break;
|
||||
case ZLOOP_OPT_DISCARD_WRITE_CACHE:
|
||||
opts->discard_write_cache = true;
|
||||
break;
|
||||
case ZLOOP_OPT_ERR:
|
||||
default:
|
||||
pr_warn("unknown parameter or missing value '%s'\n", p);
|
||||
|
||||
@@ -1373,6 +1373,14 @@ static CLOSURE_CALLBACK(cached_dev_free)
|
||||
|
||||
mutex_unlock(&bch_register_lock);
|
||||
|
||||
/*
|
||||
* Wait for any pending sb_write to complete before free.
|
||||
* The sb_bio is embedded in struct cached_dev, so we must
|
||||
* ensure no I/O is in progress.
|
||||
*/
|
||||
down(&dc->sb_write_mutex);
|
||||
up(&dc->sb_write_mutex);
|
||||
|
||||
if (dc->sb_disk)
|
||||
folio_put(virt_to_folio(dc->sb_disk));
|
||||
|
||||
|
||||
@@ -208,6 +208,20 @@ enum llbitmap_state {
|
||||
BitNeedSync,
|
||||
/* data is synchronizing */
|
||||
BitSyncing,
|
||||
/*
|
||||
* Proactive sync requested for unwritten region (raid456 only).
|
||||
* Triggered via sysfs when user wants to pre-build XOR parity
|
||||
* for regions that have never been written.
|
||||
*/
|
||||
BitNeedSyncUnwritten,
|
||||
/* Proactive sync in progress for unwritten region */
|
||||
BitSyncingUnwritten,
|
||||
/*
|
||||
* XOR parity has been pre-built for a region that has never had
|
||||
* user data written. When user writes to this region, it transitions
|
||||
* to BitDirty.
|
||||
*/
|
||||
BitCleanUnwritten,
|
||||
BitStateCount,
|
||||
BitNone = 0xff,
|
||||
};
|
||||
@@ -232,6 +246,12 @@ enum llbitmap_action {
|
||||
* BitNeedSync.
|
||||
*/
|
||||
BitmapActionStale,
|
||||
/*
|
||||
* Proactive sync trigger for raid456 - builds XOR parity for
|
||||
* Unwritten regions without requiring user data write first.
|
||||
*/
|
||||
BitmapActionProactiveSync,
|
||||
BitmapActionClearUnwritten,
|
||||
BitmapActionCount,
|
||||
/* Init state is BitUnwritten */
|
||||
BitmapActionInit,
|
||||
@@ -304,6 +324,8 @@ static char state_machine[BitStateCount][BitmapActionCount] = {
|
||||
[BitmapActionDaemon] = BitNone,
|
||||
[BitmapActionDiscard] = BitNone,
|
||||
[BitmapActionStale] = BitNone,
|
||||
[BitmapActionProactiveSync] = BitNeedSyncUnwritten,
|
||||
[BitmapActionClearUnwritten] = BitNone,
|
||||
},
|
||||
[BitClean] = {
|
||||
[BitmapActionStartwrite] = BitDirty,
|
||||
@@ -314,6 +336,8 @@ static char state_machine[BitStateCount][BitmapActionCount] = {
|
||||
[BitmapActionDaemon] = BitNone,
|
||||
[BitmapActionDiscard] = BitUnwritten,
|
||||
[BitmapActionStale] = BitNeedSync,
|
||||
[BitmapActionProactiveSync] = BitNone,
|
||||
[BitmapActionClearUnwritten] = BitNone,
|
||||
},
|
||||
[BitDirty] = {
|
||||
[BitmapActionStartwrite] = BitNone,
|
||||
@@ -324,6 +348,8 @@ static char state_machine[BitStateCount][BitmapActionCount] = {
|
||||
[BitmapActionDaemon] = BitClean,
|
||||
[BitmapActionDiscard] = BitUnwritten,
|
||||
[BitmapActionStale] = BitNeedSync,
|
||||
[BitmapActionProactiveSync] = BitNone,
|
||||
[BitmapActionClearUnwritten] = BitNone,
|
||||
},
|
||||
[BitNeedSync] = {
|
||||
[BitmapActionStartwrite] = BitNone,
|
||||
@@ -334,6 +360,8 @@ static char state_machine[BitStateCount][BitmapActionCount] = {
|
||||
[BitmapActionDaemon] = BitNone,
|
||||
[BitmapActionDiscard] = BitUnwritten,
|
||||
[BitmapActionStale] = BitNone,
|
||||
[BitmapActionProactiveSync] = BitNone,
|
||||
[BitmapActionClearUnwritten] = BitNone,
|
||||
},
|
||||
[BitSyncing] = {
|
||||
[BitmapActionStartwrite] = BitNone,
|
||||
@@ -344,6 +372,44 @@ static char state_machine[BitStateCount][BitmapActionCount] = {
|
||||
[BitmapActionDaemon] = BitNone,
|
||||
[BitmapActionDiscard] = BitUnwritten,
|
||||
[BitmapActionStale] = BitNeedSync,
|
||||
[BitmapActionProactiveSync] = BitNone,
|
||||
[BitmapActionClearUnwritten] = BitNone,
|
||||
},
|
||||
[BitNeedSyncUnwritten] = {
|
||||
[BitmapActionStartwrite] = BitNeedSync,
|
||||
[BitmapActionStartsync] = BitSyncingUnwritten,
|
||||
[BitmapActionEndsync] = BitNone,
|
||||
[BitmapActionAbortsync] = BitUnwritten,
|
||||
[BitmapActionReload] = BitUnwritten,
|
||||
[BitmapActionDaemon] = BitNone,
|
||||
[BitmapActionDiscard] = BitUnwritten,
|
||||
[BitmapActionStale] = BitUnwritten,
|
||||
[BitmapActionProactiveSync] = BitNone,
|
||||
[BitmapActionClearUnwritten] = BitUnwritten,
|
||||
},
|
||||
[BitSyncingUnwritten] = {
|
||||
[BitmapActionStartwrite] = BitSyncing,
|
||||
[BitmapActionStartsync] = BitSyncingUnwritten,
|
||||
[BitmapActionEndsync] = BitCleanUnwritten,
|
||||
[BitmapActionAbortsync] = BitUnwritten,
|
||||
[BitmapActionReload] = BitUnwritten,
|
||||
[BitmapActionDaemon] = BitNone,
|
||||
[BitmapActionDiscard] = BitUnwritten,
|
||||
[BitmapActionStale] = BitUnwritten,
|
||||
[BitmapActionProactiveSync] = BitNone,
|
||||
[BitmapActionClearUnwritten] = BitUnwritten,
|
||||
},
|
||||
[BitCleanUnwritten] = {
|
||||
[BitmapActionStartwrite] = BitDirty,
|
||||
[BitmapActionStartsync] = BitNone,
|
||||
[BitmapActionEndsync] = BitNone,
|
||||
[BitmapActionAbortsync] = BitNone,
|
||||
[BitmapActionReload] = BitNone,
|
||||
[BitmapActionDaemon] = BitNone,
|
||||
[BitmapActionDiscard] = BitUnwritten,
|
||||
[BitmapActionStale] = BitUnwritten,
|
||||
[BitmapActionProactiveSync] = BitNone,
|
||||
[BitmapActionClearUnwritten] = BitUnwritten,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -376,6 +442,7 @@ static void llbitmap_infect_dirty_bits(struct llbitmap *llbitmap,
|
||||
pctl->state[pos] = level_456 ? BitNeedSync : BitDirty;
|
||||
break;
|
||||
case BitClean:
|
||||
case BitCleanUnwritten:
|
||||
pctl->state[pos] = BitDirty;
|
||||
break;
|
||||
}
|
||||
@@ -383,7 +450,7 @@ static void llbitmap_infect_dirty_bits(struct llbitmap *llbitmap,
|
||||
}
|
||||
|
||||
static void llbitmap_set_page_dirty(struct llbitmap *llbitmap, int idx,
|
||||
int offset)
|
||||
int offset, bool infect)
|
||||
{
|
||||
struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];
|
||||
unsigned int io_size = llbitmap->io_size;
|
||||
@@ -398,7 +465,7 @@ static void llbitmap_set_page_dirty(struct llbitmap *llbitmap, int idx,
|
||||
* resync all the dirty bits, hence skip infect new dirty bits to
|
||||
* prevent resync unnecessary data.
|
||||
*/
|
||||
if (llbitmap->mddev->degraded) {
|
||||
if (llbitmap->mddev->degraded || !infect) {
|
||||
set_bit(block, pctl->dirty);
|
||||
return;
|
||||
}
|
||||
@@ -438,7 +505,9 @@ static void llbitmap_write(struct llbitmap *llbitmap, enum llbitmap_state state,
|
||||
|
||||
llbitmap->pctl[idx]->state[bit] = state;
|
||||
if (state == BitDirty || state == BitNeedSync)
|
||||
llbitmap_set_page_dirty(llbitmap, idx, bit);
|
||||
llbitmap_set_page_dirty(llbitmap, idx, bit, true);
|
||||
else if (state == BitNeedSyncUnwritten)
|
||||
llbitmap_set_page_dirty(llbitmap, idx, bit, false);
|
||||
}
|
||||
|
||||
static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx)
|
||||
@@ -459,7 +528,8 @@ static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx)
|
||||
rdev_for_each(rdev, mddev) {
|
||||
sector_t sector;
|
||||
|
||||
if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
|
||||
if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags) ||
|
||||
!test_bit(In_sync, &rdev->flags))
|
||||
continue;
|
||||
|
||||
sector = mddev->bitmap_info.offset +
|
||||
@@ -584,13 +654,73 @@ static int llbitmap_cache_pages(struct llbitmap *llbitmap)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if all underlying disks support write_zeroes with unmap.
|
||||
*/
|
||||
static bool llbitmap_all_disks_support_wzeroes_unmap(struct llbitmap *llbitmap)
|
||||
{
|
||||
struct mddev *mddev = llbitmap->mddev;
|
||||
struct md_rdev *rdev;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
|
||||
if (bdev_write_zeroes_unmap_sectors(rdev->bdev) == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Issue write_zeroes to all underlying disks to zero their data regions.
|
||||
* This ensures parity consistency for RAID-456 (0 XOR 0 = 0).
|
||||
* Returns true if all disks were successfully zeroed.
|
||||
*/
|
||||
static bool llbitmap_zero_all_disks(struct llbitmap *llbitmap)
|
||||
{
|
||||
struct mddev *mddev = llbitmap->mddev;
|
||||
struct md_rdev *rdev;
|
||||
sector_t dev_sectors = mddev->dev_sectors;
|
||||
int ret;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
|
||||
ret = blkdev_issue_zeroout(rdev->bdev,
|
||||
rdev->data_offset,
|
||||
dev_sectors,
|
||||
GFP_KERNEL, 0);
|
||||
if (ret) {
|
||||
pr_warn("md/llbitmap: failed to zero disk %pg: %d\n",
|
||||
rdev->bdev, ret);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void llbitmap_init_state(struct llbitmap *llbitmap)
|
||||
{
|
||||
struct mddev *mddev = llbitmap->mddev;
|
||||
enum llbitmap_state state = BitUnwritten;
|
||||
unsigned long i;
|
||||
|
||||
if (test_and_clear_bit(BITMAP_CLEAN, &llbitmap->flags))
|
||||
if (test_and_clear_bit(BITMAP_CLEAN, &llbitmap->flags)) {
|
||||
state = BitClean;
|
||||
} else if (raid_is_456(mddev) &&
|
||||
llbitmap_all_disks_support_wzeroes_unmap(llbitmap)) {
|
||||
/*
|
||||
* All disks support write_zeroes with unmap. Zero all disks
|
||||
* to ensure parity consistency, then set BitCleanUnwritten
|
||||
* to skip initial sync.
|
||||
*/
|
||||
if (llbitmap_zero_all_disks(llbitmap))
|
||||
state = BitCleanUnwritten;
|
||||
}
|
||||
|
||||
for (i = 0; i < llbitmap->chunks; i++)
|
||||
llbitmap_write(llbitmap, state, i);
|
||||
@@ -626,11 +756,10 @@ static enum llbitmap_state llbitmap_state_machine(struct llbitmap *llbitmap,
|
||||
goto write_bitmap;
|
||||
}
|
||||
|
||||
if (c == BitNeedSync)
|
||||
if (c == BitNeedSync || c == BitNeedSyncUnwritten)
|
||||
need_resync = !mddev->degraded;
|
||||
|
||||
state = state_machine[c][action];
|
||||
|
||||
write_bitmap:
|
||||
if (unlikely(mddev->degraded)) {
|
||||
/* For degraded array, mark new data as need sync. */
|
||||
@@ -657,8 +786,7 @@ write_bitmap:
|
||||
}
|
||||
|
||||
llbitmap_write(llbitmap, state, start);
|
||||
|
||||
if (state == BitNeedSync)
|
||||
if (state == BitNeedSync || state == BitNeedSyncUnwritten)
|
||||
need_resync = !mddev->degraded;
|
||||
else if (state == BitDirty &&
|
||||
!timer_pending(&llbitmap->pending_timer))
|
||||
@@ -1069,12 +1197,12 @@ static void llbitmap_start_write(struct mddev *mddev, sector_t offset,
|
||||
int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
|
||||
int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
|
||||
|
||||
llbitmap_state_machine(llbitmap, start, end, BitmapActionStartwrite);
|
||||
|
||||
while (page_start <= page_end) {
|
||||
llbitmap_raise_barrier(llbitmap, page_start);
|
||||
page_start++;
|
||||
}
|
||||
|
||||
llbitmap_state_machine(llbitmap, start, end, BitmapActionStartwrite);
|
||||
}
|
||||
|
||||
static void llbitmap_end_write(struct mddev *mddev, sector_t offset,
|
||||
@@ -1101,12 +1229,12 @@ static void llbitmap_start_discard(struct mddev *mddev, sector_t offset,
|
||||
int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
|
||||
int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
|
||||
|
||||
llbitmap_state_machine(llbitmap, start, end, BitmapActionDiscard);
|
||||
|
||||
while (page_start <= page_end) {
|
||||
llbitmap_raise_barrier(llbitmap, page_start);
|
||||
page_start++;
|
||||
}
|
||||
|
||||
llbitmap_state_machine(llbitmap, start, end, BitmapActionDiscard);
|
||||
}
|
||||
|
||||
static void llbitmap_end_discard(struct mddev *mddev, sector_t offset,
|
||||
@@ -1228,7 +1356,7 @@ static bool llbitmap_blocks_synced(struct mddev *mddev, sector_t offset)
|
||||
unsigned long p = offset >> llbitmap->chunkshift;
|
||||
enum llbitmap_state c = llbitmap_read(llbitmap, p);
|
||||
|
||||
return c == BitClean || c == BitDirty;
|
||||
return c == BitClean || c == BitDirty || c == BitCleanUnwritten;
|
||||
}
|
||||
|
||||
static sector_t llbitmap_skip_sync_blocks(struct mddev *mddev, sector_t offset)
|
||||
@@ -1242,6 +1370,10 @@ static sector_t llbitmap_skip_sync_blocks(struct mddev *mddev, sector_t offset)
|
||||
if (c == BitUnwritten)
|
||||
return blocks;
|
||||
|
||||
/* Skip CleanUnwritten - no user data, will be reset after recovery */
|
||||
if (c == BitCleanUnwritten)
|
||||
return blocks;
|
||||
|
||||
/* For degraded array, don't skip */
|
||||
if (mddev->degraded)
|
||||
return 0;
|
||||
@@ -1260,14 +1392,25 @@ static bool llbitmap_start_sync(struct mddev *mddev, sector_t offset,
|
||||
{
|
||||
struct llbitmap *llbitmap = mddev->bitmap;
|
||||
unsigned long p = offset >> llbitmap->chunkshift;
|
||||
enum llbitmap_state state;
|
||||
|
||||
/*
|
||||
* Before recovery starts, convert CleanUnwritten to Unwritten.
|
||||
* This ensures the new disk won't have stale parity data.
|
||||
*/
|
||||
if (offset == 0 && test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery))
|
||||
llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
|
||||
BitmapActionClearUnwritten);
|
||||
|
||||
|
||||
/*
|
||||
* Handle one bit at a time, this is much simpler. And it doesn't matter
|
||||
* if md_do_sync() loop more times.
|
||||
*/
|
||||
*blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
|
||||
return llbitmap_state_machine(llbitmap, p, p,
|
||||
BitmapActionStartsync) == BitSyncing;
|
||||
state = llbitmap_state_machine(llbitmap, p, p, BitmapActionStartsync);
|
||||
return state == BitSyncing || state == BitSyncingUnwritten;
|
||||
}
|
||||
|
||||
/* Something is wrong, sync_thread stop at @offset */
|
||||
@@ -1473,9 +1616,15 @@ static ssize_t bits_show(struct mddev *mddev, char *page)
|
||||
}
|
||||
|
||||
mutex_unlock(&mddev->bitmap_info.mutex);
|
||||
return sprintf(page, "unwritten %d\nclean %d\ndirty %d\nneed sync %d\nsyncing %d\n",
|
||||
return sprintf(page,
|
||||
"unwritten %d\nclean %d\ndirty %d\n"
|
||||
"need sync %d\nsyncing %d\n"
|
||||
"need sync unwritten %d\nsyncing unwritten %d\n"
|
||||
"clean unwritten %d\n",
|
||||
bits[BitUnwritten], bits[BitClean], bits[BitDirty],
|
||||
bits[BitNeedSync], bits[BitSyncing]);
|
||||
bits[BitNeedSync], bits[BitSyncing],
|
||||
bits[BitNeedSyncUnwritten], bits[BitSyncingUnwritten],
|
||||
bits[BitCleanUnwritten]);
|
||||
}
|
||||
|
||||
static struct md_sysfs_entry llbitmap_bits = __ATTR_RO(bits);
|
||||
@@ -1548,11 +1697,39 @@ barrier_idle_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
|
||||
static struct md_sysfs_entry llbitmap_barrier_idle = __ATTR_RW(barrier_idle);
|
||||
|
||||
static ssize_t
|
||||
proactive_sync_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
{
|
||||
struct llbitmap *llbitmap;
|
||||
|
||||
/* Only for RAID-456 */
|
||||
if (!raid_is_456(mddev))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&mddev->bitmap_info.mutex);
|
||||
llbitmap = mddev->bitmap;
|
||||
if (!llbitmap || !llbitmap->pctl) {
|
||||
mutex_unlock(&mddev->bitmap_info.mutex);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/* Trigger proactive sync on all Unwritten regions */
|
||||
llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
|
||||
BitmapActionProactiveSync);
|
||||
|
||||
mutex_unlock(&mddev->bitmap_info.mutex);
|
||||
return len;
|
||||
}
|
||||
|
||||
static struct md_sysfs_entry llbitmap_proactive_sync =
|
||||
__ATTR(proactive_sync, 0200, NULL, proactive_sync_store);
|
||||
|
||||
static struct attribute *md_llbitmap_attrs[] = {
|
||||
&llbitmap_bits.attr,
|
||||
&llbitmap_metadata.attr,
|
||||
&llbitmap_daemon_sleep.attr,
|
||||
&llbitmap_barrier_idle.attr,
|
||||
&llbitmap_proactive_sync.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
|
||||
171
drivers/md/md.c
171
drivers/md/md.c
@@ -84,7 +84,6 @@ static DEFINE_XARRAY(md_submodule);
|
||||
static const struct kobj_type md_ktype;
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
|
||||
static struct workqueue_struct *md_wq;
|
||||
|
||||
/*
|
||||
* This workqueue is used for sync_work to register new sync_thread, and for
|
||||
@@ -98,7 +97,7 @@ static struct workqueue_struct *md_misc_wq;
|
||||
static int remove_and_add_spares(struct mddev *mddev,
|
||||
struct md_rdev *this);
|
||||
static void mddev_detach(struct mddev *mddev);
|
||||
static void export_rdev(struct md_rdev *rdev, struct mddev *mddev);
|
||||
static void export_rdev(struct md_rdev *rdev);
|
||||
static void md_wakeup_thread_directly(struct md_thread __rcu **thread);
|
||||
|
||||
/*
|
||||
@@ -188,7 +187,6 @@ static int rdev_init_serial(struct md_rdev *rdev)
|
||||
|
||||
spin_lock_init(&serial_tmp->serial_lock);
|
||||
serial_tmp->serial_rb = RB_ROOT_CACHED;
|
||||
init_waitqueue_head(&serial_tmp->serial_io_wait);
|
||||
}
|
||||
|
||||
rdev->serial = serial;
|
||||
@@ -489,6 +487,17 @@ int mddev_suspend(struct mddev *mddev, bool interruptible)
|
||||
}
|
||||
|
||||
percpu_ref_kill(&mddev->active_io);
|
||||
|
||||
/*
|
||||
* RAID456 IO can sleep in wait_for_reshape while still holding an
|
||||
* active_io reference. If reshape is already interrupted or frozen,
|
||||
* wake those waiters so they can abort and drop the reference instead
|
||||
* of deadlocking suspend.
|
||||
*/
|
||||
if (mddev->pers && mddev->pers->prepare_suspend &&
|
||||
reshape_interrupted(mddev))
|
||||
mddev->pers->prepare_suspend(mddev);
|
||||
|
||||
if (interruptible)
|
||||
err = wait_event_interruptible(mddev->sb_wait,
|
||||
percpu_ref_is_zero(&mddev->active_io));
|
||||
@@ -959,7 +968,7 @@ void mddev_unlock(struct mddev *mddev)
|
||||
list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
|
||||
list_del_init(&rdev->same_set);
|
||||
kobject_del(&rdev->kobj);
|
||||
export_rdev(rdev, mddev);
|
||||
export_rdev(rdev);
|
||||
}
|
||||
|
||||
if (!legacy_async_del_gendisk) {
|
||||
@@ -2632,7 +2641,7 @@ void md_autodetect_dev(dev_t dev);
|
||||
/* just for claiming the bdev */
|
||||
static struct md_rdev claim_rdev;
|
||||
|
||||
static void export_rdev(struct md_rdev *rdev, struct mddev *mddev)
|
||||
static void export_rdev(struct md_rdev *rdev)
|
||||
{
|
||||
pr_debug("md: export_rdev(%pg)\n", rdev->bdev);
|
||||
md_rdev_clear(rdev);
|
||||
@@ -2788,7 +2797,9 @@ void md_update_sb(struct mddev *mddev, int force_change)
|
||||
if (!md_is_rdwr(mddev)) {
|
||||
if (force_change)
|
||||
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
|
||||
pr_err("%s: can't update sb for read-only array %s\n", __func__, mdname(mddev));
|
||||
if (!mddev_is_dm(mddev))
|
||||
pr_err_ratelimited("%s: can't update sb for read-only array %s\n",
|
||||
__func__, mdname(mddev));
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -4848,7 +4859,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
err = bind_rdev_to_array(rdev, mddev);
|
||||
out:
|
||||
if (err)
|
||||
export_rdev(rdev, mddev);
|
||||
export_rdev(rdev);
|
||||
mddev_unlock_and_resume(mddev);
|
||||
if (!err)
|
||||
md_new_event();
|
||||
@@ -6128,10 +6139,16 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
|
||||
}
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
rv = entry->store(mddev, page, length);
|
||||
mddev_put(mddev);
|
||||
|
||||
/*
|
||||
* For "array_state=clear", dropping the extra kobject reference from
|
||||
* sysfs_break_active_protection() can trigger md kobject deletion.
|
||||
* Restore active protection before mddev_put() so deletion happens
|
||||
* after the sysfs write path fully unwinds.
|
||||
*/
|
||||
if (kn)
|
||||
sysfs_unbreak_active_protection(kn);
|
||||
mddev_put(mddev);
|
||||
|
||||
return rv;
|
||||
}
|
||||
@@ -6447,15 +6464,124 @@ static void md_safemode_timeout(struct timer_list *t)
|
||||
|
||||
static int start_dirty_degraded;
|
||||
|
||||
/*
|
||||
* Read bitmap superblock and return the bitmap_id based on disk version.
|
||||
* This is used as fallback when default bitmap version and on-disk version
|
||||
* doesn't match, and mdadm is not the latest version to set bitmap_type.
|
||||
*/
|
||||
static enum md_submodule_id md_bitmap_get_id_from_sb(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
struct page *sb_page;
|
||||
bitmap_super_t *sb;
|
||||
enum md_submodule_id id = ID_BITMAP_NONE;
|
||||
sector_t sector;
|
||||
u32 version;
|
||||
|
||||
if (!mddev->bitmap_info.offset)
|
||||
return ID_BITMAP_NONE;
|
||||
|
||||
sb_page = alloc_page(GFP_KERNEL);
|
||||
if (!sb_page) {
|
||||
pr_warn("md: %s: failed to allocate memory for bitmap\n",
|
||||
mdname(mddev));
|
||||
return ID_BITMAP_NONE;
|
||||
}
|
||||
|
||||
sector = mddev->bitmap_info.offset;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
u32 iosize;
|
||||
|
||||
if (!test_bit(In_sync, &rdev->flags) ||
|
||||
test_bit(Faulty, &rdev->flags) ||
|
||||
test_bit(Bitmap_sync, &rdev->flags))
|
||||
continue;
|
||||
|
||||
iosize = roundup(sizeof(bitmap_super_t),
|
||||
bdev_logical_block_size(rdev->bdev));
|
||||
if (sync_page_io(rdev, sector, iosize, sb_page, REQ_OP_READ,
|
||||
true))
|
||||
goto read_ok;
|
||||
}
|
||||
pr_warn("md: %s: failed to read bitmap from any device\n",
|
||||
mdname(mddev));
|
||||
goto out;
|
||||
|
||||
read_ok:
|
||||
sb = kmap_local_page(sb_page);
|
||||
if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) {
|
||||
pr_warn("md: %s: invalid bitmap magic 0x%x\n",
|
||||
mdname(mddev), le32_to_cpu(sb->magic));
|
||||
goto out_unmap;
|
||||
}
|
||||
|
||||
version = le32_to_cpu(sb->version);
|
||||
switch (version) {
|
||||
case BITMAP_MAJOR_LO:
|
||||
case BITMAP_MAJOR_HI:
|
||||
case BITMAP_MAJOR_CLUSTERED:
|
||||
id = ID_BITMAP;
|
||||
break;
|
||||
case BITMAP_MAJOR_LOCKLESS:
|
||||
id = ID_LLBITMAP;
|
||||
break;
|
||||
default:
|
||||
pr_warn("md: %s: unknown bitmap version %u\n",
|
||||
mdname(mddev), version);
|
||||
break;
|
||||
}
|
||||
|
||||
out_unmap:
|
||||
kunmap_local(sb);
|
||||
out:
|
||||
__free_page(sb_page);
|
||||
return id;
|
||||
}
|
||||
|
||||
static int md_bitmap_create(struct mddev *mddev)
|
||||
{
|
||||
enum md_submodule_id orig_id = mddev->bitmap_id;
|
||||
enum md_submodule_id sb_id;
|
||||
int err;
|
||||
|
||||
if (mddev->bitmap_id == ID_BITMAP_NONE)
|
||||
return -EINVAL;
|
||||
|
||||
if (!mddev_set_bitmap_ops(mddev))
|
||||
return -ENOENT;
|
||||
|
||||
return mddev->bitmap_ops->create(mddev);
|
||||
err = mddev->bitmap_ops->create(mddev);
|
||||
if (!err)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Create failed, if default bitmap version and on-disk version
|
||||
* doesn't match, and mdadm is not the latest version to set
|
||||
* bitmap_type, set bitmap_ops based on the disk version.
|
||||
*/
|
||||
mddev_clear_bitmap_ops(mddev);
|
||||
|
||||
sb_id = md_bitmap_get_id_from_sb(mddev);
|
||||
if (sb_id == ID_BITMAP_NONE || sb_id == orig_id)
|
||||
return err;
|
||||
|
||||
pr_info("md: %s: bitmap version mismatch, switching from %d to %d\n",
|
||||
mdname(mddev), orig_id, sb_id);
|
||||
|
||||
mddev->bitmap_id = sb_id;
|
||||
if (!mddev_set_bitmap_ops(mddev)) {
|
||||
mddev->bitmap_id = orig_id;
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
err = mddev->bitmap_ops->create(mddev);
|
||||
if (err) {
|
||||
mddev_clear_bitmap_ops(mddev);
|
||||
mddev->bitmap_id = orig_id;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void md_bitmap_destroy(struct mddev *mddev)
|
||||
@@ -7140,7 +7266,7 @@ static void autorun_devices(int part)
|
||||
rdev_for_each_list(rdev, tmp, &candidates) {
|
||||
list_del_init(&rdev->same_set);
|
||||
if (bind_rdev_to_array(rdev, mddev))
|
||||
export_rdev(rdev, mddev);
|
||||
export_rdev(rdev);
|
||||
}
|
||||
autorun_array(mddev);
|
||||
mddev_unlock_and_resume(mddev);
|
||||
@@ -7150,7 +7276,7 @@ static void autorun_devices(int part)
|
||||
*/
|
||||
rdev_for_each_list(rdev, tmp, &candidates) {
|
||||
list_del_init(&rdev->same_set);
|
||||
export_rdev(rdev, mddev);
|
||||
export_rdev(rdev);
|
||||
}
|
||||
mddev_put(mddev);
|
||||
}
|
||||
@@ -7338,13 +7464,13 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
|
||||
pr_warn("md: %pg has different UUID to %pg\n",
|
||||
rdev->bdev,
|
||||
rdev0->bdev);
|
||||
export_rdev(rdev, mddev);
|
||||
export_rdev(rdev);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
err = bind_rdev_to_array(rdev, mddev);
|
||||
if (err)
|
||||
export_rdev(rdev, mddev);
|
||||
export_rdev(rdev);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -7387,7 +7513,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
|
||||
/* This was a hot-add request, but events doesn't
|
||||
* match, so reject it.
|
||||
*/
|
||||
export_rdev(rdev, mddev);
|
||||
export_rdev(rdev);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -7413,7 +7539,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
|
||||
}
|
||||
}
|
||||
if (has_journal || mddev->bitmap) {
|
||||
export_rdev(rdev, mddev);
|
||||
export_rdev(rdev);
|
||||
return -EBUSY;
|
||||
}
|
||||
set_bit(Journal, &rdev->flags);
|
||||
@@ -7428,7 +7554,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
|
||||
/* --add initiated by this node */
|
||||
err = mddev->cluster_ops->add_new_disk(mddev, rdev);
|
||||
if (err) {
|
||||
export_rdev(rdev, mddev);
|
||||
export_rdev(rdev);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
@@ -7438,7 +7564,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
|
||||
err = bind_rdev_to_array(rdev, mddev);
|
||||
|
||||
if (err)
|
||||
export_rdev(rdev, mddev);
|
||||
export_rdev(rdev);
|
||||
|
||||
if (mddev_is_clustered(mddev)) {
|
||||
if (info->state & (1 << MD_DISK_CANDIDATE)) {
|
||||
@@ -7501,7 +7627,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
|
||||
|
||||
err = bind_rdev_to_array(rdev, mddev);
|
||||
if (err) {
|
||||
export_rdev(rdev, mddev);
|
||||
export_rdev(rdev);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
@@ -7613,7 +7739,7 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
|
||||
return 0;
|
||||
|
||||
abort_export:
|
||||
export_rdev(rdev, mddev);
|
||||
export_rdev(rdev);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -10503,10 +10629,6 @@ static int __init md_init(void)
|
||||
goto err_bitmap;
|
||||
|
||||
ret = -ENOMEM;
|
||||
md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
|
||||
if (!md_wq)
|
||||
goto err_wq;
|
||||
|
||||
md_misc_wq = alloc_workqueue("md_misc", WQ_PERCPU, 0);
|
||||
if (!md_misc_wq)
|
||||
goto err_misc_wq;
|
||||
@@ -10531,8 +10653,6 @@ err_mdp:
|
||||
err_md:
|
||||
destroy_workqueue(md_misc_wq);
|
||||
err_misc_wq:
|
||||
destroy_workqueue(md_wq);
|
||||
err_wq:
|
||||
md_llbitmap_exit();
|
||||
err_bitmap:
|
||||
md_bitmap_exit();
|
||||
@@ -10841,7 +10961,6 @@ static __exit void md_exit(void)
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
|
||||
destroy_workqueue(md_misc_wq);
|
||||
destroy_workqueue(md_wq);
|
||||
md_bitmap_exit();
|
||||
}
|
||||
|
||||
|
||||
@@ -126,7 +126,6 @@ enum sync_action {
|
||||
struct serial_in_rdev {
|
||||
struct rb_root_cached serial_rb;
|
||||
spinlock_t serial_lock;
|
||||
wait_queue_head_t serial_io_wait;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -381,7 +380,11 @@ struct serial_info {
|
||||
struct rb_node node;
|
||||
sector_t start; /* start sector of rb node */
|
||||
sector_t last; /* end sector of rb node */
|
||||
sector_t wnode_start; /* address of waiting nodes on the same list */
|
||||
sector_t _subtree_last; /* highest sector in subtree of rb node */
|
||||
struct list_head list_node;
|
||||
struct list_head waiters;
|
||||
struct completion ready;
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
@@ -143,13 +143,13 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
conf->strip_zone = kzalloc_objs(struct strip_zone, conf->nr_strip_zones);
|
||||
conf->strip_zone = kvzalloc_objs(struct strip_zone, conf->nr_strip_zones);
|
||||
if (!conf->strip_zone)
|
||||
goto abort;
|
||||
conf->devlist = kzalloc(array3_size(sizeof(struct md_rdev *),
|
||||
conf->nr_strip_zones,
|
||||
mddev->raid_disks),
|
||||
GFP_KERNEL);
|
||||
conf->devlist = kvzalloc(array3_size(sizeof(struct md_rdev *),
|
||||
conf->nr_strip_zones,
|
||||
mddev->raid_disks),
|
||||
GFP_KERNEL);
|
||||
if (!conf->devlist)
|
||||
goto abort;
|
||||
|
||||
@@ -291,8 +291,8 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||
|
||||
return 0;
|
||||
abort:
|
||||
kfree(conf->strip_zone);
|
||||
kfree(conf->devlist);
|
||||
kvfree(conf->strip_zone);
|
||||
kvfree(conf->devlist);
|
||||
kfree(conf);
|
||||
*private_conf = ERR_PTR(err);
|
||||
return err;
|
||||
@@ -373,8 +373,8 @@ static void raid0_free(struct mddev *mddev, void *priv)
|
||||
{
|
||||
struct r0conf *conf = priv;
|
||||
|
||||
kfree(conf->strip_zone);
|
||||
kfree(conf->devlist);
|
||||
kvfree(conf->strip_zone);
|
||||
kvfree(conf->devlist);
|
||||
kfree(conf);
|
||||
}
|
||||
|
||||
|
||||
@@ -57,21 +57,29 @@ INTERVAL_TREE_DEFINE(struct serial_info, node, sector_t, _subtree_last,
|
||||
START, LAST, static inline, raid1_rb);
|
||||
|
||||
static int check_and_add_serial(struct md_rdev *rdev, struct r1bio *r1_bio,
|
||||
struct serial_info *si, int idx)
|
||||
struct serial_info *si)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
sector_t lo = r1_bio->sector;
|
||||
sector_t hi = lo + r1_bio->sectors;
|
||||
sector_t hi = lo + r1_bio->sectors - 1;
|
||||
int idx = sector_to_idx(r1_bio->sector);
|
||||
struct serial_in_rdev *serial = &rdev->serial[idx];
|
||||
struct serial_info *head_si;
|
||||
|
||||
spin_lock_irqsave(&serial->serial_lock, flags);
|
||||
/* collision happened */
|
||||
if (raid1_rb_iter_first(&serial->serial_rb, lo, hi))
|
||||
ret = -EBUSY;
|
||||
else {
|
||||
head_si = raid1_rb_iter_first(&serial->serial_rb, lo, hi);
|
||||
if (head_si && head_si != si) {
|
||||
si->start = lo;
|
||||
si->last = hi;
|
||||
si->wnode_start = head_si->wnode_start;
|
||||
list_add_tail(&si->list_node, &head_si->waiters);
|
||||
ret = -EBUSY;
|
||||
} else if (!head_si) {
|
||||
si->start = lo;
|
||||
si->last = hi;
|
||||
si->wnode_start = si->start;
|
||||
raid1_rb_insert(si, &serial->serial_rb);
|
||||
}
|
||||
spin_unlock_irqrestore(&serial->serial_lock, flags);
|
||||
@@ -83,19 +91,22 @@ static void wait_for_serialization(struct md_rdev *rdev, struct r1bio *r1_bio)
|
||||
{
|
||||
struct mddev *mddev = rdev->mddev;
|
||||
struct serial_info *si;
|
||||
int idx = sector_to_idx(r1_bio->sector);
|
||||
struct serial_in_rdev *serial = &rdev->serial[idx];
|
||||
|
||||
if (WARN_ON(!mddev->serial_info_pool))
|
||||
return;
|
||||
si = mempool_alloc(mddev->serial_info_pool, GFP_NOIO);
|
||||
wait_event(serial->serial_io_wait,
|
||||
check_and_add_serial(rdev, r1_bio, si, idx) == 0);
|
||||
INIT_LIST_HEAD(&si->waiters);
|
||||
INIT_LIST_HEAD(&si->list_node);
|
||||
init_completion(&si->ready);
|
||||
while (check_and_add_serial(rdev, r1_bio, si)) {
|
||||
wait_for_completion(&si->ready);
|
||||
reinit_completion(&si->ready);
|
||||
}
|
||||
}
|
||||
|
||||
static void remove_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
|
||||
{
|
||||
struct serial_info *si;
|
||||
struct serial_info *si, *iter_si;
|
||||
unsigned long flags;
|
||||
int found = 0;
|
||||
struct mddev *mddev = rdev->mddev;
|
||||
@@ -106,16 +117,28 @@ static void remove_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
|
||||
for (si = raid1_rb_iter_first(&serial->serial_rb, lo, hi);
|
||||
si; si = raid1_rb_iter_next(si, lo, hi)) {
|
||||
if (si->start == lo && si->last == hi) {
|
||||
raid1_rb_remove(si, &serial->serial_rb);
|
||||
mempool_free(si, mddev->serial_info_pool);
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
if (found) {
|
||||
raid1_rb_remove(si, &serial->serial_rb);
|
||||
if (!list_empty(&si->waiters)) {
|
||||
list_for_each_entry(iter_si, &si->waiters, list_node) {
|
||||
if (iter_si->wnode_start == si->wnode_start) {
|
||||
list_del_init(&iter_si->list_node);
|
||||
list_splice_init(&si->waiters, &iter_si->waiters);
|
||||
raid1_rb_insert(iter_si, &serial->serial_rb);
|
||||
complete(&iter_si->ready);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
mempool_free(si, mddev->serial_info_pool);
|
||||
} else {
|
||||
WARN(1, "The write IO is not recorded for serialization\n");
|
||||
}
|
||||
spin_unlock_irqrestore(&serial->serial_lock, flags);
|
||||
wake_up(&serial->serial_io_wait);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -452,7 +475,7 @@ static void raid1_end_write_request(struct bio *bio)
|
||||
int mirror = find_bio_disk(r1_bio, bio);
|
||||
struct md_rdev *rdev = conf->mirrors[mirror].rdev;
|
||||
sector_t lo = r1_bio->sector;
|
||||
sector_t hi = r1_bio->sector + r1_bio->sectors;
|
||||
sector_t hi = r1_bio->sector + r1_bio->sectors - 1;
|
||||
bool ignore_error = !raid1_should_handle_error(bio) ||
|
||||
(bio->bi_status && bio_op(bio) == REQ_OP_DISCARD);
|
||||
|
||||
@@ -1878,7 +1901,7 @@ static bool raid1_add_conf(struct r1conf *conf, struct md_rdev *rdev, int disk,
|
||||
if (info->rdev)
|
||||
return false;
|
||||
|
||||
if (bdev_nonrot(rdev->bdev)) {
|
||||
if (!bdev_rot(rdev->bdev)) {
|
||||
set_bit(Nonrot, &rdev->flags);
|
||||
WRITE_ONCE(conf->nonrot_disks, conf->nonrot_disks + 1);
|
||||
}
|
||||
|
||||
@@ -806,7 +806,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
|
||||
if (!do_balance)
|
||||
break;
|
||||
|
||||
nonrot = bdev_nonrot(rdev->bdev);
|
||||
nonrot = !bdev_rot(rdev->bdev);
|
||||
has_nonrot_disk |= nonrot;
|
||||
pending = atomic_read(&rdev->nr_pending);
|
||||
if (min_pending > pending && nonrot) {
|
||||
@@ -1184,7 +1184,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
}
|
||||
|
||||
if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) {
|
||||
raid_end_bio_io(r10_bio);
|
||||
free_r10bio(r10_bio);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1372,7 +1372,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
|
||||
sectors = r10_bio->sectors;
|
||||
if (!regular_request_wait(mddev, conf, bio, sectors)) {
|
||||
raid_end_bio_io(r10_bio);
|
||||
free_r10bio(r10_bio);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -2002,15 +2002,27 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log,
|
||||
return -ENOMEM;
|
||||
|
||||
while (mb_offset < le32_to_cpu(mb->meta_size)) {
|
||||
sector_t payload_len;
|
||||
|
||||
payload = (void *)mb + mb_offset;
|
||||
payload_flush = (void *)mb + mb_offset;
|
||||
|
||||
if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_DATA) {
|
||||
payload_len = sizeof(struct r5l_payload_data_parity) +
|
||||
(sector_t)sizeof(__le32) *
|
||||
(le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9));
|
||||
if (mb_offset + payload_len > le32_to_cpu(mb->meta_size))
|
||||
goto mismatch;
|
||||
if (r5l_recovery_verify_data_checksum(
|
||||
log, ctx, page, log_offset,
|
||||
payload->checksum[0]) < 0)
|
||||
goto mismatch;
|
||||
} else if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_PARITY) {
|
||||
payload_len = sizeof(struct r5l_payload_data_parity) +
|
||||
(sector_t)sizeof(__le32) *
|
||||
(le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9));
|
||||
if (mb_offset + payload_len > le32_to_cpu(mb->meta_size))
|
||||
goto mismatch;
|
||||
if (r5l_recovery_verify_data_checksum(
|
||||
log, ctx, page, log_offset,
|
||||
payload->checksum[0]) < 0)
|
||||
@@ -2023,22 +2035,18 @@ r5l_recovery_verify_data_checksum_for_mb(struct r5l_log *log,
|
||||
payload->checksum[1]) < 0)
|
||||
goto mismatch;
|
||||
} else if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_FLUSH) {
|
||||
/* nothing to do for R5LOG_PAYLOAD_FLUSH here */
|
||||
payload_len = sizeof(struct r5l_payload_flush) +
|
||||
(sector_t)le32_to_cpu(payload_flush->size);
|
||||
if (mb_offset + payload_len > le32_to_cpu(mb->meta_size))
|
||||
goto mismatch;
|
||||
} else /* not R5LOG_PAYLOAD_DATA/PARITY/FLUSH */
|
||||
goto mismatch;
|
||||
|
||||
if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_FLUSH) {
|
||||
mb_offset += sizeof(struct r5l_payload_flush) +
|
||||
le32_to_cpu(payload_flush->size);
|
||||
} else {
|
||||
/* DATA or PARITY payload */
|
||||
if (le16_to_cpu(payload->header.type) != R5LOG_PAYLOAD_FLUSH) {
|
||||
log_offset = r5l_ring_add(log, log_offset,
|
||||
le32_to_cpu(payload->size));
|
||||
mb_offset += sizeof(struct r5l_payload_data_parity) +
|
||||
sizeof(__le32) *
|
||||
(le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9));
|
||||
}
|
||||
|
||||
mb_offset += payload_len;
|
||||
}
|
||||
|
||||
put_page(page);
|
||||
@@ -2089,6 +2097,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
|
||||
log_offset = r5l_ring_add(log, ctx->pos, BLOCK_SECTORS);
|
||||
|
||||
while (mb_offset < le32_to_cpu(mb->meta_size)) {
|
||||
sector_t payload_len;
|
||||
int dd;
|
||||
|
||||
payload = (void *)mb + mb_offset;
|
||||
@@ -2097,6 +2106,12 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
|
||||
if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_FLUSH) {
|
||||
int i, count;
|
||||
|
||||
payload_len = sizeof(struct r5l_payload_flush) +
|
||||
(sector_t)le32_to_cpu(payload_flush->size);
|
||||
if (mb_offset + payload_len >
|
||||
le32_to_cpu(mb->meta_size))
|
||||
return -EINVAL;
|
||||
|
||||
count = le32_to_cpu(payload_flush->size) / sizeof(__le64);
|
||||
for (i = 0; i < count; ++i) {
|
||||
stripe_sect = le64_to_cpu(payload_flush->flush_stripes[i]);
|
||||
@@ -2110,12 +2125,17 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
|
||||
}
|
||||
}
|
||||
|
||||
mb_offset += sizeof(struct r5l_payload_flush) +
|
||||
le32_to_cpu(payload_flush->size);
|
||||
mb_offset += payload_len;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* DATA or PARITY payload */
|
||||
payload_len = sizeof(struct r5l_payload_data_parity) +
|
||||
(sector_t)sizeof(__le32) *
|
||||
(le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9));
|
||||
if (mb_offset + payload_len > le32_to_cpu(mb->meta_size))
|
||||
return -EINVAL;
|
||||
|
||||
stripe_sect = (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_DATA) ?
|
||||
raid5_compute_sector(
|
||||
conf, le64_to_cpu(payload->location), 0, &dd,
|
||||
@@ -2180,9 +2200,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
|
||||
log_offset = r5l_ring_add(log, log_offset,
|
||||
le32_to_cpu(payload->size));
|
||||
|
||||
mb_offset += sizeof(struct r5l_payload_data_parity) +
|
||||
sizeof(__le32) *
|
||||
(le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9));
|
||||
mb_offset += payload_len;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -3916,6 +3916,8 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
|
||||
break;
|
||||
}
|
||||
BUG_ON(other < 0);
|
||||
if (test_bit(R5_LOCKED, &sh->dev[other].flags))
|
||||
return 0;
|
||||
pr_debug("Computing stripe %llu blocks %d,%d\n",
|
||||
(unsigned long long)sh->sector,
|
||||
disk_idx, other);
|
||||
@@ -4594,20 +4596,6 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
|
||||
async_tx_quiesce(&tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* handle_stripe - do things to a stripe.
|
||||
*
|
||||
* We lock the stripe by setting STRIPE_ACTIVE and then examine the
|
||||
* state of various bits to see what needs to be done.
|
||||
* Possible results:
|
||||
* return some read requests which now have data
|
||||
* return some write requests which are safely on storage
|
||||
* schedule a read on some buffers
|
||||
* schedule a write of some buffers
|
||||
* return confirmation of parity correctness
|
||||
*
|
||||
*/
|
||||
|
||||
static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
{
|
||||
struct r5conf *conf = sh->raid_conf;
|
||||
@@ -4901,6 +4889,18 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
|
||||
set_bit(STRIPE_HANDLE, &head_sh->state);
|
||||
}
|
||||
|
||||
/*
|
||||
* handle_stripe - do things to a stripe.
|
||||
*
|
||||
* We lock the stripe by setting STRIPE_ACTIVE and then examine the
|
||||
* state of various bits to see what needs to be done.
|
||||
* Possible results:
|
||||
* return some read requests which now have data
|
||||
* return some write requests which are safely on storage
|
||||
* schedule a read on some buffers
|
||||
* schedule a write of some buffers
|
||||
* return confirmation of parity correctness
|
||||
*/
|
||||
static void handle_stripe(struct stripe_head *sh)
|
||||
{
|
||||
struct stripe_head_state s;
|
||||
@@ -6641,7 +6641,13 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio,
|
||||
}
|
||||
|
||||
if (!add_stripe_bio(sh, raid_bio, dd_idx, 0, 0)) {
|
||||
raid5_release_stripe(sh);
|
||||
int hash;
|
||||
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
hash = sh->hash_lock_index;
|
||||
__release_stripe(conf, sh,
|
||||
&conf->temp_inactive_list[hash]);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
conf->retry_read_aligned = raid_bio;
|
||||
conf->retry_read_offset = scnt;
|
||||
return handled;
|
||||
@@ -7541,7 +7547,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (test_bit(Journal, &rdev->flags))
|
||||
continue;
|
||||
if (bdev_nonrot(rdev->bdev)) {
|
||||
if (!bdev_rot(rdev->bdev)) {
|
||||
conf->batch_bio_dispatch = false;
|
||||
break;
|
||||
}
|
||||
@@ -7780,6 +7786,7 @@ static int raid5_set_limits(struct mddev *mddev)
|
||||
lim.logical_block_size = mddev->logical_block_size;
|
||||
lim.io_min = mddev->chunk_sectors << 9;
|
||||
lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
|
||||
lim.chunk_sectors = lim.io_opt >> 9;
|
||||
lim.features |= BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE;
|
||||
lim.discard_granularity = stripe;
|
||||
lim.max_write_zeroes_sectors = 0;
|
||||
|
||||
@@ -801,7 +801,6 @@ raid5_get_dev_page(struct stripe_head *sh, int disk_idx)
|
||||
}
|
||||
#endif
|
||||
|
||||
void md_raid5_kick_device(struct r5conf *conf);
|
||||
int raid5_set_cache_size(struct mddev *mddev, int size);
|
||||
sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous);
|
||||
void raid5_release_stripe(struct stripe_head *sh);
|
||||
|
||||
6
drivers/nvme/common/.kunitconfig
Normal file
6
drivers/nvme/common/.kunitconfig
Normal file
@@ -0,0 +1,6 @@
|
||||
CONFIG_KUNIT=y
|
||||
CONFIG_PCI=y
|
||||
CONFIG_BLOCK=y
|
||||
CONFIG_BLK_DEV_NVME=y
|
||||
CONFIG_NVME_HOST_AUTH=y
|
||||
CONFIG_NVME_AUTH_KUNIT_TEST=y
|
||||
@@ -7,9 +7,15 @@ config NVME_KEYRING
|
||||
config NVME_AUTH
|
||||
tristate
|
||||
select CRYPTO
|
||||
select CRYPTO_HMAC
|
||||
select CRYPTO_SHA256
|
||||
select CRYPTO_SHA512
|
||||
select CRYPTO_DH
|
||||
select CRYPTO_DH_RFC7919_GROUPS
|
||||
select CRYPTO_HKDF
|
||||
select CRYPTO_LIB_SHA256
|
||||
select CRYPTO_LIB_SHA512
|
||||
|
||||
config NVME_AUTH_KUNIT_TEST
|
||||
tristate "KUnit tests for NVMe authentication" if !KUNIT_ALL_TESTS
|
||||
depends on KUNIT && NVME_AUTH
|
||||
default KUNIT_ALL_TESTS
|
||||
help
|
||||
Enable KUnit tests for some of the common code for NVMe over Fabrics
|
||||
In-Band Authentication.
|
||||
|
||||
@@ -7,3 +7,5 @@ obj-$(CONFIG_NVME_KEYRING) += nvme-keyring.o
|
||||
|
||||
nvme-auth-y += auth.o
|
||||
nvme-keyring-y += keyring.o
|
||||
|
||||
obj-$(CONFIG_NVME_AUTH_KUNIT_TEST) += tests/auth_kunit.o
|
||||
|
||||
@@ -9,14 +9,11 @@
|
||||
#include <linux/prandom.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/unaligned.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/dh.h>
|
||||
#include <crypto/hkdf.h>
|
||||
#include <crypto/sha2.h>
|
||||
#include <linux/nvme.h>
|
||||
#include <linux/nvme-auth.h>
|
||||
|
||||
#define HKDF_MAX_HASHLEN 64
|
||||
|
||||
static u32 nvme_dhchap_seqnum;
|
||||
static DEFINE_MUTEX(nvme_dhchap_mutex);
|
||||
|
||||
@@ -38,9 +35,9 @@ u32 nvme_auth_get_seqnum(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_get_seqnum);
|
||||
|
||||
static struct nvme_auth_dhgroup_map {
|
||||
const char name[16];
|
||||
const char kpp[16];
|
||||
static const struct nvme_auth_dhgroup_map {
|
||||
char name[16];
|
||||
char kpp[16];
|
||||
} dhgroup_map[] = {
|
||||
[NVME_AUTH_DHGROUP_NULL] = {
|
||||
.name = "null", .kpp = "null" },
|
||||
@@ -89,25 +86,21 @@ u8 nvme_auth_dhgroup_id(const char *dhgroup_name)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_dhgroup_id);
|
||||
|
||||
static struct nvme_dhchap_hash_map {
|
||||
static const struct nvme_dhchap_hash_map {
|
||||
int len;
|
||||
const char hmac[15];
|
||||
const char digest[8];
|
||||
char hmac[15];
|
||||
} hash_map[] = {
|
||||
[NVME_AUTH_HASH_SHA256] = {
|
||||
.len = 32,
|
||||
.hmac = "hmac(sha256)",
|
||||
.digest = "sha256",
|
||||
},
|
||||
[NVME_AUTH_HASH_SHA384] = {
|
||||
.len = 48,
|
||||
.hmac = "hmac(sha384)",
|
||||
.digest = "sha384",
|
||||
},
|
||||
[NVME_AUTH_HASH_SHA512] = {
|
||||
.len = 64,
|
||||
.hmac = "hmac(sha512)",
|
||||
.digest = "sha512",
|
||||
},
|
||||
};
|
||||
|
||||
@@ -119,14 +112,6 @@ const char *nvme_auth_hmac_name(u8 hmac_id)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_hmac_name);
|
||||
|
||||
const char *nvme_auth_digest_name(u8 hmac_id)
|
||||
{
|
||||
if (hmac_id >= ARRAY_SIZE(hash_map))
|
||||
return NULL;
|
||||
return hash_map[hmac_id].digest;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_digest_name);
|
||||
|
||||
u8 nvme_auth_hmac_id(const char *hmac_name)
|
||||
{
|
||||
int i;
|
||||
@@ -161,11 +146,10 @@ u32 nvme_auth_key_struct_size(u32 key_len)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_key_struct_size);
|
||||
|
||||
struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
|
||||
u8 key_hash)
|
||||
struct nvme_dhchap_key *nvme_auth_extract_key(const char *secret, u8 key_hash)
|
||||
{
|
||||
struct nvme_dhchap_key *key;
|
||||
unsigned char *p;
|
||||
const char *p;
|
||||
u32 crc;
|
||||
int ret, key_len;
|
||||
size_t allocated_len = strlen(secret);
|
||||
@@ -183,14 +167,14 @@ struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
|
||||
pr_debug("base64 key decoding error %d\n",
|
||||
key_len);
|
||||
ret = key_len;
|
||||
goto out_free_secret;
|
||||
goto out_free_key;
|
||||
}
|
||||
|
||||
if (key_len != 36 && key_len != 52 &&
|
||||
key_len != 68) {
|
||||
pr_err("Invalid key len %d\n", key_len);
|
||||
ret = -EINVAL;
|
||||
goto out_free_secret;
|
||||
goto out_free_key;
|
||||
}
|
||||
|
||||
/* The last four bytes is the CRC in little-endian format */
|
||||
@@ -205,12 +189,12 @@ struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
|
||||
pr_err("key crc mismatch (key %08x, crc %08x)\n",
|
||||
get_unaligned_le32(key->key + key_len), crc);
|
||||
ret = -EKEYREJECTED;
|
||||
goto out_free_secret;
|
||||
goto out_free_key;
|
||||
}
|
||||
key->len = key_len;
|
||||
key->hash = key_hash;
|
||||
return key;
|
||||
out_free_secret:
|
||||
out_free_key:
|
||||
nvme_auth_free_key(key);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
@@ -237,12 +221,106 @@ void nvme_auth_free_key(struct nvme_dhchap_key *key)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_free_key);
|
||||
|
||||
struct nvme_dhchap_key *nvme_auth_transform_key(
|
||||
struct nvme_dhchap_key *key, char *nqn)
|
||||
/*
|
||||
* Start computing an HMAC value, given the algorithm ID and raw key.
|
||||
*
|
||||
* The context should be zeroized at the end of its lifetime. The caller can do
|
||||
* that implicitly by calling nvme_auth_hmac_final(), or explicitly (needed when
|
||||
* a context is abandoned without finalizing it) by calling memzero_explicit().
|
||||
*/
|
||||
int nvme_auth_hmac_init(struct nvme_auth_hmac_ctx *hmac, u8 hmac_id,
|
||||
const u8 *key, size_t key_len)
|
||||
{
|
||||
const char *hmac_name;
|
||||
struct crypto_shash *key_tfm;
|
||||
SHASH_DESC_ON_STACK(shash, key_tfm);
|
||||
hmac->hmac_id = hmac_id;
|
||||
switch (hmac_id) {
|
||||
case NVME_AUTH_HASH_SHA256:
|
||||
hmac_sha256_init_usingrawkey(&hmac->sha256, key, key_len);
|
||||
return 0;
|
||||
case NVME_AUTH_HASH_SHA384:
|
||||
hmac_sha384_init_usingrawkey(&hmac->sha384, key, key_len);
|
||||
return 0;
|
||||
case NVME_AUTH_HASH_SHA512:
|
||||
hmac_sha512_init_usingrawkey(&hmac->sha512, key, key_len);
|
||||
return 0;
|
||||
}
|
||||
pr_warn("%s: invalid hash algorithm %d\n", __func__, hmac_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_hmac_init);
|
||||
|
||||
void nvme_auth_hmac_update(struct nvme_auth_hmac_ctx *hmac, const u8 *data,
|
||||
size_t data_len)
|
||||
{
|
||||
switch (hmac->hmac_id) {
|
||||
case NVME_AUTH_HASH_SHA256:
|
||||
hmac_sha256_update(&hmac->sha256, data, data_len);
|
||||
return;
|
||||
case NVME_AUTH_HASH_SHA384:
|
||||
hmac_sha384_update(&hmac->sha384, data, data_len);
|
||||
return;
|
||||
case NVME_AUTH_HASH_SHA512:
|
||||
hmac_sha512_update(&hmac->sha512, data, data_len);
|
||||
return;
|
||||
}
|
||||
/* Unreachable because nvme_auth_hmac_init() validated hmac_id */
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_hmac_update);
|
||||
|
||||
/* Finish computing an HMAC value. Note that this zeroizes the HMAC context. */
|
||||
void nvme_auth_hmac_final(struct nvme_auth_hmac_ctx *hmac, u8 *out)
|
||||
{
|
||||
switch (hmac->hmac_id) {
|
||||
case NVME_AUTH_HASH_SHA256:
|
||||
hmac_sha256_final(&hmac->sha256, out);
|
||||
return;
|
||||
case NVME_AUTH_HASH_SHA384:
|
||||
hmac_sha384_final(&hmac->sha384, out);
|
||||
return;
|
||||
case NVME_AUTH_HASH_SHA512:
|
||||
hmac_sha512_final(&hmac->sha512, out);
|
||||
return;
|
||||
}
|
||||
/* Unreachable because nvme_auth_hmac_init() validated hmac_id */
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_hmac_final);
|
||||
|
||||
static int nvme_auth_hmac(u8 hmac_id, const u8 *key, size_t key_len,
|
||||
const u8 *data, size_t data_len, u8 *out)
|
||||
{
|
||||
struct nvme_auth_hmac_ctx hmac;
|
||||
int ret;
|
||||
|
||||
ret = nvme_auth_hmac_init(&hmac, hmac_id, key, key_len);
|
||||
if (ret == 0) {
|
||||
nvme_auth_hmac_update(&hmac, data, data_len);
|
||||
nvme_auth_hmac_final(&hmac, out);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nvme_auth_hash(u8 hmac_id, const u8 *data, size_t data_len, u8 *out)
|
||||
{
|
||||
switch (hmac_id) {
|
||||
case NVME_AUTH_HASH_SHA256:
|
||||
sha256(data, data_len, out);
|
||||
return 0;
|
||||
case NVME_AUTH_HASH_SHA384:
|
||||
sha384(data, data_len, out);
|
||||
return 0;
|
||||
case NVME_AUTH_HASH_SHA512:
|
||||
sha512(data, data_len, out);
|
||||
return 0;
|
||||
}
|
||||
pr_warn("%s: invalid hash algorithm %d\n", __func__, hmac_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
struct nvme_dhchap_key *nvme_auth_transform_key(
|
||||
const struct nvme_dhchap_key *key, const char *nqn)
|
||||
{
|
||||
struct nvme_auth_hmac_ctx hmac;
|
||||
struct nvme_dhchap_key *transformed_key;
|
||||
int ret, key_len;
|
||||
|
||||
@@ -257,118 +335,33 @@ struct nvme_dhchap_key *nvme_auth_transform_key(
|
||||
return ERR_PTR(-ENOMEM);
|
||||
return transformed_key;
|
||||
}
|
||||
hmac_name = nvme_auth_hmac_name(key->hash);
|
||||
if (!hmac_name) {
|
||||
pr_warn("Invalid key hash id %d\n", key->hash);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
key_tfm = crypto_alloc_shash(hmac_name, 0, 0);
|
||||
if (IS_ERR(key_tfm))
|
||||
return ERR_CAST(key_tfm);
|
||||
|
||||
key_len = crypto_shash_digestsize(key_tfm);
|
||||
ret = nvme_auth_hmac_init(&hmac, key->hash, key->key, key->len);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
key_len = nvme_auth_hmac_hash_len(key->hash);
|
||||
transformed_key = nvme_auth_alloc_key(key_len, key->hash);
|
||||
if (!transformed_key) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_key;
|
||||
memzero_explicit(&hmac, sizeof(hmac));
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
shash->tfm = key_tfm;
|
||||
ret = crypto_shash_setkey(key_tfm, key->key, key->len);
|
||||
if (ret < 0)
|
||||
goto out_free_transformed_key;
|
||||
ret = crypto_shash_init(shash);
|
||||
if (ret < 0)
|
||||
goto out_free_transformed_key;
|
||||
ret = crypto_shash_update(shash, nqn, strlen(nqn));
|
||||
if (ret < 0)
|
||||
goto out_free_transformed_key;
|
||||
ret = crypto_shash_update(shash, "NVMe-over-Fabrics", 17);
|
||||
if (ret < 0)
|
||||
goto out_free_transformed_key;
|
||||
ret = crypto_shash_final(shash, transformed_key->key);
|
||||
if (ret < 0)
|
||||
goto out_free_transformed_key;
|
||||
|
||||
crypto_free_shash(key_tfm);
|
||||
|
||||
nvme_auth_hmac_update(&hmac, nqn, strlen(nqn));
|
||||
nvme_auth_hmac_update(&hmac, "NVMe-over-Fabrics", 17);
|
||||
nvme_auth_hmac_final(&hmac, transformed_key->key);
|
||||
return transformed_key;
|
||||
|
||||
out_free_transformed_key:
|
||||
nvme_auth_free_key(transformed_key);
|
||||
out_free_key:
|
||||
crypto_free_shash(key_tfm);
|
||||
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_transform_key);
|
||||
|
||||
static int nvme_auth_hash_skey(int hmac_id, u8 *skey, size_t skey_len, u8 *hkey)
|
||||
int nvme_auth_augmented_challenge(u8 hmac_id, const u8 *skey, size_t skey_len,
|
||||
const u8 *challenge, u8 *aug, size_t hlen)
|
||||
{
|
||||
const char *digest_name;
|
||||
struct crypto_shash *tfm;
|
||||
u8 hashed_key[NVME_AUTH_MAX_DIGEST_SIZE];
|
||||
int ret;
|
||||
|
||||
digest_name = nvme_auth_digest_name(hmac_id);
|
||||
if (!digest_name) {
|
||||
pr_debug("%s: failed to get digest for %d\n", __func__,
|
||||
hmac_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
tfm = crypto_alloc_shash(digest_name, 0, 0);
|
||||
if (IS_ERR(tfm))
|
||||
return -ENOMEM;
|
||||
|
||||
ret = crypto_shash_tfm_digest(tfm, skey, skey_len, hkey);
|
||||
if (ret < 0)
|
||||
pr_debug("%s: Failed to hash digest len %zu\n", __func__,
|
||||
skey_len);
|
||||
|
||||
crypto_free_shash(tfm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len,
|
||||
u8 *challenge, u8 *aug, size_t hlen)
|
||||
{
|
||||
struct crypto_shash *tfm;
|
||||
u8 *hashed_key;
|
||||
const char *hmac_name;
|
||||
int ret;
|
||||
|
||||
hashed_key = kmalloc(hlen, GFP_KERNEL);
|
||||
if (!hashed_key)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = nvme_auth_hash_skey(hmac_id, skey,
|
||||
skey_len, hashed_key);
|
||||
if (ret < 0)
|
||||
goto out_free_key;
|
||||
|
||||
hmac_name = nvme_auth_hmac_name(hmac_id);
|
||||
if (!hmac_name) {
|
||||
pr_warn("%s: invalid hash algorithm %d\n",
|
||||
__func__, hmac_id);
|
||||
ret = -EINVAL;
|
||||
goto out_free_key;
|
||||
}
|
||||
|
||||
tfm = crypto_alloc_shash(hmac_name, 0, 0);
|
||||
if (IS_ERR(tfm)) {
|
||||
ret = PTR_ERR(tfm);
|
||||
goto out_free_key;
|
||||
}
|
||||
|
||||
ret = crypto_shash_setkey(tfm, hashed_key, hlen);
|
||||
ret = nvme_auth_hash(hmac_id, skey, skey_len, hashed_key);
|
||||
if (ret)
|
||||
goto out_free_hash;
|
||||
|
||||
ret = crypto_shash_tfm_digest(tfm, challenge, hlen, aug);
|
||||
out_free_hash:
|
||||
crypto_free_shash(tfm);
|
||||
out_free_key:
|
||||
kfree_sensitive(hashed_key);
|
||||
return ret;
|
||||
ret = nvme_auth_hmac(hmac_id, hashed_key, hlen, challenge, hlen, aug);
|
||||
memzero_explicit(hashed_key, sizeof(hashed_key));
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_augmented_challenge);
|
||||
@@ -411,7 +404,7 @@ int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm,
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_gen_pubkey);
|
||||
|
||||
int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm,
|
||||
u8 *ctrl_key, size_t ctrl_key_len,
|
||||
const u8 *ctrl_key, size_t ctrl_key_len,
|
||||
u8 *sess_key, size_t sess_key_len)
|
||||
{
|
||||
struct kpp_request *req;
|
||||
@@ -438,7 +431,7 @@ int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_gen_shared_secret);
|
||||
|
||||
int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key)
|
||||
int nvme_auth_parse_key(const char *secret, struct nvme_dhchap_key **ret_key)
|
||||
{
|
||||
struct nvme_dhchap_key *key;
|
||||
u8 key_hash;
|
||||
@@ -461,7 +454,7 @@ int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key)
|
||||
*ret_key = key;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_generate_key);
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_parse_key);
|
||||
|
||||
/**
|
||||
* nvme_auth_generate_psk - Generate a PSK for TLS
|
||||
@@ -486,66 +479,32 @@ EXPORT_SYMBOL_GPL(nvme_auth_generate_key);
|
||||
* Returns 0 on success with a valid generated PSK pointer in @ret_psk and
|
||||
* the length of @ret_psk in @ret_len, or a negative error number otherwise.
|
||||
*/
|
||||
int nvme_auth_generate_psk(u8 hmac_id, u8 *skey, size_t skey_len,
|
||||
u8 *c1, u8 *c2, size_t hash_len, u8 **ret_psk, size_t *ret_len)
|
||||
int nvme_auth_generate_psk(u8 hmac_id, const u8 *skey, size_t skey_len,
|
||||
const u8 *c1, const u8 *c2, size_t hash_len,
|
||||
u8 **ret_psk, size_t *ret_len)
|
||||
{
|
||||
struct crypto_shash *tfm;
|
||||
SHASH_DESC_ON_STACK(shash, tfm);
|
||||
size_t psk_len = nvme_auth_hmac_hash_len(hmac_id);
|
||||
struct nvme_auth_hmac_ctx hmac;
|
||||
u8 *psk;
|
||||
const char *hmac_name;
|
||||
int ret, psk_len;
|
||||
int ret;
|
||||
|
||||
if (!c1 || !c2)
|
||||
return -EINVAL;
|
||||
|
||||
hmac_name = nvme_auth_hmac_name(hmac_id);
|
||||
if (!hmac_name) {
|
||||
pr_warn("%s: invalid hash algorithm %d\n",
|
||||
__func__, hmac_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
tfm = crypto_alloc_shash(hmac_name, 0, 0);
|
||||
if (IS_ERR(tfm))
|
||||
return PTR_ERR(tfm);
|
||||
|
||||
psk_len = crypto_shash_digestsize(tfm);
|
||||
ret = nvme_auth_hmac_init(&hmac, hmac_id, skey, skey_len);
|
||||
if (ret)
|
||||
return ret;
|
||||
psk = kzalloc(psk_len, GFP_KERNEL);
|
||||
if (!psk) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_tfm;
|
||||
memzero_explicit(&hmac, sizeof(hmac));
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
shash->tfm = tfm;
|
||||
ret = crypto_shash_setkey(tfm, skey, skey_len);
|
||||
if (ret)
|
||||
goto out_free_psk;
|
||||
|
||||
ret = crypto_shash_init(shash);
|
||||
if (ret)
|
||||
goto out_free_psk;
|
||||
|
||||
ret = crypto_shash_update(shash, c1, hash_len);
|
||||
if (ret)
|
||||
goto out_free_psk;
|
||||
|
||||
ret = crypto_shash_update(shash, c2, hash_len);
|
||||
if (ret)
|
||||
goto out_free_psk;
|
||||
|
||||
ret = crypto_shash_final(shash, psk);
|
||||
if (!ret) {
|
||||
*ret_psk = psk;
|
||||
*ret_len = psk_len;
|
||||
}
|
||||
|
||||
out_free_psk:
|
||||
if (ret)
|
||||
kfree_sensitive(psk);
|
||||
out_free_tfm:
|
||||
crypto_free_shash(tfm);
|
||||
|
||||
return ret;
|
||||
nvme_auth_hmac_update(&hmac, c1, hash_len);
|
||||
nvme_auth_hmac_update(&hmac, c2, hash_len);
|
||||
nvme_auth_hmac_final(&hmac, psk);
|
||||
*ret_psk = psk;
|
||||
*ret_len = psk_len;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_generate_psk);
|
||||
|
||||
@@ -584,158 +543,70 @@ EXPORT_SYMBOL_GPL(nvme_auth_generate_psk);
|
||||
* Returns 0 on success with a valid digest pointer in @ret_digest, or a
|
||||
* negative error number on failure.
|
||||
*/
|
||||
int nvme_auth_generate_digest(u8 hmac_id, u8 *psk, size_t psk_len,
|
||||
char *subsysnqn, char *hostnqn, u8 **ret_digest)
|
||||
int nvme_auth_generate_digest(u8 hmac_id, const u8 *psk, size_t psk_len,
|
||||
const char *subsysnqn, const char *hostnqn,
|
||||
char **ret_digest)
|
||||
{
|
||||
struct crypto_shash *tfm;
|
||||
SHASH_DESC_ON_STACK(shash, tfm);
|
||||
u8 *digest, *enc;
|
||||
const char *hmac_name;
|
||||
size_t digest_len, hmac_len;
|
||||
struct nvme_auth_hmac_ctx hmac;
|
||||
u8 digest[NVME_AUTH_MAX_DIGEST_SIZE];
|
||||
size_t hash_len = nvme_auth_hmac_hash_len(hmac_id);
|
||||
char *enc;
|
||||
size_t enc_len;
|
||||
int ret;
|
||||
|
||||
if (WARN_ON(!subsysnqn || !hostnqn))
|
||||
return -EINVAL;
|
||||
|
||||
hmac_name = nvme_auth_hmac_name(hmac_id);
|
||||
if (!hmac_name) {
|
||||
if (hash_len == 0) {
|
||||
pr_warn("%s: invalid hash algorithm %d\n",
|
||||
__func__, hmac_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (nvme_auth_hmac_hash_len(hmac_id)) {
|
||||
switch (hash_len) {
|
||||
case 32:
|
||||
hmac_len = 44;
|
||||
enc_len = 44;
|
||||
break;
|
||||
case 48:
|
||||
hmac_len = 64;
|
||||
enc_len = 64;
|
||||
break;
|
||||
default:
|
||||
pr_warn("%s: invalid hash algorithm '%s'\n",
|
||||
__func__, hmac_name);
|
||||
__func__, nvme_auth_hmac_name(hmac_id));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
enc = kzalloc(hmac_len + 1, GFP_KERNEL);
|
||||
if (!enc)
|
||||
return -ENOMEM;
|
||||
|
||||
tfm = crypto_alloc_shash(hmac_name, 0, 0);
|
||||
if (IS_ERR(tfm)) {
|
||||
ret = PTR_ERR(tfm);
|
||||
goto out_free_enc;
|
||||
}
|
||||
|
||||
digest_len = crypto_shash_digestsize(tfm);
|
||||
digest = kzalloc(digest_len, GFP_KERNEL);
|
||||
if (!digest) {
|
||||
enc = kzalloc(enc_len + 1, GFP_KERNEL);
|
||||
if (!enc) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_tfm;
|
||||
goto out;
|
||||
}
|
||||
|
||||
shash->tfm = tfm;
|
||||
ret = crypto_shash_setkey(tfm, psk, psk_len);
|
||||
ret = nvme_auth_hmac_init(&hmac, hmac_id, psk, psk_len);
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, hostnqn, strlen(hostnqn));
|
||||
nvme_auth_hmac_update(&hmac, " ", 1);
|
||||
nvme_auth_hmac_update(&hmac, subsysnqn, strlen(subsysnqn));
|
||||
nvme_auth_hmac_update(&hmac, " NVMe-over-Fabrics", 18);
|
||||
nvme_auth_hmac_final(&hmac, digest);
|
||||
|
||||
ret = crypto_shash_init(shash);
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = crypto_shash_update(shash, hostnqn, strlen(hostnqn));
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = crypto_shash_update(shash, " ", 1);
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = crypto_shash_update(shash, subsysnqn, strlen(subsysnqn));
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = crypto_shash_update(shash, " NVMe-over-Fabrics", 18);
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = crypto_shash_final(shash, digest);
|
||||
if (ret)
|
||||
goto out_free_digest;
|
||||
|
||||
ret = base64_encode(digest, digest_len, enc, true, BASE64_STD);
|
||||
if (ret < hmac_len) {
|
||||
ret = base64_encode(digest, hash_len, enc, true, BASE64_STD);
|
||||
if (ret < enc_len) {
|
||||
ret = -ENOKEY;
|
||||
goto out_free_digest;
|
||||
goto out;
|
||||
}
|
||||
*ret_digest = enc;
|
||||
ret = 0;
|
||||
|
||||
out_free_digest:
|
||||
kfree_sensitive(digest);
|
||||
out_free_tfm:
|
||||
crypto_free_shash(tfm);
|
||||
out_free_enc:
|
||||
out:
|
||||
if (ret)
|
||||
kfree_sensitive(enc);
|
||||
|
||||
memzero_explicit(digest, sizeof(digest));
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_generate_digest);
|
||||
|
||||
/**
|
||||
* hkdf_expand_label - HKDF-Expand-Label (RFC 8846 section 7.1)
|
||||
* @hmac_tfm: hash context keyed with pseudorandom key
|
||||
* @label: ASCII label without "tls13 " prefix
|
||||
* @labellen: length of @label
|
||||
* @context: context bytes
|
||||
* @contextlen: length of @context
|
||||
* @okm: output keying material
|
||||
* @okmlen: length of @okm
|
||||
*
|
||||
* Build the TLS 1.3 HkdfLabel structure and invoke hkdf_expand().
|
||||
*
|
||||
* Returns 0 on success with output keying material stored in @okm,
|
||||
* or a negative errno value otherwise.
|
||||
*/
|
||||
static int hkdf_expand_label(struct crypto_shash *hmac_tfm,
|
||||
const u8 *label, unsigned int labellen,
|
||||
const u8 *context, unsigned int contextlen,
|
||||
u8 *okm, unsigned int okmlen)
|
||||
{
|
||||
int err;
|
||||
u8 *info;
|
||||
unsigned int infolen;
|
||||
const char *tls13_prefix = "tls13 ";
|
||||
unsigned int prefixlen = strlen(tls13_prefix);
|
||||
|
||||
if (WARN_ON(labellen > (255 - prefixlen)))
|
||||
return -EINVAL;
|
||||
if (WARN_ON(contextlen > 255))
|
||||
return -EINVAL;
|
||||
|
||||
infolen = 2 + (1 + prefixlen + labellen) + (1 + contextlen);
|
||||
info = kzalloc(infolen, GFP_KERNEL);
|
||||
if (!info)
|
||||
return -ENOMEM;
|
||||
|
||||
/* HkdfLabel.Length */
|
||||
put_unaligned_be16(okmlen, info);
|
||||
|
||||
/* HkdfLabel.Label */
|
||||
info[2] = prefixlen + labellen;
|
||||
memcpy(info + 3, tls13_prefix, prefixlen);
|
||||
memcpy(info + 3 + prefixlen, label, labellen);
|
||||
|
||||
/* HkdfLabel.Context */
|
||||
info[3 + prefixlen + labellen] = contextlen;
|
||||
memcpy(info + 4 + prefixlen + labellen, context, contextlen);
|
||||
|
||||
err = hkdf_expand(hmac_tfm, info, infolen, okm, okmlen);
|
||||
kfree_sensitive(info);
|
||||
return err;
|
||||
}
|
||||
|
||||
/**
|
||||
* nvme_auth_derive_tls_psk - Derive TLS PSK
|
||||
* @hmac_id: Hash function identifier
|
||||
@@ -763,82 +634,92 @@ static int hkdf_expand_label(struct crypto_shash *hmac_tfm,
|
||||
* Returns 0 on success with a valid psk pointer in @ret_psk or a negative
|
||||
* error number otherwise.
|
||||
*/
|
||||
int nvme_auth_derive_tls_psk(int hmac_id, u8 *psk, size_t psk_len,
|
||||
u8 *psk_digest, u8 **ret_psk)
|
||||
int nvme_auth_derive_tls_psk(int hmac_id, const u8 *psk, size_t psk_len,
|
||||
const char *psk_digest, u8 **ret_psk)
|
||||
{
|
||||
struct crypto_shash *hmac_tfm;
|
||||
const char *hmac_name;
|
||||
const char *label = "nvme-tls-psk";
|
||||
static const char default_salt[HKDF_MAX_HASHLEN];
|
||||
size_t prk_len;
|
||||
const char *ctx;
|
||||
unsigned char *prk, *tls_key;
|
||||
static const u8 default_salt[NVME_AUTH_MAX_DIGEST_SIZE];
|
||||
static const char label[] = "tls13 nvme-tls-psk";
|
||||
const size_t label_len = sizeof(label) - 1;
|
||||
u8 prk[NVME_AUTH_MAX_DIGEST_SIZE];
|
||||
size_t hash_len, ctx_len;
|
||||
u8 *hmac_data = NULL, *tls_key;
|
||||
size_t i;
|
||||
int ret;
|
||||
|
||||
hmac_name = nvme_auth_hmac_name(hmac_id);
|
||||
if (!hmac_name) {
|
||||
hash_len = nvme_auth_hmac_hash_len(hmac_id);
|
||||
if (hash_len == 0) {
|
||||
pr_warn("%s: invalid hash algorithm %d\n",
|
||||
__func__, hmac_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (hmac_id == NVME_AUTH_HASH_SHA512) {
|
||||
pr_warn("%s: unsupported hash algorithm %s\n",
|
||||
__func__, hmac_name);
|
||||
__func__, nvme_auth_hmac_name(hmac_id));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
hmac_tfm = crypto_alloc_shash(hmac_name, 0, 0);
|
||||
if (IS_ERR(hmac_tfm))
|
||||
return PTR_ERR(hmac_tfm);
|
||||
|
||||
prk_len = crypto_shash_digestsize(hmac_tfm);
|
||||
prk = kzalloc(prk_len, GFP_KERNEL);
|
||||
if (!prk) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_shash;
|
||||
if (psk_len != hash_len) {
|
||||
pr_warn("%s: unexpected psk_len %zu\n", __func__, psk_len);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (WARN_ON(prk_len > HKDF_MAX_HASHLEN)) {
|
||||
/* HKDF-Extract */
|
||||
ret = nvme_auth_hmac(hmac_id, default_salt, hash_len, psk, psk_len,
|
||||
prk);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* HKDF-Expand-Label (RFC 8446 section 7.1), with output length equal to
|
||||
* the hash length (so only a single HMAC operation is needed)
|
||||
*/
|
||||
|
||||
hmac_data = kmalloc(/* output length */ 2 +
|
||||
/* label */ 1 + label_len +
|
||||
/* context (max) */ 1 + 3 + 1 + strlen(psk_digest) +
|
||||
/* counter */ 1,
|
||||
GFP_KERNEL);
|
||||
if (!hmac_data) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
/* output length */
|
||||
i = 0;
|
||||
hmac_data[i++] = hash_len >> 8;
|
||||
hmac_data[i++] = hash_len;
|
||||
|
||||
/* label */
|
||||
static_assert(label_len <= 255);
|
||||
hmac_data[i] = label_len;
|
||||
memcpy(&hmac_data[i + 1], label, label_len);
|
||||
i += 1 + label_len;
|
||||
|
||||
/* context */
|
||||
ctx_len = sprintf(&hmac_data[i + 1], "%02d %s", hmac_id, psk_digest);
|
||||
if (ctx_len > 255) {
|
||||
ret = -EINVAL;
|
||||
goto out_free_prk;
|
||||
goto out;
|
||||
}
|
||||
ret = hkdf_extract(hmac_tfm, psk, psk_len,
|
||||
default_salt, prk_len, prk);
|
||||
if (ret)
|
||||
goto out_free_prk;
|
||||
hmac_data[i] = ctx_len;
|
||||
i += 1 + ctx_len;
|
||||
|
||||
ret = crypto_shash_setkey(hmac_tfm, prk, prk_len);
|
||||
if (ret)
|
||||
goto out_free_prk;
|
||||
|
||||
ctx = kasprintf(GFP_KERNEL, "%02d %s", hmac_id, psk_digest);
|
||||
if (!ctx) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_prk;
|
||||
}
|
||||
/* counter (this overwrites the NUL terminator written by sprintf) */
|
||||
hmac_data[i++] = 1;
|
||||
|
||||
tls_key = kzalloc(psk_len, GFP_KERNEL);
|
||||
if (!tls_key) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_ctx;
|
||||
goto out;
|
||||
}
|
||||
ret = hkdf_expand_label(hmac_tfm,
|
||||
label, strlen(label),
|
||||
ctx, strlen(ctx),
|
||||
tls_key, psk_len);
|
||||
ret = nvme_auth_hmac(hmac_id, prk, hash_len, hmac_data, i, tls_key);
|
||||
if (ret) {
|
||||
kfree(tls_key);
|
||||
goto out_free_ctx;
|
||||
kfree_sensitive(tls_key);
|
||||
goto out;
|
||||
}
|
||||
*ret_psk = tls_key;
|
||||
|
||||
out_free_ctx:
|
||||
kfree(ctx);
|
||||
out_free_prk:
|
||||
kfree(prk);
|
||||
out_free_shash:
|
||||
crypto_free_shash(hmac_tfm);
|
||||
|
||||
out:
|
||||
kfree_sensitive(hmac_data);
|
||||
memzero_explicit(prk, sizeof(prk));
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_auth_derive_tls_psk);
|
||||
|
||||
175
drivers/nvme/common/tests/auth_kunit.c
Normal file
175
drivers/nvme/common/tests/auth_kunit.c
Normal file
@@ -0,0 +1,175 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Unit tests for NVMe authentication functions
|
||||
*
|
||||
* Copyright 2026 Google LLC
|
||||
*/
|
||||
|
||||
#include <crypto/sha2.h>
|
||||
#include <kunit/test.h>
|
||||
#include <linux/nvme.h>
|
||||
#include <linux/nvme-auth.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
struct nvme_auth_test_values {
|
||||
u8 hmac_id;
|
||||
size_t hash_len;
|
||||
u8 expected_psk[NVME_AUTH_MAX_DIGEST_SIZE];
|
||||
char *expected_psk_digest;
|
||||
u8 expected_tls_psk[NVME_AUTH_MAX_DIGEST_SIZE];
|
||||
};
|
||||
|
||||
static void kfree_action(void *ptr)
|
||||
{
|
||||
kfree(ptr);
|
||||
}
|
||||
|
||||
static void kunit_add_kfree_action(struct kunit *test, void *ptr)
|
||||
{
|
||||
KUNIT_ASSERT_EQ(test, 0,
|
||||
kunit_add_action_or_reset(test, kfree_action, ptr));
|
||||
}
|
||||
|
||||
/*
|
||||
* Test the derivation of a TLS PSK from the initial skey. The vals parameter
|
||||
* gives the expected value of tls_psk as well as the intermediate values psk
|
||||
* and psk_digest. The inputs are implicitly the fixed values set below.
|
||||
*/
|
||||
static void
|
||||
test_nvme_auth_derive_tls_psk(struct kunit *test,
|
||||
const struct nvme_auth_test_values *vals)
|
||||
{
|
||||
const u8 hmac_id = vals->hmac_id;
|
||||
const size_t hash_len = vals->hash_len;
|
||||
const size_t skey_len = hash_len;
|
||||
u8 skey[NVME_AUTH_MAX_DIGEST_SIZE];
|
||||
u8 c1[NVME_AUTH_MAX_DIGEST_SIZE];
|
||||
u8 c2[NVME_AUTH_MAX_DIGEST_SIZE];
|
||||
const char *subsysnqn = "subsysnqn";
|
||||
const char *hostnqn = "hostnqn";
|
||||
u8 *psk = NULL, *tls_psk = NULL;
|
||||
char *psk_digest = NULL;
|
||||
size_t psk_len;
|
||||
int ret;
|
||||
|
||||
for (int i = 0; i < NVME_AUTH_MAX_DIGEST_SIZE; i++) {
|
||||
skey[i] = 'A' + i;
|
||||
c1[i] = i;
|
||||
c2[i] = 0xff - i;
|
||||
}
|
||||
|
||||
ret = nvme_auth_generate_psk(hmac_id, skey, skey_len, c1, c2, hash_len,
|
||||
&psk, &psk_len);
|
||||
kunit_add_kfree_action(test, psk);
|
||||
KUNIT_ASSERT_EQ(test, 0, ret);
|
||||
KUNIT_ASSERT_EQ(test, hash_len, psk_len);
|
||||
KUNIT_ASSERT_MEMEQ(test, vals->expected_psk, psk, psk_len);
|
||||
|
||||
ret = nvme_auth_generate_digest(hmac_id, psk, psk_len, subsysnqn,
|
||||
hostnqn, &psk_digest);
|
||||
kunit_add_kfree_action(test, psk_digest);
|
||||
if (vals->expected_psk_digest == NULL) {
|
||||
/*
|
||||
* Algorithm has an ID assigned but is not supported by
|
||||
* nvme_auth_generate_digest().
|
||||
*/
|
||||
KUNIT_ASSERT_EQ(test, -EINVAL, ret);
|
||||
return;
|
||||
}
|
||||
KUNIT_ASSERT_EQ(test, 0, ret);
|
||||
KUNIT_ASSERT_STREQ(test, vals->expected_psk_digest, psk_digest);
|
||||
|
||||
ret = nvme_auth_derive_tls_psk(hmac_id, psk, psk_len, psk_digest,
|
||||
&tls_psk);
|
||||
kunit_add_kfree_action(test, tls_psk);
|
||||
KUNIT_ASSERT_EQ(test, 0, ret);
|
||||
KUNIT_ASSERT_MEMEQ(test, vals->expected_tls_psk, tls_psk, psk_len);
|
||||
}
|
||||
|
||||
static void test_nvme_auth_derive_tls_psk_hmac_sha256(struct kunit *test)
|
||||
{
|
||||
static const struct nvme_auth_test_values vals = {
|
||||
.hmac_id = NVME_AUTH_HASH_SHA256,
|
||||
.hash_len = SHA256_DIGEST_SIZE,
|
||||
.expected_psk = {
|
||||
0x17, 0x33, 0xc5, 0x9f, 0xa7, 0xf4, 0x8f, 0xcf,
|
||||
0x37, 0xf5, 0xf2, 0x6f, 0xc4, 0xff, 0x02, 0x68,
|
||||
0xad, 0x4f, 0x78, 0xe0, 0x30, 0xf4, 0xf3, 0xb0,
|
||||
0xbf, 0xd1, 0xd4, 0x7e, 0x7b, 0xb1, 0x44, 0x7a,
|
||||
},
|
||||
.expected_psk_digest = "OldoKuTfKddMuyCznAZojkWD7P4D9/AtzDzLimtOxqI=",
|
||||
.expected_tls_psk = {
|
||||
0x3c, 0x17, 0xda, 0x62, 0x84, 0x74, 0xa0, 0x4d,
|
||||
0x22, 0x47, 0xc4, 0xca, 0xb4, 0x79, 0x68, 0xc9,
|
||||
0x15, 0x38, 0x81, 0x93, 0xf7, 0xc0, 0x71, 0xbd,
|
||||
0x94, 0x89, 0xcc, 0x36, 0x66, 0xcd, 0x7c, 0xc8,
|
||||
},
|
||||
};
|
||||
|
||||
test_nvme_auth_derive_tls_psk(test, &vals);
|
||||
}
|
||||
|
||||
static void test_nvme_auth_derive_tls_psk_hmac_sha384(struct kunit *test)
|
||||
{
|
||||
static const struct nvme_auth_test_values vals = {
|
||||
.hmac_id = NVME_AUTH_HASH_SHA384,
|
||||
.hash_len = SHA384_DIGEST_SIZE,
|
||||
.expected_psk = {
|
||||
0xf1, 0x4b, 0x2d, 0xd3, 0x23, 0x4c, 0x45, 0x96,
|
||||
0x94, 0xd3, 0xbc, 0x63, 0xf8, 0x96, 0x8b, 0xd6,
|
||||
0xb3, 0x7c, 0x2c, 0x6d, 0xe8, 0x49, 0xe2, 0x2e,
|
||||
0x11, 0x87, 0x49, 0x00, 0x1c, 0xe4, 0xbb, 0xe8,
|
||||
0x64, 0x0b, 0x9e, 0x3a, 0x74, 0x8c, 0xb1, 0x1c,
|
||||
0xe4, 0xb1, 0xd7, 0x1d, 0x35, 0x9c, 0xce, 0x39,
|
||||
},
|
||||
.expected_psk_digest = "cffMWk8TSS7HOQebjgYEIkrPrjWPV4JE5cdPB8WhEvY4JBW5YynKyv66XscN4A9n",
|
||||
.expected_tls_psk = {
|
||||
0x27, 0x74, 0x75, 0x32, 0x33, 0x53, 0x7b, 0x3f,
|
||||
0xa5, 0x0e, 0xb7, 0xd1, 0x6a, 0x8e, 0x43, 0x45,
|
||||
0x7d, 0x85, 0xf4, 0x90, 0x6c, 0x00, 0x5b, 0x22,
|
||||
0x36, 0x61, 0x6c, 0x5d, 0x80, 0x93, 0x9d, 0x08,
|
||||
0x98, 0xff, 0xf1, 0x5b, 0xb8, 0xb7, 0x71, 0x19,
|
||||
0xd2, 0xbe, 0x0a, 0xac, 0x42, 0x3e, 0x75, 0x90,
|
||||
},
|
||||
};
|
||||
|
||||
test_nvme_auth_derive_tls_psk(test, &vals);
|
||||
}
|
||||
|
||||
static void test_nvme_auth_derive_tls_psk_hmac_sha512(struct kunit *test)
|
||||
{
|
||||
static const struct nvme_auth_test_values vals = {
|
||||
.hmac_id = NVME_AUTH_HASH_SHA512,
|
||||
.hash_len = SHA512_DIGEST_SIZE,
|
||||
.expected_psk = {
|
||||
0x9c, 0x9f, 0x08, 0x9a, 0x61, 0x8b, 0x47, 0xd2,
|
||||
0xd7, 0x5f, 0x4b, 0x6c, 0x28, 0x07, 0x04, 0x24,
|
||||
0x48, 0x7b, 0x44, 0x5d, 0xd9, 0x6e, 0x70, 0xc4,
|
||||
0xc0, 0x9b, 0x55, 0xe8, 0xb6, 0x00, 0x01, 0x52,
|
||||
0xa3, 0x36, 0x3c, 0x34, 0x54, 0x04, 0x3f, 0x38,
|
||||
0xf0, 0xb8, 0x50, 0x36, 0xde, 0xd4, 0x06, 0x55,
|
||||
0x35, 0x0a, 0xa8, 0x7b, 0x8b, 0x6a, 0x28, 0x2b,
|
||||
0x5c, 0x1a, 0xca, 0xe1, 0x62, 0x33, 0xdd, 0x5b,
|
||||
},
|
||||
/* nvme_auth_generate_digest() doesn't support SHA-512 yet. */
|
||||
.expected_psk_digest = NULL,
|
||||
};
|
||||
|
||||
test_nvme_auth_derive_tls_psk(test, &vals);
|
||||
}
|
||||
|
||||
static struct kunit_case nvme_auth_test_cases[] = {
|
||||
KUNIT_CASE(test_nvme_auth_derive_tls_psk_hmac_sha256),
|
||||
KUNIT_CASE(test_nvme_auth_derive_tls_psk_hmac_sha384),
|
||||
KUNIT_CASE(test_nvme_auth_derive_tls_psk_hmac_sha512),
|
||||
{},
|
||||
};
|
||||
|
||||
static struct kunit_suite nvme_auth_test_suite = {
|
||||
.name = "nvme-auth",
|
||||
.test_cases = nvme_auth_test_cases,
|
||||
};
|
||||
kunit_test_suite(nvme_auth_test_suite);
|
||||
|
||||
MODULE_DESCRIPTION("Unit tests for NVMe authentication functions");
|
||||
MODULE_LICENSE("GPL");
|
||||
@@ -7,7 +7,6 @@
|
||||
#include <linux/base64.h>
|
||||
#include <linux/prandom.h>
|
||||
#include <linux/unaligned.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/dh.h>
|
||||
#include "nvme.h"
|
||||
#include "fabrics.h"
|
||||
@@ -22,7 +21,6 @@ struct nvme_dhchap_queue_context {
|
||||
struct list_head entry;
|
||||
struct work_struct auth_work;
|
||||
struct nvme_ctrl *ctrl;
|
||||
struct crypto_shash *shash_tfm;
|
||||
struct crypto_kpp *dh_tfm;
|
||||
struct nvme_dhchap_key *transformed_key;
|
||||
void *buf;
|
||||
@@ -38,9 +36,9 @@ struct nvme_dhchap_queue_context {
|
||||
u8 hash_id;
|
||||
u8 sc_c;
|
||||
size_t hash_len;
|
||||
u8 c1[64];
|
||||
u8 c2[64];
|
||||
u8 response[64];
|
||||
u8 c1[NVME_AUTH_MAX_DIGEST_SIZE];
|
||||
u8 c2[NVME_AUTH_MAX_DIGEST_SIZE];
|
||||
u8 response[NVME_AUTH_MAX_DIGEST_SIZE];
|
||||
u8 *ctrl_key;
|
||||
u8 *host_key;
|
||||
u8 *sess_key;
|
||||
@@ -125,6 +123,8 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl,
|
||||
{
|
||||
struct nvmf_auth_dhchap_negotiate_data *data = chap->buf;
|
||||
size_t size = sizeof(*data) + sizeof(union nvmf_auth_protocol);
|
||||
u8 dh_list_offset = NVME_AUTH_DHCHAP_MAX_DH_IDS;
|
||||
u8 *idlist = data->auth_protocol[0].dhchap.idlist;
|
||||
|
||||
if (size > CHAP_BUF_SIZE) {
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
|
||||
@@ -141,21 +141,22 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl,
|
||||
data->sc_c = NVME_AUTH_SECP_NEWTLSPSK;
|
||||
} else
|
||||
data->sc_c = NVME_AUTH_SECP_NOSC;
|
||||
chap->sc_c = data->sc_c;
|
||||
data->napd = 1;
|
||||
data->auth_protocol[0].dhchap.authid = NVME_AUTH_DHCHAP_AUTH_ID;
|
||||
data->auth_protocol[0].dhchap.halen = 3;
|
||||
data->auth_protocol[0].dhchap.dhlen = 6;
|
||||
data->auth_protocol[0].dhchap.idlist[0] = NVME_AUTH_HASH_SHA256;
|
||||
data->auth_protocol[0].dhchap.idlist[1] = NVME_AUTH_HASH_SHA384;
|
||||
data->auth_protocol[0].dhchap.idlist[2] = NVME_AUTH_HASH_SHA512;
|
||||
data->auth_protocol[0].dhchap.idlist[30] = NVME_AUTH_DHGROUP_NULL;
|
||||
data->auth_protocol[0].dhchap.idlist[31] = NVME_AUTH_DHGROUP_2048;
|
||||
data->auth_protocol[0].dhchap.idlist[32] = NVME_AUTH_DHGROUP_3072;
|
||||
data->auth_protocol[0].dhchap.idlist[33] = NVME_AUTH_DHGROUP_4096;
|
||||
data->auth_protocol[0].dhchap.idlist[34] = NVME_AUTH_DHGROUP_6144;
|
||||
data->auth_protocol[0].dhchap.idlist[35] = NVME_AUTH_DHGROUP_8192;
|
||||
|
||||
chap->sc_c = data->sc_c;
|
||||
idlist[0] = NVME_AUTH_HASH_SHA256;
|
||||
idlist[1] = NVME_AUTH_HASH_SHA384;
|
||||
idlist[2] = NVME_AUTH_HASH_SHA512;
|
||||
if (chap->sc_c == NVME_AUTH_SECP_NOSC)
|
||||
idlist[dh_list_offset++] = NVME_AUTH_DHGROUP_NULL;
|
||||
idlist[dh_list_offset++] = NVME_AUTH_DHGROUP_2048;
|
||||
idlist[dh_list_offset++] = NVME_AUTH_DHGROUP_3072;
|
||||
idlist[dh_list_offset++] = NVME_AUTH_DHGROUP_4096;
|
||||
idlist[dh_list_offset++] = NVME_AUTH_DHGROUP_6144;
|
||||
idlist[dh_list_offset++] = NVME_AUTH_DHGROUP_8192;
|
||||
data->auth_protocol[0].dhchap.dhlen =
|
||||
dh_list_offset - NVME_AUTH_DHCHAP_MAX_DH_IDS;
|
||||
|
||||
return size;
|
||||
}
|
||||
@@ -183,38 +184,17 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl,
|
||||
return -EPROTO;
|
||||
}
|
||||
|
||||
if (chap->hash_id == data->hashid && chap->shash_tfm &&
|
||||
!strcmp(crypto_shash_alg_name(chap->shash_tfm), hmac_name) &&
|
||||
crypto_shash_digestsize(chap->shash_tfm) == data->hl) {
|
||||
if (chap->hash_id == data->hashid && chap->hash_len == data->hl) {
|
||||
dev_dbg(ctrl->device,
|
||||
"qid %d: reuse existing hash %s\n",
|
||||
chap->qid, hmac_name);
|
||||
goto select_kpp;
|
||||
}
|
||||
|
||||
/* Reset if hash cannot be reused */
|
||||
if (chap->shash_tfm) {
|
||||
crypto_free_shash(chap->shash_tfm);
|
||||
chap->hash_id = 0;
|
||||
chap->hash_len = 0;
|
||||
}
|
||||
chap->shash_tfm = crypto_alloc_shash(hmac_name, 0,
|
||||
CRYPTO_ALG_ALLOCATES_MEMORY);
|
||||
if (IS_ERR(chap->shash_tfm)) {
|
||||
dev_warn(ctrl->device,
|
||||
"qid %d: failed to allocate hash %s, error %ld\n",
|
||||
chap->qid, hmac_name, PTR_ERR(chap->shash_tfm));
|
||||
chap->shash_tfm = NULL;
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (crypto_shash_digestsize(chap->shash_tfm) != data->hl) {
|
||||
if (nvme_auth_hmac_hash_len(data->hashid) != data->hl) {
|
||||
dev_warn(ctrl->device,
|
||||
"qid %d: invalid hash length %d\n",
|
||||
chap->qid, data->hl);
|
||||
crypto_free_shash(chap->shash_tfm);
|
||||
chap->shash_tfm = NULL;
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE;
|
||||
return -EPROTO;
|
||||
}
|
||||
@@ -434,7 +414,7 @@ static int nvme_auth_set_dhchap_failure2_data(struct nvme_ctrl *ctrl,
|
||||
static int nvme_auth_dhchap_setup_host_response(struct nvme_ctrl *ctrl,
|
||||
struct nvme_dhchap_queue_context *chap)
|
||||
{
|
||||
SHASH_DESC_ON_STACK(shash, chap->shash_tfm);
|
||||
struct nvme_auth_hmac_ctx hmac;
|
||||
u8 buf[4], *challenge = chap->c1;
|
||||
int ret;
|
||||
|
||||
@@ -454,13 +434,11 @@ static int nvme_auth_dhchap_setup_host_response(struct nvme_ctrl *ctrl,
|
||||
__func__, chap->qid);
|
||||
}
|
||||
|
||||
ret = crypto_shash_setkey(chap->shash_tfm,
|
||||
chap->transformed_key->key, chap->transformed_key->len);
|
||||
if (ret) {
|
||||
dev_warn(ctrl->device, "qid %d: failed to set key, error %d\n",
|
||||
chap->qid, ret);
|
||||
ret = nvme_auth_hmac_init(&hmac, chap->hash_id,
|
||||
chap->transformed_key->key,
|
||||
chap->transformed_key->len);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (chap->dh_tfm) {
|
||||
challenge = kmalloc(chap->hash_len, GFP_KERNEL);
|
||||
@@ -477,51 +455,36 @@ static int nvme_auth_dhchap_setup_host_response(struct nvme_ctrl *ctrl,
|
||||
goto out;
|
||||
}
|
||||
|
||||
shash->tfm = chap->shash_tfm;
|
||||
ret = crypto_shash_init(shash);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, challenge, chap->hash_len);
|
||||
if (ret)
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, challenge, chap->hash_len);
|
||||
|
||||
put_unaligned_le32(chap->s1, buf);
|
||||
ret = crypto_shash_update(shash, buf, 4);
|
||||
if (ret)
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, buf, 4);
|
||||
|
||||
put_unaligned_le16(chap->transaction, buf);
|
||||
ret = crypto_shash_update(shash, buf, 2);
|
||||
if (ret)
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, buf, 2);
|
||||
|
||||
*buf = chap->sc_c;
|
||||
ret = crypto_shash_update(shash, buf, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, "HostHost", 8);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, ctrl->opts->host->nqn,
|
||||
strlen(ctrl->opts->host->nqn));
|
||||
if (ret)
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, buf, 1);
|
||||
nvme_auth_hmac_update(&hmac, "HostHost", 8);
|
||||
nvme_auth_hmac_update(&hmac, ctrl->opts->host->nqn,
|
||||
strlen(ctrl->opts->host->nqn));
|
||||
memset(buf, 0, sizeof(buf));
|
||||
ret = crypto_shash_update(shash, buf, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, ctrl->opts->subsysnqn,
|
||||
strlen(ctrl->opts->subsysnqn));
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_final(shash, chap->response);
|
||||
nvme_auth_hmac_update(&hmac, buf, 1);
|
||||
nvme_auth_hmac_update(&hmac, ctrl->opts->subsysnqn,
|
||||
strlen(ctrl->opts->subsysnqn));
|
||||
nvme_auth_hmac_final(&hmac, chap->response);
|
||||
ret = 0;
|
||||
out:
|
||||
if (challenge != chap->c1)
|
||||
kfree(challenge);
|
||||
memzero_explicit(&hmac, sizeof(hmac));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nvme_auth_dhchap_setup_ctrl_response(struct nvme_ctrl *ctrl,
|
||||
struct nvme_dhchap_queue_context *chap)
|
||||
{
|
||||
SHASH_DESC_ON_STACK(shash, chap->shash_tfm);
|
||||
struct nvme_auth_hmac_ctx hmac;
|
||||
struct nvme_dhchap_key *transformed_key;
|
||||
u8 buf[4], *challenge = chap->c2;
|
||||
int ret;
|
||||
@@ -533,10 +496,10 @@ static int nvme_auth_dhchap_setup_ctrl_response(struct nvme_ctrl *ctrl,
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = crypto_shash_setkey(chap->shash_tfm,
|
||||
transformed_key->key, transformed_key->len);
|
||||
ret = nvme_auth_hmac_init(&hmac, chap->hash_id, transformed_key->key,
|
||||
transformed_key->len);
|
||||
if (ret) {
|
||||
dev_warn(ctrl->device, "qid %d: failed to set key, error %d\n",
|
||||
dev_warn(ctrl->device, "qid %d: failed to init hmac, error %d\n",
|
||||
chap->qid, ret);
|
||||
goto out;
|
||||
}
|
||||
@@ -563,43 +526,29 @@ static int nvme_auth_dhchap_setup_ctrl_response(struct nvme_ctrl *ctrl,
|
||||
__func__, chap->qid, ctrl->opts->subsysnqn);
|
||||
dev_dbg(ctrl->device, "%s: qid %d hostnqn %s\n",
|
||||
__func__, chap->qid, ctrl->opts->host->nqn);
|
||||
shash->tfm = chap->shash_tfm;
|
||||
ret = crypto_shash_init(shash);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, challenge, chap->hash_len);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
nvme_auth_hmac_update(&hmac, challenge, chap->hash_len);
|
||||
|
||||
put_unaligned_le32(chap->s2, buf);
|
||||
ret = crypto_shash_update(shash, buf, 4);
|
||||
if (ret)
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, buf, 4);
|
||||
|
||||
put_unaligned_le16(chap->transaction, buf);
|
||||
ret = crypto_shash_update(shash, buf, 2);
|
||||
if (ret)
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, buf, 2);
|
||||
|
||||
memset(buf, 0, 4);
|
||||
ret = crypto_shash_update(shash, buf, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, "Controller", 10);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, ctrl->opts->subsysnqn,
|
||||
strlen(ctrl->opts->subsysnqn));
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, buf, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, ctrl->opts->host->nqn,
|
||||
strlen(ctrl->opts->host->nqn));
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_final(shash, chap->response);
|
||||
nvme_auth_hmac_update(&hmac, buf, 1);
|
||||
nvme_auth_hmac_update(&hmac, "Controller", 10);
|
||||
nvme_auth_hmac_update(&hmac, ctrl->opts->subsysnqn,
|
||||
strlen(ctrl->opts->subsysnqn));
|
||||
nvme_auth_hmac_update(&hmac, buf, 1);
|
||||
nvme_auth_hmac_update(&hmac, ctrl->opts->host->nqn,
|
||||
strlen(ctrl->opts->host->nqn));
|
||||
nvme_auth_hmac_final(&hmac, chap->response);
|
||||
ret = 0;
|
||||
out:
|
||||
if (challenge != chap->c2)
|
||||
kfree(challenge);
|
||||
memzero_explicit(&hmac, sizeof(hmac));
|
||||
nvme_auth_free_key(transformed_key);
|
||||
return ret;
|
||||
}
|
||||
@@ -689,8 +638,6 @@ static void nvme_auth_free_dhchap(struct nvme_dhchap_queue_context *chap)
|
||||
{
|
||||
nvme_auth_reset_dhchap(chap);
|
||||
chap->authenticated = false;
|
||||
if (chap->shash_tfm)
|
||||
crypto_free_shash(chap->shash_tfm);
|
||||
if (chap->dh_tfm)
|
||||
crypto_free_kpp(chap->dh_tfm);
|
||||
}
|
||||
@@ -708,7 +655,8 @@ EXPORT_SYMBOL_GPL(nvme_auth_revoke_tls_key);
|
||||
static int nvme_auth_secure_concat(struct nvme_ctrl *ctrl,
|
||||
struct nvme_dhchap_queue_context *chap)
|
||||
{
|
||||
u8 *psk, *digest, *tls_psk;
|
||||
u8 *psk, *tls_psk;
|
||||
char *digest;
|
||||
struct key *tls_key;
|
||||
size_t psk_len;
|
||||
int ret = 0;
|
||||
@@ -1071,12 +1019,11 @@ int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
|
||||
INIT_WORK(&ctrl->dhchap_auth_work, nvme_ctrl_auth_work);
|
||||
if (!ctrl->opts)
|
||||
return 0;
|
||||
ret = nvme_auth_generate_key(ctrl->opts->dhchap_secret,
|
||||
&ctrl->host_key);
|
||||
ret = nvme_auth_parse_key(ctrl->opts->dhchap_secret, &ctrl->host_key);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = nvme_auth_generate_key(ctrl->opts->dhchap_ctrl_secret,
|
||||
&ctrl->ctrl_key);
|
||||
ret = nvme_auth_parse_key(ctrl->opts->dhchap_ctrl_secret,
|
||||
&ctrl->ctrl_key);
|
||||
if (ret)
|
||||
goto err_free_dhchap_secret;
|
||||
|
||||
|
||||
@@ -1875,6 +1875,7 @@ static bool nvme_init_integrity(struct nvme_ns_head *head,
|
||||
break;
|
||||
}
|
||||
|
||||
bi->flags |= BLK_SPLIT_INTERVAL_CAPABLE;
|
||||
bi->metadata_size = head->ms;
|
||||
if (bi->csum_type) {
|
||||
bi->pi_tuple_size = head->pi_size;
|
||||
@@ -1883,26 +1884,6 @@ static bool nvme_init_integrity(struct nvme_ns_head *head,
|
||||
return true;
|
||||
}
|
||||
|
||||
static void nvme_config_discard(struct nvme_ns *ns, struct queue_limits *lim)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = ns->ctrl;
|
||||
|
||||
if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX))
|
||||
lim->max_hw_discard_sectors =
|
||||
nvme_lba_to_sect(ns->head, ctrl->dmrsl);
|
||||
else if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
|
||||
lim->max_hw_discard_sectors = UINT_MAX;
|
||||
else
|
||||
lim->max_hw_discard_sectors = 0;
|
||||
|
||||
lim->discard_granularity = lim->logical_block_size;
|
||||
|
||||
if (ctrl->dmrl)
|
||||
lim->max_discard_segments = ctrl->dmrl;
|
||||
else
|
||||
lim->max_discard_segments = NVME_DSM_MAX_RANGES;
|
||||
}
|
||||
|
||||
static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
|
||||
{
|
||||
return uuid_equal(&a->uuid, &b->uuid) &&
|
||||
@@ -2078,12 +2059,15 @@ static void nvme_set_ctrl_limits(struct nvme_ctrl *ctrl,
|
||||
}
|
||||
|
||||
static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
|
||||
struct queue_limits *lim)
|
||||
struct nvme_id_ns_nvm *nvm, struct queue_limits *lim)
|
||||
{
|
||||
struct nvme_ns_head *head = ns->head;
|
||||
struct nvme_ctrl *ctrl = ns->ctrl;
|
||||
u32 bs = 1U << head->lba_shift;
|
||||
u32 atomic_bs, phys_bs, io_opt = 0;
|
||||
u32 npdg = 1, npda = 1;
|
||||
bool valid = true;
|
||||
u8 optperf;
|
||||
|
||||
/*
|
||||
* The block layer can't support LBA sizes larger than the page size
|
||||
@@ -2098,7 +2082,12 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
|
||||
phys_bs = bs;
|
||||
atomic_bs = nvme_configure_atomic_write(ns, id, lim, bs);
|
||||
|
||||
if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
|
||||
optperf = id->nsfeat >> NVME_NS_FEAT_OPTPERF_SHIFT;
|
||||
if (ctrl->vs >= NVME_VS(2, 1, 0))
|
||||
optperf &= NVME_NS_FEAT_OPTPERF_MASK_2_1;
|
||||
else
|
||||
optperf &= NVME_NS_FEAT_OPTPERF_MASK;
|
||||
if (optperf) {
|
||||
/* NPWG = Namespace Preferred Write Granularity */
|
||||
phys_bs = bs * (1 + le16_to_cpu(id->npwg));
|
||||
/* NOWS = Namespace Optimal Write Size */
|
||||
@@ -2115,11 +2104,54 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
|
||||
lim->physical_block_size = min(phys_bs, atomic_bs);
|
||||
lim->io_min = phys_bs;
|
||||
lim->io_opt = io_opt;
|
||||
if ((ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) &&
|
||||
(ns->ctrl->oncs & NVME_CTRL_ONCS_DSM))
|
||||
if ((ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) &&
|
||||
(ctrl->oncs & NVME_CTRL_ONCS_DSM))
|
||||
lim->max_write_zeroes_sectors = UINT_MAX;
|
||||
else
|
||||
lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors;
|
||||
lim->max_write_zeroes_sectors = ctrl->max_zeroes_sectors;
|
||||
|
||||
if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX))
|
||||
lim->max_hw_discard_sectors =
|
||||
nvme_lba_to_sect(ns->head, ctrl->dmrsl);
|
||||
else if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
|
||||
lim->max_hw_discard_sectors = UINT_MAX;
|
||||
else
|
||||
lim->max_hw_discard_sectors = 0;
|
||||
|
||||
/*
|
||||
* NVMe namespaces advertise both a preferred deallocate granularity
|
||||
* (for a discard length) and alignment (for a discard starting offset).
|
||||
* However, Linux block devices advertise a single discard_granularity.
|
||||
* From NVM Command Set specification 1.1 section 5.2.2, the NPDGL/NPDAL
|
||||
* fields in the NVM Command Set Specific Identify Namespace structure
|
||||
* are preferred to NPDG/NPDA in the Identify Namespace structure since
|
||||
* they can represent larger values. However, NPDGL or NPDAL may be 0 if
|
||||
* unsupported. NPDG and NPDA are 0's based.
|
||||
* From Figure 115 of NVM Command Set specification 1.1, NPDGL and NPDAL
|
||||
* are supported if the high bit of OPTPERF is set. NPDG is supported if
|
||||
* the low bit of OPTPERF is set. NPDA is supported if either is set.
|
||||
* NPDG should be a multiple of NPDA, and likewise NPDGL should be a
|
||||
* multiple of NPDAL, but the spec doesn't say anything about NPDG vs.
|
||||
* NPDAL or NPDGL vs. NPDA. So compute the maximum instead of assuming
|
||||
* NPDG(L) is the larger. If neither NPDG, NPDGL, NPDA, nor NPDAL are
|
||||
* supported, default the discard_granularity to the logical block size.
|
||||
*/
|
||||
if (optperf & 0x2 && nvm && nvm->npdgl)
|
||||
npdg = le32_to_cpu(nvm->npdgl);
|
||||
else if (optperf & 0x1)
|
||||
npdg = from0based(id->npdg);
|
||||
if (optperf & 0x2 && nvm && nvm->npdal)
|
||||
npda = le32_to_cpu(nvm->npdal);
|
||||
else if (optperf)
|
||||
npda = from0based(id->npda);
|
||||
if (check_mul_overflow(max(npdg, npda), lim->logical_block_size,
|
||||
&lim->discard_granularity))
|
||||
lim->discard_granularity = lim->logical_block_size;
|
||||
|
||||
if (ctrl->dmrl)
|
||||
lim->max_discard_segments = ctrl->dmrl;
|
||||
else
|
||||
lim->max_discard_segments = NVME_DSM_MAX_RANGES;
|
||||
return valid;
|
||||
}
|
||||
|
||||
@@ -2353,7 +2385,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
|
||||
}
|
||||
lbaf = nvme_lbaf_index(id->flbas);
|
||||
|
||||
if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) {
|
||||
if (nvme_id_cns_ok(ns->ctrl, NVME_ID_CNS_CS_NS)) {
|
||||
ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
@@ -2381,10 +2413,9 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
|
||||
nvme_set_ctrl_limits(ns->ctrl, &lim, false);
|
||||
nvme_configure_metadata(ns->ctrl, ns->head, id, nvm, info);
|
||||
nvme_set_chunk_sectors(ns, id, &lim);
|
||||
if (!nvme_update_disk_info(ns, id, &lim))
|
||||
if (!nvme_update_disk_info(ns, id, nvm, &lim))
|
||||
capacity = 0;
|
||||
|
||||
nvme_config_discard(ns, &lim);
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
|
||||
ns->head->ids.csi == NVME_CSI_ZNS)
|
||||
nvme_update_zone_info(ns, &lim, &zi);
|
||||
@@ -3388,7 +3419,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
|
||||
|
||||
ctrl->dmrl = id->dmrl;
|
||||
ctrl->dmrsl = le32_to_cpu(id->dmrsl);
|
||||
if (id->wzsl)
|
||||
if (id->wzsl && !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
|
||||
ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
|
||||
|
||||
free_data:
|
||||
|
||||
@@ -154,21 +154,8 @@ void nvme_failover_req(struct request *req)
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&ns->head->requeue_lock, flags);
|
||||
for (bio = req->bio; bio; bio = bio->bi_next) {
|
||||
for (bio = req->bio; bio; bio = bio->bi_next)
|
||||
bio_set_dev(bio, ns->head->disk->part0);
|
||||
if (bio->bi_opf & REQ_POLLED) {
|
||||
bio->bi_opf &= ~REQ_POLLED;
|
||||
bio->bi_cookie = BLK_QC_T_NONE;
|
||||
}
|
||||
/*
|
||||
* The alternate request queue that we may end up submitting
|
||||
* the bio to may be frozen temporarily, in this case REQ_NOWAIT
|
||||
* will fail the I/O immediately with EAGAIN to the issuer.
|
||||
* We are not in the issuer context which cannot block. Clear
|
||||
* the flag to avoid spurious EAGAIN I/O failures.
|
||||
*/
|
||||
bio->bi_opf &= ~REQ_NOWAIT;
|
||||
}
|
||||
blk_steal_bios(&ns->head->requeue_list, req);
|
||||
spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
|
||||
|
||||
|
||||
@@ -762,6 +762,12 @@ static inline u32 nvme_bytes_to_numd(size_t len)
|
||||
return (len >> 2) - 1;
|
||||
}
|
||||
|
||||
/* Decode a 2-byte "0's based"/"0-based" field */
|
||||
static inline u32 from0based(__le16 value)
|
||||
{
|
||||
return (u32)le16_to_cpu(value) + 1;
|
||||
}
|
||||
|
||||
static inline bool nvme_is_ana_error(u16 status)
|
||||
{
|
||||
switch (status & NVME_SCT_SC_MASK) {
|
||||
|
||||
@@ -4178,6 +4178,8 @@ static const struct pci_device_id nvme_id_table[] = {
|
||||
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
|
||||
{ PCI_DEVICE(0x2646, 0x501E), /* KINGSTON OM3PGP4xxxxQ OS21011 NVMe SSD */
|
||||
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
|
||||
{ PCI_DEVICE(0x2646, 0x502F), /* KINGSTON OM3SGP4xxxxK NVMe SSD */
|
||||
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
|
||||
{ PCI_DEVICE(0x1f40, 0x1202), /* Netac Technologies Co. NV3000 NVMe SSD */
|
||||
.driver_data = NVME_QUIRK_BOGUS_NID, },
|
||||
{ PCI_DEVICE(0x1f40, 0x5236), /* Netac Technologies Co. NV7000 NVMe SSD */
|
||||
|
||||
@@ -658,7 +658,7 @@ static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev,
|
||||
struct nvme_dhchap_key *key, *host_key;
|
||||
int ret;
|
||||
|
||||
ret = nvme_auth_generate_key(dhchap_secret, &key);
|
||||
ret = nvme_auth_parse_key(dhchap_secret, &key);
|
||||
if (ret) {
|
||||
kfree(dhchap_secret);
|
||||
return ret;
|
||||
@@ -716,7 +716,7 @@ static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev,
|
||||
struct nvme_dhchap_key *key, *ctrl_key;
|
||||
int ret;
|
||||
|
||||
ret = nvme_auth_generate_key(dhchap_secret, &key);
|
||||
ret = nvme_auth_parse_key(dhchap_secret, &key);
|
||||
if (ret) {
|
||||
kfree(dhchap_secret);
|
||||
return ret;
|
||||
@@ -829,7 +829,49 @@ static ssize_t tls_configured_key_show(struct device *dev,
|
||||
|
||||
return sysfs_emit(buf, "%08x\n", key_serial(key));
|
||||
}
|
||||
static DEVICE_ATTR_RO(tls_configured_key);
|
||||
|
||||
static ssize_t tls_configured_key_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
|
||||
int error, qid;
|
||||
|
||||
error = kstrtoint(buf, 10, &qid);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/*
|
||||
* We currently only allow userspace to write a `0` indicating
|
||||
* generate a new key.
|
||||
*/
|
||||
if (qid)
|
||||
return -EINVAL;
|
||||
|
||||
if (!ctrl->opts || !ctrl->opts->concat)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
error = nvme_auth_negotiate(ctrl, 0);
|
||||
if (error < 0) {
|
||||
nvme_reset_ctrl(ctrl);
|
||||
return error;
|
||||
}
|
||||
|
||||
error = nvme_auth_wait(ctrl, 0);
|
||||
if (error < 0) {
|
||||
nvme_reset_ctrl(ctrl);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to reset the TLS connection, so let's just
|
||||
* reset the controller.
|
||||
*/
|
||||
nvme_reset_ctrl(ctrl);
|
||||
|
||||
return count;
|
||||
}
|
||||
static DEVICE_ATTR_RW(tls_configured_key);
|
||||
|
||||
static ssize_t tls_keyring_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
@@ -861,7 +903,7 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,
|
||||
!ctrl->opts->tls && !ctrl->opts->concat)
|
||||
return 0;
|
||||
if (a == &dev_attr_tls_configured_key.attr &&
|
||||
(!ctrl->opts->tls_key || ctrl->opts->concat))
|
||||
!ctrl->opts->concat)
|
||||
return 0;
|
||||
if (a == &dev_attr_tls_keyring.attr &&
|
||||
!ctrl->opts->keyring)
|
||||
|
||||
@@ -1057,6 +1057,8 @@ static void nvme_execute_identify_ns_nvm(struct nvmet_req *req)
|
||||
status = NVME_SC_INTERNAL;
|
||||
goto out;
|
||||
}
|
||||
if (req->ns->bdev)
|
||||
nvmet_bdev_set_nvm_limits(req->ns->bdev, id);
|
||||
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
|
||||
kfree(id);
|
||||
out:
|
||||
@@ -1603,7 +1605,7 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
|
||||
|
||||
pr_debug("ctrl %d update keep-alive timer for %d secs\n",
|
||||
ctrl->cntlid, ctrl->kato);
|
||||
mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
|
||||
mod_delayed_work(system_percpu_wq, &ctrl->ka_work, ctrl->kato * HZ);
|
||||
out:
|
||||
nvmet_req_complete(req, status);
|
||||
}
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/err.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/base64.h>
|
||||
#include <linux/ctype.h>
|
||||
@@ -45,15 +44,6 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
|
||||
key_hash);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (key_hash > 0) {
|
||||
/* Validate selected hash algorithm */
|
||||
const char *hmac = nvme_auth_hmac_name(key_hash);
|
||||
|
||||
if (!crypto_has_shash(hmac, 0, 0)) {
|
||||
pr_err("DH-HMAC-CHAP hash %s unsupported\n", hmac);
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
}
|
||||
dhchap_secret = kstrdup(secret, GFP_KERNEL);
|
||||
if (!dhchap_secret)
|
||||
return -ENOMEM;
|
||||
@@ -140,7 +130,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id)
|
||||
return ret;
|
||||
}
|
||||
|
||||
u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq)
|
||||
u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, bool reset)
|
||||
{
|
||||
int ret = 0;
|
||||
struct nvmet_host_link *p;
|
||||
@@ -166,7 +156,7 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (nvmet_queue_tls_keyid(sq)) {
|
||||
if (!reset && nvmet_queue_tls_keyid(sq)) {
|
||||
pr_debug("host %s tls enabled\n", ctrl->hostnqn);
|
||||
goto out_unlock;
|
||||
}
|
||||
@@ -292,47 +282,30 @@ bool nvmet_check_auth_status(struct nvmet_req *req)
|
||||
int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
|
||||
unsigned int shash_len)
|
||||
{
|
||||
struct crypto_shash *shash_tfm;
|
||||
SHASH_DESC_ON_STACK(shash, shash_tfm);
|
||||
struct nvme_auth_hmac_ctx hmac;
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
const char *hash_name;
|
||||
u8 *challenge = req->sq->dhchap_c1;
|
||||
struct nvme_dhchap_key *transformed_key;
|
||||
u8 buf[4];
|
||||
int ret;
|
||||
|
||||
hash_name = nvme_auth_hmac_name(ctrl->shash_id);
|
||||
if (!hash_name) {
|
||||
pr_warn("Hash ID %d invalid\n", ctrl->shash_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
shash_tfm = crypto_alloc_shash(hash_name, 0, 0);
|
||||
if (IS_ERR(shash_tfm)) {
|
||||
pr_err("failed to allocate shash %s\n", hash_name);
|
||||
return PTR_ERR(shash_tfm);
|
||||
}
|
||||
|
||||
if (shash_len != crypto_shash_digestsize(shash_tfm)) {
|
||||
pr_err("%s: hash len mismatch (len %d digest %d)\n",
|
||||
__func__, shash_len,
|
||||
crypto_shash_digestsize(shash_tfm));
|
||||
ret = -EINVAL;
|
||||
goto out_free_tfm;
|
||||
}
|
||||
|
||||
transformed_key = nvme_auth_transform_key(ctrl->host_key,
|
||||
ctrl->hostnqn);
|
||||
if (IS_ERR(transformed_key)) {
|
||||
ret = PTR_ERR(transformed_key);
|
||||
goto out_free_tfm;
|
||||
}
|
||||
if (IS_ERR(transformed_key))
|
||||
return PTR_ERR(transformed_key);
|
||||
|
||||
ret = crypto_shash_setkey(shash_tfm, transformed_key->key,
|
||||
ret = nvme_auth_hmac_init(&hmac, ctrl->shash_id, transformed_key->key,
|
||||
transformed_key->len);
|
||||
if (ret)
|
||||
goto out_free_response;
|
||||
|
||||
if (shash_len != nvme_auth_hmac_hash_len(ctrl->shash_id)) {
|
||||
pr_err("%s: hash len mismatch (len %u digest %zu)\n", __func__,
|
||||
shash_len, nvme_auth_hmac_hash_len(ctrl->shash_id));
|
||||
ret = -EINVAL;
|
||||
goto out_free_response;
|
||||
}
|
||||
|
||||
if (ctrl->dh_gid != NVME_AUTH_DHGROUP_NULL) {
|
||||
challenge = kmalloc(shash_len, GFP_KERNEL);
|
||||
if (!challenge) {
|
||||
@@ -345,101 +318,67 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
|
||||
req->sq->dhchap_c1,
|
||||
challenge, shash_len);
|
||||
if (ret)
|
||||
goto out;
|
||||
goto out_free_challenge;
|
||||
}
|
||||
|
||||
pr_debug("ctrl %d qid %d host response seq %u transaction %d\n",
|
||||
ctrl->cntlid, req->sq->qid, req->sq->dhchap_s1,
|
||||
req->sq->dhchap_tid);
|
||||
|
||||
shash->tfm = shash_tfm;
|
||||
ret = crypto_shash_init(shash);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, challenge, shash_len);
|
||||
if (ret)
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, challenge, shash_len);
|
||||
|
||||
put_unaligned_le32(req->sq->dhchap_s1, buf);
|
||||
ret = crypto_shash_update(shash, buf, 4);
|
||||
if (ret)
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, buf, 4);
|
||||
|
||||
put_unaligned_le16(req->sq->dhchap_tid, buf);
|
||||
ret = crypto_shash_update(shash, buf, 2);
|
||||
if (ret)
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, buf, 2);
|
||||
|
||||
*buf = req->sq->sc_c;
|
||||
ret = crypto_shash_update(shash, buf, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, "HostHost", 8);
|
||||
if (ret)
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, buf, 1);
|
||||
nvme_auth_hmac_update(&hmac, "HostHost", 8);
|
||||
memset(buf, 0, 4);
|
||||
ret = crypto_shash_update(shash, ctrl->hostnqn, strlen(ctrl->hostnqn));
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, buf, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, ctrl->subsys->subsysnqn,
|
||||
strlen(ctrl->subsys->subsysnqn));
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_final(shash, response);
|
||||
out:
|
||||
nvme_auth_hmac_update(&hmac, ctrl->hostnqn, strlen(ctrl->hostnqn));
|
||||
nvme_auth_hmac_update(&hmac, buf, 1);
|
||||
nvme_auth_hmac_update(&hmac, ctrl->subsys->subsysnqn,
|
||||
strlen(ctrl->subsys->subsysnqn));
|
||||
nvme_auth_hmac_final(&hmac, response);
|
||||
ret = 0;
|
||||
out_free_challenge:
|
||||
if (challenge != req->sq->dhchap_c1)
|
||||
kfree(challenge);
|
||||
out_free_response:
|
||||
memzero_explicit(&hmac, sizeof(hmac));
|
||||
nvme_auth_free_key(transformed_key);
|
||||
out_free_tfm:
|
||||
crypto_free_shash(shash_tfm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response,
|
||||
unsigned int shash_len)
|
||||
{
|
||||
struct crypto_shash *shash_tfm;
|
||||
struct shash_desc *shash;
|
||||
struct nvme_auth_hmac_ctx hmac;
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
const char *hash_name;
|
||||
u8 *challenge = req->sq->dhchap_c2;
|
||||
struct nvme_dhchap_key *transformed_key;
|
||||
u8 buf[4];
|
||||
int ret;
|
||||
|
||||
hash_name = nvme_auth_hmac_name(ctrl->shash_id);
|
||||
if (!hash_name) {
|
||||
pr_warn("Hash ID %d invalid\n", ctrl->shash_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
shash_tfm = crypto_alloc_shash(hash_name, 0, 0);
|
||||
if (IS_ERR(shash_tfm)) {
|
||||
pr_err("failed to allocate shash %s\n", hash_name);
|
||||
return PTR_ERR(shash_tfm);
|
||||
}
|
||||
|
||||
if (shash_len != crypto_shash_digestsize(shash_tfm)) {
|
||||
pr_debug("%s: hash len mismatch (len %d digest %d)\n",
|
||||
__func__, shash_len,
|
||||
crypto_shash_digestsize(shash_tfm));
|
||||
ret = -EINVAL;
|
||||
goto out_free_tfm;
|
||||
}
|
||||
|
||||
transformed_key = nvme_auth_transform_key(ctrl->ctrl_key,
|
||||
ctrl->subsys->subsysnqn);
|
||||
if (IS_ERR(transformed_key)) {
|
||||
ret = PTR_ERR(transformed_key);
|
||||
goto out_free_tfm;
|
||||
}
|
||||
if (IS_ERR(transformed_key))
|
||||
return PTR_ERR(transformed_key);
|
||||
|
||||
ret = crypto_shash_setkey(shash_tfm, transformed_key->key,
|
||||
ret = nvme_auth_hmac_init(&hmac, ctrl->shash_id, transformed_key->key,
|
||||
transformed_key->len);
|
||||
if (ret)
|
||||
goto out_free_response;
|
||||
|
||||
if (shash_len != nvme_auth_hmac_hash_len(ctrl->shash_id)) {
|
||||
pr_err("%s: hash len mismatch (len %u digest %zu)\n", __func__,
|
||||
shash_len, nvme_auth_hmac_hash_len(ctrl->shash_id));
|
||||
ret = -EINVAL;
|
||||
goto out_free_response;
|
||||
}
|
||||
|
||||
if (ctrl->dh_gid != NVME_AUTH_DHGROUP_NULL) {
|
||||
challenge = kmalloc(shash_len, GFP_KERNEL);
|
||||
if (!challenge) {
|
||||
@@ -455,55 +394,29 @@ int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response,
|
||||
goto out_free_challenge;
|
||||
}
|
||||
|
||||
shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(shash_tfm),
|
||||
GFP_KERNEL);
|
||||
if (!shash) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_challenge;
|
||||
}
|
||||
shash->tfm = shash_tfm;
|
||||
nvme_auth_hmac_update(&hmac, challenge, shash_len);
|
||||
|
||||
ret = crypto_shash_init(shash);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, challenge, shash_len);
|
||||
if (ret)
|
||||
goto out;
|
||||
put_unaligned_le32(req->sq->dhchap_s2, buf);
|
||||
ret = crypto_shash_update(shash, buf, 4);
|
||||
if (ret)
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, buf, 4);
|
||||
|
||||
put_unaligned_le16(req->sq->dhchap_tid, buf);
|
||||
ret = crypto_shash_update(shash, buf, 2);
|
||||
if (ret)
|
||||
goto out;
|
||||
nvme_auth_hmac_update(&hmac, buf, 2);
|
||||
|
||||
memset(buf, 0, 4);
|
||||
ret = crypto_shash_update(shash, buf, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, "Controller", 10);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, ctrl->subsys->subsysnqn,
|
||||
strlen(ctrl->subsys->subsysnqn));
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, buf, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_update(shash, ctrl->hostnqn, strlen(ctrl->hostnqn));
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = crypto_shash_final(shash, response);
|
||||
out:
|
||||
kfree(shash);
|
||||
nvme_auth_hmac_update(&hmac, buf, 1);
|
||||
nvme_auth_hmac_update(&hmac, "Controller", 10);
|
||||
nvme_auth_hmac_update(&hmac, ctrl->subsys->subsysnqn,
|
||||
strlen(ctrl->subsys->subsysnqn));
|
||||
nvme_auth_hmac_update(&hmac, buf, 1);
|
||||
nvme_auth_hmac_update(&hmac, ctrl->hostnqn, strlen(ctrl->hostnqn));
|
||||
nvme_auth_hmac_final(&hmac, response);
|
||||
ret = 0;
|
||||
out_free_challenge:
|
||||
if (challenge != req->sq->dhchap_c2)
|
||||
kfree(challenge);
|
||||
out_free_response:
|
||||
memzero_explicit(&hmac, sizeof(hmac));
|
||||
nvme_auth_free_key(transformed_key);
|
||||
out_free_tfm:
|
||||
crypto_free_shash(shash_tfm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -531,7 +444,7 @@ int nvmet_auth_ctrl_exponential(struct nvmet_req *req,
|
||||
}
|
||||
|
||||
int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,
|
||||
u8 *pkey, int pkey_size)
|
||||
const u8 *pkey, int pkey_size)
|
||||
{
|
||||
struct nvmet_ctrl *ctrl = req->sq->ctrl;
|
||||
int ret;
|
||||
@@ -557,7 +470,8 @@ int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,
|
||||
void nvmet_auth_insert_psk(struct nvmet_sq *sq)
|
||||
{
|
||||
int hash_len = nvme_auth_hmac_hash_len(sq->ctrl->shash_id);
|
||||
u8 *psk, *digest, *tls_psk;
|
||||
u8 *psk, *tls_psk;
|
||||
char *digest;
|
||||
size_t psk_len;
|
||||
int ret;
|
||||
#ifdef CONFIG_NVME_TARGET_TCP_TLS
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
#include <linux/nvme-auth.h>
|
||||
#endif
|
||||
#include <linux/nvme-keyring.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/kpp.h>
|
||||
#include <linux/nospec.h>
|
||||
|
||||
@@ -2181,8 +2180,6 @@ static ssize_t nvmet_host_dhchap_hash_store(struct config_item *item,
|
||||
hmac_id = nvme_auth_hmac_id(page);
|
||||
if (hmac_id == NVME_AUTH_HASH_INVALID)
|
||||
return -EINVAL;
|
||||
if (!crypto_has_shash(nvme_auth_hmac_name(hmac_id), 0, 0))
|
||||
return -ENOTSUPP;
|
||||
host->dhchap_hash_id = hmac_id;
|
||||
return count;
|
||||
}
|
||||
|
||||
@@ -1688,7 +1688,7 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
|
||||
if (args->hostid)
|
||||
uuid_copy(&ctrl->hostid, args->hostid);
|
||||
|
||||
dhchap_status = nvmet_setup_auth(ctrl, args->sq);
|
||||
dhchap_status = nvmet_setup_auth(ctrl, args->sq, false);
|
||||
if (dhchap_status) {
|
||||
pr_err("Failed to setup authentication, dhchap status %u\n",
|
||||
dhchap_status);
|
||||
@@ -1944,12 +1944,13 @@ static int __init nvmet_init(void)
|
||||
if (!nvmet_bvec_cache)
|
||||
return -ENOMEM;
|
||||
|
||||
zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0);
|
||||
zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM | WQ_PERCPU,
|
||||
0);
|
||||
if (!zbd_wq)
|
||||
goto out_destroy_bvec_cache;
|
||||
|
||||
buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
|
||||
WQ_MEM_RECLAIM, 0);
|
||||
WQ_MEM_RECLAIM | WQ_PERCPU, 0);
|
||||
if (!buffered_io_wq)
|
||||
goto out_free_zbd_work_queue;
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/nvme-auth.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/kpp.h>
|
||||
#include "nvmet.h"
|
||||
|
||||
@@ -75,8 +74,7 @@ static u8 nvmet_auth_negotiate(struct nvmet_req *req, void *d)
|
||||
for (i = 0; i < data->auth_protocol[0].dhchap.halen; i++) {
|
||||
u8 host_hmac_id = data->auth_protocol[0].dhchap.idlist[i];
|
||||
|
||||
if (!fallback_hash_id &&
|
||||
crypto_has_shash(nvme_auth_hmac_name(host_hmac_id), 0, 0))
|
||||
if (!fallback_hash_id && nvme_auth_hmac_hash_len(host_hmac_id))
|
||||
fallback_hash_id = host_hmac_id;
|
||||
if (ctrl->shash_id != host_hmac_id)
|
||||
continue;
|
||||
@@ -293,7 +291,8 @@ void nvmet_execute_auth_send(struct nvmet_req *req)
|
||||
pr_debug("%s: ctrl %d qid %d reset negotiation\n",
|
||||
__func__, ctrl->cntlid, req->sq->qid);
|
||||
if (!req->sq->qid) {
|
||||
dhchap_status = nvmet_setup_auth(ctrl, req->sq);
|
||||
dhchap_status = nvmet_setup_auth(ctrl, req->sq,
|
||||
true);
|
||||
if (dhchap_status) {
|
||||
pr_err("ctrl %d qid 0 failed to setup re-authentication\n",
|
||||
ctrl->cntlid);
|
||||
@@ -391,14 +390,15 @@ done:
|
||||
req->sq->dhchap_step != NVME_AUTH_DHCHAP_MESSAGE_FAILURE2) {
|
||||
unsigned long auth_expire_secs = ctrl->kato ? ctrl->kato : 120;
|
||||
|
||||
mod_delayed_work(system_wq, &req->sq->auth_expired_work,
|
||||
mod_delayed_work(system_percpu_wq, &req->sq->auth_expired_work,
|
||||
auth_expire_secs * HZ);
|
||||
goto complete;
|
||||
}
|
||||
/* Final states, clear up variables */
|
||||
nvmet_auth_sq_free(req->sq);
|
||||
if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE2)
|
||||
if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE2) {
|
||||
nvmet_auth_sq_free(req->sq);
|
||||
nvmet_ctrl_fatal_error(ctrl);
|
||||
}
|
||||
|
||||
complete:
|
||||
nvmet_req_complete(req, status);
|
||||
@@ -574,9 +574,7 @@ void nvmet_execute_auth_receive(struct nvmet_req *req)
|
||||
status = nvmet_copy_to_sgl(req, 0, d, al);
|
||||
kfree(d);
|
||||
done:
|
||||
if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2)
|
||||
nvmet_auth_sq_free(req->sq);
|
||||
else if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE1) {
|
||||
if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE1) {
|
||||
nvmet_auth_sq_free(req->sq);
|
||||
nvmet_ctrl_fatal_error(ctrl);
|
||||
}
|
||||
|
||||
@@ -792,9 +792,9 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
|
||||
if (!queue)
|
||||
return NULL;
|
||||
|
||||
queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0,
|
||||
assoc->tgtport->fc_target_port.port_num,
|
||||
assoc->a_id, qid);
|
||||
queue->work_q = alloc_workqueue("ntfc%d.%d.%d", WQ_PERCPU, 0,
|
||||
assoc->tgtport->fc_target_port.port_num,
|
||||
assoc->a_id, qid);
|
||||
if (!queue->work_q)
|
||||
goto out_free_queue;
|
||||
|
||||
|
||||
@@ -30,11 +30,11 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
|
||||
id->nacwu = lpp0b;
|
||||
|
||||
/*
|
||||
* Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
|
||||
* NOWS are defined for this namespace and should be used by
|
||||
* the host for I/O optimization.
|
||||
* OPTPERF = 11b indicates that the fields NPWG, NPWA, NPDG, NPDA,
|
||||
* NPDGL, NPDAL, and NOWS are defined for this namespace and should be
|
||||
* used by the host for I/O optimization.
|
||||
*/
|
||||
id->nsfeat |= 1 << 4;
|
||||
id->nsfeat |= 0x3 << NVME_NS_FEAT_OPTPERF_SHIFT;
|
||||
/* NPWG = Namespace Preferred Write Granularity. 0's based */
|
||||
id->npwg = to0based(bdev_io_min(bdev) / bdev_logical_block_size(bdev));
|
||||
/* NPWA = Namespace Preferred Write Alignment. 0's based */
|
||||
@@ -52,6 +52,17 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
|
||||
id->dlfeat = (1 << 3) | 0x1;
|
||||
}
|
||||
|
||||
void nvmet_bdev_set_nvm_limits(struct block_device *bdev,
|
||||
struct nvme_id_ns_nvm *id)
|
||||
{
|
||||
/*
|
||||
* NPDGL = Namespace Preferred Deallocate Granularity Large
|
||||
* NPDAL = Namespace Preferred Deallocate Alignment Large
|
||||
*/
|
||||
id->npdgl = id->npdal = cpu_to_le32(bdev_discard_granularity(bdev) /
|
||||
bdev_logical_block_size(bdev));
|
||||
}
|
||||
|
||||
void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
|
||||
{
|
||||
if (ns->bdev_file) {
|
||||
|
||||
@@ -419,7 +419,6 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
|
||||
{
|
||||
if (ctrl->ctrl.queue_count > 1) {
|
||||
nvme_quiesce_io_queues(&ctrl->ctrl);
|
||||
nvme_cancel_tagset(&ctrl->ctrl);
|
||||
nvme_loop_destroy_io_queues(ctrl);
|
||||
}
|
||||
|
||||
@@ -427,7 +426,6 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
|
||||
if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE)
|
||||
nvme_disable_ctrl(&ctrl->ctrl, true);
|
||||
|
||||
nvme_cancel_admin_tagset(&ctrl->ctrl);
|
||||
nvme_loop_destroy_admin_queue(ctrl);
|
||||
}
|
||||
|
||||
|
||||
@@ -550,6 +550,8 @@ void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl);
|
||||
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
|
||||
u32 nvmet_connect_cmd_data_len(struct nvmet_req *req);
|
||||
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id);
|
||||
void nvmet_bdev_set_nvm_limits(struct block_device *bdev,
|
||||
struct nvme_id_ns_nvm *id);
|
||||
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
|
||||
u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
|
||||
u16 nvmet_bdev_zns_parse_io_cmd(struct nvmet_req *req);
|
||||
@@ -896,7 +898,7 @@ void nvmet_execute_auth_receive(struct nvmet_req *req);
|
||||
int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
|
||||
bool set_ctrl);
|
||||
int nvmet_auth_set_host_hash(struct nvmet_host *host, const char *hash);
|
||||
u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq);
|
||||
u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, bool reset);
|
||||
void nvmet_auth_sq_init(struct nvmet_sq *sq);
|
||||
void nvmet_destroy_auth(struct nvmet_ctrl *ctrl);
|
||||
void nvmet_auth_sq_free(struct nvmet_sq *sq);
|
||||
@@ -913,11 +915,11 @@ static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq)
|
||||
int nvmet_auth_ctrl_exponential(struct nvmet_req *req,
|
||||
u8 *buf, int buf_size);
|
||||
int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,
|
||||
u8 *buf, int buf_size);
|
||||
const u8 *pkey, int pkey_size);
|
||||
void nvmet_auth_insert_psk(struct nvmet_sq *sq);
|
||||
#else
|
||||
static inline u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl,
|
||||
struct nvmet_sq *sq)
|
||||
struct nvmet_sq *sq, bool reset)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2225,7 +2225,7 @@ static int __init nvmet_tcp_init(void)
|
||||
int ret;
|
||||
|
||||
nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq",
|
||||
WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
|
||||
WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_PERCPU, 0);
|
||||
if (!nvmet_tcp_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/bsg.h>
|
||||
#include <linux/io_uring/cmd.h>
|
||||
#include <scsi/scsi.h>
|
||||
#include <scsi/scsi_ioctl.h>
|
||||
#include <scsi/scsi_cmnd.h>
|
||||
@@ -9,6 +10,178 @@
|
||||
|
||||
#define uptr64(val) ((void __user *)(uintptr_t)(val))
|
||||
|
||||
/*
|
||||
* Per-command BSG SCSI PDU stored in io_uring_cmd.pdu[32].
|
||||
* Holds temporary state between submission, completion and task_work.
|
||||
*/
|
||||
struct scsi_bsg_uring_cmd_pdu {
|
||||
struct bio *bio; /* mapped user buffer, unmap in task work */
|
||||
struct request *req; /* block request, freed in task work */
|
||||
u64 response_addr; /* user space response buffer address */
|
||||
};
|
||||
static_assert(sizeof(struct scsi_bsg_uring_cmd_pdu) <= sizeof_field(struct io_uring_cmd, pdu));
|
||||
|
||||
static inline struct scsi_bsg_uring_cmd_pdu *scsi_bsg_uring_cmd_pdu(
|
||||
struct io_uring_cmd *ioucmd)
|
||||
{
|
||||
return io_uring_cmd_to_pdu(ioucmd, struct scsi_bsg_uring_cmd_pdu);
|
||||
}
|
||||
|
||||
/* Task work: build res2 (layout in uapi/linux/bsg.h) and copy sense to user. */
|
||||
static void scsi_bsg_uring_task_cb(struct io_tw_req tw_req, io_tw_token_t tw)
|
||||
{
|
||||
struct io_uring_cmd *ioucmd = io_uring_cmd_from_tw(tw_req);
|
||||
struct scsi_bsg_uring_cmd_pdu *pdu = scsi_bsg_uring_cmd_pdu(ioucmd);
|
||||
struct request *rq = pdu->req;
|
||||
struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq);
|
||||
u64 res2;
|
||||
int ret = 0;
|
||||
u8 driver_status = 0;
|
||||
u8 sense_len_wr = 0;
|
||||
|
||||
if (pdu->bio)
|
||||
blk_rq_unmap_user(pdu->bio);
|
||||
|
||||
if (scsi_status_is_check_condition(scmd->result)) {
|
||||
driver_status = DRIVER_SENSE;
|
||||
if (pdu->response_addr)
|
||||
sense_len_wr = min_t(u8, scmd->sense_len,
|
||||
SCSI_SENSE_BUFFERSIZE);
|
||||
}
|
||||
|
||||
if (sense_len_wr) {
|
||||
if (copy_to_user(uptr64(pdu->response_addr), scmd->sense_buffer,
|
||||
sense_len_wr))
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
||||
res2 = bsg_scsi_res2_build(status_byte(scmd->result), driver_status,
|
||||
host_byte(scmd->result), sense_len_wr,
|
||||
scmd->resid_len);
|
||||
|
||||
blk_mq_free_request(rq);
|
||||
io_uring_cmd_done32(ioucmd, ret, res2,
|
||||
IO_URING_CMD_TASK_WORK_ISSUE_FLAGS);
|
||||
}
|
||||
|
||||
static enum rq_end_io_ret scsi_bsg_uring_cmd_done(struct request *req,
|
||||
blk_status_t status,
|
||||
const struct io_comp_batch *iocb)
|
||||
{
|
||||
struct io_uring_cmd *ioucmd = req->end_io_data;
|
||||
|
||||
io_uring_cmd_do_in_task_lazy(ioucmd, scsi_bsg_uring_task_cb);
|
||||
return RQ_END_IO_NONE;
|
||||
}
|
||||
|
||||
static int scsi_bsg_map_user_buffer(struct request *req,
|
||||
struct io_uring_cmd *ioucmd,
|
||||
unsigned int issue_flags, gfp_t gfp_mask)
|
||||
{
|
||||
const struct bsg_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd);
|
||||
bool is_write = cmd->dout_xfer_len > 0;
|
||||
u64 buf_addr = is_write ? cmd->dout_xferp : cmd->din_xferp;
|
||||
unsigned long buf_len = is_write ? cmd->dout_xfer_len : cmd->din_xfer_len;
|
||||
struct iov_iter iter;
|
||||
int ret;
|
||||
|
||||
if (ioucmd->flags & IORING_URING_CMD_FIXED) {
|
||||
ret = io_uring_cmd_import_fixed(buf_addr, buf_len,
|
||||
is_write ? WRITE : READ,
|
||||
&iter, ioucmd, issue_flags);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = blk_rq_map_user_iov(req->q, req, NULL, &iter, gfp_mask);
|
||||
} else {
|
||||
ret = blk_rq_map_user(req->q, req, NULL, uptr64(buf_addr),
|
||||
buf_len, gfp_mask);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int scsi_bsg_uring_cmd(struct request_queue *q, struct io_uring_cmd *ioucmd,
|
||||
unsigned int issue_flags, bool open_for_write)
|
||||
{
|
||||
struct scsi_bsg_uring_cmd_pdu *pdu = scsi_bsg_uring_cmd_pdu(ioucmd);
|
||||
const struct bsg_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd);
|
||||
struct scsi_cmnd *scmd;
|
||||
struct request *req;
|
||||
blk_mq_req_flags_t blk_flags = 0;
|
||||
gfp_t gfp_mask = GFP_KERNEL;
|
||||
int ret;
|
||||
|
||||
if (cmd->protocol != BSG_PROTOCOL_SCSI ||
|
||||
cmd->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
|
||||
return -EINVAL;
|
||||
|
||||
if (!cmd->request || cmd->request_len == 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (cmd->dout_xfer_len && cmd->din_xfer_len) {
|
||||
pr_warn_once("BIDI support in bsg has been removed.\n");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (cmd->dout_iovec_count > 0 || cmd->din_iovec_count > 0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK) {
|
||||
blk_flags = BLK_MQ_REQ_NOWAIT;
|
||||
gfp_mask = GFP_NOWAIT;
|
||||
}
|
||||
|
||||
req = scsi_alloc_request(q, cmd->dout_xfer_len ?
|
||||
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, blk_flags);
|
||||
if (IS_ERR(req))
|
||||
return PTR_ERR(req);
|
||||
|
||||
scmd = blk_mq_rq_to_pdu(req);
|
||||
if (cmd->request_len > sizeof(scmd->cmnd)) {
|
||||
ret = -EINVAL;
|
||||
goto out_free_req;
|
||||
}
|
||||
scmd->cmd_len = cmd->request_len;
|
||||
scmd->allowed = SG_DEFAULT_RETRIES;
|
||||
|
||||
if (copy_from_user(scmd->cmnd, uptr64(cmd->request), cmd->request_len)) {
|
||||
ret = -EFAULT;
|
||||
goto out_free_req;
|
||||
}
|
||||
|
||||
if (!scsi_cmd_allowed(scmd->cmnd, open_for_write)) {
|
||||
ret = -EPERM;
|
||||
goto out_free_req;
|
||||
}
|
||||
|
||||
pdu->response_addr = cmd->response;
|
||||
scmd->sense_len = cmd->max_response_len ?
|
||||
min(cmd->max_response_len, SCSI_SENSE_BUFFERSIZE) : SCSI_SENSE_BUFFERSIZE;
|
||||
|
||||
if (cmd->dout_xfer_len || cmd->din_xfer_len) {
|
||||
ret = scsi_bsg_map_user_buffer(req, ioucmd, issue_flags, gfp_mask);
|
||||
if (ret)
|
||||
goto out_free_req;
|
||||
pdu->bio = req->bio;
|
||||
} else {
|
||||
pdu->bio = NULL;
|
||||
}
|
||||
|
||||
req->timeout = cmd->timeout_ms ?
|
||||
msecs_to_jiffies(cmd->timeout_ms) : BLK_DEFAULT_SG_TIMEOUT;
|
||||
|
||||
req->end_io = scsi_bsg_uring_cmd_done;
|
||||
req->end_io_data = ioucmd;
|
||||
pdu->req = req;
|
||||
|
||||
blk_execute_rq_nowait(req, false);
|
||||
return -EIOCBQUEUED;
|
||||
|
||||
out_free_req:
|
||||
blk_mq_free_request(req);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int scsi_bsg_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr,
|
||||
bool open_for_write, unsigned int timeout)
|
||||
{
|
||||
@@ -99,5 +272,6 @@ out_put_request:
|
||||
struct bsg_device *scsi_bsg_register_queue(struct scsi_device *sdev)
|
||||
{
|
||||
return bsg_register_queue(sdev->request_queue, &sdev->sdev_gendev,
|
||||
dev_name(&sdev->sdev_gendev), scsi_bsg_sg_io_fn);
|
||||
dev_name(&sdev->sdev_gendev), scsi_bsg_sg_io_fn,
|
||||
scsi_bsg_uring_cmd);
|
||||
}
|
||||
|
||||
@@ -173,7 +173,7 @@ static int fd_configure_device(struct se_device *dev)
|
||||
*/
|
||||
dev->dev_attrib.max_write_same_len = 0xFFFF;
|
||||
|
||||
if (bdev_nonrot(bdev))
|
||||
if (!bdev_rot(bdev))
|
||||
dev->dev_attrib.is_nonrot = 1;
|
||||
} else {
|
||||
if (!(fd_dev->fbd_flags & FBDF_HAS_SIZE)) {
|
||||
|
||||
@@ -148,7 +148,7 @@ static int iblock_configure_device(struct se_device *dev)
|
||||
else
|
||||
dev->dev_attrib.max_write_same_len = 0xFFFF;
|
||||
|
||||
if (bdev_nonrot(bd))
|
||||
if (!bdev_rot(bd))
|
||||
dev->dev_attrib.is_nonrot = 1;
|
||||
|
||||
target_configure_write_atomic_from_bdev(&dev->dev_attrib, bd);
|
||||
|
||||
@@ -694,7 +694,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
|
||||
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
|
||||
}
|
||||
|
||||
if (!bdev_nonrot(file_bdev(bdev_file)))
|
||||
if (bdev_rot(file_bdev(bdev_file)))
|
||||
fs_devices->rotating = true;
|
||||
|
||||
if (bdev_max_discard_sectors(file_bdev(bdev_file)))
|
||||
@@ -2919,7 +2919,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
|
||||
atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
|
||||
|
||||
if (!bdev_nonrot(device->bdev))
|
||||
if (bdev_rot(device->bdev))
|
||||
fs_devices->rotating = true;
|
||||
|
||||
orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
|
||||
|
||||
@@ -73,7 +73,7 @@ static int mbt_mb_init(struct super_block *sb)
|
||||
ext4_fsblk_t block;
|
||||
int ret;
|
||||
|
||||
/* needed by ext4_mb_init->bdev_nonrot(sb->s_bdev) */
|
||||
/* needed by ext4_mb_init->bdev_rot(sb->s_bdev) */
|
||||
sb->s_bdev = kzalloc_obj(*sb->s_bdev);
|
||||
if (sb->s_bdev == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -3840,7 +3840,7 @@ int ext4_mb_init(struct super_block *sb)
|
||||
spin_lock_init(&lg->lg_prealloc_lock);
|
||||
}
|
||||
|
||||
if (bdev_nonrot(sb->s_bdev))
|
||||
if (!bdev_rot(sb->s_bdev))
|
||||
sbi->s_mb_max_linear_groups = 0;
|
||||
else
|
||||
sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT;
|
||||
|
||||
@@ -670,7 +670,6 @@ xfs_zone_gc_start_chunk(
|
||||
struct xfs_inode *ip;
|
||||
struct bio *bio;
|
||||
xfs_daddr_t daddr;
|
||||
unsigned int len;
|
||||
bool is_seq;
|
||||
|
||||
if (xfs_is_shutdown(mp))
|
||||
@@ -685,15 +684,16 @@ xfs_zone_gc_start_chunk(
|
||||
return false;
|
||||
}
|
||||
|
||||
len = XFS_FSB_TO_B(mp, irec.rm_blockcount);
|
||||
bio = bio_alloc_bioset(bdev,
|
||||
min(howmany(len, XFS_GC_BUF_SIZE) + 1, XFS_GC_NR_BUFS),
|
||||
REQ_OP_READ, GFP_NOFS, &data->bio_set);
|
||||
|
||||
/*
|
||||
* Scratch allocation can wrap around to the same buffer again,
|
||||
* provision an extra bvec for that case.
|
||||
*/
|
||||
bio = bio_alloc_bioset(bdev, XFS_GC_NR_BUFS + 1, REQ_OP_READ, GFP_NOFS,
|
||||
&data->bio_set);
|
||||
chunk = container_of(bio, struct xfs_gc_bio, bio);
|
||||
chunk->ip = ip;
|
||||
chunk->offset = XFS_FSB_TO_B(mp, irec.rm_offset);
|
||||
chunk->len = len;
|
||||
chunk->len = XFS_FSB_TO_B(mp, irec.rm_blockcount);
|
||||
chunk->old_startblock =
|
||||
xfs_rgbno_to_rtb(iter->victim_rtg, irec.rm_startblock);
|
||||
chunk->new_daddr = daddr;
|
||||
@@ -707,8 +707,9 @@ xfs_zone_gc_start_chunk(
|
||||
bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
|
||||
bio->bi_end_io = xfs_zone_gc_end_io;
|
||||
xfs_zone_gc_add_data(chunk);
|
||||
data->scratch_head = (data->scratch_head + len) % data->scratch_size;
|
||||
data->scratch_available -= len;
|
||||
data->scratch_head =
|
||||
(data->scratch_head + chunk->len) % data->scratch_size;
|
||||
data->scratch_available -= chunk->len;
|
||||
|
||||
XFS_STATS_INC(mp, xs_gc_read_calls);
|
||||
|
||||
@@ -899,9 +900,10 @@ out:
|
||||
|
||||
static void
|
||||
xfs_submit_zone_reset_bio(
|
||||
struct xfs_rtgroup *rtg,
|
||||
struct bio *bio)
|
||||
struct bio *bio,
|
||||
void *priv)
|
||||
{
|
||||
struct xfs_rtgroup *rtg = priv;
|
||||
struct xfs_mount *mp = rtg_mount(rtg);
|
||||
|
||||
trace_xfs_zone_reset(rtg);
|
||||
@@ -933,26 +935,16 @@ xfs_submit_zone_reset_bio(
|
||||
submit_bio(bio);
|
||||
}
|
||||
|
||||
static void xfs_bio_wait_endio(struct bio *bio)
|
||||
{
|
||||
complete(bio->bi_private);
|
||||
}
|
||||
|
||||
int
|
||||
xfs_zone_gc_reset_sync(
|
||||
struct xfs_rtgroup *rtg)
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK(done);
|
||||
struct bio bio;
|
||||
int error;
|
||||
|
||||
bio_init(&bio, rtg_mount(rtg)->m_rtdev_targp->bt_bdev, NULL, 0,
|
||||
REQ_OP_ZONE_RESET | REQ_SYNC);
|
||||
bio.bi_private = &done;
|
||||
bio.bi_end_io = xfs_bio_wait_endio;
|
||||
xfs_submit_zone_reset_bio(rtg, &bio);
|
||||
wait_for_completion_io(&done);
|
||||
|
||||
bio_await(&bio, rtg, xfs_submit_zone_reset_bio);
|
||||
error = blk_status_to_errno(bio.bi_status);
|
||||
bio_uninit(&bio);
|
||||
return error;
|
||||
@@ -989,7 +981,7 @@ xfs_zone_gc_reset_zones(
|
||||
chunk->data = data;
|
||||
WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
|
||||
list_add_tail(&chunk->entry, &data->resetting);
|
||||
xfs_submit_zone_reset_bio(rtg, bio);
|
||||
xfs_submit_zone_reset_bio(bio, rtg);
|
||||
} while (next);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* HKDF: HMAC-based Key Derivation Function (HKDF), RFC 5869
|
||||
*
|
||||
* Extracted from fs/crypto/hkdf.c, which has
|
||||
* Copyright 2019 Google LLC
|
||||
*/
|
||||
|
||||
#ifndef _CRYPTO_HKDF_H
|
||||
#define _CRYPTO_HKDF_H
|
||||
|
||||
#include <crypto/hash.h>
|
||||
|
||||
int hkdf_extract(struct crypto_shash *hmac_tfm, const u8 *ikm,
|
||||
unsigned int ikmlen, const u8 *salt, unsigned int saltlen,
|
||||
u8 *prk);
|
||||
int hkdf_expand(struct crypto_shash *hmac_tfm,
|
||||
const u8 *info, unsigned int infolen,
|
||||
u8 *okm, unsigned int okmlen);
|
||||
#endif
|
||||
@@ -350,8 +350,7 @@ extern void bioset_exit(struct bio_set *);
|
||||
extern int biovec_init_pool(mempool_t *pool, int pool_entries);
|
||||
|
||||
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
|
||||
blk_opf_t opf, gfp_t gfp_mask,
|
||||
struct bio_set *bs);
|
||||
blk_opf_t opf, gfp_t gfp, struct bio_set *bs);
|
||||
struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask);
|
||||
extern void bio_put(struct bio *);
|
||||
|
||||
@@ -433,6 +432,8 @@ extern void bio_uninit(struct bio *);
|
||||
void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf);
|
||||
void bio_reuse(struct bio *bio, blk_opf_t opf);
|
||||
void bio_chain(struct bio *, struct bio *);
|
||||
void bio_await(struct bio *bio, void *priv,
|
||||
void (*submit)(struct bio *bio, void *priv));
|
||||
|
||||
int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len,
|
||||
unsigned off);
|
||||
|
||||
@@ -14,6 +14,7 @@ enum blk_integrity_flags {
|
||||
BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2,
|
||||
BLK_INTEGRITY_REF_TAG = 1 << 3,
|
||||
BLK_INTEGRITY_STACKED = 1 << 4,
|
||||
BLK_SPLIT_INTERVAL_CAPABLE = 1 << 5,
|
||||
};
|
||||
|
||||
const char *blk_integrity_profile_name(struct blk_integrity *bi);
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/gfp.h>
|
||||
@@ -201,10 +202,14 @@ struct gendisk {
|
||||
u8 __rcu *zones_cond;
|
||||
unsigned int zone_wplugs_hash_bits;
|
||||
atomic_t nr_zone_wplugs;
|
||||
spinlock_t zone_wplugs_lock;
|
||||
spinlock_t zone_wplugs_hash_lock;
|
||||
struct mempool *zone_wplugs_pool;
|
||||
struct hlist_head *zone_wplugs_hash;
|
||||
struct workqueue_struct *zone_wplugs_wq;
|
||||
spinlock_t zone_wplugs_list_lock;
|
||||
struct list_head zone_wplugs_list;
|
||||
struct task_struct *zone_wplugs_worker;
|
||||
struct completion zone_wplugs_worker_bio_done;
|
||||
#endif /* CONFIG_BLK_DEV_ZONED */
|
||||
|
||||
#if IS_ENABLED(CONFIG_CDROM)
|
||||
@@ -503,7 +508,7 @@ struct request_queue {
|
||||
|
||||
/* hw dispatch queues */
|
||||
unsigned int nr_hw_queues;
|
||||
struct blk_mq_hw_ctx * __rcu *queue_hw_ctx;
|
||||
struct blk_mq_hw_ctx * __rcu *queue_hw_ctx __counted_by_ptr(nr_hw_queues);
|
||||
|
||||
struct percpu_ref q_usage_counter;
|
||||
struct lock_class_key io_lock_cls_key;
|
||||
@@ -669,6 +674,7 @@ enum {
|
||||
QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */
|
||||
QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */
|
||||
QUEUE_FLAG_BIO_ISSUE_TIME, /* record bio->issue_time_ns */
|
||||
QUEUE_FLAG_ZONED_QD1_WRITES, /* Limit zoned devices writes to QD=1 */
|
||||
QUEUE_FLAG_MAX
|
||||
};
|
||||
|
||||
@@ -708,6 +714,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
|
||||
test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags)
|
||||
#define blk_queue_no_elv_switch(q) \
|
||||
test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags)
|
||||
#define blk_queue_zoned_qd1_writes(q) \
|
||||
test_bit(QUEUE_FLAG_ZONED_QD1_WRITES, &(q)->queue_flags)
|
||||
|
||||
extern void blk_set_pm_only(struct request_queue *q);
|
||||
extern void blk_clear_pm_only(struct request_queue *q);
|
||||
@@ -1468,11 +1476,6 @@ static inline bool bdev_rot(struct block_device *bdev)
|
||||
return blk_queue_rot(bdev_get_queue(bdev));
|
||||
}
|
||||
|
||||
static inline bool bdev_nonrot(struct block_device *bdev)
|
||||
{
|
||||
return !bdev_rot(bdev);
|
||||
}
|
||||
|
||||
static inline bool bdev_synchronous(struct block_device *bdev)
|
||||
{
|
||||
return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS;
|
||||
|
||||
@@ -7,13 +7,17 @@
|
||||
struct bsg_device;
|
||||
struct device;
|
||||
struct request_queue;
|
||||
struct io_uring_cmd;
|
||||
|
||||
typedef int (bsg_sg_io_fn)(struct request_queue *, struct sg_io_v4 *hdr,
|
||||
bool open_for_write, unsigned int timeout);
|
||||
|
||||
typedef int (bsg_uring_cmd_fn)(struct request_queue *q, struct io_uring_cmd *ioucmd,
|
||||
unsigned int issue_flags, bool open_for_write);
|
||||
|
||||
struct bsg_device *bsg_register_queue(struct request_queue *q,
|
||||
struct device *parent, const char *name,
|
||||
bsg_sg_io_fn *sg_io_fn);
|
||||
bsg_sg_io_fn *sg_io_fn, bsg_uring_cmd_fn *uring_cmd_fn);
|
||||
void bsg_unregister_queue(struct bsg_device *bcd);
|
||||
|
||||
#endif /* _LINUX_BSG_H */
|
||||
|
||||
@@ -203,15 +203,6 @@ static inline void bvec_iter_advance_single(const struct bio_vec *bv,
|
||||
((bvl = mp_bvec_iter_bvec((bio_vec), (iter))), 1); \
|
||||
bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len))
|
||||
|
||||
/* for iterating one bio from start to end */
|
||||
#define BVEC_ITER_ALL_INIT (struct bvec_iter) \
|
||||
{ \
|
||||
.bi_sector = 0, \
|
||||
.bi_size = UINT_MAX, \
|
||||
.bi_idx = 0, \
|
||||
.bi_bvec_done = 0, \
|
||||
}
|
||||
|
||||
static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all)
|
||||
{
|
||||
iter_all->done = 0;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user