mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- MD pull requests via Song:
- Cleanup redundant checks (Yu Kuai)
- Remove deprecated headers (Marc Zyngier, Song Liu)
- Concurrency fixes (Li Lingfeng)
- Memory leak fix (Li Nan)
- Refactor raid1 read_balance (Yu Kuai, Paul Luse)
- Clean up and fix for md_ioctl (Li Nan)
- Other small fixes (Gui-Dong Han, Heming Zhao)
- MD atomic limits (Christoph)
- NVMe pull request via Keith:
- RDMA target enhancements (Max)
- Fabrics fixes (Max, Guixin, Hannes)
- Atomic queue_limits usage (Christoph)
- Const use for class_register (Ricardo)
- Identification error handling fixes (Shin'ichiro, Keith)
- Improvement and cleanup for cached request handling (Christoph)
- Moving towards atomic queue limits. Core changes and driver bits so
far (Christoph)
- Fix UAF issues in aoeblk (Chun-Yi)
- Zoned fix and cleanups (Damien)
- s390 dasd cleanups and fixes (Jan, Miroslav)
- Block issue timestamp caching (me)
- noio scope guarding for zoned IO (Johannes)
- block/nvme PI improvements (Kanchan)
- Ability to terminate long running discard loop (Keith)
- bdev revalidation fix (Li)
- Get rid of old nr_queues hack for kdump kernels (Ming)
- Support for async deletion of ublk (Ming)
- Improve IRQ bio recycling (Pavel)
- Factor in CPU capacity for remote vs local completion (Qais)
- Add shared_tags configfs entry for null_blk (Shin'ichiro)
- Fix for a regression in page refcounts introduced by the folio
unification (Tony)
- Misc fixes and cleanups (Arnd, Colin, John, Kunwu, Li, Navid,
Ricardo, Roman, Tang, Uwe)
* tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux: (221 commits)
block: partitions: only define function mac_fix_string for CONFIG_PPC_PMAC
block/swim: Convert to platform remove callback returning void
cdrom: gdrom: Convert to platform remove callback returning void
block: remove disk_stack_limits
md: remove mddev->queue
md: don't initialize queue limits
md/raid10: use the atomic queue limit update APIs
md/raid5: use the atomic queue limit update APIs
md/raid1: use the atomic queue limit update APIs
md/raid0: use the atomic queue limit update APIs
md: add queue limit helpers
md: add a mddev_is_dm helper
md: add a mddev_add_trace_msg helper
md: add a mddev_trace_remap helper
bcache: move calculation of stripe_size and io_opt into bcache_device_init
virtio_blk: Do not use disk_set_max_open/active_zones()
aoe: fix the potential use-after-free problem in aoecmd_cfg_pkts
block: move capacity validation to blkpg_do_ioctl()
block: prevent division by zero in blk_rq_stat_sum()
drbd: atomically update queue limits in drbd_reconsider_queue_parameters
...
This commit is contained in:
@@ -1189,9 +1189,31 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity)
|
||||
static unsigned int drbd_max_peer_bio_size(struct drbd_device *device)
|
||||
{
|
||||
q->limits.discard_granularity = granularity;
|
||||
/*
|
||||
* We may ignore peer limits if the peer is modern enough. From 8.3.8
|
||||
* onwards the peer can use multiple BIOs for a single peer_request.
|
||||
*/
|
||||
if (device->state.conn < C_WF_REPORT_PARAMS)
|
||||
return device->peer_max_bio_size;
|
||||
|
||||
if (first_peer_device(device)->connection->agreed_pro_version < 94)
|
||||
return min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
|
||||
|
||||
/*
|
||||
* Correct old drbd (up to 8.3.7) if it believes it can do more than
|
||||
* 32KiB.
|
||||
*/
|
||||
if (first_peer_device(device)->connection->agreed_pro_version == 94)
|
||||
return DRBD_MAX_SIZE_H80_PACKET;
|
||||
|
||||
/*
|
||||
* drbd 8.3.8 onwards, before 8.4.0
|
||||
*/
|
||||
if (first_peer_device(device)->connection->agreed_pro_version < 100)
|
||||
return DRBD_MAX_BIO_SIZE_P95;
|
||||
return DRBD_MAX_BIO_SIZE;
|
||||
}
|
||||
|
||||
static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
|
||||
@@ -1204,24 +1226,81 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
|
||||
return AL_EXTENT_SIZE >> 9;
|
||||
}
|
||||
|
||||
static void decide_on_discard_support(struct drbd_device *device,
|
||||
static bool drbd_discard_supported(struct drbd_connection *connection,
|
||||
struct drbd_backing_dev *bdev)
|
||||
{
|
||||
struct drbd_connection *connection =
|
||||
first_peer_device(device)->connection;
|
||||
struct request_queue *q = device->rq_queue;
|
||||
unsigned int max_discard_sectors;
|
||||
|
||||
if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
|
||||
goto not_supported;
|
||||
return false;
|
||||
|
||||
if (connection->cstate >= C_CONNECTED &&
|
||||
!(connection->agreed_features & DRBD_FF_TRIM)) {
|
||||
drbd_info(connection,
|
||||
"peer DRBD too old, does not support TRIM: disabling discards\n");
|
||||
goto not_supported;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* This is the workaround for "bio would need to, but cannot, be split" */
|
||||
static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device)
|
||||
{
|
||||
unsigned int max_segments;
|
||||
|
||||
rcu_read_lock();
|
||||
max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
|
||||
rcu_read_unlock();
|
||||
|
||||
if (!max_segments)
|
||||
return BLK_MAX_SEGMENTS;
|
||||
return max_segments;
|
||||
}
|
||||
|
||||
void drbd_reconsider_queue_parameters(struct drbd_device *device,
|
||||
struct drbd_backing_dev *bdev, struct o_qlim *o)
|
||||
{
|
||||
struct drbd_connection *connection =
|
||||
first_peer_device(device)->connection;
|
||||
struct request_queue * const q = device->rq_queue;
|
||||
unsigned int now = queue_max_hw_sectors(q) << 9;
|
||||
struct queue_limits lim;
|
||||
struct request_queue *b = NULL;
|
||||
unsigned int new;
|
||||
|
||||
if (bdev) {
|
||||
b = bdev->backing_bdev->bd_disk->queue;
|
||||
|
||||
device->local_max_bio_size =
|
||||
queue_max_hw_sectors(b) << SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
/*
|
||||
* We may later detach and re-attach on a disconnected Primary. Avoid
|
||||
* decreasing the value in this case.
|
||||
*
|
||||
* We want to store what we know the peer DRBD can handle, not what the
|
||||
* peer IO backend can handle.
|
||||
*/
|
||||
new = min3(DRBD_MAX_BIO_SIZE, device->local_max_bio_size,
|
||||
max(drbd_max_peer_bio_size(device), device->peer_max_bio_size));
|
||||
if (new != now) {
|
||||
if (device->state.role == R_PRIMARY && new < now)
|
||||
drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n",
|
||||
new, now);
|
||||
drbd_info(device, "max BIO size = %u\n", new);
|
||||
}
|
||||
|
||||
lim = queue_limits_start_update(q);
|
||||
if (bdev) {
|
||||
blk_set_stacking_limits(&lim);
|
||||
lim.max_segments = drbd_backing_dev_max_segments(device);
|
||||
} else {
|
||||
lim.max_segments = BLK_MAX_SEGMENTS;
|
||||
}
|
||||
|
||||
lim.max_hw_sectors = new >> SECTOR_SHIFT;
|
||||
lim.seg_boundary_mask = PAGE_SIZE - 1;
|
||||
|
||||
/*
|
||||
* We don't care for the granularity, really.
|
||||
*
|
||||
@@ -1230,123 +1309,36 @@ static void decide_on_discard_support(struct drbd_device *device,
|
||||
* problem, really. If you care, you need to use devices with similar
|
||||
* topology on all peers.
|
||||
*/
|
||||
blk_queue_discard_granularity(q, 512);
|
||||
max_discard_sectors = drbd_max_discard_sectors(connection);
|
||||
blk_queue_max_discard_sectors(q, max_discard_sectors);
|
||||
blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
|
||||
return;
|
||||
if (drbd_discard_supported(connection, bdev)) {
|
||||
lim.discard_granularity = 512;
|
||||
lim.max_hw_discard_sectors =
|
||||
drbd_max_discard_sectors(connection);
|
||||
} else {
|
||||
lim.discard_granularity = 0;
|
||||
lim.max_hw_discard_sectors = 0;
|
||||
}
|
||||
|
||||
not_supported:
|
||||
blk_queue_discard_granularity(q, 0);
|
||||
blk_queue_max_discard_sectors(q, 0);
|
||||
}
|
||||
if (bdev)
|
||||
blk_stack_limits(&lim, &b->limits, 0);
|
||||
|
||||
static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
|
||||
{
|
||||
/* Fixup max_write_zeroes_sectors after blk_stack_limits():
|
||||
* if we can handle "zeroes" efficiently on the protocol,
|
||||
* we want to do that, even if our backend does not announce
|
||||
* max_write_zeroes_sectors itself. */
|
||||
struct drbd_connection *connection = first_peer_device(device)->connection;
|
||||
/* If the peer announces WZEROES support, use it. Otherwise, rather
|
||||
* send explicit zeroes than rely on some discard-zeroes-data magic. */
|
||||
/*
|
||||
* If we can handle "zeroes" efficiently on the protocol, we want to do
|
||||
* that, even if our backend does not announce max_write_zeroes_sectors
|
||||
* itself.
|
||||
*/
|
||||
if (connection->agreed_features & DRBD_FF_WZEROES)
|
||||
q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
|
||||
lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
|
||||
else
|
||||
q->limits.max_write_zeroes_sectors = 0;
|
||||
}
|
||||
lim.max_write_zeroes_sectors = 0;
|
||||
|
||||
static void fixup_discard_support(struct drbd_device *device, struct request_queue *q)
|
||||
{
|
||||
unsigned int max_discard = device->rq_queue->limits.max_discard_sectors;
|
||||
unsigned int discard_granularity =
|
||||
device->rq_queue->limits.discard_granularity >> SECTOR_SHIFT;
|
||||
|
||||
if (discard_granularity > max_discard) {
|
||||
blk_queue_discard_granularity(q, 0);
|
||||
blk_queue_max_discard_sectors(q, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
|
||||
unsigned int max_bio_size, struct o_qlim *o)
|
||||
{
|
||||
struct request_queue * const q = device->rq_queue;
|
||||
unsigned int max_hw_sectors = max_bio_size >> 9;
|
||||
unsigned int max_segments = 0;
|
||||
struct request_queue *b = NULL;
|
||||
struct disk_conf *dc;
|
||||
|
||||
if (bdev) {
|
||||
b = bdev->backing_bdev->bd_disk->queue;
|
||||
|
||||
max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
|
||||
rcu_read_lock();
|
||||
dc = rcu_dereference(device->ldev->disk_conf);
|
||||
max_segments = dc->max_bio_bvecs;
|
||||
rcu_read_unlock();
|
||||
|
||||
blk_set_stacking_limits(&q->limits);
|
||||
if ((lim.discard_granularity >> SECTOR_SHIFT) >
|
||||
lim.max_hw_discard_sectors) {
|
||||
lim.discard_granularity = 0;
|
||||
lim.max_hw_discard_sectors = 0;
|
||||
}
|
||||
|
||||
blk_queue_max_hw_sectors(q, max_hw_sectors);
|
||||
/* This is the workaround for "bio would need to, but cannot, be split" */
|
||||
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
|
||||
blk_queue_segment_boundary(q, PAGE_SIZE-1);
|
||||
decide_on_discard_support(device, bdev);
|
||||
|
||||
if (b) {
|
||||
blk_stack_limits(&q->limits, &b->limits, 0);
|
||||
disk_update_readahead(device->vdisk);
|
||||
}
|
||||
fixup_write_zeroes(device, q);
|
||||
fixup_discard_support(device, q);
|
||||
}
|
||||
|
||||
void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
|
||||
{
|
||||
unsigned int now, new, local, peer;
|
||||
|
||||
now = queue_max_hw_sectors(device->rq_queue) << 9;
|
||||
local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
|
||||
peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
|
||||
|
||||
if (bdev) {
|
||||
local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
|
||||
device->local_max_bio_size = local;
|
||||
}
|
||||
local = min(local, DRBD_MAX_BIO_SIZE);
|
||||
|
||||
/* We may ignore peer limits if the peer is modern enough.
|
||||
Because new from 8.3.8 onwards the peer can use multiple
|
||||
BIOs for a single peer_request */
|
||||
if (device->state.conn >= C_WF_REPORT_PARAMS) {
|
||||
if (first_peer_device(device)->connection->agreed_pro_version < 94)
|
||||
peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
|
||||
/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
|
||||
else if (first_peer_device(device)->connection->agreed_pro_version == 94)
|
||||
peer = DRBD_MAX_SIZE_H80_PACKET;
|
||||
else if (first_peer_device(device)->connection->agreed_pro_version < 100)
|
||||
peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */
|
||||
else
|
||||
peer = DRBD_MAX_BIO_SIZE;
|
||||
|
||||
/* We may later detach and re-attach on a disconnected Primary.
|
||||
* Avoid this setting to jump back in that case.
|
||||
* We want to store what we know the peer DRBD can handle,
|
||||
* not what the peer IO backend can handle. */
|
||||
if (peer > device->peer_max_bio_size)
|
||||
device->peer_max_bio_size = peer;
|
||||
}
|
||||
new = min(local, peer);
|
||||
|
||||
if (device->state.role == R_PRIMARY && new < now)
|
||||
drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
|
||||
|
||||
if (new != now)
|
||||
drbd_info(device, "max BIO size = %u\n", new);
|
||||
|
||||
drbd_setup_queue_param(device, bdev, new, o);
|
||||
if (queue_limits_commit_update(q, &lim))
|
||||
drbd_err(device, "setting new queue limits failed\n");
|
||||
}
|
||||
|
||||
/* Starts the worker thread */
|
||||
|
||||
Reference in New Issue
Block a user