mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
md: allow configuring logical block size
Previously, a RAID array used the maximum logical block size (LBS) of all
member disks. Adding a larger-LBS disk at runtime could unexpectedly increase
the RAID's LBS, risking corruption of existing partitions. This can be
reproduced by:

```
# LBS of sd[de] is 512 bytes, sdf is 4096 bytes.
mdadm -CRq /dev/md0 -l1 -n3 /dev/sd[de] missing --assume-clean

# LBS is 512
cat /sys/block/md0/queue/logical_block_size

# create partition md0p1
parted -s /dev/md0 mklabel gpt mkpart primary 1MiB 100%
lsblk | grep md0p1

# LBS becomes 4096 after adding sdf
mdadm --add -q /dev/md0 /dev/sdf
cat /sys/block/md0/queue/logical_block_size

# partition lost
partprobe /dev/md0
lsblk | grep md0p1
```

Simply restricting larger-LBS disks is inflexible. In some scenarios, only
disks with 512 bytes LBS are available currently, but later, disks with 4KB
LBS may be added to the array.

Making LBS configurable is the best way to solve this scenario. After this
patch, the raid will:
  - store LBS in disk metadata
  - add a read-write sysfs 'mdX/logical_block_size'

Future mdadm should support setting LBS via the metadata field during RAID
creation and via the new sysfs attribute. Though the kernel allows runtime LBS
changes, users should avoid modifying it after creating partitions or
filesystems to prevent compatibility issues.

Only 1.x metadata supports configurable LBS. 0.90 metadata inits all fields to
default values at auto-detect. Supporting 0.90 would require more extensive
changes and no such use case has been observed.

Note that many RAID paths rely on PAGE_SIZE alignment, including for metadata
I/O. An LBS larger than PAGE_SIZE will result in metadata read/write failures,
so this configuration should be prevented.

Link: https://lore.kernel.org/linux-raid/20251103125757.1405796-6-linan666@huaweicloud.com
Signed-off-by: Li Nan <linan122@huawei.com>
Reviewed-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
This commit is contained in:
@@ -1999,6 +1999,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc
|
||||
mddev->layout = le32_to_cpu(sb->layout);
|
||||
mddev->raid_disks = le32_to_cpu(sb->raid_disks);
|
||||
mddev->dev_sectors = le64_to_cpu(sb->size);
|
||||
mddev->logical_block_size = le32_to_cpu(sb->logical_block_size);
|
||||
mddev->events = ev1;
|
||||
mddev->bitmap_info.offset = 0;
|
||||
mddev->bitmap_info.space = 0;
|
||||
@@ -2208,6 +2209,7 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
|
||||
sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
|
||||
sb->level = cpu_to_le32(mddev->level);
|
||||
sb->layout = cpu_to_le32(mddev->layout);
|
||||
sb->logical_block_size = cpu_to_le32(mddev->logical_block_size);
|
||||
if (test_bit(FailFast, &rdev->flags))
|
||||
sb->devflags |= FailFast1;
|
||||
else
|
||||
@@ -5936,6 +5938,68 @@ static struct md_sysfs_entry md_serialize_policy =
|
||||
__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
|
||||
serialize_policy_store);
|
||||
|
||||
static int mddev_set_logical_block_size(struct mddev *mddev,
|
||||
unsigned int lbs)
|
||||
{
|
||||
int err = 0;
|
||||
struct queue_limits lim;
|
||||
|
||||
if (queue_logical_block_size(mddev->gendisk->queue) >= lbs) {
|
||||
pr_err("%s: Cannot set LBS smaller than mddev LBS %u\n",
|
||||
mdname(mddev), lbs);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
lim = queue_limits_start_update(mddev->gendisk->queue);
|
||||
lim.logical_block_size = lbs;
|
||||
pr_info("%s: logical_block_size is changed, data may be lost\n",
|
||||
mdname(mddev));
|
||||
err = queue_limits_commit_update(mddev->gendisk->queue, &lim);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
mddev->logical_block_size = lbs;
|
||||
/* New lbs will be written to superblock after array is running */
|
||||
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
lbs_show(struct mddev *mddev, char *page)
|
||||
{
|
||||
return sprintf(page, "%u\n", mddev->logical_block_size);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
lbs_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
{
|
||||
unsigned int lbs;
|
||||
int err = -EBUSY;
|
||||
|
||||
/* Only 1.x meta supports configurable LBS */
|
||||
if (mddev->major_version == 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (mddev->pers)
|
||||
return -EBUSY;
|
||||
|
||||
err = kstrtouint(buf, 10, &lbs);
|
||||
if (err < 0)
|
||||
return -EINVAL;
|
||||
|
||||
err = mddev_lock(mddev);
|
||||
if (err)
|
||||
goto unlock;
|
||||
|
||||
err = mddev_set_logical_block_size(mddev, lbs);
|
||||
|
||||
unlock:
|
||||
mddev_unlock(mddev);
|
||||
return err ?: len;
|
||||
}
|
||||
|
||||
static struct md_sysfs_entry md_logical_block_size =
|
||||
__ATTR(logical_block_size, 0644, lbs_show, lbs_store);
|
||||
|
||||
static struct attribute *md_default_attrs[] = {
|
||||
&md_level.attr,
|
||||
@@ -5958,6 +6022,7 @@ static struct attribute *md_default_attrs[] = {
|
||||
&md_consistency_policy.attr,
|
||||
&md_fail_last_dev.attr,
|
||||
&md_serialize_policy.attr,
|
||||
&md_logical_block_size.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -6088,6 +6153,17 @@ int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Until RAID gains folio support, the logical_block_size
|
||||
* must not be larger than the page size.
|
||||
*/
|
||||
if (lim->logical_block_size > PAGE_SIZE) {
|
||||
pr_err("%s: logical_block_size must not larger than PAGE_SIZE\n",
|
||||
mdname(mddev));
|
||||
return -EINVAL;
|
||||
}
|
||||
mddev->logical_block_size = lim->logical_block_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits);
|
||||
@@ -6699,6 +6775,7 @@ static void md_clean(struct mddev *mddev)
|
||||
mddev->chunk_sectors = 0;
|
||||
mddev->ctime = mddev->utime = 0;
|
||||
mddev->layout = 0;
|
||||
mddev->logical_block_size = 0;
|
||||
mddev->max_disks = 0;
|
||||
mddev->events = 0;
|
||||
mddev->can_decrease_events = 0;
|
||||
|
||||
Reference in New Issue
Block a user