md: mark rdev Faulty when badblocks setting fails

Currently, when a sync read fails and setting badblocks also fails
(because the 512 limit is exceeded), the rdev isn't immediately marked
Faulty. Instead, 'recovery_disabled' is set and non-In_sync rdevs are
removed later. This preserves array availability as long as the bad
regions aren't read, but users might read the bad sectors before the
rdev is removed. This can happen because resync/recovery_offset is
updated incorrectly and ends up covering these bad sectors.

When badblocks exceed 512, keeping the disk provides little benefit
while adding complexity; prompt disk replacement is more important.
Therefore, when setting badblocks fails, call md_error directly to mark
the rdev Faulty immediately, preventing potential data access issues.

Cleanup of the offset update logic and the 'recovery_disabled' handling
will follow after this change.

Link: https://lore.kernel.org/linux-raid/20260105110300.1442509-6-linan666@huaweicloud.com
Fixes: 5e5702898e ("md/raid10: Handle read errors during recovery better.")
Fixes: 3a9f28a511 ("md/raid1: improve handling of read failure during recovery.")
Signed-off-by: Li Nan <linan122@huawei.com>
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
This commit is contained in:
Li Nan
2026-01-05 19:02:53 +08:00
committed by Yu Kuai
parent aa9d12cfa1
commit fd4d44c14f
4 changed files with 32 additions and 45 deletions

View File

@@ -2115,8 +2115,7 @@ static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
rdev->mddev->recovery);
}
/* need to record an error - either for the block or the device */
if (!rdev_set_badblocks(rdev, sector, sectors, 0))
md_error(rdev->mddev, rdev);
rdev_set_badblocks(rdev, sector, sectors, 0);
return 0;
}
@@ -2441,8 +2440,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
if (!success) {
/* Cannot read from anywhere - mark it bad */
struct md_rdev *rdev = conf->mirrors[read_disk].rdev;
if (!rdev_set_badblocks(rdev, sect, s, 0))
md_error(mddev, rdev);
rdev_set_badblocks(rdev, sect, s, 0);
break;
}
/* write it back and re-read */
@@ -2546,7 +2544,6 @@ static void narrow_write_error(struct r1bio *r1_bio, int i)
* Badblocks set failed, disk marked Faulty.
* No further operations needed.
*/
md_error(mddev, rdev);
bio_put(wbio);
break;
}
@@ -2568,14 +2565,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
if (bio->bi_end_io == NULL)
continue;
if (!bio->bi_status &&
test_bit(R1BIO_MadeGood, &r1_bio->state)) {
test_bit(R1BIO_MadeGood, &r1_bio->state))
rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
}
if (bio->bi_status &&
test_bit(R1BIO_WriteError, &r1_bio->state)) {
if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
md_error(conf->mddev, rdev);
}
test_bit(R1BIO_WriteError, &r1_bio->state))
rdev_set_badblocks(rdev, r1_bio->sector, s, 0);
}
put_buf(r1_bio);
md_done_sync(conf->mddev, s);