bcachefs: Check for stale dirty pointer before reads
Since we retry reads when we discover we read from a pointer that went stale, if a dirty pointer is erroneously stale it would cause us to loop retrying that read forever - unless we check before issuing the read, while the btree is still locked, since we know a dirty pointer should never be stale at that point. This patch adds that check, along with printing some helpful debug info.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
commit eb331fe5a4
parent fcf01959ea
committed by Kent Overstreet
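To make the failure mode concrete, here is a minimal toy sketch in plain userspace C - it is not bcachefs code, and every name in it (struct ext_ptr, read_extent_retry_only, read_extent_check_first, and so on) is invented for illustration. Each bucket carries a generation number and each extent pointer records the generation it was written against; a pointer is stale when the two no longer match. If staleness is only noticed after the IO completes, a dirty pointer that is erroneously stale retries forever, whereas checking it before issuing the read turns the loop into a single reported inconsistency.

#include <stdbool.h>
#include <stdio.h>

struct bucket  { unsigned gen; };                           /* toy on-device bucket */
struct ext_ptr { unsigned dev, bucket, gen; bool cached; }; /* toy extent pointer */

static struct bucket buckets[4];

/* A pointer is stale when the bucket has been reused since it was written. */
static bool ptr_stale(const struct ext_ptr *ptr)
{
	return buckets[ptr->bucket].gen != ptr->gen;
}

/* Old behaviour: staleness is only discovered after the IO, then retried. */
static int read_extent_retry_only(const struct ext_ptr *ptr, int max_tries)
{
	for (int i = 0; i < max_tries; i++) {
		/* ...issue the read here... */
		if (!ptr_stale(ptr))
			return 0;	/* generation matched, read succeeded */
		/* stale: treated as an IO error, go around again */
	}
	return -1;			/* bounded here only so the demo terminates */
}

/* New behaviour: a stale *dirty* pointer is caught before issuing the IO. */
static int read_extent_check_first(const struct ext_ptr *ptr)
{
	if (!ptr->cached && ptr_stale(ptr)) {
		fprintf(stderr,
			"inconsistency: stale dirty pointer (bucket %u: ptr gen %u, bucket gen %u)\n",
			ptr->bucket, ptr->gen, buckets[ptr->bucket].gen);
		return -1;		/* report once instead of looping */
	}
	/* ...issue the read here... */
	return 0;
}

int main(void)
{
	/* Simulate the corruption: the bucket's generation moved on,
	 * but a dirty (non-cached) pointer still references the old one. */
	struct ext_ptr dirty = { .dev = 0, .bucket = 1, .gen = 7, .cached = false };
	buckets[1].gen = 9;

	printf("retry-only path:  %d (would spin forever without the bound)\n",
	       read_extent_retry_only(&dirty, 1000));
	printf("check-first path: %d\n", read_extent_check_first(&dirty));
	return 0;
}

In the patch itself the up-front check only runs in the retry path (BCH_READ_IN_RETRY), because the @failed list it feeds is only allocated there; a first-pass read that lands on a stale pointer is still caught in bch2_read_endio() and funnels into the retry path, where the check then fires.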
@@ -1043,8 +1043,6 @@ retry:
 		sectors = min(sectors, k.k->size - offset_into_extent);
 
-		bch2_trans_unlock(trans);
-
 		if (readpages_iter)
 			readpage_bio_extend(readpages_iter, &rbio->bio, sectors,
 					    extent_partial_reads_expensive(k));
 
@@ -2032,6 +2032,33 @@ err:
 	return ret;
 }
 
+static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
+						   struct bkey_s_c k,
+						   struct bch_extent_ptr ptr)
+{
+	struct bch_fs *c = trans->c;
+	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev);
+	struct btree_iter iter;
+	char buf[200];
+	int ret;
+
+	bch2_bkey_val_to_text(&PBUF(buf), c, k);
+	bch2_fs_inconsistent(c, "Attempting to read from stale dirty pointer: %s", buf);
+
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+			     POS(ptr.dev, PTR_BUCKET_NR(ca, &ptr)),
+			     BTREE_ITER_CACHED);
+
+	ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+	if (ret)
+		return;
+
+	bch2_bkey_val_to_text(&PBUF(buf), c, k);
+	bch_err(c, "%s", buf);
+	bch_err(c, "memory gen: %u", *bucket_gen(ca, iter.pos.offset));
+	bch2_trans_iter_exit(trans, &iter);
+}
+
 int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 		       struct bvec_iter iter, struct bpos read_pos,
 		       enum btree_id data_btree, struct bkey_s_c k,
@@ -2041,7 +2068,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 	struct bch_fs *c = trans->c;
 	struct extent_ptr_decoded pick;
 	struct bch_read_bio *rbio = NULL;
-	struct bch_dev *ca;
+	struct bch_dev *ca = NULL;
 	struct promote_op *promote = NULL;
 	bool bounce = false, read_full = false, narrow_crcs = false;
 	struct bpos data_pos = bkey_start_pos(k.k);
@@ -2058,7 +2085,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 		zero_fill_bio_iter(&orig->bio, iter);
 		goto out_read_done;
 	}
-
+retry_pick:
 	pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
 
 	/* hole or reservation - just zero fill: */
@@ -2071,8 +2098,27 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 		goto err;
 	}
 
-	if (pick_ret > 0)
-		ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+	ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+
+	/*
+	 * Stale dirty pointers are treated as IO errors, but @failed isn't
+	 * allocated unless we're in the retry path - so if we're not in the
+	 * retry path, don't check here, it'll be caught in bch2_read_endio()
+	 * and we'll end up in the retry path:
+	 */
+	if ((flags & BCH_READ_IN_RETRY) &&
+	    !pick.ptr.cached &&
+	    unlikely(ptr_stale(ca, &pick.ptr))) {
+		read_from_stale_dirty_pointer(trans, k, pick.ptr);
+		bch2_mark_io_failure(failed, &pick);
+		goto retry_pick;
+	}
+
+	/*
+	 * Unlock the iterator while the btree node's lock is still in
+	 * cache, before doing the IO:
+	 */
+	bch2_trans_unlock(trans);
 
 	if (flags & BCH_READ_NODECODE) {
 		/*
@@ -2367,12 +2413,6 @@ retry:
 		 */
 		sectors = min(sectors, k.k->size - offset_into_extent);
 
-		/*
-		 * Unlock the iterator while the btree node's lock is still in
-		 * cache, before doing the IO:
-		 */
-		bch2_trans_unlock(&trans);
-
 		bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
 		swap(bvec_iter.bi_size, bytes);
 
@@ -752,10 +752,12 @@ static int __bch2_move_data(struct bch_fs *c,
 			BUG();
 		}
 
-		/* unlock before doing IO: */
+		/*
+		 * The iterator gets unlocked by __bch2_read_extent - need to
+		 * save a copy of @k elsewhere:
+		 */
 		bch2_bkey_buf_reassemble(&sk, c, k);
 		k = bkey_i_to_s_c(sk.k);
-		bch2_trans_unlock(&trans);
 
 		ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
 					data_cmd, data_opts);