mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'xfs-merge-7.1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Carlos Maiolino: "There aren't any new features. The whole series is just a collection of bug fixes and code refactoring. There is some new information added a couple new tracepoints, new data added to mountstats, but no big changes" * tag 'xfs-merge-7.1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (41 commits) xfs: fix number of GC bvecs xfs: untangle the open zones reporting in mountinfo xfs: expose the number of open zones in sysfs xfs: reduce special casing for the open GC zone xfs: streamline GC zone selection xfs: refactor GC zone selection helpers xfs: rename xfs_zone_gc_iter_next to xfs_zone_gc_iter_irec xfs: put the open zone later xfs_open_zone_put xfs: add a separate tracepoint for stealing an open zone for GC xfs: delay initial open of the GC zone xfs: fix a resource leak in xfs_alloc_buftarg() xfs: handle too many open zones when mounting xfs: refactor xfs_mount_zones xfs: fix integer overflow in busy extent sort comparator xfs: fix integer overflow in deferred intent sort comparators xfs: fold xfs_setattr_size into xfs_vn_setattr_size xfs: remove a duplicate assert in xfs_setattr_size xfs: return default quota limits for IDs without a dquot xfs: start gc on zonegc_low_space attribute updates xfs: don't decrement the buffer LRU count for in-use buffers ...
This commit is contained in:
@@ -550,6 +550,10 @@ For zoned file systems, the following attributes are exposed in:
|
||||
is limited by the capabilities of the backing zoned device, file system
|
||||
size and the max_open_zones mount option.
|
||||
|
||||
nr_open_zones (Min: 0 Default: Varies Max: UINTMAX)
|
||||
This read-only attribute exposes the current number of open zones
|
||||
used by the file system.
|
||||
|
||||
zonegc_low_space (Min: 0 Default: 0 Max: 100)
|
||||
Define a percentage for how much of the unused space GC should keep
|
||||
available for writing. A high value will reclaim more of the space
|
||||
|
||||
@@ -1647,16 +1647,12 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
|
||||
while ((ret = iomap_iter(&iter, ops)) > 0) {
|
||||
const struct iomap *srcmap = iomap_iter_srcmap(&iter);
|
||||
|
||||
if (WARN_ON_ONCE((iter.iomap.flags & IOMAP_F_FOLIO_BATCH) &&
|
||||
srcmap->type != IOMAP_UNWRITTEN))
|
||||
return -EIO;
|
||||
|
||||
if (!(iter.iomap.flags & IOMAP_F_FOLIO_BATCH) &&
|
||||
(srcmap->type == IOMAP_HOLE ||
|
||||
srcmap->type == IOMAP_UNWRITTEN)) {
|
||||
s64 status;
|
||||
|
||||
if (range_dirty) {
|
||||
if (range_dirty && srcmap->type == IOMAP_UNWRITTEN) {
|
||||
range_dirty = false;
|
||||
status = iomap_zero_iter_flush_and_stale(&iter);
|
||||
} else {
|
||||
|
||||
@@ -110,10 +110,7 @@ xfs_perag_uninit(
|
||||
struct xfs_group *xg)
|
||||
{
|
||||
#ifdef __KERNEL__
|
||||
struct xfs_perag *pag = to_perag(xg);
|
||||
|
||||
cancel_delayed_work_sync(&pag->pag_blockgc_work);
|
||||
xfs_buf_cache_destroy(&pag->pag_bcache);
|
||||
cancel_delayed_work_sync(&to_perag(xg)->pag_blockgc_work);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -235,10 +232,6 @@ xfs_perag_alloc(
|
||||
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
error = xfs_buf_cache_init(&pag->pag_bcache);
|
||||
if (error)
|
||||
goto out_free_perag;
|
||||
|
||||
/*
|
||||
* Pre-calculated geometry
|
||||
*/
|
||||
@@ -250,12 +243,10 @@ xfs_perag_alloc(
|
||||
|
||||
error = xfs_group_insert(mp, pag_group(pag), index, XG_TYPE_AG);
|
||||
if (error)
|
||||
goto out_buf_cache_destroy;
|
||||
goto out_free_perag;
|
||||
|
||||
return 0;
|
||||
|
||||
out_buf_cache_destroy:
|
||||
xfs_buf_cache_destroy(&pag->pag_bcache);
|
||||
out_free_perag:
|
||||
kfree(pag);
|
||||
return error;
|
||||
|
||||
@@ -85,8 +85,6 @@ struct xfs_perag {
|
||||
int pag_ici_reclaimable; /* reclaimable inodes */
|
||||
unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */
|
||||
|
||||
struct xfs_buf_cache pag_bcache;
|
||||
|
||||
/* background prealloc block trimming */
|
||||
struct delayed_work pag_blockgc_work;
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
@@ -995,7 +995,8 @@ struct xfs_rtgroup_geometry {
|
||||
__u32 rg_sick; /* o: sick things in ag */
|
||||
__u32 rg_checked; /* o: checked metadata in ag */
|
||||
__u32 rg_flags; /* i/o: flags for this ag */
|
||||
__u32 rg_reserved[27]; /* o: zero */
|
||||
__u32 rg_writepointer; /* o: write pointer block offset for zoned */
|
||||
__u32 rg_reserved[26]; /* o: zero */
|
||||
};
|
||||
#define XFS_RTGROUP_GEOM_SICK_SUPER (1U << 0) /* superblock */
|
||||
#define XFS_RTGROUP_GEOM_SICK_BITMAP (1U << 1) /* rtbitmap */
|
||||
@@ -1003,6 +1004,8 @@ struct xfs_rtgroup_geometry {
|
||||
#define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */
|
||||
#define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */
|
||||
|
||||
#define XFS_RTGROUP_GEOM_WRITEPOINTER (1U << 0) /* write pointer */
|
||||
|
||||
/* Health monitor event domains */
|
||||
|
||||
/* affects the whole fs */
|
||||
|
||||
238
fs/xfs/xfs_buf.c
238
fs/xfs/xfs_buf.c
@@ -31,20 +31,20 @@ struct kmem_cache *xfs_buf_cache;
|
||||
*
|
||||
* xfs_buf_stale:
|
||||
* b_sema (caller holds)
|
||||
* b_lock
|
||||
* b_lockref.lock
|
||||
* lru_lock
|
||||
*
|
||||
* xfs_buf_rele:
|
||||
* b_lock
|
||||
* b_lockref.lock
|
||||
* lru_lock
|
||||
*
|
||||
* xfs_buftarg_drain_rele
|
||||
* lru_lock
|
||||
* b_lock (trylock due to inversion)
|
||||
* b_lockref.lock (trylock due to inversion)
|
||||
*
|
||||
* xfs_buftarg_isolate
|
||||
* lru_lock
|
||||
* b_lock (trylock due to inversion)
|
||||
* b_lockref.lock (trylock due to inversion)
|
||||
*/
|
||||
|
||||
static void xfs_buf_submit(struct xfs_buf *bp);
|
||||
@@ -78,14 +78,11 @@ xfs_buf_stale(
|
||||
*/
|
||||
bp->b_flags &= ~_XBF_DELWRI_Q;
|
||||
|
||||
spin_lock(&bp->b_lock);
|
||||
spin_lock(&bp->b_lockref.lock);
|
||||
atomic_set(&bp->b_lru_ref, 0);
|
||||
if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
|
||||
(list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru)))
|
||||
bp->b_hold--;
|
||||
|
||||
ASSERT(bp->b_hold >= 1);
|
||||
spin_unlock(&bp->b_lock);
|
||||
if (!__lockref_is_dead(&bp->b_lockref))
|
||||
list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
|
||||
spin_unlock(&bp->b_lockref.lock);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -277,10 +274,8 @@ xfs_buf_alloc(
|
||||
* inserting into the hash table are safe (and will have to wait for
|
||||
* the unlock to do anything non-trivial).
|
||||
*/
|
||||
bp->b_hold = 1;
|
||||
lockref_init(&bp->b_lockref);
|
||||
sema_init(&bp->b_sema, 0); /* held, no waiters */
|
||||
|
||||
spin_lock_init(&bp->b_lock);
|
||||
atomic_set(&bp->b_lru_ref, 1);
|
||||
init_completion(&bp->b_iowait);
|
||||
INIT_LIST_HEAD(&bp->b_lru);
|
||||
@@ -368,20 +363,6 @@ static const struct rhashtable_params xfs_buf_hash_params = {
|
||||
.obj_cmpfn = _xfs_buf_obj_cmp,
|
||||
};
|
||||
|
||||
int
|
||||
xfs_buf_cache_init(
|
||||
struct xfs_buf_cache *bch)
|
||||
{
|
||||
return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
|
||||
}
|
||||
|
||||
void
|
||||
xfs_buf_cache_destroy(
|
||||
struct xfs_buf_cache *bch)
|
||||
{
|
||||
rhashtable_destroy(&bch->bc_hash);
|
||||
}
|
||||
|
||||
static int
|
||||
xfs_buf_map_verify(
|
||||
struct xfs_buftarg *btp,
|
||||
@@ -437,23 +418,9 @@ xfs_buf_find_lock(
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
xfs_buf_try_hold(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
spin_lock(&bp->b_lock);
|
||||
if (bp->b_hold == 0) {
|
||||
spin_unlock(&bp->b_lock);
|
||||
return false;
|
||||
}
|
||||
bp->b_hold++;
|
||||
spin_unlock(&bp->b_lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline int
|
||||
xfs_buf_lookup(
|
||||
struct xfs_buf_cache *bch,
|
||||
struct xfs_buftarg *btp,
|
||||
struct xfs_buf_map *map,
|
||||
xfs_buf_flags_t flags,
|
||||
struct xfs_buf **bpp)
|
||||
@@ -462,8 +429,8 @@ xfs_buf_lookup(
|
||||
int error;
|
||||
|
||||
rcu_read_lock();
|
||||
bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params);
|
||||
if (!bp || !xfs_buf_try_hold(bp)) {
|
||||
bp = rhashtable_lookup(&btp->bt_hash, map, xfs_buf_hash_params);
|
||||
if (!bp || !lockref_get_not_dead(&bp->b_lockref)) {
|
||||
rcu_read_unlock();
|
||||
return -ENOENT;
|
||||
}
|
||||
@@ -487,7 +454,6 @@ xfs_buf_lookup(
|
||||
static int
|
||||
xfs_buf_find_insert(
|
||||
struct xfs_buftarg *btp,
|
||||
struct xfs_buf_cache *bch,
|
||||
struct xfs_perag *pag,
|
||||
struct xfs_buf_map *cmap,
|
||||
struct xfs_buf_map *map,
|
||||
@@ -507,14 +473,14 @@ xfs_buf_find_insert(
|
||||
new_bp->b_pag = pag;
|
||||
|
||||
rcu_read_lock();
|
||||
bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
|
||||
bp = rhashtable_lookup_get_insert_fast(&btp->bt_hash,
|
||||
&new_bp->b_rhash_head, xfs_buf_hash_params);
|
||||
if (IS_ERR(bp)) {
|
||||
rcu_read_unlock();
|
||||
error = PTR_ERR(bp);
|
||||
goto out_free_buf;
|
||||
}
|
||||
if (bp && xfs_buf_try_hold(bp)) {
|
||||
if (bp && lockref_get_not_dead(&bp->b_lockref)) {
|
||||
/* found an existing buffer */
|
||||
rcu_read_unlock();
|
||||
error = xfs_buf_find_lock(bp, flags);
|
||||
@@ -549,16 +515,6 @@ xfs_buftarg_get_pag(
|
||||
return xfs_perag_get(mp, xfs_daddr_to_agno(mp, map->bm_bn));
|
||||
}
|
||||
|
||||
static inline struct xfs_buf_cache *
|
||||
xfs_buftarg_buf_cache(
|
||||
struct xfs_buftarg *btp,
|
||||
struct xfs_perag *pag)
|
||||
{
|
||||
if (pag)
|
||||
return &pag->pag_bcache;
|
||||
return btp->bt_cache;
|
||||
}
|
||||
|
||||
/*
|
||||
* Assembles a buffer covering the specified range. The code is optimised for
|
||||
* cache hits, as metadata intensive workloads will see 3 orders of magnitude
|
||||
@@ -572,7 +528,6 @@ xfs_buf_get_map(
|
||||
xfs_buf_flags_t flags,
|
||||
struct xfs_buf **bpp)
|
||||
{
|
||||
struct xfs_buf_cache *bch;
|
||||
struct xfs_perag *pag;
|
||||
struct xfs_buf *bp = NULL;
|
||||
struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn };
|
||||
@@ -589,9 +544,8 @@ xfs_buf_get_map(
|
||||
return error;
|
||||
|
||||
pag = xfs_buftarg_get_pag(btp, &cmap);
|
||||
bch = xfs_buftarg_buf_cache(btp, pag);
|
||||
|
||||
error = xfs_buf_lookup(bch, &cmap, flags, &bp);
|
||||
error = xfs_buf_lookup(btp, &cmap, flags, &bp);
|
||||
if (error && error != -ENOENT)
|
||||
goto out_put_perag;
|
||||
|
||||
@@ -603,7 +557,7 @@ xfs_buf_get_map(
|
||||
goto out_put_perag;
|
||||
|
||||
/* xfs_buf_find_insert() consumes the perag reference. */
|
||||
error = xfs_buf_find_insert(btp, bch, pag, &cmap, map, nmaps,
|
||||
error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps,
|
||||
flags, &bp);
|
||||
if (error)
|
||||
return error;
|
||||
@@ -856,84 +810,27 @@ xfs_buf_hold(
|
||||
{
|
||||
trace_xfs_buf_hold(bp, _RET_IP_);
|
||||
|
||||
spin_lock(&bp->b_lock);
|
||||
bp->b_hold++;
|
||||
spin_unlock(&bp->b_lock);
|
||||
lockref_get(&bp->b_lockref);
|
||||
}
|
||||
|
||||
static void
|
||||
xfs_buf_rele_uncached(
|
||||
xfs_buf_destroy(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
ASSERT(list_empty(&bp->b_lru));
|
||||
ASSERT(__lockref_is_dead(&bp->b_lockref));
|
||||
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
|
||||
|
||||
spin_lock(&bp->b_lock);
|
||||
if (--bp->b_hold) {
|
||||
spin_unlock(&bp->b_lock);
|
||||
return;
|
||||
if (!xfs_buf_is_uncached(bp)) {
|
||||
rhashtable_remove_fast(&bp->b_target->bt_hash,
|
||||
&bp->b_rhash_head, xfs_buf_hash_params);
|
||||
|
||||
if (bp->b_pag)
|
||||
xfs_perag_put(bp->b_pag);
|
||||
}
|
||||
spin_unlock(&bp->b_lock);
|
||||
|
||||
xfs_buf_free(bp);
|
||||
}
|
||||
|
||||
static void
|
||||
xfs_buf_rele_cached(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
struct xfs_buftarg *btp = bp->b_target;
|
||||
struct xfs_perag *pag = bp->b_pag;
|
||||
struct xfs_buf_cache *bch = xfs_buftarg_buf_cache(btp, pag);
|
||||
bool freebuf = false;
|
||||
|
||||
trace_xfs_buf_rele(bp, _RET_IP_);
|
||||
|
||||
spin_lock(&bp->b_lock);
|
||||
ASSERT(bp->b_hold >= 1);
|
||||
if (bp->b_hold > 1) {
|
||||
bp->b_hold--;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* we are asked to drop the last reference */
|
||||
if (atomic_read(&bp->b_lru_ref)) {
|
||||
/*
|
||||
* If the buffer is added to the LRU, keep the reference to the
|
||||
* buffer for the LRU and clear the (now stale) dispose list
|
||||
* state flag, else drop the reference.
|
||||
*/
|
||||
if (list_lru_add_obj(&btp->bt_lru, &bp->b_lru))
|
||||
bp->b_state &= ~XFS_BSTATE_DISPOSE;
|
||||
else
|
||||
bp->b_hold--;
|
||||
} else {
|
||||
bp->b_hold--;
|
||||
/*
|
||||
* most of the time buffers will already be removed from the
|
||||
* LRU, so optimise that case by checking for the
|
||||
* XFS_BSTATE_DISPOSE flag indicating the last list the buffer
|
||||
* was on was the disposal list
|
||||
*/
|
||||
if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
|
||||
list_lru_del_obj(&btp->bt_lru, &bp->b_lru);
|
||||
} else {
|
||||
ASSERT(list_empty(&bp->b_lru));
|
||||
}
|
||||
|
||||
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
|
||||
rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
|
||||
xfs_buf_hash_params);
|
||||
if (pag)
|
||||
xfs_perag_put(pag);
|
||||
freebuf = true;
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&bp->b_lock);
|
||||
|
||||
if (freebuf)
|
||||
xfs_buf_free(bp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Release a hold on the specified buffer.
|
||||
*/
|
||||
@@ -942,10 +839,23 @@ xfs_buf_rele(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
trace_xfs_buf_rele(bp, _RET_IP_);
|
||||
if (xfs_buf_is_uncached(bp))
|
||||
xfs_buf_rele_uncached(bp);
|
||||
else
|
||||
xfs_buf_rele_cached(bp);
|
||||
|
||||
if (lockref_put_or_lock(&bp->b_lockref))
|
||||
return;
|
||||
if (!--bp->b_lockref.count) {
|
||||
if (xfs_buf_is_uncached(bp) || !atomic_read(&bp->b_lru_ref))
|
||||
goto kill;
|
||||
list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru);
|
||||
}
|
||||
spin_unlock(&bp->b_lockref.lock);
|
||||
return;
|
||||
|
||||
kill:
|
||||
lockref_mark_dead(&bp->b_lockref);
|
||||
list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
|
||||
spin_unlock(&bp->b_lockref.lock);
|
||||
|
||||
xfs_buf_destroy(bp);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1254,9 +1164,11 @@ xfs_buf_ioerror_alert(
|
||||
|
||||
/*
|
||||
* To simulate an I/O failure, the buffer must be locked and held with at least
|
||||
* three references. The LRU reference is dropped by the stale call. The buf
|
||||
* item reference is dropped via ioend processing. The third reference is owned
|
||||
* by the caller and is dropped on I/O completion if the buffer is XBF_ASYNC.
|
||||
* two references.
|
||||
*
|
||||
* The buf item reference is dropped via ioend processing. The second reference
|
||||
* is owned by the caller and is dropped on I/O completion if the buffer is
|
||||
* XBF_ASYNC.
|
||||
*/
|
||||
void
|
||||
xfs_buf_ioend_fail(
|
||||
@@ -1512,23 +1424,18 @@ xfs_buftarg_drain_rele(
|
||||
struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
|
||||
struct list_head *dispose = arg;
|
||||
|
||||
if (!spin_trylock(&bp->b_lock))
|
||||
if (!spin_trylock(&bp->b_lockref.lock))
|
||||
return LRU_SKIP;
|
||||
if (bp->b_hold > 1) {
|
||||
if (bp->b_lockref.count > 0) {
|
||||
/* need to wait, so skip it this pass */
|
||||
spin_unlock(&bp->b_lock);
|
||||
spin_unlock(&bp->b_lockref.lock);
|
||||
trace_xfs_buf_drain_buftarg(bp, _RET_IP_);
|
||||
return LRU_SKIP;
|
||||
}
|
||||
|
||||
/*
|
||||
* clear the LRU reference count so the buffer doesn't get
|
||||
* ignored in xfs_buf_rele().
|
||||
*/
|
||||
atomic_set(&bp->b_lru_ref, 0);
|
||||
bp->b_state |= XFS_BSTATE_DISPOSE;
|
||||
lockref_mark_dead(&bp->b_lockref);
|
||||
list_lru_isolate_move(lru, item, dispose);
|
||||
spin_unlock(&bp->b_lock);
|
||||
spin_unlock(&bp->b_lockref.lock);
|
||||
return LRU_REMOVED;
|
||||
}
|
||||
|
||||
@@ -1581,7 +1488,7 @@ xfs_buftarg_drain(
|
||||
"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
|
||||
(long long)xfs_buf_daddr(bp));
|
||||
}
|
||||
xfs_buf_rele(bp);
|
||||
xfs_buf_destroy(bp);
|
||||
}
|
||||
if (loop++ != 0)
|
||||
delay(100);
|
||||
@@ -1610,24 +1517,37 @@ xfs_buftarg_isolate(
|
||||
struct list_head *dispose = arg;
|
||||
|
||||
/*
|
||||
* we are inverting the lru lock/bp->b_lock here, so use a trylock.
|
||||
* If we fail to get the lock, just skip it.
|
||||
* We are inverting the lru lock vs bp->b_lockref.lock order here, so
|
||||
* use a trylock. If we fail to get the lock, just skip the buffer.
|
||||
*/
|
||||
if (!spin_trylock(&bp->b_lock))
|
||||
if (!spin_trylock(&bp->b_lockref.lock))
|
||||
return LRU_SKIP;
|
||||
|
||||
/*
|
||||
* If the buffer is in use, remove it from the LRU for now. We can't
|
||||
* free it while someone is using it, and we should also not count
|
||||
* eviction passes for it, just as if it hadn't been added to the LRU
|
||||
* yet.
|
||||
*/
|
||||
if (bp->b_lockref.count > 0) {
|
||||
list_lru_isolate(lru, &bp->b_lru);
|
||||
spin_unlock(&bp->b_lockref.lock);
|
||||
return LRU_REMOVED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decrement the b_lru_ref count unless the value is already
|
||||
* zero. If the value is already zero, we need to reclaim the
|
||||
* buffer, otherwise it gets another trip through the LRU.
|
||||
*/
|
||||
if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
|
||||
spin_unlock(&bp->b_lock);
|
||||
spin_unlock(&bp->b_lockref.lock);
|
||||
return LRU_ROTATE;
|
||||
}
|
||||
|
||||
bp->b_state |= XFS_BSTATE_DISPOSE;
|
||||
lockref_mark_dead(&bp->b_lockref);
|
||||
list_lru_isolate_move(lru, item, dispose);
|
||||
spin_unlock(&bp->b_lock);
|
||||
spin_unlock(&bp->b_lockref.lock);
|
||||
return LRU_REMOVED;
|
||||
}
|
||||
|
||||
@@ -1647,7 +1567,7 @@ xfs_buftarg_shrink_scan(
|
||||
struct xfs_buf *bp;
|
||||
bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
|
||||
list_del_init(&bp->b_lru);
|
||||
xfs_buf_rele(bp);
|
||||
xfs_buf_destroy(bp);
|
||||
}
|
||||
|
||||
return freed;
|
||||
@@ -1670,6 +1590,7 @@ xfs_destroy_buftarg(
|
||||
ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0);
|
||||
percpu_counter_destroy(&btp->bt_readahead_count);
|
||||
list_lru_destroy(&btp->bt_lru);
|
||||
rhashtable_destroy(&btp->bt_hash);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1764,8 +1685,10 @@ xfs_init_buftarg(
|
||||
ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
|
||||
DEFAULT_RATELIMIT_BURST);
|
||||
|
||||
if (list_lru_init(&btp->bt_lru))
|
||||
if (rhashtable_init(&btp->bt_hash, &xfs_buf_hash_params))
|
||||
return -ENOMEM;
|
||||
if (list_lru_init(&btp->bt_lru))
|
||||
goto out_destroy_hash;
|
||||
if (percpu_counter_init(&btp->bt_readahead_count, 0, GFP_KERNEL))
|
||||
goto out_destroy_lru;
|
||||
|
||||
@@ -1783,6 +1706,8 @@ out_destroy_io_count:
|
||||
percpu_counter_destroy(&btp->bt_readahead_count);
|
||||
out_destroy_lru:
|
||||
list_lru_destroy(&btp->bt_lru);
|
||||
out_destroy_hash:
|
||||
rhashtable_destroy(&btp->bt_hash);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -1831,6 +1756,7 @@ xfs_alloc_buftarg(
|
||||
return btp;
|
||||
|
||||
error_free:
|
||||
fs_put_dax(btp->bt_daxdev, mp);
|
||||
kfree(btp);
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include <linux/dax.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/list_lru.h>
|
||||
#include <linux/lockref.h>
|
||||
|
||||
extern struct kmem_cache *xfs_buf_cache;
|
||||
|
||||
@@ -68,18 +69,6 @@ typedef unsigned int xfs_buf_flags_t;
|
||||
{ XBF_INCORE, "INCORE" }, \
|
||||
{ XBF_TRYLOCK, "TRYLOCK" }
|
||||
|
||||
/*
|
||||
* Internal state flags.
|
||||
*/
|
||||
#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */
|
||||
|
||||
struct xfs_buf_cache {
|
||||
struct rhashtable bc_hash;
|
||||
};
|
||||
|
||||
int xfs_buf_cache_init(struct xfs_buf_cache *bch);
|
||||
void xfs_buf_cache_destroy(struct xfs_buf_cache *bch);
|
||||
|
||||
/*
|
||||
* The xfs_buftarg contains 2 notions of "sector size" -
|
||||
*
|
||||
@@ -117,8 +106,7 @@ struct xfs_buftarg {
|
||||
unsigned int bt_awu_min;
|
||||
unsigned int bt_awu_max;
|
||||
|
||||
/* built-in cache, if we're not using the perag one */
|
||||
struct xfs_buf_cache bt_cache[];
|
||||
struct rhashtable bt_hash;
|
||||
};
|
||||
|
||||
struct xfs_buf_map {
|
||||
@@ -159,7 +147,7 @@ struct xfs_buf {
|
||||
|
||||
xfs_daddr_t b_rhash_key; /* buffer cache index */
|
||||
int b_length; /* size of buffer in BBs */
|
||||
unsigned int b_hold; /* reference count */
|
||||
struct lockref b_lockref; /* refcount + lock */
|
||||
atomic_t b_lru_ref; /* lru reclaim ref count */
|
||||
xfs_buf_flags_t b_flags; /* status flags */
|
||||
struct semaphore b_sema; /* semaphore for lockables */
|
||||
@@ -169,8 +157,6 @@ struct xfs_buf {
|
||||
* bt_lru_lock and not by b_sema
|
||||
*/
|
||||
struct list_head b_lru; /* lru list */
|
||||
spinlock_t b_lock; /* internal state lock */
|
||||
unsigned int b_state; /* internal state flags */
|
||||
wait_queue_head_t b_waiters; /* unpin waiters */
|
||||
struct list_head b_list;
|
||||
struct xfs_perag *b_pag;
|
||||
|
||||
@@ -58,7 +58,7 @@ xmbuf_alloc(
|
||||
struct xfs_buftarg *btp;
|
||||
int error;
|
||||
|
||||
btp = kzalloc_flex(*btp, bt_cache, 1);
|
||||
btp = kzalloc_obj(*btp);
|
||||
if (!btp)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -81,10 +81,6 @@ xmbuf_alloc(
|
||||
/* ensure all writes are below EOF to avoid pagecache zeroing */
|
||||
i_size_write(inode, inode->i_sb->s_maxbytes);
|
||||
|
||||
error = xfs_buf_cache_init(btp->bt_cache);
|
||||
if (error)
|
||||
goto out_file;
|
||||
|
||||
/* Initialize buffer target */
|
||||
btp->bt_mount = mp;
|
||||
btp->bt_dev = (dev_t)-1U;
|
||||
@@ -95,15 +91,13 @@ xmbuf_alloc(
|
||||
|
||||
error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
|
||||
if (error)
|
||||
goto out_bcache;
|
||||
goto out_file;
|
||||
|
||||
trace_xmbuf_create(btp);
|
||||
|
||||
*btpp = btp;
|
||||
return 0;
|
||||
|
||||
out_bcache:
|
||||
xfs_buf_cache_destroy(btp->bt_cache);
|
||||
out_file:
|
||||
fput(file);
|
||||
out_free_btp:
|
||||
@@ -122,7 +116,6 @@ xmbuf_free(
|
||||
trace_xmbuf_free(btp);
|
||||
|
||||
xfs_destroy_buftarg(btp);
|
||||
xfs_buf_cache_destroy(btp->bt_cache);
|
||||
fput(btp->bt_file);
|
||||
kfree(btp);
|
||||
}
|
||||
|
||||
@@ -690,9 +690,9 @@ xfs_extent_busy_ag_cmp(
|
||||
container_of(l2, struct xfs_extent_busy, list);
|
||||
s32 diff;
|
||||
|
||||
diff = b1->group->xg_gno - b2->group->xg_gno;
|
||||
diff = cmp_int(b1->group->xg_gno, b2->group->xg_gno);
|
||||
if (!diff)
|
||||
diff = b1->bno - b2->bno;
|
||||
diff = cmp_int(b1->bno, b2->bno);
|
||||
return diff;
|
||||
}
|
||||
|
||||
|
||||
@@ -387,7 +387,7 @@ xfs_extent_free_diff_items(
|
||||
struct xfs_extent_free_item *ra = xefi_entry(a);
|
||||
struct xfs_extent_free_item *rb = xefi_entry(b);
|
||||
|
||||
return ra->xefi_group->xg_gno - rb->xefi_group->xg_gno;
|
||||
return cmp_int(ra->xefi_group->xg_gno, rb->xefi_group->xg_gno);
|
||||
}
|
||||
|
||||
/* Log a free extent to the intent item. */
|
||||
|
||||
@@ -560,6 +560,72 @@ xfs_zoned_write_space_reserve(
|
||||
flags, ac);
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to lock the test/set EOF update as we can be racing with
|
||||
* other IO completions here to update the EOF. Failing to serialise
|
||||
* here can result in EOF moving backwards and Bad Things Happen when
|
||||
* that occurs.
|
||||
*
|
||||
* As IO completion only ever extends EOF, we can do an unlocked check
|
||||
* here to avoid taking the spinlock. If we land within the current EOF,
|
||||
* then we do not need to do an extending update at all, and we don't
|
||||
* need to take the lock to check this. If we race with an update moving
|
||||
* EOF, then we'll either still be beyond EOF and need to take the lock,
|
||||
* or we'll be within EOF and we don't need to take it at all.
|
||||
*/
|
||||
static int
|
||||
xfs_dio_endio_set_isize(
|
||||
struct inode *inode,
|
||||
loff_t offset,
|
||||
ssize_t size)
|
||||
{
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
|
||||
if (offset + size <= i_size_read(inode))
|
||||
return 0;
|
||||
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
if (offset + size <= i_size_read(inode)) {
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
i_size_write(inode, offset + size);
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
|
||||
return xfs_setfilesize(ip, offset, size);
|
||||
}
|
||||
|
||||
static int
|
||||
xfs_zoned_dio_write_end_io(
|
||||
struct kiocb *iocb,
|
||||
ssize_t size,
|
||||
int error,
|
||||
unsigned flags)
|
||||
{
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
unsigned int nofs_flag;
|
||||
|
||||
ASSERT(!(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));
|
||||
|
||||
trace_xfs_end_io_direct_write(ip, iocb->ki_pos, size);
|
||||
|
||||
if (xfs_is_shutdown(ip->i_mount))
|
||||
return -EIO;
|
||||
|
||||
if (error || !size)
|
||||
return error;
|
||||
|
||||
XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
|
||||
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
error = xfs_dio_endio_set_isize(inode, iocb->ki_pos, size);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static int
|
||||
xfs_dio_write_end_io(
|
||||
struct kiocb *iocb,
|
||||
@@ -572,8 +638,7 @@ xfs_dio_write_end_io(
|
||||
loff_t offset = iocb->ki_pos;
|
||||
unsigned int nofs_flag;
|
||||
|
||||
ASSERT(!xfs_is_zoned_inode(ip) ||
|
||||
!(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));
|
||||
ASSERT(!xfs_is_zoned_inode(ip));
|
||||
|
||||
trace_xfs_end_io_direct_write(ip, offset, size);
|
||||
|
||||
@@ -623,30 +688,8 @@ xfs_dio_write_end_io(
|
||||
* with the on-disk inode size being outside the in-core inode size. We
|
||||
* have no other method of updating EOF for AIO, so always do it here
|
||||
* if necessary.
|
||||
*
|
||||
* We need to lock the test/set EOF update as we can be racing with
|
||||
* other IO completions here to update the EOF. Failing to serialise
|
||||
* here can result in EOF moving backwards and Bad Things Happen when
|
||||
* that occurs.
|
||||
*
|
||||
* As IO completion only ever extends EOF, we can do an unlocked check
|
||||
* here to avoid taking the spinlock. If we land within the current EOF,
|
||||
* then we do not need to do an extending update at all, and we don't
|
||||
* need to take the lock to check this. If we race with an update moving
|
||||
* EOF, then we'll either still be beyond EOF and need to take the lock,
|
||||
* or we'll be within EOF and we don't need to take it at all.
|
||||
*/
|
||||
if (offset + size <= i_size_read(inode))
|
||||
goto out;
|
||||
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
if (offset + size > i_size_read(inode)) {
|
||||
i_size_write(inode, offset + size);
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
error = xfs_setfilesize(ip, offset, size);
|
||||
} else {
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
}
|
||||
error = xfs_dio_endio_set_isize(inode, offset, size);
|
||||
|
||||
out:
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
@@ -688,7 +731,7 @@ xfs_dio_zoned_submit_io(
|
||||
static const struct iomap_dio_ops xfs_dio_zoned_write_ops = {
|
||||
.bio_set = &iomap_ioend_bioset,
|
||||
.submit_io = xfs_dio_zoned_submit_io,
|
||||
.end_io = xfs_dio_write_end_io,
|
||||
.end_io = xfs_zoned_dio_write_end_io,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -1263,6 +1306,23 @@ xfs_falloc_insert_range(
|
||||
if (offset >= isize)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Let writeback clean up EOF folio state before we bump i_size. The
|
||||
* insert flushes before it starts shifting and under certain
|
||||
* circumstances we can write back blocks that should technically be
|
||||
* considered post-eof (and thus should not be submitted for writeback).
|
||||
*
|
||||
* For example, a large, dirty folio that spans EOF and is backed by
|
||||
* post-eof COW fork preallocation can cause block remap into the data
|
||||
* fork. This shifts back out beyond EOF, but creates an unexpectedly
|
||||
* written post-eof block. The insert is going to flush, unmap and
|
||||
* cancel prealloc across this whole range, so flush EOF now before we
|
||||
* bump i_size to provide consistent behavior.
|
||||
*/
|
||||
error = filemap_write_and_wait_range(inode->i_mapping, isize, isize);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = xfs_falloc_setsize(file, isize + len);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
@@ -37,12 +37,15 @@
|
||||
#include "xfs_ioctl.h"
|
||||
#include "xfs_xattr.h"
|
||||
#include "xfs_rtbitmap.h"
|
||||
#include "xfs_rtrmap_btree.h"
|
||||
#include "xfs_file.h"
|
||||
#include "xfs_exchrange.h"
|
||||
#include "xfs_handle.h"
|
||||
#include "xfs_rtgroup.h"
|
||||
#include "xfs_healthmon.h"
|
||||
#include "xfs_verify_media.h"
|
||||
#include "xfs_zone_priv.h"
|
||||
#include "xfs_zone_alloc.h"
|
||||
|
||||
#include <linux/mount.h>
|
||||
#include <linux/fileattr.h>
|
||||
@@ -413,6 +416,7 @@ xfs_ioc_rtgroup_geometry(
|
||||
{
|
||||
struct xfs_rtgroup *rtg;
|
||||
struct xfs_rtgroup_geometry rgeo;
|
||||
xfs_rgblock_t highest_rgbno;
|
||||
int error;
|
||||
|
||||
if (copy_from_user(&rgeo, arg, sizeof(rgeo)))
|
||||
@@ -433,6 +437,21 @@ xfs_ioc_rtgroup_geometry(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (xfs_has_zoned(mp)) {
|
||||
xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
|
||||
if (rtg->rtg_open_zone) {
|
||||
rgeo.rg_writepointer = rtg->rtg_open_zone->oz_allocated;
|
||||
} else {
|
||||
highest_rgbno = xfs_rtrmap_highest_rgbno(rtg);
|
||||
if (highest_rgbno == NULLRGBLOCK)
|
||||
rgeo.rg_writepointer = 0;
|
||||
else
|
||||
rgeo.rg_writepointer = highest_rgbno + 1;
|
||||
}
|
||||
xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
|
||||
rgeo.rg_flags |= XFS_RTGROUP_GEOM_WRITEPOINTER;
|
||||
}
|
||||
|
||||
if (copy_to_user(arg, &rgeo, sizeof(rgeo)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
|
||||
@@ -1593,6 +1593,7 @@ xfs_zoned_buffered_write_iomap_begin(
|
||||
{
|
||||
struct iomap_iter *iter =
|
||||
container_of(iomap, struct iomap_iter, iomap);
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
struct xfs_zone_alloc_ctx *ac = iter->private;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
@@ -1617,6 +1618,7 @@ xfs_zoned_buffered_write_iomap_begin(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
restart:
|
||||
error = xfs_ilock_for_iomap(ip, flags, &lockmode);
|
||||
if (error)
|
||||
return error;
|
||||
@@ -1654,14 +1656,6 @@ xfs_zoned_buffered_write_iomap_begin(
|
||||
&smap))
|
||||
smap.br_startoff = end_fsb; /* fake hole until EOF */
|
||||
if (smap.br_startoff > offset_fsb) {
|
||||
/*
|
||||
* We never need to allocate blocks for zeroing a hole.
|
||||
*/
|
||||
if (flags & IOMAP_ZERO) {
|
||||
xfs_hole_to_iomap(ip, iomap, offset_fsb,
|
||||
smap.br_startoff);
|
||||
goto out_unlock;
|
||||
}
|
||||
end_fsb = min(end_fsb, smap.br_startoff);
|
||||
} else {
|
||||
end_fsb = min(end_fsb,
|
||||
@@ -1693,6 +1687,33 @@ xfs_zoned_buffered_write_iomap_begin(
|
||||
count_fsb = min3(end_fsb - offset_fsb, XFS_MAX_BMBT_EXTLEN,
|
||||
XFS_B_TO_FSB(mp, 1024 * PAGE_SIZE));
|
||||
|
||||
/*
|
||||
* When zeroing, don't allocate blocks for holes as they are already
|
||||
* zeroes, but we need to ensure that no extents exist in both the data
|
||||
* and COW fork to ensure this really is a hole.
|
||||
*
|
||||
* A window exists where we might observe a hole in both forks with
|
||||
* valid data in cache. Writeback removes the COW fork blocks on
|
||||
* submission but doesn't remap into the data fork until completion. If
|
||||
* the data fork was previously a hole, we'll fail to zero. Until we
|
||||
* find a way to avoid this transient state, check for dirty pagecache
|
||||
* and flush to wait on blocks to land in the data fork.
|
||||
*/
|
||||
if ((flags & IOMAP_ZERO) && srcmap->type == IOMAP_HOLE) {
|
||||
if (filemap_range_needs_writeback(mapping, offset,
|
||||
offset + count - 1)) {
|
||||
xfs_iunlock(ip, lockmode);
|
||||
error = filemap_write_and_wait_range(mapping, offset,
|
||||
offset + count - 1);
|
||||
if (error)
|
||||
return error;
|
||||
goto restart;
|
||||
}
|
||||
|
||||
xfs_hole_to_iomap(ip, iomap, offset_fsb, end_fsb);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* The block reservation is supposed to cover all blocks that the
|
||||
* operation could possibly write, but there is a nasty corner case
|
||||
@@ -1767,6 +1788,8 @@ xfs_buffered_write_iomap_begin(
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
|
||||
xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, count);
|
||||
xfs_fileoff_t cow_fsb = NULLFILEOFF;
|
||||
xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
|
||||
struct xfs_bmbt_irec imap, cmap;
|
||||
struct xfs_iext_cursor icur, ccur;
|
||||
xfs_fsblock_t prealloc_blocks = 0;
|
||||
@@ -1811,30 +1834,96 @@ xfs_buffered_write_iomap_begin(
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Search the data fork first to look up our source mapping. We
|
||||
* always need the data fork map, as we have to return it to the
|
||||
* iomap code so that the higher level write code can read data in to
|
||||
* perform read-modify-write cycles for unaligned writes.
|
||||
* Search the data fork first to look up our source mapping. We always
|
||||
* need the data fork map, as we have to return it to the iomap code so
|
||||
* that the higher level write code can read data in to perform
|
||||
* read-modify-write cycles for unaligned writes.
|
||||
*
|
||||
* Then search the COW fork extent list even if we did not find a data
|
||||
* fork extent. This serves two purposes: first this implements the
|
||||
* speculative preallocation using cowextsize, so that we also unshare
|
||||
* blocks adjacent to shared blocks instead of just the shared blocks
|
||||
* themselves. Second the lookup in the extent list is generally faster
|
||||
* than going out to the shared extent tree.
|
||||
*/
|
||||
eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
|
||||
if (eof)
|
||||
imap.br_startoff = end_fsb; /* fake hole until the end */
|
||||
if (xfs_is_cow_inode(ip)) {
|
||||
if (!ip->i_cowfp) {
|
||||
ASSERT(!xfs_is_reflink_inode(ip));
|
||||
xfs_ifork_init_cow(ip);
|
||||
}
|
||||
cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
|
||||
&ccur, &cmap);
|
||||
if (!cow_eof)
|
||||
cow_fsb = cmap.br_startoff;
|
||||
}
|
||||
|
||||
/* We never need to allocate blocks for zeroing or unsharing a hole. */
|
||||
if ((flags & (IOMAP_UNSHARE | IOMAP_ZERO)) &&
|
||||
imap.br_startoff > offset_fsb) {
|
||||
/* We never need to allocate blocks for unsharing a hole. */
|
||||
if ((flags & IOMAP_UNSHARE) && imap.br_startoff > offset_fsb) {
|
||||
xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* We may need to zero over a hole in the data fork if it's fronted by
|
||||
* COW blocks and dirty pagecache. Scan such file ranges for dirty
|
||||
* cache and fill the iomap batch with folios that need zeroing.
|
||||
*/
|
||||
if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
|
||||
loff_t start, end;
|
||||
unsigned int fbatch_count;
|
||||
|
||||
imap.br_blockcount = imap.br_startoff - offset_fsb;
|
||||
imap.br_startoff = offset_fsb;
|
||||
imap.br_startblock = HOLESTARTBLOCK;
|
||||
imap.br_state = XFS_EXT_NORM;
|
||||
|
||||
if (cow_fsb == NULLFILEOFF)
|
||||
goto found_imap;
|
||||
if (cow_fsb > offset_fsb) {
|
||||
xfs_trim_extent(&imap, offset_fsb,
|
||||
cow_fsb - offset_fsb);
|
||||
goto found_imap;
|
||||
}
|
||||
|
||||
/* no zeroing beyond eof, so split at the boundary */
|
||||
if (offset_fsb >= eof_fsb)
|
||||
goto found_imap;
|
||||
if (offset_fsb < eof_fsb && end_fsb > eof_fsb)
|
||||
xfs_trim_extent(&imap, offset_fsb,
|
||||
eof_fsb - offset_fsb);
|
||||
|
||||
/* COW fork blocks overlap the hole */
|
||||
xfs_trim_extent(&imap, offset_fsb,
|
||||
cmap.br_startoff + cmap.br_blockcount - offset_fsb);
|
||||
start = XFS_FSB_TO_B(mp, imap.br_startoff);
|
||||
end = XFS_FSB_TO_B(mp, imap.br_startoff + imap.br_blockcount);
|
||||
fbatch_count = iomap_fill_dirty_folios(iter, &start, end,
|
||||
&iomap_flags);
|
||||
xfs_trim_extent(&imap, offset_fsb,
|
||||
XFS_B_TO_FSB(mp, start) - offset_fsb);
|
||||
|
||||
/*
|
||||
* Report the COW mapping if we have folios to zero. Otherwise
|
||||
* ignore the COW blocks as preallocation and report a hole.
|
||||
*/
|
||||
if (fbatch_count) {
|
||||
xfs_trim_extent(&cmap, imap.br_startoff,
|
||||
imap.br_blockcount);
|
||||
imap.br_startoff = end_fsb; /* fake hole */
|
||||
goto found_cow;
|
||||
}
|
||||
goto found_imap;
|
||||
}
|
||||
|
||||
/*
|
||||
* For zeroing, trim extents that extend beyond the EOF block. If a
|
||||
* delalloc extent starts beyond the EOF block, convert it to an
|
||||
* unwritten extent.
|
||||
*/
|
||||
if (flags & IOMAP_ZERO) {
|
||||
xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
|
||||
|
||||
if (isnullstartblock(imap.br_startblock) &&
|
||||
offset_fsb >= eof_fsb)
|
||||
goto convert_delay;
|
||||
@@ -1867,24 +1956,13 @@ xfs_buffered_write_iomap_begin(
|
||||
}
|
||||
|
||||
/*
|
||||
* Search the COW fork extent list even if we did not find a data fork
|
||||
* extent. This serves two purposes: first this implements the
|
||||
* speculative preallocation using cowextsize, so that we also unshare
|
||||
* blocks adjacent to shared blocks instead of just the shared blocks
|
||||
* themselves. Second the lookup in the extent list is generally faster
|
||||
* than going out to the shared extent tree.
|
||||
* Now that we've handled any operation specific special cases, at this
|
||||
* point we can report a COW mapping if found.
|
||||
*/
|
||||
if (xfs_is_cow_inode(ip)) {
|
||||
if (!ip->i_cowfp) {
|
||||
ASSERT(!xfs_is_reflink_inode(ip));
|
||||
xfs_ifork_init_cow(ip);
|
||||
}
|
||||
cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
|
||||
&ccur, &cmap);
|
||||
if (!cow_eof && cmap.br_startoff <= offset_fsb) {
|
||||
trace_xfs_reflink_cow_found(ip, &cmap);
|
||||
goto found_cow;
|
||||
}
|
||||
if (xfs_is_cow_inode(ip) &&
|
||||
!cow_eof && cmap.br_startoff <= offset_fsb) {
|
||||
trace_xfs_reflink_cow_found(ip, &cmap);
|
||||
goto found_cow;
|
||||
}
|
||||
|
||||
if (imap.br_startoff <= offset_fsb) {
|
||||
|
||||
@@ -901,20 +901,18 @@ out_dqrele:
|
||||
|
||||
/*
|
||||
* Truncate file. Must have write permission and not be a directory.
|
||||
*
|
||||
* Caution: The caller of this function is responsible for calling
|
||||
* setattr_prepare() or otherwise verifying the change is fine.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_setattr_size(
|
||||
int
|
||||
xfs_vn_setattr_size(
|
||||
struct mnt_idmap *idmap,
|
||||
struct dentry *dentry,
|
||||
struct xfs_inode *ip,
|
||||
struct iattr *iattr)
|
||||
{
|
||||
struct inode *inode = d_inode(dentry);
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
struct inode *inode = VFS_I(ip);
|
||||
xfs_off_t oldsize, newsize;
|
||||
xfs_off_t oldsize = inode->i_size;
|
||||
xfs_off_t newsize = iattr->ia_size;
|
||||
struct xfs_trans *tp;
|
||||
int error;
|
||||
uint lock_flags = 0;
|
||||
@@ -927,8 +925,11 @@ xfs_setattr_size(
|
||||
ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
|
||||
ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0);
|
||||
|
||||
oldsize = inode->i_size;
|
||||
newsize = iattr->ia_size;
|
||||
trace_xfs_setattr(ip);
|
||||
|
||||
error = xfs_vn_change_ok(idmap, dentry, iattr);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/*
|
||||
* Short circuit the truncate case for zero length files.
|
||||
@@ -1109,7 +1110,6 @@ xfs_setattr_size(
|
||||
xfs_inode_clear_eofblocks_tag(ip);
|
||||
}
|
||||
|
||||
ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
|
||||
setattr_copy(idmap, inode, iattr);
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
|
||||
@@ -1129,23 +1129,6 @@ out_trans_cancel:
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_vn_setattr_size(
|
||||
struct mnt_idmap *idmap,
|
||||
struct dentry *dentry,
|
||||
struct iattr *iattr)
|
||||
{
|
||||
struct xfs_inode *ip = XFS_I(d_inode(dentry));
|
||||
int error;
|
||||
|
||||
trace_xfs_setattr(ip);
|
||||
|
||||
error = xfs_vn_change_ok(idmap, dentry, iattr);
|
||||
if (error)
|
||||
return error;
|
||||
return xfs_setattr_size(idmap, dentry, ip, iattr);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_vn_setattr(
|
||||
struct mnt_idmap *idmap,
|
||||
|
||||
@@ -44,17 +44,36 @@
|
||||
#include "xfs_healthmon.h"
|
||||
|
||||
static DEFINE_MUTEX(xfs_uuid_table_mutex);
|
||||
static int xfs_uuid_table_size;
|
||||
static uuid_t *xfs_uuid_table;
|
||||
static DEFINE_XARRAY_ALLOC(xfs_uuid_table);
|
||||
|
||||
static uuid_t *
|
||||
xfs_uuid_search(
|
||||
uuid_t *new_uuid)
|
||||
{
|
||||
unsigned long index = 0;
|
||||
uuid_t *uuid;
|
||||
|
||||
xa_for_each(&xfs_uuid_table, index, uuid) {
|
||||
if (uuid_equal(uuid, new_uuid))
|
||||
return uuid;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
xfs_uuid_delete(
|
||||
uuid_t *uuid,
|
||||
unsigned int index)
|
||||
{
|
||||
ASSERT(uuid_equal(xa_load(&xfs_uuid_table, index), uuid));
|
||||
xa_erase(&xfs_uuid_table, index);
|
||||
}
|
||||
|
||||
void
|
||||
xfs_uuid_table_free(void)
|
||||
{
|
||||
if (xfs_uuid_table_size == 0)
|
||||
return;
|
||||
kfree(xfs_uuid_table);
|
||||
xfs_uuid_table = NULL;
|
||||
xfs_uuid_table_size = 0;
|
||||
ASSERT(xa_empty(&xfs_uuid_table));
|
||||
xa_destroy(&xfs_uuid_table);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -66,7 +85,7 @@ xfs_uuid_mount(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
uuid_t *uuid = &mp->m_sb.sb_uuid;
|
||||
int hole, i;
|
||||
int ret;
|
||||
|
||||
/* Publish UUID in struct super_block */
|
||||
super_set_uuid(mp->m_super, uuid->b, sizeof(*uuid));
|
||||
@@ -80,30 +99,17 @@ xfs_uuid_mount(
|
||||
}
|
||||
|
||||
mutex_lock(&xfs_uuid_table_mutex);
|
||||
for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
|
||||
if (uuid_is_null(&xfs_uuid_table[i])) {
|
||||
hole = i;
|
||||
continue;
|
||||
}
|
||||
if (uuid_equal(uuid, &xfs_uuid_table[i]))
|
||||
goto out_duplicate;
|
||||
if (unlikely(xfs_uuid_search(uuid))) {
|
||||
xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount",
|
||||
uuid);
|
||||
mutex_unlock(&xfs_uuid_table_mutex);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (hole < 0) {
|
||||
xfs_uuid_table = krealloc(xfs_uuid_table,
|
||||
(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
|
||||
GFP_KERNEL | __GFP_NOFAIL);
|
||||
hole = xfs_uuid_table_size++;
|
||||
}
|
||||
xfs_uuid_table[hole] = *uuid;
|
||||
ret = xa_alloc(&xfs_uuid_table, &mp->m_uuid_table_index, uuid,
|
||||
xa_limit_32b, GFP_KERNEL);
|
||||
mutex_unlock(&xfs_uuid_table_mutex);
|
||||
|
||||
return 0;
|
||||
|
||||
out_duplicate:
|
||||
mutex_unlock(&xfs_uuid_table_mutex);
|
||||
xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
|
||||
return -EINVAL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
@@ -111,21 +117,12 @@ xfs_uuid_unmount(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
uuid_t *uuid = &mp->m_sb.sb_uuid;
|
||||
int i;
|
||||
|
||||
if (xfs_has_nouuid(mp))
|
||||
return;
|
||||
|
||||
mutex_lock(&xfs_uuid_table_mutex);
|
||||
for (i = 0; i < xfs_uuid_table_size; i++) {
|
||||
if (uuid_is_null(&xfs_uuid_table[i]))
|
||||
continue;
|
||||
if (!uuid_equal(uuid, &xfs_uuid_table[i]))
|
||||
continue;
|
||||
memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
|
||||
break;
|
||||
}
|
||||
ASSERT(i < xfs_uuid_table_size);
|
||||
xfs_uuid_delete(uuid, mp->m_uuid_table_index);
|
||||
mutex_unlock(&xfs_uuid_table_mutex);
|
||||
}
|
||||
|
||||
|
||||
@@ -346,6 +346,9 @@ typedef struct xfs_mount {
|
||||
|
||||
/* Private data referring to a health monitor object. */
|
||||
struct xfs_healthmon __rcu *m_healthmon;
|
||||
|
||||
/* Index of uuid record in the uuid xarray. */
|
||||
unsigned int m_uuid_table_index;
|
||||
} xfs_mount_t;
|
||||
|
||||
#define M_IGEO(mp) (&(mp)->m_ino_geo)
|
||||
|
||||
@@ -391,6 +391,38 @@ out_rele:
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill out the default quota limits for an ID that has no dquot on disk.
|
||||
* Returns 0 if default limits are configured
|
||||
* and were filled in, -ENOENT otherwise.
|
||||
*/
|
||||
static int
|
||||
xfs_qm_scall_getquota_fill_defaults(
|
||||
struct xfs_mount *mp,
|
||||
xfs_dqtype_t type,
|
||||
struct qc_dqblk *dst)
|
||||
{
|
||||
struct xfs_def_quota *defq;
|
||||
|
||||
defq = xfs_get_defquota(mp->m_quotainfo, type);
|
||||
|
||||
if (!defq->blk.soft && !defq->blk.hard &&
|
||||
!defq->ino.soft && !defq->ino.hard &&
|
||||
!defq->rtb.soft && !defq->rtb.hard) {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
memset(dst, 0, sizeof(*dst));
|
||||
dst->d_spc_softlimit = XFS_FSB_TO_B(mp, defq->blk.soft);
|
||||
dst->d_spc_hardlimit = XFS_FSB_TO_B(mp, defq->blk.hard);
|
||||
dst->d_ino_softlimit = defq->ino.soft;
|
||||
dst->d_ino_hardlimit = defq->ino.hard;
|
||||
dst->d_rt_spc_softlimit = XFS_FSB_TO_B(mp, defq->rtb.soft);
|
||||
dst->d_rt_spc_hardlimit = XFS_FSB_TO_B(mp, defq->rtb.hard);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Fill out the quota context. */
|
||||
static void
|
||||
xfs_qm_scall_getquota_fill_qc(
|
||||
@@ -451,8 +483,17 @@ xfs_qm_scall_getquota(
|
||||
* set doalloc. If it doesn't exist, we'll get ENOENT back.
|
||||
*/
|
||||
error = xfs_qm_dqget(mp, id, type, false, &dqp);
|
||||
if (error)
|
||||
if (error) {
|
||||
/*
|
||||
* If there is no dquot on disk and default limits are
|
||||
* configured, return them with zero usage so that
|
||||
* unprivileged users can see what limits apply to them.
|
||||
*/
|
||||
if (error == -ENOENT && id != 0 &&
|
||||
!xfs_qm_scall_getquota_fill_defaults(mp, type, dst))
|
||||
return 0;
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* If everything's NULL, this dquot doesn't quite exist as far as
|
||||
|
||||
@@ -266,7 +266,7 @@ xfs_refcount_update_diff_items(
|
||||
struct xfs_refcount_intent *ra = ci_entry(a);
|
||||
struct xfs_refcount_intent *rb = ci_entry(b);
|
||||
|
||||
return ra->ri_group->xg_gno - rb->ri_group->xg_gno;
|
||||
return cmp_int(ra->ri_group->xg_gno, rb->ri_group->xg_gno);
|
||||
}
|
||||
|
||||
/* Log refcount updates in the intent item. */
|
||||
|
||||
@@ -267,7 +267,7 @@ xfs_rmap_update_diff_items(
|
||||
struct xfs_rmap_intent *ra = ri_entry(a);
|
||||
struct xfs_rmap_intent *rb = ri_entry(b);
|
||||
|
||||
return ra->ri_group->xg_gno - rb->ri_group->xg_gno;
|
||||
return cmp_int(ra->ri_group->xg_gno, rb->ri_group->xg_gno);
|
||||
}
|
||||
|
||||
/* Log rmap updates in the intent item. */
|
||||
|
||||
@@ -13,7 +13,9 @@
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_log_priv.h"
|
||||
#include "xfs_mount.h"
|
||||
#include "xfs_zone_priv.h"
|
||||
#include "xfs_zones.h"
|
||||
#include "xfs_zone_alloc.h"
|
||||
|
||||
struct xfs_sysfs_attr {
|
||||
struct attribute attr;
|
||||
@@ -718,12 +720,24 @@ max_open_zones_show(
|
||||
}
|
||||
XFS_SYSFS_ATTR_RO(max_open_zones);
|
||||
|
||||
static ssize_t
|
||||
nr_open_zones_show(
|
||||
struct kobject *kobj,
|
||||
char *buf)
|
||||
{
|
||||
struct xfs_zone_info *zi = zoned_to_mp(kobj)->m_zone_info;
|
||||
|
||||
return sysfs_emit(buf, "%u\n", READ_ONCE(zi->zi_nr_open_zones));
|
||||
}
|
||||
XFS_SYSFS_ATTR_RO(nr_open_zones);
|
||||
|
||||
static ssize_t
|
||||
zonegc_low_space_store(
|
||||
struct kobject *kobj,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
struct xfs_mount *mp = zoned_to_mp(kobj);
|
||||
int ret;
|
||||
unsigned int val;
|
||||
|
||||
@@ -734,7 +748,10 @@ zonegc_low_space_store(
|
||||
if (val > 100)
|
||||
return -EINVAL;
|
||||
|
||||
zoned_to_mp(kobj)->m_zonegc_low_space = val;
|
||||
if (mp->m_zonegc_low_space != val) {
|
||||
mp->m_zonegc_low_space = val;
|
||||
xfs_zone_gc_wakeup(mp);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
@@ -751,6 +768,7 @@ XFS_SYSFS_ATTR_RW(zonegc_low_space);
|
||||
|
||||
static struct attribute *xfs_zoned_attrs[] = {
|
||||
ATTR_LIST(max_open_zones),
|
||||
ATTR_LIST(nr_open_zones),
|
||||
ATTR_LIST(zonegc_low_space),
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -394,6 +394,7 @@ DEFINE_ZONE_EVENT(xfs_zone_full);
|
||||
DEFINE_ZONE_EVENT(xfs_zone_opened);
|
||||
DEFINE_ZONE_EVENT(xfs_zone_reset);
|
||||
DEFINE_ZONE_EVENT(xfs_zone_gc_target_opened);
|
||||
DEFINE_ZONE_EVENT(xfs_zone_gc_target_stolen);
|
||||
|
||||
TRACE_EVENT(xfs_zone_free_blocks,
|
||||
TP_PROTO(struct xfs_rtgroup *rtg, xfs_rgblock_t rgbno,
|
||||
@@ -461,6 +462,7 @@ DEFINE_EVENT(xfs_zone_alloc_class, name, \
|
||||
DEFINE_ZONE_ALLOC_EVENT(xfs_zone_record_blocks);
|
||||
DEFINE_ZONE_ALLOC_EVENT(xfs_zone_skip_blocks);
|
||||
DEFINE_ZONE_ALLOC_EVENT(xfs_zone_alloc_blocks);
|
||||
DEFINE_ZONE_ALLOC_EVENT(xfs_zone_spurious_open);
|
||||
|
||||
TRACE_EVENT(xfs_zone_gc_select_victim,
|
||||
TP_PROTO(struct xfs_rtgroup *rtg, unsigned int bucket),
|
||||
@@ -740,7 +742,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
|
||||
__entry->dev = bp->b_target->bt_dev;
|
||||
__entry->bno = xfs_buf_daddr(bp);
|
||||
__entry->nblks = bp->b_length;
|
||||
__entry->hold = bp->b_hold;
|
||||
__entry->hold = bp->b_lockref.count;
|
||||
__entry->pincount = atomic_read(&bp->b_pin_count);
|
||||
__entry->lockval = bp->b_sema.count;
|
||||
__entry->flags = bp->b_flags;
|
||||
@@ -814,7 +816,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
|
||||
__entry->bno = xfs_buf_daddr(bp);
|
||||
__entry->length = bp->b_length;
|
||||
__entry->flags = flags;
|
||||
__entry->hold = bp->b_hold;
|
||||
__entry->hold = bp->b_lockref.count;
|
||||
__entry->pincount = atomic_read(&bp->b_pin_count);
|
||||
__entry->lockval = bp->b_sema.count;
|
||||
__entry->caller_ip = caller_ip;
|
||||
@@ -858,7 +860,7 @@ TRACE_EVENT(xfs_buf_ioerror,
|
||||
__entry->dev = bp->b_target->bt_dev;
|
||||
__entry->bno = xfs_buf_daddr(bp);
|
||||
__entry->length = bp->b_length;
|
||||
__entry->hold = bp->b_hold;
|
||||
__entry->hold = bp->b_lockref.count;
|
||||
__entry->pincount = atomic_read(&bp->b_pin_count);
|
||||
__entry->lockval = bp->b_sema.count;
|
||||
__entry->error = error;
|
||||
@@ -902,7 +904,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
|
||||
__entry->buf_bno = xfs_buf_daddr(bip->bli_buf);
|
||||
__entry->buf_len = bip->bli_buf->b_length;
|
||||
__entry->buf_flags = bip->bli_buf->b_flags;
|
||||
__entry->buf_hold = bip->bli_buf->b_hold;
|
||||
__entry->buf_hold = bip->bli_buf->b_lockref.count;
|
||||
__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
|
||||
__entry->buf_lockval = bip->bli_buf->b_sema.count;
|
||||
__entry->li_flags = bip->bli_item.li_flags;
|
||||
@@ -5206,7 +5208,7 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
|
||||
__entry->xfino = file_inode(xfbt->target->bt_file)->i_ino;
|
||||
__entry->bno = xfs_buf_daddr(bp);
|
||||
__entry->nblks = bp->b_length;
|
||||
__entry->hold = bp->b_hold;
|
||||
__entry->hold = bp->b_lockref.count;
|
||||
__entry->pincount = atomic_read(&bp->b_pin_count);
|
||||
__entry->lockval = bp->b_sema.count;
|
||||
__entry->flags = bp->b_flags;
|
||||
|
||||
@@ -174,42 +174,33 @@ xfs_open_zone_mark_full(
|
||||
WRITE_ONCE(rtg->rtg_open_zone, NULL);
|
||||
|
||||
spin_lock(&zi->zi_open_zones_lock);
|
||||
if (oz->oz_is_gc) {
|
||||
ASSERT(current == zi->zi_gc_thread);
|
||||
zi->zi_open_gc_zone = NULL;
|
||||
} else {
|
||||
if (oz->oz_is_gc)
|
||||
zi->zi_nr_open_gc_zones--;
|
||||
else
|
||||
zi->zi_nr_open_zones--;
|
||||
list_del_init(&oz->oz_entry);
|
||||
}
|
||||
list_del_init(&oz->oz_entry);
|
||||
spin_unlock(&zi->zi_open_zones_lock);
|
||||
xfs_open_zone_put(oz);
|
||||
|
||||
wake_up_all(&zi->zi_zone_wait);
|
||||
if (oz->oz_is_gc)
|
||||
wake_up_process(zi->zi_gc_thread);
|
||||
else
|
||||
wake_up_all(&zi->zi_zone_wait);
|
||||
|
||||
if (used < rtg_blocks(rtg))
|
||||
xfs_zone_account_reclaimable(rtg, rtg_blocks(rtg) - used);
|
||||
xfs_open_zone_put(oz);
|
||||
}
|
||||
|
||||
static void
|
||||
xfs_zone_record_blocks(
|
||||
struct xfs_trans *tp,
|
||||
static inline void
|
||||
xfs_zone_inc_written(
|
||||
struct xfs_open_zone *oz,
|
||||
xfs_fsblock_t fsbno,
|
||||
xfs_filblks_t len)
|
||||
{
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
struct xfs_rtgroup *rtg = oz->oz_rtg;
|
||||
struct xfs_inode *rmapip = rtg_rmap(rtg);
|
||||
xfs_assert_ilocked(rtg_rmap(oz->oz_rtg), XFS_ILOCK_EXCL);
|
||||
|
||||
trace_xfs_zone_record_blocks(oz, xfs_rtb_to_rgbno(mp, fsbno), len);
|
||||
|
||||
xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
|
||||
xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);
|
||||
rmapip->i_used_blocks += len;
|
||||
ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg));
|
||||
oz->oz_written += len;
|
||||
if (oz->oz_written == rtg_blocks(rtg))
|
||||
if (oz->oz_written == rtg_blocks(oz->oz_rtg))
|
||||
xfs_open_zone_mark_full(oz);
|
||||
xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -227,9 +218,7 @@ xfs_zone_skip_blocks(
|
||||
trace_xfs_zone_skip_blocks(oz, 0, len);
|
||||
|
||||
xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
|
||||
oz->oz_written += len;
|
||||
if (oz->oz_written == rtg_blocks(rtg))
|
||||
xfs_open_zone_mark_full(oz);
|
||||
xfs_zone_inc_written(oz, len);
|
||||
xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
|
||||
|
||||
xfs_add_frextents(rtg_mount(rtg), len);
|
||||
@@ -244,6 +233,8 @@ xfs_zoned_map_extent(
|
||||
xfs_fsblock_t old_startblock)
|
||||
{
|
||||
struct xfs_bmbt_irec data;
|
||||
struct xfs_rtgroup *rtg = oz->oz_rtg;
|
||||
struct xfs_inode *rmapip = rtg_rmap(rtg);
|
||||
int nmaps = 1;
|
||||
int error;
|
||||
|
||||
@@ -302,7 +293,15 @@ xfs_zoned_map_extent(
|
||||
}
|
||||
}
|
||||
|
||||
xfs_zone_record_blocks(tp, oz, new->br_startblock, new->br_blockcount);
|
||||
trace_xfs_zone_record_blocks(oz,
|
||||
xfs_rtb_to_rgbno(tp->t_mountp, new->br_startblock),
|
||||
new->br_blockcount);
|
||||
xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
|
||||
xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);
|
||||
rmapip->i_used_blocks += new->br_blockcount;
|
||||
ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg));
|
||||
xfs_zone_inc_written(oz, new->br_blockcount);
|
||||
xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE);
|
||||
|
||||
/* Map the new blocks into the data fork. */
|
||||
xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, new);
|
||||
@@ -560,6 +559,9 @@ xfs_try_use_zone(
|
||||
struct xfs_open_zone *oz,
|
||||
unsigned int goodness)
|
||||
{
|
||||
if (oz->oz_is_gc)
|
||||
return false;
|
||||
|
||||
if (oz->oz_allocated == rtg_blocks(oz->oz_rtg))
|
||||
return false;
|
||||
|
||||
@@ -681,10 +683,11 @@ xfs_select_zone_nowait(
|
||||
if (oz)
|
||||
goto out_unlock;
|
||||
|
||||
if (pack_tight)
|
||||
if (pack_tight) {
|
||||
oz = xfs_select_open_zone_mru(zi, write_hint);
|
||||
if (oz)
|
||||
goto out_unlock;
|
||||
if (oz)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* See if we can open a new zone and use that so that data for different
|
||||
@@ -695,7 +698,7 @@ xfs_select_zone_nowait(
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Try to find an zone that is an ok match to colocate data with.
|
||||
* Try to find a zone that is an ok match to colocate data with.
|
||||
*/
|
||||
oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_OK);
|
||||
if (oz)
|
||||
@@ -1232,6 +1235,100 @@ xfs_free_zone_info(
|
||||
kfree(zi);
|
||||
}
|
||||
|
||||
static int
|
||||
xfs_report_zones(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_init_zones *iz)
|
||||
{
|
||||
struct xfs_rtgroup *rtg = NULL;
|
||||
|
||||
while ((rtg = xfs_rtgroup_next(mp, rtg))) {
|
||||
xfs_rgblock_t write_pointer;
|
||||
int error;
|
||||
|
||||
error = xfs_query_write_pointer(iz, rtg, &write_pointer);
|
||||
if (!error)
|
||||
error = xfs_init_zone(iz, rtg, write_pointer);
|
||||
if (error) {
|
||||
xfs_rtgroup_rele(rtg);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
xfs_zone_is_conv(
|
||||
struct xfs_rtgroup *rtg)
|
||||
{
|
||||
return !bdev_zone_is_seq(rtg_mount(rtg)->m_rtdev_targp->bt_bdev,
|
||||
xfs_gbno_to_daddr(rtg_group(rtg), 0));
|
||||
}
|
||||
|
||||
static struct xfs_open_zone *
|
||||
xfs_find_fullest_conventional_open_zone(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
struct xfs_zone_info *zi = mp->m_zone_info;
|
||||
struct xfs_open_zone *found = NULL, *oz;
|
||||
|
||||
spin_lock(&zi->zi_open_zones_lock);
|
||||
list_for_each_entry(oz, &zi->zi_open_zones, oz_entry) {
|
||||
if (!xfs_zone_is_conv(oz->oz_rtg))
|
||||
continue;
|
||||
if (!found || oz->oz_allocated > found->oz_allocated)
|
||||
found = oz;
|
||||
}
|
||||
spin_unlock(&zi->zi_open_zones_lock);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the fullest conventional zones and remove them from the open zone pool
|
||||
* until we are at the open zone limit.
|
||||
*
|
||||
* We can end up with spurious "open" zones when the last blocks in a fully
|
||||
* written zone were invalidated as there is no write pointer for conventional
|
||||
* zones.
|
||||
*
|
||||
* If we are still over the limit when there is no conventional open zone left,
|
||||
* the user overrode the max open zones limit using the max_open_zones mount
|
||||
* option and we should fail.
|
||||
*/
|
||||
static int
|
||||
xfs_finish_spurious_open_zones(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_init_zones *iz)
|
||||
{
|
||||
struct xfs_zone_info *zi = mp->m_zone_info;
|
||||
|
||||
while (zi->zi_nr_open_zones > mp->m_max_open_zones) {
|
||||
struct xfs_open_zone *oz;
|
||||
xfs_filblks_t adjust;
|
||||
|
||||
oz = xfs_find_fullest_conventional_open_zone(mp);
|
||||
if (!oz) {
|
||||
xfs_err(mp,
|
||||
"too many open zones for max_open_zones limit (%u/%u)",
|
||||
zi->zi_nr_open_zones, mp->m_max_open_zones);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
xfs_rtgroup_lock(oz->oz_rtg, XFS_RTGLOCK_RMAP);
|
||||
adjust = rtg_blocks(oz->oz_rtg) - oz->oz_written;
|
||||
trace_xfs_zone_spurious_open(oz, oz->oz_written, adjust);
|
||||
oz->oz_written = rtg_blocks(oz->oz_rtg);
|
||||
xfs_open_zone_mark_full(oz);
|
||||
xfs_rtgroup_unlock(oz->oz_rtg, XFS_RTGLOCK_RMAP);
|
||||
iz->available -= adjust;
|
||||
iz->reclaimable += adjust;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_mount_zones(
|
||||
struct xfs_mount *mp)
|
||||
@@ -1240,7 +1337,6 @@ xfs_mount_zones(
|
||||
.zone_capacity = mp->m_groups[XG_TYPE_RTG].blocks,
|
||||
.zone_size = xfs_rtgroup_raw_size(mp),
|
||||
};
|
||||
struct xfs_rtgroup *rtg = NULL;
|
||||
int error;
|
||||
|
||||
if (!mp->m_rtdev_targp) {
|
||||
@@ -1270,9 +1366,17 @@ xfs_mount_zones(
|
||||
if (!mp->m_zone_info)
|
||||
return -ENOMEM;
|
||||
|
||||
xfs_info(mp, "%u zones of %u blocks (%u max open zones)",
|
||||
mp->m_sb.sb_rgcount, iz.zone_capacity, mp->m_max_open_zones);
|
||||
trace_xfs_zones_mount(mp);
|
||||
error = xfs_report_zones(mp, &iz);
|
||||
if (error)
|
||||
goto out_free_zone_info;
|
||||
|
||||
error = xfs_finish_spurious_open_zones(mp, &iz);
|
||||
if (error)
|
||||
goto out_free_zone_info;
|
||||
|
||||
xfs_set_freecounter(mp, XC_FREE_RTAVAILABLE, iz.available);
|
||||
xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
|
||||
iz.available + iz.reclaimable);
|
||||
|
||||
/*
|
||||
* The writeback code switches between inodes regularly to provide
|
||||
@@ -1298,22 +1402,6 @@ xfs_mount_zones(
|
||||
XFS_FSB_TO_B(mp, min(iz.zone_capacity, XFS_MAX_BMBT_EXTLEN)) >>
|
||||
PAGE_SHIFT;
|
||||
|
||||
while ((rtg = xfs_rtgroup_next(mp, rtg))) {
|
||||
xfs_rgblock_t write_pointer;
|
||||
|
||||
error = xfs_query_write_pointer(&iz, rtg, &write_pointer);
|
||||
if (!error)
|
||||
error = xfs_init_zone(&iz, rtg, write_pointer);
|
||||
if (error) {
|
||||
xfs_rtgroup_rele(rtg);
|
||||
goto out_free_zone_info;
|
||||
}
|
||||
}
|
||||
|
||||
xfs_set_freecounter(mp, XC_FREE_RTAVAILABLE, iz.available);
|
||||
xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
|
||||
iz.available + iz.reclaimable);
|
||||
|
||||
/*
|
||||
* The user may configure GC to free up a percentage of unused blocks.
|
||||
* By default this is 0. GC will always trigger at the minimum level
|
||||
@@ -1324,6 +1412,10 @@ xfs_mount_zones(
|
||||
error = xfs_zone_gc_mount(mp);
|
||||
if (error)
|
||||
goto out_free_zone_info;
|
||||
|
||||
xfs_info(mp, "%u zones of %u blocks (%u max open zones)",
|
||||
mp->m_sb.sb_rgcount, iz.zone_capacity, mp->m_max_open_zones);
|
||||
trace_xfs_zones_mount(mp);
|
||||
return 0;
|
||||
|
||||
out_free_zone_info:
|
||||
|
||||
@@ -51,6 +51,7 @@ int xfs_mount_zones(struct xfs_mount *mp);
|
||||
void xfs_unmount_zones(struct xfs_mount *mp);
|
||||
void xfs_zone_gc_start(struct xfs_mount *mp);
|
||||
void xfs_zone_gc_stop(struct xfs_mount *mp);
|
||||
void xfs_zone_gc_wakeup(struct xfs_mount *mp);
|
||||
#else
|
||||
static inline int xfs_mount_zones(struct xfs_mount *mp)
|
||||
{
|
||||
@@ -65,6 +66,9 @@ static inline void xfs_zone_gc_start(struct xfs_mount *mp)
|
||||
static inline void xfs_zone_gc_stop(struct xfs_mount *mp)
|
||||
{
|
||||
}
|
||||
static inline void xfs_zone_gc_wakeup(struct xfs_mount *mp)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_XFS_RT */
|
||||
|
||||
#endif /* _XFS_ZONE_ALLOC_H */
|
||||
|
||||
@@ -125,6 +125,7 @@ struct xfs_zone_gc_iter {
|
||||
*/
|
||||
struct xfs_zone_gc_data {
|
||||
struct xfs_mount *mp;
|
||||
struct xfs_open_zone *oz;
|
||||
|
||||
/* bioset used to allocate the gc_bios */
|
||||
struct bio_set bio_set;
|
||||
@@ -170,25 +171,37 @@ xfs_zoned_need_gc(
|
||||
s64 available, free, threshold;
|
||||
s32 remainder;
|
||||
|
||||
/* If we have no reclaimable blocks, running GC is useless. */
|
||||
if (!xfs_zoned_have_reclaimable(mp->m_zone_info))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* In order to avoid file fragmentation as much as possible, we should
|
||||
* make sure that we can open enough zones. So trigger GC if the number
|
||||
* of blocks immediately available for writes is lower than the total
|
||||
* number of blocks from all possible open zones.
|
||||
*/
|
||||
available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE);
|
||||
|
||||
if (available <
|
||||
xfs_rtgs_to_rfsbs(mp, mp->m_max_open_zones - XFS_OPEN_GC_ZONES))
|
||||
return true;
|
||||
|
||||
free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);
|
||||
/*
|
||||
* For cases where the user wants to be more aggressive with GC,
|
||||
* the sysfs attribute zonegc_low_space may be set to a non zero value,
|
||||
* to indicate that GC should try to maintain at least zonegc_low_space
|
||||
* percent of the free space to be directly available for writing. Check
|
||||
* this here.
|
||||
*/
|
||||
if (!mp->m_zonegc_low_space)
|
||||
return false;
|
||||
|
||||
free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);
|
||||
threshold = div_s64_rem(free, 100, &remainder);
|
||||
threshold = threshold * mp->m_zonegc_low_space +
|
||||
remainder * div_s64(mp->m_zonegc_low_space, 100);
|
||||
|
||||
if (available < threshold)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
return available < threshold;
|
||||
}
|
||||
|
||||
static struct xfs_zone_gc_data *
|
||||
@@ -362,7 +375,7 @@ done:
|
||||
}
|
||||
|
||||
static bool
|
||||
xfs_zone_gc_iter_next(
|
||||
xfs_zone_gc_iter_irec(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_zone_gc_iter *iter,
|
||||
struct xfs_rmap_irec *chunk_rec,
|
||||
@@ -371,9 +384,6 @@ xfs_zone_gc_iter_next(
|
||||
struct xfs_rmap_irec *irec;
|
||||
int error;
|
||||
|
||||
if (!iter->victim_rtg)
|
||||
return false;
|
||||
|
||||
retry:
|
||||
if (iter->rec_idx == iter->rec_count) {
|
||||
error = xfs_zone_gc_query(mp, iter);
|
||||
@@ -515,10 +525,11 @@ xfs_zone_gc_select_victim(
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct xfs_open_zone *
|
||||
xfs_zone_gc_steal_open(
|
||||
struct xfs_zone_info *zi)
|
||||
static int
|
||||
xfs_zone_gc_steal_open_zone(
|
||||
struct xfs_zone_gc_data *data)
|
||||
{
|
||||
struct xfs_zone_info *zi = data->mp->m_zone_info;
|
||||
struct xfs_open_zone *oz, *found = NULL;
|
||||
|
||||
spin_lock(&zi->zi_open_zones_lock);
|
||||
@@ -526,56 +537,64 @@ xfs_zone_gc_steal_open(
|
||||
if (!found || oz->oz_allocated < found->oz_allocated)
|
||||
found = oz;
|
||||
}
|
||||
|
||||
if (found) {
|
||||
found->oz_is_gc = true;
|
||||
list_del_init(&found->oz_entry);
|
||||
zi->zi_nr_open_zones--;
|
||||
if (!found) {
|
||||
spin_unlock(&zi->zi_open_zones_lock);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
trace_xfs_zone_gc_target_stolen(found->oz_rtg);
|
||||
found->oz_is_gc = true;
|
||||
zi->zi_nr_open_zones--;
|
||||
zi->zi_nr_open_gc_zones++;
|
||||
spin_unlock(&zi->zi_open_zones_lock);
|
||||
return found;
|
||||
}
|
||||
|
||||
static struct xfs_open_zone *
|
||||
xfs_zone_gc_select_target(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
struct xfs_zone_info *zi = mp->m_zone_info;
|
||||
struct xfs_open_zone *oz = zi->zi_open_gc_zone;
|
||||
|
||||
/*
|
||||
* We need to wait for pending writes to finish.
|
||||
*/
|
||||
if (oz && oz->oz_written < rtg_blocks(oz->oz_rtg))
|
||||
return NULL;
|
||||
|
||||
ASSERT(zi->zi_nr_open_zones <=
|
||||
mp->m_max_open_zones - XFS_OPEN_GC_ZONES);
|
||||
oz = xfs_open_zone(mp, WRITE_LIFE_NOT_SET, true);
|
||||
if (oz)
|
||||
trace_xfs_zone_gc_target_opened(oz->oz_rtg);
|
||||
spin_lock(&zi->zi_open_zones_lock);
|
||||
zi->zi_open_gc_zone = oz;
|
||||
spin_unlock(&zi->zi_open_zones_lock);
|
||||
return oz;
|
||||
atomic_inc(&found->oz_ref);
|
||||
data->oz = found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure we have a valid open zone to write the GC data to.
|
||||
*
|
||||
* If the current target zone has space keep writing to it, else first wait for
|
||||
* all pending writes and then pick a new one.
|
||||
* Ensure we have a valid open zone to write to.
|
||||
*/
|
||||
static struct xfs_open_zone *
|
||||
xfs_zone_gc_ensure_target(
|
||||
struct xfs_mount *mp)
|
||||
static bool
|
||||
xfs_zone_gc_select_target(
|
||||
struct xfs_zone_gc_data *data)
|
||||
{
|
||||
struct xfs_open_zone *oz = mp->m_zone_info->zi_open_gc_zone;
|
||||
struct xfs_zone_info *zi = data->mp->m_zone_info;
|
||||
|
||||
if (!oz || oz->oz_allocated == rtg_blocks(oz->oz_rtg))
|
||||
return xfs_zone_gc_select_target(mp);
|
||||
return oz;
|
||||
if (data->oz) {
|
||||
/*
|
||||
* If we have space available, just keep using the existing
|
||||
* zone.
|
||||
*/
|
||||
if (data->oz->oz_allocated < rtg_blocks(data->oz->oz_rtg))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Wait for all writes to the current zone to finish before
|
||||
* picking a new one.
|
||||
*/
|
||||
if (data->oz->oz_written < rtg_blocks(data->oz->oz_rtg))
|
||||
return false;
|
||||
|
||||
xfs_open_zone_put(data->oz);
|
||||
}
|
||||
|
||||
/*
|
||||
* Open a new zone when there is none currently in use.
|
||||
*/
|
||||
ASSERT(zi->zi_nr_open_zones <=
|
||||
data->mp->m_max_open_zones - XFS_OPEN_GC_ZONES);
|
||||
data->oz = xfs_open_zone(data->mp, WRITE_LIFE_NOT_SET, true);
|
||||
if (!data->oz)
|
||||
return false;
|
||||
trace_xfs_zone_gc_target_opened(data->oz->oz_rtg);
|
||||
atomic_inc(&data->oz->oz_ref);
|
||||
spin_lock(&zi->zi_open_zones_lock);
|
||||
zi->zi_nr_open_gc_zones++;
|
||||
list_add_tail(&data->oz->oz_entry, &zi->zi_open_zones);
|
||||
spin_unlock(&zi->zi_open_zones_lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -590,7 +609,7 @@ xfs_zone_gc_end_io(
|
||||
wake_up_process(data->mp->m_zone_info->zi_gc_thread);
|
||||
}
|
||||
|
||||
static struct xfs_open_zone *
|
||||
static bool
|
||||
xfs_zone_gc_alloc_blocks(
|
||||
struct xfs_zone_gc_data *data,
|
||||
xfs_extlen_t *count_fsb,
|
||||
@@ -598,11 +617,7 @@ xfs_zone_gc_alloc_blocks(
|
||||
bool *is_seq)
|
||||
{
|
||||
struct xfs_mount *mp = data->mp;
|
||||
struct xfs_open_zone *oz;
|
||||
|
||||
oz = xfs_zone_gc_ensure_target(mp);
|
||||
if (!oz)
|
||||
return NULL;
|
||||
struct xfs_open_zone *oz = data->oz;
|
||||
|
||||
*count_fsb = min(*count_fsb, XFS_B_TO_FSB(mp, data->scratch_available));
|
||||
|
||||
@@ -624,7 +639,7 @@ xfs_zone_gc_alloc_blocks(
|
||||
spin_unlock(&mp->m_sb_lock);
|
||||
|
||||
if (!*count_fsb)
|
||||
return NULL;
|
||||
return false;
|
||||
|
||||
*daddr = xfs_gbno_to_daddr(rtg_group(oz->oz_rtg), 0);
|
||||
*is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *daddr);
|
||||
@@ -632,7 +647,7 @@ xfs_zone_gc_alloc_blocks(
|
||||
*daddr += XFS_FSB_TO_BB(mp, oz->oz_allocated);
|
||||
oz->oz_allocated += *count_fsb;
|
||||
atomic_inc(&oz->oz_ref);
|
||||
return oz;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -657,6 +672,28 @@ xfs_zone_gc_add_data(
|
||||
} while (len);
|
||||
}
|
||||
|
||||
static bool
|
||||
xfs_zone_gc_can_start_chunk(
|
||||
struct xfs_zone_gc_data *data)
|
||||
{
|
||||
|
||||
if (xfs_is_shutdown(data->mp))
|
||||
return false;
|
||||
if (!data->scratch_available)
|
||||
return false;
|
||||
|
||||
if (!data->iter.victim_rtg) {
|
||||
if (kthread_should_stop() || kthread_should_park())
|
||||
return false;
|
||||
if (!xfs_zoned_need_gc(data->mp))
|
||||
return false;
|
||||
if (!xfs_zone_gc_select_victim(data))
|
||||
return false;
|
||||
}
|
||||
|
||||
return xfs_zone_gc_select_target(data);
|
||||
}
|
||||
|
||||
static bool
|
||||
xfs_zone_gc_start_chunk(
|
||||
struct xfs_zone_gc_data *data)
|
||||
@@ -664,7 +701,6 @@ xfs_zone_gc_start_chunk(
|
||||
struct xfs_zone_gc_iter *iter = &data->iter;
|
||||
struct xfs_mount *mp = data->mp;
|
||||
struct block_device *bdev = mp->m_rtdev_targp->bt_bdev;
|
||||
struct xfs_open_zone *oz;
|
||||
struct xfs_rmap_irec irec;
|
||||
struct xfs_gc_bio *chunk;
|
||||
struct xfs_inode *ip;
|
||||
@@ -672,14 +708,15 @@ xfs_zone_gc_start_chunk(
|
||||
xfs_daddr_t daddr;
|
||||
bool is_seq;
|
||||
|
||||
if (xfs_is_shutdown(mp))
|
||||
if (!xfs_zone_gc_can_start_chunk(data))
|
||||
return false;
|
||||
|
||||
if (!xfs_zone_gc_iter_next(mp, iter, &irec, &ip))
|
||||
set_current_state(TASK_RUNNING);
|
||||
if (!xfs_zone_gc_iter_irec(mp, iter, &irec, &ip))
|
||||
return false;
|
||||
oz = xfs_zone_gc_alloc_blocks(data, &irec.rm_blockcount, &daddr,
|
||||
&is_seq);
|
||||
if (!oz) {
|
||||
|
||||
if (!xfs_zone_gc_alloc_blocks(data, &irec.rm_blockcount, &daddr,
|
||||
&is_seq)) {
|
||||
xfs_irele(ip);
|
||||
return false;
|
||||
}
|
||||
@@ -699,7 +736,7 @@ xfs_zone_gc_start_chunk(
|
||||
chunk->new_daddr = daddr;
|
||||
chunk->is_seq = is_seq;
|
||||
chunk->data = data;
|
||||
chunk->oz = oz;
|
||||
chunk->oz = data->oz;
|
||||
chunk->victim_rtg = iter->victim_rtg;
|
||||
atomic_inc(&rtg_group(chunk->victim_rtg)->xg_active_ref);
|
||||
atomic_inc(&chunk->victim_rtg->rtg_gccount);
|
||||
@@ -985,33 +1022,6 @@ xfs_zone_gc_reset_zones(
|
||||
} while (next);
|
||||
}
|
||||
|
||||
static bool
|
||||
xfs_zone_gc_should_start_new_work(
|
||||
struct xfs_zone_gc_data *data)
|
||||
{
|
||||
struct xfs_open_zone *oz;
|
||||
|
||||
if (xfs_is_shutdown(data->mp))
|
||||
return false;
|
||||
if (!data->scratch_available)
|
||||
return false;
|
||||
|
||||
oz = xfs_zone_gc_ensure_target(data->mp);
|
||||
if (!oz || oz->oz_allocated == rtg_blocks(oz->oz_rtg))
|
||||
return false;
|
||||
|
||||
if (!data->iter.victim_rtg) {
|
||||
if (kthread_should_stop() || kthread_should_park())
|
||||
return false;
|
||||
if (!xfs_zoned_need_gc(data->mp))
|
||||
return false;
|
||||
if (!xfs_zone_gc_select_victim(data))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle the work to read and write data for GC and to reset the zones,
|
||||
* including handling all completions.
|
||||
@@ -1061,13 +1071,10 @@ xfs_zone_gc_handle_work(
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
if (xfs_zone_gc_should_start_new_work(data)) {
|
||||
set_current_state(TASK_RUNNING);
|
||||
blk_start_plug(&plug);
|
||||
while (xfs_zone_gc_start_chunk(data))
|
||||
;
|
||||
blk_finish_plug(&plug);
|
||||
}
|
||||
blk_start_plug(&plug);
|
||||
while (xfs_zone_gc_start_chunk(data))
|
||||
;
|
||||
blk_finish_plug(&plug);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1127,6 +1134,8 @@ xfs_zoned_gcd(
|
||||
}
|
||||
xfs_clear_zonegc_running(mp);
|
||||
|
||||
if (data->oz)
|
||||
xfs_open_zone_put(data->oz);
|
||||
if (data->iter.victim_rtg)
|
||||
xfs_rtgroup_rele(data->iter.victim_rtg);
|
||||
|
||||
@@ -1151,41 +1160,49 @@ xfs_zone_gc_stop(
|
||||
kthread_park(mp->m_zone_info->zi_gc_thread);
|
||||
}
|
||||
|
||||
void
|
||||
xfs_zone_gc_wakeup(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
struct super_block *sb = mp->m_super;
|
||||
|
||||
/*
|
||||
* If we are unmounting the file system we must not try to
|
||||
* wake gc as m_zone_info might have been freed already.
|
||||
*/
|
||||
if (down_read_trylock(&sb->s_umount)) {
|
||||
if (!xfs_is_readonly(mp))
|
||||
wake_up_process(mp->m_zone_info->zi_gc_thread);
|
||||
up_read(&sb->s_umount);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
xfs_zone_gc_mount(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
struct xfs_zone_info *zi = mp->m_zone_info;
|
||||
struct xfs_zone_gc_data *data;
|
||||
struct xfs_open_zone *oz;
|
||||
int error;
|
||||
|
||||
data = xfs_zone_gc_data_alloc(mp);
|
||||
if (!data)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* If there are no free zones available for GC, pick the open zone with
|
||||
* If there are no free zones available for GC, or the number of open
|
||||
* zones has reached the open zone limit, pick the open zone with
|
||||
* the least used space to GC into. This should only happen after an
|
||||
* unclean shutdown near ENOSPC while GC was ongoing.
|
||||
*
|
||||
* We also need to do this for the first gc zone allocation if we
|
||||
* unmounted while at the open limit.
|
||||
* unclean shutdown while GC was ongoing. Otherwise a GC zone will
|
||||
* be selected from the free zone pool on demand.
|
||||
*/
|
||||
if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_FREE) ||
|
||||
zi->zi_nr_open_zones == mp->m_max_open_zones)
|
||||
oz = xfs_zone_gc_steal_open(zi);
|
||||
else
|
||||
oz = xfs_open_zone(mp, WRITE_LIFE_NOT_SET, true);
|
||||
if (!oz) {
|
||||
xfs_warn(mp, "unable to allocate a zone for gc");
|
||||
error = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
trace_xfs_zone_gc_target_opened(oz->oz_rtg);
|
||||
zi->zi_open_gc_zone = oz;
|
||||
|
||||
data = xfs_zone_gc_data_alloc(mp);
|
||||
if (!data) {
|
||||
error = -ENOMEM;
|
||||
goto out_put_gc_zone;
|
||||
zi->zi_nr_open_zones >= mp->m_max_open_zones) {
|
||||
error = xfs_zone_gc_steal_open_zone(data);
|
||||
if (error) {
|
||||
xfs_warn(mp, "unable to steal an open zone for gc");
|
||||
goto out_free_gc_data;
|
||||
}
|
||||
}
|
||||
|
||||
zi->zi_gc_thread = kthread_create(xfs_zoned_gcd, data,
|
||||
@@ -1193,18 +1210,18 @@ xfs_zone_gc_mount(
|
||||
if (IS_ERR(zi->zi_gc_thread)) {
|
||||
xfs_warn(mp, "unable to create zone gc thread");
|
||||
error = PTR_ERR(zi->zi_gc_thread);
|
||||
goto out_free_gc_data;
|
||||
goto out_put_oz;
|
||||
}
|
||||
|
||||
/* xfs_zone_gc_start will unpark for rw mounts */
|
||||
kthread_park(zi->zi_gc_thread);
|
||||
return 0;
|
||||
|
||||
out_put_oz:
|
||||
if (data->oz)
|
||||
xfs_open_zone_put(data->oz);
|
||||
out_free_gc_data:
|
||||
kfree(data);
|
||||
out_put_gc_zone:
|
||||
xfs_open_zone_put(zi->zi_open_gc_zone);
|
||||
out:
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -1215,6 +1232,4 @@ xfs_zone_gc_unmount(
|
||||
struct xfs_zone_info *zi = mp->m_zone_info;
|
||||
|
||||
kthread_stop(zi->zi_gc_thread);
|
||||
if (zi->zi_open_gc_zone)
|
||||
xfs_open_zone_put(zi->zi_open_gc_zone);
|
||||
}
|
||||
|
||||
@@ -30,11 +30,12 @@ xfs_show_open_zone(
|
||||
struct seq_file *m,
|
||||
struct xfs_open_zone *oz)
|
||||
{
|
||||
seq_printf(m, "\t zone %d, wp %u, written %u, used %u, hint %s\n",
|
||||
seq_printf(m, "\t zone %d, wp %u, written %u, used %u, hint %s %s\n",
|
||||
rtg_rgno(oz->oz_rtg),
|
||||
oz->oz_allocated, oz->oz_written,
|
||||
rtg_rmap(oz->oz_rtg)->i_used_blocks,
|
||||
xfs_write_hint_to_str(oz->oz_write_hint));
|
||||
xfs_write_hint_to_str(oz->oz_write_hint),
|
||||
oz->oz_is_gc ? "(GC)" : "");
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -58,9 +59,8 @@ xfs_show_full_zone_used_distribution(
|
||||
spin_unlock(&zi->zi_used_buckets_lock);
|
||||
|
||||
full = mp->m_sb.sb_rgcount;
|
||||
if (zi->zi_open_gc_zone)
|
||||
full--;
|
||||
full -= zi->zi_nr_open_zones;
|
||||
full -= zi->zi_nr_open_gc_zones;
|
||||
full -= atomic_read(&zi->zi_nr_free_zones);
|
||||
full -= reclaimable;
|
||||
|
||||
@@ -90,15 +90,20 @@ xfs_zoned_show_stats(
|
||||
seq_printf(m, "\tRT GC required: %d\n",
|
||||
xfs_zoned_need_gc(mp));
|
||||
|
||||
seq_printf(m, "\ttotal number of zones: %u\n",
|
||||
mp->m_sb.sb_rgcount);
|
||||
seq_printf(m, "\tfree zones: %d\n", atomic_read(&zi->zi_nr_free_zones));
|
||||
seq_puts(m, "\topen zones:\n");
|
||||
|
||||
spin_lock(&zi->zi_open_zones_lock);
|
||||
seq_printf(m, "\tmax open zones: %u\n",
|
||||
mp->m_max_open_zones);
|
||||
seq_printf(m, "\tnr open zones: %u\n",
|
||||
zi->zi_nr_open_zones);
|
||||
seq_printf(m, "\tnr open GC zones: %u\n",
|
||||
zi->zi_nr_open_gc_zones);
|
||||
seq_puts(m, "\topen zones:\n");
|
||||
list_for_each_entry(oz, &zi->zi_open_zones, oz_entry)
|
||||
xfs_show_open_zone(m, oz);
|
||||
if (zi->zi_open_gc_zone) {
|
||||
seq_puts(m, "\topen gc zone:\n");
|
||||
xfs_show_open_zone(m, zi->zi_open_gc_zone);
|
||||
}
|
||||
spin_unlock(&zi->zi_open_zones_lock);
|
||||
seq_puts(m, "\tused blocks distribution (fully written zones):\n");
|
||||
xfs_show_full_zone_used_distribution(m, mp);
|
||||
|
||||
@@ -32,11 +32,7 @@ struct xfs_open_zone {
|
||||
*/
|
||||
enum rw_hint oz_write_hint;
|
||||
|
||||
/*
|
||||
* Is this open zone used for garbage collection? There can only be a
|
||||
* single open GC zone, which is pointed to by zi_open_gc_zone in
|
||||
* struct xfs_zone_info. Constant over the life time of an open zone.
|
||||
*/
|
||||
/* Is this open zone used for garbage collection? */
|
||||
bool oz_is_gc;
|
||||
|
||||
/*
|
||||
@@ -68,6 +64,7 @@ struct xfs_zone_info {
|
||||
spinlock_t zi_open_zones_lock;
|
||||
struct list_head zi_open_zones;
|
||||
unsigned int zi_nr_open_zones;
|
||||
unsigned int zi_nr_open_gc_zones;
|
||||
|
||||
/*
|
||||
* Free zone search cursor and number of free zones:
|
||||
@@ -81,15 +78,9 @@ struct xfs_zone_info {
|
||||
wait_queue_head_t zi_zone_wait;
|
||||
|
||||
/*
|
||||
* Pointer to the GC thread, and the current open zone used by GC
|
||||
* (if any).
|
||||
*
|
||||
* zi_open_gc_zone is mostly private to the GC thread, but can be read
|
||||
* for debugging from other threads, in which case zi_open_zones_lock
|
||||
* must be taken to access it.
|
||||
* Pointer to the GC thread.
|
||||
*/
|
||||
struct task_struct *zi_gc_thread;
|
||||
struct xfs_open_zone *zi_open_gc_zone;
|
||||
|
||||
/*
|
||||
* List of zones that need a reset:
|
||||
|
||||
Reference in New Issue
Block a user