Merge tag 'ext4_for_linus_6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Major ext4 changes for 6.17:

   - Better scalability for ext4 block allocation

   - Fix insufficient credits when writing back large folios

  Miscellaneous bug fixes, especially when handling exteded attriutes,
  inline data, and fast commit"

* tag 'ext4_for_linus_6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (39 commits)
  ext4: do not BUG when INLINE_DATA_FL lacks system.data xattr
  ext4: implement linear-like traversal across order xarrays
  ext4: refactor choose group to scan group
  ext4: convert free groups order lists to xarrays
  ext4: factor out ext4_mb_scan_group()
  ext4: factor out ext4_mb_might_prefetch()
  ext4: factor out __ext4_mb_scan_group()
  ext4: fix largest free orders lists corruption on mb_optimize_scan switch
  ext4: fix zombie groups in average fragment size lists
  ext4: merge freed extent with existing extents before insertion
  ext4: convert sbi->s_mb_free_pending to atomic_t
  ext4: fix typo in CR_GOAL_LEN_SLOW comment
  ext4: get rid of some obsolete EXT4_MB_HINT flags
  ext4: utilize multiple global goals to reduce contention
  ext4: remove unnecessary s_md_lock on update s_mb_last_group
  ext4: remove unnecessary s_mb_last_start
  ext4: separate stream goal hits from s_bal_goals for better tracking
  ext4: add ext4_try_lock_group() to skip busy groups
  ext4: initialize superblock fields in the kballoc-test.c kunit tests
  ext4: refactor the inline directory conversion and new directory codepaths
  ...
This commit is contained in:
Linus Torvalds
2025-07-31 10:02:44 -07:00
15 changed files with 902 additions and 694 deletions

View File

@@ -157,7 +157,7 @@ enum criteria {
/*
* Reads each block group sequentially, performing disk IO if
* necessary, to find find_suitable block group. Tries to
* necessary, to find suitable block group. Tries to
* allocate goal length but might trim the request if nothing
* is found after enough tries.
*/
@@ -185,14 +185,8 @@ enum criteria {
/* prefer goal again. length */
#define EXT4_MB_HINT_MERGE 0x0001
/* blocks already reserved */
#define EXT4_MB_HINT_RESERVED 0x0002
/* metadata is being allocated */
#define EXT4_MB_HINT_METADATA 0x0004
/* first blocks in the file */
#define EXT4_MB_HINT_FIRST 0x0008
/* search for the best chunk */
#define EXT4_MB_HINT_BEST 0x0010
/* data is being allocated */
#define EXT4_MB_HINT_DATA 0x0020
/* don't preallocate (for tails) */
@@ -213,15 +207,6 @@ enum criteria {
#define EXT4_MB_USE_RESERVED 0x2000
/* Do strict check for free blocks while retrying block allocation */
#define EXT4_MB_STRICT_CHECK 0x4000
/* Large fragment size list lookup succeeded at least once for
* CR_POWER2_ALIGNED */
#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000
/* Avg fragment size rb tree lookup succeeded at least once for
* CR_GOAL_LEN_FAST */
#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000
/* Avg fragment size rb tree lookup succeeded at least once for
* CR_BEST_AVAIL_LEN */
#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000
struct ext4_allocation_request {
/* target inode for block we're allocating */
@@ -1608,16 +1593,14 @@ struct ext4_sb_info {
unsigned short *s_mb_offsets;
unsigned int *s_mb_maxs;
unsigned int s_group_info_size;
unsigned int s_mb_free_pending;
atomic_t s_mb_free_pending;
struct list_head s_freed_data_list[2]; /* List of blocks to be freed
after commit completed */
struct list_head s_discard_list;
struct work_struct s_discard_work;
atomic_t s_retry_alloc_pending;
struct list_head *s_mb_avg_fragment_size;
rwlock_t *s_mb_avg_fragment_size_locks;
struct list_head *s_mb_largest_free_orders;
rwlock_t *s_mb_largest_free_orders_locks;
struct xarray *s_mb_avg_fragment_size;
struct xarray *s_mb_largest_free_orders;
/* tunables */
unsigned long s_stripe;
@@ -1629,15 +1612,16 @@ struct ext4_sb_info {
unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc;
unsigned int s_max_dir_size_kb;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
unsigned long s_mb_last_start;
unsigned int s_mb_prefetch;
unsigned int s_mb_prefetch_limit;
unsigned int s_mb_best_avail_max_trim_order;
unsigned int s_sb_update_sec;
unsigned int s_sb_update_kb;
/* where last allocation was done - for stream allocation */
ext4_group_t *s_mb_last_groups;
unsigned int s_mb_nr_global_goals;
/* stats for buddy allocator */
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
atomic_t s_bal_success; /* we found long enough chunks */
@@ -1646,12 +1630,10 @@ struct ext4_sb_info {
atomic_t s_bal_cX_ex_scanned[EXT4_MB_NUM_CRS]; /* total extents scanned */
atomic_t s_bal_groups_scanned; /* number of groups scanned */
atomic_t s_bal_goals; /* goal hits */
atomic_t s_bal_stream_goals; /* stream allocation global goal hits */
atomic_t s_bal_len_goals; /* len goal hits */
atomic_t s_bal_breaks; /* too long searches */
atomic_t s_bal_2orders; /* 2^order hits */
atomic_t s_bal_p2_aligned_bad_suggestions;
atomic_t s_bal_goal_fast_bad_suggestions;
atomic_t s_bal_best_avail_bad_suggestions;
atomic64_t s_bal_cX_groups_considered[EXT4_MB_NUM_CRS];
atomic64_t s_bal_cX_hits[EXT4_MB_NUM_CRS];
atomic64_t s_bal_cX_failed[EXT4_MB_NUM_CRS]; /* cX loop didn't find blocks */
@@ -3020,7 +3002,7 @@ int ext4_walk_page_buffers(handle_t *handle,
struct buffer_head *bh));
int do_journal_get_write_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh);
bool ext4_should_enable_large_folio(struct inode *inode);
void ext4_set_inode_mapping_order(struct inode *inode);
#define FALL_BACK_TO_NONDELALLOC 1
#define CONVERT_INLINE_DATA 2
@@ -3064,9 +3046,9 @@ extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
extern void ext4_set_inode_flags(struct inode *, bool init);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_chunk_trans_extent(struct inode *inode, int nrblocks);
extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
@@ -3489,8 +3471,6 @@ struct ext4_group_info {
void *bb_bitmap;
#endif
struct rw_semaphore alloc_sem;
struct list_head bb_avg_fragment_size_node;
struct list_head bb_largest_free_order_node;
ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block
* regions, index is order.
* bb_counters[3] = 5 means
@@ -3541,23 +3521,28 @@ static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
}
static inline bool ext4_try_lock_group(struct super_block *sb, ext4_group_t group)
{
if (!spin_trylock(ext4_group_lock_ptr(sb, group)))
return false;
/*
* We're able to grab the lock right away, so drop the lock
* contention counter.
*/
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
return true;
}
static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
{
spinlock_t *lock = ext4_group_lock_ptr(sb, group);
if (spin_trylock(lock))
/*
* We're able to grab the lock right away, so drop the
* lock contention counter.
*/
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
else {
if (!ext4_try_lock_group(sb, group)) {
/*
* The lock is busy, so bump the contention counter,
* and then wait on the spin lock.
*/
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
EXT4_MAX_CONTENTION);
spin_lock(lock);
spin_lock(ext4_group_lock_ptr(sb, group));
}
}
@@ -3612,6 +3597,7 @@ extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
extern int ext4_get_max_inline_size(struct inode *inode);
extern int ext4_find_inline_data_nolock(struct inode *inode);
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
extern void ext4_update_final_de(void *de_buf, int old_size, int new_size);
int ext4_readpage_inline(struct inode *inode, struct folio *folio);
extern int ext4_try_to_write_inline_data(struct address_space *mapping,
@@ -3671,10 +3657,10 @@ static inline int ext4_has_inline_data(struct inode *inode)
extern const struct inode_operations ext4_dir_inode_operations;
extern const struct inode_operations ext4_special_inode_operations;
extern struct dentry *ext4_get_parent(struct dentry *child);
extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
struct ext4_dir_entry_2 *de,
int blocksize, int csum_size,
unsigned int parent_ino, int dotdot_real_len);
extern int ext4_init_dirblock(handle_t *handle, struct inode *inode,
struct buffer_head *dir_block,
unsigned int parent_ino, void *inline_buf,
int inline_size);
extern void ext4_initialize_dirent_tail(struct buffer_head *bh,
unsigned int blocksize);
extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,