Merge tag 'ext4_for_linux-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:

 - Refactor code paths involved with partial block zero-out in
   preparation for converting ext4 to use iomap for buffered writes

 - Remove use of d_alloc() from ext4 in preparation for the deprecation
   of this interface

 - Replace some J_ASSERTS with a journal abort so we can avoid a kernel
   panic for a localized file system error

 - Simplify various code paths in mballoc, move_extent, and fast commit

 - Fix rare deadlock in jbd2_journal_cancel_revoke() that can be
   triggered by generic/013 when blocksize < pagesize

 - Fix memory leak when releasing an extended attribute when its value
   is stored in an ea_inode

 - Fix various potential kunit test bugs in fs/ext4/extents.c

 - Fix potential out-of-bounds access in check_xattr() with a corrupted
   file system

 - Make the jbd2_inode dirty range tracking safe for lockless reads

 - Avoid a WARN_ON when writing back files due to a corrupted file
   system; we already print an ext4 warning indicating that data will be
   lost, so the WARN_ON is not necessary and doesn't add any new
   information

* tag 'ext4_for_linux-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (37 commits)
  jbd2: fix deadlock in jbd2_journal_cancel_revoke()
  ext4: fix missing brelse() in ext4_xattr_inode_dec_ref_all()
  ext4: fix possible null-ptr-deref in mbt_kunit_exit()
  ext4: fix possible null-ptr-deref in extents_kunit_exit()
  ext4: fix the error handling process in extents_kunit_init()
  ext4: call deactivate_super() in extents_kunit_exit()
  ext4: fix miss unlock 'sb->s_umount' in extents_kunit_init()
  ext4: fix bounds check in check_xattrs() to prevent out-of-bounds access
  ext4: zero post-EOF partial block before appending write
  ext4: move pagecache_isize_extended() out of active handle
  ext4: remove ctime/mtime update from ext4_alloc_file_blocks()
  ext4: unify SYNC mode checks in fallocate paths
  ext4: ensure zeroed partial blocks are persisted in SYNC mode
  ext4: move zero partial block range functions out of active handle
  ext4: pass allocate range as loff_t to ext4_alloc_file_blocks()
  ext4: remove handle parameters from zero partial block functions
  ext4: move ordered data handling out of ext4_block_do_zero_range()
  ext4: rename ext4_block_zero_page_range() to ext4_block_zero_range()
  ext4: factor out journalled block zeroing range
  ext4: rename and extend ext4_block_truncate_page()
  ...
This commit is contained in:
Linus Torvalds
2026-04-17 17:08:31 -07:00
19 changed files with 633 additions and 389 deletions

View File

@@ -28,7 +28,6 @@
#include <linux/seqlock.h> #include <linux/seqlock.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/timer.h> #include <linux/timer.h>
#include <linux/wait.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/blockgroup_lock.h> #include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h> #include <linux/percpu_counter.h>
@@ -1082,9 +1081,6 @@ struct ext4_inode_info {
spinlock_t i_raw_lock; /* protects updates to the raw inode */ spinlock_t i_raw_lock; /* protects updates to the raw inode */
/* Fast commit wait queue for this inode */
wait_queue_head_t i_fc_wait;
/* /*
* Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len * Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len
* and inode's EXT4_FC_STATE_COMMITTING state bit. * and inode's EXT4_FC_STATE_COMMITTING state bit.
@@ -2976,7 +2972,8 @@ void __ext4_fc_track_unlink(handle_t *handle, struct inode *inode,
void __ext4_fc_track_link(handle_t *handle, struct inode *inode, void __ext4_fc_track_link(handle_t *handle, struct inode *inode,
struct dentry *dentry); struct dentry *dentry);
void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry); void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry);
void ext4_fc_track_link(handle_t *handle, struct dentry *dentry); void ext4_fc_track_link(handle_t *handle, struct inode *inode,
struct dentry *dentry);
void __ext4_fc_track_create(handle_t *handle, struct inode *inode, void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
struct dentry *dentry); struct dentry *dentry);
void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
@@ -3101,8 +3098,9 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_chunk_trans_extent(struct inode *inode, int nrblocks); extern int ext4_chunk_trans_extent(struct inode *inode, int nrblocks);
extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks, extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents); int pextents);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, extern int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end);
loff_t lstart, loff_t lend); extern int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart,
loff_t length, bool *did_zero);
extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf); extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int ext4_get_projid(struct inode *inode, kprojid_t *projid); extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
@@ -3721,7 +3719,7 @@ extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name, extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
struct inode *inode, struct dentry *dentry); struct inode *inode, struct dentry *dentry);
extern int __ext4_link(struct inode *dir, struct inode *inode, extern int __ext4_link(struct inode *dir, struct inode *inode,
struct dentry *dentry); const struct qstr *d_name, struct dentry *dentry);
#define S_SHIFT 12 #define S_SHIFT 12
static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {

View File

@@ -142,10 +142,14 @@ static struct file_system_type ext_fs_type = {
static void extents_kunit_exit(struct kunit *test) static void extents_kunit_exit(struct kunit *test)
{ {
struct super_block *sb = k_ctx.k_ei->vfs_inode.i_sb; struct ext4_sb_info *sbi;
struct ext4_sb_info *sbi = sb->s_fs_info;
if (!k_ctx.k_ei)
return;
sbi = k_ctx.k_ei->vfs_inode.i_sb->s_fs_info;
ext4_es_unregister_shrinker(sbi); ext4_es_unregister_shrinker(sbi);
deactivate_super(sbi->s_sb);
kfree(sbi); kfree(sbi);
kfree(k_ctx.k_ei); kfree(k_ctx.k_ei);
kfree(k_ctx.k_data); kfree(k_ctx.k_data);
@@ -224,33 +228,37 @@ static int extents_kunit_init(struct kunit *test)
(struct kunit_ext_test_param *)(test->param_value); (struct kunit_ext_test_param *)(test->param_value);
int err; int err;
sb = sget(&ext_fs_type, NULL, ext_set, 0, NULL);
if (IS_ERR(sb))
return PTR_ERR(sb);
sb->s_blocksize = 4096;
sb->s_blocksize_bits = 12;
sbi = kzalloc_obj(struct ext4_sb_info); sbi = kzalloc_obj(struct ext4_sb_info);
if (sbi == NULL) if (sbi == NULL)
return -ENOMEM; return -ENOMEM;
sb = sget(&ext_fs_type, NULL, ext_set, 0, NULL);
if (IS_ERR(sb)) {
kfree(sbi);
return PTR_ERR(sb);
}
sbi->s_sb = sb; sbi->s_sb = sb;
sb->s_fs_info = sbi; sb->s_fs_info = sbi;
sb->s_blocksize = 4096;
sb->s_blocksize_bits = 12;
if (!param || !param->disable_zeroout) if (!param || !param->disable_zeroout)
sbi->s_extent_max_zeroout_kb = 32; sbi->s_extent_max_zeroout_kb = 32;
/* setup the mock inode */
k_ctx.k_ei = kzalloc_obj(struct ext4_inode_info);
if (k_ctx.k_ei == NULL)
return -ENOMEM;
ei = k_ctx.k_ei;
inode = &ei->vfs_inode;
err = ext4_es_register_shrinker(sbi); err = ext4_es_register_shrinker(sbi);
if (err) if (err)
return err; goto out_deactivate;
/* setup the mock inode */
k_ctx.k_ei = kzalloc_obj(struct ext4_inode_info);
if (k_ctx.k_ei == NULL) {
err = -ENOMEM;
goto out;
}
ei = k_ctx.k_ei;
inode = &ei->vfs_inode;
ext4_es_init_tree(&ei->i_es_tree); ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock); rwlock_init(&ei->i_es_lock);
@@ -266,8 +274,10 @@ static int extents_kunit_init(struct kunit *test)
inode->i_sb = sb; inode->i_sb = sb;
k_ctx.k_data = kzalloc(EXT_DATA_LEN * 4096, GFP_KERNEL); k_ctx.k_data = kzalloc(EXT_DATA_LEN * 4096, GFP_KERNEL);
if (k_ctx.k_data == NULL) if (k_ctx.k_data == NULL) {
return -ENOMEM; err = -ENOMEM;
goto out;
}
/* /*
* set the data area to a junk value * set the data area to a junk value
@@ -309,7 +319,23 @@ static int extents_kunit_init(struct kunit *test)
kunit_activate_static_stub(test, ext4_ext_zeroout, ext4_ext_zeroout_stub); kunit_activate_static_stub(test, ext4_ext_zeroout, ext4_ext_zeroout_stub);
kunit_activate_static_stub(test, ext4_issue_zeroout, kunit_activate_static_stub(test, ext4_issue_zeroout,
ext4_issue_zeroout_stub); ext4_issue_zeroout_stub);
up_write(&sb->s_umount);
return 0; return 0;
out:
kfree(k_ctx.k_ei);
k_ctx.k_ei = NULL;
kfree(k_ctx.k_data);
k_ctx.k_data = NULL;
ext4_es_unregister_shrinker(sbi);
out_deactivate:
deactivate_locked_super(sb);
kfree(sbi);
return err;
} }
/* /*

View File

@@ -4571,30 +4571,30 @@ retry_remove_space:
return err; return err;
} }
static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, static int ext4_alloc_file_blocks(struct file *file, loff_t offset, loff_t len,
ext4_lblk_t len, loff_t new_size, loff_t new_size, int flags)
int flags)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
handle_t *handle; handle_t *handle;
int ret = 0, ret2 = 0, ret3 = 0; int ret = 0, ret2 = 0, ret3 = 0;
int retries = 0; int retries = 0;
int depth = 0; int depth = 0;
ext4_lblk_t len_lblk;
struct ext4_map_blocks map; struct ext4_map_blocks map;
unsigned int credits; unsigned int credits;
loff_t epos, old_size = i_size_read(inode); loff_t epos = 0, old_size = i_size_read(inode);
unsigned int blkbits = inode->i_blkbits; unsigned int blkbits = inode->i_blkbits;
bool alloc_zero = false; bool alloc_zero = false;
BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)); BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
map.m_lblk = offset; map.m_lblk = offset >> blkbits;
map.m_len = len; map.m_len = len_lblk = EXT4_MAX_BLOCKS(len, offset, blkbits);
/* /*
* Don't normalize the request if it can fit in one extent so * Don't normalize the request if it can fit in one extent so
* that it doesn't get unnecessarily split into multiple * that it doesn't get unnecessarily split into multiple
* extents. * extents.
*/ */
if (len <= EXT_UNWRITTEN_MAX_LEN) if (len_lblk <= EXT_UNWRITTEN_MAX_LEN)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
/* /*
@@ -4611,16 +4611,23 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
/* /*
* credits to insert 1 extent into extent tree * credits to insert 1 extent into extent tree
*/ */
credits = ext4_chunk_trans_blocks(inode, len); credits = ext4_chunk_trans_blocks(inode, len_lblk);
depth = ext_depth(inode); depth = ext_depth(inode);
/* Zero to the end of the block containing i_size */
if (new_size > old_size) {
ret = ext4_block_zero_eof(inode, old_size, LLONG_MAX);
if (ret)
return ret;
}
retry: retry:
while (len) { while (len_lblk) {
/* /*
* Recalculate credits when extent tree depth changes. * Recalculate credits when extent tree depth changes.
*/ */
if (depth != ext_depth(inode)) { if (depth != ext_depth(inode)) {
credits = ext4_chunk_trans_blocks(inode, len); credits = ext4_chunk_trans_blocks(inode, len_lblk);
depth = ext_depth(inode); depth = ext_depth(inode);
} }
@@ -4640,50 +4647,60 @@ retry:
ext4_journal_stop(handle); ext4_journal_stop(handle);
break; break;
} }
ext4_update_inode_fsync_trans(handle, inode, 1);
ret = ext4_journal_stop(handle);
if (unlikely(ret))
break;
/* /*
* allow a full retry cycle for any remaining allocations * allow a full retry cycle for any remaining allocations
*/ */
retries = 0; retries = 0;
epos = EXT4_LBLK_TO_B(inode, map.m_lblk + ret);
inode_set_ctime_current(inode);
if (new_size) {
if (epos > new_size)
epos = new_size;
if (ext4_update_inode_size(inode, epos) & 0x1)
inode_set_mtime_to_ts(inode,
inode_get_ctime(inode));
if (epos > old_size) {
pagecache_isize_extended(inode, old_size, epos);
ext4_zero_partial_blocks(handle, inode,
old_size, epos - old_size);
}
}
ret2 = ext4_mark_inode_dirty(handle, inode);
ext4_update_inode_fsync_trans(handle, inode, 1);
ret3 = ext4_journal_stop(handle);
ret2 = ret3 ? ret3 : ret2;
if (unlikely(ret2))
break;
if (alloc_zero && if (alloc_zero &&
(map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN))) { (map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN))) {
ret2 = ext4_issue_zeroout(inode, map.m_lblk, map.m_pblk, ret = ext4_issue_zeroout(inode, map.m_lblk, map.m_pblk,
map.m_len); map.m_len);
if (likely(!ret2)) if (likely(!ret))
ret2 = ext4_convert_unwritten_extents(NULL, ret = ext4_convert_unwritten_extents(NULL,
inode, (loff_t)map.m_lblk << blkbits, inode, (loff_t)map.m_lblk << blkbits,
(loff_t)map.m_len << blkbits); (loff_t)map.m_len << blkbits);
if (ret2) if (ret)
break; break;
} }
map.m_lblk += ret; map.m_lblk += map.m_len;
map.m_len = len = len - ret; map.m_len = len_lblk = len_lblk - map.m_len;
epos = EXT4_LBLK_TO_B(inode, map.m_lblk);
} }
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry; goto retry;
return ret > 0 ? ret2 : ret; if (!epos || !new_size)
return ret;
/*
* Allocate blocks, update the file size to match the size of the
* already successfully allocated blocks.
*/
if (epos > new_size)
epos = new_size;
handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
if (IS_ERR(handle))
return ret ? ret : PTR_ERR(handle);
ext4_update_inode_size(inode, epos);
ret2 = ext4_mark_inode_dirty(handle, inode);
ext4_update_inode_fsync_trans(handle, inode, 1);
ret3 = ext4_journal_stop(handle);
ret2 = ret3 ? ret3 : ret2;
if (epos > old_size)
pagecache_isize_extended(inode, old_size, epos);
return ret ? ret : ret2;
} }
static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len); static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
@@ -4695,12 +4712,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
handle_t *handle = NULL; handle_t *handle = NULL;
loff_t new_size = 0; loff_t align_start, align_end, new_size = 0;
loff_t end = offset + len; loff_t end = offset + len;
ext4_lblk_t start_lblk, end_lblk;
unsigned int blocksize = i_blocksize(inode); unsigned int blocksize = i_blocksize(inode);
unsigned int blkbits = inode->i_blkbits; bool partial_zeroed = false;
int ret, flags, credits; int ret, flags;
trace_ext4_zero_range(inode, offset, len, mode); trace_ext4_zero_range(inode, offset, len, mode);
WARN_ON_ONCE(!inode_is_locked(inode)); WARN_ON_ONCE(!inode_is_locked(inode));
@@ -4720,11 +4736,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
/* Preallocate the range including the unaligned edges */ /* Preallocate the range including the unaligned edges */
if (!IS_ALIGNED(offset | end, blocksize)) { if (!IS_ALIGNED(offset | end, blocksize)) {
ext4_lblk_t alloc_lblk = offset >> blkbits; ret = ext4_alloc_file_blocks(file, offset, len, new_size,
ext4_lblk_t len_lblk = EXT4_MAX_BLOCKS(len, offset, blkbits); flags);
ret = ext4_alloc_file_blocks(file, alloc_lblk, len_lblk,
new_size, flags);
if (ret) if (ret)
return ret; return ret;
} }
@@ -4739,18 +4752,17 @@ static long ext4_zero_range(struct file *file, loff_t offset,
return ret; return ret;
/* Zero range excluding the unaligned edges */ /* Zero range excluding the unaligned edges */
start_lblk = EXT4_B_TO_LBLK(inode, offset); align_start = round_up(offset, blocksize);
end_lblk = end >> blkbits; align_end = round_down(end, blocksize);
if (end_lblk > start_lblk) { if (align_end > align_start) {
ext4_lblk_t zero_blks = end_lblk - start_lblk;
if (mode & FALLOC_FL_WRITE_ZEROES) if (mode & FALLOC_FL_WRITE_ZEROES)
flags = EXT4_GET_BLOCKS_CREATE_ZERO | EXT4_EX_NOCACHE; flags = EXT4_GET_BLOCKS_CREATE_ZERO | EXT4_EX_NOCACHE;
else else
flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
EXT4_EX_NOCACHE); EXT4_EX_NOCACHE);
ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks, ret = ext4_alloc_file_blocks(file, align_start,
new_size, flags); align_end - align_start, new_size,
flags);
if (ret) if (ret)
return ret; return ret;
} }
@@ -4758,25 +4770,24 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (IS_ALIGNED(offset | end, blocksize)) if (IS_ALIGNED(offset | end, blocksize))
return ret; return ret;
/* /* Zero out partial block at the edges of the range */
* In worst case we have to writeout two nonadjacent unwritten ret = ext4_zero_partial_blocks(inode, offset, len, &partial_zeroed);
* blocks and update the inode if (ret)
*/ return ret;
credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1; if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
if (ext4_should_journal_data(inode)) ret = filemap_write_and_wait_range(inode->i_mapping, offset,
credits += 2; end - 1);
handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); if (ret)
return ret;
}
handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
ret = PTR_ERR(handle); ret = PTR_ERR(handle);
ext4_std_error(inode->i_sb, ret); ext4_std_error(inode->i_sb, ret);
return ret; return ret;
} }
/* Zero out partial block at the edges of the range */
ret = ext4_zero_partial_blocks(handle, inode, offset, len);
if (ret)
goto out_handle;
if (new_size) if (new_size)
ext4_update_inode_size(inode, new_size); ext4_update_inode_size(inode, new_size);
ret = ext4_mark_inode_dirty(handle, inode); ret = ext4_mark_inode_dirty(handle, inode);
@@ -4784,7 +4795,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
goto out_handle; goto out_handle;
ext4_update_inode_fsync_trans(handle, inode, 1); ext4_update_inode_fsync_trans(handle, inode, 1);
if (file->f_flags & O_SYNC) if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
ext4_handle_sync(handle); ext4_handle_sync(handle);
out_handle: out_handle:
@@ -4798,15 +4809,11 @@ static long ext4_do_fallocate(struct file *file, loff_t offset,
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
loff_t end = offset + len; loff_t end = offset + len;
loff_t new_size = 0; loff_t new_size = 0;
ext4_lblk_t start_lblk, len_lblk;
int ret; int ret;
trace_ext4_fallocate_enter(inode, offset, len, mode); trace_ext4_fallocate_enter(inode, offset, len, mode);
WARN_ON_ONCE(!inode_is_locked(inode)); WARN_ON_ONCE(!inode_is_locked(inode));
start_lblk = offset >> inode->i_blkbits;
len_lblk = EXT4_MAX_BLOCKS(len, offset, inode->i_blkbits);
/* We only support preallocation for extent-based files only. */ /* We only support preallocation for extent-based files only. */
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
@@ -4821,17 +4828,19 @@ static long ext4_do_fallocate(struct file *file, loff_t offset,
goto out; goto out;
} }
ret = ext4_alloc_file_blocks(file, start_lblk, len_lblk, new_size, ret = ext4_alloc_file_blocks(file, offset, len, new_size,
EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT); EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT);
if (ret) if (ret)
goto out; goto out;
if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) { if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) &&
EXT4_SB(inode->i_sb)->s_journal) {
ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal, ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
EXT4_I(inode)->i_sync_tid); EXT4_I(inode)->i_sync_tid);
} }
out: out:
trace_ext4_fallocate_exit(inode, offset, len_lblk, ret); trace_ext4_fallocate_exit(inode, offset,
EXT4_MAX_BLOCKS(len, offset, inode->i_blkbits), ret);
return ret; return ret;
} }
@@ -5598,7 +5607,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
goto out_handle; goto out_handle;
ext4_update_inode_fsync_trans(handle, inode, 1); ext4_update_inode_fsync_trans(handle, inode, 1);
if (IS_SYNC(inode)) if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
ext4_handle_sync(handle); ext4_handle_sync(handle);
out_handle: out_handle:
@@ -5722,7 +5731,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
goto out_handle; goto out_handle;
ext4_update_inode_fsync_trans(handle, inode, 1); ext4_update_inode_fsync_trans(handle, inode, 1);
if (IS_SYNC(inode)) if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
ext4_handle_sync(handle); ext4_handle_sync(handle);
out_handle: out_handle:

View File

@@ -13,6 +13,7 @@
#include "mballoc.h" #include "mballoc.h"
#include <linux/lockdep.h> #include <linux/lockdep.h>
#include <linux/wait_bit.h>
/* /*
* Ext4 Fast Commits * Ext4 Fast Commits
* ----------------- * -----------------
@@ -215,7 +216,6 @@ void ext4_fc_init_inode(struct inode *inode)
ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING); ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
INIT_LIST_HEAD(&ei->i_fc_list); INIT_LIST_HEAD(&ei->i_fc_list);
INIT_LIST_HEAD(&ei->i_fc_dilist); INIT_LIST_HEAD(&ei->i_fc_dilist);
init_waitqueue_head(&ei->i_fc_wait);
} }
static bool ext4_fc_disabled(struct super_block *sb) static bool ext4_fc_disabled(struct super_block *sb)
@@ -224,6 +224,12 @@ static bool ext4_fc_disabled(struct super_block *sb)
(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)); (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY));
} }
static bool ext4_fc_eligible(struct super_block *sb)
{
return !ext4_fc_disabled(sb) &&
!(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE));
}
/* /*
* Remove inode from fast commit list. If the inode is being committed * Remove inode from fast commit list. If the inode is being committed
* we wait until inode commit is done. * we wait until inode commit is done.
@@ -320,7 +326,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl
if (ext4_fc_disabled(sb)) if (ext4_fc_disabled(sb))
return; return;
if (handle && !IS_ERR(handle)) if (!IS_ERR_OR_NULL(handle))
tid = handle->h_transaction->t_tid; tid = handle->h_transaction->t_tid;
else { else {
read_lock(&sbi->s_journal->j_state_lock); read_lock(&sbi->s_journal->j_state_lock);
@@ -473,13 +479,8 @@ void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
{ {
struct inode *inode = d_inode(dentry); struct inode *inode = d_inode(dentry);
if (ext4_fc_disabled(inode->i_sb)) if (ext4_fc_eligible(inode->i_sb))
return; __ext4_fc_track_unlink(handle, inode, dentry);
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
return;
__ext4_fc_track_unlink(handle, inode, dentry);
} }
void __ext4_fc_track_link(handle_t *handle, void __ext4_fc_track_link(handle_t *handle,
@@ -496,17 +497,11 @@ void __ext4_fc_track_link(handle_t *handle,
trace_ext4_fc_track_link(handle, inode, dentry, ret); trace_ext4_fc_track_link(handle, inode, dentry, ret);
} }
void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) void ext4_fc_track_link(handle_t *handle, struct inode *inode,
struct dentry *dentry)
{ {
struct inode *inode = d_inode(dentry); if (ext4_fc_eligible(inode->i_sb))
__ext4_fc_track_link(handle, inode, dentry);
if (ext4_fc_disabled(inode->i_sb))
return;
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
return;
__ext4_fc_track_link(handle, inode, dentry);
} }
void __ext4_fc_track_create(handle_t *handle, struct inode *inode, void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
@@ -527,13 +522,8 @@ void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
{ {
struct inode *inode = d_inode(dentry); struct inode *inode = d_inode(dentry);
if (ext4_fc_disabled(inode->i_sb)) if (ext4_fc_eligible(inode->i_sb))
return; __ext4_fc_track_create(handle, inode, dentry);
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
return;
__ext4_fc_track_create(handle, inode, dentry);
} }
/* __track_fn for inode tracking */ /* __track_fn for inode tracking */
@@ -557,16 +547,13 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
if (S_ISDIR(inode->i_mode)) if (S_ISDIR(inode->i_mode))
return; return;
if (ext4_fc_disabled(inode->i_sb))
return;
if (ext4_should_journal_data(inode)) { if (ext4_should_journal_data(inode)) {
ext4_fc_mark_ineligible(inode->i_sb, ext4_fc_mark_ineligible(inode->i_sb,
EXT4_FC_REASON_INODE_JOURNAL_DATA, handle); EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
return; return;
} }
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) if (!ext4_fc_eligible(inode->i_sb))
return; return;
/* /*
@@ -644,10 +631,7 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star
if (S_ISDIR(inode->i_mode)) if (S_ISDIR(inode->i_mode))
return; return;
if (ext4_fc_disabled(inode->i_sb)) if (!ext4_fc_eligible(inode->i_sb))
return;
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
return; return;
if (ext4_has_inline_data(inode)) { if (ext4_has_inline_data(inode)) {
@@ -1446,7 +1430,6 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
struct inode *inode) struct inode *inode)
{ {
struct inode *dir = NULL; struct inode *dir = NULL;
struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len); struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
int ret = 0; int ret = 0;
@@ -1457,21 +1440,7 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
goto out; goto out;
} }
dentry_dir = d_obtain_alias(dir); ret = __ext4_link(dir, inode, &qstr_dname, NULL);
if (IS_ERR(dentry_dir)) {
ext4_debug("Failed to obtain dentry");
dentry_dir = NULL;
goto out;
}
dentry_inode = d_alloc(dentry_dir, &qstr_dname);
if (!dentry_inode) {
ext4_debug("Inode dentry not created.");
ret = -ENOMEM;
goto out;
}
ret = __ext4_link(dir, inode, dentry_inode);
/* /*
* It's possible that link already existed since data blocks * It's possible that link already existed since data blocks
* for the dir in question got persisted before we crashed OR * for the dir in question got persisted before we crashed OR
@@ -1485,16 +1454,8 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
ret = 0; ret = 0;
out: out:
if (dentry_dir) { if (dir)
d_drop(dentry_dir);
dput(dentry_dir);
} else if (dir) {
iput(dir); iput(dir);
}
if (dentry_inode) {
d_drop(dentry_inode);
dput(dentry_inode);
}
return ret; return ret;
} }
@@ -1759,8 +1720,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
} }
/* Replay add range tag */ /* Replay add range tag */
static int ext4_fc_replay_add_range(struct super_block *sb, static int ext4_fc_replay_add_range(struct super_block *sb, u8 *val)
struct ext4_fc_tl_mem *tl, u8 *val)
{ {
struct ext4_fc_add_range fc_add_ex; struct ext4_fc_add_range fc_add_ex;
struct ext4_extent newex, *ex; struct ext4_extent newex, *ex;
@@ -1880,8 +1840,7 @@ out:
/* Replay DEL_RANGE tag */ /* Replay DEL_RANGE tag */
static int static int
ext4_fc_replay_del_range(struct super_block *sb, ext4_fc_replay_del_range(struct super_block *sb, u8 *val)
struct ext4_fc_tl_mem *tl, u8 *val)
{ {
struct inode *inode; struct inode *inode;
struct ext4_fc_del_range lrange; struct ext4_fc_del_range lrange;
@@ -2251,13 +2210,13 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
ret = ext4_fc_replay_unlink(sb, &tl, val); ret = ext4_fc_replay_unlink(sb, &tl, val);
break; break;
case EXT4_FC_TAG_ADD_RANGE: case EXT4_FC_TAG_ADD_RANGE:
ret = ext4_fc_replay_add_range(sb, &tl, val); ret = ext4_fc_replay_add_range(sb, val);
break; break;
case EXT4_FC_TAG_CREAT: case EXT4_FC_TAG_CREAT:
ret = ext4_fc_replay_create(sb, &tl, val); ret = ext4_fc_replay_create(sb, &tl, val);
break; break;
case EXT4_FC_TAG_DEL_RANGE: case EXT4_FC_TAG_DEL_RANGE:
ret = ext4_fc_replay_del_range(sb, &tl, val); ret = ext4_fc_replay_del_range(sb, val);
break; break;
case EXT4_FC_TAG_INODE: case EXT4_FC_TAG_INODE:
ret = ext4_fc_replay_inode(sb, &tl, val); ret = ext4_fc_replay_inode(sb, &tl, val);

View File

@@ -270,6 +270,8 @@ static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
{ {
struct inode *inode = file_inode(iocb->ki_filp);
loff_t old_size = i_size_read(inode);
ssize_t ret, count; ssize_t ret, count;
count = ext4_generic_write_checks(iocb, from); count = ext4_generic_write_checks(iocb, from);
@@ -279,6 +281,21 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
ret = file_modified(iocb->ki_filp); ret = file_modified(iocb->ki_filp);
if (ret) if (ret)
return ret; return ret;
/*
* If the position is beyond the EOF, it is necessary to zero out the
* partial block that beyond the existing EOF, as it may contains
* stale data written through mmap.
*/
if (iocb->ki_pos > old_size && !ext4_verity_in_progress(inode)) {
if (iocb->ki_flags & IOCB_NOWAIT)
return -EAGAIN;
ret = ext4_block_zero_eof(inode, old_size, iocb->ki_pos);
if (ret)
return ret;
}
return count; return count;
} }

View File

@@ -1468,10 +1468,9 @@ static int ext4_write_end(const struct kiocb *iocb,
folio_unlock(folio); folio_unlock(folio);
folio_put(folio); folio_put(folio);
if (old_size < pos && !verity) { if (old_size < pos && !verity)
pagecache_isize_extended(inode, old_size, pos); pagecache_isize_extended(inode, old_size, pos);
ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size);
}
/* /*
* Don't mark the inode dirty under folio lock. First, it unnecessarily * Don't mark the inode dirty under folio lock. First, it unnecessarily
* makes the holding time of folio lock longer. Second, it forces lock * makes the holding time of folio lock longer. Second, it forces lock
@@ -1586,10 +1585,8 @@ static int ext4_journalled_write_end(const struct kiocb *iocb,
folio_unlock(folio); folio_unlock(folio);
folio_put(folio); folio_put(folio);
if (old_size < pos && !verity) { if (old_size < pos && !verity)
pagecache_isize_extended(inode, old_size, pos); pagecache_isize_extended(inode, old_size, pos);
ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size);
}
if (size_changed) { if (size_changed) {
ret2 = ext4_mark_inode_dirty(handle, inode); ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -1759,8 +1756,22 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
BUG_ON(!folio_test_locked(folio)); BUG_ON(!folio_test_locked(folio));
BUG_ON(folio_test_writeback(folio)); BUG_ON(folio_test_writeback(folio));
if (invalidate) { if (invalidate) {
if (folio_mapped(folio)) if (folio_mapped(folio)) {
folio_clear_dirty_for_io(folio); folio_clear_dirty_for_io(folio);
/*
* Unmap folio from page
* tables to prevent
* subsequent accesses through
* stale PTEs. This ensures
* future accesses trigger new
* page faults rather than
* reusing the invalidated
* folio.
*/
unmap_mapping_pages(folio->mapping,
folio->index,
folio_nr_pages(folio), false);
}
block_invalidate_folio(folio, 0, block_invalidate_folio(folio, 0,
folio_size(folio)); folio_size(folio));
folio_clear_uptodate(folio); folio_clear_uptodate(folio);
@@ -3043,17 +3054,23 @@ static int ext4_writepages(struct address_space *mapping,
int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode) int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{ {
loff_t range_start, range_end;
struct writeback_control wbc = { struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL, .sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX, .nr_to_write = LONG_MAX,
.range_start = jinode->i_dirty_start,
.range_end = jinode->i_dirty_end,
}; };
struct mpage_da_data mpd = { struct mpage_da_data mpd = {
.inode = jinode->i_vfs_inode, .inode = jinode->i_vfs_inode,
.wbc = &wbc, .wbc = &wbc,
.can_map = 0, .can_map = 0,
}; };
if (!jbd2_jinode_get_dirty_range(jinode, &range_start, &range_end))
return 0;
wbc.range_start = range_start;
wbc.range_end = range_end;
return ext4_do_writepages(&mpd); return ext4_do_writepages(&mpd);
} }
@@ -3208,7 +3225,7 @@ static int ext4_da_do_write_end(struct address_space *mapping,
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
loff_t old_size = inode->i_size; loff_t old_size = inode->i_size;
bool disksize_changed = false; bool disksize_changed = false;
loff_t new_i_size, zero_len = 0; loff_t new_i_size;
handle_t *handle; handle_t *handle;
if (unlikely(!folio_buffers(folio))) { if (unlikely(!folio_buffers(folio))) {
@@ -3252,19 +3269,15 @@ static int ext4_da_do_write_end(struct address_space *mapping,
folio_unlock(folio); folio_unlock(folio);
folio_put(folio); folio_put(folio);
if (pos > old_size) { if (pos > old_size)
pagecache_isize_extended(inode, old_size, pos); pagecache_isize_extended(inode, old_size, pos);
zero_len = pos - old_size;
}
if (!disksize_changed && !zero_len) if (!disksize_changed)
return copied; return copied;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) if (IS_ERR(handle))
return PTR_ERR(handle); return PTR_ERR(handle);
if (zero_len)
ext4_zero_partial_blocks(handle, inode, old_size, zero_len);
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle); ext4_journal_stop(handle);
@@ -4014,12 +4027,11 @@ void ext4_set_aops(struct inode *inode)
* ext4_punch_hole, etc) which needs to be properly zeroed out. Otherwise a * ext4_punch_hole, etc) which needs to be properly zeroed out. Otherwise a
* racing writeback can come later and flush the stale pagecache to disk. * racing writeback can come later and flush the stale pagecache to disk.
*/ */
static int __ext4_block_zero_page_range(handle_t *handle, static struct buffer_head *ext4_load_tail_bh(struct inode *inode, loff_t from)
struct address_space *mapping, loff_t from, loff_t length)
{ {
unsigned int offset, blocksize, pos; unsigned int offset, blocksize, pos;
ext4_lblk_t iblock; ext4_lblk_t iblock;
struct inode *inode = mapping->host; struct address_space *mapping = inode->i_mapping;
struct buffer_head *bh; struct buffer_head *bh;
struct folio *folio; struct folio *folio;
int err = 0; int err = 0;
@@ -4028,7 +4040,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
mapping_gfp_constraint(mapping, ~__GFP_FS)); mapping_gfp_constraint(mapping, ~__GFP_FS));
if (IS_ERR(folio)) if (IS_ERR(folio))
return PTR_ERR(folio); return ERR_CAST(folio);
blocksize = inode->i_sb->s_blocksize; blocksize = inode->i_sb->s_blocksize;
@@ -4080,47 +4092,92 @@ static int __ext4_block_zero_page_range(handle_t *handle,
} }
} }
} }
if (ext4_should_journal_data(inode)) { return bh;
BUFFER_TRACE(bh, "get write access");
err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
EXT4_JTR_NONE);
if (err)
goto unlock;
}
folio_zero_range(folio, offset, length);
BUFFER_TRACE(bh, "zeroed end of block");
if (ext4_should_journal_data(inode)) {
err = ext4_dirty_journalled_data(handle, bh);
} else {
mark_buffer_dirty(bh);
/*
* Only the written block requires ordered data to prevent
* exposing stale data.
*/
if (!buffer_unwritten(bh) && !buffer_delay(bh) &&
ext4_should_order_data(inode))
err = ext4_jbd2_inode_add_write(handle, inode, from,
length);
}
unlock: unlock:
folio_unlock(folio); folio_unlock(folio);
folio_put(folio); folio_put(folio);
return err ? ERR_PTR(err) : NULL;
}
static int ext4_block_do_zero_range(struct inode *inode, loff_t from,
loff_t length, bool *did_zero,
bool *zero_written)
{
struct buffer_head *bh;
struct folio *folio;
bh = ext4_load_tail_bh(inode, from);
if (IS_ERR_OR_NULL(bh))
return PTR_ERR_OR_ZERO(bh);
folio = bh->b_folio;
folio_zero_range(folio, offset_in_folio(folio, from), length);
BUFFER_TRACE(bh, "zeroed end of block");
mark_buffer_dirty(bh);
if (did_zero)
*did_zero = true;
if (zero_written && !buffer_unwritten(bh) && !buffer_delay(bh))
*zero_written = true;
folio_unlock(folio);
folio_put(folio);
return 0;
}
static int ext4_block_journalled_zero_range(struct inode *inode, loff_t from,
loff_t length, bool *did_zero)
{
struct buffer_head *bh;
struct folio *folio;
handle_t *handle;
int err;
handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
bh = ext4_load_tail_bh(inode, from);
if (IS_ERR_OR_NULL(bh)) {
err = PTR_ERR_OR_ZERO(bh);
goto out_handle;
}
folio = bh->b_folio;
BUFFER_TRACE(bh, "get write access");
err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
EXT4_JTR_NONE);
if (err)
goto out;
folio_zero_range(folio, offset_in_folio(folio, from), length);
BUFFER_TRACE(bh, "zeroed end of block");
err = ext4_dirty_journalled_data(handle, bh);
if (err)
goto out;
if (did_zero)
*did_zero = true;
out:
folio_unlock(folio);
folio_put(folio);
out_handle:
ext4_journal_stop(handle);
return err; return err;
} }
/* /*
* ext4_block_zero_page_range() zeros out a mapping of length 'length' * Zeros out a mapping of length 'length' starting from file offset
* starting from file offset 'from'. The range to be zero'd must * 'from'. The range to be zero'd must be contained with in one block.
* be contained with in one block. If the specified range exceeds * If the specified range exceeds the end of the block it will be
* the end of the block it will be shortened to end of the block * shortened to end of the block that corresponds to 'from'.
* that corresponds to 'from'
*/ */
static int ext4_block_zero_page_range(handle_t *handle, static int ext4_block_zero_range(struct inode *inode,
struct address_space *mapping, loff_t from, loff_t length) loff_t from, loff_t length, bool *did_zero,
bool *zero_written)
{ {
struct inode *inode = mapping->host;
unsigned blocksize = inode->i_sb->s_blocksize; unsigned blocksize = inode->i_sb->s_blocksize;
unsigned int max = blocksize - (from & (blocksize - 1)); unsigned int max = blocksize - (from & (blocksize - 1));
@@ -4132,40 +4189,73 @@ static int ext4_block_zero_page_range(handle_t *handle,
length = max; length = max;
if (IS_DAX(inode)) { if (IS_DAX(inode)) {
return dax_zero_range(inode, from, length, NULL, return dax_zero_range(inode, from, length, did_zero,
&ext4_iomap_ops); &ext4_iomap_ops);
} else if (ext4_should_journal_data(inode)) {
return ext4_block_journalled_zero_range(inode, from, length,
did_zero);
} }
return __ext4_block_zero_page_range(handle, mapping, from, length); return ext4_block_do_zero_range(inode, from, length, did_zero,
zero_written);
} }
/* /*
* ext4_block_truncate_page() zeroes out a mapping from file offset `from' * Zero out a mapping from file offset 'from' up to the end of the block
* up to the end of the block which corresponds to `from'. * which corresponds to 'from' or to the given 'end' inside this block.
* This required during truncate. We need to physically zero the tail end * This required during truncate up and performing append writes. We need
* of that block so it doesn't yield old data if the file is later grown. * to physically zero the tail end of that block so it doesn't yield old
* data if the file is grown.
*/ */
static int ext4_block_truncate_page(handle_t *handle, int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end)
struct address_space *mapping, loff_t from)
{ {
unsigned length; unsigned int blocksize = i_blocksize(inode);
unsigned blocksize; unsigned int offset;
struct inode *inode = mapping->host; loff_t length = end - from;
bool did_zero = false;
bool zero_written = false;
int err;
offset = from & (blocksize - 1);
if (!offset || from >= end)
return 0;
/* If we are processing an encrypted inode during orphan list handling */ /* If we are processing an encrypted inode during orphan list handling */
if (IS_ENCRYPTED(inode) && !fscrypt_has_encryption_key(inode)) if (IS_ENCRYPTED(inode) && !fscrypt_has_encryption_key(inode))
return 0; return 0;
blocksize = i_blocksize(inode); if (length > blocksize - offset)
length = blocksize - (from & (blocksize - 1)); length = blocksize - offset;
return ext4_block_zero_page_range(handle, mapping, from, length); err = ext4_block_zero_range(inode, from, length,
&did_zero, &zero_written);
if (err)
return err;
/*
* It's necessary to order zeroed data before update i_disksize when
* truncating up or performing an append write, because there might be
* exposing stale on-disk data which may caused by concurrent post-EOF
* mmap write during folio writeback.
*/
if (ext4_should_order_data(inode) &&
did_zero && zero_written && !IS_DAX(inode)) {
handle_t *handle;
handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
err = ext4_jbd2_inode_add_write(handle, inode, from, length);
ext4_journal_stop(handle);
if (err)
return err;
}
return 0;
} }
int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length,
loff_t lstart, loff_t length) bool *did_zero)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
unsigned partial_start, partial_end; unsigned partial_start, partial_end;
ext4_fsblk_t start, end; ext4_fsblk_t start, end;
loff_t byte_end = (lstart + length - 1); loff_t byte_end = (lstart + length - 1);
@@ -4180,22 +4270,21 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
/* Handle partial zero within the single block */ /* Handle partial zero within the single block */
if (start == end && if (start == end &&
(partial_start || (partial_end != sb->s_blocksize - 1))) { (partial_start || (partial_end != sb->s_blocksize - 1))) {
err = ext4_block_zero_page_range(handle, mapping, err = ext4_block_zero_range(inode, lstart, length, did_zero,
lstart, length); NULL);
return err; return err;
} }
/* Handle partial zero out on the start of the range */ /* Handle partial zero out on the start of the range */
if (partial_start) { if (partial_start) {
err = ext4_block_zero_page_range(handle, mapping, err = ext4_block_zero_range(inode, lstart, sb->s_blocksize,
lstart, sb->s_blocksize); did_zero, NULL);
if (err) if (err)
return err; return err;
} }
/* Handle partial zero out on the end of the range */ /* Handle partial zero out on the end of the range */
if (partial_end != sb->s_blocksize - 1) if (partial_end != sb->s_blocksize - 1)
err = ext4_block_zero_page_range(handle, mapping, err = ext4_block_zero_range(inode, byte_end - partial_end,
byte_end - partial_end, partial_end + 1, did_zero, NULL);
partial_end + 1);
return err; return err;
} }
@@ -4344,6 +4433,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
loff_t end = offset + length; loff_t end = offset + length;
handle_t *handle; handle_t *handle;
unsigned int credits; unsigned int credits;
bool partial_zeroed = false;
int ret; int ret;
trace_ext4_punch_hole(inode, offset, length, 0); trace_ext4_punch_hole(inode, offset, length, 0);
@@ -4370,17 +4460,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
end = max_end; end = max_end;
length = end - offset; length = end - offset;
/*
* Attach jinode to inode for jbd2 if we do any zeroing of partial
* block.
*/
if (!IS_ALIGNED(offset | end, sb->s_blocksize)) {
ret = ext4_inode_attach_jinode(inode);
if (ret < 0)
return ret;
}
ret = ext4_update_disksize_before_punch(inode, offset, length); ret = ext4_update_disksize_before_punch(inode, offset, length);
if (ret) if (ret)
return ret; return ret;
@@ -4390,8 +4469,18 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
if (ret) if (ret)
return ret; return ret;
ret = ext4_zero_partial_blocks(inode, offset, length, &partial_zeroed);
if (ret)
return ret;
if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
ret = filemap_write_and_wait_range(inode->i_mapping, offset,
end - 1);
if (ret)
return ret;
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
credits = ext4_chunk_trans_extent(inode, 2); credits = ext4_chunk_trans_extent(inode, 0);
else else
credits = ext4_blocks_for_truncate(inode); credits = ext4_blocks_for_truncate(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
@@ -4401,10 +4490,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
return ret; return ret;
} }
ret = ext4_zero_partial_blocks(handle, inode, offset, length);
if (ret)
goto out_handle;
/* If there are blocks to remove, do it */ /* If there are blocks to remove, do it */
start_lblk = EXT4_B_TO_LBLK(inode, offset); start_lblk = EXT4_B_TO_LBLK(inode, offset);
end_lblk = end >> inode->i_blkbits; end_lblk = end >> inode->i_blkbits;
@@ -4441,7 +4526,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
goto out_handle; goto out_handle;
ext4_update_inode_fsync_trans(handle, inode, 1); ext4_update_inode_fsync_trans(handle, inode, 1);
if (IS_SYNC(inode)) if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
ext4_handle_sync(handle); ext4_handle_sync(handle);
out_handle: out_handle:
ext4_journal_stop(handle); ext4_journal_stop(handle);
@@ -4512,7 +4597,6 @@ int ext4_truncate(struct inode *inode)
unsigned int credits; unsigned int credits;
int err = 0, err2; int err = 0, err2;
handle_t *handle; handle_t *handle;
struct address_space *mapping = inode->i_mapping;
/* /*
* There is a possibility that we're either freeing the inode * There is a possibility that we're either freeing the inode
@@ -4542,6 +4626,11 @@ int ext4_truncate(struct inode *inode)
err = ext4_inode_attach_jinode(inode); err = ext4_inode_attach_jinode(inode);
if (err) if (err)
goto out_trace; goto out_trace;
/* Zero to the end of the block containing i_size */
err = ext4_block_zero_eof(inode, inode->i_size, LLONG_MAX);
if (err)
goto out_trace;
} }
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4555,9 +4644,6 @@ int ext4_truncate(struct inode *inode)
goto out_trace; goto out_trace;
} }
if (inode->i_size & (inode->i_sb->s_blocksize - 1))
ext4_block_truncate_page(handle, mapping, inode->i_size);
/* /*
* We add the inode to the orphan list, so that if this * We add the inode to the orphan list, so that if this
* truncate spans multiple transactions, and we crash, we will * truncate spans multiple transactions, and we crash, we will
@@ -5927,15 +6013,6 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
goto out_mmap_sem; goto out_mmap_sem;
} }
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto out_mmap_sem;
}
if (ext4_handle_valid(handle) && shrink) {
error = ext4_orphan_add(handle, inode);
orphan = 1;
}
/* /*
* Update c/mtime and tail zero the EOF folio on * Update c/mtime and tail zero the EOF folio on
* truncate up. ext4_truncate() handles the shrink case * truncate up. ext4_truncate() handles the shrink case
@@ -5944,9 +6021,22 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (!shrink) { if (!shrink) {
inode_set_mtime_to_ts(inode, inode_set_mtime_to_ts(inode,
inode_set_ctime_current(inode)); inode_set_ctime_current(inode));
if (oldsize & (inode->i_sb->s_blocksize - 1)) if (oldsize & (inode->i_sb->s_blocksize - 1)) {
ext4_block_truncate_page(handle, error = ext4_block_zero_eof(inode,
inode->i_mapping, oldsize); oldsize, LLONG_MAX);
if (error)
goto out_mmap_sem;
}
}
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto out_mmap_sem;
}
if (ext4_handle_valid(handle) && shrink) {
error = ext4_orphan_add(handle, inode);
orphan = 1;
} }
if (shrink) if (shrink)

View File

@@ -362,7 +362,6 @@ static int mbt_kunit_init(struct kunit *test)
return ret; return ret;
} }
test->priv = sb;
kunit_activate_static_stub(test, kunit_activate_static_stub(test,
ext4_read_block_bitmap_nowait, ext4_read_block_bitmap_nowait,
ext4_read_block_bitmap_nowait_stub); ext4_read_block_bitmap_nowait_stub);
@@ -383,6 +382,8 @@ static int mbt_kunit_init(struct kunit *test)
return -ENOMEM; return -ENOMEM;
} }
test->priv = sb;
return 0; return 0;
} }
@@ -390,6 +391,9 @@ static void mbt_kunit_exit(struct kunit *test)
{ {
struct super_block *sb = (struct super_block *)test->priv; struct super_block *sb = (struct super_block *)test->priv;
if (!sb)
return;
mbt_mb_release(sb); mbt_mb_release(sb);
mbt_ctx_release(sb); mbt_ctx_release(sb);
mbt_ext4_free_super_block(sb); mbt_ext4_free_super_block(sb);

View File

@@ -2876,7 +2876,7 @@ ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
EXT4_MB_GRP_NEED_INIT(grp) && EXT4_MB_GRP_NEED_INIT(grp) &&
ext4_free_group_clusters(sb, gdp) > 0 ) { ext4_free_group_clusters(sb, gdp) > 0 ) {
bh = ext4_read_block_bitmap_nowait(sb, group, true); bh = ext4_read_block_bitmap_nowait(sb, group, true);
if (bh && !IS_ERR(bh)) { if (!IS_ERR_OR_NULL(bh)) {
if (!buffer_uptodate(bh) && cnt) if (!buffer_uptodate(bh) && cnt)
(*cnt)++; (*cnt)++;
brelse(bh); brelse(bh);
@@ -4561,22 +4561,16 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
(req <= (size) || max <= (chunk_size)) (req <= (size) || max <= (chunk_size))
/* first, try to predict filesize */ /* first, try to predict filesize */
/* XXX: should this table be tunable? */
start_off = 0; start_off = 0;
if (size <= 16 * 1024) { if (size <= SZ_1M) {
size = 16 * 1024; /*
} else if (size <= 32 * 1024) { * For files up to 1MB, round up the preallocation size to
size = 32 * 1024; * the next power of two, with a minimum of 16KB.
} else if (size <= 64 * 1024) { */
size = 64 * 1024; if (size <= (unsigned long)SZ_16K)
} else if (size <= 128 * 1024) { size = SZ_16K;
size = 128 * 1024; else
} else if (size <= 256 * 1024) { size = roundup_pow_of_two(size);
size = 256 * 1024;
} else if (size <= 512 * 1024) {
size = 512 * 1024;
} else if (size <= 1024 * 1024) {
size = 1024 * 1024;
} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) { } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
start_off = ((loff_t)ac->ac_o_ex.fe_logical >> start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
(21 - bsbits)) << 21; (21 - bsbits)) << 21;

View File

@@ -224,8 +224,8 @@ static int mext_move_begin(struct mext_data *mext, struct folio *folio[2],
} }
/* Adjust the moving length according to the length of shorter folio. */ /* Adjust the moving length according to the length of shorter folio. */
move_len = umin(folio_pos(folio[0]) + folio_size(folio[0]) - orig_pos, move_len = umin(folio_next_pos(folio[0]) - orig_pos,
folio_pos(folio[1]) + folio_size(folio[1]) - donor_pos); folio_next_pos(folio[1]) - donor_pos);
move_len >>= blkbits; move_len >>= blkbits;
if (move_len < mext->orig_map.m_len) if (move_len < mext->orig_map.m_len)
mext->orig_map.m_len = move_len; mext->orig_map.m_len = move_len;

View File

@@ -647,7 +647,7 @@ static struct stats dx_show_leaf(struct inode *dir,
/* Directory is not encrypted */ /* Directory is not encrypted */
(void) ext4fs_dirhash(dir, de->name, (void) ext4fs_dirhash(dir, de->name,
de->name_len, &h); de->name_len, &h);
printk("%*.s:(U)%x.%u ", len, printk("%.*s:(U)%x.%u ", len,
name, h.hash, name, h.hash,
(unsigned) ((char *) de (unsigned) ((char *) de
- base)); - base));
@@ -683,7 +683,7 @@ static struct stats dx_show_leaf(struct inode *dir,
(void) ext4fs_dirhash(dir, (void) ext4fs_dirhash(dir,
de->name, de->name,
de->name_len, &h); de->name_len, &h);
printk("%*.s:(E)%x.%u ", len, name, printk("%.*s:(E)%x.%u ", len, name,
h.hash, (unsigned) ((char *) de h.hash, (unsigned) ((char *) de
- base)); - base));
fscrypt_fname_free_buffer( fscrypt_fname_free_buffer(
@@ -694,7 +694,7 @@ static struct stats dx_show_leaf(struct inode *dir,
char *name = de->name; char *name = de->name;
(void) ext4fs_dirhash(dir, de->name, (void) ext4fs_dirhash(dir, de->name,
de->name_len, &h); de->name_len, &h);
printk("%*.s:%x.%u ", len, name, h.hash, printk("%.*s:%x.%u ", len, name, h.hash,
(unsigned) ((char *) de - base)); (unsigned) ((char *) de - base));
#endif #endif
} }
@@ -723,7 +723,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
struct stats stats; struct stats stats;
printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
bh = ext4_bread(NULL,dir, block, 0); bh = ext4_bread(NULL,dir, block, 0);
if (!bh || IS_ERR(bh)) if (IS_ERR_OR_NULL(bh))
continue; continue;
stats = levels? stats = levels?
dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
@@ -2353,10 +2353,10 @@ out_frames:
* may not sleep between calling this and putting something into * may not sleep between calling this and putting something into
* the entry, as someone else might have used it while you slept. * the entry, as someone else might have used it while you slept.
*/ */
static int ext4_add_entry(handle_t *handle, struct dentry *dentry, static int __ext4_add_entry(handle_t *handle, struct inode *dir,
const struct qstr *d_name,
struct inode *inode) struct inode *inode)
{ {
struct inode *dir = d_inode(dentry->d_parent);
struct buffer_head *bh = NULL; struct buffer_head *bh = NULL;
struct ext4_dir_entry_2 *de; struct ext4_dir_entry_2 *de;
struct super_block *sb; struct super_block *sb;
@@ -2373,13 +2373,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
sb = dir->i_sb; sb = dir->i_sb;
blocksize = sb->s_blocksize; blocksize = sb->s_blocksize;
if (fscrypt_is_nokey_name(dentry)) if (!generic_ci_validate_strict_name(dir, d_name))
return -ENOKEY;
if (!generic_ci_validate_strict_name(dir, &dentry->d_name))
return -EINVAL; return -EINVAL;
retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname); retval = ext4_fname_setup_filename(dir, d_name, 0, &fname);
if (retval) if (retval)
return retval; return retval;
@@ -2460,6 +2457,16 @@ out:
return retval; return retval;
} }
static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode)
{
struct inode *dir = d_inode(dentry->d_parent);
if (fscrypt_is_nokey_name(dentry))
return -ENOKEY;
return __ext4_add_entry(handle, dir, &dentry->d_name, inode);
}
/* /*
* Returns 0 for success, or a negative error value * Returns 0 for success, or a negative error value
*/ */
@@ -3445,7 +3452,8 @@ out_retry:
return err; return err;
} }
int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry) int __ext4_link(struct inode *dir, struct inode *inode,
const struct qstr *d_name, struct dentry *dentry)
{ {
handle_t *handle; handle_t *handle;
int err, retries = 0; int err, retries = 0;
@@ -3461,9 +3469,8 @@ retry:
inode_set_ctime_current(inode); inode_set_ctime_current(inode);
ext4_inc_count(inode); ext4_inc_count(inode);
ihold(inode);
err = ext4_add_entry(handle, dentry, inode); err = __ext4_add_entry(handle, dir, d_name, inode);
if (!err) { if (!err) {
err = ext4_mark_inode_dirty(handle, inode); err = ext4_mark_inode_dirty(handle, inode);
/* this can happen only for tmpfile being /* this can happen only for tmpfile being
@@ -3471,11 +3478,10 @@ retry:
*/ */
if (inode->i_nlink == 1) if (inode->i_nlink == 1)
ext4_orphan_del(handle, inode); ext4_orphan_del(handle, inode);
d_instantiate(dentry, inode); if (dentry)
ext4_fc_track_link(handle, dentry); ext4_fc_track_link(handle, inode, dentry);
} else { } else {
drop_nlink(inode); drop_nlink(inode);
iput(inode);
} }
ext4_journal_stop(handle); ext4_journal_stop(handle);
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
@@ -3504,9 +3510,13 @@ static int ext4_link(struct dentry *old_dentry,
err = dquot_initialize(dir); err = dquot_initialize(dir);
if (err) if (err)
return err; return err;
return __ext4_link(dir, inode, dentry); err = __ext4_link(dir, inode, &dentry->d_name, dentry);
if (!err) {
ihold(inode);
d_instantiate(dentry, inode);
}
return err;
} }
/* /*
* Try to find buffer head where contains the parent block. * Try to find buffer head where contains the parent block.
* It should be the inode block if it is inlined or the 1st block * It should be the inode block if it is inlined or the 1st block

View File

@@ -521,6 +521,7 @@ static bool ext4_journalled_writepage_needs_redirty(struct jbd2_inode *jinode,
{ {
struct buffer_head *bh, *head; struct buffer_head *bh, *head;
struct journal_head *jh; struct journal_head *jh;
transaction_t *trans = READ_ONCE(jinode->i_transaction);
bh = head = folio_buffers(folio); bh = head = folio_buffers(folio);
do { do {
@@ -539,7 +540,7 @@ static bool ext4_journalled_writepage_needs_redirty(struct jbd2_inode *jinode,
*/ */
jh = bh2jh(bh); jh = bh2jh(bh);
if (buffer_dirty(bh) || if (buffer_dirty(bh) ||
(jh && (jh->b_transaction != jinode->i_transaction || (jh && (jh->b_transaction != trans ||
jh->b_next_transaction))) jh->b_next_transaction)))
return true; return true;
} while ((bh = bh->b_this_page) != head); } while ((bh = bh->b_this_page) != head);
@@ -550,15 +551,20 @@ static bool ext4_journalled_writepage_needs_redirty(struct jbd2_inode *jinode,
static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode) static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
{ {
struct address_space *mapping = jinode->i_vfs_inode->i_mapping; struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
loff_t range_start, range_end;
struct writeback_control wbc = { struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL, .sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX, .nr_to_write = LONG_MAX,
.range_start = jinode->i_dirty_start, };
.range_end = jinode->i_dirty_end,
};
struct folio *folio = NULL; struct folio *folio = NULL;
int error; int error;
if (!jbd2_jinode_get_dirty_range(jinode, &range_start, &range_end))
return 0;
wbc.range_start = range_start;
wbc.range_end = range_end;
/* /*
* writeback_iter() already checks for dirty pages and calls * writeback_iter() already checks for dirty pages and calls
* folio_clear_dirty_for_io(), which we want to write protect the * folio_clear_dirty_for_io(), which we want to write protect the

View File

@@ -92,7 +92,7 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode,
if (!dentry) { if (!dentry) {
bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT); bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT);
if (IS_ERR(bh) || !bh) if (IS_ERR_OR_NULL(bh))
return ERR_PTR(-ECHILD); return ERR_PTR(-ECHILD);
if (!ext4_buffer_uptodate(bh)) { if (!ext4_buffer_uptodate(bh)) {
brelse(bh); brelse(bh);

View File

@@ -226,7 +226,7 @@ check_xattrs(struct inode *inode, struct buffer_head *bh,
/* Find the end of the names list */ /* Find the end of the names list */
while (!IS_LAST_ENTRY(e)) { while (!IS_LAST_ENTRY(e)) {
struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
if ((void *)next >= end) { if ((void *)next + sizeof(u32) > end) {
err_str = "e_name out of bounds"; err_str = "e_name out of bounds";
goto errout; goto errout;
} }
@@ -1165,7 +1165,7 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
{ {
struct inode *ea_inode; struct inode *ea_inode;
struct ext4_xattr_entry *entry; struct ext4_xattr_entry *entry;
struct ext4_iloc iloc; struct ext4_iloc iloc = { .bh = NULL };
bool dirty = false; bool dirty = false;
unsigned int ea_ino; unsigned int ea_ino;
int err; int err;
@@ -1260,6 +1260,8 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
ext4_warning_inode(parent, ext4_warning_inode(parent,
"handle dirty metadata err=%d", err); "handle dirty metadata err=%d", err);
} }
brelse(iloc.bh);
} }
/* /*

View File

@@ -180,7 +180,13 @@ static int journal_wait_on_commit_record(journal_t *journal,
/* Send all the data buffers related to an inode */ /* Send all the data buffers related to an inode */
int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode) int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{ {
if (!jinode || !(jinode->i_flags & JI_WRITE_DATA)) unsigned long flags;
if (!jinode)
return 0;
flags = READ_ONCE(jinode->i_flags);
if (!(flags & JI_WRITE_DATA))
return 0; return 0;
trace_jbd2_submit_inode_data(jinode->i_vfs_inode); trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
@@ -191,12 +197,30 @@ EXPORT_SYMBOL(jbd2_submit_inode_data);
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode) int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{ {
if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) || struct address_space *mapping;
!jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping) struct inode *inode;
unsigned long flags;
loff_t start_byte, end_byte;
if (!jinode)
return 0;
flags = READ_ONCE(jinode->i_flags);
if (!(flags & JI_WAIT_DATA))
return 0;
inode = jinode->i_vfs_inode;
if (!inode)
return 0;
mapping = inode->i_mapping;
if (!mapping)
return 0;
if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte))
return 0; return 0;
return filemap_fdatawait_range_keep_errors( return filemap_fdatawait_range_keep_errors(
jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start, mapping, start_byte, end_byte);
jinode->i_dirty_end);
} }
EXPORT_SYMBOL(jbd2_wait_inode_data); EXPORT_SYMBOL(jbd2_wait_inode_data);
@@ -218,7 +242,8 @@ static int journal_submit_data_buffers(journal_t *journal,
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
if (!(jinode->i_flags & JI_WRITE_DATA)) if (!(jinode->i_flags & JI_WRITE_DATA))
continue; continue;
jinode->i_flags |= JI_COMMIT_RUNNING; WRITE_ONCE(jinode->i_flags,
jinode->i_flags | JI_COMMIT_RUNNING);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
/* submit the inode data buffers. */ /* submit the inode data buffers. */
trace_jbd2_submit_inode_data(jinode->i_vfs_inode); trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
@@ -229,7 +254,8 @@ static int journal_submit_data_buffers(journal_t *journal,
} }
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction); J_ASSERT(jinode->i_transaction == commit_transaction);
jinode->i_flags &= ~JI_COMMIT_RUNNING; WRITE_ONCE(jinode->i_flags,
jinode->i_flags & ~JI_COMMIT_RUNNING);
smp_mb(); smp_mb();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
} }
@@ -240,10 +266,13 @@ static int journal_submit_data_buffers(journal_t *journal,
int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode) int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
{ {
struct address_space *mapping = jinode->i_vfs_inode->i_mapping; struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
loff_t start_byte, end_byte;
if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte))
return 0;
return filemap_fdatawait_range_keep_errors(mapping, return filemap_fdatawait_range_keep_errors(mapping,
jinode->i_dirty_start, start_byte, end_byte);
jinode->i_dirty_end);
} }
/* /*
@@ -262,7 +291,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
if (!(jinode->i_flags & JI_WAIT_DATA)) if (!(jinode->i_flags & JI_WAIT_DATA))
continue; continue;
jinode->i_flags |= JI_COMMIT_RUNNING; WRITE_ONCE(jinode->i_flags, jinode->i_flags | JI_COMMIT_RUNNING);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
/* wait for the inode data buffers writeout. */ /* wait for the inode data buffers writeout. */
if (journal->j_finish_inode_data_buffers) { if (journal->j_finish_inode_data_buffers) {
@@ -272,7 +301,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
} }
cond_resched(); cond_resched();
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
jinode->i_flags &= ~JI_COMMIT_RUNNING; WRITE_ONCE(jinode->i_flags, jinode->i_flags & ~JI_COMMIT_RUNNING);
smp_mb(); smp_mb();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
} }
@@ -288,8 +317,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
&jinode->i_transaction->t_inode_list); &jinode->i_transaction->t_inode_list);
} else { } else {
jinode->i_transaction = NULL; jinode->i_transaction = NULL;
jinode->i_dirty_start = 0; WRITE_ONCE(jinode->i_dirty_start_page, 0);
jinode->i_dirty_end = 0; WRITE_ONCE(jinode->i_dirty_end_page, 0);
} }
} }
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);

View File

@@ -3018,8 +3018,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
jinode->i_next_transaction = NULL; jinode->i_next_transaction = NULL;
jinode->i_vfs_inode = inode; jinode->i_vfs_inode = inode;
jinode->i_flags = 0; jinode->i_flags = 0;
jinode->i_dirty_start = 0; jinode->i_dirty_start_page = 0;
jinode->i_dirty_end = 0; jinode->i_dirty_end_page = 0;
INIT_LIST_HEAD(&jinode->i_list); INIT_LIST_HEAD(&jinode->i_list);
} }
@@ -3176,4 +3176,3 @@ MODULE_DESCRIPTION("Generic filesystem journal-writing module");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
module_init(journal_init); module_init(journal_init);
module_exit(journal_exit); module_exit(journal_exit);

View File

@@ -428,6 +428,7 @@ void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
journal_t *journal = handle->h_transaction->t_journal; journal_t *journal = handle->h_transaction->t_journal;
int need_cancel; int need_cancel;
struct buffer_head *bh = jh2bh(jh); struct buffer_head *bh = jh2bh(jh);
struct address_space *bh_mapping = bh->b_folio->mapping;
jbd2_debug(4, "journal_head %p, cancelling revoke\n", jh); jbd2_debug(4, "journal_head %p, cancelling revoke\n", jh);
@@ -464,13 +465,14 @@ void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
* buffer_head? If so, we'd better make sure we clear the * buffer_head? If so, we'd better make sure we clear the
* revoked status on any hashed alias too, otherwise the revoke * revoked status on any hashed alias too, otherwise the revoke
* state machine will get very upset later on. */ * state machine will get very upset later on. */
if (need_cancel) { if (need_cancel && !sb_is_blkdev_sb(bh_mapping->host->i_sb)) {
struct buffer_head *bh2; struct buffer_head *bh2;
bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr, bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr,
bh->b_size); bh->b_size);
if (bh2) { if (bh2) {
if (bh2 != bh) WARN_ON_ONCE(bh2 == bh);
clear_buffer_revoked(bh2); clear_buffer_revoked(bh2);
__brelse(bh2); __brelse(bh2);
} }
} }

View File

@@ -474,7 +474,8 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
return ERR_PTR(-EROFS); return ERR_PTR(-EROFS);
if (handle) { if (handle) {
J_ASSERT(handle->h_transaction->t_journal == journal); if (WARN_ON_ONCE(handle->h_transaction->t_journal != journal))
return ERR_PTR(-EINVAL);
handle->h_ref++; handle->h_ref++;
return handle; return handle;
} }
@@ -1036,7 +1037,13 @@ repeat:
*/ */
if (!jh->b_transaction) { if (!jh->b_transaction) {
JBUFFER_TRACE(jh, "no transaction"); JBUFFER_TRACE(jh, "no transaction");
J_ASSERT_JH(jh, !jh->b_next_transaction); if (WARN_ON_ONCE(jh->b_next_transaction)) {
spin_unlock(&jh->b_state_lock);
unlock_buffer(bh);
error = -EINVAL;
jbd2_journal_abort(journal, error);
goto out;
}
JBUFFER_TRACE(jh, "file as BJ_Reserved"); JBUFFER_TRACE(jh, "file as BJ_Reserved");
/* /*
* Make sure all stores to jh (b_modified, b_frozen_data) are * Make sure all stores to jh (b_modified, b_frozen_data) are
@@ -1069,13 +1076,27 @@ repeat:
*/ */
if (jh->b_frozen_data) { if (jh->b_frozen_data) {
JBUFFER_TRACE(jh, "has frozen data"); JBUFFER_TRACE(jh, "has frozen data");
J_ASSERT_JH(jh, jh->b_next_transaction == NULL); if (WARN_ON_ONCE(jh->b_next_transaction)) {
spin_unlock(&jh->b_state_lock);
error = -EINVAL;
jbd2_journal_abort(journal, error);
goto out;
}
goto attach_next; goto attach_next;
} }
JBUFFER_TRACE(jh, "owned by older transaction"); JBUFFER_TRACE(jh, "owned by older transaction");
J_ASSERT_JH(jh, jh->b_next_transaction == NULL); if (WARN_ON_ONCE(jh->b_next_transaction ||
J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction); jh->b_transaction !=
journal->j_committing_transaction)) {
pr_err("JBD2: %s: assertion failure: b_next_transaction=%p b_transaction=%p j_committing_transaction=%p\n",
journal->j_devname, jh->b_next_transaction,
jh->b_transaction, journal->j_committing_transaction);
spin_unlock(&jh->b_state_lock);
error = -EINVAL;
jbd2_journal_abort(journal, error);
goto out;
}
/* /*
* There is one case we have to be very careful about. If the * There is one case we have to be very careful about. If the
@@ -1302,7 +1323,12 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
goto out; goto out;
} }
J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); if (WARN_ON_ONCE(!buffer_locked(jh2bh(jh)))) {
err = -EINVAL;
spin_unlock(&jh->b_state_lock);
jbd2_journal_abort(journal, err);
goto out;
}
if (jh->b_transaction == NULL) { if (jh->b_transaction == NULL) {
/* /*
@@ -1491,7 +1517,7 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
{ {
transaction_t *transaction = handle->h_transaction; transaction_t *transaction = handle->h_transaction;
journal_t *journal; journal_t *journal = transaction->t_journal;
struct journal_head *jh; struct journal_head *jh;
int ret = 0; int ret = 0;
@@ -1515,8 +1541,14 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (data_race(jh->b_transaction != transaction && if (data_race(jh->b_transaction != transaction &&
jh->b_next_transaction != transaction)) { jh->b_next_transaction != transaction)) {
spin_lock(&jh->b_state_lock); spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, jh->b_transaction == transaction || if (WARN_ON_ONCE(jh->b_transaction != transaction &&
jh->b_next_transaction == transaction); jh->b_next_transaction != transaction)) {
pr_err("JBD2: %s: assertion failure: b_transaction=%p transaction=%p b_next_transaction=%p\n",
journal->j_devname, jh->b_transaction,
transaction, jh->b_next_transaction);
ret = -EINVAL;
goto out_unlock_bh;
}
spin_unlock(&jh->b_state_lock); spin_unlock(&jh->b_state_lock);
} }
if (data_race(jh->b_modified == 1)) { if (data_race(jh->b_modified == 1)) {
@@ -1524,15 +1556,15 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (data_race(jh->b_transaction == transaction && if (data_race(jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata)) { jh->b_jlist != BJ_Metadata)) {
spin_lock(&jh->b_state_lock); spin_lock(&jh->b_state_lock);
if (jh->b_transaction == transaction && if (WARN_ON_ONCE(jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata) jh->b_jlist != BJ_Metadata)) {
pr_err("JBD2: assertion failure: h_type=%u " pr_err("JBD2: assertion failure: h_type=%u h_line_no=%u block_no=%llu jlist=%u\n",
"h_line_no=%u block_no=%llu jlist=%u\n",
handle->h_type, handle->h_line_no, handle->h_type, handle->h_line_no,
(unsigned long long) bh->b_blocknr, (unsigned long long) bh->b_blocknr,
jh->b_jlist); jh->b_jlist);
J_ASSERT_JH(jh, jh->b_transaction != transaction || ret = -EINVAL;
jh->b_jlist == BJ_Metadata); goto out_unlock_bh;
}
spin_unlock(&jh->b_state_lock); spin_unlock(&jh->b_state_lock);
} }
goto out; goto out;
@@ -1552,8 +1584,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
goto out_unlock_bh; goto out_unlock_bh;
} }
journal = transaction->t_journal;
if (jh->b_modified == 0) { if (jh->b_modified == 0) {
/* /*
* This buffer's got modified and becoming part * This buffer's got modified and becoming part
@@ -1631,7 +1661,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
} }
/* That test should have eliminated the following case: */ /* That test should have eliminated the following case: */
J_ASSERT_JH(jh, jh->b_frozen_data == NULL); if (WARN_ON_ONCE(jh->b_frozen_data)) {
ret = -EINVAL;
goto out_unlock_bh;
}
JBUFFER_TRACE(jh, "file as BJ_Metadata"); JBUFFER_TRACE(jh, "file as BJ_Metadata");
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
@@ -1670,6 +1703,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
int err = 0; int err = 0;
int was_modified = 0; int was_modified = 0;
int wait_for_writeback = 0; int wait_for_writeback = 0;
int abort_journal = 0;
if (is_handle_aborted(handle)) if (is_handle_aborted(handle))
return -EROFS; return -EROFS;
@@ -1703,7 +1737,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
jh->b_modified = 0; jh->b_modified = 0;
if (jh->b_transaction == transaction) { if (jh->b_transaction == transaction) {
J_ASSERT_JH(jh, !jh->b_frozen_data); if (WARN_ON_ONCE(jh->b_frozen_data)) {
err = -EINVAL;
abort_journal = 1;
goto drop;
}
/* If we are forgetting a buffer which is already part /* If we are forgetting a buffer which is already part
* of this transaction, then we can just drop it from * of this transaction, then we can just drop it from
@@ -1742,8 +1780,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
} }
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
} else if (jh->b_transaction) { } else if (jh->b_transaction) {
J_ASSERT_JH(jh, (jh->b_transaction == if (WARN_ON_ONCE(jh->b_transaction != journal->j_committing_transaction)) {
journal->j_committing_transaction)); err = -EINVAL;
abort_journal = 1;
goto drop;
}
/* However, if the buffer is still owned by a prior /* However, if the buffer is still owned by a prior
* (committing) transaction, we can't drop it yet... */ * (committing) transaction, we can't drop it yet... */
JBUFFER_TRACE(jh, "belongs to older transaction"); JBUFFER_TRACE(jh, "belongs to older transaction");
@@ -1761,7 +1802,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
jh->b_next_transaction = transaction; jh->b_next_transaction = transaction;
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
} else { } else {
J_ASSERT(jh->b_next_transaction == transaction); if (WARN_ON_ONCE(jh->b_next_transaction != transaction)) {
err = -EINVAL;
abort_journal = 1;
goto drop;
}
/* /*
* only drop a reference if this transaction modified * only drop a reference if this transaction modified
@@ -1807,6 +1852,8 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
drop: drop:
__brelse(bh); __brelse(bh);
spin_unlock(&jh->b_state_lock); spin_unlock(&jh->b_state_lock);
if (abort_journal)
jbd2_journal_abort(journal, err);
if (wait_for_writeback) if (wait_for_writeback)
wait_on_buffer(bh); wait_on_buffer(bh);
jbd2_journal_put_journal_head(jh); jbd2_journal_put_journal_head(jh);
@@ -2131,7 +2178,8 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
struct buffer_head *bh; struct buffer_head *bh;
bool ret = false; bool ret = false;
J_ASSERT(folio_test_locked(folio)); if (WARN_ON_ONCE(!folio_test_locked(folio)))
return false;
head = folio_buffers(folio); head = folio_buffers(folio);
bh = head; bh = head;
@@ -2646,6 +2694,9 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
{ {
transaction_t *transaction = handle->h_transaction; transaction_t *transaction = handle->h_transaction;
journal_t *journal; journal_t *journal;
pgoff_t start_page, end_page;
int err = 0;
int abort_transaction = 0;
if (is_handle_aborted(handle)) if (is_handle_aborted(handle))
return -EROFS; return -EROFS;
@@ -2654,15 +2705,21 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
jbd2_debug(4, "Adding inode %llu, tid:%d\n", jinode->i_vfs_inode->i_ino, jbd2_debug(4, "Adding inode %llu, tid:%d\n", jinode->i_vfs_inode->i_ino,
transaction->t_tid); transaction->t_tid);
spin_lock(&journal->j_list_lock); start_page = (pgoff_t)(start_byte >> PAGE_SHIFT);
jinode->i_flags |= flags; end_page = (pgoff_t)(end_byte >> PAGE_SHIFT) + 1;
if (jinode->i_dirty_end) { spin_lock(&journal->j_list_lock);
jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte); WRITE_ONCE(jinode->i_flags, jinode->i_flags | flags);
jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
if (jinode->i_dirty_start_page != jinode->i_dirty_end_page) {
WRITE_ONCE(jinode->i_dirty_start_page,
min(jinode->i_dirty_start_page, start_page));
WRITE_ONCE(jinode->i_dirty_end_page,
max(jinode->i_dirty_end_page, end_page));
} else { } else {
jinode->i_dirty_start = start_byte; /* Publish a new non-empty range by making end visible first. */
jinode->i_dirty_end = end_byte; WRITE_ONCE(jinode->i_dirty_end_page, end_page);
WRITE_ONCE(jinode->i_dirty_start_page, start_page);
} }
/* Is inode already attached where we need it? */ /* Is inode already attached where we need it? */
@@ -2680,20 +2737,33 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
/* On some different transaction's list - should be /* On some different transaction's list - should be
* the committing one */ * the committing one */
if (jinode->i_transaction) { if (jinode->i_transaction) {
J_ASSERT(jinode->i_next_transaction == NULL); if (WARN_ON_ONCE(jinode->i_next_transaction ||
J_ASSERT(jinode->i_transaction == jinode->i_transaction !=
journal->j_committing_transaction); journal->j_committing_transaction)) {
pr_err("JBD2: %s: assertion failure: i_next_transaction=%p i_transaction=%p j_committing_transaction=%p\n",
journal->j_devname, jinode->i_next_transaction,
jinode->i_transaction,
journal->j_committing_transaction);
err = -EINVAL;
abort_transaction = 1;
goto done;
}
jinode->i_next_transaction = transaction; jinode->i_next_transaction = transaction;
goto done; goto done;
} }
/* Not on any transaction list... */ /* Not on any transaction list... */
J_ASSERT(!jinode->i_next_transaction); if (WARN_ON_ONCE(jinode->i_next_transaction)) {
err = -EINVAL;
abort_transaction = 1;
goto done;
}
jinode->i_transaction = transaction; jinode->i_transaction = transaction;
list_add(&jinode->i_list, &transaction->t_inode_list); list_add(&jinode->i_list, &transaction->t_inode_list);
done: done:
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
if (abort_transaction)
return 0; jbd2_journal_abort(journal, err);
return err;
} }
int jbd2_journal_inode_ranged_write(handle_t *handle, int jbd2_journal_inode_ranged_write(handle_t *handle,
@@ -2739,7 +2809,7 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
int ret = 0; int ret = 0;
/* This is a quick check to avoid locking if not necessary */ /* This is a quick check to avoid locking if not necessary */
if (!jinode->i_transaction) if (!READ_ONCE(jinode->i_transaction))
goto out; goto out;
/* Locks are here just to force reading of recent values, it is /* Locks are here just to force reading of recent values, it is
* enough that the transaction was not committing before we started * enough that the transaction was not committing before we started

View File

@@ -899,8 +899,13 @@ bail:
static int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) static int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{ {
return filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping, struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
jinode->i_dirty_start, jinode->i_dirty_end); loff_t range_start, range_end;
if (!jbd2_jinode_get_dirty_range(jinode, &range_start, &range_end))
return 0;
return filemap_fdatawrite_range(mapping, range_start, range_end);
} }
int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty) int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)

View File

@@ -429,22 +429,46 @@ struct jbd2_inode {
unsigned long i_flags; unsigned long i_flags;
/** /**
* @i_dirty_start: * @i_dirty_start_page:
*
* Dirty range start in PAGE_SIZE units.
*
* The dirty range is empty if @i_dirty_start_page is greater than or
* equal to @i_dirty_end_page.
* *
* Offset in bytes where the dirty range for this inode starts.
* [j_list_lock] * [j_list_lock]
*/ */
loff_t i_dirty_start; pgoff_t i_dirty_start_page;
/** /**
* @i_dirty_end: * @i_dirty_end_page:
* *
* Inclusive offset in bytes where the dirty range for this inode * Dirty range end in PAGE_SIZE units (exclusive).
* ends. [j_list_lock] *
* [j_list_lock]
*/ */
loff_t i_dirty_end; pgoff_t i_dirty_end_page;
}; };
/*
* Lockless readers treat start_page >= end_page as an empty range.
* Writers publish a new non-empty range by storing i_dirty_end_page before
* i_dirty_start_page.
*/
static inline bool jbd2_jinode_get_dirty_range(const struct jbd2_inode *jinode,
loff_t *start, loff_t *end)
{
pgoff_t start_page = READ_ONCE(jinode->i_dirty_start_page);
pgoff_t end_page = READ_ONCE(jinode->i_dirty_end_page);
if (start_page >= end_page)
return false;
*start = (loff_t)start_page << PAGE_SHIFT;
*end = ((loff_t)end_page << PAGE_SHIFT) - 1;
return true;
}
struct jbd2_revoke_table_s; struct jbd2_revoke_table_s;
/** /**