Merge tag 'ext4_for_linux-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:

 - Refactor code paths involved with partial block zero-out in
   preparation for converting ext4 to use iomap for buffered writes

 - Remove use of d_alloc() from ext4 in preparation for the deprecation
   of this interface

 - Replace some J_ASSERTS with a journal abort so we can avoid a kernel
   panic for a localized file system error

 - Simplify various code paths in mballoc, move_extent, and fast commit

 - Fix rare deadlock in jbd2_journal_cancel_revoke() that can be
   triggered by generic/013 when blocksize < pagesize

 - Fix memory leak when releasing an extended attribute when its value
   is stored in an ea_inode

 - Fix various potential kunit test bugs in fs/ext4/extents.c

 - Fix potential out-of-bounds access in check_xattr() with a corrupted
   file system

 - Make the jbd2_inode dirty range tracking safe for lockless reads

 - Avoid a WARN_ON when writeback files due to a corrupted file system;
   we already print an ext4 warning indicating that data will be lost,
   so the WARN_ON is not necessary and doesn't add any new information

* tag 'ext4_for_linux-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (37 commits)
  jbd2: fix deadlock in jbd2_journal_cancel_revoke()
  ext4: fix missing brelse() in ext4_xattr_inode_dec_ref_all()
  ext4: fix possible null-ptr-deref in mbt_kunit_exit()
  ext4: fix possible null-ptr-deref in extents_kunit_exit()
  ext4: fix the error handling process in extents_kunit_init()
  ext4: call deactivate_super() in extents_kunit_exit()
  ext4: fix miss unlock 'sb->s_umount' in extents_kunit_init()
  ext4: fix bounds check in check_xattrs() to prevent out-of-bounds access
  ext4: zero post-EOF partial block before appending write
  ext4: move pagecache_isize_extended() out of active handle
  ext4: remove ctime/mtime update from ext4_alloc_file_blocks()
  ext4: unify SYNC mode checks in fallocate paths
  ext4: ensure zeroed partial blocks are persisted in SYNC mode
  ext4: move zero partial block range functions out of active handle
  ext4: pass allocate range as loff_t to ext4_alloc_file_blocks()
  ext4: remove handle parameters from zero partial block functions
  ext4: move ordered data handling out of ext4_block_do_zero_range()
  ext4: rename ext4_block_zero_page_range() to ext4_block_zero_range()
  ext4: factor out journalled block zeroing range
  ext4: rename and extend ext4_block_truncate_page()
  ...
This commit is contained in:
Linus Torvalds
2026-04-17 17:08:31 -07:00
19 changed files with 633 additions and 389 deletions

View File

@@ -28,7 +28,6 @@
#include <linux/seqlock.h>
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
@@ -1082,9 +1081,6 @@ struct ext4_inode_info {
spinlock_t i_raw_lock; /* protects updates to the raw inode */
/* Fast commit wait queue for this inode */
wait_queue_head_t i_fc_wait;
/*
* Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len
* and inode's EXT4_FC_STATE_COMMITTING state bit.
@@ -2976,7 +2972,8 @@ void __ext4_fc_track_unlink(handle_t *handle, struct inode *inode,
void __ext4_fc_track_link(handle_t *handle, struct inode *inode,
struct dentry *dentry);
void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry);
void ext4_fc_track_link(handle_t *handle, struct dentry *dentry);
void ext4_fc_track_link(handle_t *handle, struct inode *inode,
struct dentry *dentry);
void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
struct dentry *dentry);
void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
@@ -3101,8 +3098,9 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_chunk_trans_extent(struct inode *inode, int nrblocks);
extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
loff_t lstart, loff_t lend);
extern int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end);
extern int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart,
loff_t length, bool *did_zero);
extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
@@ -3721,7 +3719,7 @@ extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
struct inode *inode, struct dentry *dentry);
extern int __ext4_link(struct inode *dir, struct inode *inode,
struct dentry *dentry);
const struct qstr *d_name, struct dentry *dentry);
#define S_SHIFT 12
static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {

View File

@@ -142,10 +142,14 @@ static struct file_system_type ext_fs_type = {
static void extents_kunit_exit(struct kunit *test)
{
struct super_block *sb = k_ctx.k_ei->vfs_inode.i_sb;
struct ext4_sb_info *sbi = sb->s_fs_info;
struct ext4_sb_info *sbi;
if (!k_ctx.k_ei)
return;
sbi = k_ctx.k_ei->vfs_inode.i_sb->s_fs_info;
ext4_es_unregister_shrinker(sbi);
deactivate_super(sbi->s_sb);
kfree(sbi);
kfree(k_ctx.k_ei);
kfree(k_ctx.k_data);
@@ -224,33 +228,37 @@ static int extents_kunit_init(struct kunit *test)
(struct kunit_ext_test_param *)(test->param_value);
int err;
sb = sget(&ext_fs_type, NULL, ext_set, 0, NULL);
if (IS_ERR(sb))
return PTR_ERR(sb);
sb->s_blocksize = 4096;
sb->s_blocksize_bits = 12;
sbi = kzalloc_obj(struct ext4_sb_info);
if (sbi == NULL)
return -ENOMEM;
sb = sget(&ext_fs_type, NULL, ext_set, 0, NULL);
if (IS_ERR(sb)) {
kfree(sbi);
return PTR_ERR(sb);
}
sbi->s_sb = sb;
sb->s_fs_info = sbi;
sb->s_blocksize = 4096;
sb->s_blocksize_bits = 12;
if (!param || !param->disable_zeroout)
sbi->s_extent_max_zeroout_kb = 32;
/* setup the mock inode */
k_ctx.k_ei = kzalloc_obj(struct ext4_inode_info);
if (k_ctx.k_ei == NULL)
return -ENOMEM;
ei = k_ctx.k_ei;
inode = &ei->vfs_inode;
err = ext4_es_register_shrinker(sbi);
if (err)
return err;
goto out_deactivate;
/* setup the mock inode */
k_ctx.k_ei = kzalloc_obj(struct ext4_inode_info);
if (k_ctx.k_ei == NULL) {
err = -ENOMEM;
goto out;
}
ei = k_ctx.k_ei;
inode = &ei->vfs_inode;
ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock);
@@ -266,8 +274,10 @@ static int extents_kunit_init(struct kunit *test)
inode->i_sb = sb;
k_ctx.k_data = kzalloc(EXT_DATA_LEN * 4096, GFP_KERNEL);
if (k_ctx.k_data == NULL)
return -ENOMEM;
if (k_ctx.k_data == NULL) {
err = -ENOMEM;
goto out;
}
/*
* set the data area to a junk value
@@ -309,7 +319,23 @@ static int extents_kunit_init(struct kunit *test)
kunit_activate_static_stub(test, ext4_ext_zeroout, ext4_ext_zeroout_stub);
kunit_activate_static_stub(test, ext4_issue_zeroout,
ext4_issue_zeroout_stub);
up_write(&sb->s_umount);
return 0;
out:
kfree(k_ctx.k_ei);
k_ctx.k_ei = NULL;
kfree(k_ctx.k_data);
k_ctx.k_data = NULL;
ext4_es_unregister_shrinker(sbi);
out_deactivate:
deactivate_locked_super(sb);
kfree(sbi);
return err;
}
/*

View File

@@ -4571,30 +4571,30 @@ retry_remove_space:
return err;
}
static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
ext4_lblk_t len, loff_t new_size,
int flags)
static int ext4_alloc_file_blocks(struct file *file, loff_t offset, loff_t len,
loff_t new_size, int flags)
{
struct inode *inode = file_inode(file);
handle_t *handle;
int ret = 0, ret2 = 0, ret3 = 0;
int retries = 0;
int depth = 0;
ext4_lblk_t len_lblk;
struct ext4_map_blocks map;
unsigned int credits;
loff_t epos, old_size = i_size_read(inode);
loff_t epos = 0, old_size = i_size_read(inode);
unsigned int blkbits = inode->i_blkbits;
bool alloc_zero = false;
BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
map.m_lblk = offset;
map.m_len = len;
map.m_lblk = offset >> blkbits;
map.m_len = len_lblk = EXT4_MAX_BLOCKS(len, offset, blkbits);
/*
* Don't normalize the request if it can fit in one extent so
* that it doesn't get unnecessarily split into multiple
* extents.
*/
if (len <= EXT_UNWRITTEN_MAX_LEN)
if (len_lblk <= EXT_UNWRITTEN_MAX_LEN)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
/*
@@ -4611,16 +4611,23 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
/*
* credits to insert 1 extent into extent tree
*/
credits = ext4_chunk_trans_blocks(inode, len);
credits = ext4_chunk_trans_blocks(inode, len_lblk);
depth = ext_depth(inode);
/* Zero to the end of the block containing i_size */
if (new_size > old_size) {
ret = ext4_block_zero_eof(inode, old_size, LLONG_MAX);
if (ret)
return ret;
}
retry:
while (len) {
while (len_lblk) {
/*
* Recalculate credits when extent tree depth changes.
*/
if (depth != ext_depth(inode)) {
credits = ext4_chunk_trans_blocks(inode, len);
credits = ext4_chunk_trans_blocks(inode, len_lblk);
depth = ext_depth(inode);
}
@@ -4640,50 +4647,60 @@ retry:
ext4_journal_stop(handle);
break;
}
ext4_update_inode_fsync_trans(handle, inode, 1);
ret = ext4_journal_stop(handle);
if (unlikely(ret))
break;
/*
* allow a full retry cycle for any remaining allocations
*/
retries = 0;
epos = EXT4_LBLK_TO_B(inode, map.m_lblk + ret);
inode_set_ctime_current(inode);
if (new_size) {
if (alloc_zero &&
(map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN))) {
ret = ext4_issue_zeroout(inode, map.m_lblk, map.m_pblk,
map.m_len);
if (likely(!ret))
ret = ext4_convert_unwritten_extents(NULL,
inode, (loff_t)map.m_lblk << blkbits,
(loff_t)map.m_len << blkbits);
if (ret)
break;
}
map.m_lblk += map.m_len;
map.m_len = len_lblk = len_lblk - map.m_len;
epos = EXT4_LBLK_TO_B(inode, map.m_lblk);
}
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
if (!epos || !new_size)
return ret;
/*
* Allocate blocks, update the file size to match the size of the
* already successfully allocated blocks.
*/
if (epos > new_size)
epos = new_size;
if (ext4_update_inode_size(inode, epos) & 0x1)
inode_set_mtime_to_ts(inode,
inode_get_ctime(inode));
if (epos > old_size) {
pagecache_isize_extended(inode, old_size, epos);
ext4_zero_partial_blocks(handle, inode,
old_size, epos - old_size);
}
}
handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
if (IS_ERR(handle))
return ret ? ret : PTR_ERR(handle);
ext4_update_inode_size(inode, epos);
ret2 = ext4_mark_inode_dirty(handle, inode);
ext4_update_inode_fsync_trans(handle, inode, 1);
ret3 = ext4_journal_stop(handle);
ret2 = ret3 ? ret3 : ret2;
if (unlikely(ret2))
break;
if (alloc_zero &&
(map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN))) {
ret2 = ext4_issue_zeroout(inode, map.m_lblk, map.m_pblk,
map.m_len);
if (likely(!ret2))
ret2 = ext4_convert_unwritten_extents(NULL,
inode, (loff_t)map.m_lblk << blkbits,
(loff_t)map.m_len << blkbits);
if (ret2)
break;
}
if (epos > old_size)
pagecache_isize_extended(inode, old_size, epos);
map.m_lblk += ret;
map.m_len = len = len - ret;
}
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
return ret > 0 ? ret2 : ret;
return ret ? ret : ret2;
}
static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
@@ -4695,12 +4712,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
{
struct inode *inode = file_inode(file);
handle_t *handle = NULL;
loff_t new_size = 0;
loff_t align_start, align_end, new_size = 0;
loff_t end = offset + len;
ext4_lblk_t start_lblk, end_lblk;
unsigned int blocksize = i_blocksize(inode);
unsigned int blkbits = inode->i_blkbits;
int ret, flags, credits;
bool partial_zeroed = false;
int ret, flags;
trace_ext4_zero_range(inode, offset, len, mode);
WARN_ON_ONCE(!inode_is_locked(inode));
@@ -4720,11 +4736,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
/* Preallocate the range including the unaligned edges */
if (!IS_ALIGNED(offset | end, blocksize)) {
ext4_lblk_t alloc_lblk = offset >> blkbits;
ext4_lblk_t len_lblk = EXT4_MAX_BLOCKS(len, offset, blkbits);
ret = ext4_alloc_file_blocks(file, alloc_lblk, len_lblk,
new_size, flags);
ret = ext4_alloc_file_blocks(file, offset, len, new_size,
flags);
if (ret)
return ret;
}
@@ -4739,18 +4752,17 @@ static long ext4_zero_range(struct file *file, loff_t offset,
return ret;
/* Zero range excluding the unaligned edges */
start_lblk = EXT4_B_TO_LBLK(inode, offset);
end_lblk = end >> blkbits;
if (end_lblk > start_lblk) {
ext4_lblk_t zero_blks = end_lblk - start_lblk;
align_start = round_up(offset, blocksize);
align_end = round_down(end, blocksize);
if (align_end > align_start) {
if (mode & FALLOC_FL_WRITE_ZEROES)
flags = EXT4_GET_BLOCKS_CREATE_ZERO | EXT4_EX_NOCACHE;
else
flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
EXT4_EX_NOCACHE);
ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks,
new_size, flags);
ret = ext4_alloc_file_blocks(file, align_start,
align_end - align_start, new_size,
flags);
if (ret)
return ret;
}
@@ -4758,25 +4770,24 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (IS_ALIGNED(offset | end, blocksize))
return ret;
/*
* In worst case we have to writeout two nonadjacent unwritten
* blocks and update the inode
*/
credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
if (ext4_should_journal_data(inode))
credits += 2;
handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
/* Zero out partial block at the edges of the range */
ret = ext4_zero_partial_blocks(inode, offset, len, &partial_zeroed);
if (ret)
return ret;
if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
ret = filemap_write_and_wait_range(inode->i_mapping, offset,
end - 1);
if (ret)
return ret;
}
handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
ext4_std_error(inode->i_sb, ret);
return ret;
}
/* Zero out partial block at the edges of the range */
ret = ext4_zero_partial_blocks(handle, inode, offset, len);
if (ret)
goto out_handle;
if (new_size)
ext4_update_inode_size(inode, new_size);
ret = ext4_mark_inode_dirty(handle, inode);
@@ -4784,7 +4795,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
goto out_handle;
ext4_update_inode_fsync_trans(handle, inode, 1);
if (file->f_flags & O_SYNC)
if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
ext4_handle_sync(handle);
out_handle:
@@ -4798,15 +4809,11 @@ static long ext4_do_fallocate(struct file *file, loff_t offset,
struct inode *inode = file_inode(file);
loff_t end = offset + len;
loff_t new_size = 0;
ext4_lblk_t start_lblk, len_lblk;
int ret;
trace_ext4_fallocate_enter(inode, offset, len, mode);
WARN_ON_ONCE(!inode_is_locked(inode));
start_lblk = offset >> inode->i_blkbits;
len_lblk = EXT4_MAX_BLOCKS(len, offset, inode->i_blkbits);
/* We only support preallocation for extent-based files only. */
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
ret = -EOPNOTSUPP;
@@ -4821,17 +4828,19 @@ static long ext4_do_fallocate(struct file *file, loff_t offset,
goto out;
}
ret = ext4_alloc_file_blocks(file, start_lblk, len_lblk, new_size,
ret = ext4_alloc_file_blocks(file, offset, len, new_size,
EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT);
if (ret)
goto out;
if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) &&
EXT4_SB(inode->i_sb)->s_journal) {
ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
EXT4_I(inode)->i_sync_tid);
}
out:
trace_ext4_fallocate_exit(inode, offset, len_lblk, ret);
trace_ext4_fallocate_exit(inode, offset,
EXT4_MAX_BLOCKS(len, offset, inode->i_blkbits), ret);
return ret;
}
@@ -5598,7 +5607,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
goto out_handle;
ext4_update_inode_fsync_trans(handle, inode, 1);
if (IS_SYNC(inode))
if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
ext4_handle_sync(handle);
out_handle:
@@ -5722,7 +5731,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
goto out_handle;
ext4_update_inode_fsync_trans(handle, inode, 1);
if (IS_SYNC(inode))
if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
ext4_handle_sync(handle);
out_handle:

View File

@@ -13,6 +13,7 @@
#include "mballoc.h"
#include <linux/lockdep.h>
#include <linux/wait_bit.h>
/*
* Ext4 Fast Commits
* -----------------
@@ -215,7 +216,6 @@ void ext4_fc_init_inode(struct inode *inode)
ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
INIT_LIST_HEAD(&ei->i_fc_list);
INIT_LIST_HEAD(&ei->i_fc_dilist);
init_waitqueue_head(&ei->i_fc_wait);
}
static bool ext4_fc_disabled(struct super_block *sb)
@@ -224,6 +224,12 @@ static bool ext4_fc_disabled(struct super_block *sb)
(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY));
}
static bool ext4_fc_eligible(struct super_block *sb)
{
return !ext4_fc_disabled(sb) &&
!(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE));
}
/*
* Remove inode from fast commit list. If the inode is being committed
* we wait until inode commit is done.
@@ -320,7 +326,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl
if (ext4_fc_disabled(sb))
return;
if (handle && !IS_ERR(handle))
if (!IS_ERR_OR_NULL(handle))
tid = handle->h_transaction->t_tid;
else {
read_lock(&sbi->s_journal->j_state_lock);
@@ -473,12 +479,7 @@ void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
if (ext4_fc_disabled(inode->i_sb))
return;
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
return;
if (ext4_fc_eligible(inode->i_sb))
__ext4_fc_track_unlink(handle, inode, dentry);
}
@@ -496,16 +497,10 @@ void __ext4_fc_track_link(handle_t *handle,
trace_ext4_fc_track_link(handle, inode, dentry, ret);
}
void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
void ext4_fc_track_link(handle_t *handle, struct inode *inode,
struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
if (ext4_fc_disabled(inode->i_sb))
return;
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
return;
if (ext4_fc_eligible(inode->i_sb))
__ext4_fc_track_link(handle, inode, dentry);
}
@@ -527,12 +522,7 @@ void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
if (ext4_fc_disabled(inode->i_sb))
return;
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
return;
if (ext4_fc_eligible(inode->i_sb))
__ext4_fc_track_create(handle, inode, dentry);
}
@@ -557,16 +547,13 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
if (S_ISDIR(inode->i_mode))
return;
if (ext4_fc_disabled(inode->i_sb))
return;
if (ext4_should_journal_data(inode)) {
ext4_fc_mark_ineligible(inode->i_sb,
EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
return;
}
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
if (!ext4_fc_eligible(inode->i_sb))
return;
/*
@@ -644,10 +631,7 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star
if (S_ISDIR(inode->i_mode))
return;
if (ext4_fc_disabled(inode->i_sb))
return;
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
if (!ext4_fc_eligible(inode->i_sb))
return;
if (ext4_has_inline_data(inode)) {
@@ -1446,7 +1430,6 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
struct inode *inode)
{
struct inode *dir = NULL;
struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
int ret = 0;
@@ -1457,21 +1440,7 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
goto out;
}
dentry_dir = d_obtain_alias(dir);
if (IS_ERR(dentry_dir)) {
ext4_debug("Failed to obtain dentry");
dentry_dir = NULL;
goto out;
}
dentry_inode = d_alloc(dentry_dir, &qstr_dname);
if (!dentry_inode) {
ext4_debug("Inode dentry not created.");
ret = -ENOMEM;
goto out;
}
ret = __ext4_link(dir, inode, dentry_inode);
ret = __ext4_link(dir, inode, &qstr_dname, NULL);
/*
* It's possible that link already existed since data blocks
* for the dir in question got persisted before we crashed OR
@@ -1485,16 +1454,8 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
ret = 0;
out:
if (dentry_dir) {
d_drop(dentry_dir);
dput(dentry_dir);
} else if (dir) {
if (dir)
iput(dir);
}
if (dentry_inode) {
d_drop(dentry_inode);
dput(dentry_inode);
}
return ret;
}
@@ -1759,8 +1720,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
}
/* Replay add range tag */
static int ext4_fc_replay_add_range(struct super_block *sb,
struct ext4_fc_tl_mem *tl, u8 *val)
static int ext4_fc_replay_add_range(struct super_block *sb, u8 *val)
{
struct ext4_fc_add_range fc_add_ex;
struct ext4_extent newex, *ex;
@@ -1880,8 +1840,7 @@ out:
/* Replay DEL_RANGE tag */
static int
ext4_fc_replay_del_range(struct super_block *sb,
struct ext4_fc_tl_mem *tl, u8 *val)
ext4_fc_replay_del_range(struct super_block *sb, u8 *val)
{
struct inode *inode;
struct ext4_fc_del_range lrange;
@@ -2251,13 +2210,13 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
ret = ext4_fc_replay_unlink(sb, &tl, val);
break;
case EXT4_FC_TAG_ADD_RANGE:
ret = ext4_fc_replay_add_range(sb, &tl, val);
ret = ext4_fc_replay_add_range(sb, val);
break;
case EXT4_FC_TAG_CREAT:
ret = ext4_fc_replay_create(sb, &tl, val);
break;
case EXT4_FC_TAG_DEL_RANGE:
ret = ext4_fc_replay_del_range(sb, &tl, val);
ret = ext4_fc_replay_del_range(sb, val);
break;
case EXT4_FC_TAG_INODE:
ret = ext4_fc_replay_inode(sb, &tl, val);

View File

@@ -270,6 +270,8 @@ static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
loff_t old_size = i_size_read(inode);
ssize_t ret, count;
count = ext4_generic_write_checks(iocb, from);
@@ -279,6 +281,21 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
ret = file_modified(iocb->ki_filp);
if (ret)
return ret;
/*
* If the position is beyond the EOF, it is necessary to zero out the
* partial block that beyond the existing EOF, as it may contains
* stale data written through mmap.
*/
if (iocb->ki_pos > old_size && !ext4_verity_in_progress(inode)) {
if (iocb->ki_flags & IOCB_NOWAIT)
return -EAGAIN;
ret = ext4_block_zero_eof(inode, old_size, iocb->ki_pos);
if (ret)
return ret;
}
return count;
}

View File

@@ -1468,10 +1468,9 @@ static int ext4_write_end(const struct kiocb *iocb,
folio_unlock(folio);
folio_put(folio);
if (old_size < pos && !verity) {
if (old_size < pos && !verity)
pagecache_isize_extended(inode, old_size, pos);
ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size);
}
/*
* Don't mark the inode dirty under folio lock. First, it unnecessarily
* makes the holding time of folio lock longer. Second, it forces lock
@@ -1586,10 +1585,8 @@ static int ext4_journalled_write_end(const struct kiocb *iocb,
folio_unlock(folio);
folio_put(folio);
if (old_size < pos && !verity) {
if (old_size < pos && !verity)
pagecache_isize_extended(inode, old_size, pos);
ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size);
}
if (size_changed) {
ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -1759,8 +1756,22 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
BUG_ON(!folio_test_locked(folio));
BUG_ON(folio_test_writeback(folio));
if (invalidate) {
if (folio_mapped(folio))
if (folio_mapped(folio)) {
folio_clear_dirty_for_io(folio);
/*
* Unmap folio from page
* tables to prevent
* subsequent accesses through
* stale PTEs. This ensures
* future accesses trigger new
* page faults rather than
* reusing the invalidated
* folio.
*/
unmap_mapping_pages(folio->mapping,
folio->index,
folio_nr_pages(folio), false);
}
block_invalidate_folio(folio, 0,
folio_size(folio));
folio_clear_uptodate(folio);
@@ -3043,17 +3054,23 @@ static int ext4_writepages(struct address_space *mapping,
int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
loff_t range_start, range_end;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_start = jinode->i_dirty_start,
.range_end = jinode->i_dirty_end,
};
struct mpage_da_data mpd = {
.inode = jinode->i_vfs_inode,
.wbc = &wbc,
.can_map = 0,
};
if (!jbd2_jinode_get_dirty_range(jinode, &range_start, &range_end))
return 0;
wbc.range_start = range_start;
wbc.range_end = range_end;
return ext4_do_writepages(&mpd);
}
@@ -3208,7 +3225,7 @@ static int ext4_da_do_write_end(struct address_space *mapping,
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
bool disksize_changed = false;
loff_t new_i_size, zero_len = 0;
loff_t new_i_size;
handle_t *handle;
if (unlikely(!folio_buffers(folio))) {
@@ -3252,19 +3269,15 @@ static int ext4_da_do_write_end(struct address_space *mapping,
folio_unlock(folio);
folio_put(folio);
if (pos > old_size) {
if (pos > old_size)
pagecache_isize_extended(inode, old_size, pos);
zero_len = pos - old_size;
}
if (!disksize_changed && !zero_len)
if (!disksize_changed)
return copied;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
if (zero_len)
ext4_zero_partial_blocks(handle, inode, old_size, zero_len);
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
@@ -4014,12 +4027,11 @@ void ext4_set_aops(struct inode *inode)
* ext4_punch_hole, etc) which needs to be properly zeroed out. Otherwise a
* racing writeback can come later and flush the stale pagecache to disk.
*/
static int __ext4_block_zero_page_range(handle_t *handle,
struct address_space *mapping, loff_t from, loff_t length)
static struct buffer_head *ext4_load_tail_bh(struct inode *inode, loff_t from)
{
unsigned int offset, blocksize, pos;
ext4_lblk_t iblock;
struct inode *inode = mapping->host;
struct address_space *mapping = inode->i_mapping;
struct buffer_head *bh;
struct folio *folio;
int err = 0;
@@ -4028,7 +4040,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
mapping_gfp_constraint(mapping, ~__GFP_FS));
if (IS_ERR(folio))
return PTR_ERR(folio);
return ERR_CAST(folio);
blocksize = inode->i_sb->s_blocksize;
@@ -4080,47 +4092,92 @@ static int __ext4_block_zero_page_range(handle_t *handle,
}
}
}
if (ext4_should_journal_data(inode)) {
BUFFER_TRACE(bh, "get write access");
err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
EXT4_JTR_NONE);
if (err)
goto unlock;
}
folio_zero_range(folio, offset, length);
BUFFER_TRACE(bh, "zeroed end of block");
if (ext4_should_journal_data(inode)) {
err = ext4_dirty_journalled_data(handle, bh);
} else {
mark_buffer_dirty(bh);
/*
* Only the written block requires ordered data to prevent
* exposing stale data.
*/
if (!buffer_unwritten(bh) && !buffer_delay(bh) &&
ext4_should_order_data(inode))
err = ext4_jbd2_inode_add_write(handle, inode, from,
length);
}
return bh;
unlock:
folio_unlock(folio);
folio_put(folio);
return err ? ERR_PTR(err) : NULL;
}
static int ext4_block_do_zero_range(struct inode *inode, loff_t from,
loff_t length, bool *did_zero,
bool *zero_written)
{
struct buffer_head *bh;
struct folio *folio;
bh = ext4_load_tail_bh(inode, from);
if (IS_ERR_OR_NULL(bh))
return PTR_ERR_OR_ZERO(bh);
folio = bh->b_folio;
folio_zero_range(folio, offset_in_folio(folio, from), length);
BUFFER_TRACE(bh, "zeroed end of block");
mark_buffer_dirty(bh);
if (did_zero)
*did_zero = true;
if (zero_written && !buffer_unwritten(bh) && !buffer_delay(bh))
*zero_written = true;
folio_unlock(folio);
folio_put(folio);
return 0;
}
static int ext4_block_journalled_zero_range(struct inode *inode, loff_t from,
loff_t length, bool *did_zero)
{
struct buffer_head *bh;
struct folio *folio;
handle_t *handle;
int err;
handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
bh = ext4_load_tail_bh(inode, from);
if (IS_ERR_OR_NULL(bh)) {
err = PTR_ERR_OR_ZERO(bh);
goto out_handle;
}
folio = bh->b_folio;
BUFFER_TRACE(bh, "get write access");
err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
EXT4_JTR_NONE);
if (err)
goto out;
folio_zero_range(folio, offset_in_folio(folio, from), length);
BUFFER_TRACE(bh, "zeroed end of block");
err = ext4_dirty_journalled_data(handle, bh);
if (err)
goto out;
if (did_zero)
*did_zero = true;
out:
folio_unlock(folio);
folio_put(folio);
out_handle:
ext4_journal_stop(handle);
return err;
}
/*
* ext4_block_zero_page_range() zeros out a mapping of length 'length'
* starting from file offset 'from'. The range to be zero'd must
* be contained with in one block. If the specified range exceeds
* the end of the block it will be shortened to end of the block
* that corresponds to 'from'
* Zeros out a mapping of length 'length' starting from file offset
* 'from'. The range to be zero'd must be contained with in one block.
* If the specified range exceeds the end of the block it will be
* shortened to end of the block that corresponds to 'from'.
*/
static int ext4_block_zero_page_range(handle_t *handle,
struct address_space *mapping, loff_t from, loff_t length)
static int ext4_block_zero_range(struct inode *inode,
loff_t from, loff_t length, bool *did_zero,
bool *zero_written)
{
struct inode *inode = mapping->host;
unsigned blocksize = inode->i_sb->s_blocksize;
unsigned int max = blocksize - (from & (blocksize - 1));
@@ -4132,40 +4189,73 @@ static int ext4_block_zero_page_range(handle_t *handle,
length = max;
if (IS_DAX(inode)) {
return dax_zero_range(inode, from, length, NULL,
return dax_zero_range(inode, from, length, did_zero,
&ext4_iomap_ops);
} else if (ext4_should_journal_data(inode)) {
return ext4_block_journalled_zero_range(inode, from, length,
did_zero);
}
return __ext4_block_zero_page_range(handle, mapping, from, length);
return ext4_block_do_zero_range(inode, from, length, did_zero,
zero_written);
}
/*
* ext4_block_truncate_page() zeroes out a mapping from file offset `from'
* up to the end of the block which corresponds to `from'.
* This required during truncate. We need to physically zero the tail end
* of that block so it doesn't yield old data if the file is later grown.
* Zero out a mapping from file offset 'from' up to the end of the block
* which corresponds to 'from' or to the given 'end' inside this block.
* This required during truncate up and performing append writes. We need
* to physically zero the tail end of that block so it doesn't yield old
* data if the file is grown.
*/
static int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from)
int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end)
{
unsigned length;
unsigned blocksize;
struct inode *inode = mapping->host;
unsigned int blocksize = i_blocksize(inode);
unsigned int offset;
loff_t length = end - from;
bool did_zero = false;
bool zero_written = false;
int err;
offset = from & (blocksize - 1);
if (!offset || from >= end)
return 0;
/* If we are processing an encrypted inode during orphan list handling */
if (IS_ENCRYPTED(inode) && !fscrypt_has_encryption_key(inode))
return 0;
blocksize = i_blocksize(inode);
length = blocksize - (from & (blocksize - 1));
if (length > blocksize - offset)
length = blocksize - offset;
return ext4_block_zero_page_range(handle, mapping, from, length);
err = ext4_block_zero_range(inode, from, length,
&did_zero, &zero_written);
if (err)
return err;
/*
* It's necessary to order zeroed data before update i_disksize when
* truncating up or performing an append write, because there might be
* exposing stale on-disk data which may caused by concurrent post-EOF
* mmap write during folio writeback.
*/
if (ext4_should_order_data(inode) &&
did_zero && zero_written && !IS_DAX(inode)) {
handle_t *handle;
handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
err = ext4_jbd2_inode_add_write(handle, inode, from, length);
ext4_journal_stop(handle);
if (err)
return err;
}
int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
loff_t lstart, loff_t length)
return 0;
}
int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length,
bool *did_zero)
{
struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
unsigned partial_start, partial_end;
ext4_fsblk_t start, end;
loff_t byte_end = (lstart + length - 1);
@@ -4180,22 +4270,21 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
/* Handle partial zero within the single block */
if (start == end &&
(partial_start || (partial_end != sb->s_blocksize - 1))) {
err = ext4_block_zero_page_range(handle, mapping,
lstart, length);
err = ext4_block_zero_range(inode, lstart, length, did_zero,
NULL);
return err;
}
/* Handle partial zero out on the start of the range */
if (partial_start) {
err = ext4_block_zero_page_range(handle, mapping,
lstart, sb->s_blocksize);
err = ext4_block_zero_range(inode, lstart, sb->s_blocksize,
did_zero, NULL);
if (err)
return err;
}
/* Handle partial zero out on the end of the range */
if (partial_end != sb->s_blocksize - 1)
err = ext4_block_zero_page_range(handle, mapping,
byte_end - partial_end,
partial_end + 1);
err = ext4_block_zero_range(inode, byte_end - partial_end,
partial_end + 1, did_zero, NULL);
return err;
}
@@ -4344,6 +4433,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
loff_t end = offset + length;
handle_t *handle;
unsigned int credits;
bool partial_zeroed = false;
int ret;
trace_ext4_punch_hole(inode, offset, length, 0);
@@ -4370,17 +4460,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
end = max_end;
length = end - offset;
/*
* Attach jinode to inode for jbd2 if we do any zeroing of partial
* block.
*/
if (!IS_ALIGNED(offset | end, sb->s_blocksize)) {
ret = ext4_inode_attach_jinode(inode);
if (ret < 0)
return ret;
}
ret = ext4_update_disksize_before_punch(inode, offset, length);
if (ret)
return ret;
@@ -4390,8 +4469,18 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
if (ret)
return ret;
ret = ext4_zero_partial_blocks(inode, offset, length, &partial_zeroed);
if (ret)
return ret;
if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
ret = filemap_write_and_wait_range(inode->i_mapping, offset,
end - 1);
if (ret)
return ret;
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
credits = ext4_chunk_trans_extent(inode, 2);
credits = ext4_chunk_trans_extent(inode, 0);
else
credits = ext4_blocks_for_truncate(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
@@ -4401,10 +4490,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
return ret;
}
ret = ext4_zero_partial_blocks(handle, inode, offset, length);
if (ret)
goto out_handle;
/* If there are blocks to remove, do it */
start_lblk = EXT4_B_TO_LBLK(inode, offset);
end_lblk = end >> inode->i_blkbits;
@@ -4441,7 +4526,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
goto out_handle;
ext4_update_inode_fsync_trans(handle, inode, 1);
if (IS_SYNC(inode))
if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
ext4_handle_sync(handle);
out_handle:
ext4_journal_stop(handle);
@@ -4512,7 +4597,6 @@ int ext4_truncate(struct inode *inode)
unsigned int credits;
int err = 0, err2;
handle_t *handle;
struct address_space *mapping = inode->i_mapping;
/*
* There is a possibility that we're either freeing the inode
@@ -4542,6 +4626,11 @@ int ext4_truncate(struct inode *inode)
err = ext4_inode_attach_jinode(inode);
if (err)
goto out_trace;
/* Zero to the end of the block containing i_size */
err = ext4_block_zero_eof(inode, inode->i_size, LLONG_MAX);
if (err)
goto out_trace;
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4555,9 +4644,6 @@ int ext4_truncate(struct inode *inode)
goto out_trace;
}
if (inode->i_size & (inode->i_sb->s_blocksize - 1))
ext4_block_truncate_page(handle, mapping, inode->i_size);
/*
* We add the inode to the orphan list, so that if this
* truncate spans multiple transactions, and we crash, we will
@@ -5927,15 +6013,6 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
goto out_mmap_sem;
}
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto out_mmap_sem;
}
if (ext4_handle_valid(handle) && shrink) {
error = ext4_orphan_add(handle, inode);
orphan = 1;
}
/*
* Update c/mtime and tail zero the EOF folio on
* truncate up. ext4_truncate() handles the shrink case
@@ -5944,9 +6021,22 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (!shrink) {
inode_set_mtime_to_ts(inode,
inode_set_ctime_current(inode));
if (oldsize & (inode->i_sb->s_blocksize - 1))
ext4_block_truncate_page(handle,
inode->i_mapping, oldsize);
if (oldsize & (inode->i_sb->s_blocksize - 1)) {
error = ext4_block_zero_eof(inode,
oldsize, LLONG_MAX);
if (error)
goto out_mmap_sem;
}
}
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto out_mmap_sem;
}
if (ext4_handle_valid(handle) && shrink) {
error = ext4_orphan_add(handle, inode);
orphan = 1;
}
if (shrink)

View File

@@ -362,7 +362,6 @@ static int mbt_kunit_init(struct kunit *test)
return ret;
}
test->priv = sb;
kunit_activate_static_stub(test,
ext4_read_block_bitmap_nowait,
ext4_read_block_bitmap_nowait_stub);
@@ -383,6 +382,8 @@ static int mbt_kunit_init(struct kunit *test)
return -ENOMEM;
}
test->priv = sb;
return 0;
}
@@ -390,6 +391,9 @@ static void mbt_kunit_exit(struct kunit *test)
{
struct super_block *sb = (struct super_block *)test->priv;
if (!sb)
return;
mbt_mb_release(sb);
mbt_ctx_release(sb);
mbt_ext4_free_super_block(sb);

View File

@@ -2876,7 +2876,7 @@ ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
EXT4_MB_GRP_NEED_INIT(grp) &&
ext4_free_group_clusters(sb, gdp) > 0 ) {
bh = ext4_read_block_bitmap_nowait(sb, group, true);
if (bh && !IS_ERR(bh)) {
if (!IS_ERR_OR_NULL(bh)) {
if (!buffer_uptodate(bh) && cnt)
(*cnt)++;
brelse(bh);
@@ -4561,22 +4561,16 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
(req <= (size) || max <= (chunk_size))
/* first, try to predict filesize */
/* XXX: should this table be tunable? */
start_off = 0;
if (size <= 16 * 1024) {
size = 16 * 1024;
} else if (size <= 32 * 1024) {
size = 32 * 1024;
} else if (size <= 64 * 1024) {
size = 64 * 1024;
} else if (size <= 128 * 1024) {
size = 128 * 1024;
} else if (size <= 256 * 1024) {
size = 256 * 1024;
} else if (size <= 512 * 1024) {
size = 512 * 1024;
} else if (size <= 1024 * 1024) {
size = 1024 * 1024;
if (size <= SZ_1M) {
/*
* For files up to 1MB, round up the preallocation size to
* the next power of two, with a minimum of 16KB.
*/
if (size <= (unsigned long)SZ_16K)
size = SZ_16K;
else
size = roundup_pow_of_two(size);
} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
(21 - bsbits)) << 21;

View File

@@ -224,8 +224,8 @@ static int mext_move_begin(struct mext_data *mext, struct folio *folio[2],
}
/* Adjust the moving length according to the length of shorter folio. */
move_len = umin(folio_pos(folio[0]) + folio_size(folio[0]) - orig_pos,
folio_pos(folio[1]) + folio_size(folio[1]) - donor_pos);
move_len = umin(folio_next_pos(folio[0]) - orig_pos,
folio_next_pos(folio[1]) - donor_pos);
move_len >>= blkbits;
if (move_len < mext->orig_map.m_len)
mext->orig_map.m_len = move_len;

View File

@@ -647,7 +647,7 @@ static struct stats dx_show_leaf(struct inode *dir,
/* Directory is not encrypted */
(void) ext4fs_dirhash(dir, de->name,
de->name_len, &h);
printk("%*.s:(U)%x.%u ", len,
printk("%.*s:(U)%x.%u ", len,
name, h.hash,
(unsigned) ((char *) de
- base));
@@ -683,7 +683,7 @@ static struct stats dx_show_leaf(struct inode *dir,
(void) ext4fs_dirhash(dir,
de->name,
de->name_len, &h);
printk("%*.s:(E)%x.%u ", len, name,
printk("%.*s:(E)%x.%u ", len, name,
h.hash, (unsigned) ((char *) de
- base));
fscrypt_fname_free_buffer(
@@ -694,7 +694,7 @@ static struct stats dx_show_leaf(struct inode *dir,
char *name = de->name;
(void) ext4fs_dirhash(dir, de->name,
de->name_len, &h);
printk("%*.s:%x.%u ", len, name, h.hash,
printk("%.*s:%x.%u ", len, name, h.hash,
(unsigned) ((char *) de - base));
#endif
}
@@ -723,7 +723,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
struct stats stats;
printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
bh = ext4_bread(NULL,dir, block, 0);
if (!bh || IS_ERR(bh))
if (IS_ERR_OR_NULL(bh))
continue;
stats = levels?
dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
@@ -2353,10 +2353,10 @@ out_frames:
* may not sleep between calling this and putting something into
* the entry, as someone else might have used it while you slept.
*/
static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
static int __ext4_add_entry(handle_t *handle, struct inode *dir,
const struct qstr *d_name,
struct inode *inode)
{
struct inode *dir = d_inode(dentry->d_parent);
struct buffer_head *bh = NULL;
struct ext4_dir_entry_2 *de;
struct super_block *sb;
@@ -2373,13 +2373,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
sb = dir->i_sb;
blocksize = sb->s_blocksize;
if (fscrypt_is_nokey_name(dentry))
return -ENOKEY;
if (!generic_ci_validate_strict_name(dir, &dentry->d_name))
if (!generic_ci_validate_strict_name(dir, d_name))
return -EINVAL;
retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname);
retval = ext4_fname_setup_filename(dir, d_name, 0, &fname);
if (retval)
return retval;
@@ -2460,6 +2457,16 @@ out:
return retval;
}
static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode)
{
struct inode *dir = d_inode(dentry->d_parent);
if (fscrypt_is_nokey_name(dentry))
return -ENOKEY;
return __ext4_add_entry(handle, dir, &dentry->d_name, inode);
}
/*
* Returns 0 for success, or a negative error value
*/
@@ -3445,7 +3452,8 @@ out_retry:
return err;
}
int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
int __ext4_link(struct inode *dir, struct inode *inode,
const struct qstr *d_name, struct dentry *dentry)
{
handle_t *handle;
int err, retries = 0;
@@ -3461,9 +3469,8 @@ retry:
inode_set_ctime_current(inode);
ext4_inc_count(inode);
ihold(inode);
err = ext4_add_entry(handle, dentry, inode);
err = __ext4_add_entry(handle, dir, d_name, inode);
if (!err) {
err = ext4_mark_inode_dirty(handle, inode);
/* this can happen only for tmpfile being
@@ -3471,11 +3478,10 @@ retry:
*/
if (inode->i_nlink == 1)
ext4_orphan_del(handle, inode);
d_instantiate(dentry, inode);
ext4_fc_track_link(handle, dentry);
if (dentry)
ext4_fc_track_link(handle, inode, dentry);
} else {
drop_nlink(inode);
iput(inode);
}
ext4_journal_stop(handle);
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
@@ -3504,9 +3510,13 @@ static int ext4_link(struct dentry *old_dentry,
err = dquot_initialize(dir);
if (err)
return err;
return __ext4_link(dir, inode, dentry);
err = __ext4_link(dir, inode, &dentry->d_name, dentry);
if (!err) {
ihold(inode);
d_instantiate(dentry, inode);
}
return err;
}
/*
* Try to find buffer head where contains the parent block.
* It should be the inode block if it is inlined or the 1st block

View File

@@ -521,6 +521,7 @@ static bool ext4_journalled_writepage_needs_redirty(struct jbd2_inode *jinode,
{
struct buffer_head *bh, *head;
struct journal_head *jh;
transaction_t *trans = READ_ONCE(jinode->i_transaction);
bh = head = folio_buffers(folio);
do {
@@ -539,7 +540,7 @@ static bool ext4_journalled_writepage_needs_redirty(struct jbd2_inode *jinode,
*/
jh = bh2jh(bh);
if (buffer_dirty(bh) ||
(jh && (jh->b_transaction != jinode->i_transaction ||
(jh && (jh->b_transaction != trans ||
jh->b_next_transaction)))
return true;
} while ((bh = bh->b_this_page) != head);
@@ -550,15 +551,20 @@ static bool ext4_journalled_writepage_needs_redirty(struct jbd2_inode *jinode,
static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
loff_t range_start, range_end;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_start = jinode->i_dirty_start,
.range_end = jinode->i_dirty_end,
};
struct folio *folio = NULL;
int error;
if (!jbd2_jinode_get_dirty_range(jinode, &range_start, &range_end))
return 0;
wbc.range_start = range_start;
wbc.range_end = range_end;
/*
* writeback_iter() already checks for dirty pages and calls
* folio_clear_dirty_for_io(), which we want to write protect the

View File

@@ -92,7 +92,7 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode,
if (!dentry) {
bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT);
if (IS_ERR(bh) || !bh)
if (IS_ERR_OR_NULL(bh))
return ERR_PTR(-ECHILD);
if (!ext4_buffer_uptodate(bh)) {
brelse(bh);

View File

@@ -226,7 +226,7 @@ check_xattrs(struct inode *inode, struct buffer_head *bh,
/* Find the end of the names list */
while (!IS_LAST_ENTRY(e)) {
struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
if ((void *)next >= end) {
if ((void *)next + sizeof(u32) > end) {
err_str = "e_name out of bounds";
goto errout;
}
@@ -1165,7 +1165,7 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
{
struct inode *ea_inode;
struct ext4_xattr_entry *entry;
struct ext4_iloc iloc;
struct ext4_iloc iloc = { .bh = NULL };
bool dirty = false;
unsigned int ea_ino;
int err;
@@ -1260,6 +1260,8 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
ext4_warning_inode(parent,
"handle dirty metadata err=%d", err);
}
brelse(iloc.bh);
}
/*

View File

@@ -180,7 +180,13 @@ static int journal_wait_on_commit_record(journal_t *journal,
/* Send all the data buffers related to an inode */
int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
unsigned long flags;
if (!jinode)
return 0;
flags = READ_ONCE(jinode->i_flags);
if (!(flags & JI_WRITE_DATA))
return 0;
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
@@ -191,12 +197,30 @@ EXPORT_SYMBOL(jbd2_submit_inode_data);
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
!jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
struct address_space *mapping;
struct inode *inode;
unsigned long flags;
loff_t start_byte, end_byte;
if (!jinode)
return 0;
flags = READ_ONCE(jinode->i_flags);
if (!(flags & JI_WAIT_DATA))
return 0;
inode = jinode->i_vfs_inode;
if (!inode)
return 0;
mapping = inode->i_mapping;
if (!mapping)
return 0;
if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte))
return 0;
return filemap_fdatawait_range_keep_errors(
jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
jinode->i_dirty_end);
mapping, start_byte, end_byte);
}
EXPORT_SYMBOL(jbd2_wait_inode_data);
@@ -218,7 +242,8 @@ static int journal_submit_data_buffers(journal_t *journal,
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
if (!(jinode->i_flags & JI_WRITE_DATA))
continue;
jinode->i_flags |= JI_COMMIT_RUNNING;
WRITE_ONCE(jinode->i_flags,
jinode->i_flags | JI_COMMIT_RUNNING);
spin_unlock(&journal->j_list_lock);
/* submit the inode data buffers. */
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
@@ -229,7 +254,8 @@ static int journal_submit_data_buffers(journal_t *journal,
}
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
WRITE_ONCE(jinode->i_flags,
jinode->i_flags & ~JI_COMMIT_RUNNING);
smp_mb();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
@@ -240,10 +266,13 @@ static int journal_submit_data_buffers(journal_t *journal,
int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
{
struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
loff_t start_byte, end_byte;
if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte))
return 0;
return filemap_fdatawait_range_keep_errors(mapping,
jinode->i_dirty_start,
jinode->i_dirty_end);
start_byte, end_byte);
}
/*
@@ -262,7 +291,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
if (!(jinode->i_flags & JI_WAIT_DATA))
continue;
jinode->i_flags |= JI_COMMIT_RUNNING;
WRITE_ONCE(jinode->i_flags, jinode->i_flags | JI_COMMIT_RUNNING);
spin_unlock(&journal->j_list_lock);
/* wait for the inode data buffers writeout. */
if (journal->j_finish_inode_data_buffers) {
@@ -272,7 +301,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
}
cond_resched();
spin_lock(&journal->j_list_lock);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
WRITE_ONCE(jinode->i_flags, jinode->i_flags & ~JI_COMMIT_RUNNING);
smp_mb();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
@@ -288,8 +317,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
&jinode->i_transaction->t_inode_list);
} else {
jinode->i_transaction = NULL;
jinode->i_dirty_start = 0;
jinode->i_dirty_end = 0;
WRITE_ONCE(jinode->i_dirty_start_page, 0);
WRITE_ONCE(jinode->i_dirty_end_page, 0);
}
}
spin_unlock(&journal->j_list_lock);

View File

@@ -3018,8 +3018,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
jinode->i_next_transaction = NULL;
jinode->i_vfs_inode = inode;
jinode->i_flags = 0;
jinode->i_dirty_start = 0;
jinode->i_dirty_end = 0;
jinode->i_dirty_start_page = 0;
jinode->i_dirty_end_page = 0;
INIT_LIST_HEAD(&jinode->i_list);
}
@@ -3176,4 +3176,3 @@ MODULE_DESCRIPTION("Generic filesystem journal-writing module");
MODULE_LICENSE("GPL");
module_init(journal_init);
module_exit(journal_exit);

View File

@@ -428,6 +428,7 @@ void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
journal_t *journal = handle->h_transaction->t_journal;
int need_cancel;
struct buffer_head *bh = jh2bh(jh);
struct address_space *bh_mapping = bh->b_folio->mapping;
jbd2_debug(4, "journal_head %p, cancelling revoke\n", jh);
@@ -464,12 +465,13 @@ void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
* buffer_head? If so, we'd better make sure we clear the
* revoked status on any hashed alias too, otherwise the revoke
* state machine will get very upset later on. */
if (need_cancel) {
if (need_cancel && !sb_is_blkdev_sb(bh_mapping->host->i_sb)) {
struct buffer_head *bh2;
bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr,
bh->b_size);
if (bh2) {
if (bh2 != bh)
WARN_ON_ONCE(bh2 == bh);
clear_buffer_revoked(bh2);
__brelse(bh2);
}

View File

@@ -474,7 +474,8 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
return ERR_PTR(-EROFS);
if (handle) {
J_ASSERT(handle->h_transaction->t_journal == journal);
if (WARN_ON_ONCE(handle->h_transaction->t_journal != journal))
return ERR_PTR(-EINVAL);
handle->h_ref++;
return handle;
}
@@ -1036,7 +1037,13 @@ repeat:
*/
if (!jh->b_transaction) {
JBUFFER_TRACE(jh, "no transaction");
J_ASSERT_JH(jh, !jh->b_next_transaction);
if (WARN_ON_ONCE(jh->b_next_transaction)) {
spin_unlock(&jh->b_state_lock);
unlock_buffer(bh);
error = -EINVAL;
jbd2_journal_abort(journal, error);
goto out;
}
JBUFFER_TRACE(jh, "file as BJ_Reserved");
/*
* Make sure all stores to jh (b_modified, b_frozen_data) are
@@ -1069,13 +1076,27 @@ repeat:
*/
if (jh->b_frozen_data) {
JBUFFER_TRACE(jh, "has frozen data");
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
if (WARN_ON_ONCE(jh->b_next_transaction)) {
spin_unlock(&jh->b_state_lock);
error = -EINVAL;
jbd2_journal_abort(journal, error);
goto out;
}
goto attach_next;
}
JBUFFER_TRACE(jh, "owned by older transaction");
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
if (WARN_ON_ONCE(jh->b_next_transaction ||
jh->b_transaction !=
journal->j_committing_transaction)) {
pr_err("JBD2: %s: assertion failure: b_next_transaction=%p b_transaction=%p j_committing_transaction=%p\n",
journal->j_devname, jh->b_next_transaction,
jh->b_transaction, journal->j_committing_transaction);
spin_unlock(&jh->b_state_lock);
error = -EINVAL;
jbd2_journal_abort(journal, error);
goto out;
}
/*
* There is one case we have to be very careful about. If the
@@ -1302,7 +1323,12 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
goto out;
}
J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
if (WARN_ON_ONCE(!buffer_locked(jh2bh(jh)))) {
err = -EINVAL;
spin_unlock(&jh->b_state_lock);
jbd2_journal_abort(journal, err);
goto out;
}
if (jh->b_transaction == NULL) {
/*
@@ -1491,7 +1517,7 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal;
journal_t *journal = transaction->t_journal;
struct journal_head *jh;
int ret = 0;
@@ -1515,8 +1541,14 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (data_race(jh->b_transaction != transaction &&
jh->b_next_transaction != transaction)) {
spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
jh->b_next_transaction == transaction);
if (WARN_ON_ONCE(jh->b_transaction != transaction &&
jh->b_next_transaction != transaction)) {
pr_err("JBD2: %s: assertion failure: b_transaction=%p transaction=%p b_next_transaction=%p\n",
journal->j_devname, jh->b_transaction,
transaction, jh->b_next_transaction);
ret = -EINVAL;
goto out_unlock_bh;
}
spin_unlock(&jh->b_state_lock);
}
if (data_race(jh->b_modified == 1)) {
@@ -1524,15 +1556,15 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (data_race(jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata)) {
spin_lock(&jh->b_state_lock);
if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata)
pr_err("JBD2: assertion failure: h_type=%u "
"h_line_no=%u block_no=%llu jlist=%u\n",
if (WARN_ON_ONCE(jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata)) {
pr_err("JBD2: assertion failure: h_type=%u h_line_no=%u block_no=%llu jlist=%u\n",
handle->h_type, handle->h_line_no,
(unsigned long long) bh->b_blocknr,
jh->b_jlist);
J_ASSERT_JH(jh, jh->b_transaction != transaction ||
jh->b_jlist == BJ_Metadata);
ret = -EINVAL;
goto out_unlock_bh;
}
spin_unlock(&jh->b_state_lock);
}
goto out;
@@ -1552,8 +1584,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
goto out_unlock_bh;
}
journal = transaction->t_journal;
if (jh->b_modified == 0) {
/*
* This buffer's got modified and becoming part
@@ -1631,7 +1661,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
}
/* That test should have eliminated the following case: */
J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
if (WARN_ON_ONCE(jh->b_frozen_data)) {
ret = -EINVAL;
goto out_unlock_bh;
}
JBUFFER_TRACE(jh, "file as BJ_Metadata");
spin_lock(&journal->j_list_lock);
@@ -1670,6 +1703,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
int err = 0;
int was_modified = 0;
int wait_for_writeback = 0;
int abort_journal = 0;
if (is_handle_aborted(handle))
return -EROFS;
@@ -1703,7 +1737,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
jh->b_modified = 0;
if (jh->b_transaction == transaction) {
J_ASSERT_JH(jh, !jh->b_frozen_data);
if (WARN_ON_ONCE(jh->b_frozen_data)) {
err = -EINVAL;
abort_journal = 1;
goto drop;
}
/* If we are forgetting a buffer which is already part
* of this transaction, then we can just drop it from
@@ -1742,8 +1780,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
}
spin_unlock(&journal->j_list_lock);
} else if (jh->b_transaction) {
J_ASSERT_JH(jh, (jh->b_transaction ==
journal->j_committing_transaction));
if (WARN_ON_ONCE(jh->b_transaction != journal->j_committing_transaction)) {
err = -EINVAL;
abort_journal = 1;
goto drop;
}
/* However, if the buffer is still owned by a prior
* (committing) transaction, we can't drop it yet... */
JBUFFER_TRACE(jh, "belongs to older transaction");
@@ -1761,7 +1802,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
jh->b_next_transaction = transaction;
spin_unlock(&journal->j_list_lock);
} else {
J_ASSERT(jh->b_next_transaction == transaction);
if (WARN_ON_ONCE(jh->b_next_transaction != transaction)) {
err = -EINVAL;
abort_journal = 1;
goto drop;
}
/*
* only drop a reference if this transaction modified
@@ -1807,6 +1852,8 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
drop:
__brelse(bh);
spin_unlock(&jh->b_state_lock);
if (abort_journal)
jbd2_journal_abort(journal, err);
if (wait_for_writeback)
wait_on_buffer(bh);
jbd2_journal_put_journal_head(jh);
@@ -2131,7 +2178,8 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
struct buffer_head *bh;
bool ret = false;
J_ASSERT(folio_test_locked(folio));
if (WARN_ON_ONCE(!folio_test_locked(folio)))
return false;
head = folio_buffers(folio);
bh = head;
@@ -2646,6 +2694,9 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal;
pgoff_t start_page, end_page;
int err = 0;
int abort_transaction = 0;
if (is_handle_aborted(handle))
return -EROFS;
@@ -2654,15 +2705,21 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
jbd2_debug(4, "Adding inode %llu, tid:%d\n", jinode->i_vfs_inode->i_ino,
transaction->t_tid);
spin_lock(&journal->j_list_lock);
jinode->i_flags |= flags;
start_page = (pgoff_t)(start_byte >> PAGE_SHIFT);
end_page = (pgoff_t)(end_byte >> PAGE_SHIFT) + 1;
if (jinode->i_dirty_end) {
jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
spin_lock(&journal->j_list_lock);
WRITE_ONCE(jinode->i_flags, jinode->i_flags | flags);
if (jinode->i_dirty_start_page != jinode->i_dirty_end_page) {
WRITE_ONCE(jinode->i_dirty_start_page,
min(jinode->i_dirty_start_page, start_page));
WRITE_ONCE(jinode->i_dirty_end_page,
max(jinode->i_dirty_end_page, end_page));
} else {
jinode->i_dirty_start = start_byte;
jinode->i_dirty_end = end_byte;
/* Publish a new non-empty range by making end visible first. */
WRITE_ONCE(jinode->i_dirty_end_page, end_page);
WRITE_ONCE(jinode->i_dirty_start_page, start_page);
}
/* Is inode already attached where we need it? */
@@ -2680,20 +2737,33 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
/* On some different transaction's list - should be
* the committing one */
if (jinode->i_transaction) {
J_ASSERT(jinode->i_next_transaction == NULL);
J_ASSERT(jinode->i_transaction ==
if (WARN_ON_ONCE(jinode->i_next_transaction ||
jinode->i_transaction !=
journal->j_committing_transaction)) {
pr_err("JBD2: %s: assertion failure: i_next_transaction=%p i_transaction=%p j_committing_transaction=%p\n",
journal->j_devname, jinode->i_next_transaction,
jinode->i_transaction,
journal->j_committing_transaction);
err = -EINVAL;
abort_transaction = 1;
goto done;
}
jinode->i_next_transaction = transaction;
goto done;
}
/* Not on any transaction list... */
J_ASSERT(!jinode->i_next_transaction);
if (WARN_ON_ONCE(jinode->i_next_transaction)) {
err = -EINVAL;
abort_transaction = 1;
goto done;
}
jinode->i_transaction = transaction;
list_add(&jinode->i_list, &transaction->t_inode_list);
done:
spin_unlock(&journal->j_list_lock);
return 0;
if (abort_transaction)
jbd2_journal_abort(journal, err);
return err;
}
int jbd2_journal_inode_ranged_write(handle_t *handle,
@@ -2739,7 +2809,7 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
int ret = 0;
/* This is a quick check to avoid locking if not necessary */
if (!jinode->i_transaction)
if (!READ_ONCE(jinode->i_transaction))
goto out;
/* Locks are here just to force reading of recent values, it is
* enough that the transaction was not committing before we started

View File

@@ -899,8 +899,13 @@ bail:
static int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
return filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
jinode->i_dirty_start, jinode->i_dirty_end);
struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
loff_t range_start, range_end;
if (!jbd2_jinode_get_dirty_range(jinode, &range_start, &range_end))
return 0;
return filemap_fdatawrite_range(mapping, range_start, range_end);
}
int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)

View File

@@ -429,22 +429,46 @@ struct jbd2_inode {
unsigned long i_flags;
/**
* @i_dirty_start:
* @i_dirty_start_page:
*
* Dirty range start in PAGE_SIZE units.
*
* The dirty range is empty if @i_dirty_start_page is greater than or
* equal to @i_dirty_end_page.
*
* Offset in bytes where the dirty range for this inode starts.
* [j_list_lock]
*/
loff_t i_dirty_start;
pgoff_t i_dirty_start_page;
/**
* @i_dirty_end:
* @i_dirty_end_page:
*
* Inclusive offset in bytes where the dirty range for this inode
* ends. [j_list_lock]
* Dirty range end in PAGE_SIZE units (exclusive).
*
* [j_list_lock]
*/
loff_t i_dirty_end;
pgoff_t i_dirty_end_page;
};
/*
* Lockless readers treat start_page >= end_page as an empty range.
* Writers publish a new non-empty range by storing i_dirty_end_page before
* i_dirty_start_page.
*/
static inline bool jbd2_jinode_get_dirty_range(const struct jbd2_inode *jinode,
loff_t *start, loff_t *end)
{
pgoff_t start_page = READ_ONCE(jinode->i_dirty_start_page);
pgoff_t end_page = READ_ONCE(jinode->i_dirty_end_page);
if (start_page >= end_page)
return false;
*start = (loff_t)start_page << PAGE_SHIFT;
*end = ((loff_t)end_page << PAGE_SHIFT) - 1;
return true;
}
struct jbd2_revoke_table_s;
/**