mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
jbd2: store jinode dirty range in PAGE_SIZE units
jbd2_inode fields are updated under journal->j_list_lock, but some paths read them without holding the lock (e.g. fast commit helpers and ordered truncate helpers). READ_ONCE() alone is not sufficient for the dirty range fields when they are stored as loff_t because 32-bit platforms can observe torn loads. Store the dirty range in PAGE_SIZE units as pgoff_t instead. Represent the dirty range end as an exclusive end page. This avoids a special sentinel value and keeps MAX_LFS_FILESIZE on 32-bit representable. Publish a new dirty range by updating end_page before start_page, and treat start_page >= end_page as empty in the accessor for robustness. Use READ_ONCE() on the read side and WRITE_ONCE() on the write side for the dirty range and i_flags to match the existing lockless access pattern. Suggested-by: Jan Kara <jack@suse.cz> Reviewed-by: Jan Kara <jack@suse.cz> Signed-off-by: Li Chen <me@linux.beauty> Link: https://patch.msgid.link/20260306085643.465275-5-me@linux.beauty Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
@@ -180,7 +180,13 @@ static int journal_wait_on_commit_record(journal_t *journal,
|
||||
/* Send all the data buffers related to an inode */
|
||||
int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
|
||||
{
|
||||
if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
|
||||
unsigned long flags;
|
||||
|
||||
if (!jinode)
|
||||
return 0;
|
||||
|
||||
flags = READ_ONCE(jinode->i_flags);
|
||||
if (!(flags & JI_WRITE_DATA))
|
||||
return 0;
|
||||
|
||||
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
|
||||
@@ -191,12 +197,30 @@ EXPORT_SYMBOL(jbd2_submit_inode_data);
|
||||
|
||||
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
|
||||
{
|
||||
if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
|
||||
!jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
|
||||
struct address_space *mapping;
|
||||
struct inode *inode;
|
||||
unsigned long flags;
|
||||
loff_t start_byte, end_byte;
|
||||
|
||||
if (!jinode)
|
||||
return 0;
|
||||
|
||||
flags = READ_ONCE(jinode->i_flags);
|
||||
if (!(flags & JI_WAIT_DATA))
|
||||
return 0;
|
||||
|
||||
inode = jinode->i_vfs_inode;
|
||||
if (!inode)
|
||||
return 0;
|
||||
|
||||
mapping = inode->i_mapping;
|
||||
if (!mapping)
|
||||
return 0;
|
||||
|
||||
if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte))
|
||||
return 0;
|
||||
return filemap_fdatawait_range_keep_errors(
|
||||
jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
|
||||
jinode->i_dirty_end);
|
||||
mapping, start_byte, end_byte);
|
||||
}
|
||||
EXPORT_SYMBOL(jbd2_wait_inode_data);
|
||||
|
||||
@@ -218,7 +242,8 @@ static int journal_submit_data_buffers(journal_t *journal,
|
||||
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
|
||||
if (!(jinode->i_flags & JI_WRITE_DATA))
|
||||
continue;
|
||||
jinode->i_flags |= JI_COMMIT_RUNNING;
|
||||
WRITE_ONCE(jinode->i_flags,
|
||||
jinode->i_flags | JI_COMMIT_RUNNING);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
/* submit the inode data buffers. */
|
||||
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
|
||||
@@ -229,7 +254,8 @@ static int journal_submit_data_buffers(journal_t *journal,
|
||||
}
|
||||
spin_lock(&journal->j_list_lock);
|
||||
J_ASSERT(jinode->i_transaction == commit_transaction);
|
||||
jinode->i_flags &= ~JI_COMMIT_RUNNING;
|
||||
WRITE_ONCE(jinode->i_flags,
|
||||
jinode->i_flags & ~JI_COMMIT_RUNNING);
|
||||
smp_mb();
|
||||
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
|
||||
}
|
||||
@@ -240,10 +266,13 @@ static int journal_submit_data_buffers(journal_t *journal,
|
||||
int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
|
||||
{
|
||||
struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
|
||||
loff_t start_byte, end_byte;
|
||||
|
||||
if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte))
|
||||
return 0;
|
||||
|
||||
return filemap_fdatawait_range_keep_errors(mapping,
|
||||
jinode->i_dirty_start,
|
||||
jinode->i_dirty_end);
|
||||
start_byte, end_byte);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -262,7 +291,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
|
||||
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
|
||||
if (!(jinode->i_flags & JI_WAIT_DATA))
|
||||
continue;
|
||||
jinode->i_flags |= JI_COMMIT_RUNNING;
|
||||
WRITE_ONCE(jinode->i_flags, jinode->i_flags | JI_COMMIT_RUNNING);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
/* wait for the inode data buffers writeout. */
|
||||
if (journal->j_finish_inode_data_buffers) {
|
||||
@@ -272,7 +301,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
|
||||
}
|
||||
cond_resched();
|
||||
spin_lock(&journal->j_list_lock);
|
||||
jinode->i_flags &= ~JI_COMMIT_RUNNING;
|
||||
WRITE_ONCE(jinode->i_flags, jinode->i_flags & ~JI_COMMIT_RUNNING);
|
||||
smp_mb();
|
||||
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
|
||||
}
|
||||
@@ -288,8 +317,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
|
||||
&jinode->i_transaction->t_inode_list);
|
||||
} else {
|
||||
jinode->i_transaction = NULL;
|
||||
jinode->i_dirty_start = 0;
|
||||
jinode->i_dirty_end = 0;
|
||||
WRITE_ONCE(jinode->i_dirty_start_page, 0);
|
||||
WRITE_ONCE(jinode->i_dirty_end_page, 0);
|
||||
}
|
||||
}
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
|
||||
@@ -3018,8 +3018,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
|
||||
jinode->i_next_transaction = NULL;
|
||||
jinode->i_vfs_inode = inode;
|
||||
jinode->i_flags = 0;
|
||||
jinode->i_dirty_start = 0;
|
||||
jinode->i_dirty_end = 0;
|
||||
jinode->i_dirty_start_page = 0;
|
||||
jinode->i_dirty_end_page = 0;
|
||||
INIT_LIST_HEAD(&jinode->i_list);
|
||||
}
|
||||
|
||||
@@ -3176,4 +3176,3 @@ MODULE_DESCRIPTION("Generic filesystem journal-writing module");
|
||||
MODULE_LICENSE("GPL");
|
||||
module_init(journal_init);
|
||||
module_exit(journal_exit);
|
||||
|
||||
|
||||
@@ -2694,6 +2694,7 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
|
||||
{
|
||||
transaction_t *transaction = handle->h_transaction;
|
||||
journal_t *journal;
|
||||
pgoff_t start_page, end_page;
|
||||
int err = 0;
|
||||
int abort_transaction = 0;
|
||||
|
||||
@@ -2704,15 +2705,21 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
|
||||
jbd2_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
|
||||
transaction->t_tid);
|
||||
|
||||
spin_lock(&journal->j_list_lock);
|
||||
jinode->i_flags |= flags;
|
||||
start_page = (pgoff_t)(start_byte >> PAGE_SHIFT);
|
||||
end_page = (pgoff_t)(end_byte >> PAGE_SHIFT) + 1;
|
||||
|
||||
if (jinode->i_dirty_end) {
|
||||
jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
|
||||
jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
|
||||
spin_lock(&journal->j_list_lock);
|
||||
WRITE_ONCE(jinode->i_flags, jinode->i_flags | flags);
|
||||
|
||||
if (jinode->i_dirty_start_page != jinode->i_dirty_end_page) {
|
||||
WRITE_ONCE(jinode->i_dirty_start_page,
|
||||
min(jinode->i_dirty_start_page, start_page));
|
||||
WRITE_ONCE(jinode->i_dirty_end_page,
|
||||
max(jinode->i_dirty_end_page, end_page));
|
||||
} else {
|
||||
jinode->i_dirty_start = start_byte;
|
||||
jinode->i_dirty_end = end_byte;
|
||||
/* Publish a new non-empty range by making end visible first. */
|
||||
WRITE_ONCE(jinode->i_dirty_end_page, end_page);
|
||||
WRITE_ONCE(jinode->i_dirty_start_page, start_page);
|
||||
}
|
||||
|
||||
/* Is inode already attached where we need it? */
|
||||
@@ -2802,7 +2809,7 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
|
||||
int ret = 0;
|
||||
|
||||
/* This is a quick check to avoid locking if not necessary */
|
||||
if (!jinode->i_transaction)
|
||||
if (!READ_ONCE(jinode->i_transaction))
|
||||
goto out;
|
||||
/* Locks are here just to force reading of recent values, it is
|
||||
* enough that the transaction was not committing before we started
|
||||
|
||||
@@ -429,33 +429,43 @@ struct jbd2_inode {
|
||||
unsigned long i_flags;
|
||||
|
||||
/**
|
||||
* @i_dirty_start:
|
||||
* @i_dirty_start_page:
|
||||
*
|
||||
* Dirty range start in PAGE_SIZE units.
|
||||
*
|
||||
* The dirty range is empty if @i_dirty_start_page is greater than or
|
||||
* equal to @i_dirty_end_page.
|
||||
*
|
||||
* Offset in bytes where the dirty range for this inode starts.
|
||||
* [j_list_lock]
|
||||
*/
|
||||
loff_t i_dirty_start;
|
||||
pgoff_t i_dirty_start_page;
|
||||
|
||||
/**
|
||||
* @i_dirty_end:
|
||||
* @i_dirty_end_page:
|
||||
*
|
||||
* Inclusive offset in bytes where the dirty range for this inode
|
||||
* ends. [j_list_lock]
|
||||
* Dirty range end in PAGE_SIZE units (exclusive).
|
||||
*
|
||||
* [j_list_lock]
|
||||
*/
|
||||
loff_t i_dirty_end;
|
||||
pgoff_t i_dirty_end_page;
|
||||
};
|
||||
|
||||
/*
|
||||
* Lockless readers treat start_page >= end_page as an empty range.
|
||||
* Writers publish a new non-empty range by storing i_dirty_end_page before
|
||||
* i_dirty_start_page.
|
||||
*/
|
||||
static inline bool jbd2_jinode_get_dirty_range(const struct jbd2_inode *jinode,
|
||||
loff_t *start, loff_t *end)
|
||||
{
|
||||
loff_t start_byte = jinode->i_dirty_start;
|
||||
loff_t end_byte = jinode->i_dirty_end;
|
||||
pgoff_t start_page = READ_ONCE(jinode->i_dirty_start_page);
|
||||
pgoff_t end_page = READ_ONCE(jinode->i_dirty_end_page);
|
||||
|
||||
if (!end_byte)
|
||||
if (start_page >= end_page)
|
||||
return false;
|
||||
|
||||
*start = start_byte;
|
||||
*end = end_byte;
|
||||
*start = (loff_t)start_page << PAGE_SHIFT;
|
||||
*end = ((loff_t)end_page << PAGE_SHIFT) - 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user