Merge tag 'vfs-7.1-rc1.bh.metadata' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs buffer_head updates from Christian Brauner:
 "This cleans up the mess that has accumulated over the years in
  metadata buffer_head tracking for inodes.

  It moves the tracking into a dedicated structure in the
  filesystem-private part of the inode (so that we don't use
  private_list, private_data, and private_lock in struct address_space),
  and also moves a couple of other users of private_data and
  private_list so these are removed from struct address_space, saving
  3 longs in struct inode for 99% of inodes"

* tag 'vfs-7.1-rc1.bh.metadata' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (42 commits)
  fs: Drop i_private_list from address_space
  fs: Drop mapping_metadata_bhs from address space
  ext4: Track metadata bhs in fs-private inode part
  minix: Track metadata bhs in fs-private inode part
  udf: Track metadata bhs in fs-private inode part
  fat: Track metadata bhs in fs-private inode part
  bfs: Track metadata bhs in fs-private inode part
  affs: Track metadata bhs in fs-private inode part
  ext2: Track metadata bhs in fs-private inode part
  fs: Provide functions for handling mapping_metadata_bhs directly
  fs: Switch inode_has_buffers() to take mapping_metadata_bhs
  fs: Make bhs point to mapping_metadata_bhs
  fs: Move metadata bhs tracking to a separate struct
  fs: Fold fsync_buffers_list() into sync_mapping_buffers()
  fs: Drop osync_buffers_list()
  kvm: Use private inode list instead of i_private_list
  fs: Remove i_private_data
  aio: Stop using i_private_data and i_private_lock
  hugetlbfs: Stop using i_private_data
  fs: Stop using i_private_data for metadata bh tracking
  ...
This commit is contained in:
Linus Torvalds
2026-04-13 12:46:42 -07:00
65 changed files with 478 additions and 496 deletions

View File

@@ -417,19 +417,11 @@ static void init_once(void *data)
inode_init_once(&ei->vfs_inode);
}
/*
 * ->evict_inode for block-device inodes: drop all page-cache pages and
 * any associated buffer heads before the inode is torn down.
 */
static void bdev_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
invalidate_inode_buffers(inode); /* is it needed here? bdev buffers may already be gone — confirm */
clear_inode(inode);
}
/* Superblock operations for the bdev pseudo-filesystem. */
static const struct super_operations bdev_sops = {
.statfs = simple_statfs,
.alloc_inode = bdev_alloc_inode,
.free_inode = bdev_free_inode,
.drop_inode = inode_just_drop,
.evict_inode = bdev_evict_inode,
};
static int bd_init_fs_context(struct fs_context *fc)

View File

@@ -389,7 +389,7 @@ const struct file_operations adfs_dir_operations = {
.read = generic_read_dir,
.llseek = generic_file_llseek,
.iterate_shared = adfs_iterate,
.fsync = generic_file_fsync,
.fsync = simple_fsync,
};
static int

View File

@@ -26,7 +26,7 @@ const struct file_operations adfs_file_operations = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
.mmap_prepare = generic_file_mmap_prepare,
.fsync = generic_file_fsync,
.fsync = simple_fsync,
.write_iter = generic_file_write_iter,
.splice_read = filemap_splice_read,
};

View File

@@ -44,6 +44,7 @@ struct affs_inode_info {
struct mutex i_link_lock; /* Protects internal inode access. */
struct mutex i_ext_lock; /* Protects internal inode access. */
#define i_hash_lock i_ext_lock
struct mapping_metadata_bhs i_metadata_bhs;
u32 i_blkcnt; /* block count */
u32 i_extcnt; /* extended block count */
u32 *i_lc; /* linear cache of extended blocks */
@@ -151,6 +152,7 @@ extern bool affs_nofilenametruncate(const struct dentry *dentry);
extern int affs_check_name(const unsigned char *name, int len,
bool notruncate);
extern int affs_copy_name(unsigned char *bstr, struct dentry *dentry);
struct mapping_metadata_bhs *affs_get_metadata_bhs(struct inode *inode);
/* bitmap. c */

View File

@@ -57,7 +57,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh)
AFFS_TAIL(sb, dir_bh)->hash_chain = cpu_to_be32(ino);
affs_adjust_checksum(dir_bh, ino);
mark_buffer_dirty_inode(dir_bh, dir);
mmb_mark_buffer_dirty(dir_bh, &AFFS_I(dir)->i_metadata_bhs);
affs_brelse(dir_bh);
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
@@ -100,7 +100,7 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
else
AFFS_TAIL(sb, bh)->hash_chain = ino;
affs_adjust_checksum(bh, be32_to_cpu(ino) - hash_ino);
mark_buffer_dirty_inode(bh, dir);
mmb_mark_buffer_dirty(bh, &AFFS_I(dir)->i_metadata_bhs);
AFFS_TAIL(sb, rem_bh)->parent = 0;
retval = 0;
break;
@@ -180,7 +180,7 @@ affs_remove_link(struct dentry *dentry)
affs_unlock_dir(dir);
goto done;
}
mark_buffer_dirty_inode(link_bh, inode);
mmb_mark_buffer_dirty(link_bh, &AFFS_I(inode)->i_metadata_bhs);
memcpy(AFFS_TAIL(sb, bh)->name, AFFS_TAIL(sb, link_bh)->name, 32);
retval = affs_insert_hash(dir, bh);
@@ -188,7 +188,7 @@ affs_remove_link(struct dentry *dentry)
affs_unlock_dir(dir);
goto done;
}
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
affs_unlock_dir(dir);
iput(dir);
@@ -203,7 +203,7 @@ affs_remove_link(struct dentry *dentry)
__be32 ino2 = AFFS_TAIL(sb, link_bh)->link_chain;
AFFS_TAIL(sb, bh)->link_chain = ino2;
affs_adjust_checksum(bh, be32_to_cpu(ino2) - link_ino);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
retval = 0;
/* Fix the link count, if bh is a normal header block without links */
switch (be32_to_cpu(AFFS_TAIL(sb, bh)->stype)) {
@@ -306,7 +306,7 @@ affs_remove_header(struct dentry *dentry)
retval = affs_remove_hash(dir, bh);
if (retval)
goto done_unlock;
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
affs_unlock_dir(dir);

View File

@@ -140,14 +140,14 @@ affs_alloc_extblock(struct inode *inode, struct buffer_head *bh, u32 ext)
AFFS_TAIL(sb, new_bh)->parent = cpu_to_be32(inode->i_ino);
affs_fix_checksum(sb, new_bh);
mark_buffer_dirty_inode(new_bh, inode);
mmb_mark_buffer_dirty(new_bh, &AFFS_I(inode)->i_metadata_bhs);
tmp = be32_to_cpu(AFFS_TAIL(sb, bh)->extension);
if (tmp)
affs_warning(sb, "alloc_ext", "previous extension set (%x)", tmp);
AFFS_TAIL(sb, bh)->extension = cpu_to_be32(blocknr);
affs_adjust_checksum(bh, blocknr - tmp);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
AFFS_I(inode)->i_extcnt++;
mark_inode_dirty(inode);
@@ -581,7 +581,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
memset(AFFS_DATA(bh) + boff, 0, tmp);
be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
size += tmp;
bidx++;
} else if (bidx) {
@@ -603,7 +603,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp);
affs_fix_checksum(sb, bh);
bh->b_state &= ~(1UL << BH_New);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
if (prev_bh) {
u32 tmp_next = be32_to_cpu(AFFS_DATA_HEAD(prev_bh)->next);
@@ -613,7 +613,8 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
bidx, tmp_next);
AFFS_DATA_HEAD(prev_bh)->next = cpu_to_be32(bh->b_blocknr);
affs_adjust_checksum(prev_bh, bh->b_blocknr - tmp_next);
mark_buffer_dirty_inode(prev_bh, inode);
mmb_mark_buffer_dirty(prev_bh,
&AFFS_I(inode)->i_metadata_bhs);
affs_brelse(prev_bh);
}
size += bsize;
@@ -732,7 +733,7 @@ static int affs_write_end_ofs(const struct kiocb *iocb,
AFFS_DATA_HEAD(bh)->size = cpu_to_be32(
max(boff + tmp, be32_to_cpu(AFFS_DATA_HEAD(bh)->size)));
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
written += tmp;
from += tmp;
bidx++;
@@ -765,12 +766,13 @@ static int affs_write_end_ofs(const struct kiocb *iocb,
bidx, tmp_next);
AFFS_DATA_HEAD(prev_bh)->next = cpu_to_be32(bh->b_blocknr);
affs_adjust_checksum(prev_bh, bh->b_blocknr - tmp_next);
mark_buffer_dirty_inode(prev_bh, inode);
mmb_mark_buffer_dirty(prev_bh,
&AFFS_I(inode)->i_metadata_bhs);
}
}
affs_brelse(prev_bh);
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
written += bsize;
from += bsize;
bidx++;
@@ -799,13 +801,14 @@ static int affs_write_end_ofs(const struct kiocb *iocb,
bidx, tmp_next);
AFFS_DATA_HEAD(prev_bh)->next = cpu_to_be32(bh->b_blocknr);
affs_adjust_checksum(prev_bh, bh->b_blocknr - tmp_next);
mark_buffer_dirty_inode(prev_bh, inode);
mmb_mark_buffer_dirty(prev_bh,
&AFFS_I(inode)->i_metadata_bhs);
}
} else if (be32_to_cpu(AFFS_DATA_HEAD(bh)->size) < tmp)
AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp);
affs_brelse(prev_bh);
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
written += tmp;
from += tmp;
bidx++;
@@ -942,7 +945,7 @@ affs_truncate(struct inode *inode)
}
AFFS_TAIL(sb, ext_bh)->extension = 0;
affs_fix_checksum(sb, ext_bh);
mark_buffer_dirty_inode(ext_bh, inode);
mmb_mark_buffer_dirty(ext_bh, &AFFS_I(inode)->i_metadata_bhs);
affs_brelse(ext_bh);
if (inode->i_size) {

View File

@@ -206,7 +206,7 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
}
}
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
affs_brelse(bh);
affs_free_prealloc(inode);
return 0;
@@ -267,9 +267,11 @@ affs_evict_inode(struct inode *inode)
if (!inode->i_nlink) {
inode->i_size = 0;
affs_truncate(inode);
} else {
mmb_sync(&AFFS_I(inode)->i_metadata_bhs);
}
invalidate_inode_buffers(inode);
mmb_invalidate(&AFFS_I(inode)->i_metadata_bhs);
clear_inode(inode);
affs_free_prealloc(inode);
cache_page = (unsigned long)AFFS_I(inode)->i_lc;
@@ -304,7 +306,7 @@ affs_new_inode(struct inode *dir)
bh = affs_getzeroblk(sb, block);
if (!bh)
goto err_bh;
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
affs_brelse(bh);
inode->i_uid = current_fsuid();
@@ -392,17 +394,17 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
AFFS_TAIL(sb, bh)->link_chain = chain;
AFFS_TAIL(sb, inode_bh)->link_chain = cpu_to_be32(block);
affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
mark_buffer_dirty_inode(inode_bh, inode);
mmb_mark_buffer_dirty(inode_bh, &AFFS_I(inode)->i_metadata_bhs);
set_nlink(inode, 2);
ihold(inode);
}
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
dentry->d_fsdata = (void *)(long)bh->b_blocknr;
affs_lock_dir(dir);
retval = affs_insert_hash(dir, bh);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
affs_unlock_dir(dir);
affs_unlock_link(inode);

View File

@@ -373,7 +373,7 @@ affs_symlink(struct mnt_idmap *idmap, struct inode *dir,
}
*p = 0;
inode->i_size = i + 1;
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs);
affs_brelse(bh);
mark_inode_dirty(inode);
@@ -443,7 +443,8 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* TODO: move it back to old_dir, if error? */
done:
mark_buffer_dirty_inode(bh, retval ? old_dir : new_dir);
mmb_mark_buffer_dirty(bh,
&AFFS_I(retval ? old_dir : new_dir)->i_metadata_bhs);
affs_brelse(bh);
return retval;
}
@@ -496,8 +497,8 @@ affs_xrename(struct inode *old_dir, struct dentry *old_dentry,
retval = affs_insert_hash(old_dir, bh_new);
affs_unlock_dir(old_dir);
done:
mark_buffer_dirty_inode(bh_old, new_dir);
mark_buffer_dirty_inode(bh_new, old_dir);
mmb_mark_buffer_dirty(bh_old, &AFFS_I(new_dir)->i_metadata_bhs);
mmb_mark_buffer_dirty(bh_new, &AFFS_I(old_dir)->i_metadata_bhs);
affs_brelse(bh_old);
affs_brelse(bh_new);
return retval;

View File

@@ -108,6 +108,7 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
i->i_lc = NULL;
i->i_ext_bh = NULL;
i->i_pa_cnt = 0;
mmb_init(&i->i_metadata_bhs, &i->vfs_inode.i_data);
return &i->vfs_inode;
}

View File

@@ -218,6 +218,17 @@ struct aio_kiocb {
struct eventfd_ctx *ki_eventfd;
};
/*
 * Per-inode aio state; embeds the VFS inode so AIO_I() can recover it
 * via container_of().  Used for the inode backing an aio ring buffer.
 */
struct aio_inode_info {
struct inode vfs_inode;
spinlock_t migrate_lock; /* protects ->ctx against kioctx teardown during folio migration */
struct kioctx *ctx; /* owning kioctx; set to NULL in put_aio_ring_file() */
};
/* Map a VFS inode back to its enclosing aio_inode_info. */
static inline struct aio_inode_info *AIO_I(struct inode *inode)
{
return container_of(inode, struct aio_inode_info, vfs_inode);
}
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
static unsigned long aio_nr; /* current system wide number of aio requests */
@@ -251,6 +262,7 @@ static void __init aio_sysctl_init(void)
static struct kmem_cache *kiocb_cachep;
static struct kmem_cache *kioctx_cachep;
static struct kmem_cache *aio_inode_cachep;
static struct vfsmount *aio_mnt;
@@ -261,11 +273,12 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
{
struct file *file;
struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
if (IS_ERR(inode))
return ERR_CAST(inode);
inode->i_mapping->a_ops = &aio_ctx_aops;
inode->i_mapping->i_private_data = ctx;
AIO_I(inode)->ctx = ctx;
inode->i_size = PAGE_SIZE * nr_pages;
file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
@@ -275,14 +288,49 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
return file;
}
/*
 * ->alloc_inode: allocate an aio inode from its dedicated slab.
 * ->ctx starts NULL; aio_private_file() fills it in for ring inodes.
 */
static struct inode *aio_alloc_inode(struct super_block *sb)
{
struct aio_inode_info *ai;
ai = alloc_inode_sb(sb, aio_inode_cachep, GFP_KERNEL);
if (!ai)
return NULL;
ai->ctx = NULL;
return &ai->vfs_inode;
}
/* ->free_inode: return the aio inode to its slab cache. */
static void aio_free_inode(struct inode *inode)
{
kmem_cache_free(aio_inode_cachep, AIO_I(inode));
}
/* Superblock operations for the aio pseudo-filesystem. */
static const struct super_operations aio_super_operations = {
.alloc_inode = aio_alloc_inode,
.free_inode = aio_free_inode,
.statfs = simple_statfs,
};
static int aio_init_fs_context(struct fs_context *fc)
{
if (!init_pseudo(fc, AIO_RING_MAGIC))
struct pseudo_fs_context *pfc;
pfc = init_pseudo(fc, AIO_RING_MAGIC);
if (!pfc)
return -ENOMEM;
fc->s_iflags |= SB_I_NOEXEC;
pfc->ops = &aio_super_operations;
return 0;
}
/*
 * Slab constructor: runs once per object when a slab page is created,
 * not on every allocation, so only one-time state goes here.
 */
static void init_once(void *obj)
{
struct aio_inode_info *ai = obj;
inode_init_once(&ai->vfs_inode);
spin_lock_init(&ai->migrate_lock);
}
/* aio_setup
* Creates the slab caches used by the aio routines, panic on
* failure as this is done early during the boot sequence.
@@ -294,6 +342,11 @@ static int __init aio_setup(void)
.init_fs_context = aio_init_fs_context,
.kill_sb = kill_anon_super,
};
aio_inode_cachep = kmem_cache_create("aio_inode_cache",
sizeof(struct aio_inode_info), 0,
(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_ACCOUNT),
init_once);
aio_mnt = kern_mount(&aio_fs);
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");
@@ -308,17 +361,17 @@ __initcall(aio_setup);
static void put_aio_ring_file(struct kioctx *ctx)
{
struct file *aio_ring_file = ctx->aio_ring_file;
struct address_space *i_mapping;
if (aio_ring_file) {
truncate_setsize(file_inode(aio_ring_file), 0);
struct inode *inode = file_inode(aio_ring_file);
truncate_setsize(inode, 0);
/* Prevent further access to the kioctx from migratepages */
i_mapping = aio_ring_file->f_mapping;
spin_lock(&i_mapping->i_private_lock);
i_mapping->i_private_data = NULL;
spin_lock(&AIO_I(inode)->migrate_lock);
AIO_I(inode)->ctx = NULL;
ctx->aio_ring_file = NULL;
spin_unlock(&i_mapping->i_private_lock);
spin_unlock(&AIO_I(inode)->migrate_lock);
fput(aio_ring_file);
}
@@ -408,13 +461,14 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
struct folio *src, enum migrate_mode mode)
{
struct kioctx *ctx;
struct aio_inode_info *ai = AIO_I(mapping->host);
unsigned long flags;
pgoff_t idx;
int rc = 0;
/* mapping->i_private_lock here protects against the kioctx teardown. */
spin_lock(&mapping->i_private_lock);
ctx = mapping->i_private_data;
/* ai->migrate_lock here protects against the kioctx teardown. */
spin_lock(&ai->migrate_lock);
ctx = ai->ctx;
if (!ctx) {
rc = -EINVAL;
goto out;
@@ -467,7 +521,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
out_unlock:
mutex_unlock(&ctx->ring_lock);
out:
spin_unlock(&mapping->i_private_lock);
spin_unlock(&ai->migrate_lock);
return rc;
}
#else

View File

@@ -35,6 +35,7 @@ struct bfs_inode_info {
unsigned long i_dsk_ino; /* inode number from the disk, can be 0 */
unsigned long i_sblock;
unsigned long i_eblock;
struct mapping_metadata_bhs i_metadata_bhs;
struct inode vfs_inode;
};

View File

@@ -68,10 +68,17 @@ static int bfs_readdir(struct file *f, struct dir_context *ctx)
return 0;
}
/*
 * ->fsync for BFS: flush data pages plus the metadata buffer heads
 * tracked in the inode's i_metadata_bhs list, then flush the device.
 */
static int bfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
return mmb_fsync(file,
&BFS_I(file->f_mapping->host)->i_metadata_bhs,
start, end, datasync);
}
/*
 * Directory file operations for BFS.  fsync must go through bfs_fsync()
 * so directory-block buffer heads tracked in i_metadata_bhs are synced.
 *
 * Fix: removed the stale duplicate designated initializer
 *   .fsync = generic_file_fsync,
 * left beside the new entry; the later initializer silently overrode it
 * (and triggers -Woverride-init).
 */
const struct file_operations bfs_dir_operations = {
.read = generic_read_dir,
.iterate_shared = bfs_readdir,
.fsync = bfs_fsync,
.llseek = generic_file_llseek,
};
@@ -186,7 +193,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
set_nlink(inode, 1);
}
de->ino = 0;
mark_buffer_dirty_inode(bh, dir);
mmb_mark_buffer_dirty(bh, &BFS_I(dir)->i_metadata_bhs);
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
mark_inode_dirty(dir);
inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
@@ -246,7 +253,7 @@ static int bfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
inode_set_ctime_current(new_inode);
inode_dec_link_count(new_inode);
}
mark_buffer_dirty_inode(old_bh, old_dir);
mmb_mark_buffer_dirty(old_bh, &BFS_I(old_dir)->i_metadata_bhs);
error = 0;
end_rename:
@@ -296,7 +303,8 @@ static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino)
for (i = 0; i < BFS_NAMELEN; i++)
de->name[i] =
(i < namelen) ? name[i] : 0;
mark_buffer_dirty_inode(bh, dir);
mmb_mark_buffer_dirty(bh,
&BFS_I(dir)->i_metadata_bhs);
brelse(bh);
return 0;
}

View File

@@ -187,7 +187,9 @@ static void bfs_evict_inode(struct inode *inode)
dprintf("ino=%08lx\n", ino);
truncate_inode_pages_final(&inode->i_data);
invalidate_inode_buffers(inode);
if (inode->i_nlink)
mmb_sync(&BFS_I(inode)->i_metadata_bhs);
mmb_invalidate(&BFS_I(inode)->i_metadata_bhs);
clear_inode(inode);
if (inode->i_nlink)
@@ -257,6 +259,8 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
bi = alloc_inode_sb(sb, bfs_inode_cachep, GFP_KERNEL);
if (!bi)
return NULL;
mmb_init(&bi->i_metadata_bhs, &bi->vfs_inode.i_data);
return &bi->vfs_inode;
}

View File

@@ -54,7 +54,6 @@
#include "internal.h"
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
enum rw_hint hint, struct writeback_control *wbc);
@@ -468,146 +467,187 @@ EXPORT_SYMBOL(mark_buffer_async_write);
* a successful fsync(). For example, ext2 indirect blocks need to be
* written back and waited upon before fsync() returns.
*
* The functions mark_buffer_dirty_inode(), fsync_inode_buffers(),
* inode_has_buffers() and invalidate_inode_buffers() are provided for the
* management of a list of dependent buffers at ->i_mapping->i_private_list.
* The functions mmb_mark_buffer_dirty(), mmb_sync(), mmb_has_buffers()
* and mmb_invalidate() are provided for the management of a list of dependent
* buffers in mapping_metadata_bhs struct.
*
* Locking is a little subtle: try_to_free_buffers() will remove buffers
* from their controlling inode's queue when they are being freed. But
* try_to_free_buffers() will be operating against the *blockdev* mapping
* at the time, not against the S_ISREG file which depends on those buffers.
* So the locking for i_private_list is via the i_private_lock in the address_space
* which backs the buffers. Which is different from the address_space
* against which the buffers are listed. So for a particular address_space,
* mapping->i_private_lock does *not* protect mapping->i_private_list! In fact,
* mapping->i_private_list will always be protected by the backing blockdev's
* ->i_private_lock.
*
* Which introduces a requirement: all buffers on an address_space's
* ->i_private_list must be from the same address_space: the blockdev's.
*
* address_spaces which do not place buffers at ->i_private_list via these
* utility functions are free to use i_private_lock and i_private_list for
* whatever they want. The only requirement is that list_empty(i_private_list)
* be true at clear_inode() time.
*
* FIXME: clear_inode should not call invalidate_inode_buffers(). The
* filesystems should do that. invalidate_inode_buffers() should just go
* BUG_ON(!list_empty).
*
* FIXME: mark_buffer_dirty_inode() is a data-plane operation. It should
* take an address_space, not an inode. And it should be called
* mark_buffer_dirty_fsync() to clearly define why those buffers are being
* queued up.
*
* FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
* list if it is already on a list. Because if the buffer is on a list,
* it *must* already be on the right one. If not, the filesystem is being
* silly. This will save a ton of locking. But first we have to ensure
* that buffers are taken *off* the old inode's list when they are freed
* (presumably in truncate). That requires careful auditing of all
* filesystems (do it inside bforget()). It could also be done by bringing
* b_inode back.
* The locking is a little subtle: The list of buffer heads is protected by
* the lock in mapping_metadata_bhs so functions coming from bdev mapping
* (such as try_to_free_buffers()) need to safely get to mapping_metadata_bhs
* using RCU, grab the lock, verify we didn't race with somebody detaching the
* bh / moving it to different inode and only then proceeding.
*/
/*
* The buffer's backing address_space's i_private_lock must be held
*/
static void __remove_assoc_queue(struct buffer_head *bh)
/**
 * mmb_init - initialize a mapping_metadata_bhs structure
 * @mmb: structure to initialize
 * @mapping: address_space whose metadata buffers @mmb will track
 *
 * Must be called before any buffers are attached via
 * mmb_mark_buffer_dirty().
 */
void mmb_init(struct mapping_metadata_bhs *mmb, struct address_space *mapping)
{
spin_lock_init(&mmb->lock);
INIT_LIST_HEAD(&mmb->list);
mmb->mapping = mapping;
}
EXPORT_SYMBOL(mmb_init);
/*
 * Detach @bh from @mmb's metadata-buffer list.  Caller must hold
 * @mmb->lock.
 *
 * Fix: dropped the stale pre-conversion pair
 *   WARN_ON(!bh->b_assoc_map); bh->b_assoc_map = NULL;
 * duplicated next to the b_mmb versions — this series replaces
 * b_assoc_map with b_mmb, so only the latter is cleared.
 */
static void __remove_assoc_queue(struct mapping_metadata_bhs *mmb,
struct buffer_head *bh)
{
lockdep_assert_held(&mmb->lock);
list_del_init(&bh->b_assoc_buffers);
WARN_ON(!bh->b_mmb);
bh->b_mmb = NULL;
}
int inode_has_buffers(struct inode *inode)
/*
 * Detach @bh from whatever metadata-buffer list currently tracks it,
 * tolerating concurrent detach/move by other contexts.
 *
 * Fix: dropped the stale leftover
 *   return !list_empty(&inode->i_data.i_private_list);
 * (the body of the old inode_has_buffers()) which returned a value from
 * a void function and referenced an undefined `inode`.
 */
static void remove_assoc_queue(struct buffer_head *bh)
{
struct mapping_metadata_bhs *mmb;
/*
* The locking dance is ugly here. We need to acquire the lock
* protecting the metadata bh list while possibly racing with bh
* being removed from the list or moved to a different one. We
* use RCU to pin mapping_metadata_bhs in memory to
* opportunistically acquire the lock and then recheck the bh
* didn't move under us.
*/
while (bh->b_mmb) {
rcu_read_lock();
mmb = READ_ONCE(bh->b_mmb);
if (mmb) {
spin_lock(&mmb->lock);
if (bh->b_mmb == mmb)
__remove_assoc_queue(mmb, bh);
spin_unlock(&mmb->lock);
}
rcu_read_unlock();
}
}
/*
* osync is designed to support O_SYNC io. It waits synchronously for
* all already-submitted IO to complete, but does not queue any new
* writes to the disk.
/*
 * Return true if any metadata buffers are tracked in @mmb.
 * Lockless check — callers must tolerate a racing attach/detach.
 */
bool mmb_has_buffers(struct mapping_metadata_bhs *mmb)
{
return !list_empty(&mmb->list);
}
EXPORT_SYMBOL_GPL(mmb_has_buffers);
/**
* mmb_sync - write out & wait upon all buffers in a list
* @mmb: the list of buffers to write
*
* To do O_SYNC writes, just queue the buffer writes with write_dirty_buffer
* as you dirty the buffers, and then use osync_inode_buffers to wait for
* completion. Any other dirty buffers which are not yet queued for
* write will not be flushed to disk by the osync.
* Starts I/O against the buffers in the given list and waits upon
* that I/O. Basically, this is a convenience function for fsync(). @mmb is
* for a file or directory which needs those buffers to be written for a
* successful fsync().
*
* We have conflicting pressures: we want to make sure that all
* initially dirty buffers get waited on, but that any subsequently
* dirtied buffers don't. After all, we don't want fsync to last
* forever if somebody is actively writing to the file.
*
* Do this in two main stages: first we copy dirty buffers to a
* temporary inode list, queueing the writes as we go. Then we clean
* up, waiting for those writes to complete. mark_buffer_dirty_inode()
* doesn't touch b_assoc_buffers list if b_mmb is not NULL so we are sure the
* buffer stays on our list until IO completes (at which point it can be
* reaped).
*/
static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
int mmb_sync(struct mapping_metadata_bhs *mmb)
{
struct buffer_head *bh;
struct list_head *p;
int err = 0;
struct blk_plug plug;
LIST_HEAD(tmp);
spin_lock(lock);
repeat:
list_for_each_prev(p, list) {
bh = BH_ENTRY(p);
if (buffer_locked(bh)) {
get_bh(bh);
spin_unlock(lock);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
err = -EIO;
brelse(bh);
spin_lock(lock);
goto repeat;
}
}
spin_unlock(lock);
return err;
}
/**
* sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
* @mapping: the mapping which wants those buffers written
*
* Starts I/O against the buffers at mapping->i_private_list, and waits upon
* that I/O.
*
* Basically, this is a convenience function for fsync().
* @mapping is a file or directory which needs those buffers to be written for
* a successful fsync().
*/
int sync_mapping_buffers(struct address_space *mapping)
{
struct address_space *buffer_mapping = mapping->i_private_data;
if (buffer_mapping == NULL || list_empty(&mapping->i_private_list))
if (!mmb_has_buffers(mmb))
return 0;
return fsync_buffers_list(&buffer_mapping->i_private_lock,
&mapping->i_private_list);
blk_start_plug(&plug);
spin_lock(&mmb->lock);
while (!list_empty(&mmb->list)) {
bh = BH_ENTRY(mmb->list.next);
WARN_ON_ONCE(bh->b_mmb != mmb);
__remove_assoc_queue(mmb, bh);
/* Avoid race with mark_buffer_dirty_inode() which does
* a lockless check and we rely on seeing the dirty bit */
smp_mb();
if (buffer_dirty(bh) || buffer_locked(bh)) {
list_add(&bh->b_assoc_buffers, &tmp);
bh->b_mmb = mmb;
if (buffer_dirty(bh)) {
get_bh(bh);
spin_unlock(&mmb->lock);
/*
* Ensure any pending I/O completes so that
* write_dirty_buffer() actually writes the
* current contents - it is a noop if I/O is
* still in flight on potentially older
* contents.
*/
write_dirty_buffer(bh, REQ_SYNC);
/*
* Kick off IO for the previous mapping. Note
* that we will not run the very last mapping,
* wait_on_buffer() will do that for us
* through sync_buffer().
*/
brelse(bh);
spin_lock(&mmb->lock);
}
}
}
spin_unlock(&mmb->lock);
blk_finish_plug(&plug);
spin_lock(&mmb->lock);
while (!list_empty(&tmp)) {
bh = BH_ENTRY(tmp.prev);
get_bh(bh);
__remove_assoc_queue(mmb, bh);
/* Avoid race with mark_buffer_dirty_inode() which does
* a lockless check and we rely on seeing the dirty bit */
smp_mb();
if (buffer_dirty(bh)) {
list_add(&bh->b_assoc_buffers, &mmb->list);
bh->b_mmb = mmb;
}
spin_unlock(&mmb->lock);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
err = -EIO;
brelse(bh);
spin_lock(&mmb->lock);
}
spin_unlock(&mmb->lock);
return err;
}
EXPORT_SYMBOL(sync_mapping_buffers);
EXPORT_SYMBOL(mmb_sync);
/**
* generic_buffers_fsync_noflush - generic buffer fsync implementation
* for simple filesystems with no inode lock
* mmb_fsync_noflush - fsync implementation for simple filesystems with
* metadata buffers list
*
* @file: file to synchronize
* @mmb: list of metadata bhs to flush
* @start: start offset in bytes
* @end: end offset in bytes (inclusive)
* @datasync: only synchronize essential metadata if true
*
* This is a generic implementation of the fsync method for simple
* filesystems which track all non-inode metadata in the buffers list
* hanging off the address_space structure.
* This is an implementation of the fsync method for simple filesystems which
* track all non-inode metadata in the buffers list hanging off the @mmb
* structure.
*/
int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
bool datasync)
int mmb_fsync_noflush(struct file *file, struct mapping_metadata_bhs *mmb,
loff_t start, loff_t end, bool datasync)
{
struct inode *inode = file->f_mapping->host;
int err;
int ret;
int ret = 0;
err = file_write_and_wait_range(file, start, end);
if (err)
return err;
ret = sync_mapping_buffers(inode->i_mapping);
if (mmb)
ret = mmb_sync(mmb);
if (!(inode_state_read_once(inode) & I_DIRTY_ALL))
goto out;
if (datasync && !(inode_state_read_once(inode) & I_DIRTY_DATASYNC))
@@ -624,34 +664,35 @@ out:
ret = err;
return ret;
}
EXPORT_SYMBOL(generic_buffers_fsync_noflush);
EXPORT_SYMBOL(mmb_fsync_noflush);
/**
* generic_buffers_fsync - generic buffer fsync implementation
* for simple filesystems with no inode lock
* mmb_fsync - fsync implementation for simple filesystems with metadata
* buffers list
*
* @file: file to synchronize
* @mmb: list of metadata bhs to flush
* @start: start offset in bytes
* @end: end offset in bytes (inclusive)
* @datasync: only synchronize essential metadata if true
*
* This is a generic implementation of the fsync method for simple
* filesystems which track all non-inode metadata in the buffers list
* hanging off the address_space structure. This also makes sure that
* a device cache flush operation is called at the end.
* This is an implementation of the fsync method for simple filesystems which
* track all non-inode metadata in the buffers list hanging off the @mmb
* structure. This also makes sure that a device cache flush operation is
* called at the end.
*/
int generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
bool datasync)
int mmb_fsync(struct file *file, struct mapping_metadata_bhs *mmb,
loff_t start, loff_t end, bool datasync)
{
struct inode *inode = file->f_mapping->host;
int ret;
ret = generic_buffers_fsync_noflush(file, start, end, datasync);
ret = mmb_fsync_noflush(file, mmb, start, end, datasync);
if (!ret)
ret = blkdev_issue_flush(inode->i_sb->s_bdev);
return ret;
}
EXPORT_SYMBOL(generic_buffers_fsync);
EXPORT_SYMBOL(mmb_fsync);
/*
* Called when we've recently written block `bblock', and it is known that
@@ -672,26 +713,18 @@ void write_boundary_block(struct block_device *bdev,
}
}
void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
void mmb_mark_buffer_dirty(struct buffer_head *bh,
struct mapping_metadata_bhs *mmb)
{
struct address_space *mapping = inode->i_mapping;
struct address_space *buffer_mapping = bh->b_folio->mapping;
mark_buffer_dirty(bh);
if (!mapping->i_private_data) {
mapping->i_private_data = buffer_mapping;
} else {
BUG_ON(mapping->i_private_data != buffer_mapping);
}
if (!bh->b_assoc_map) {
spin_lock(&buffer_mapping->i_private_lock);
list_move_tail(&bh->b_assoc_buffers,
&mapping->i_private_list);
bh->b_assoc_map = mapping;
spin_unlock(&buffer_mapping->i_private_lock);
if (!bh->b_mmb) {
spin_lock(&mmb->lock);
list_move_tail(&bh->b_assoc_buffers, &mmb->list);
bh->b_mmb = mmb;
spin_unlock(&mmb->lock);
}
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);
EXPORT_SYMBOL(mmb_mark_buffer_dirty);
/**
* block_dirty_folio - Mark a folio as dirty.
@@ -758,153 +791,20 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
EXPORT_SYMBOL(block_dirty_folio);
/*
* Write out and wait upon a list of buffers.
*
* We have conflicting pressures: we want to make sure that all
* initially dirty buffers get waited on, but that any subsequently
* dirtied buffers don't. After all, we don't want fsync to last
* forever if somebody is actively writing to the file.
*
* Do this in two main stages: first we copy dirty buffers to a
* temporary inode list, queueing the writes as we go. Then we clean
* up, waiting for those writes to complete.
*
* During this second stage, any subsequent updates to the file may end
* up refiling the buffer on the original inode's dirty list again, so
* there is a chance we will end up with a buffer queued for write but
* not yet completed on that list. So, as a final cleanup we go through
* the osync code to catch these locked, dirty buffers without requeuing
* any newly dirty buffers for write.
*/
/*
 * Legacy implementation (deleted by this series in favour of mmb_sync(),
 * see "fs: Fold fsync_buffers_list() into sync_mapping_buffers()").
 * Write out and wait upon the buffers on @list, protected by @lock.
 * Two passes: first move dirty buffers to a private list while queueing
 * writes, then wait for those writes to complete.
 */
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
struct buffer_head *bh;
struct address_space *mapping;
int err = 0, err2;
struct blk_plug plug;
LIST_HEAD(tmp);
blk_start_plug(&plug);
spin_lock(lock);
/* Pass 1: detach each buffer, re-queue dirty/locked ones on tmp. */
while (!list_empty(list)) {
bh = BH_ENTRY(list->next);
mapping = bh->b_assoc_map;
__remove_assoc_queue(bh);
/* Avoid race with mark_buffer_dirty_inode() which does
* a lockless check and we rely on seeing the dirty bit */
smp_mb();
if (buffer_dirty(bh) || buffer_locked(bh)) {
list_add(&bh->b_assoc_buffers, &tmp);
bh->b_assoc_map = mapping;
if (buffer_dirty(bh)) {
get_bh(bh);
spin_unlock(lock);
/*
* Ensure any pending I/O completes so that
* write_dirty_buffer() actually writes the
* current contents - it is a noop if I/O is
* still in flight on potentially older
* contents.
*/
write_dirty_buffer(bh, REQ_SYNC);
/*
* Kick off IO for the previous mapping. Note
* that we will not run the very last mapping,
* wait_on_buffer() will do that for us
* through sync_buffer().
*/
brelse(bh);
spin_lock(lock);
}
}
}
spin_unlock(lock);
blk_finish_plug(&plug);
spin_lock(lock);
/* Pass 2: wait for the queued writes, re-filing re-dirtied buffers. */
while (!list_empty(&tmp)) {
bh = BH_ENTRY(tmp.prev);
get_bh(bh);
mapping = bh->b_assoc_map;
__remove_assoc_queue(bh);
/* Avoid race with mark_buffer_dirty_inode() which does
* a lockless check and we rely on seeing the dirty bit */
smp_mb();
if (buffer_dirty(bh)) {
list_add(&bh->b_assoc_buffers,
&mapping->i_private_list);
bh->b_assoc_map = mapping;
}
spin_unlock(lock);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
err = -EIO;
brelse(bh);
spin_lock(lock);
}
spin_unlock(lock);
/* Catch buffers re-queued during pass 2 without issuing new writes. */
err2 = osync_buffers_list(lock, list);
if (err)
return err;
else
return err2;
}
/*
 * Invalidate any and all dirty buffers on a given buffers list. We are
* probably unmounting the fs, but that doesn't mean we have already
* done a sync(). Just drop the buffers from the inode list.
*
* NOTE: we take the inode's blockdev's mapping's i_private_lock. Which
* assumes that all the buffers are against the blockdev.
*/
void invalidate_inode_buffers(struct inode *inode)
void mmb_invalidate(struct mapping_metadata_bhs *mmb)
{
if (inode_has_buffers(inode)) {
struct address_space *mapping = &inode->i_data;
struct list_head *list = &mapping->i_private_list;
struct address_space *buffer_mapping = mapping->i_private_data;
spin_lock(&buffer_mapping->i_private_lock);
while (!list_empty(list))
__remove_assoc_queue(BH_ENTRY(list->next));
spin_unlock(&buffer_mapping->i_private_lock);
if (mmb_has_buffers(mmb)) {
spin_lock(&mmb->lock);
while (!list_empty(&mmb->list))
__remove_assoc_queue(mmb, BH_ENTRY(mmb->list.next));
spin_unlock(&mmb->lock);
}
}
EXPORT_SYMBOL(invalidate_inode_buffers);
/*
* Remove any clean buffers from the inode's buffer list. This is called
* when we're trying to free the inode itself. Those buffers can pin it.
*
* Returns true if all buffers were removed.
*/
int remove_inode_buffers(struct inode *inode)
{
	/* Assume success until a dirty buffer proves otherwise. */
	int ret = 1;
	if (inode_has_buffers(inode)) {
		struct address_space *mapping = &inode->i_data;
		struct list_head *list = &mapping->i_private_list;
		struct address_space *buffer_mapping = mapping->i_private_data;
		spin_lock(&buffer_mapping->i_private_lock);
		while (!list_empty(list)) {
			struct buffer_head *bh = BH_ENTRY(list->next);
			if (buffer_dirty(bh)) {
				/* A dirty buffer still pins the inode; stop
				 * and report that not all could be removed. */
				ret = 0;
				break;
			}
			__remove_assoc_queue(bh);
		}
		spin_unlock(&buffer_mapping->i_private_lock);
	}
	return ret;
}
EXPORT_SYMBOL(mmb_invalidate);
/*
* Create the appropriate buffers when given a folio for data area and
@@ -1214,8 +1114,8 @@ void mark_buffer_write_io_error(struct buffer_head *bh)
/* FIXME: do we need to set this in both places? */
if (bh->b_folio && bh->b_folio->mapping)
mapping_set_error(bh->b_folio->mapping, -EIO);
if (bh->b_assoc_map)
mapping_set_error(bh->b_assoc_map, -EIO);
if (bh->b_mmb)
mapping_set_error(bh->b_mmb->mapping, -EIO);
}
EXPORT_SYMBOL(mark_buffer_write_io_error);
@@ -1245,14 +1145,7 @@ EXPORT_SYMBOL(__brelse);
void __bforget(struct buffer_head *bh)
{
clear_buffer_dirty(bh);
if (bh->b_assoc_map) {
struct address_space *buffer_mapping = bh->b_folio->mapping;
spin_lock(&buffer_mapping->i_private_lock);
list_del_init(&bh->b_assoc_buffers);
bh->b_assoc_map = NULL;
spin_unlock(&buffer_mapping->i_private_lock);
}
remove_assoc_queue(bh);
__brelse(bh);
}
EXPORT_SYMBOL(__bforget);
@@ -2900,8 +2793,7 @@ drop_buffers(struct folio *folio, struct buffer_head **buffers_to_free)
do {
struct buffer_head *next = bh->b_this_page;
if (bh->b_assoc_map)
__remove_assoc_queue(bh);
remove_assoc_queue(bh);
bh = next;
} while (bh != head);
*buffers_to_free = head;

View File

@@ -577,7 +577,7 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
if (unlikely(exfat_forced_shutdown(inode->i_sb)))
return -EIO;
err = __generic_file_fsync(filp, start, end, datasync);
err = simple_fsync_noflush(filp, start, end, datasync);
if (err)
return err;

View File

@@ -695,7 +695,6 @@ void exfat_evict_inode(struct inode *inode)
mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock);
}
invalidate_inode_buffers(inode);
clear_inode(inode);
exfat_cache_inval_inode(inode);
exfat_unhash_inode(inode);

View File

@@ -676,6 +676,7 @@ struct ext2_inode_info {
#ifdef CONFIG_QUOTA
struct dquot __rcu *i_dquot[MAXQUOTAS];
#endif
struct mapping_metadata_bhs i_metadata_bhs;
};
/*

View File

@@ -156,9 +156,11 @@ static int ext2_release_file (struct inode * inode, struct file * filp)
int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
int ret;
struct super_block *sb = file->f_mapping->host->i_sb;
struct inode *inode = file->f_mapping->host;
struct super_block *sb = inode->i_sb;
ret = generic_buffers_fsync(file, start, end, datasync);
ret = mmb_fsync(file, &EXT2_I(inode)->i_metadata_bhs,
start, end, datasync);
if (ret == -EIO)
/* We don't really know where the IO error happened... */
ext2_error(sb, __func__,

View File

@@ -94,9 +94,10 @@ void ext2_evict_inode(struct inode * inode)
if (inode->i_blocks)
ext2_truncate_blocks(inode, 0);
ext2_xattr_delete_inode(inode);
} else {
mmb_sync(&EXT2_I(inode)->i_metadata_bhs);
}
invalidate_inode_buffers(inode);
mmb_invalidate(&EXT2_I(inode)->i_metadata_bhs);
clear_inode(inode);
ext2_discard_reservation(inode);
@@ -526,7 +527,7 @@ static int ext2_alloc_branch(struct inode *inode,
}
set_buffer_uptodate(bh);
unlock_buffer(bh);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &EXT2_I(inode)->i_metadata_bhs);
/* We used to sync bh here if IS_SYNC(inode).
* But we now rely upon generic_write_sync()
* and b_inode_buffers. But not for directories.
@@ -597,7 +598,7 @@ static void ext2_splice_branch(struct inode *inode,
/* had we spliced it onto indirect block? */
if (where->bh)
mark_buffer_dirty_inode(where->bh, inode);
mmb_mark_buffer_dirty(where->bh, &EXT2_I(inode)->i_metadata_bhs);
inode_set_ctime_current(inode);
mark_inode_dirty(inode);
@@ -1210,7 +1211,8 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
if (partial == chain)
mark_inode_dirty(inode);
else
mark_buffer_dirty_inode(partial->bh, inode);
mmb_mark_buffer_dirty(partial->bh,
&EXT2_I(inode)->i_metadata_bhs);
ext2_free_branches(inode, &nr, &nr+1, (chain+n-1) - partial);
}
/* Clear the ends of indirect blocks on the shared branch */
@@ -1219,7 +1221,8 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
partial->p + 1,
(__le32*)partial->bh->b_data+addr_per_block,
(chain+n-1) - partial);
mark_buffer_dirty_inode(partial->bh, inode);
mmb_mark_buffer_dirty(partial->bh,
&EXT2_I(inode)->i_metadata_bhs);
brelse (partial->bh);
partial--;
}
@@ -1302,7 +1305,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
if (inode_needs_sync(inode)) {
sync_mapping_buffers(inode->i_mapping);
mmb_sync(&EXT2_I(inode)->i_metadata_bhs);
sync_inode_metadata(inode, 1);
} else {
mark_inode_dirty(inode);

View File

@@ -215,6 +215,7 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
#ifdef CONFIG_QUOTA
memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
#endif
mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data);
return &ei->vfs_inode;
}

View File

@@ -1121,6 +1121,7 @@ struct ext4_inode_info {
struct rw_semaphore i_data_sem;
struct inode vfs_inode;
struct jbd2_inode *jinode;
struct mapping_metadata_bhs i_metadata_bhs;
/*
* File creation time. Its function is same as that of

View File

@@ -390,7 +390,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
}
} else {
if (inode)
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh,
&EXT4_I(inode)->i_metadata_bhs);
else
mark_buffer_dirty(bh);
if (inode && inode_needs_sync(inode)) {

View File

@@ -68,7 +68,7 @@ static int ext4_sync_parent(struct inode *inode)
* through ext4_evict_inode()) and so we are safe to flush
* metadata blocks and the inode.
*/
ret = sync_mapping_buffers(inode->i_mapping);
ret = mmb_sync(&EXT4_I(inode)->i_metadata_bhs);
if (ret)
break;
ret = sync_inode_metadata(inode, 1);
@@ -89,7 +89,8 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end,
};
int ret;
ret = generic_buffers_fsync_noflush(file, start, end, datasync);
ret = mmb_fsync_noflush(file, &EXT4_I(inode)->i_metadata_bhs,
start, end, datasync);
if (ret)
return ret;

View File

@@ -195,7 +195,9 @@ void ext4_evict_inode(struct inode *inode)
ext4_warning_inode(inode, "data will be lost");
truncate_inode_pages_final(&inode->i_data);
/* Avoid mballoc special inode which has no proper iops */
if (!EXT4_SB(inode->i_sb)->s_journal)
mmb_sync(&EXT4_I(inode)->i_metadata_bhs);
goto no_delete;
}
@@ -1430,9 +1432,6 @@ static int write_end_fn(handle_t *handle, struct inode *inode,
/*
* We need to pick up the new inode size which generic_commit_write gave us
* `iocb` can be NULL - eg, when called from page_symlink().
*
* ext4 never places buffers on inode->i_mapping->i_private_list. metadata
* buffers are managed internally.
*/
static int ext4_write_end(const struct kiocb *iocb,
struct address_space *mapping,
@@ -3447,7 +3446,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
}
/* Any metadata buffers to write? */
if (!list_empty(&inode->i_mapping->i_private_list))
if (mmb_has_buffers(&EXT4_I(inode)->i_metadata_bhs))
return true;
return inode_state_read_once(inode) & I_DIRTY_DATASYNC;
}

View File

@@ -1424,6 +1424,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
ext4_fc_init_inode(&ei->vfs_inode);
spin_lock_init(&ei->i_fc_lock);
mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data);
return &ei->vfs_inode;
}
@@ -1520,7 +1521,8 @@ static void destroy_inodecache(void)
void ext4_clear_inode(struct inode *inode)
{
ext4_fc_del(inode);
invalidate_inode_buffers(inode);
if (!EXT4_SB(inode->i_sb)->s_journal)
mmb_invalidate(&EXT4_I(inode)->i_metadata_bhs);
clear_inode(inode);
ext4_discard_preallocations(inode);
/*

View File

@@ -1027,7 +1027,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
de++;
nr_slots--;
}
mark_buffer_dirty_inode(bh, dir);
mmb_mark_buffer_dirty(bh, &MSDOS_I(dir)->i_metadata_bhs);
if (IS_DIRSYNC(dir))
err = sync_dirty_buffer(bh);
brelse(bh);
@@ -1062,7 +1062,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
de--;
nr_slots--;
}
mark_buffer_dirty_inode(bh, dir);
mmb_mark_buffer_dirty(bh, &MSDOS_I(dir)->i_metadata_bhs);
if (IS_DIRSYNC(dir))
err = sync_dirty_buffer(bh);
brelse(bh);
@@ -1114,7 +1114,7 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
memset(bhs[n]->b_data, 0, sb->s_blocksize);
set_buffer_uptodate(bhs[n]);
unlock_buffer(bhs[n]);
mark_buffer_dirty_inode(bhs[n], dir);
mmb_mark_buffer_dirty(bhs[n], &MSDOS_I(dir)->i_metadata_bhs);
n++;
blknr++;
@@ -1195,7 +1195,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts)
memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de));
set_buffer_uptodate(bhs[0]);
unlock_buffer(bhs[0]);
mark_buffer_dirty_inode(bhs[0], dir);
mmb_mark_buffer_dirty(bhs[0], &MSDOS_I(dir)->i_metadata_bhs);
err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE);
if (err)
@@ -1257,7 +1257,8 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
memcpy(bhs[n]->b_data, slots, copy);
set_buffer_uptodate(bhs[n]);
unlock_buffer(bhs[n]);
mark_buffer_dirty_inode(bhs[n], dir);
mmb_mark_buffer_dirty(bhs[n],
&MSDOS_I(dir)->i_metadata_bhs);
slots += copy;
size -= copy;
if (!size)
@@ -1358,7 +1359,8 @@ found:
for (i = 0; i < long_bhs; i++) {
int copy = umin(sb->s_blocksize - offset, size);
memcpy(bhs[i]->b_data + offset, slots, copy);
mark_buffer_dirty_inode(bhs[i], dir);
mmb_mark_buffer_dirty(bhs[i],
&MSDOS_I(dir)->i_metadata_bhs);
offset = 0;
slots += copy;
size -= copy;
@@ -1369,7 +1371,8 @@ found:
/* Fill the short name slot. */
int copy = umin(sb->s_blocksize - offset, size);
memcpy(bhs[i]->b_data + offset, slots, copy);
mark_buffer_dirty_inode(bhs[i], dir);
mmb_mark_buffer_dirty(bhs[i],
&MSDOS_I(dir)->i_metadata_bhs);
if (IS_DIRSYNC(dir))
err = sync_dirty_buffer(bhs[i]);
}

View File

@@ -130,6 +130,7 @@ struct msdos_inode_info {
struct hlist_node i_dir_hash; /* hash by i_logstart */
struct rw_semaphore truncate_lock; /* protect bmap against truncate */
struct timespec64 i_crtime; /* File creation (birth) time */
struct mapping_metadata_bhs i_metadata_bhs;
struct inode vfs_inode;
};

View File

@@ -170,9 +170,11 @@ static void fat12_ent_put(struct fat_entry *fatent, int new)
}
spin_unlock(&fat12_entry_lock);
mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
mmb_mark_buffer_dirty(fatent->bhs[0],
&MSDOS_I(fatent->fat_inode)->i_metadata_bhs);
if (fatent->nr_bhs == 2)
mark_buffer_dirty_inode(fatent->bhs[1], fatent->fat_inode);
mmb_mark_buffer_dirty(fatent->bhs[1],
&MSDOS_I(fatent->fat_inode)->i_metadata_bhs);
}
static void fat16_ent_put(struct fat_entry *fatent, int new)
@@ -181,7 +183,8 @@ static void fat16_ent_put(struct fat_entry *fatent, int new)
new = EOF_FAT16;
*fatent->u.ent16_p = cpu_to_le16(new);
mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
mmb_mark_buffer_dirty(fatent->bhs[0],
&MSDOS_I(fatent->fat_inode)->i_metadata_bhs);
}
static void fat32_ent_put(struct fat_entry *fatent, int new)
@@ -189,7 +192,8 @@ static void fat32_ent_put(struct fat_entry *fatent, int new)
WARN_ON(new & 0xf0000000);
new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff;
*fatent->u.ent32_p = cpu_to_le32(new);
mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
mmb_mark_buffer_dirty(fatent->bhs[0],
&MSDOS_I(fatent->fat_inode)->i_metadata_bhs);
}
static int fat12_ent_next(struct fat_entry *fatent)
@@ -395,7 +399,8 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs,
memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);
set_buffer_uptodate(c_bh);
unlock_buffer(c_bh);
mark_buffer_dirty_inode(c_bh, sbi->fat_inode);
mmb_mark_buffer_dirty(c_bh,
&MSDOS_I(sbi->fat_inode)->i_metadata_bhs);
if (sb->s_flags & SB_SYNCHRONOUS)
err = sync_dirty_buffer(c_bh);
brelse(c_bh);

View File

@@ -186,13 +186,15 @@ static int fat_file_release(struct inode *inode, struct file *filp)
int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *inode = filp->f_mapping->host;
struct inode *fat_inode = MSDOS_SB(inode->i_sb)->fat_inode;
int err;
err = __generic_file_fsync(filp, start, end, datasync);
err = mmb_fsync_noflush(filp, &MSDOS_I(inode)->i_metadata_bhs,
start, end, datasync);
if (err)
return err;
err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
err = mmb_sync(&MSDOS_I(fat_inode)->i_metadata_bhs);
if (err)
return err;
@@ -236,7 +238,7 @@ static int fat_cont_expand(struct inode *inode, loff_t size)
*/
err = filemap_fdatawrite_range(mapping, start,
start + count - 1);
err2 = sync_mapping_buffers(mapping);
err2 = mmb_sync(&MSDOS_I(inode)->i_metadata_bhs);
if (!err)
err = err2;
err2 = write_inode_now(inode, 1);

View File

@@ -657,10 +657,12 @@ static void fat_evict_inode(struct inode *inode)
if (!inode->i_nlink) {
inode->i_size = 0;
fat_truncate_blocks(inode, 0);
} else
} else {
mmb_sync(&MSDOS_I(inode)->i_metadata_bhs);
fat_free_eofblocks(inode);
}
invalidate_inode_buffers(inode);
mmb_invalidate(&MSDOS_I(inode)->i_metadata_bhs);
clear_inode(inode);
fat_cache_inval_inode(inode);
fat_detach(inode);
@@ -761,6 +763,7 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
ei->i_pos = 0;
ei->i_crtime.tv_sec = 0;
ei->i_crtime.tv_nsec = 0;
mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data);
return &ei->vfs_inode;
}

View File

@@ -527,7 +527,8 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
if (update_dotdot) {
fat_set_start(dotdot_de, MSDOS_I(new_dir)->i_logstart);
mark_buffer_dirty_inode(dotdot_bh, old_inode);
mmb_mark_buffer_dirty(dotdot_bh,
&MSDOS_I(old_inode)->i_metadata_bhs);
if (IS_DIRSYNC(new_dir)) {
err = sync_dirty_buffer(dotdot_bh);
if (err)
@@ -566,7 +567,8 @@ error_dotdot:
if (update_dotdot) {
fat_set_start(dotdot_de, MSDOS_I(old_dir)->i_logstart);
mark_buffer_dirty_inode(dotdot_bh, old_inode);
mmb_mark_buffer_dirty(dotdot_bh,
&MSDOS_I(old_inode)->i_metadata_bhs);
corrupt |= sync_dirty_buffer(dotdot_bh);
}
error_inode:

View File

@@ -915,7 +915,7 @@ static int vfat_update_dotdot_de(struct inode *dir, struct inode *inode,
struct msdos_dir_entry *dotdot_de)
{
fat_set_start(dotdot_de, MSDOS_I(dir)->i_logstart);
mark_buffer_dirty_inode(dotdot_bh, inode);
mmb_mark_buffer_dirty(dotdot_bh, &MSDOS_I(inode)->i_metadata_bhs);
if (IS_DIRSYNC(dir))
return sync_dirty_buffer(dotdot_bh);
return 0;

View File

@@ -1149,7 +1149,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping->flags = 0;
gfp_mask = mapping_gfp_mask(sdp->sd_inode->i_mapping);
mapping_set_gfp_mask(mapping, gfp_mask);
mapping->i_private_data = NULL;
mapping->writeback_index = 0;
}

View File

@@ -622,13 +622,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
trace_hugetlbfs_evict_inode(inode);
remove_inode_hugepages(inode, 0, LLONG_MAX);
/*
* Get the resv_map from the address space embedded in the inode.
* This is the address space which points to any resv_map allocated
* at inode creation time. If this is a device special inode,
* i_mapping may not point to the original address space.
*/
resv_map = (struct resv_map *)(&inode->i_data)->i_private_data;
resv_map = HUGETLBFS_I(inode)->resv_map;
/* Only regular and link inodes have associated reserve maps */
if (resv_map)
resv_map_release(&resv_map->refs);
@@ -907,6 +901,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
simple_inode_init_ts(inode);
inode->i_op = &hugetlbfs_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
HUGETLBFS_I(inode)->resv_map = NULL;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
lockdep_annotate_inode_mutex_key(inode);
@@ -950,7 +945,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
&hugetlbfs_i_mmap_rwsem_key);
inode->i_mapping->a_ops = &hugetlbfs_aops;
simple_inode_init_ts(inode);
inode->i_mapping->i_private_data = resv_map;
info->resv_map = resv_map;
info->seals = F_SEAL_SEAL;
switch (mode & S_IFMT) {
default:

View File

@@ -17,7 +17,6 @@
#include <linux/fsverity.h>
#include <linux/mount.h>
#include <linux/posix_acl.h>
#include <linux/buffer_head.h> /* for inode_has_buffers */
#include <linux/ratelimit.h>
#include <linux/list_lru.h>
#include <linux/iversion.h>
@@ -284,7 +283,6 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp
atomic_set(&mapping->nr_thps, 0);
#endif
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->i_private_data = NULL;
mapping->writeback_index = 0;
init_rwsem(&mapping->invalidate_lock);
lockdep_set_class_and_name(&mapping->invalidate_lock,
@@ -367,7 +365,6 @@ struct inode *alloc_inode(struct super_block *sb)
void __destroy_inode(struct inode *inode)
{
BUG_ON(inode_has_buffers(inode));
inode_detach_wb(inode);
security_inode_free(inode);
fsnotify_inode_delete(inode);
@@ -484,7 +481,6 @@ static void __address_space_init_once(struct address_space *mapping)
{
xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->i_private_list);
spin_lock_init(&mapping->i_private_lock);
mapping->i_mmap = RB_ROOT_CACHED;
}
@@ -798,7 +794,6 @@ void clear_inode(struct inode *inode)
* nor even WARN_ON(!mapping_empty).
*/
xa_unlock_irq(&inode->i_data.i_pages);
BUG_ON(!list_empty(&inode->i_data.i_private_list));
BUG_ON(!(inode_state_read_once(inode) & I_FREEING));
BUG_ON(inode_state_read_once(inode) & I_CLEAR);
BUG_ON(!list_empty(&inode->i_wb_list));
@@ -994,19 +989,18 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
* page cache in order to free up struct inodes: lowmem might
* be under pressure before the cache inside the highmem zone.
*/
if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
if (!mapping_empty(&inode->i_data)) {
unsigned long reap;
inode_pin_lru_isolating(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&lru->lock);
if (remove_inode_buffers(inode)) {
unsigned long reap;
reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
__count_vm_events(PGINODESTEAL, reap);
mm_account_reclaimed_pages(reap);
}
reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
__count_vm_events(PGINODESTEAL, reap);
mm_account_reclaimed_pages(reap);
inode_unpin_lru_isolating(inode);
return LRU_RETRY;
}

View File

@@ -18,7 +18,6 @@
#include <linux/exportfs.h>
#include <linux/iversion.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h> /* sync_mapping_buffers */
#include <linux/fs_context.h>
#include <linux/pseudo_fs.h>
#include <linux/fsnotify.h>
@@ -1539,71 +1538,63 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
EXPORT_SYMBOL_GPL(generic_fh_to_parent);
/**
* __generic_file_fsync - generic fsync implementation for simple filesystems
* simple_fsync_noflush - generic fsync implementation for simple filesystems
*
* @file: file to synchronize
* @start: start offset in bytes
* @end: end offset in bytes (inclusive)
* @datasync: only synchronize essential metadata if true
*
* This is a generic implementation of the fsync method for simple
* filesystems which track all non-inode metadata in the buffers list
* hanging off the address_space structure.
* This function is an fsync handler for simple filesystems. It writes out
* dirty data, inode (if dirty), but does not issue a cache flush.
*/
int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync)
int simple_fsync_noflush(struct file *file, loff_t start, loff_t end,
int datasync)
{
struct inode *inode = file->f_mapping->host;
int err;
int ret;
int ret = 0;
err = file_write_and_wait_range(file, start, end);
if (err)
return err;
inode_lock(inode);
ret = sync_mapping_buffers(inode->i_mapping);
if (!(inode_state_read_once(inode) & I_DIRTY_ALL))
goto out;
if (datasync && !(inode_state_read_once(inode) & I_DIRTY_DATASYNC))
goto out;
err = sync_inode_metadata(inode, 1);
if (ret == 0)
ret = err;
ret = sync_inode_metadata(inode, 1);
out:
inode_unlock(inode);
/* check and advance again to catch errors after syncing out buffers */
err = file_check_and_advance_wb_err(file);
if (ret == 0)
ret = err;
return ret;
}
EXPORT_SYMBOL(__generic_file_fsync);
EXPORT_SYMBOL(simple_fsync_noflush);
/**
* generic_file_fsync - generic fsync implementation for simple filesystems
* with flush
* simple_fsync - fsync implementation for simple filesystems with flush
* @file: file to synchronize
* @start: start offset in bytes
* @end: end offset in bytes (inclusive)
* @datasync: only synchronize essential metadata if true
*
* This function is an fsync handler for simple filesystems. It writes out
* dirty data, inode (if dirty), and issues a cache flush.
*/
int generic_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync)
int simple_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
int err;
err = __generic_file_fsync(file, start, end, datasync);
err = simple_fsync_noflush(file, start, end, datasync);
if (err)
return err;
return blkdev_issue_flush(inode->i_sb->s_bdev);
}
EXPORT_SYMBOL(generic_file_fsync);
EXPORT_SYMBOL(simple_fsync);
/**
* generic_check_addressable - Check addressability of file system

View File

@@ -23,7 +23,7 @@ const struct file_operations minix_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.iterate_shared = minix_readdir,
.fsync = generic_file_fsync,
.fsync = minix_fsync,
};
/*

View File

@@ -7,8 +7,16 @@
* minix regular file handling primitives
*/
#include <linux/buffer_head.h>
#include "minix.h"
/*
 * fsync handler for minix files and directories: delegates to mmb_fsync()
 * with the inode's private metadata buffer_head tracking structure.
 */
int minix_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	return mmb_fsync(file,
			 &minix_i(file->f_mapping->host)->i_metadata_bhs,
			 start, end, datasync);
}
/*
* We have mostly NULLs here: the current defaults are OK for
* the minix filesystem.
@@ -18,7 +26,7 @@ const struct file_operations minix_file_operations = {
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.mmap_prepare = generic_file_mmap_prepare,
.fsync = generic_file_fsync,
.fsync = minix_fsync,
.splice_read = filemap_splice_read,
};

View File

@@ -48,8 +48,10 @@ static void minix_evict_inode(struct inode *inode)
if (!inode->i_nlink) {
inode->i_size = 0;
minix_truncate(inode);
} else {
mmb_sync(&minix_i(inode)->i_metadata_bhs);
}
invalidate_inode_buffers(inode);
mmb_invalidate(&minix_i(inode)->i_metadata_bhs);
clear_inode(inode);
if (!inode->i_nlink)
minix_free_inode(inode);
@@ -83,6 +85,8 @@ static struct inode *minix_alloc_inode(struct super_block *sb)
ei = alloc_inode_sb(sb, minix_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data);
return &ei->vfs_inode;
}

View File

@@ -98,7 +98,7 @@ static int alloc_branch(struct inode *inode,
*branch[n].p = branch[n].key;
set_buffer_uptodate(bh);
unlock_buffer(bh);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &minix_i(inode)->i_metadata_bhs);
parent = nr;
}
if (n == num)
@@ -135,7 +135,8 @@ static inline int splice_branch(struct inode *inode,
/* had we spliced it onto indirect block? */
if (where->bh)
mark_buffer_dirty_inode(where->bh, inode);
mmb_mark_buffer_dirty(where->bh,
&minix_i(inode)->i_metadata_bhs);
mark_inode_dirty(inode);
return 0;
@@ -328,14 +329,16 @@ static inline void truncate (struct inode * inode)
if (partial == chain)
mark_inode_dirty(inode);
else
mark_buffer_dirty_inode(partial->bh, inode);
mmb_mark_buffer_dirty(partial->bh,
&minix_i(inode)->i_metadata_bhs);
free_branches(inode, &nr, &nr+1, (chain+n-1) - partial);
}
/* Clear the ends of indirect blocks on the shared branch */
while (partial > chain) {
free_branches(inode, partial->p + 1, block_end(partial->bh),
(chain+n-1) - partial);
mark_buffer_dirty_inode(partial->bh, inode);
mmb_mark_buffer_dirty(partial->bh,
&minix_i(inode)->i_metadata_bhs);
brelse (partial->bh);
partial--;
}

View File

@@ -19,6 +19,7 @@ struct minix_inode_info {
__u16 i1_data[16];
__u32 i2_data[16];
} u;
struct mapping_metadata_bhs i_metadata_bhs;
struct inode vfs_inode;
};
@@ -57,6 +58,8 @@ unsigned long minix_count_free_blocks(struct super_block *sb);
int minix_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
int minix_prepare_chunk(struct folio *folio, loff_t pos, unsigned len);
struct mapping_metadata_bhs *minix_get_metadata_bhs(struct inode *inode);
int minix_fsync(struct file *file, loff_t start, loff_t end, int datasync);
extern void V1_minix_truncate(struct inode *);
extern void V2_minix_truncate(struct inode *);

View File

@@ -387,9 +387,6 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count,
int err2;
err = filemap_fdatawrite_range(mapping, pos, end - 1);
err2 = sync_mapping_buffers(mapping);
if (!err)
err = err2;
err2 = write_inode_now(inode, 1);
if (!err)
err = err2;

View File

@@ -1815,7 +1815,6 @@ void ntfs_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
invalidate_inode_buffers(inode);
clear_inode(inode);
ni_clear(ntfs_i(inode));

View File

@@ -3971,7 +3971,6 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
}
sync_mapping_buffers(mapping);
if (blocking == DLM_LOCK_EX) {
truncate_inode_pages(mapping, 0);
} else {

View File

@@ -1683,9 +1683,6 @@ bail:
if (rename_lock)
ocfs2_rename_unlock(osb);
if (new_inode)
sync_mapping_buffers(old_inode->i_mapping);
iput(new_inode);
ocfs2_free_dir_lookup_result(&target_lookup_res);

View File

@@ -334,7 +334,7 @@ const struct file_operations omfs_file_operations = {
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.mmap_prepare = generic_file_mmap_prepare,
.fsync = generic_file_fsync,
.fsync = simple_fsync,
.splice_read = filemap_splice_read,
};

View File

@@ -71,7 +71,7 @@ const struct file_operations qnx4_dir_operations =
.llseek = generic_file_llseek,
.read = generic_read_dir,
.iterate_shared = qnx4_readdir,
.fsync = generic_file_fsync,
.fsync = simple_fsync,
.setlease = generic_setlease,
};

View File

@@ -275,7 +275,7 @@ const struct file_operations qnx6_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.iterate_shared = qnx6_readdir,
.fsync = generic_file_fsync,
.fsync = simple_fsync,
.setlease = generic_setlease,
};

View File

@@ -157,6 +157,6 @@ const struct file_operations udf_dir_operations = {
.read = generic_read_dir,
.iterate_shared = udf_readdir,
.unlocked_ioctl = udf_ioctl,
.fsync = generic_file_fsync,
.fsync = udf_fsync,
.setlease = generic_setlease,
};

View File

@@ -430,9 +430,10 @@ void udf_fiiter_write_fi(struct udf_fileident_iter *iter, uint8_t *impuse)
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
mark_inode_dirty(iter->dir);
} else {
mark_buffer_dirty_inode(iter->bh[0], iter->dir);
mmb_mark_buffer_dirty(iter->bh[0], &iinfo->i_metadata_bhs);
if (iter->bh[1])
mark_buffer_dirty_inode(iter->bh[1], iter->dir);
mmb_mark_buffer_dirty(iter->bh[1],
&iinfo->i_metadata_bhs);
}
inode_inc_iversion(iter->dir);
}

View File

@@ -198,6 +198,13 @@ static int udf_file_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
/*
 * fsync handler for UDF files and directories: delegates to mmb_fsync()
 * with the inode's private metadata buffer_head tracking structure.
 */
int udf_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	return mmb_fsync(file,
			 &UDF_I(file->f_mapping->host)->i_metadata_bhs,
			 start, end, datasync);
}
const struct file_operations udf_file_operations = {
.read_iter = generic_file_read_iter,
.unlocked_ioctl = udf_ioctl,
@@ -205,7 +212,7 @@ const struct file_operations udf_file_operations = {
.mmap = udf_file_mmap,
.write_iter = udf_file_write_iter,
.release = udf_release_file,
.fsync = generic_file_fsync,
.fsync = udf_fsync,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.llseek = generic_file_llseek,

View File

@@ -154,7 +154,9 @@ void udf_evict_inode(struct inode *inode)
}
}
truncate_inode_pages_final(&inode->i_data);
invalidate_inode_buffers(inode);
if (!want_delete)
mmb_sync(&iinfo->i_metadata_bhs);
mmb_invalidate(&iinfo->i_metadata_bhs);
clear_inode(inode);
kfree(iinfo->i_data);
iinfo->i_data = NULL;
@@ -1258,7 +1260,7 @@ struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block,
memset(bh->b_data, 0x00, inode->i_sb->s_blocksize);
set_buffer_uptodate(bh);
unlock_buffer(bh);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &UDF_I(inode)->i_metadata_bhs);
return bh;
}
@@ -2006,7 +2008,7 @@ int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block,
memset(bh->b_data, 0x00, sb->s_blocksize);
set_buffer_uptodate(bh);
unlock_buffer(bh);
mark_buffer_dirty_inode(bh, inode);
mmb_mark_buffer_dirty(bh, &UDF_I(inode)->i_metadata_bhs);
aed = (struct allocExtDesc *)(bh->b_data);
if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT)) {
@@ -2101,7 +2103,7 @@ int __udf_add_aext(struct inode *inode, struct extent_position *epos,
else
udf_update_tag(epos->bh->b_data,
sizeof(struct allocExtDesc));
mark_buffer_dirty_inode(epos->bh, inode);
mmb_mark_buffer_dirty(epos->bh, &iinfo->i_metadata_bhs);
}
return 0;
@@ -2185,7 +2187,7 @@ void udf_write_aext(struct inode *inode, struct extent_position *epos,
le32_to_cpu(aed->lengthAllocDescs) +
sizeof(struct allocExtDesc));
}
mark_buffer_dirty_inode(epos->bh, inode);
mmb_mark_buffer_dirty(epos->bh, &iinfo->i_metadata_bhs);
} else {
mark_inode_dirty(inode);
}
@@ -2393,7 +2395,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
else
udf_update_tag(oepos.bh->b_data,
sizeof(struct allocExtDesc));
mark_buffer_dirty_inode(oepos.bh, inode);
mmb_mark_buffer_dirty(oepos.bh, &iinfo->i_metadata_bhs);
}
} else {
udf_write_aext(inode, &oepos, &eloc, elen, 1);
@@ -2410,7 +2412,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
else
udf_update_tag(oepos.bh->b_data,
sizeof(struct allocExtDesc));
mark_buffer_dirty_inode(oepos.bh, inode);
mmb_mark_buffer_dirty(oepos.bh, &iinfo->i_metadata_bhs);
}
}

View File

@@ -638,7 +638,7 @@ static int udf_symlink(struct mnt_idmap *idmap, struct inode *dir,
memset(epos.bh->b_data, 0x00, bsize);
set_buffer_uptodate(epos.bh);
unlock_buffer(epos.bh);
mark_buffer_dirty_inode(epos.bh, inode);
mmb_mark_buffer_dirty(epos.bh, &iinfo->i_metadata_bhs);
ea = epos.bh->b_data + udf_ext0_offset(inode);
} else
ea = iinfo->i_data + iinfo->i_lenEAttr;

View File

@@ -166,6 +166,7 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
ei->cached_extent.lstart = -1;
spin_lock_init(&ei->i_extent_cache_lock);
inode_set_iversion(&ei->vfs_inode, 1);
mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data);
return &ei->vfs_inode;
}

View File

@@ -186,7 +186,7 @@ static void udf_update_alloc_ext_desc(struct inode *inode,
len += lenalloc;
udf_update_tag(epos->bh->b_data, len);
mark_buffer_dirty_inode(epos->bh, inode);
mmb_mark_buffer_dirty(epos->bh, &UDF_I(inode)->i_metadata_bhs);
}
/*

View File

@@ -50,6 +50,7 @@ struct udf_inode_info {
struct kernel_lb_addr i_locStreamdir;
__u64 i_lenStreams;
struct rw_semaphore i_data_sem;
struct mapping_metadata_bhs i_metadata_bhs;
struct udf_ext_cache cached_extent;
/* Spinlock for protecting extent cache */
spinlock_t i_extent_cache_lock;

View File

@@ -137,6 +137,7 @@ static inline unsigned int udf_dir_entry_len(struct fileIdentDesc *cfi)
/* file.c */
extern long udf_ioctl(struct file *, unsigned int, unsigned long);
int udf_fsync(struct file *file, loff_t start, loff_t end, int datasync);
/* inode.c */
extern struct inode *__udf_iget(struct super_block *, struct kernel_lb_addr *,

View File

@@ -652,7 +652,7 @@ const struct file_operations ufs_dir_operations = {
.release = ufs_dir_release,
.read = generic_read_dir,
.iterate_shared = ufs_readdir,
.fsync = generic_file_fsync,
.fsync = simple_fsync,
.llseek = ufs_dir_llseek,
.setlease = generic_setlease,
};

View File

@@ -41,7 +41,7 @@ const struct file_operations ufs_file_operations = {
.write_iter = generic_file_write_iter,
.mmap_prepare = generic_file_mmap_prepare,
.open = generic_file_open,
.fsync = generic_file_fsync,
.fsync = simple_fsync,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.setlease = generic_setlease,

View File

@@ -853,7 +853,6 @@ void ufs_evict_inode(struct inode * inode)
ufs_update_inode(inode, inode_needs_sync(inode));
}
invalidate_inode_buffers(inode);
clear_inode(inode);
if (want_delete)

View File

@@ -73,8 +73,8 @@ struct buffer_head {
bh_end_io_t *b_end_io; /* I/O completion */
void *b_private; /* reserved for b_end_io */
struct list_head b_assoc_buffers; /* associated with another mapping */
struct address_space *b_assoc_map; /* mapping this buffer is
associated with */
struct mapping_metadata_bhs *b_mmb; /* head of the list of metadata bhs
* this buffer is associated with */
atomic_t b_count; /* users using this buffer_head */
spinlock_t b_uptodate_lock; /* Used by the first bh in a page, to
* serialise IO completion of other
@@ -205,12 +205,12 @@ struct buffer_head *create_empty_buffers(struct folio *folio,
void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
void end_buffer_write_sync(struct buffer_head *bh, int uptodate);
/* Things to do with buffers at mapping->private_list */
void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
bool datasync);
int generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
bool datasync);
/* Things to do with metadata buffers list */
void mmb_mark_buffer_dirty(struct buffer_head *bh, struct mapping_metadata_bhs *mmb);
int mmb_fsync_noflush(struct file *file, struct mapping_metadata_bhs *mmb,
loff_t start, loff_t end, bool datasync);
int mmb_fsync(struct file *file, struct mapping_metadata_bhs *mmb,
loff_t start, loff_t end, bool datasync);
void clean_bdev_aliases(struct block_device *bdev, sector_t block,
sector_t len);
static inline void clean_bdev_bh_alias(struct buffer_head *bh)
@@ -515,10 +515,10 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio);
void buffer_init(void);
bool try_to_free_buffers(struct folio *folio);
int inode_has_buffers(struct inode *inode);
void invalidate_inode_buffers(struct inode *inode);
int remove_inode_buffers(struct inode *inode);
int sync_mapping_buffers(struct address_space *mapping);
void mmb_init(struct mapping_metadata_bhs *mmb, struct address_space *mapping);
bool mmb_has_buffers(struct mapping_metadata_bhs *mmb);
void mmb_invalidate(struct mapping_metadata_bhs *mmb);
int mmb_sync(struct mapping_metadata_bhs *mmb);
void invalidate_bh_lrus(void);
void invalidate_bh_lrus_cpu(void);
bool has_bh_in_lru(int cpu, void *dummy);
@@ -528,10 +528,7 @@ extern int buffer_heads_over_limit;
static inline void buffer_init(void) {}
static inline bool try_to_free_buffers(struct folio *folio) { return true; }
static inline int inode_has_buffers(struct inode *inode) { return 0; }
static inline void invalidate_inode_buffers(struct inode *inode) {}
static inline int remove_inode_buffers(struct inode *inode) { return 1; }
static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
static inline int mmb_sync(struct mapping_metadata_bhs *mmb) { return 0; }
static inline void invalidate_bh_lrus(void) {}
static inline void invalidate_bh_lrus_cpu(void) {}
static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; }

View File

@@ -445,6 +445,13 @@ struct address_space_operations {
extern const struct address_space_operations empty_aops;
/* Structure for tracking metadata buffer heads associated with the mapping */
struct mapping_metadata_bhs {
struct address_space *mapping; /* Mapping bhs are associated with */
spinlock_t lock; /* Lock protecting bh list */
struct list_head list; /* The list of bhs (b_assoc_buffers) */
};
/**
* struct address_space - Contents of a cacheable, mappable object.
* @host: Owner, either the inode or the block_device.
@@ -464,8 +471,6 @@ extern const struct address_space_operations empty_aops;
* @flags: Error bits and flags (AS_*).
* @wb_err: The most recent error which has occurred.
* @i_private_lock: For use by the owner of the address_space.
* @i_private_list: For use by the owner of the address_space.
* @i_private_data: For use by the owner of the address_space.
*/
struct address_space {
struct inode *host;
@@ -484,9 +489,7 @@ struct address_space {
unsigned long flags;
errseq_t wb_err;
spinlock_t i_private_lock;
struct list_head i_private_list;
struct rw_semaphore i_mmap_rwsem;
void * i_private_data;
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
/*
* On most architectures that alignment is already the case; but
@@ -3293,8 +3296,8 @@ void simple_offset_destroy(struct offset_ctx *octx);
extern const struct file_operations simple_offset_dir_operations;
extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int simple_fsync_noflush(struct file *, loff_t, loff_t, int);
extern int simple_fsync(struct file *, loff_t, loff_t, int);
extern int generic_check_addressable(unsigned, u64);

View File

@@ -518,6 +518,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
struct hugetlbfs_inode_info {
struct inode vfs_inode;
struct resv_map *resv_map;
unsigned int seals;
};

View File

@@ -1157,15 +1157,7 @@ void resv_map_release(struct kref *ref)
static inline struct resv_map *inode_resv_map(struct inode *inode)
{
/*
* At inode evict time, i_mapping may not point to the original
* address space within the inode. This original address space
* contains the pointer to the resv_map. So, always use the
* address space embedded within the inode.
* The VERY common case is inode->mapping == &inode->i_data but,
* this may not be true for device special inodes.
*/
return (struct resv_map *)(&inode->i_data)->i_private_data;
return HUGETLBFS_I(inode)->resv_map;
}
static struct resv_map *vma_resv_map(struct vm_area_struct *vma)

View File

@@ -30,6 +30,7 @@ struct gmem_file {
struct gmem_inode {
struct shared_policy policy;
struct inode vfs_inode;
struct list_head gmem_file_list;
u64 flags;
};
@@ -39,8 +40,8 @@ static __always_inline struct gmem_inode *GMEM_I(struct inode *inode)
return container_of(inode, struct gmem_inode, vfs_inode);
}
#define kvm_gmem_for_each_file(f, mapping) \
list_for_each_entry(f, &(mapping)->i_private_list, entry)
#define kvm_gmem_for_each_file(f, inode) \
list_for_each_entry(f, &GMEM_I(inode)->gmem_file_list, entry)
/**
* folio_file_pfn - like folio_file_page, but return a pfn.
@@ -202,7 +203,7 @@ static void kvm_gmem_invalidate_begin(struct inode *inode, pgoff_t start,
attr_filter = kvm_gmem_get_invalidate_filter(inode);
kvm_gmem_for_each_file(f, inode->i_mapping)
kvm_gmem_for_each_file(f, inode)
__kvm_gmem_invalidate_begin(f, start, end, attr_filter);
}
@@ -223,7 +224,7 @@ static void kvm_gmem_invalidate_end(struct inode *inode, pgoff_t start,
{
struct gmem_file *f;
kvm_gmem_for_each_file(f, inode->i_mapping)
kvm_gmem_for_each_file(f, inode)
__kvm_gmem_invalidate_end(f, start, end);
}
@@ -609,7 +610,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
kvm_get_kvm(kvm);
f->kvm = kvm;
xa_init(&f->bindings);
list_add(&f->entry, &inode->i_mapping->i_private_list);
list_add(&f->entry, &GMEM_I(inode)->gmem_file_list);
fd_install(fd, file);
return fd;
@@ -945,6 +946,7 @@ static struct inode *kvm_gmem_alloc_inode(struct super_block *sb)
mpol_shared_policy_init(&gi->policy, NULL);
gi->flags = 0;
INIT_LIST_HEAD(&gi->gmem_file_list);
return &gi->vfs_inode;
}