diff --git a/block/bdev.c b/block/bdev.c index ed022f8c48c7..bb0ffa3bb4df 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -417,19 +417,11 @@ static void init_once(void *data) inode_init_once(&ei->vfs_inode); } -static void bdev_evict_inode(struct inode *inode) -{ - truncate_inode_pages_final(&inode->i_data); - invalidate_inode_buffers(inode); /* is it needed here? */ - clear_inode(inode); -} - static const struct super_operations bdev_sops = { .statfs = simple_statfs, .alloc_inode = bdev_alloc_inode, .free_inode = bdev_free_inode, .drop_inode = inode_just_drop, - .evict_inode = bdev_evict_inode, }; static int bd_init_fs_context(struct fs_context *fc) diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 493500f37cb9..b8e23e8124ed 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -389,7 +389,7 @@ const struct file_operations adfs_dir_operations = { .read = generic_read_dir, .llseek = generic_file_llseek, .iterate_shared = adfs_iterate, - .fsync = generic_file_fsync, + .fsync = simple_fsync, }; static int diff --git a/fs/adfs/file.c b/fs/adfs/file.c index cd13165fd904..4a1828b3f88f 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -26,7 +26,7 @@ const struct file_operations adfs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .mmap_prepare = generic_file_mmap_prepare, - .fsync = generic_file_fsync, + .fsync = simple_fsync, .write_iter = generic_file_write_iter, .splice_read = filemap_splice_read, }; diff --git a/fs/affs/affs.h b/fs/affs/affs.h index ac4e9a02910b..a1eb400e1018 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -44,6 +44,7 @@ struct affs_inode_info { struct mutex i_link_lock; /* Protects internal inode access. */ struct mutex i_ext_lock; /* Protects internal inode access. 
*/ #define i_hash_lock i_ext_lock + struct mapping_metadata_bhs i_metadata_bhs; u32 i_blkcnt; /* block count */ u32 i_extcnt; /* extended block count */ u32 *i_lc; /* linear cache of extended blocks */ @@ -151,6 +152,7 @@ extern bool affs_nofilenametruncate(const struct dentry *dentry); extern int affs_check_name(const unsigned char *name, int len, bool notruncate); extern int affs_copy_name(unsigned char *bstr, struct dentry *dentry); +struct mapping_metadata_bhs *affs_get_metadata_bhs(struct inode *inode); /* bitmap. c */ diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index d8a96d8cc826..ee512baf57e5 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -57,7 +57,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh) AFFS_TAIL(sb, dir_bh)->hash_chain = cpu_to_be32(ino); affs_adjust_checksum(dir_bh, ino); - mark_buffer_dirty_inode(dir_bh, dir); + mmb_mark_buffer_dirty(dir_bh, &AFFS_I(dir)->i_metadata_bhs); affs_brelse(dir_bh); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); @@ -100,7 +100,7 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh) else AFFS_TAIL(sb, bh)->hash_chain = ino; affs_adjust_checksum(bh, be32_to_cpu(ino) - hash_ino); - mark_buffer_dirty_inode(bh, dir); + mmb_mark_buffer_dirty(bh, &AFFS_I(dir)->i_metadata_bhs); AFFS_TAIL(sb, rem_bh)->parent = 0; retval = 0; break; @@ -180,7 +180,7 @@ affs_remove_link(struct dentry *dentry) affs_unlock_dir(dir); goto done; } - mark_buffer_dirty_inode(link_bh, inode); + mmb_mark_buffer_dirty(link_bh, &AFFS_I(inode)->i_metadata_bhs); memcpy(AFFS_TAIL(sb, bh)->name, AFFS_TAIL(sb, link_bh)->name, 32); retval = affs_insert_hash(dir, bh); @@ -188,7 +188,7 @@ affs_remove_link(struct dentry *dentry) affs_unlock_dir(dir); goto done; } - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); affs_unlock_dir(dir); iput(dir); @@ -203,7 +203,7 @@ affs_remove_link(struct dentry *dentry) __be32 ino2 = AFFS_TAIL(sb, link_bh)->link_chain; 
AFFS_TAIL(sb, bh)->link_chain = ino2; affs_adjust_checksum(bh, be32_to_cpu(ino2) - link_ino); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); retval = 0; /* Fix the link count, if bh is a normal header block without links */ switch (be32_to_cpu(AFFS_TAIL(sb, bh)->stype)) { @@ -306,7 +306,7 @@ affs_remove_header(struct dentry *dentry) retval = affs_remove_hash(dir, bh); if (retval) goto done_unlock; - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); affs_unlock_dir(dir); diff --git a/fs/affs/file.c b/fs/affs/file.c index a51dee9d7d7e..2e93491484a9 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -140,14 +140,14 @@ affs_alloc_extblock(struct inode *inode, struct buffer_head *bh, u32 ext) AFFS_TAIL(sb, new_bh)->parent = cpu_to_be32(inode->i_ino); affs_fix_checksum(sb, new_bh); - mark_buffer_dirty_inode(new_bh, inode); + mmb_mark_buffer_dirty(new_bh, &AFFS_I(inode)->i_metadata_bhs); tmp = be32_to_cpu(AFFS_TAIL(sb, bh)->extension); if (tmp) affs_warning(sb, "alloc_ext", "previous extension set (%x)", tmp); AFFS_TAIL(sb, bh)->extension = cpu_to_be32(blocknr); affs_adjust_checksum(bh, blocknr - tmp); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); AFFS_I(inode)->i_extcnt++; mark_inode_dirty(inode); @@ -581,7 +581,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) memset(AFFS_DATA(bh) + boff, 0, tmp); be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp); affs_fix_checksum(sb, bh); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); size += tmp; bidx++; } else if (bidx) { @@ -603,7 +603,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp); affs_fix_checksum(sb, bh); bh->b_state &= ~(1UL << BH_New); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); if (prev_bh) { u32 tmp_next = 
be32_to_cpu(AFFS_DATA_HEAD(prev_bh)->next); @@ -613,7 +613,8 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) bidx, tmp_next); AFFS_DATA_HEAD(prev_bh)->next = cpu_to_be32(bh->b_blocknr); affs_adjust_checksum(prev_bh, bh->b_blocknr - tmp_next); - mark_buffer_dirty_inode(prev_bh, inode); + mmb_mark_buffer_dirty(prev_bh, + &AFFS_I(inode)->i_metadata_bhs); affs_brelse(prev_bh); } size += bsize; @@ -732,7 +733,7 @@ static int affs_write_end_ofs(const struct kiocb *iocb, AFFS_DATA_HEAD(bh)->size = cpu_to_be32( max(boff + tmp, be32_to_cpu(AFFS_DATA_HEAD(bh)->size))); affs_fix_checksum(sb, bh); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); written += tmp; from += tmp; bidx++; @@ -765,12 +766,13 @@ static int affs_write_end_ofs(const struct kiocb *iocb, bidx, tmp_next); AFFS_DATA_HEAD(prev_bh)->next = cpu_to_be32(bh->b_blocknr); affs_adjust_checksum(prev_bh, bh->b_blocknr - tmp_next); - mark_buffer_dirty_inode(prev_bh, inode); + mmb_mark_buffer_dirty(prev_bh, + &AFFS_I(inode)->i_metadata_bhs); } } affs_brelse(prev_bh); affs_fix_checksum(sb, bh); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); written += bsize; from += bsize; bidx++; @@ -799,13 +801,14 @@ static int affs_write_end_ofs(const struct kiocb *iocb, bidx, tmp_next); AFFS_DATA_HEAD(prev_bh)->next = cpu_to_be32(bh->b_blocknr); affs_adjust_checksum(prev_bh, bh->b_blocknr - tmp_next); - mark_buffer_dirty_inode(prev_bh, inode); + mmb_mark_buffer_dirty(prev_bh, + &AFFS_I(inode)->i_metadata_bhs); } } else if (be32_to_cpu(AFFS_DATA_HEAD(bh)->size) < tmp) AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp); affs_brelse(prev_bh); affs_fix_checksum(sb, bh); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); written += tmp; from += tmp; bidx++; @@ -942,7 +945,7 @@ affs_truncate(struct inode *inode) } AFFS_TAIL(sb, ext_bh)->extension = 0; affs_fix_checksum(sb, ext_bh); - 
mark_buffer_dirty_inode(ext_bh, inode); + mmb_mark_buffer_dirty(ext_bh, &AFFS_I(inode)->i_metadata_bhs); affs_brelse(ext_bh); if (inode->i_size) { diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 561fc0185e89..e03e9f109ff9 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -206,7 +206,7 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc) } } affs_fix_checksum(sb, bh); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); affs_brelse(bh); affs_free_prealloc(inode); return 0; @@ -267,9 +267,11 @@ affs_evict_inode(struct inode *inode) if (!inode->i_nlink) { inode->i_size = 0; affs_truncate(inode); + } else { + mmb_sync(&AFFS_I(inode)->i_metadata_bhs); } - invalidate_inode_buffers(inode); + mmb_invalidate(&AFFS_I(inode)->i_metadata_bhs); clear_inode(inode); affs_free_prealloc(inode); cache_page = (unsigned long)AFFS_I(inode)->i_lc; @@ -304,7 +306,7 @@ affs_new_inode(struct inode *dir) bh = affs_getzeroblk(sb, block); if (!bh) goto err_bh; - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); affs_brelse(bh); inode->i_uid = current_fsuid(); @@ -392,17 +394,17 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3 AFFS_TAIL(sb, bh)->link_chain = chain; AFFS_TAIL(sb, inode_bh)->link_chain = cpu_to_be32(block); affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain)); - mark_buffer_dirty_inode(inode_bh, inode); + mmb_mark_buffer_dirty(inode_bh, &AFFS_I(inode)->i_metadata_bhs); set_nlink(inode, 2); ihold(inode); } affs_fix_checksum(sb, bh); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); dentry->d_fsdata = (void *)(long)bh->b_blocknr; affs_lock_dir(dir); retval = affs_insert_hash(dir, bh); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); affs_unlock_dir(dir); affs_unlock_link(inode); diff --git a/fs/affs/namei.c b/fs/affs/namei.c 
index 870532192600..c3c6532da4b0 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -373,7 +373,7 @@ affs_symlink(struct mnt_idmap *idmap, struct inode *dir, } *p = 0; inode->i_size = i + 1; - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); affs_brelse(bh); mark_inode_dirty(inode); @@ -443,7 +443,8 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, /* TODO: move it back to old_dir, if error? */ done: - mark_buffer_dirty_inode(bh, retval ? old_dir : new_dir); + mmb_mark_buffer_dirty(bh, + &AFFS_I(retval ? old_dir : new_dir)->i_metadata_bhs); affs_brelse(bh); return retval; } @@ -496,8 +497,8 @@ affs_xrename(struct inode *old_dir, struct dentry *old_dentry, retval = affs_insert_hash(old_dir, bh_new); affs_unlock_dir(old_dir); done: - mark_buffer_dirty_inode(bh_old, new_dir); - mark_buffer_dirty_inode(bh_new, old_dir); + mmb_mark_buffer_dirty(bh_old, &AFFS_I(new_dir)->i_metadata_bhs); + mmb_mark_buffer_dirty(bh_new, &AFFS_I(old_dir)->i_metadata_bhs); affs_brelse(bh_old); affs_brelse(bh_new); return retval; diff --git a/fs/affs/super.c b/fs/affs/super.c index 8451647f3fea..079f36e1ddec 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -108,6 +108,7 @@ static struct inode *affs_alloc_inode(struct super_block *sb) i->i_lc = NULL; i->i_ext_bh = NULL; i->i_pa_cnt = 0; + mmb_init(&i->i_metadata_bhs, &i->vfs_inode.i_data); return &i->vfs_inode; } diff --git a/fs/aio.c b/fs/aio.c index a07bdd1aaaa6..ba9b9fa2446b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -218,6 +218,17 @@ struct aio_kiocb { struct eventfd_ctx *ki_eventfd; }; +struct aio_inode_info { + struct inode vfs_inode; + spinlock_t migrate_lock; + struct kioctx *ctx; +}; + +static inline struct aio_inode_info *AIO_I(struct inode *inode) +{ + return container_of(inode, struct aio_inode_info, vfs_inode); +} + /*------ sysctl variables----*/ static DEFINE_SPINLOCK(aio_nr_lock); static unsigned long aio_nr; /* current system wide number of aio requests */ @@ 
-251,6 +262,7 @@ static void __init aio_sysctl_init(void) static struct kmem_cache *kiocb_cachep; static struct kmem_cache *kioctx_cachep; +static struct kmem_cache *aio_inode_cachep; static struct vfsmount *aio_mnt; @@ -261,11 +273,12 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) { struct file *file; struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb); + if (IS_ERR(inode)) return ERR_CAST(inode); inode->i_mapping->a_ops = &aio_ctx_aops; - inode->i_mapping->i_private_data = ctx; + AIO_I(inode)->ctx = ctx; inode->i_size = PAGE_SIZE * nr_pages; file = alloc_file_pseudo(inode, aio_mnt, "[aio]", @@ -275,14 +288,49 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) return file; } +static struct inode *aio_alloc_inode(struct super_block *sb) +{ + struct aio_inode_info *ai; + + ai = alloc_inode_sb(sb, aio_inode_cachep, GFP_KERNEL); + if (!ai) + return NULL; + ai->ctx = NULL; + + return &ai->vfs_inode; +} + +static void aio_free_inode(struct inode *inode) +{ + kmem_cache_free(aio_inode_cachep, AIO_I(inode)); +} + +static const struct super_operations aio_super_operations = { + .alloc_inode = aio_alloc_inode, + .free_inode = aio_free_inode, + .statfs = simple_statfs, +}; + static int aio_init_fs_context(struct fs_context *fc) { - if (!init_pseudo(fc, AIO_RING_MAGIC)) + struct pseudo_fs_context *pfc; + + pfc = init_pseudo(fc, AIO_RING_MAGIC); + if (!pfc) return -ENOMEM; fc->s_iflags |= SB_I_NOEXEC; + pfc->ops = &aio_super_operations; return 0; } +static void init_once(void *obj) +{ + struct aio_inode_info *ai = obj; + + inode_init_once(&ai->vfs_inode); + spin_lock_init(&ai->migrate_lock); +} + /* aio_setup * Creates the slab caches used by the aio routines, panic on * failure as this is done early during the boot sequence. 
@@ -294,6 +342,11 @@ static int __init aio_setup(void) .init_fs_context = aio_init_fs_context, .kill_sb = kill_anon_super, }; + + aio_inode_cachep = kmem_cache_create("aio_inode_cache", + sizeof(struct aio_inode_info), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_ACCOUNT), + init_once); aio_mnt = kern_mount(&aio_fs); if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); @@ -308,17 +361,17 @@ __initcall(aio_setup); static void put_aio_ring_file(struct kioctx *ctx) { struct file *aio_ring_file = ctx->aio_ring_file; - struct address_space *i_mapping; if (aio_ring_file) { - truncate_setsize(file_inode(aio_ring_file), 0); + struct inode *inode = file_inode(aio_ring_file); + + truncate_setsize(inode, 0); /* Prevent further access to the kioctx from migratepages */ - i_mapping = aio_ring_file->f_mapping; - spin_lock(&i_mapping->i_private_lock); - i_mapping->i_private_data = NULL; + spin_lock(&AIO_I(inode)->migrate_lock); + AIO_I(inode)->ctx = NULL; ctx->aio_ring_file = NULL; - spin_unlock(&i_mapping->i_private_lock); + spin_unlock(&AIO_I(inode)->migrate_lock); fput(aio_ring_file); } @@ -408,13 +461,14 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode) { struct kioctx *ctx; + struct aio_inode_info *ai = AIO_I(mapping->host); unsigned long flags; pgoff_t idx; int rc = 0; - /* mapping->i_private_lock here protects against the kioctx teardown. */ - spin_lock(&mapping->i_private_lock); - ctx = mapping->i_private_data; + /* ai->migrate_lock here protects against the kioctx teardown. 
*/ + spin_lock(&ai->migrate_lock); + ctx = ai->ctx; if (!ctx) { rc = -EINVAL; goto out; @@ -467,7 +521,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst, out_unlock: mutex_unlock(&ctx->ring_lock); out: - spin_unlock(&mapping->i_private_lock); + spin_unlock(&ai->migrate_lock); return rc; } #else diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 606f9378b2f0..b08afe733e63 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -35,6 +35,7 @@ struct bfs_inode_info { unsigned long i_dsk_ino; /* inode number from the disk, can be 0 */ unsigned long i_sblock; unsigned long i_eblock; + struct mapping_metadata_bhs i_metadata_bhs; struct inode vfs_inode; }; diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 481514db4eae..5b40ab09a796 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -68,10 +68,17 @@ static int bfs_readdir(struct file *f, struct dir_context *ctx) return 0; } +static int bfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + return mmb_fsync(file, + &BFS_I(file->f_mapping->host)->i_metadata_bhs, + start, end, datasync); +} + const struct file_operations bfs_dir_operations = { .read = generic_read_dir, .iterate_shared = bfs_readdir, - .fsync = generic_file_fsync, + .fsync = bfs_fsync, .llseek = generic_file_llseek, }; @@ -186,7 +193,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry) set_nlink(inode, 1); } de->ino = 0; - mark_buffer_dirty_inode(bh, dir); + mmb_mark_buffer_dirty(bh, &BFS_I(dir)->i_metadata_bhs); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); mark_inode_dirty(dir); inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); @@ -246,7 +253,7 @@ static int bfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, inode_set_ctime_current(new_inode); inode_dec_link_count(new_inode); } - mark_buffer_dirty_inode(old_bh, old_dir); + mmb_mark_buffer_dirty(old_bh, &BFS_I(old_dir)->i_metadata_bhs); error = 0; end_rename: @@ -296,7 +303,8 @@ static int bfs_add_entry(struct inode *dir, const struct 
qstr *child, int ino) for (i = 0; i < BFS_NAMELEN; i++) de->name[i] = (i < namelen) ? name[i] : 0; - mark_buffer_dirty_inode(bh, dir); + mmb_mark_buffer_dirty(bh, + &BFS_I(dir)->i_metadata_bhs); brelse(bh); return 0; } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 9da02f5cb6cd..19e49c8cf750 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -187,7 +187,9 @@ static void bfs_evict_inode(struct inode *inode) dprintf("ino=%08lx\n", ino); truncate_inode_pages_final(&inode->i_data); - invalidate_inode_buffers(inode); + if (inode->i_nlink) + mmb_sync(&BFS_I(inode)->i_metadata_bhs); + mmb_invalidate(&BFS_I(inode)->i_metadata_bhs); clear_inode(inode); if (inode->i_nlink) @@ -257,6 +259,8 @@ static struct inode *bfs_alloc_inode(struct super_block *sb) bi = alloc_inode_sb(sb, bfs_inode_cachep, GFP_KERNEL); if (!bi) return NULL; + mmb_init(&bi->i_metadata_bhs, &bi->vfs_inode.i_data); + return &bi->vfs_inode; } diff --git a/fs/buffer.c b/fs/buffer.c index 22b43642ba57..cbed175f418b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -54,7 +54,6 @@ #include "internal.h" -static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, enum rw_hint hint, struct writeback_control *wbc); @@ -468,146 +467,187 @@ EXPORT_SYMBOL(mark_buffer_async_write); * a successful fsync(). For example, ext2 indirect blocks need to be * written back and waited upon before fsync() returns. * - * The functions mark_buffer_dirty_inode(), fsync_inode_buffers(), - * inode_has_buffers() and invalidate_inode_buffers() are provided for the - * management of a list of dependent buffers at ->i_mapping->i_private_list. + * The functions mmb_mark_buffer_dirty(), mmb_sync(), mmb_has_buffers() + * and mmb_invalidate() are provided for the management of a list of dependent + * buffers in mapping_metadata_bhs struct. 
* - * Locking is a little subtle: try_to_free_buffers() will remove buffers - * from their controlling inode's queue when they are being freed. But - * try_to_free_buffers() will be operating against the *blockdev* mapping - * at the time, not against the S_ISREG file which depends on those buffers. - * So the locking for i_private_list is via the i_private_lock in the address_space - * which backs the buffers. Which is different from the address_space - * against which the buffers are listed. So for a particular address_space, - * mapping->i_private_lock does *not* protect mapping->i_private_list! In fact, - * mapping->i_private_list will always be protected by the backing blockdev's - * ->i_private_lock. - * - * Which introduces a requirement: all buffers on an address_space's - * ->i_private_list must be from the same address_space: the blockdev's. - * - * address_spaces which do not place buffers at ->i_private_list via these - * utility functions are free to use i_private_lock and i_private_list for - * whatever they want. The only requirement is that list_empty(i_private_list) - * be true at clear_inode() time. - * - * FIXME: clear_inode should not call invalidate_inode_buffers(). The - * filesystems should do that. invalidate_inode_buffers() should just go - * BUG_ON(!list_empty). - * - * FIXME: mark_buffer_dirty_inode() is a data-plane operation. It should - * take an address_space, not an inode. And it should be called - * mark_buffer_dirty_fsync() to clearly define why those buffers are being - * queued up. - * - * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the - * list if it is already on a list. Because if the buffer is on a list, - * it *must* already be on the right one. If not, the filesystem is being - * silly. This will save a ton of locking. But first we have to ensure - * that buffers are taken *off* the old inode's list when they are freed - * (presumably in truncate). 
That requires careful auditing of all - * filesystems (do it inside bforget()). It could also be done by bringing - * b_inode back. + * The locking is a little subtle: The list of buffer heads is protected by + * the lock in mapping_metadata_bhs so functions coming from bdev mapping + * (such as try_to_free_buffers()) need to safely get to mapping_metadata_bhs + * using RCU, grab the lock, verify we didn't race with somebody detaching the + * bh / moving it to different inode and only then proceeding. */ -/* - * The buffer's backing address_space's i_private_lock must be held - */ -static void __remove_assoc_queue(struct buffer_head *bh) +void mmb_init(struct mapping_metadata_bhs *mmb, struct address_space *mapping) { + spin_lock_init(&mmb->lock); + INIT_LIST_HEAD(&mmb->list); + mmb->mapping = mapping; +} +EXPORT_SYMBOL(mmb_init); + +static void __remove_assoc_queue(struct mapping_metadata_bhs *mmb, + struct buffer_head *bh) +{ + lockdep_assert_held(&mmb->lock); list_del_init(&bh->b_assoc_buffers); - WARN_ON(!bh->b_assoc_map); - bh->b_assoc_map = NULL; + WARN_ON(!bh->b_mmb); + bh->b_mmb = NULL; } -int inode_has_buffers(struct inode *inode) +static void remove_assoc_queue(struct buffer_head *bh) { - return !list_empty(&inode->i_data.i_private_list); + struct mapping_metadata_bhs *mmb; + + /* + * The locking dance is ugly here. We need to acquire the lock + * protecting the metadata bh list while possibly racing with bh + * being removed from the list or moved to a different one. We + * use RCU to pin mapping_metadata_bhs in memory to + * opportunistically acquire the lock and then recheck the bh + * didn't move under us. + */ + while (bh->b_mmb) { + rcu_read_lock(); + mmb = READ_ONCE(bh->b_mmb); + if (mmb) { + spin_lock(&mmb->lock); + if (bh->b_mmb == mmb) + __remove_assoc_queue(mmb, bh); + spin_unlock(&mmb->lock); + } + rcu_read_unlock(); + } } -/* - * osync is designed to support O_SYNC io. 
It waits synchronously for - * all already-submitted IO to complete, but does not queue any new - * writes to the disk. +bool mmb_has_buffers(struct mapping_metadata_bhs *mmb) +{ + return !list_empty(&mmb->list); +} +EXPORT_SYMBOL_GPL(mmb_has_buffers); + +/** + * mmb_sync - write out & wait upon all buffers in a list + * @mmb: the list of buffers to write * - * To do O_SYNC writes, just queue the buffer writes with write_dirty_buffer - * as you dirty the buffers, and then use osync_inode_buffers to wait for - * completion. Any other dirty buffers which are not yet queued for - * write will not be flushed to disk by the osync. + * Starts I/O against the buffers in the given list and waits upon + * that I/O. Basically, this is a convenience function for fsync(). @mmb is + * for a file or directory which needs those buffers to be written for a + * successful fsync(). + * + * We have conflicting pressures: we want to make sure that all + * initially dirty buffers get waited on, but that any subsequently + * dirtied buffers don't. After all, we don't want fsync to last + * forever if somebody is actively writing to the file. + * + * Do this in two main stages: first we copy dirty buffers to a + * temporary inode list, queueing the writes as we go. Then we clean + * up, waiting for those writes to complete. mmb_mark_buffer_dirty() + * doesn't touch b_assoc_buffers list if b_mmb is not NULL so we are sure the + * buffer stays on our list until IO completes (at which point it can be + * reaped).
*/ -static int osync_buffers_list(spinlock_t *lock, struct list_head *list) +int mmb_sync(struct mapping_metadata_bhs *mmb) { struct buffer_head *bh; - struct list_head *p; int err = 0; + struct blk_plug plug; + LIST_HEAD(tmp); - spin_lock(lock); -repeat: - list_for_each_prev(p, list) { - bh = BH_ENTRY(p); - if (buffer_locked(bh)) { - get_bh(bh); - spin_unlock(lock); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - err = -EIO; - brelse(bh); - spin_lock(lock); - goto repeat; - } - } - spin_unlock(lock); - return err; -} - -/** - * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers - * @mapping: the mapping which wants those buffers written - * - * Starts I/O against the buffers at mapping->i_private_list, and waits upon - * that I/O. - * - * Basically, this is a convenience function for fsync(). - * @mapping is a file or directory which needs those buffers to be written for - * a successful fsync(). - */ -int sync_mapping_buffers(struct address_space *mapping) -{ - struct address_space *buffer_mapping = mapping->i_private_data; - - if (buffer_mapping == NULL || list_empty(&mapping->i_private_list)) + if (!mmb_has_buffers(mmb)) return 0; - return fsync_buffers_list(&buffer_mapping->i_private_lock, - &mapping->i_private_list); + blk_start_plug(&plug); + + spin_lock(&mmb->lock); + while (!list_empty(&mmb->list)) { + bh = BH_ENTRY(mmb->list.next); + WARN_ON_ONCE(bh->b_mmb != mmb); + __remove_assoc_queue(mmb, bh); + /* Avoid race with mmb_mark_buffer_dirty() which does + * a lockless check and we rely on seeing the dirty bit */ + smp_mb(); + if (buffer_dirty(bh) || buffer_locked(bh)) { + list_add(&bh->b_assoc_buffers, &tmp); + bh->b_mmb = mmb; + if (buffer_dirty(bh)) { + get_bh(bh); + spin_unlock(&mmb->lock); + /* + * Ensure any pending I/O completes so that + * write_dirty_buffer() actually writes the + * current contents - it is a noop if I/O is + * still in flight on potentially older + * contents.
+ */ + write_dirty_buffer(bh, REQ_SYNC); + + /* + * Kick off IO for the previous mapping. Note + * that we will not run the very last mapping, + * wait_on_buffer() will do that for us + * through sync_buffer(). + */ + brelse(bh); + spin_lock(&mmb->lock); + } + } + } + + spin_unlock(&mmb->lock); + blk_finish_plug(&plug); + spin_lock(&mmb->lock); + + while (!list_empty(&tmp)) { + bh = BH_ENTRY(tmp.prev); + get_bh(bh); + __remove_assoc_queue(mmb, bh); + /* Avoid race with mmb_mark_buffer_dirty() which does + * a lockless check and we rely on seeing the dirty bit */ + smp_mb(); + if (buffer_dirty(bh)) { + list_add(&bh->b_assoc_buffers, &mmb->list); + bh->b_mmb = mmb; + } + spin_unlock(&mmb->lock); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + err = -EIO; + brelse(bh); + spin_lock(&mmb->lock); + } + spin_unlock(&mmb->lock); + return err; } -EXPORT_SYMBOL(sync_mapping_buffers); +EXPORT_SYMBOL(mmb_sync); /** - * generic_buffers_fsync_noflush - generic buffer fsync implementation - * for simple filesystems with no inode lock + * mmb_fsync_noflush - fsync implementation for simple filesystems with + * metadata buffers list * * @file: file to synchronize + * @mmb: list of metadata bhs to flush * @start: start offset in bytes * @end: end offset in bytes (inclusive) * @datasync: only synchronize essential metadata if true * - * This is a generic implementation of the fsync method for simple - * filesystems which track all non-inode metadata in the buffers list - * hanging off the address_space structure. + * This is an implementation of the fsync method for simple filesystems which + * track all non-inode metadata in the buffers list hanging off the @mmb + * structure.
*/ -int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, - bool datasync) +int mmb_fsync_noflush(struct file *file, struct mapping_metadata_bhs *mmb, + loff_t start, loff_t end, bool datasync) { struct inode *inode = file->f_mapping->host; int err; - int ret; + int ret = 0; err = file_write_and_wait_range(file, start, end); if (err) return err; - ret = sync_mapping_buffers(inode->i_mapping); + if (mmb) + ret = mmb_sync(mmb); if (!(inode_state_read_once(inode) & I_DIRTY_ALL)) goto out; if (datasync && !(inode_state_read_once(inode) & I_DIRTY_DATASYNC)) @@ -624,34 +664,35 @@ out: ret = err; return ret; } -EXPORT_SYMBOL(generic_buffers_fsync_noflush); +EXPORT_SYMBOL(mmb_fsync_noflush); /** - * generic_buffers_fsync - generic buffer fsync implementation - * for simple filesystems with no inode lock + * mmb_fsync - fsync implementation for simple filesystems with metadata + * buffers list * * @file: file to synchronize + * @mmb: list of metadata bhs to flush * @start: start offset in bytes * @end: end offset in bytes (inclusive) * @datasync: only synchronize essential metadata if true * - * This is a generic implementation of the fsync method for simple - * filesystems which track all non-inode metadata in the buffers list - * hanging off the address_space structure. This also makes sure that - * a device cache flush operation is called at the end. + * This is an implementation of the fsync method for simple filesystems which + * track all non-inode metadata in the buffers list hanging off the @mmb + * structure. This also makes sure that a device cache flush operation is + * called at the end. 
*/ -int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, - bool datasync) +int mmb_fsync(struct file *file, struct mapping_metadata_bhs *mmb, + loff_t start, loff_t end, bool datasync) { struct inode *inode = file->f_mapping->host; int ret; - ret = generic_buffers_fsync_noflush(file, start, end, datasync); + ret = mmb_fsync_noflush(file, mmb, start, end, datasync); if (!ret) ret = blkdev_issue_flush(inode->i_sb->s_bdev); return ret; } -EXPORT_SYMBOL(generic_buffers_fsync); +EXPORT_SYMBOL(mmb_fsync); /* * Called when we've recently written block `bblock', and it is known that @@ -672,26 +713,18 @@ void write_boundary_block(struct block_device *bdev, } } -void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) +void mmb_mark_buffer_dirty(struct buffer_head *bh, + struct mapping_metadata_bhs *mmb) { - struct address_space *mapping = inode->i_mapping; - struct address_space *buffer_mapping = bh->b_folio->mapping; - mark_buffer_dirty(bh); - if (!mapping->i_private_data) { - mapping->i_private_data = buffer_mapping; - } else { - BUG_ON(mapping->i_private_data != buffer_mapping); - } - if (!bh->b_assoc_map) { - spin_lock(&buffer_mapping->i_private_lock); - list_move_tail(&bh->b_assoc_buffers, - &mapping->i_private_list); - bh->b_assoc_map = mapping; - spin_unlock(&buffer_mapping->i_private_lock); + if (!bh->b_mmb) { + spin_lock(&mmb->lock); + list_move_tail(&bh->b_assoc_buffers, &mmb->list); + bh->b_mmb = mmb; + spin_unlock(&mmb->lock); } } -EXPORT_SYMBOL(mark_buffer_dirty_inode); +EXPORT_SYMBOL(mmb_mark_buffer_dirty); /** * block_dirty_folio - Mark a folio as dirty. @@ -758,153 +791,20 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio) EXPORT_SYMBOL(block_dirty_folio); /* - * Write out and wait upon a list of buffers. - * - * We have conflicting pressures: we want to make sure that all - * initially dirty buffers get waited on, but that any subsequently - * dirtied buffers don't. 
After all, we don't want fsync to last - * forever if somebody is actively writing to the file. - * - * Do this in two main stages: first we copy dirty buffers to a - * temporary inode list, queueing the writes as we go. Then we clean - * up, waiting for those writes to complete. - * - * During this second stage, any subsequent updates to the file may end - * up refiling the buffer on the original inode's dirty list again, so - * there is a chance we will end up with a buffer queued for write but - * not yet completed on that list. So, as a final cleanup we go through - * the osync code to catch these locked, dirty buffers without requeuing - * any newly dirty buffers for write. - */ -static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) -{ - struct buffer_head *bh; - struct address_space *mapping; - int err = 0, err2; - struct blk_plug plug; - LIST_HEAD(tmp); - - blk_start_plug(&plug); - - spin_lock(lock); - while (!list_empty(list)) { - bh = BH_ENTRY(list->next); - mapping = bh->b_assoc_map; - __remove_assoc_queue(bh); - /* Avoid race with mark_buffer_dirty_inode() which does - * a lockless check and we rely on seeing the dirty bit */ - smp_mb(); - if (buffer_dirty(bh) || buffer_locked(bh)) { - list_add(&bh->b_assoc_buffers, &tmp); - bh->b_assoc_map = mapping; - if (buffer_dirty(bh)) { - get_bh(bh); - spin_unlock(lock); - /* - * Ensure any pending I/O completes so that - * write_dirty_buffer() actually writes the - * current contents - it is a noop if I/O is - * still in flight on potentially older - * contents. - */ - write_dirty_buffer(bh, REQ_SYNC); - - /* - * Kick off IO for the previous mapping. Note - * that we will not run the very last mapping, - * wait_on_buffer() will do that for us - * through sync_buffer(). 
- */ - brelse(bh); - spin_lock(lock); - } - } - } - - spin_unlock(lock); - blk_finish_plug(&plug); - spin_lock(lock); - - while (!list_empty(&tmp)) { - bh = BH_ENTRY(tmp.prev); - get_bh(bh); - mapping = bh->b_assoc_map; - __remove_assoc_queue(bh); - /* Avoid race with mark_buffer_dirty_inode() which does - * a lockless check and we rely on seeing the dirty bit */ - smp_mb(); - if (buffer_dirty(bh)) { - list_add(&bh->b_assoc_buffers, - &mapping->i_private_list); - bh->b_assoc_map = mapping; - } - spin_unlock(lock); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - err = -EIO; - brelse(bh); - spin_lock(lock); - } - - spin_unlock(lock); - err2 = osync_buffers_list(lock, list); - if (err) - return err; - else - return err2; -} - -/* - * Invalidate any and all dirty buffers on a given inode. We are + * Invalidate any and all dirty buffers on a given buffers list. We are * probably unmounting the fs, but that doesn't mean we have already * done a sync(). Just drop the buffers from the inode list. - * - * NOTE: we take the inode's blockdev's mapping's i_private_lock. Which - * assumes that all the buffers are against the blockdev. */ -void invalidate_inode_buffers(struct inode *inode) +void mmb_invalidate(struct mapping_metadata_bhs *mmb) { - if (inode_has_buffers(inode)) { - struct address_space *mapping = &inode->i_data; - struct list_head *list = &mapping->i_private_list; - struct address_space *buffer_mapping = mapping->i_private_data; - - spin_lock(&buffer_mapping->i_private_lock); - while (!list_empty(list)) - __remove_assoc_queue(BH_ENTRY(list->next)); - spin_unlock(&buffer_mapping->i_private_lock); + if (mmb_has_buffers(mmb)) { + spin_lock(&mmb->lock); + while (!list_empty(&mmb->list)) + __remove_assoc_queue(mmb, BH_ENTRY(mmb->list.next)); + spin_unlock(&mmb->lock); } } -EXPORT_SYMBOL(invalidate_inode_buffers); - -/* - * Remove any clean buffers from the inode's buffer list. This is called - * when we're trying to free the inode itself. Those buffers can pin it. 
- * - * Returns true if all buffers were removed. - */ -int remove_inode_buffers(struct inode *inode) -{ - int ret = 1; - - if (inode_has_buffers(inode)) { - struct address_space *mapping = &inode->i_data; - struct list_head *list = &mapping->i_private_list; - struct address_space *buffer_mapping = mapping->i_private_data; - - spin_lock(&buffer_mapping->i_private_lock); - while (!list_empty(list)) { - struct buffer_head *bh = BH_ENTRY(list->next); - if (buffer_dirty(bh)) { - ret = 0; - break; - } - __remove_assoc_queue(bh); - } - spin_unlock(&buffer_mapping->i_private_lock); - } - return ret; -} +EXPORT_SYMBOL(mmb_invalidate); /* * Create the appropriate buffers when given a folio for data area and @@ -1214,8 +1114,8 @@ void mark_buffer_write_io_error(struct buffer_head *bh) /* FIXME: do we need to set this in both places? */ if (bh->b_folio && bh->b_folio->mapping) mapping_set_error(bh->b_folio->mapping, -EIO); - if (bh->b_assoc_map) - mapping_set_error(bh->b_assoc_map, -EIO); + if (bh->b_mmb) + mapping_set_error(bh->b_mmb->mapping, -EIO); } EXPORT_SYMBOL(mark_buffer_write_io_error); @@ -1245,14 +1145,7 @@ EXPORT_SYMBOL(__brelse); void __bforget(struct buffer_head *bh) { clear_buffer_dirty(bh); - if (bh->b_assoc_map) { - struct address_space *buffer_mapping = bh->b_folio->mapping; - - spin_lock(&buffer_mapping->i_private_lock); - list_del_init(&bh->b_assoc_buffers); - bh->b_assoc_map = NULL; - spin_unlock(&buffer_mapping->i_private_lock); - } + remove_assoc_queue(bh); __brelse(bh); } EXPORT_SYMBOL(__bforget); @@ -2900,8 +2793,7 @@ drop_buffers(struct folio *folio, struct buffer_head **buffers_to_free) do { struct buffer_head *next = bh->b_this_page; - if (bh->b_assoc_map) - __remove_assoc_queue(bh); + remove_assoc_queue(bh); bh = next; } while (bh != head); *buffers_to_free = head; diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 90cd540afeaa..4e8d34a75b66 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -577,7 +577,7 @@ int exfat_file_fsync(struct file 
*filp, loff_t start, loff_t end, int datasync) if (unlikely(exfat_forced_shutdown(inode->i_sb))) return -EIO; - err = __generic_file_fsync(filp, start, end, datasync); + err = simple_fsync_noflush(filp, start, end, datasync); if (err) return err; diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 2fb2d2d5d503..04559b88482d 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -695,7 +695,6 @@ void exfat_evict_inode(struct inode *inode) mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); } - invalidate_inode_buffers(inode); clear_inode(inode); exfat_cache_inval_inode(inode); exfat_unhash_inode(inode); diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 5e0c6c5fcb6c..3eb1f342645c 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -676,6 +676,7 @@ struct ext2_inode_info { #ifdef CONFIG_QUOTA struct dquot __rcu *i_dquot[MAXQUOTAS]; #endif + struct mapping_metadata_bhs i_metadata_bhs; }; /* diff --git a/fs/ext2/file.c b/fs/ext2/file.c index ebe356a38b18..d9b1eb34694a 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -156,9 +156,11 @@ static int ext2_release_file (struct inode * inode, struct file * filp) int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync) { int ret; - struct super_block *sb = file->f_mapping->host->i_sb; + struct inode *inode = file->f_mapping->host; + struct super_block *sb = inode->i_sb; - ret = generic_buffers_fsync(file, start, end, datasync); + ret = mmb_fsync(file, &EXT2_I(inode)->i_metadata_bhs, + start, end, datasync); if (ret == -EIO) /* We don't really know where the IO error happened... 
*/ ext2_error(sb, __func__, diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 45286c0c3b6b..6443c298c105 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -94,9 +94,10 @@ void ext2_evict_inode(struct inode * inode) if (inode->i_blocks) ext2_truncate_blocks(inode, 0); ext2_xattr_delete_inode(inode); + } else { + mmb_sync(&EXT2_I(inode)->i_metadata_bhs); } - - invalidate_inode_buffers(inode); + mmb_invalidate(&EXT2_I(inode)->i_metadata_bhs); clear_inode(inode); ext2_discard_reservation(inode); @@ -526,7 +527,7 @@ static int ext2_alloc_branch(struct inode *inode, } set_buffer_uptodate(bh); unlock_buffer(bh); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &EXT2_I(inode)->i_metadata_bhs); /* We used to sync bh here if IS_SYNC(inode). * But we now rely upon generic_write_sync() * and b_inode_buffers. But not for directories. @@ -597,7 +598,7 @@ static void ext2_splice_branch(struct inode *inode, /* had we spliced it onto indirect block? */ if (where->bh) - mark_buffer_dirty_inode(where->bh, inode); + mmb_mark_buffer_dirty(where->bh, &EXT2_I(inode)->i_metadata_bhs); inode_set_ctime_current(inode); mark_inode_dirty(inode); @@ -1210,7 +1211,8 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) if (partial == chain) mark_inode_dirty(inode); else - mark_buffer_dirty_inode(partial->bh, inode); + mmb_mark_buffer_dirty(partial->bh, + &EXT2_I(inode)->i_metadata_bhs); ext2_free_branches(inode, &nr, &nr+1, (chain+n-1) - partial); } /* Clear the ends of indirect blocks on the shared branch */ @@ -1219,7 +1221,8 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) partial->p + 1, (__le32*)partial->bh->b_data+addr_per_block, (chain+n-1) - partial); - mark_buffer_dirty_inode(partial->bh, inode); + mmb_mark_buffer_dirty(partial->bh, + &EXT2_I(inode)->i_metadata_bhs); brelse (partial->bh); partial--; } @@ -1302,7 +1305,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) inode_set_mtime_to_ts(inode, 
inode_set_ctime_current(inode)); if (inode_needs_sync(inode)) { - sync_mapping_buffers(inode->i_mapping); + mmb_sync(&EXT2_I(inode)->i_metadata_bhs); sync_inode_metadata(inode, 1); } else { mark_inode_dirty(inode); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 603f2641fe10..4118a3a1f620 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -215,6 +215,7 @@ static struct inode *ext2_alloc_inode(struct super_block *sb) #ifdef CONFIG_QUOTA memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); #endif + mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data); return &ei->vfs_inode; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1aa4622f812b..0cf68f85dfd1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1121,6 +1121,7 @@ struct ext4_inode_info { struct rw_semaphore i_data_sem; struct inode vfs_inode; struct jbd2_inode *jinode; + struct mapping_metadata_bhs i_metadata_bhs; /* * File creation time. Its function is same as that of diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 05e5946ed9b3..9a8c225f2753 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -390,7 +390,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, } } else { if (inode) - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, + &EXT4_I(inode)->i_metadata_bhs); else mark_buffer_dirty(bh); if (inode && inode_needs_sync(inode)) { diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index bd8f230fa507..924726dcc85f 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -68,7 +68,7 @@ static int ext4_sync_parent(struct inode *inode) * through ext4_evict_inode()) and so we are safe to flush * metadata blocks and the inode. 
*/ - ret = sync_mapping_buffers(inode->i_mapping); + ret = mmb_sync(&EXT4_I(inode)->i_metadata_bhs); if (ret) break; ret = sync_inode_metadata(inode, 1); @@ -89,7 +89,8 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end, }; int ret; - ret = generic_buffers_fsync_noflush(file, start, end, datasync); + ret = mmb_fsync_noflush(file, &EXT4_I(inode)->i_metadata_bhs, + start, end, datasync); if (ret) return ret; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 01679d96cd0f..f78cf5f23835 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -195,7 +195,9 @@ void ext4_evict_inode(struct inode *inode) ext4_warning_inode(inode, "data will be lost"); truncate_inode_pages_final(&inode->i_data); - + /* Avoid mballoc special inode which has no proper iops */ + if (!EXT4_SB(inode->i_sb)->s_journal) + mmb_sync(&EXT4_I(inode)->i_metadata_bhs); goto no_delete; } @@ -1430,9 +1432,6 @@ static int write_end_fn(handle_t *handle, struct inode *inode, /* * We need to pick up the new inode size which generic_commit_write gave us * `iocb` can be NULL - eg, when called from page_symlink(). - * - * ext4 never places buffers on inode->i_mapping->i_private_list. metadata - * buffers are managed internally. */ static int ext4_write_end(const struct kiocb *iocb, struct address_space *mapping, @@ -3447,7 +3446,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode) } /* Any metadata buffers to write? 
*/ - if (!list_empty(&inode->i_mapping->i_private_list)) + if (mmb_has_buffers(&EXT4_I(inode)->i_metadata_bhs)) return true; return inode_state_read_once(inode) & I_DIRTY_DATASYNC; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8ca399c15970..578508eb4f1a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1424,6 +1424,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); ext4_fc_init_inode(&ei->vfs_inode); spin_lock_init(&ei->i_fc_lock); + mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data); return &ei->vfs_inode; } @@ -1520,7 +1521,8 @@ static void destroy_inodecache(void) void ext4_clear_inode(struct inode *inode) { ext4_fc_del(inode); - invalidate_inode_buffers(inode); + if (!EXT4_SB(inode->i_sb)->s_journal) + mmb_invalidate(&EXT4_I(inode)->i_metadata_bhs); clear_inode(inode); ext4_discard_preallocations(inode); /* diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4b8b25f688e4..4f6f42f33613 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -1027,7 +1027,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) de++; nr_slots--; } - mark_buffer_dirty_inode(bh, dir); + mmb_mark_buffer_dirty(bh, &MSDOS_I(dir)->i_metadata_bhs); if (IS_DIRSYNC(dir)) err = sync_dirty_buffer(bh); brelse(bh); @@ -1062,7 +1062,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) de--; nr_slots--; } - mark_buffer_dirty_inode(bh, dir); + mmb_mark_buffer_dirty(bh, &MSDOS_I(dir)->i_metadata_bhs); if (IS_DIRSYNC(dir)) err = sync_dirty_buffer(bh); brelse(bh); @@ -1114,7 +1114,7 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, memset(bhs[n]->b_data, 0, sb->s_blocksize); set_buffer_uptodate(bhs[n]); unlock_buffer(bhs[n]); - mark_buffer_dirty_inode(bhs[n], dir); + mmb_mark_buffer_dirty(bhs[n], &MSDOS_I(dir)->i_metadata_bhs); n++; blknr++; @@ -1195,7 +1195,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts) memset(de + 2, 
0, sb->s_blocksize - 2 * sizeof(*de)); set_buffer_uptodate(bhs[0]); unlock_buffer(bhs[0]); - mark_buffer_dirty_inode(bhs[0], dir); + mmb_mark_buffer_dirty(bhs[0], &MSDOS_I(dir)->i_metadata_bhs); err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE); if (err) @@ -1257,7 +1257,8 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, memcpy(bhs[n]->b_data, slots, copy); set_buffer_uptodate(bhs[n]); unlock_buffer(bhs[n]); - mark_buffer_dirty_inode(bhs[n], dir); + mmb_mark_buffer_dirty(bhs[n], + &MSDOS_I(dir)->i_metadata_bhs); slots += copy; size -= copy; if (!size) @@ -1358,7 +1359,8 @@ found: for (i = 0; i < long_bhs; i++) { int copy = umin(sb->s_blocksize - offset, size); memcpy(bhs[i]->b_data + offset, slots, copy); - mark_buffer_dirty_inode(bhs[i], dir); + mmb_mark_buffer_dirty(bhs[i], + &MSDOS_I(dir)->i_metadata_bhs); offset = 0; slots += copy; size -= copy; @@ -1369,7 +1371,8 @@ found: /* Fill the short name slot. */ int copy = umin(sb->s_blocksize - offset, size); memcpy(bhs[i]->b_data + offset, slots, copy); - mark_buffer_dirty_inode(bhs[i], dir); + mmb_mark_buffer_dirty(bhs[i], + &MSDOS_I(dir)->i_metadata_bhs); if (IS_DIRSYNC(dir)) err = sync_dirty_buffer(bhs[i]); } diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 0d269dba897b..5a58f0bf8ce8 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -130,6 +130,7 @@ struct msdos_inode_info { struct hlist_node i_dir_hash; /* hash by i_logstart */ struct rw_semaphore truncate_lock; /* protect bmap against truncate */ struct timespec64 i_crtime; /* File creation (birth) time */ + struct mapping_metadata_bhs i_metadata_bhs; struct inode vfs_inode; }; diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index a7061c2ad8e4..f0801d99dd62 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -170,9 +170,11 @@ static void fat12_ent_put(struct fat_entry *fatent, int new) } spin_unlock(&fat12_entry_lock); - mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); + mmb_mark_buffer_dirty(fatent->bhs[0], + 
&MSDOS_I(fatent->fat_inode)->i_metadata_bhs); if (fatent->nr_bhs == 2) - mark_buffer_dirty_inode(fatent->bhs[1], fatent->fat_inode); + mmb_mark_buffer_dirty(fatent->bhs[1], + &MSDOS_I(fatent->fat_inode)->i_metadata_bhs); } static void fat16_ent_put(struct fat_entry *fatent, int new) @@ -181,7 +183,8 @@ static void fat16_ent_put(struct fat_entry *fatent, int new) new = EOF_FAT16; *fatent->u.ent16_p = cpu_to_le16(new); - mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); + mmb_mark_buffer_dirty(fatent->bhs[0], + &MSDOS_I(fatent->fat_inode)->i_metadata_bhs); } static void fat32_ent_put(struct fat_entry *fatent, int new) @@ -189,7 +192,8 @@ static void fat32_ent_put(struct fat_entry *fatent, int new) WARN_ON(new & 0xf0000000); new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff; *fatent->u.ent32_p = cpu_to_le32(new); - mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); + mmb_mark_buffer_dirty(fatent->bhs[0], + &MSDOS_I(fatent->fat_inode)->i_metadata_bhs); } static int fat12_ent_next(struct fat_entry *fatent) @@ -395,7 +399,8 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs, memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize); set_buffer_uptodate(c_bh); unlock_buffer(c_bh); - mark_buffer_dirty_inode(c_bh, sbi->fat_inode); + mmb_mark_buffer_dirty(c_bh, + &MSDOS_I(sbi->fat_inode)->i_metadata_bhs); if (sb->s_flags & SB_SYNCHRONOUS) err = sync_dirty_buffer(c_bh); brelse(c_bh); diff --git a/fs/fat/file.c b/fs/fat/file.c index 124d9c5431c8..becccdd2e501 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -186,13 +186,15 @@ static int fat_file_release(struct inode *inode, struct file *filp) int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { struct inode *inode = filp->f_mapping->host; + struct inode *fat_inode = MSDOS_SB(inode->i_sb)->fat_inode; int err; - err = __generic_file_fsync(filp, start, end, datasync); + err = mmb_fsync_noflush(filp, &MSDOS_I(inode)->i_metadata_bhs, + start, end, datasync); 
if (err) return err; - err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); + err = mmb_sync(&MSDOS_I(fat_inode)->i_metadata_bhs); if (err) return err; @@ -236,7 +238,7 @@ static int fat_cont_expand(struct inode *inode, loff_t size) */ err = filemap_fdatawrite_range(mapping, start, start + count - 1); - err2 = sync_mapping_buffers(mapping); + err2 = mmb_sync(&MSDOS_I(inode)->i_metadata_bhs); if (!err) err = err2; err2 = write_inode_now(inode, 1); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 3cc5fb01afa1..28f78df086ef 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -657,10 +657,12 @@ static void fat_evict_inode(struct inode *inode) if (!inode->i_nlink) { inode->i_size = 0; fat_truncate_blocks(inode, 0); - } else + } else { + mmb_sync(&MSDOS_I(inode)->i_metadata_bhs); fat_free_eofblocks(inode); + } - invalidate_inode_buffers(inode); + mmb_invalidate(&MSDOS_I(inode)->i_metadata_bhs); clear_inode(inode); fat_cache_inval_inode(inode); fat_detach(inode); @@ -761,6 +763,7 @@ static struct inode *fat_alloc_inode(struct super_block *sb) ei->i_pos = 0; ei->i_crtime.tv_sec = 0; ei->i_crtime.tv_nsec = 0; + mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data); return &ei->vfs_inode; } diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 048c103b506a..4cc65f330fb7 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -527,7 +527,8 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, if (update_dotdot) { fat_set_start(dotdot_de, MSDOS_I(new_dir)->i_logstart); - mark_buffer_dirty_inode(dotdot_bh, old_inode); + mmb_mark_buffer_dirty(dotdot_bh, + &MSDOS_I(old_inode)->i_metadata_bhs); if (IS_DIRSYNC(new_dir)) { err = sync_dirty_buffer(dotdot_bh); if (err) @@ -566,7 +567,8 @@ error_dotdot: if (update_dotdot) { fat_set_start(dotdot_de, MSDOS_I(old_dir)->i_logstart); - mark_buffer_dirty_inode(dotdot_bh, old_inode); + mmb_mark_buffer_dirty(dotdot_bh, + &MSDOS_I(old_inode)->i_metadata_bhs); corrupt |= 
sync_dirty_buffer(dotdot_bh); } error_inode: diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 87dcdd86272b..918b3756674c 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -915,7 +915,7 @@ static int vfat_update_dotdot_de(struct inode *dir, struct inode *inode, struct msdos_dir_entry *dotdot_de) { fat_set_start(dotdot_de, MSDOS_I(dir)->i_logstart); - mark_buffer_dirty_inode(dotdot_bh, inode); + mmb_mark_buffer_dirty(dotdot_bh, &MSDOS_I(inode)->i_metadata_bhs); if (IS_DIRSYNC(dir)) return sync_dirty_buffer(dotdot_bh); return 0; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 2acbabccc8ad..b8a144d3a73b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1149,7 +1149,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, mapping->flags = 0; gfp_mask = mapping_gfp_mask(sdp->sd_inode->i_mapping); mapping_set_gfp_mask(mapping, gfp_mask); - mapping->i_private_data = NULL; mapping->writeback_index = 0; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3f70c47981de..6ad02493adfd 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -622,13 +622,7 @@ static void hugetlbfs_evict_inode(struct inode *inode) trace_hugetlbfs_evict_inode(inode); remove_inode_hugepages(inode, 0, LLONG_MAX); - /* - * Get the resv_map from the address space embedded in the inode. - * This is the address space which points to any resv_map allocated - * at inode creation time. If this is a device special inode, - * i_mapping may not point to the original address space. 
- */ - resv_map = (struct resv_map *)(&inode->i_data)->i_private_data; + resv_map = HUGETLBFS_I(inode)->resv_map; /* Only regular and link inodes have associated reserve maps */ if (resv_map) resv_map_release(&resv_map->refs); @@ -907,6 +901,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, simple_inode_init_ts(inode); inode->i_op = &hugetlbfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; + HUGETLBFS_I(inode)->resv_map = NULL; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); lockdep_annotate_inode_mutex_key(inode); @@ -950,7 +945,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, &hugetlbfs_i_mmap_rwsem_key); inode->i_mapping->a_ops = &hugetlbfs_aops; simple_inode_init_ts(inode); - inode->i_mapping->i_private_data = resv_map; + info->resv_map = resv_map; info->seals = F_SEAL_SEAL; switch (mode & S_IFMT) { default: diff --git a/fs/inode.c b/fs/inode.c index 5ad169d51728..69e219f0cfcb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -17,7 +17,6 @@ #include #include #include -#include /* for inode_has_buffers */ #include #include #include @@ -284,7 +283,6 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp atomic_set(&mapping->nr_thps, 0); #endif mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); - mapping->i_private_data = NULL; mapping->writeback_index = 0; init_rwsem(&mapping->invalidate_lock); lockdep_set_class_and_name(&mapping->invalidate_lock, @@ -367,7 +365,6 @@ struct inode *alloc_inode(struct super_block *sb) void __destroy_inode(struct inode *inode) { - BUG_ON(inode_has_buffers(inode)); inode_detach_wb(inode); security_inode_free(inode); fsnotify_inode_delete(inode); @@ -484,7 +481,6 @@ static void __address_space_init_once(struct address_space *mapping) { xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); init_rwsem(&mapping->i_mmap_rwsem); - INIT_LIST_HEAD(&mapping->i_private_list); 
spin_lock_init(&mapping->i_private_lock); mapping->i_mmap = RB_ROOT_CACHED; } @@ -798,7 +794,6 @@ void clear_inode(struct inode *inode) * nor even WARN_ON(!mapping_empty). */ xa_unlock_irq(&inode->i_data.i_pages); - BUG_ON(!list_empty(&inode->i_data.i_private_list)); BUG_ON(!(inode_state_read_once(inode) & I_FREEING)); BUG_ON(inode_state_read_once(inode) & I_CLEAR); BUG_ON(!list_empty(&inode->i_wb_list)); @@ -994,19 +989,18 @@ static enum lru_status inode_lru_isolate(struct list_head *item, * page cache in order to free up struct inodes: lowmem might * be under pressure before the cache inside the highmem zone. */ - if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) { + if (!mapping_empty(&inode->i_data)) { + unsigned long reap; + inode_pin_lru_isolating(inode); spin_unlock(&inode->i_lock); spin_unlock(&lru->lock); - if (remove_inode_buffers(inode)) { - unsigned long reap; - reap = invalidate_mapping_pages(&inode->i_data, 0, -1); - if (current_is_kswapd()) - __count_vm_events(KSWAPD_INODESTEAL, reap); - else - __count_vm_events(PGINODESTEAL, reap); - mm_account_reclaimed_pages(reap); - } + reap = invalidate_mapping_pages(&inode->i_data, 0, -1); + if (current_is_kswapd()) + __count_vm_events(KSWAPD_INODESTEAL, reap); + else + __count_vm_events(PGINODESTEAL, reap); + mm_account_reclaimed_pages(reap); inode_unpin_lru_isolating(inode); return LRU_RETRY; } diff --git a/fs/libfs.c b/fs/libfs.c index 63b4fb082435..1bbea5e7bae3 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -18,7 +18,6 @@ #include #include #include -#include /* sync_mapping_buffers */ #include #include #include @@ -1539,71 +1538,63 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, EXPORT_SYMBOL_GPL(generic_fh_to_parent); /** - * __generic_file_fsync - generic fsync implementation for simple filesystems + * simple_fsync_noflush - generic fsync implementation for simple filesystems * * @file: file to synchronize * @start: start offset in bytes * @end: end offset in 
bytes (inclusive) * @datasync: only synchronize essential metadata if true * - * This is a generic implementation of the fsync method for simple - * filesystems which track all non-inode metadata in the buffers list - * hanging off the address_space structure. + * This function is an fsync handler for simple filesystems. It writes out + * dirty data, inode (if dirty), but does not issue a cache flush. */ -int __generic_file_fsync(struct file *file, loff_t start, loff_t end, - int datasync) +int simple_fsync_noflush(struct file *file, loff_t start, loff_t end, + int datasync) { struct inode *inode = file->f_mapping->host; int err; - int ret; + int ret = 0; err = file_write_and_wait_range(file, start, end); if (err) return err; - inode_lock(inode); - ret = sync_mapping_buffers(inode->i_mapping); if (!(inode_state_read_once(inode) & I_DIRTY_ALL)) goto out; if (datasync && !(inode_state_read_once(inode) & I_DIRTY_DATASYNC)) goto out; - err = sync_inode_metadata(inode, 1); - if (ret == 0) - ret = err; - + ret = sync_inode_metadata(inode, 1); out: - inode_unlock(inode); /* check and advance again to catch errors after syncing out buffers */ err = file_check_and_advance_wb_err(file); if (ret == 0) ret = err; return ret; } -EXPORT_SYMBOL(__generic_file_fsync); +EXPORT_SYMBOL(simple_fsync_noflush); /** - * generic_file_fsync - generic fsync implementation for simple filesystems - * with flush + * simple_fsync - fsync implementation for simple filesystems with flush * @file: file to synchronize * @start: start offset in bytes * @end: end offset in bytes (inclusive) * @datasync: only synchronize essential metadata if true * + * This function is an fsync handler for simple filesystems. It writes out + * dirty data, inode (if dirty), and issues a cache flush. 
*/ - -int generic_file_fsync(struct file *file, loff_t start, loff_t end, - int datasync) +int simple_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; int err; - err = __generic_file_fsync(file, start, end, datasync); + err = simple_fsync_noflush(file, start, end, datasync); if (err) return err; return blkdev_issue_flush(inode->i_sb->s_bdev); } -EXPORT_SYMBOL(generic_file_fsync); +EXPORT_SYMBOL(simple_fsync); /** * generic_check_addressable - Check addressability of file system diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 19052fc47e9e..361d26d87d2e 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -23,7 +23,7 @@ const struct file_operations minix_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = minix_readdir, - .fsync = generic_file_fsync, + .fsync = minix_fsync, }; /* diff --git a/fs/minix/file.c b/fs/minix/file.c index dca7ac71f049..86e5943cd2ff 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -7,8 +7,16 @@ * minix regular file handling primitives */ +#include #include "minix.h" +int minix_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + return mmb_fsync(file, + &minix_i(file->f_mapping->host)->i_metadata_bhs, + start, end, datasync); +} + /* * We have mostly NULLs here: the current defaults are OK for * the minix filesystem. 
@@ -18,7 +26,7 @@ const struct file_operations minix_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap_prepare = generic_file_mmap_prepare, - .fsync = generic_file_fsync, + .fsync = minix_fsync, .splice_read = filemap_splice_read, }; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 838b072b6cf0..9c6bac248907 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -48,8 +48,10 @@ static void minix_evict_inode(struct inode *inode) if (!inode->i_nlink) { inode->i_size = 0; minix_truncate(inode); + } else { + mmb_sync(&minix_i(inode)->i_metadata_bhs); } - invalidate_inode_buffers(inode); + mmb_invalidate(&minix_i(inode)->i_metadata_bhs); clear_inode(inode); if (!inode->i_nlink) minix_free_inode(inode); @@ -83,6 +85,8 @@ static struct inode *minix_alloc_inode(struct super_block *sb) ei = alloc_inode_sb(sb, minix_inode_cachep, GFP_KERNEL); if (!ei) return NULL; + mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data); + return &ei->vfs_inode; } diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c index dad131e30c05..c3cd2c75af9c 100644 --- a/fs/minix/itree_common.c +++ b/fs/minix/itree_common.c @@ -98,7 +98,7 @@ static int alloc_branch(struct inode *inode, *branch[n].p = branch[n].key; set_buffer_uptodate(bh); unlock_buffer(bh); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &minix_i(inode)->i_metadata_bhs); parent = nr; } if (n == num) @@ -135,7 +135,8 @@ static inline int splice_branch(struct inode *inode, /* had we spliced it onto indirect block? 
*/ if (where->bh) - mark_buffer_dirty_inode(where->bh, inode); + mmb_mark_buffer_dirty(where->bh, + &minix_i(inode)->i_metadata_bhs); mark_inode_dirty(inode); return 0; @@ -328,14 +329,16 @@ static inline void truncate (struct inode * inode) if (partial == chain) mark_inode_dirty(inode); else - mark_buffer_dirty_inode(partial->bh, inode); + mmb_mark_buffer_dirty(partial->bh, + &minix_i(inode)->i_metadata_bhs); free_branches(inode, &nr, &nr+1, (chain+n-1) - partial); } /* Clear the ends of indirect blocks on the shared branch */ while (partial > chain) { free_branches(inode, partial->p + 1, block_end(partial->bh), (chain+n-1) - partial); - mark_buffer_dirty_inode(partial->bh, inode); + mmb_mark_buffer_dirty(partial->bh, + &minix_i(inode)->i_metadata_bhs); brelse (partial->bh); partial--; } diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 7e1f652f16d3..f2025c9b5825 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -19,6 +19,7 @@ struct minix_inode_info { __u16 i1_data[16]; __u32 i2_data[16]; } u; + struct mapping_metadata_bhs i_metadata_bhs; struct inode vfs_inode; }; @@ -57,6 +58,8 @@ unsigned long minix_count_free_blocks(struct super_block *sb); int minix_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); int minix_prepare_chunk(struct folio *folio, loff_t pos, unsigned len); +struct mapping_metadata_bhs *minix_get_metadata_bhs(struct inode *inode); +int minix_fsync(struct file *file, loff_t start, loff_t end, int datasync); extern void V1_minix_truncate(struct inode *); extern void V2_minix_truncate(struct inode *); diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 7eecf1e01f74..570c92fa7ee7 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -387,9 +387,6 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count, int err2; err = filemap_fdatawrite_range(mapping, pos, end - 1); - err2 = sync_mapping_buffers(mapping); - if (!err) - err = err2; err2 = write_inode_now(inode, 1); if (!err) err = err2; 
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 60af9f8e0366..7f2ed0f26686 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -1815,7 +1815,6 @@ void ntfs_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); - invalidate_inode_buffers(inode); clear_inode(inode); ni_clear(ntfs_i(inode)); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index bd2ddb7d841d..7283bb2c5a31 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3971,7 +3971,6 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", (unsigned long long)OCFS2_I(inode)->ip_blkno); } - sync_mapping_buffers(mapping); if (blocking == DLM_LOCK_EX) { truncate_inode_pages(mapping, 0); } else { diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 268b79339a51..1277666c77cd 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1683,9 +1683,6 @@ bail: if (rename_lock) ocfs2_rename_unlock(osb); - if (new_inode) - sync_mapping_buffers(old_inode->i_mapping); - iput(new_inode); ocfs2_free_dir_lookup_result(&target_lookup_res); diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 49a1de5a827f..28f3b113340e 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -334,7 +334,7 @@ const struct file_operations omfs_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap_prepare = generic_file_mmap_prepare, - .fsync = generic_file_fsync, + .fsync = simple_fsync, .splice_read = filemap_splice_read, }; diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 6402715ab377..a9038d231be4 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -71,7 +71,7 @@ const struct file_operations qnx4_dir_operations = .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = qnx4_readdir, - .fsync = generic_file_fsync, + .fsync = simple_fsync, .setlease = generic_setlease, }; diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c index ae0c9846833d..135fb42f6936 100644 --- 
a/fs/qnx6/dir.c +++ b/fs/qnx6/dir.c @@ -275,7 +275,7 @@ const struct file_operations qnx6_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = qnx6_readdir, - .fsync = generic_file_fsync, + .fsync = simple_fsync, .setlease = generic_setlease, }; diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 5bf75638f352..ebc9f6a379fe 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -157,6 +157,6 @@ const struct file_operations udf_dir_operations = { .read = generic_read_dir, .iterate_shared = udf_readdir, .unlocked_ioctl = udf_ioctl, - .fsync = generic_file_fsync, + .fsync = udf_fsync, .setlease = generic_setlease, }; diff --git a/fs/udf/directory.c b/fs/udf/directory.c index f5c81e13eacb..6b4ac7464aef 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -430,9 +430,10 @@ void udf_fiiter_write_fi(struct udf_fileident_iter *iter, uint8_t *impuse) if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { mark_inode_dirty(iter->dir); } else { - mark_buffer_dirty_inode(iter->bh[0], iter->dir); + mmb_mark_buffer_dirty(iter->bh[0], &iinfo->i_metadata_bhs); if (iter->bh[1]) - mark_buffer_dirty_inode(iter->bh[1], iter->dir); + mmb_mark_buffer_dirty(iter->bh[1], + &iinfo->i_metadata_bhs); } inode_inc_iversion(iter->dir); } diff --git a/fs/udf/file.c b/fs/udf/file.c index b043fe10e5d6..f7f1422de30f 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -198,6 +198,13 @@ static int udf_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } +int udf_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + return mmb_fsync(file, + &UDF_I(file->f_mapping->host)->i_metadata_bhs, + start, end, datasync); +} + const struct file_operations udf_file_operations = { .read_iter = generic_file_read_iter, .unlocked_ioctl = udf_ioctl, @@ -205,7 +212,7 @@ const struct file_operations udf_file_operations = { .mmap = udf_file_mmap, .write_iter = udf_file_write_iter, .release = udf_release_file, - .fsync = generic_file_fsync, + .fsync = udf_fsync, 
.splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .llseek = generic_file_llseek, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 6148bd94a390..17db63f54211 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -154,7 +154,9 @@ void udf_evict_inode(struct inode *inode) } } truncate_inode_pages_final(&inode->i_data); - invalidate_inode_buffers(inode); + if (!want_delete) + mmb_sync(&iinfo->i_metadata_bhs); + mmb_invalidate(&iinfo->i_metadata_bhs); clear_inode(inode); kfree(iinfo->i_data); iinfo->i_data = NULL; @@ -1258,7 +1260,7 @@ struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block, memset(bh->b_data, 0x00, inode->i_sb->s_blocksize); set_buffer_uptodate(bh); unlock_buffer(bh); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &UDF_I(inode)->i_metadata_bhs); return bh; } @@ -2006,7 +2008,7 @@ int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block, memset(bh->b_data, 0x00, sb->s_blocksize); set_buffer_uptodate(bh); unlock_buffer(bh); - mark_buffer_dirty_inode(bh, inode); + mmb_mark_buffer_dirty(bh, &UDF_I(inode)->i_metadata_bhs); aed = (struct allocExtDesc *)(bh->b_data); if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT)) { @@ -2101,7 +2103,7 @@ int __udf_add_aext(struct inode *inode, struct extent_position *epos, else udf_update_tag(epos->bh->b_data, sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(epos->bh, inode); + mmb_mark_buffer_dirty(epos->bh, &iinfo->i_metadata_bhs); } return 0; @@ -2185,7 +2187,7 @@ void udf_write_aext(struct inode *inode, struct extent_position *epos, le32_to_cpu(aed->lengthAllocDescs) + sizeof(struct allocExtDesc)); } - mark_buffer_dirty_inode(epos->bh, inode); + mmb_mark_buffer_dirty(epos->bh, &iinfo->i_metadata_bhs); } else { mark_inode_dirty(inode); } @@ -2393,7 +2395,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos) else udf_update_tag(oepos.bh->b_data, sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(oepos.bh, inode); + 
mmb_mark_buffer_dirty(oepos.bh, &iinfo->i_metadata_bhs); } } else { udf_write_aext(inode, &oepos, &eloc, elen, 1); @@ -2410,7 +2412,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos) else udf_update_tag(oepos.bh->b_data, sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(oepos.bh, inode); + mmb_mark_buffer_dirty(oepos.bh, &iinfo->i_metadata_bhs); } } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index ccafcaa96809..9a3b7cef3606 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -638,7 +638,7 @@ static int udf_symlink(struct mnt_idmap *idmap, struct inode *dir, memset(epos.bh->b_data, 0x00, bsize); set_buffer_uptodate(epos.bh); unlock_buffer(epos.bh); - mark_buffer_dirty_inode(epos.bh, inode); + mmb_mark_buffer_dirty(epos.bh, &iinfo->i_metadata_bhs); ea = epos.bh->b_data + udf_ext0_offset(inode); } else ea = iinfo->i_data + iinfo->i_lenEAttr; diff --git a/fs/udf/super.c b/fs/udf/super.c index 3a2d66c7e856..073897f7ff06 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -166,6 +166,7 @@ static struct inode *udf_alloc_inode(struct super_block *sb) ei->cached_extent.lstart = -1; spin_lock_init(&ei->i_extent_cache_lock); inode_set_iversion(&ei->vfs_inode, 1); + mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data); return &ei->vfs_inode; } diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index b4071c9cf8c9..41b2bfd30449 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -186,7 +186,7 @@ static void udf_update_alloc_ext_desc(struct inode *inode, len += lenalloc; udf_update_tag(epos->bh->b_data, len); - mark_buffer_dirty_inode(epos->bh, inode); + mmb_mark_buffer_dirty(epos->bh, &UDF_I(inode)->i_metadata_bhs); } /* diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h index 312b7c9ef10e..fdaa88c49c2b 100644 --- a/fs/udf/udf_i.h +++ b/fs/udf/udf_i.h @@ -50,6 +50,7 @@ struct udf_inode_info { struct kernel_lb_addr i_locStreamdir; __u64 i_lenStreams; struct rw_semaphore i_data_sem; + struct mapping_metadata_bhs i_metadata_bhs; struct 
udf_ext_cache cached_extent; /* Spinlock for protecting extent cache */ spinlock_t i_extent_cache_lock; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index d159f20d61e8..6d951e05c004 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -137,6 +137,7 @@ static inline unsigned int udf_dir_entry_len(struct fileIdentDesc *cfi) /* file.c */ extern long udf_ioctl(struct file *, unsigned int, unsigned long); +int udf_fsync(struct file *file, loff_t start, loff_t end, int datasync); /* inode.c */ extern struct inode *__udf_iget(struct super_block *, struct kernel_lb_addr *, diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index f10a50f7e78b..e62fe5667671 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -652,7 +652,7 @@ const struct file_operations ufs_dir_operations = { .release = ufs_dir_release, .read = generic_read_dir, .iterate_shared = ufs_readdir, - .fsync = generic_file_fsync, + .fsync = simple_fsync, .llseek = ufs_dir_llseek, .setlease = generic_setlease, }; diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 809c7a4603f8..85c509ced7f9 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -41,7 +41,7 @@ const struct file_operations ufs_file_operations = { .write_iter = generic_file_write_iter, .mmap_prepare = generic_file_mmap_prepare, .open = generic_file_open, - .fsync = generic_file_fsync, + .fsync = simple_fsync, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .setlease = generic_setlease, diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 2a8728c87979..440d014cc5ed 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -853,7 +853,6 @@ void ufs_evict_inode(struct inode * inode) ufs_update_inode(inode, inode_needs_sync(inode)); } - invalidate_inode_buffers(inode); clear_inode(inode); if (want_delete) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index b16b88bfbc3e..e4939e33b4b5 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -73,8 +73,8 @@ struct buffer_head { bh_end_io_t *b_end_io; /* I/O 
completion */ void *b_private; /* reserved for b_end_io */ struct list_head b_assoc_buffers; /* associated with another mapping */ - struct address_space *b_assoc_map; /* mapping this buffer is - associated with */ + struct mapping_metadata_bhs *b_mmb; /* head of the list of metadata bhs + * this buffer is associated with */ atomic_t b_count; /* users using this buffer_head */ spinlock_t b_uptodate_lock; /* Used by the first bh in a page, to * serialise IO completion of other @@ -205,12 +205,12 @@ struct buffer_head *create_empty_buffers(struct folio *folio, void end_buffer_read_sync(struct buffer_head *bh, int uptodate); void end_buffer_write_sync(struct buffer_head *bh, int uptodate); -/* Things to do with buffers at mapping->private_list */ -void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); -int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, - bool datasync); -int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, - bool datasync); +/* Things to do with metadata buffers list */ +void mmb_mark_buffer_dirty(struct buffer_head *bh, struct mapping_metadata_bhs *mmb); +int mmb_fsync_noflush(struct file *file, struct mapping_metadata_bhs *mmb, + loff_t start, loff_t end, bool datasync); +int mmb_fsync(struct file *file, struct mapping_metadata_bhs *mmb, + loff_t start, loff_t end, bool datasync); void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len); static inline void clean_bdev_bh_alias(struct buffer_head *bh) @@ -515,10 +515,10 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio); void buffer_init(void); bool try_to_free_buffers(struct folio *folio); -int inode_has_buffers(struct inode *inode); -void invalidate_inode_buffers(struct inode *inode); -int remove_inode_buffers(struct inode *inode); -int sync_mapping_buffers(struct address_space *mapping); +void mmb_init(struct mapping_metadata_bhs *mmb, struct address_space *mapping); +bool 
mmb_has_buffers(struct mapping_metadata_bhs *mmb); +void mmb_invalidate(struct mapping_metadata_bhs *mmb); +int mmb_sync(struct mapping_metadata_bhs *mmb); void invalidate_bh_lrus(void); void invalidate_bh_lrus_cpu(void); bool has_bh_in_lru(int cpu, void *dummy); @@ -528,10 +528,7 @@ extern int buffer_heads_over_limit; static inline void buffer_init(void) {} static inline bool try_to_free_buffers(struct folio *folio) { return true; } -static inline int inode_has_buffers(struct inode *inode) { return 0; } -static inline void invalidate_inode_buffers(struct inode *inode) {} -static inline int remove_inode_buffers(struct inode *inode) { return 1; } -static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } +static inline int mmb_sync(struct mapping_metadata_bhs *mmb) { return 0; } static inline void invalidate_bh_lrus(void) {} static inline void invalidate_bh_lrus_cpu(void) {} static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 23f36a2613a3..4551edb8d479 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -445,6 +445,13 @@ struct address_space_operations { extern const struct address_space_operations empty_aops; +/* Structure for tracking metadata buffer heads associated with the mapping */ +struct mapping_metadata_bhs { + struct address_space *mapping; /* Mapping bhs are associated with */ + spinlock_t lock; /* Lock protecting bh list */ + struct list_head list; /* The list of bhs (b_assoc_buffers) */ +}; + /** * struct address_space - Contents of a cacheable, mappable object. * @host: Owner, either the inode or the block_device. @@ -464,8 +471,6 @@ extern const struct address_space_operations empty_aops; * @flags: Error bits and flags (AS_*). * @wb_err: The most recent error which has occurred. * @i_private_lock: For use by the owner of the address_space. - * @i_private_list: For use by the owner of the address_space. 
- * @i_private_data: For use by the owner of the address_space. */ struct address_space { struct inode *host; @@ -484,9 +489,7 @@ struct address_space { unsigned long flags; errseq_t wb_err; spinlock_t i_private_lock; - struct list_head i_private_list; struct rw_semaphore i_mmap_rwsem; - void * i_private_data; } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but @@ -3293,8 +3296,8 @@ void simple_offset_destroy(struct offset_ctx *octx); extern const struct file_operations simple_offset_dir_operations; -extern int __generic_file_fsync(struct file *, loff_t, loff_t, int); -extern int generic_file_fsync(struct file *, loff_t, loff_t, int); +extern int simple_fsync_noflush(struct file *, loff_t, loff_t, int); +extern int simple_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 65910437be1c..fc5462fe943f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -518,6 +518,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) struct hugetlbfs_inode_info { struct inode vfs_inode; + struct resv_map *resv_map; unsigned int seals; }; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 327eaa4074d3..2ced2c8633d8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1157,15 +1157,7 @@ void resv_map_release(struct kref *ref) static inline struct resv_map *inode_resv_map(struct inode *inode) { - /* - * At inode evict time, i_mapping may not point to the original - * address space within the inode. This original address space - * contains the pointer to the resv_map. So, always use the - * address space embedded within the inode. - * The VERY common case is inode->mapping == &inode->i_data but, - * this may not be true for device special inodes. 
- */ - return (struct resv_map *)(&inode->i_data)->i_private_data; + return HUGETLBFS_I(inode)->resv_map; } static struct resv_map *vma_resv_map(struct vm_area_struct *vma) diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 017d84a7adf3..42b237491c4e 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -30,6 +30,7 @@ struct gmem_file { struct gmem_inode { struct shared_policy policy; struct inode vfs_inode; + struct list_head gmem_file_list; u64 flags; }; @@ -39,8 +40,8 @@ static __always_inline struct gmem_inode *GMEM_I(struct inode *inode) return container_of(inode, struct gmem_inode, vfs_inode); } -#define kvm_gmem_for_each_file(f, mapping) \ - list_for_each_entry(f, &(mapping)->i_private_list, entry) +#define kvm_gmem_for_each_file(f, inode) \ + list_for_each_entry(f, &GMEM_I(inode)->gmem_file_list, entry) /** * folio_file_pfn - like folio_file_page, but return a pfn. @@ -202,7 +203,7 @@ static void kvm_gmem_invalidate_begin(struct inode *inode, pgoff_t start, attr_filter = kvm_gmem_get_invalidate_filter(inode); - kvm_gmem_for_each_file(f, inode->i_mapping) + kvm_gmem_for_each_file(f, inode) __kvm_gmem_invalidate_begin(f, start, end, attr_filter); } @@ -223,7 +224,7 @@ static void kvm_gmem_invalidate_end(struct inode *inode, pgoff_t start, { struct gmem_file *f; - kvm_gmem_for_each_file(f, inode->i_mapping) + kvm_gmem_for_each_file(f, inode) __kvm_gmem_invalidate_end(f, start, end); } @@ -609,7 +610,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) kvm_get_kvm(kvm); f->kvm = kvm; xa_init(&f->bindings); - list_add(&f->entry, &inode->i_mapping->i_private_list); + list_add(&f->entry, &GMEM_I(inode)->gmem_file_list); fd_install(fd, file); return fd; @@ -945,6 +946,7 @@ static struct inode *kvm_gmem_alloc_inode(struct super_block *sb) mpol_shared_policy_init(&gi->policy, NULL); gi->flags = 0; + INIT_LIST_HEAD(&gi->gmem_file_list); return &gi->vfs_inode; }