Files
linux/fs/fserror.c
Jeff Layton 0b2600f81c treewide: change inode->i_ino from unsigned long to u64
On 32-bit architectures, unsigned long is only 32 bits wide, which
causes 64-bit inode numbers to be silently truncated. Several
filesystems (NFS, XFS, BTRFS, etc.) can generate inode numbers that
exceed 32 bits, and this truncation can lead to inode number collisions
and other subtle bugs on 32-bit systems.

Change the type of inode->i_ino from unsigned long to u64 to ensure that
inode numbers are always represented as 64-bit values regardless of
architecture. Update all format specifiers treewide from %lu/%lx to
%llu/%llx to match the new type, along with corresponding local variable
types.

This is the bulk treewide conversion. Earlier patches in this series
handled trace events separately to allow trace field reordering for
better struct packing on 32-bit.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://patch.msgid.link/20260304-iino-u64-v3-12-2257ad83d372@kernel.org
Acked-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2026-03-06 14:31:28 +01:00

195 lines
5.6 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2025 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/mempool.h>
#include <linux/fserror.h>
/*
 * Element count handed to mempool_init_kmalloc_pool() when the event pool is
 * set up in fserror_init().
 */
#define FSERROR_DEFAULT_EVENT_POOL_SIZE (32)
/*
 * Backing pool for struct fserror_event objects: events are taken from it in
 * fserror_alloc_event() and handed back in fserror_free_event().
 */
static struct mempool fserror_events_pool;
/*
 * Set up the pending-error accounting of @sb.
 *
 * The counter starts out at 1 instead of 0.  With that bias in place, the
 * decrement performed when an event is released cannot reach zero (and hence
 * cannot issue a wake_up_var) until fserror_unmount() has dropped the bias.
 */
void fserror_mount(struct super_block *sb)
{
	refcount_set(&sb->s_pending_errors, 1);
}
/*
 * Drop the bias on the pending-error counter of @sb and, if events are still
 * outstanding, wait for them to be released.
 */
void fserror_unmount(struct super_block *sb)
{
	/* Dropping the bias took the count to zero: nothing is pending. */
	if (refcount_dec_and_test(&sb->s_pending_errors))
		return;

	/*
	 * Events are still in flight.  Sleep until the count reads below 1,
	 * i.e. every pending error has been released -- and hopefully the
	 * counter did not saturate with 1 billion+ concurrent events.
	 */
	wait_var_event(&sb->s_pending_errors,
		       refcount_read(&sb->s_pending_errors) < 1);
}
/*
 * Release one pending-error reference on @sb.  Whoever drops the count to
 * zero wakes anybody waiting on the counter (see fserror_unmount()).
 */
static inline void fserror_pending_dec(struct super_block *sb)
{
	if (!refcount_dec_and_test(&sb->s_pending_errors))
		return;

	wake_up_var(&sb->s_pending_errors);
}
/*
 * Give up the pending-error reference held on behalf of @event and return
 * the event to the pool it was allocated from.
 */
static inline void fserror_free_event(struct fserror_event *event)
{
	struct super_block *sb = event->sb;

	fserror_pending_dec(sb);
	mempool_free(event, &fserror_events_pool);
}
/*
 * Hand @event to the filesystem's ->report_error hook, if it has one, and
 * then to fsnotify as an FS_ERROR event.
 */
static void fserror_deliver(struct fserror_event *event)
{
	struct super_block *sb = event->sb;
	struct fs_error_report report = {
		.sb = sb,
		.inode = event->inode,
		/* send positive error number to userspace */
		.error = -event->error,
	};

	if (sb->s_op->report_error)
		sb->s_op->report_error(event);

	fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, NULL, NULL,
		 NULL, 0);
}

/*
 * Work item callback for a queued error event.  The event is delivered only
 * while its superblock still has SB_ACTIVE set; either way the inode
 * reference stored in the event is dropped and the event is freed.
 */
static void fserror_worker(struct work_struct *work)
{
	struct fserror_event *event;

	event = container_of(work, struct fserror_event, work);

	if (event->sb->s_flags & SB_ACTIVE)
		fserror_deliver(event);

	iput(event->inode);
	fserror_free_event(event);
}
/*
 * Allocate and initialise an error event for @sb.
 *
 * On success the returned event is zeroed, points at @sb, has its work item
 * bound to fserror_worker() and holds one pending-error reference on @sb.
 * Returns NULL if the superblock is going away or the allocation failed; no
 * pending-error reference is held in that case.
 */
static inline struct fserror_event *fserror_alloc_event(struct super_block *sb,
							 gfp_t gfp_flags)
{
	struct fserror_event *event;

	/*
	 * A pending-error count that already reached zero, or a superblock
	 * that is no longer active, means the superblock is being deactivated
	 * and there is no point in continuing.
	 *
	 * s_pending_errors must be checked before SB_ACTIVE: that order is
	 * mandated by the order of accesses in generic_shutdown_super and
	 * fserror_unmount.  Barriers are implicitly provided by the refcount
	 * manipulations in this function and fserror_unmount.
	 */
	if (!refcount_inc_not_zero(&sb->s_pending_errors))
		return NULL;

	if (sb->s_flags & SB_ACTIVE) {
		event = mempool_alloc(&fserror_events_pool, gfp_flags);
		if (event) {
			/* mempool_alloc doesn't support GFP_ZERO */
			memset(event, 0, sizeof(*event));
			event->sb = sb;
			INIT_WORK(&event->work, fserror_worker);
			return event;
		}
	}

	/* Inactive superblock or failed allocation: undo the increment. */
	fserror_pending_dec(sb);
	return NULL;
}
/**
 * fserror_report - report a filesystem error of some kind
 *
 * @sb: superblock of the filesystem
 * @inode: inode within that filesystem, if applicable
 * @type: type of error encountered
 * @pos: start of inode range affected, if applicable
 * @len: length of inode range affected, if applicable
 * @error: error number encountered, must be negative
 * @gfp: memory allocation flags for conveying the event to a worker,
 *       since this function can be called from atomic contexts
 *
 * Report details of a filesystem error to the super_operations::report_error
 * callback if present; and to fsnotify for distribution to userspace.  @sb,
 * @gfp, @type, and @error must all be specified.  For file I/O errors, the
 * @inode, @pos, and @len fields must also be specified.  For file metadata
 * errors, @inode must be specified.  If @inode is not NULL, then @inode->i_sb
 * must point to @sb.
 *
 * Reporting work is deferred to a workqueue to ensure that ->report_error is
 * called from process context without any locks held.  An active reference to
 * the inode is maintained until event handling is complete, and unmount will
 * wait for queued events to drain.
 *
 * If no event can be allocated, or no reference to @inode can be grabbed, the
 * report is dropped and a rate-limited error message is logged instead.
 */
void fserror_report(struct super_block *sb, struct inode *inode,
		    enum fserror_type type, loff_t pos, u64 len, int error,
		    gfp_t gfp)
{
	struct fserror_event *event;

	/* sb and inode must be from the same filesystem */
	WARN_ON_ONCE(inode && inode->i_sb != sb);

	/* error number must be negative */
	WARN_ON_ONCE(error >= 0);

	event = fserror_alloc_event(sb, gfp);
	if (!event)
		goto lost;

	event->type = type;
	event->pos = pos;
	event->len = len;
	event->error = error;

	/*
	 * Can't iput from non-sleeping context, so grabbing another reference
	 * to the inode must be the last thing before submitting the event.
	 */
	if (inode) {
		event->inode = igrab(inode);
		if (!event->inode)
			goto lost_event;
	}

	/*
	 * Use schedule_work here even if we're already in process context so
	 * that fsnotify and super_operations::report_error implementations are
	 * guaranteed to run in process context without any locks held.  Since
	 * errors are supposed to be rare, the overhead shouldn't kill us any
	 * more than the failing device will.
	 */
	schedule_work(&event->work);
	return;

lost_event:
	fserror_free_event(event);
lost:
	/*
	 * Each message ends in a newline so that it forms a complete log line
	 * by itself instead of being left open for continuation.
	 */
	if (inode)
		pr_err_ratelimited(
 "%s: lost file I/O error report for ino %llu type %u pos 0x%llx len 0x%llx error %d\n",
			sb->s_id, inode->i_ino, type, pos, len, error);
	else
		pr_err_ratelimited(
 "%s: lost filesystem error report for type %u error %d\n",
			sb->s_id, type, error);
}
EXPORT_SYMBOL_GPL(fserror_report);
/*
 * Boot-time setup: initialise the kmalloc-backed pool that error events are
 * allocated from.  Returns whatever mempool_init_kmalloc_pool() returns.
 */
static int __init fserror_init(void)
{
	int ret;

	ret = mempool_init_kmalloc_pool(&fserror_events_pool,
					FSERROR_DEFAULT_EVENT_POOL_SIZE,
					sizeof(struct fserror_event));
	return ret;
}
fs_initcall(fserror_init);