mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'ovl-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs
Pull overlayfs fixes from Amir Goldstein:

 - Fix regression in 'xino' feature detection

   I clumsily introduced this regression myself when working on another
   subsystem (fsnotify). Both the regression and the fix have almost no
   visible impact on users except for some kmsg prints.

 - Fix to performance regression in v6.12.

   This regression was reported by Google COS developers. It is not
   uncommon these days for the year-old mature LTS to get adopted by
   distros and get exposed to many new workloads.

   We made a sub-smart move of making a behavior change in v6.12 which
   could impact performance, without making it opt-in. Fixing this
   mistake retroactively, to be picked by LTS.

* tag 'ovl-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs:
  ovl: make fsync after metadata copy-up opt-in mount option
  ovl: fix wrong detection of 32bit inode numbers
This commit is contained in:
@@ -783,6 +783,56 @@ controlled by the "uuid" mount option, which supports these values:
|
||||
mounted with "uuid=on".
|
||||
|
||||
|
||||
Durability and copy up
|
||||
----------------------
|
||||
|
||||
The fsync(2) system call ensures that the data and metadata of a file
|
||||
are safely written to the backing storage, which is expected to
|
||||
guarantee the existence of the information post system crash.
|
||||
|
||||
Without an fsync(2) call, there is no guarantee that the observed
|
||||
data after a system crash will be either the old or the new data, but
|
||||
in practice, the observed data after a crash is often the old or new data
|
||||
or a mix of both.
|
||||
|
||||
When an overlayfs file is modified for the first time, copy up will
|
||||
create a copy of the lower file and its parent directories in the upper
|
||||
layer. Since the Linux filesystem API does not enforce any particular
|
||||
ordering on storing changes without explicit fsync(2) calls, in case
|
||||
of a system crash, the upper file could end up with no data at all
|
||||
(i.e. zeros), which would be an unusual outcome. To avoid this
|
||||
experience, overlayfs calls fsync(2) on the upper file before completing
|
||||
data copy up with rename(2) or link(2) to make the copy up "atomic".
|
||||
|
||||
By default, overlayfs does not explicitly call fsync(2) on copied up
|
||||
directories or on metadata-only copy up, so it provides no guarantee to
|
||||
persist the user's modification unless the user calls fsync(2).
|
||||
The fsync during copy up only guarantees that if a copy up is observed
|
||||
after a crash, the observed data is not zeros or intermediate values
|
||||
from the copy up staging area.
|
||||
|
||||
On traditional local filesystems with a single journal (e.g. ext4, xfs),
|
||||
fsync on a file also persists the parent directory changes, because they
|
||||
are usually modified in the same transaction, so metadata durability during
|
||||
data copy up effectively comes for free. Overlayfs further limits risk by
|
||||
disallowing network filesystems as upper layer.
|
||||
|
||||
Overlayfs can be tuned to prefer performance or durability when storing
|
||||
to the underlying upper layer. This is controlled by the "fsync" mount
|
||||
option, which supports these values:
|
||||
|
||||
- "auto": (default)
|
||||
Call fsync(2) on upper file before completion of data copy up.
|
||||
No explicit fsync(2) on directory or metadata-only copy up.
|
||||
- "strict":
|
||||
Call fsync(2) on upper file and directories before completion of any
|
||||
copy up.
|
||||
- "volatile": [*]
|
||||
Prefer performance over durability (see `Volatile mount`_)
|
||||
|
||||
[*] The mount option "volatile" is an alias for "fsync=volatile".
|
||||
|
||||
|
||||
Volatile mount
|
||||
--------------
|
||||
|
||||
|
||||
@@ -1146,15 +1146,15 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
return -EOVERFLOW;
|
||||
|
||||
/*
|
||||
* With metacopy disabled, we fsync after final metadata copyup, for
|
||||
* With "fsync=strict", we fsync after final metadata copyup, for
|
||||
* both regular files and directories to get atomic copyup semantics
|
||||
* on filesystems that do not use strict metadata ordering (e.g. ubifs).
|
||||
*
|
||||
* With metacopy enabled we want to avoid fsync on all meta copyup
|
||||
* By default, we want to avoid fsync on all meta copyup, because
|
||||
* that will hurt performance of workloads such as chown -R, so we
|
||||
* only fsync on data copyup as legacy behavior.
|
||||
*/
|
||||
ctx.metadata_fsync = !OVL_FS(dentry->d_sb)->config.metacopy &&
|
||||
ctx.metadata_fsync = ovl_should_sync_metadata(OVL_FS(dentry->d_sb)) &&
|
||||
(S_ISREG(ctx.stat.mode) || S_ISDIR(ctx.stat.mode));
|
||||
ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
|
||||
|
||||
|
||||
@@ -99,6 +99,12 @@ enum {
|
||||
OVL_VERITY_REQUIRE,
|
||||
};
|
||||
|
||||
/* Values of the "fsync" mount option, as stored in ovl_config.fsync_mode. */
enum {
	/* "volatile": prefer performance over durability. */
	OVL_FSYNC_VOLATILE = 0,
	/* "auto": fsync the upper file before completing a data copy up. */
	OVL_FSYNC_AUTO = 1,
	/* "strict": fsync upper file and directories before completing any copy up. */
	OVL_FSYNC_STRICT = 2,
};
|
||||
|
||||
/*
|
||||
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
|
||||
* where:
|
||||
@@ -656,6 +662,21 @@ static inline bool ovl_xino_warn(struct ovl_fs *ofs)
|
||||
return ofs->config.xino == OVL_XINO_ON;
|
||||
}
|
||||
|
||||
static inline bool ovl_should_sync(struct ovl_fs *ofs)
|
||||
{
|
||||
return ofs->config.fsync_mode != OVL_FSYNC_VOLATILE;
|
||||
}
|
||||
|
||||
static inline bool ovl_should_sync_metadata(struct ovl_fs *ofs)
|
||||
{
|
||||
return ofs->config.fsync_mode == OVL_FSYNC_STRICT;
|
||||
}
|
||||
|
||||
static inline bool ovl_is_volatile(struct ovl_config *config)
|
||||
{
|
||||
return config->fsync_mode == OVL_FSYNC_VOLATILE;
|
||||
}
|
||||
|
||||
/*
|
||||
* To avoid regressions in existing setups with overlay lower offline changes,
|
||||
* we allow lower changes only if none of the new features are used.
|
||||
|
||||
@@ -18,7 +18,7 @@ struct ovl_config {
|
||||
int xino;
|
||||
bool metacopy;
|
||||
bool userxattr;
|
||||
bool ovl_volatile;
|
||||
int fsync_mode;
|
||||
};
|
||||
|
||||
struct ovl_sb {
|
||||
@@ -120,11 +120,6 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb)
|
||||
return (struct ovl_fs *)sb->s_fs_info;
|
||||
}
|
||||
|
||||
static inline bool ovl_should_sync(struct ovl_fs *ofs)
|
||||
{
|
||||
return !ofs->config.ovl_volatile;
|
||||
}
|
||||
|
||||
static inline unsigned int ovl_numlower(struct ovl_entry *oe)
|
||||
{
|
||||
return oe ? oe->__numlower : 0;
|
||||
|
||||
@@ -58,6 +58,7 @@ enum ovl_opt {
|
||||
Opt_xino,
|
||||
Opt_metacopy,
|
||||
Opt_verity,
|
||||
Opt_fsync,
|
||||
Opt_volatile,
|
||||
Opt_override_creds,
|
||||
};
|
||||
@@ -140,6 +141,23 @@ static int ovl_verity_mode_def(void)
|
||||
return OVL_VERITY_OFF;
|
||||
}
|
||||
|
||||
static const struct constant_table ovl_parameter_fsync[] = {
|
||||
{ "volatile", OVL_FSYNC_VOLATILE },
|
||||
{ "auto", OVL_FSYNC_AUTO },
|
||||
{ "strict", OVL_FSYNC_STRICT },
|
||||
{}
|
||||
};
|
||||
|
||||
static const char *ovl_fsync_mode(struct ovl_config *config)
|
||||
{
|
||||
return ovl_parameter_fsync[config->fsync_mode].name;
|
||||
}
|
||||
|
||||
static int ovl_fsync_mode_def(void)
|
||||
{
|
||||
return OVL_FSYNC_AUTO;
|
||||
}
|
||||
|
||||
const struct fs_parameter_spec ovl_parameter_spec[] = {
|
||||
fsparam_string_empty("lowerdir", Opt_lowerdir),
|
||||
fsparam_file_or_string("lowerdir+", Opt_lowerdir_add),
|
||||
@@ -155,6 +173,7 @@ const struct fs_parameter_spec ovl_parameter_spec[] = {
|
||||
fsparam_enum("xino", Opt_xino, ovl_parameter_xino),
|
||||
fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool),
|
||||
fsparam_enum("verity", Opt_verity, ovl_parameter_verity),
|
||||
fsparam_enum("fsync", Opt_fsync, ovl_parameter_fsync),
|
||||
fsparam_flag("volatile", Opt_volatile),
|
||||
fsparam_flag_no("override_creds", Opt_override_creds),
|
||||
{}
|
||||
@@ -665,8 +684,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||
case Opt_verity:
|
||||
config->verity_mode = result.uint_32;
|
||||
break;
|
||||
case Opt_fsync:
|
||||
config->fsync_mode = result.uint_32;
|
||||
break;
|
||||
case Opt_volatile:
|
||||
config->ovl_volatile = true;
|
||||
config->fsync_mode = OVL_FSYNC_VOLATILE;
|
||||
break;
|
||||
case Opt_userxattr:
|
||||
config->userxattr = true;
|
||||
@@ -800,6 +822,7 @@ int ovl_init_fs_context(struct fs_context *fc)
|
||||
ofs->config.nfs_export = ovl_nfs_export_def;
|
||||
ofs->config.xino = ovl_xino_def();
|
||||
ofs->config.metacopy = ovl_metacopy_def;
|
||||
ofs->config.fsync_mode = ovl_fsync_mode_def();
|
||||
|
||||
fc->s_fs_info = ofs;
|
||||
fc->fs_private = ctx;
|
||||
@@ -870,9 +893,9 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
|
||||
config->index = false;
|
||||
}
|
||||
|
||||
if (!config->upperdir && config->ovl_volatile) {
|
||||
if (!config->upperdir && ovl_is_volatile(config)) {
|
||||
pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
|
||||
config->ovl_volatile = false;
|
||||
config->fsync_mode = ovl_fsync_mode_def();
|
||||
}
|
||||
|
||||
if (!config->upperdir && config->uuid == OVL_UUID_ON) {
|
||||
@@ -1070,8 +1093,8 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry)
|
||||
seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config));
|
||||
if (ofs->config.metacopy != ovl_metacopy_def)
|
||||
seq_printf(m, ",metacopy=%s", str_on_off(ofs->config.metacopy));
|
||||
if (ofs->config.ovl_volatile)
|
||||
seq_puts(m, ",volatile");
|
||||
if (ofs->config.fsync_mode != ovl_fsync_mode_def())
|
||||
seq_printf(m, ",fsync=%s", ovl_fsync_mode(&ofs->config));
|
||||
if (ofs->config.userxattr)
|
||||
seq_puts(m, ",userxattr");
|
||||
if (ofs->config.verity_mode != ovl_verity_mode_def())
|
||||
|
||||
@@ -776,7 +776,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
|
||||
* For volatile mount, create an incompat/volatile/dirty file to keep
|
||||
* track of it.
|
||||
*/
|
||||
if (ofs->config.ovl_volatile) {
|
||||
if (ovl_is_volatile(&ofs->config)) {
|
||||
err = ovl_create_volatile_dirty(ofs);
|
||||
if (err < 0) {
|
||||
pr_err("Failed to create volatile/dirty file.\n");
|
||||
|
||||
@@ -85,7 +85,10 @@ int ovl_can_decode_fh(struct super_block *sb)
|
||||
if (!exportfs_can_decode_fh(sb->s_export_op))
|
||||
return 0;
|
||||
|
||||
return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
|
||||
if (sb->s_export_op->encode_fh == generic_encode_ino32_fh)
|
||||
return FILEID_INO32_GEN;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct dentry *ovl_indexdir(struct super_block *sb)
|
||||
|
||||
Reference in New Issue
Block a user