Files
linux/kernel/time/namespace.c
Thomas Weißschuh 7138a8698a timens: Use task_lock guard in timens_get*()
Simplify the logic in timens_get*() by converting the task_lock
usage to a guard().

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260330-timens-cleanup-v1-4-936e91c9dd30@linutronix.de
2026-04-01 17:13:36 +02:00

359 lines
7.7 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Author: Andrei Vagin <avagin@openvz.org>
* Author: Dmitry Safonov <dima@arista.com>
*/
#include <linux/time_namespace.h>
#include <linux/user_namespace.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/clocksource.h>
#include <linux/seq_file.h>
#include <linux/proc_ns.h>
#include <linux/export.h>
#include <linux/nstree.h>
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/err.h>
#include <linux/mm.h>
#include <linux/cleanup.h>
#include "namespace_internal.h"
/**
 * do_timens_ktime_to_host - Convert a time namespace timestamp to host time
 * @clockid:	Clock which @tim refers to
 * @tim:	Absolute time as seen inside the time namespace
 * @ns_offsets:	Offsets of the time namespace
 *
 * Only CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_BOOTTIME_ALARM are subject
 * to namespace offsets. For every other clock @tim is returned unchanged.
 *
 * Return: @tim with the namespace offset subtracted, saturated to the
 * range [0, KTIME_MAX].
 */
ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
				struct timens_offsets *ns_offsets)
{
	ktime_t offset;

	switch (clockid) {
	case CLOCK_MONOTONIC:
		offset = timespec64_to_ktime(ns_offsets->monotonic);
		break;
	case CLOCK_BOOTTIME:
	case CLOCK_BOOTTIME_ALARM:
		offset = timespec64_to_ktime(ns_offsets->boottime);
		break;
	default:
		return tim;
	}

	/*
	 * Check that @tim value is in [offset, KTIME_MAX + offset]
	 * and subtract offset.
	 */
	if (tim < offset) {
		/*
		 * User can specify @tim *absolute* value - if it's lesser than
		 * the time namespace's offset - it's already expired.
		 */
		return 0;
	}

	/*
	 * With a negative offset the subtraction is effectively an addition
	 * and can run past KTIME_MAX. Testing the result afterwards cannot
	 * catch that because ktime_t is unable to hold a value larger than
	 * KTIME_MAX, so check the upper bound up front and saturate.
	 * KTIME_MAX + offset cannot overflow here as offset is negative.
	 */
	if (unlikely(offset < 0 && tim > KTIME_MAX + offset))
		return KTIME_MAX;

	return ktime_sub(tim, offset);
}
static struct ucounts *inc_time_namespaces(struct user_namespace *ns)
{
return inc_ucount(ns, current_euid(), UCOUNT_TIME_NAMESPACES);
}
/* Undo the UCOUNT_TIME_NAMESPACES charge taken by inc_time_namespaces(). */
static void dec_time_namespaces(struct ucounts *ucounts)
{
dec_ucount(ucounts, UCOUNT_TIME_NAMESPACES);
}
/**
 * clone_time_ns - Clone a time namespace
 * @user_ns:	User namespace which owns the new namespace.
 * @old_ns:	Namespace to clone
 *
 * Clone @old_ns and set the clone refcount to 1. The clone inherits the
 * offsets of @old_ns and starts with its offsets not frozen.
 *
 * Return: The new namespace or ERR_PTR. -ENOSPC is returned when the
 * time namespace ucount cannot be charged, -ENOMEM when the allocation of
 * the namespace fails; errors from the vvar page allocation and the common
 * namespace initialization are passed through.
 */
static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
					    struct time_namespace *old_ns)
{
	struct ucounts *ucounts = inc_time_namespaces(user_ns);
	struct time_namespace *ns;
	int err;

	/* Nothing has been acquired yet, so there is nothing to unwind. */
	if (!ucounts)
		return ERR_PTR(-ENOSPC);

	ns = kzalloc_obj(*ns, GFP_KERNEL_ACCOUNT);
	if (!ns) {
		err = -ENOMEM;
		goto fail_dec;
	}

	err = timens_vdso_alloc_vvar_page(ns);
	if (err)
		goto fail_free;

	err = ns_common_init(ns);
	if (err)
		goto fail_free_page;

	ns->frozen_offsets = false;
	ns->offsets = old_ns->offsets;
	ns->user_ns = get_user_ns(user_ns);
	ns->ucounts = ucounts;
	ns_tree_add(ns);

	return ns;

	/* Unwind in reverse order of acquisition. */
fail_free_page:
	timens_vdso_free_vvar_page(ns);
fail_free:
	kfree(ns);
fail_dec:
	dec_time_namespaces(ucounts);
	return ERR_PTR(err);
}
/**
 * copy_time_ns - Provide the timens_for_children for a new nsproxy
 * @flags:	Cloning flags
 * @user_ns:	User namespace which owns a new namespace.
 * @old_ns:	Namespace to clone
 *
 * With CLONE_NEWTIME set in @flags a fresh namespace is cloned from
 * @old_ns. Without it @old_ns itself is reused and merely gains a
 * reference.
 *
 * Return: timens_for_children namespace or ERR_PTR.
 */
struct time_namespace *copy_time_ns(u64 flags,
				    struct user_namespace *user_ns,
				    struct time_namespace *old_ns)
{
	if (flags & CLONE_NEWTIME)
		return clone_time_ns(user_ns, old_ns);

	return get_time_ns(old_ns);
}
/*
 * Held by proc_timens_set_offset() while it checks frozen_offsets and
 * writes the new offsets into the namespace.
 */
DEFINE_MUTEX(timens_offset_lock);
/*
 * Release everything clone_time_ns() set up for @ns: the namespace tree
 * entry, the ucount charge, the user namespace reference, the common
 * namespace state and the vvar page. The structure itself is handed to
 * kfree_rcu() rather than being freed immediately.
 */
void free_time_ns(struct time_namespace *ns)
{
ns_tree_remove(ns);
dec_time_namespaces(ns->ucounts);
put_user_ns(ns->user_ns);
ns_common_free(ns);
timens_vdso_free_vvar_page(ns);
/* Concurrent nstree traversal depends on a grace period. */
kfree_rcu(ns, ns.ns_rcu);
}
/*
 * Look up the time namespace of @task and take a reference on it.
 *
 * The lookup runs under task_lock(), which the guard drops on every return
 * path. Returns NULL when @task has no nsproxy.
 */
static struct ns_common *timens_get(struct task_struct *task)
{
	struct time_namespace *time_ns;
	struct nsproxy *proxy;

	guard(task_lock)(task);

	proxy = task->nsproxy;
	if (proxy) {
		time_ns = proxy->time_ns;
		get_time_ns(time_ns);
		return &time_ns->ns;
	}

	return NULL;
}
/*
 * Look up the time namespace which children of @task will be placed in and
 * take a reference on it.
 *
 * The lookup runs under task_lock(), which the guard drops on every return
 * path. Returns NULL when @task has no nsproxy.
 */
static struct ns_common *timens_for_children_get(struct task_struct *task)
{
	struct time_namespace *time_ns;
	struct nsproxy *proxy;

	guard(task_lock)(task);

	proxy = task->nsproxy;
	if (proxy) {
		time_ns = proxy->time_ns_for_children;
		get_time_ns(time_ns);
		return &time_ns->ns;
	}

	return NULL;
}
/* Drop a reference obtained through one of the timens .get callbacks. */
static void timens_put(struct ns_common *ns)
{
	struct time_namespace *time_ns = to_time_ns(ns);

	put_time_ns(time_ns);
}
/*
 * Install @new as both time_ns and time_ns_for_children of the nsproxy in
 * @nsset.
 *
 * Returns -EUSERS when the caller is not single threaded and -EPERM when
 * CAP_SYS_ADMIN is missing in either the user namespace owning the target
 * time namespace or the user namespace of the nsset credentials.
 */
static int timens_install(struct nsset *nsset, struct ns_common *new)
{
	struct time_namespace *target = to_time_ns(new);
	struct nsproxy *proxy = nsset->nsproxy;

	if (!current_is_single_threaded())
		return -EUSERS;

	if (!(ns_capable(target->user_ns, CAP_SYS_ADMIN) &&
	      ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)))
		return -EPERM;

	/*
	 * Each nsproxy slot holds its own reference. Take the new reference
	 * before dropping the old one so that installing the namespace which
	 * is already in place never lets the count go through zero.
	 */
	get_time_ns(target);
	put_time_ns(proxy->time_ns);
	proxy->time_ns = target;

	get_time_ns(target);
	put_time_ns(proxy->time_ns_for_children);
	proxy->time_ns_for_children = target;

	return 0;
}
/*
 * Switch a forked task over to the time namespace designated for children.
 *
 * When time_ns and time_ns_for_children of @nsproxy differ, time_ns is
 * replaced by time_ns_for_children and the change is committed to @tsk.
 */
void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk)
{
	struct time_namespace *child_ns = nsproxy->time_ns_for_children;

	/* create_new_namespaces() already incremented the ref counter */
	if (nsproxy->time_ns == child_ns)
		return;

	get_time_ns(child_ns);
	put_time_ns(nsproxy->time_ns);
	nsproxy->time_ns = child_ns;

	timens_commit(tsk, child_ns);
}
static struct user_namespace *timens_owner(struct ns_common *ns)
{
return to_time_ns(ns)->user_ns;
}
/*
 * Print one "<clock> <seconds> <nanoseconds>" line for @ts to @m.
 *
 * @clockid selects the label: "boottime" for CLOCK_BOOTTIME, "monotonic"
 * for CLOCK_MONOTONIC and "unknown" for anything else.
 */
static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts)
{
	/* Only ever points at string literals, hence const. */
	const char *clock;

	switch (clockid) {
	case CLOCK_BOOTTIME:
		clock = "boottime";
		break;
	case CLOCK_MONOTONIC:
		clock = "monotonic";
		break;
	default:
		clock = "unknown";
		break;
	}

	seq_printf(m, "%-10s %10lld %9ld\n", clock, ts->tv_sec, ts->tv_nsec);
}
/*
 * Write the monotonic and boottime offsets of the time namespace which
 * children of @p will use to @m. Nothing is printed when the namespace
 * cannot be obtained.
 */
void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m)
{
	/* Cleaned up through __free(time_ns) when the function returns. */
	struct time_namespace *time_ns __free(time_ns) = NULL;
	struct ns_common *common;

	common = timens_for_children_get(p);
	if (common) {
		time_ns = to_time_ns(common);
		show_offset(m, CLOCK_MONOTONIC, &time_ns->offsets.monotonic);
		show_offset(m, CLOCK_BOOTTIME, &time_ns->offsets.boottime);
	}
}
int proc_timens_set_offset(struct file *file, struct task_struct *p,
struct proc_timens_offset *offsets, int noffsets)
{
struct time_namespace *time_ns __free(time_ns) = NULL;
struct ns_common *ns = timens_for_children_get(p);
struct timespec64 tp;
int i;
if (!ns)
return -ESRCH;
time_ns = to_time_ns(ns);
if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME))
return -EPERM;
for (i = 0; i < noffsets; i++) {
struct proc_timens_offset *off = &offsets[i];
switch (off->clockid) {
case CLOCK_MONOTONIC:
ktime_get_ts64(&tp);
break;
case CLOCK_BOOTTIME:
ktime_get_boottime_ts64(&tp);
break;
default:
return -EINVAL;
}
if (off->val.tv_sec > KTIME_SEC_MAX ||
off->val.tv_sec < -KTIME_SEC_MAX)
return -ERANGE;
tp = timespec64_add(tp, off->val);
/*
* KTIME_SEC_MAX is divided by 2 to be sure that KTIME_MAX is
* still unreachable.
*/
if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2)
return -ERANGE;
}
guard(mutex)(&timens_offset_lock);
if (time_ns->frozen_offsets)
return -EACCES;
/* Don't report errors after this line */
for (i = 0; i < noffsets; i++) {
struct proc_timens_offset *off = &offsets[i];
struct timespec64 *offset = NULL;
switch (off->clockid) {
case CLOCK_MONOTONIC:
offset = &time_ns->offsets.monotonic;
break;
case CLOCK_BOOTTIME:
offset = &time_ns->offsets.boottime;
break;
}
*offset = off->val;
}
return 0;
}
/*
 * proc namespace operations named "time". The .get callback hands out
 * nsproxy->time_ns of the task.
 */
const struct proc_ns_operations timens_operations = {
.name = "time",
.get = timens_get,
.put = timens_put,
.install = timens_install,
.owner = timens_owner,
};
/*
 * proc namespace operations named "time_for_children", with "time" as
 * real_ns_name. The .get callback hands out nsproxy->time_ns_for_children
 * of the task; put, install and owner are shared with timens_operations.
 */
const struct proc_ns_operations timens_for_children_operations = {
.name = "time_for_children",
.real_ns_name = "time",
.get = timens_for_children_get,
.put = timens_put,
.install = timens_install,
.owner = timens_owner,
};
/*
 * The initial time namespace, owned by init_user_ns. Its offsets are not
 * set here and are therefore zero. frozen_offsets is set, so
 * proc_timens_set_offset() rejects any attempt to change them with -EACCES.
 */
struct time_namespace init_time_ns = {
.ns = NS_COMMON_INIT(init_time_ns),
.user_ns = &init_user_ns,
.frozen_offsets = true,
};
/* Init-time hook: add the initial time namespace to the namespace tree. */
void __init time_ns_init(void)
{
ns_tree_add(&init_time_ns);
}