Files
linux/kernel/time/namespace_vdso.c
Thomas Weißschuh 1b6c89285d timens: Remove dependency on the vDSO
Previously, missing time namespace support in the vDSO meant that time
namespaces needed to be disabled globally. This was expressed in a hard
dependency on the generic vDSO library. This also meant that architectures
without any vDSO or only a stub vDSO could not enable time namespaces.
Now that all architectures using a real vDSO are using the generic library,
that dependency is not necessary anymore.

Remove the dependency and let all architectures enable time namespaces.

Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260326-vdso-timens-decoupling-v2-2-c82693a7775f@linutronix.de
2026-03-26 15:44:23 +01:00

161 lines
4.2 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Author: Andrei Vagin <avagin@openvz.org>
* Author: Dmitry Safonov <dima@arista.com>
*/
#include <linux/cleanup.h>
#include <linux/mm.h>
#include <linux/time_namespace.h>
#include <linux/time.h>
#include <linux/vdso_datastore.h>
#include <vdso/clocksource.h>
#include <vdso/datapage.h>
#include "namespace_internal.h"
static struct timens_offset offset_from_ts(struct timespec64 off)
{
struct timens_offset ret;
ret.sec = off.tv_sec;
ret.nsec = off.tv_nsec;
return ret;
}
/*
* A time namespace VVAR page has the same layout as the VVAR page which
* contains the system wide VDSO data.
*
* For a normal task the VVAR pages are installed in the normal ordering:
* VVAR
* PVCLOCK
* HVCLOCK
* TIMENS <- Not really required
*
* Now for a timens task the pages are installed in the following order:
* TIMENS
* PVCLOCK
* HVCLOCK
* VVAR
*
* The check for vdso_clock->clock_mode is in the unlikely path of
* the seq begin magic. So for the non-timens case most of the time
* 'seq' is even, so the branch is not taken.
*
* If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
* for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the
* update to finish and for 'seq' to become even anyway.
*
* Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which
* enforces the time namespace handling path.
*/
static void timens_setup_vdso_clock_data(struct vdso_clock *vc,
struct time_namespace *ns)
{
struct timens_offset *offset = vc->offset;
struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
vc->seq = 1;
vc->clock_mode = VDSO_CLOCKMODE_TIMENS;
offset[CLOCK_MONOTONIC] = monotonic;
offset[CLOCK_MONOTONIC_RAW] = monotonic;
offset[CLOCK_MONOTONIC_COARSE] = monotonic;
offset[CLOCK_BOOTTIME] = boottime;
offset[CLOCK_BOOTTIME_ALARM] = boottime;
}
/*
 * Look up the time namespace VVAR page for a fault on @vma.
 *
 * Returns the vvar_page of the current task's time namespace when @vma
 * belongs to the current task's mm. For any other mm a warning is emitted
 * and NULL is returned.
 */
struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	struct page *page = NULL;

	if (likely(vma->vm_mm == current->mm)) {
		page = current->nsproxy->time_ns->vvar_page;
	} else {
		/*
		 * VM_PFNMAP | VM_IO protect the .fault() handler from being
		 * called through interfaces like /proc/$pid/mem or
		 * process_vm_{readv,writev}() as long as there's no .access()
		 * in special_mapping_vmops().
		 * See check_vma_flags() and __access_remote_vm() for details.
		 */
		WARN(1, "vvar_page accessed remotely");
	}

	return page;
}
/*
 * Populate the VVAR page of @ns with the namespace's clock offsets.
 *
 * Nothing is done for init_time_ns or once ns->frozen_offsets is set.
 * Otherwise, with timens_offset_lock held, ns->frozen_offsets is set and
 * every vdso_clock in the page is filled in via
 * timens_setup_vdso_clock_data().
 *
 * @task is not used by this function.
 */
static void timens_set_vvar_page(struct task_struct *task,
				 struct time_namespace *ns)
{
	struct vdso_time_data *vd;
	struct vdso_clock *clocks;
	unsigned int idx;

	if (ns == &init_time_ns)
		return;

	/* Fast-path, taken by every task in namespace except the first. */
	if (likely(ns->frozen_offsets))
		return;

	/* Held until the function returns, whichever return is taken. */
	guard(mutex)(&timens_offset_lock);

	/* Re-check under the lock: vvar_page has been already initialized. */
	if (ns->frozen_offsets)
		return;

	ns->frozen_offsets = true;

	vd = page_address(ns->vvar_page);
	clocks = vd->clock_data;
	for (idx = 0; idx < CS_BASES; idx++)
		timens_setup_vdso_clock_data(clocks + idx, ns);

	/* The auxiliary clocks are only set up when they are configured in. */
	if (!IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS))
		return;

	for (idx = 0; idx < ARRAY_SIZE(vd->aux_clock_data); idx++)
		timens_setup_vdso_clock_data(vd->aux_clock_data + idx, ns);
}
/*
* The vvar page layout depends on whether a task belongs to the root or
* non-root time namespace. Whenever a task changes its namespace, the VVAR
* page tables are cleared and then they will be re-faulted with a
* corresponding layout.
* See also the comment near timens_setup_vdso_clock_data() for details.
*/
static int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
struct mm_struct *mm = task->mm;
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0);
guard(mmap_read_lock)(mm);
for_each_vma(vmi, vma) {
if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
zap_vma_pages(vma);
}
return 0;
}
/*
 * Apply time namespace @ns to the vDSO data of @tsk.
 *
 * First makes sure the VVAR page of @ns has been populated, then zaps the
 * VVAR mappings of @tsk. The return value of vdso_join_timens() is not
 * checked.
 */
void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
{
	/* Populate the namespace page before the old mappings are dropped. */
	timens_set_vvar_page(tsk, ns);

	vdso_join_timens(tsk, ns);
}
/*
 * Allocate the zeroed VVAR page for @ns and store it in ns->vvar_page.
 *
 * The page is requested with GFP_KERNEL_ACCOUNT | __GFP_ZERO.
 *
 * Returns 0 on success or -ENOMEM if the allocation failed, in which case
 * ns->vvar_page is NULL.
 */
int timens_vdso_alloc_vvar_page(struct time_namespace *ns)
{
	ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);

	return ns->vvar_page ? 0 : -ENOMEM;
}
/*
 * Free the VVAR page stored in ns->vvar_page.
 *
 * The pointer is handed to __free_page() without a NULL check and
 * ns->vvar_page is left untouched afterwards.
 * NOTE(review): presumably only called after a successful
 * timens_vdso_alloc_vvar_page() - verify against callers.
 */
void timens_vdso_free_vvar_page(struct time_namespace *ns)
{
	struct page *page = ns->vvar_page;

	__free_page(page);
}