Merge tag 'timers-vdso-2026-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull vdso updates from Thomas Gleixner:

 - Make the handling of compat functions consistent and more robust

 - Rework the underlying data store so that it is dynamically allocated,
   which allows the conversion of the last holdout SPARC64 to the
   generic VDSO implementation

 - Rework the SPARC64 VDSO to utilize the generic implementation

 - Mop up the leftovers of the non-generic VDSO support in the core
   code

 - Expand the VDSO selftests and make them more robust

 - Allow time namespaces to be enabled independently of the generic VDSO
   support, which was not possible before due to SPARC64 not using it

 - Various cleanups and improvements in the related code

* tag 'timers-vdso-2026-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (51 commits)
  timens: Use task_lock guard in timens_get*()
  timens: Use mutex guard in proc_timens_set_offset()
  timens: Simplify some calls to put_time_ns()
  timens: Add a __free() wrapper for put_time_ns()
  timens: Remove dependency on the vDSO
  vdso/timens: Move functions to new file
  selftests: vDSO: vdso_test_correctness: Add a test for time()
  selftests: vDSO: vdso_test_correctness: Use facilities from parse_vdso.c
  selftests: vDSO: vdso_test_correctness: Handle different tv_usec types
  selftests: vDSO: vdso_test_correctness: Drop SYS_getcpu fallbacks
  selftests: vDSO: vdso_test_gettimeofday: Remove nolibc checks
  Revert "selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers"
  random: vDSO: Remove ifdeffery
  random: vDSO: Trim vDSO includes
  vdso/datapage: Trim down unnecessary includes
  vdso/datapage: Remove inclusion of gettimeofday.h
  vdso/helpers: Explicitly include vdso/processor.h
  vdso/gettimeofday: Add explicit includes
  random: vDSO: Add explicit includes
  MIPS: vdso: Explicitly include asm/vdso/vdso.h
  ...
This commit is contained in:
Linus Torvalds
2026-04-14 10:53:44 -07:00
56 changed files with 848 additions and 1310 deletions

View File

@@ -10796,6 +10796,7 @@ S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso
F: include/asm-generic/vdso/vsyscall.h
F: include/vdso/
F: kernel/time/namespace_vdso.c
F: kernel/time/vsyscall.c
F: lib/vdso/
F: tools/testing/selftests/vDSO/
@@ -21042,6 +21043,7 @@ F: include/trace/events/timer*
F: kernel/time/itimer.c
F: kernel/time/posix-*
F: kernel/time/namespace.c
F: kernel/time/namespace_vdso.c
POWER MANAGEMENT CORE
M: "Rafael J. Wysocki" <rafael@kernel.org>

View File

@@ -11,6 +11,8 @@
#include <asm/errno.h>
#include <asm/unistd.h>
#include <asm/vdso/cp15.h>
#include <vdso/clocksource.h>
#include <vdso/time32.h>
#include <uapi/linux/time.h>
#define VDSO_HAS_CLOCK_GETRES 1

View File

@@ -7,6 +7,9 @@
#ifndef __ASSEMBLER__
#include <vdso/clocksource.h>
#include <vdso/time32.h>
#include <asm/barrier.h>
#include <asm/unistd_compat_32.h>
#include <asm/errno.h>

View File

@@ -9,6 +9,8 @@
#ifndef __ASSEMBLER__
#include <vdso/clocksource.h>
#include <asm/alternative.h>
#include <asm/arch_timer.h>
#include <asm/barrier.h>

View File

@@ -52,6 +52,7 @@
#include <asm/switch_to.h>
#include <asm/unwind.h>
#include <asm/vdso.h>
#include <asm/vdso/vdso.h>
#ifdef CONFIG_STACKPROTECTOR
#include <linux/stackprotector.h>

View File

@@ -18,6 +18,7 @@
#include <asm/page.h>
#include <asm/vdso.h>
#include <asm/vdso/vdso.h>
#include <vdso/helpers.h>
#include <vdso/vsyscall.h>
#include <vdso/datapage.h>

View File

@@ -4,6 +4,9 @@
* Author: Alex Smith <alex.smith@imgtec.com>
*/
#ifndef __ASM_VDSO_VDSO_H
#define __ASM_VDSO_VDSO_H
#include <asm/sgidefs.h>
#include <vdso/page.h>
@@ -70,3 +73,5 @@ static inline void __iomem *get_gic(const struct vdso_time_data *data)
#endif /* CONFIG_CLKSRC_MIPS_GIC */
#endif /* __ASSEMBLER__ */
#endif /* __ASM_VDSO_VDSO_H */

View File

@@ -21,6 +21,7 @@
#include <asm/mips-cps.h>
#include <asm/page.h>
#include <asm/vdso.h>
#include <asm/vdso/vdso.h>
#include <vdso/helpers.h>
#include <vdso/vsyscall.h>

View File

@@ -8,6 +8,7 @@
#include <asm/barrier.h>
#include <asm/unistd.h>
#include <uapi/linux/time.h>
#include <vdso/time32.h>
#define VDSO_HAS_CLOCK_GETRES 1

View File

@@ -4,6 +4,9 @@
#ifndef __ASSEMBLER__
#include <asm/cputable.h>
#include <asm/feature-fixups.h>
/* Macros for adjusting thread priority (hardware multi-threading) */
#ifdef CONFIG_PPC64
#define HMT_very_low() asm volatile("or 31, 31, 31 # very low priority")

View File

@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#undef __powerpc64__
#include <linux/audit_arch.h>
#include <asm/unistd.h>
#include <asm/unistd_32.h>
#include "audit_32.h"

View File

@@ -12,8 +12,7 @@ LD_BFD := elf64-s390
KBUILD_LDFLAGS := -m elf64_s390
KBUILD_AFLAGS_MODULE += -fPIC
KBUILD_CFLAGS_MODULE += -fPIC
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
KBUILD_CPPFLAGS += -m64
KBUILD_CFLAGS += -fPIC
LDFLAGS_vmlinux := $(call ld-option,-no-pie)
extra_tools := relocs

View File

@@ -104,7 +104,6 @@ config SPARC64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select GENERIC_TIME_VSYSCALL
select ARCH_CLOCKSOURCE_DATA
select ARCH_HAS_PTE_SPECIAL
select PCI_DOMAINS if PCI
select ARCH_HAS_GIGANTIC_PAGE
@@ -115,6 +114,8 @@ config SPARC64
select ARCH_SUPPORTS_SCHED_SMT if SMP
select ARCH_SUPPORTS_SCHED_MC if SMP
select ARCH_HAS_LAZY_MMU_MODE
select HAVE_GENERIC_VDSO
select GENERIC_GETTIMEOFDAY
config ARCH_PROC_KCORE_TEXT
def_bool y

View File

@@ -5,13 +5,4 @@
#ifndef _ASM_SPARC_CLOCKSOURCE_H
#define _ASM_SPARC_CLOCKSOURCE_H
/* VDSO clocksources */
#define VCLOCK_NONE 0 /* Nothing userspace can do. */
#define VCLOCK_TICK 1 /* Use %tick. */
#define VCLOCK_STICK 2 /* Use %stick. */
struct arch_clocksource_data {
int vclock_mode;
};
#endif /* _ASM_SPARC_CLOCKSOURCE_H */

View File

@@ -1,6 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ___ASM_SPARC_PROCESSOR_H
#define ___ASM_SPARC_PROCESSOR_H
#include <asm/vdso/processor.h>
#if defined(__sparc__) && defined(__arch64__)
#include <asm/processor_64.h>
#else

View File

@@ -91,8 +91,6 @@ unsigned long __get_wchan(struct task_struct *);
extern struct task_struct *last_task_used_math;
int do_mathemu(struct pt_regs *regs, struct task_struct *fpt);
#define cpu_relax() barrier()
extern void (*sparc_idle)(void);
#endif

View File

@@ -182,31 +182,6 @@ unsigned long __get_wchan(struct task_struct *task);
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->tpc)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->u_regs[UREG_FP])
/* Please see the commentary in asm/backoff.h for a description of
* what these instructions are doing and how they have been chosen.
* To make a long story short, we are trying to yield the current cpu
* strand during busy loops.
*/
#ifdef BUILD_VDSO
#define cpu_relax() asm volatile("\n99:\n\t" \
"rd %%ccr, %%g0\n\t" \
"rd %%ccr, %%g0\n\t" \
"rd %%ccr, %%g0\n\t" \
::: "memory")
#else /* ! BUILD_VDSO */
#define cpu_relax() asm volatile("\n99:\n\t" \
"rd %%ccr, %%g0\n\t" \
"rd %%ccr, %%g0\n\t" \
"rd %%ccr, %%g0\n\t" \
".section .pause_3insn_patch,\"ax\"\n\t"\
".word 99b\n\t" \
"wr %%g0, 128, %%asr27\n\t" \
"nop\n\t" \
"nop\n\t" \
".previous" \
::: "memory")
#endif
/* Prefetch support. This is tuned for UltraSPARC-III and later.
* UltraSPARC-I will treat these as nops, and UltraSPARC-II has
* a shallower prefetch queue than later chips.

View File

@@ -8,8 +8,6 @@
struct vdso_image {
void *data;
unsigned long size; /* Always a multiple of PAGE_SIZE */
long sym_vvar_start; /* Negative offset to the vvar area */
};
#ifdef CONFIG_SPARC64

View File

@@ -0,0 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_VDSO_CLOCKSOURCE_H
#define __ASM_VDSO_CLOCKSOURCE_H
/*
 * VDSO clocksources: the architecture clock modes the SPARC vDSO can
 * use.  This list is expanded inside the generic enum vdso_clock_mode
 * (see vdso/clocksource.h); the tick/stick readers live in
 * asm/vdso/gettimeofday.h.
 */
#define VDSO_ARCH_CLOCKMODES \
VDSO_CLOCKMODE_TICK, \
VDSO_CLOCKMODE_STICK
#endif /* __ASM_VDSO_CLOCKSOURCE_H */

View File

@@ -0,0 +1,184 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2006 Andi Kleen, SUSE Labs.
*/
#ifndef _ASM_SPARC_VDSO_GETTIMEOFDAY_H
#define _ASM_SPARC_VDSO_GETTIMEOFDAY_H
#include <uapi/linux/time.h>
#include <uapi/linux/unistd.h>
#include <vdso/align.h>
#include <vdso/clocksource.h>
#include <vdso/datapage.h>
#include <vdso/page.h>
#include <linux/types.h>
#ifdef CONFIG_SPARC64
/* Read the CPU cycle counter (%tick). */
static __always_inline u64 vread_tick(void)
{
u64 ret;
__asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
return ret;
}
/* Read the system tick register (%stick, accessible as %asr24). */
static __always_inline u64 vread_tick_stick(void)
{
u64 ret;
__asm__ __volatile__("rd %%asr24, %0" : "=r" (ret));
return ret;
}
#else
/*
 * 32-bit compat vDSO: a u64 lives in a register pair (%H/%L halves),
 * so the 64-bit right shift is open coded with 64-bit sllx/srlx
 * instructions, using %g1 as scratch.
 */
static __always_inline u64 vdso_shift_ns(u64 val, u32 amt)
{
u64 ret;
__asm__ __volatile__("sllx %H1, 32, %%g1\n\t"
"srl %L1, 0, %L1\n\t"
"or %%g1, %L1, %%g1\n\t"
"srlx %%g1, %2, %L0\n\t"
"srlx %L0, 32, %H0"
: "=r" (ret)
: "r" (val), "r" (amt)
: "g1");
return ret;
}
/* Tell the generic vDSO code to use the open-coded shift above. */
#define vdso_shift_ns vdso_shift_ns
/* Read %tick; result is assembled into the %o4/%o5 register pair. */
static __always_inline u64 vread_tick(void)
{
register unsigned long long ret asm("o4");
__asm__ __volatile__("rd %%tick, %L0\n\t"
"srlx %L0, 32, %H0"
: "=r" (ret));
return ret;
}
/* Read %stick (%asr24); same register-pair handling as vread_tick(). */
static __always_inline u64 vread_tick_stick(void)
{
register unsigned long long ret asm("o4");
__asm__ __volatile__("rd %%asr24, %L0\n\t"
"srlx %L0, 32, %H0"
: "=r" (ret));
return ret;
}
#endif
/*
 * Return the raw hardware counter for the given vDSO clock mode:
 * %stick for VDSO_CLOCKMODE_STICK, %tick for everything else.
 * The time data argument is unused on SPARC.
 */
static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd)
{
if (likely(clock_mode == VDSO_CLOCKMODE_STICK))
return vread_tick_stick();
else
return vread_tick();
}
/*
 * Syscall trap sequence for the fallback paths: "ta 0x6d" is the
 * 64-bit syscall trap, "ta 0x10" the 32-bit one.  On error the carry
 * bit is set and the annulled branch-delay slot negates %o0, so the
 * caller gets a -errno style return value.
 */
#ifdef CONFIG_SPARC64
#define SYSCALL_STRING \
"ta 0x6d;" \
"bcs,a 1f;" \
" sub %%g0, %%o0, %%o0;" \
"1:"
#else
#define SYSCALL_STRING \
"ta 0x10;" \
"bcs,a 1f;" \
" sub %%g0, %%o0, %%o0;" \
"1:"
#endif
/* Everything the trap may clobber: FP registers, condition codes, memory. */
#define SYSCALL_CLOBBERS \
"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
"f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \
"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \
"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \
"f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", \
"f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", \
"cc", "memory"
#ifdef CONFIG_SPARC64
/* clock_gettime(2) syscall fallback used when the vDSO fast path cannot run. */
static __always_inline
long clock_gettime_fallback(clockid_t clock, struct __kernel_timespec *ts)
{
register long num __asm__("g1") = __NR_clock_gettime;
register long o0 __asm__("o0") = clock;
register long o1 __asm__("o1") = (long) ts;
__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
"0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
return o0;
}
#else /* !CONFIG_SPARC64 */
/* 32-bit: the 64-bit time_t fallback goes through clock_gettime64(2). */
static __always_inline
long clock_gettime_fallback(clockid_t clock, struct __kernel_timespec *ts)
{
register long num __asm__("g1") = __NR_clock_gettime64;
register long o0 __asm__("o0") = clock;
register long o1 __asm__("o1") = (long) ts;
__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
"0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
return o0;
}
/* 32-bit time_t fallback for the legacy clock_gettime(2) entry point. */
static __always_inline
long clock_gettime32_fallback(clockid_t clock, struct old_timespec32 *ts)
{
register long num __asm__("g1") = __NR_clock_gettime;
register long o0 __asm__("o0") = clock;
register long o1 __asm__("o1") = (long) ts;
__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
"0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
return o0;
}
#endif /* CONFIG_SPARC64 */
/* gettimeofday(2) syscall fallback, shared by 32- and 64-bit builds. */
static __always_inline
long gettimeofday_fallback(struct __kernel_old_timeval *tv, struct timezone *tz)
{
register long num __asm__("g1") = __NR_gettimeofday;
register long o0 __asm__("o0") = (long) tv;
register long o1 __asm__("o1") = (long) tz;
__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
"0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
return o0;
}
/*
 * Locate the vDSO time data.  Since SPARC lacks PC-relative data
 * relocations, the offset to vdso_u_time_data is embedded as a .word
 * next to the code and resolved at run time via %o7 (set by "call").
 */
static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void)
{
unsigned long ret;
/*
* SPARC does not support native PC-relative code relocations.
* Calculate the address manually, works for 32 and 64 bit code.
*/
__asm__ __volatile__(
"1:\n"
"call 3f\n" // Jump over the embedded data and set up %o7
"nop\n" // Delay slot
"2:\n"
".word vdso_u_time_data - .\n" // Embedded offset to external symbol
"3:\n"
"add %%o7, 2b - 1b, %%o7\n" // Point %o7 to the embedded offset
"ldsw [%%o7], %0\n" // Load the offset
"add %0, %%o7, %0\n" // Calculate the absolute address
: "=r" (ret)
:
: "o7");
return (const struct vdso_time_data *)ret;
}
#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data
#endif /* _ASM_SPARC_VDSO_GETTIMEOFDAY_H */

View File

@@ -0,0 +1,41 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_SPARC_VDSO_PROCESSOR_H
#define _ASM_SPARC_VDSO_PROCESSOR_H
#include <linux/compiler.h>
#if defined(__arch64__)
/* Please see the commentary in asm/backoff.h for a description of
* what these instructions are doing and how they have been chosen.
* To make a long story short, we are trying to yield the current cpu
* strand during busy loops.
*/
#ifdef BUILD_VDSO
/*
 * The vDSO copy cannot carry the kernel's .pause_3insn_patch section,
 * so it always uses the plain three-read sequence.
 */
#define cpu_relax() asm volatile("\n99:\n\t" \
"rd %%ccr, %%g0\n\t" \
"rd %%ccr, %%g0\n\t" \
"rd %%ccr, %%g0\n\t" \
::: "memory")
#else /* ! BUILD_VDSO */
/*
 * Kernel build: additionally record the three-insn site in
 * .pause_3insn_patch so it can be patched into a
 * "wr %g0, 128, %asr27" pause on CPUs that support it.
 */
#define cpu_relax() asm volatile("\n99:\n\t" \
"rd %%ccr, %%g0\n\t" \
"rd %%ccr, %%g0\n\t" \
"rd %%ccr, %%g0\n\t" \
".section .pause_3insn_patch,\"ax\"\n\t"\
".word 99b\n\t" \
"wr %%g0, 128, %%asr27\n\t" \
"nop\n\t" \
"nop\n\t" \
".previous" \
::: "memory")
#endif /* BUILD_VDSO */
#else /* ! __arch64__ */
/* 32-bit sparc: no pause facility, a compiler barrier is sufficient. */
#define cpu_relax() barrier()
#endif /* __arch64__ */
#endif /* _ASM_SPARC_VDSO_PROCESSOR_H */

View File

@@ -0,0 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_SPARC_VDSO_VSYSCALL_H
#define _ASM_SPARC_VDSO_VSYSCALL_H
/* Page count of the vDSO data store — NOTE(review): presumably must match
 * the generic datastore layout (vdso/datapage.h); confirm. */
#define __VDSO_PAGES 4
#include <asm-generic/vdso/vsyscall.h>
#endif /* _ASM_SPARC_VDSO_VSYSCALL_H */

View File

@@ -1,75 +0,0 @@
/*
* Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _ASM_SPARC_VVAR_DATA_H
#define _ASM_SPARC_VVAR_DATA_H
#include <asm/clocksource.h>
#include <asm/processor.h>
#include <asm/barrier.h>
#include <linux/time.h>
#include <linux/types.h>
/*
 * Time data shared between the kernel (vsyscall.c writer) and the vDSO
 * (reader).  Consistency is provided by the hand-rolled seqcount 'seq':
 * the value is odd while an update is in progress, and readers retry
 * until they observe the same even value before and after their reads.
 */
struct vvar_data {
unsigned int seq; /* seqcount; odd => writer active */
int vclock_mode; /* VCLOCK_* mode of the current clocksource */
struct { /* extract of a clocksource struct */
u64 cycle_last;
u64 mask;
int mult;
int shift;
} clock;
/* open coded 'struct timespec' */
u64 wall_time_sec;
u64 wall_time_snsec;
u64 monotonic_time_snsec;
u64 monotonic_time_sec;
u64 monotonic_time_coarse_sec;
u64 monotonic_time_coarse_nsec;
u64 wall_time_coarse_sec;
u64 wall_time_coarse_nsec;
int tz_minuteswest;
int tz_dsttime;
};
extern struct vvar_data *vvar_data;
extern int vdso_fix_stick;
/* Reader: spin until no update is in progress, then return the sequence. */
static inline unsigned int vvar_read_begin(const struct vvar_data *s)
{
unsigned int ret;
repeat:
ret = READ_ONCE(s->seq);
if (unlikely(ret & 1)) {
cpu_relax();
goto repeat;
}
smp_rmb(); /* Finish all reads before we return seq */
return ret;
}
/* Reader: nonzero if the data changed since vvar_read_begin() - retry. */
static inline int vvar_read_retry(const struct vvar_data *s,
unsigned int start)
{
smp_rmb(); /* Finish all reads before checking the value of seq */
return unlikely(s->seq != start);
}
/* Writer: make seq odd before modifying the data. */
static inline void vvar_write_begin(struct vvar_data *s)
{
++s->seq;
smp_wmb(); /* Makes sure that increment of seq is reflected */
}
/* Writer: make seq even again once the data is consistent. */
static inline void vvar_write_end(struct vvar_data *s)
{
smp_wmb(); /* Makes the value of seq current before we increment */
++s->seq;
}
#endif /* _ASM_SPARC_VVAR_DATA_H */

View File

@@ -41,7 +41,6 @@ obj-$(CONFIG_SPARC32) += systbls_32.o
obj-y += time_$(BITS).o
obj-$(CONFIG_SPARC32) += windows.o
obj-y += cpu.o
obj-$(CONFIG_SPARC64) += vdso.o
obj-$(CONFIG_SPARC32) += devices.o
obj-y += ptrace_$(BITS).o
obj-y += unaligned_$(BITS).o

View File

@@ -838,14 +838,14 @@ void __init time_init_early(void)
if (tlb_type == spitfire) {
if (is_hummingbird()) {
init_tick_ops(&hbtick_operations);
clocksource_tick.archdata.vclock_mode = VCLOCK_NONE;
clocksource_tick.vdso_clock_mode = VDSO_CLOCKMODE_NONE;
} else {
init_tick_ops(&tick_operations);
clocksource_tick.archdata.vclock_mode = VCLOCK_TICK;
clocksource_tick.vdso_clock_mode = VDSO_CLOCKMODE_TICK;
}
} else {
init_tick_ops(&stick_operations);
clocksource_tick.archdata.vclock_mode = VCLOCK_STICK;
clocksource_tick.vdso_clock_mode = VDSO_CLOCKMODE_STICK;
}
}

View File

@@ -1,69 +0,0 @@
/*
* Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
* Copyright 2003 Andi Kleen, SuSE Labs.
*
* Thanks to hpa@transmeta.com for some useful hint.
* Special thanks to Ingo Molnar for his early experience with
* a different vsyscall implementation for Linux/IA32 and for the name.
*/
#include <linux/time.h>
#include <linux/timekeeper_internal.h>
#include <asm/vvar.h>
/*
 * Publish the current timezone (sys_tz) to the vvar page.
 * vvar_data is NULL until the vDSO has been set up - nothing to do then.
 */
void update_vsyscall_tz(void)
{
if (unlikely(vvar_data == NULL))
return;
vvar_data->tz_minuteswest = sys_tz.tz_minuteswest;
vvar_data->tz_dsttime = sys_tz.tz_dsttime;
}
/*
 * Publish the timekeeper state to the vvar page.  All stores happen
 * between vvar_write_begin()/vvar_write_end() so vDSO readers always
 * see a consistent snapshot.
 */
void update_vsyscall(struct timekeeper *tk)
{
struct vvar_data *vdata = vvar_data;
if (unlikely(vdata == NULL))
return;
vvar_write_begin(vdata);
vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
vdata->clock.mask = tk->tkr_mono.mask;
vdata->clock.mult = tk->tkr_mono.mult;
vdata->clock.shift = tk->tkr_mono.shift;
/* The *_snsec fields hold nanoseconds left-shifted by clock.shift */
vdata->wall_time_sec = tk->xtime_sec;
vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
vdata->monotonic_time_sec = tk->xtime_sec +
tk->wall_to_monotonic.tv_sec;
vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec +
(tk->wall_to_monotonic.tv_nsec <<
tk->tkr_mono.shift);
/* Normalize: carry whole seconds out of the shifted-ns field */
while (vdata->monotonic_time_snsec >=
(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
vdata->monotonic_time_snsec -=
((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
vdata->monotonic_time_sec++;
}
/* Coarse clocks use plain nanoseconds; no counter read in the vDSO */
vdata->wall_time_coarse_sec = tk->xtime_sec;
vdata->wall_time_coarse_nsec =
(long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
vdata->monotonic_time_coarse_sec =
vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
vdata->monotonic_time_coarse_nsec =
vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
vdata->monotonic_time_coarse_sec++;
}
vvar_write_end(vdata);
}

View File

@@ -3,6 +3,9 @@
# Building vDSO images for sparc.
#
# Include the generic Makefile to check the built vDSO:
include $(srctree)/lib/vdso/Makefile.include
# files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o
@@ -90,6 +93,9 @@ KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
KBUILD_CFLAGS_32 += -mv8plus
$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
CHECKFLAGS_32 := $(filter-out -m64 -D__sparc_v9__ -D__arch64__, $(CHECKFLAGS)) -m32
$(obj)/vdso32.so.dbg: CHECKFLAGS = $(CHECKFLAGS_32)
$(obj)/vdso32.so.dbg: FORCE \
$(obj)/vdso32/vdso32.lds \
$(obj)/vdso32/vclock_gettime.o \
@@ -102,6 +108,7 @@ $(obj)/vdso32.so.dbg: FORCE \
quiet_cmd_vdso = VDSO $@
cmd_vdso = $(LD) -nostdlib -o $@ \
$(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
-T $(filter %.lds,$^) $(filter %.o,$^)
-T $(filter %.lds,$^) $(filter %.o,$^); \
$(cmd_vdso_check)
VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 -Bsymbolic --no-undefined
VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 -Bsymbolic --no-undefined -z noexecstack

View File

@@ -12,382 +12,48 @@
* Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
*/
#include <linux/kernel.h>
#include <linux/time.h>
#include <linux/string.h>
#include <asm/io.h>
#include <asm/unistd.h>
#include <asm/timex.h>
#include <asm/clocksource.h>
#include <asm/vvar.h>
#include <linux/compiler.h>
#include <linux/types.h>
#include <vdso/gettime.h>
#include <asm/vdso/gettimeofday.h>
#include "../../../../lib/vdso/gettimeofday.c"
int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
return __cvdso_gettimeofday(tv, tz);
}
int gettimeofday(struct __kernel_old_timeval *, struct timezone *)
__weak __alias(__vdso_gettimeofday);
#if defined(CONFIG_SPARC64)
int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
return __cvdso_clock_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct __kernel_timespec *)
__weak __alias(__vdso_clock_gettime);
#ifdef CONFIG_SPARC64
#define SYSCALL_STRING \
"ta 0x6d;" \
"bcs,a 1f;" \
" sub %%g0, %%o0, %%o0;" \
"1:"
#else
#define SYSCALL_STRING \
"ta 0x10;" \
"bcs,a 1f;" \
" sub %%g0, %%o0, %%o0;" \
"1:"
int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts)
{
return __cvdso_clock_gettime32(clock, ts);
}
int clock_gettime(clockid_t, struct old_timespec32 *)
__weak __alias(__vdso_clock_gettime);
int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts)
{
return __cvdso_clock_gettime(clock, ts);
}
int clock_gettime64(clockid_t, struct __kernel_timespec *)
__weak __alias(__vdso_clock_gettime64);
#endif
#define SYSCALL_CLOBBERS \
"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
"f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \
"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \
"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \
"f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", \
"f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", \
"cc", "memory"
/*
* Compute the vvar page's address in the process address space, and return it
* as a pointer to the vvar_data.
*
* The data page sits one 8192-byte SPARC page before the vDSO code, so
* round the current PC down to a page boundary and step back one page.
*/
notrace static __always_inline struct vvar_data *get_vvar_data(void)
{
unsigned long ret;
/*
* vdso data page is the first vDSO page so grab the PC
* and move up a page to get to the data page.
*/
__asm__("rd %%pc, %0" : "=r" (ret));
ret &= ~(8192 - 1);
ret -= 8192;
return (struct vvar_data *) ret;
}
notrace static long vdso_fallback_gettime(long clock, struct __kernel_old_timespec *ts)
{
register long num __asm__("g1") = __NR_clock_gettime;
register long o0 __asm__("o0") = clock;
register long o1 __asm__("o1") = (long) ts;
__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
"0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
return o0;
}
notrace static long vdso_fallback_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
register long num __asm__("g1") = __NR_gettimeofday;
register long o0 __asm__("o0") = (long) tv;
register long o1 __asm__("o1") = (long) tz;
__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
"0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
return o0;
}
#ifdef CONFIG_SPARC64
notrace static __always_inline u64 __shr64(u64 val, int amt)
{
return val >> amt;
}
notrace static __always_inline u64 vread_tick(void)
{
u64 ret;
__asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
return ret;
}
notrace static __always_inline u64 vread_tick_stick(void)
{
u64 ret;
__asm__ __volatile__("rd %%asr24, %0" : "=r" (ret));
return ret;
}
#else
notrace static __always_inline u64 __shr64(u64 val, int amt)
{
u64 ret;
__asm__ __volatile__("sllx %H1, 32, %%g1\n\t"
"srl %L1, 0, %L1\n\t"
"or %%g1, %L1, %%g1\n\t"
"srlx %%g1, %2, %L0\n\t"
"srlx %L0, 32, %H0"
: "=r" (ret)
: "r" (val), "r" (amt)
: "g1");
return ret;
}
notrace static __always_inline u64 vread_tick(void)
{
register unsigned long long ret asm("o4");
__asm__ __volatile__("rd %%tick, %L0\n\t"
"srlx %L0, 32, %H0"
: "=r" (ret));
return ret;
}
notrace static __always_inline u64 vread_tick_stick(void)
{
register unsigned long long ret asm("o4");
__asm__ __volatile__("rd %%asr24, %L0\n\t"
"srlx %L0, 32, %H0"
: "=r" (ret));
return ret;
}
#endif
notrace static __always_inline u64 vgetsns(struct vvar_data *vvar)
{
u64 v;
u64 cycles;
cycles = vread_tick();
v = (cycles - vvar->clock.cycle_last) & vvar->clock.mask;
return v * vvar->clock.mult;
}
notrace static __always_inline u64 vgetsns_stick(struct vvar_data *vvar)
{
u64 v;
u64 cycles;
cycles = vread_tick_stick();
v = (cycles - vvar->clock.cycle_last) & vvar->clock.mask;
return v * vvar->clock.mult;
}
notrace static __always_inline int do_realtime(struct vvar_data *vvar,
struct __kernel_old_timespec *ts)
{
unsigned long seq;
u64 ns;
do {
seq = vvar_read_begin(vvar);
ts->tv_sec = vvar->wall_time_sec;
ns = vvar->wall_time_snsec;
ns += vgetsns(vvar);
ns = __shr64(ns, vvar->clock.shift);
} while (unlikely(vvar_read_retry(vvar, seq)));
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
ts->tv_nsec = ns;
return 0;
}
notrace static __always_inline int do_realtime_stick(struct vvar_data *vvar,
struct __kernel_old_timespec *ts)
{
unsigned long seq;
u64 ns;
do {
seq = vvar_read_begin(vvar);
ts->tv_sec = vvar->wall_time_sec;
ns = vvar->wall_time_snsec;
ns += vgetsns_stick(vvar);
ns = __shr64(ns, vvar->clock.shift);
} while (unlikely(vvar_read_retry(vvar, seq)));
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
ts->tv_nsec = ns;
return 0;
}
notrace static __always_inline int do_monotonic(struct vvar_data *vvar,
struct __kernel_old_timespec *ts)
{
unsigned long seq;
u64 ns;
do {
seq = vvar_read_begin(vvar);
ts->tv_sec = vvar->monotonic_time_sec;
ns = vvar->monotonic_time_snsec;
ns += vgetsns(vvar);
ns = __shr64(ns, vvar->clock.shift);
} while (unlikely(vvar_read_retry(vvar, seq)));
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
ts->tv_nsec = ns;
return 0;
}
notrace static __always_inline int do_monotonic_stick(struct vvar_data *vvar,
struct __kernel_old_timespec *ts)
{
unsigned long seq;
u64 ns;
do {
seq = vvar_read_begin(vvar);
ts->tv_sec = vvar->monotonic_time_sec;
ns = vvar->monotonic_time_snsec;
ns += vgetsns_stick(vvar);
ns = __shr64(ns, vvar->clock.shift);
} while (unlikely(vvar_read_retry(vvar, seq)));
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
ts->tv_nsec = ns;
return 0;
}
/*
 * CLOCK_REALTIME_COARSE: copy the pre-computed coarse wall time under
 * the vvar seqcount.  No hardware counter read is needed.
 */
notrace static int do_realtime_coarse(struct vvar_data *vvar,
struct __kernel_old_timespec *ts)
{
unsigned long seq;
do {
seq = vvar_read_begin(vvar);
ts->tv_sec = vvar->wall_time_coarse_sec;
ts->tv_nsec = vvar->wall_time_coarse_nsec;
} while (unlikely(vvar_read_retry(vvar, seq)));
return 0;
}
/* CLOCK_MONOTONIC_COARSE counterpart of do_realtime_coarse(). */
notrace static int do_monotonic_coarse(struct vvar_data *vvar,
struct __kernel_old_timespec *ts)
{
unsigned long seq;
do {
seq = vvar_read_begin(vvar);
ts->tv_sec = vvar->monotonic_time_coarse_sec;
ts->tv_nsec = vvar->monotonic_time_coarse_nsec;
} while (unlikely(vvar_read_retry(vvar, seq)));
return 0;
}
notrace int
__vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts)
{
struct vvar_data *vvd = get_vvar_data();
switch (clock) {
case CLOCK_REALTIME:
if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
break;
return do_realtime(vvd, ts);
case CLOCK_MONOTONIC:
if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
break;
return do_monotonic(vvd, ts);
case CLOCK_REALTIME_COARSE:
return do_realtime_coarse(vvd, ts);
case CLOCK_MONOTONIC_COARSE:
return do_monotonic_coarse(vvd, ts);
}
/*
* Unknown clock ID ? Fall back to the syscall.
*/
return vdso_fallback_gettime(clock, ts);
}
int
clock_gettime(clockid_t, struct __kernel_old_timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
notrace int
__vdso_clock_gettime_stick(clockid_t clock, struct __kernel_old_timespec *ts)
{
struct vvar_data *vvd = get_vvar_data();
switch (clock) {
case CLOCK_REALTIME:
if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
break;
return do_realtime_stick(vvd, ts);
case CLOCK_MONOTONIC:
if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
break;
return do_monotonic_stick(vvd, ts);
case CLOCK_REALTIME_COARSE:
return do_realtime_coarse(vvd, ts);
case CLOCK_MONOTONIC_COARSE:
return do_monotonic_coarse(vvd, ts);
}
/*
* Unknown clock ID ? Fall back to the syscall.
*/
return vdso_fallback_gettime(clock, ts);
}
notrace int
__vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
struct vvar_data *vvd = get_vvar_data();
if (likely(vvd->vclock_mode != VCLOCK_NONE)) {
if (likely(tv != NULL)) {
union tstv_t {
struct __kernel_old_timespec ts;
struct __kernel_old_timeval tv;
} *tstv = (union tstv_t *) tv;
do_realtime(vvd, &tstv->ts);
/*
* Assign before dividing to ensure that the division is
* done in the type of tv_usec, not tv_nsec.
*
* There cannot be > 1 billion usec in a second:
* do_realtime() has already distributed such overflow
* into tv_sec. So we can assign it to an int safely.
*/
tstv->tv.tv_usec = tstv->ts.tv_nsec;
tstv->tv.tv_usec /= 1000;
}
if (unlikely(tz != NULL)) {
/* Avoid memcpy. Some old compilers fail to inline it */
tz->tz_minuteswest = vvd->tz_minuteswest;
tz->tz_dsttime = vvd->tz_dsttime;
}
return 0;
}
return vdso_fallback_gettimeofday(tv, tz);
}
int
gettimeofday(struct __kernel_old_timeval *, struct timezone *)
__attribute__((weak, alias("__vdso_gettimeofday")));
notrace int
__vdso_gettimeofday_stick(struct __kernel_old_timeval *tv, struct timezone *tz)
{
struct vvar_data *vvd = get_vvar_data();
if (likely(vvd->vclock_mode != VCLOCK_NONE)) {
if (likely(tv != NULL)) {
union tstv_t {
struct __kernel_old_timespec ts;
struct __kernel_old_timeval tv;
} *tstv = (union tstv_t *) tv;
do_realtime_stick(vvd, &tstv->ts);
/*
* Assign before dividing to ensure that the division is
* done in the type of tv_usec, not tv_nsec.
*
* There cannot be > 1 billion usec in a second:
* do_realtime() has already distributed such overflow
* into tv_sec. So we can assign it to an int safely.
*/
tstv->tv.tv_usec = tstv->ts.tv_nsec;
tstv->tv.tv_usec /= 1000;
}
if (unlikely(tz != NULL)) {
/* Avoid memcpy. Some old compilers fail to inline it */
tz->tz_minuteswest = vvd->tz_minuteswest;
tz->tz_dsttime = vvd->tz_dsttime;
}
return 0;
}
return vdso_fallback_gettimeofday(tv, tz);
}

View File

@@ -4,15 +4,9 @@
* This script controls its layout.
*/
#if defined(BUILD_VDSO64)
# define SHDR_SIZE 64
#elif defined(BUILD_VDSO32)
# define SHDR_SIZE 40
#else
# error unknown VDSO target
#endif
#define NUM_FAKE_SHDRS 7
#include <vdso/datapage.h>
#include <vdso/page.h>
#include <asm/vdso/vsyscall.h>
SECTIONS
{
@@ -23,8 +17,7 @@ SECTIONS
* segment. Page size is 8192 for both 64-bit and 32-bit vdso binaries
*/
vvar_start = . -8192;
vvar_data = vvar_start;
VDSO_VVAR_SYMS
. = SIZEOF_HEADERS;
@@ -47,19 +40,8 @@ SECTIONS
*(.bss*)
*(.dynbss*)
*(.gnu.linkonce.b.*)
/*
* Ideally this would live in a C file: kept in here for
* compatibility with x86-64.
*/
VDSO_FAKE_SECTION_TABLE_START = .;
. = . + NUM_FAKE_SHDRS * SHDR_SIZE;
VDSO_FAKE_SECTION_TABLE_END = .;
} :text
.fake_shstrtab : { *(.fake_shstrtab) } :text
.note : { *(.note.*) } :text :note
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr

View File

@@ -18,10 +18,8 @@ VERSION {
global:
clock_gettime;
__vdso_clock_gettime;
__vdso_clock_gettime_stick;
gettimeofday;
__vdso_gettimeofday;
__vdso_gettimeofday_stick;
local: *;
};
}

View File

@@ -58,28 +58,6 @@
const char *outfilename;
/* Symbols that we need in vdso2c. */
enum {
sym_vvar_start,
sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END,
};
struct vdso_sym {
const char *name;
int export;
};
struct vdso_sym required_syms[] = {
[sym_vvar_start] = {"vvar_start", 1},
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
"VDSO_FAKE_SECTION_TABLE_START", 0
},
[sym_VDSO_FAKE_SECTION_TABLE_END] = {
"VDSO_FAKE_SECTION_TABLE_END", 0
},
};
__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
static void fail(const char *format, ...)
{
@@ -119,8 +97,6 @@ static void fail(const char *format, ...)
#define PUT_BE(x, val) \
PBE(x, val, 64, PBE(x, val, 32, PBE(x, val, 16, LAST_PBE(x, val))))
#define NSYMS ARRAY_SIZE(required_syms)
#define BITSFUNC3(name, bits, suffix) name##bits##suffix
#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )

View File

@@ -17,11 +17,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
unsigned long mapping_size;
int i;
unsigned long j;
ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr;
ELF(Shdr) *symtab_hdr = NULL;
ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
ELF(Dyn) *dyn = 0, *dyn_end = 0;
INT_BITS syms[NSYMS] = {};
ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_BE(&hdr->e_phoff));
/* Walk the segment table. */
@@ -72,42 +70,6 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
if (!symtab_hdr)
fail("no symbol table\n");
strtab_hdr = raw_addr + GET_BE(&hdr->e_shoff) +
GET_BE(&hdr->e_shentsize) * GET_BE(&symtab_hdr->sh_link);
/* Walk the symbol table */
for (i = 0;
i < GET_BE(&symtab_hdr->sh_size) / GET_BE(&symtab_hdr->sh_entsize);
i++) {
int k;
ELF(Sym) *sym = raw_addr + GET_BE(&symtab_hdr->sh_offset) +
GET_BE(&symtab_hdr->sh_entsize) * i;
const char *name = raw_addr + GET_BE(&strtab_hdr->sh_offset) +
GET_BE(&sym->st_name);
for (k = 0; k < NSYMS; k++) {
if (!strcmp(name, required_syms[k].name)) {
if (syms[k]) {
fail("duplicate symbol %s\n",
required_syms[k].name);
}
/*
* Careful: we use negative addresses, but
* st_value is unsigned, so we rely
* on syms[k] being a signed type of the
* correct width.
*/
syms[k] = GET_BE(&sym->st_value);
}
}
}
/* Validate mapping addresses. */
if (syms[sym_vvar_start] % 8192)
fail("vvar_begin must be a multiple of 8192\n");
if (!name) {
fwrite(stripped_addr, stripped_len, 1, outfile);
return;
@@ -133,10 +95,5 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
fprintf(outfile, "const struct vdso_image %s_builtin = {\n", name);
fprintf(outfile, "\t.data = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
for (i = 0; i < NSYMS; i++) {
if (required_syms[i].export && syms[i])
fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
required_syms[i].name, (int64_t)syms[i]);
}
fprintf(outfile, "};\n");
}

View File

@@ -17,10 +17,10 @@ VERSION {
global:
clock_gettime;
__vdso_clock_gettime;
__vdso_clock_gettime_stick;
clock_gettime64;
__vdso_clock_gettime64;
gettimeofday;
__vdso_gettimeofday;
__vdso_gettimeofday_stick;
local: *;
};
}

View File

@@ -16,17 +16,16 @@
#include <linux/linkage.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/vdso_datastore.h>
#include <asm/cacheflush.h>
#include <asm/spitfire.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/page.h>
unsigned int __read_mostly vdso_enabled = 1;
#include <vdso/datapage.h>
#include <asm/vdso/vsyscall.h>
static struct vm_special_mapping vvar_mapping = {
.name = "[vvar]"
};
unsigned int __read_mostly vdso_enabled = 1;
#ifdef CONFIG_SPARC64
static struct vm_special_mapping vdso_mapping64 = {
@@ -40,207 +39,8 @@ static struct vm_special_mapping vdso_mapping32 = {
};
#endif
struct vvar_data *vvar_data;
struct vdso_elfinfo32 {
Elf32_Ehdr *hdr;
Elf32_Sym *dynsym;
unsigned long dynsymsize;
const char *dynstr;
unsigned long text;
};
struct vdso_elfinfo64 {
Elf64_Ehdr *hdr;
Elf64_Sym *dynsym;
unsigned long dynsymsize;
const char *dynstr;
unsigned long text;
};
struct vdso_elfinfo {
union {
struct vdso_elfinfo32 elf32;
struct vdso_elfinfo64 elf64;
} u;
};
static void *one_section64(struct vdso_elfinfo64 *e, const char *name,
unsigned long *size)
{
const char *snames;
Elf64_Shdr *shdrs;
unsigned int i;
shdrs = (void *)e->hdr + e->hdr->e_shoff;
snames = (void *)e->hdr + shdrs[e->hdr->e_shstrndx].sh_offset;
for (i = 1; i < e->hdr->e_shnum; i++) {
if (!strcmp(snames+shdrs[i].sh_name, name)) {
if (size)
*size = shdrs[i].sh_size;
return (void *)e->hdr + shdrs[i].sh_offset;
}
}
return NULL;
}
static int find_sections64(const struct vdso_image *image, struct vdso_elfinfo *_e)
{
struct vdso_elfinfo64 *e = &_e->u.elf64;
e->hdr = image->data;
e->dynsym = one_section64(e, ".dynsym", &e->dynsymsize);
e->dynstr = one_section64(e, ".dynstr", NULL);
if (!e->dynsym || !e->dynstr) {
pr_err("VDSO64: Missing symbol sections.\n");
return -ENODEV;
}
return 0;
}
static Elf64_Sym *find_sym64(const struct vdso_elfinfo64 *e, const char *name)
{
unsigned int i;
for (i = 0; i < (e->dynsymsize / sizeof(Elf64_Sym)); i++) {
Elf64_Sym *s = &e->dynsym[i];
if (s->st_name == 0)
continue;
if (!strcmp(e->dynstr + s->st_name, name))
return s;
}
return NULL;
}
static int patchsym64(struct vdso_elfinfo *_e, const char *orig,
const char *new)
{
struct vdso_elfinfo64 *e = &_e->u.elf64;
Elf64_Sym *osym = find_sym64(e, orig);
Elf64_Sym *nsym = find_sym64(e, new);
if (!nsym || !osym) {
pr_err("VDSO64: Missing symbols.\n");
return -ENODEV;
}
osym->st_value = nsym->st_value;
osym->st_size = nsym->st_size;
osym->st_info = nsym->st_info;
osym->st_other = nsym->st_other;
osym->st_shndx = nsym->st_shndx;
return 0;
}
static void *one_section32(struct vdso_elfinfo32 *e, const char *name,
unsigned long *size)
{
const char *snames;
Elf32_Shdr *shdrs;
unsigned int i;
shdrs = (void *)e->hdr + e->hdr->e_shoff;
snames = (void *)e->hdr + shdrs[e->hdr->e_shstrndx].sh_offset;
for (i = 1; i < e->hdr->e_shnum; i++) {
if (!strcmp(snames+shdrs[i].sh_name, name)) {
if (size)
*size = shdrs[i].sh_size;
return (void *)e->hdr + shdrs[i].sh_offset;
}
}
return NULL;
}
static int find_sections32(const struct vdso_image *image, struct vdso_elfinfo *_e)
{
struct vdso_elfinfo32 *e = &_e->u.elf32;
e->hdr = image->data;
e->dynsym = one_section32(e, ".dynsym", &e->dynsymsize);
e->dynstr = one_section32(e, ".dynstr", NULL);
if (!e->dynsym || !e->dynstr) {
pr_err("VDSO32: Missing symbol sections.\n");
return -ENODEV;
}
return 0;
}
static Elf32_Sym *find_sym32(const struct vdso_elfinfo32 *e, const char *name)
{
unsigned int i;
for (i = 0; i < (e->dynsymsize / sizeof(Elf32_Sym)); i++) {
Elf32_Sym *s = &e->dynsym[i];
if (s->st_name == 0)
continue;
if (!strcmp(e->dynstr + s->st_name, name))
return s;
}
return NULL;
}
static int patchsym32(struct vdso_elfinfo *_e, const char *orig,
const char *new)
{
struct vdso_elfinfo32 *e = &_e->u.elf32;
Elf32_Sym *osym = find_sym32(e, orig);
Elf32_Sym *nsym = find_sym32(e, new);
if (!nsym || !osym) {
pr_err("VDSO32: Missing symbols.\n");
return -ENODEV;
}
osym->st_value = nsym->st_value;
osym->st_size = nsym->st_size;
osym->st_info = nsym->st_info;
osym->st_other = nsym->st_other;
osym->st_shndx = nsym->st_shndx;
return 0;
}
static int find_sections(const struct vdso_image *image, struct vdso_elfinfo *e,
bool elf64)
{
if (elf64)
return find_sections64(image, e);
else
return find_sections32(image, e);
}
static int patch_one_symbol(struct vdso_elfinfo *e, const char *orig,
const char *new_target, bool elf64)
{
if (elf64)
return patchsym64(e, orig, new_target);
else
return patchsym32(e, orig, new_target);
}
static int stick_patch(const struct vdso_image *image, struct vdso_elfinfo *e, bool elf64)
{
int err;
err = find_sections(image, e, elf64);
if (err)
return err;
err = patch_one_symbol(e,
"__vdso_gettimeofday",
"__vdso_gettimeofday_stick", elf64);
if (err)
return err;
return patch_one_symbol(e,
"__vdso_clock_gettime",
"__vdso_clock_gettime_stick", elf64);
return 0;
}
/*
* Allocate pages for the vdso and vvar, and copy in the vdso text from the
* Allocate pages for the vdso and copy in the vdso text from the
* kernel image.
*/
static int __init init_vdso_image(const struct vdso_image *image,
@@ -248,16 +48,8 @@ static int __init init_vdso_image(const struct vdso_image *image,
bool elf64)
{
int cnpages = (image->size) / PAGE_SIZE;
struct page *dp, **dpp = NULL;
struct page *cp, **cpp = NULL;
struct vdso_elfinfo ei;
int i, dnpages = 0;
if (tlb_type != spitfire) {
int err = stick_patch(image, &ei, elf64);
if (err)
return err;
}
int i;
/*
* First, the vdso text. This is initialized data, an integral number of
@@ -280,31 +72,6 @@ static int __init init_vdso_image(const struct vdso_image *image,
copy_page(page_address(cp), image->data + i * PAGE_SIZE);
}
/*
* Now the vvar page. This is uninitialized data.
*/
if (vvar_data == NULL) {
dnpages = (sizeof(struct vvar_data) / PAGE_SIZE) + 1;
if (WARN_ON(dnpages != 1))
goto oom;
dpp = kzalloc_objs(struct page *, dnpages);
vvar_mapping.pages = dpp;
if (!dpp)
goto oom;
dp = alloc_page(GFP_KERNEL);
if (!dp)
goto oom;
dpp[0] = dp;
vvar_data = page_address(dp);
memset(vvar_data, 0, PAGE_SIZE);
vvar_data->seq = 0;
}
return 0;
oom:
if (cpp != NULL) {
@@ -316,15 +83,6 @@ static int __init init_vdso_image(const struct vdso_image *image,
vdso_mapping->pages = NULL;
}
if (dpp != NULL) {
for (i = 0; i < dnpages; i++) {
if (dpp[i] != NULL)
__free_page(dpp[i]);
}
kfree(dpp);
vvar_mapping.pages = NULL;
}
pr_warn("Cannot allocate vdso\n");
vdso_enabled = 0;
return -ENOMEM;
@@ -359,9 +117,12 @@ static unsigned long vdso_addr(unsigned long start, unsigned int len)
return start + (offset << PAGE_SHIFT);
}
static_assert(VDSO_NR_PAGES == __VDSO_PAGES);
static int map_vdso(const struct vdso_image *image,
struct vm_special_mapping *vdso_mapping)
{
const size_t area_size = image->size + VDSO_NR_PAGES * PAGE_SIZE;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long text_start, addr = 0;
@@ -374,23 +135,20 @@ static int map_vdso(const struct vdso_image *image,
* region is free.
*/
if (current->flags & PF_RANDOMIZE) {
addr = get_unmapped_area(NULL, 0,
image->size - image->sym_vvar_start,
0, 0);
addr = get_unmapped_area(NULL, 0, area_size, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
addr = vdso_addr(addr, image->size - image->sym_vvar_start);
addr = vdso_addr(addr, area_size);
}
addr = get_unmapped_area(NULL, addr,
image->size - image->sym_vvar_start, 0, 0);
addr = get_unmapped_area(NULL, addr, area_size, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
text_start = addr - image->sym_vvar_start;
text_start = addr + VDSO_NR_PAGES * PAGE_SIZE;
current->mm->context.vdso = (void __user *)text_start;
/*
@@ -408,11 +166,7 @@ static int map_vdso(const struct vdso_image *image,
goto up_fail;
}
vma = _install_special_mapping(mm,
addr,
-image->sym_vvar_start,
VM_READ|VM_MAYREAD,
&vvar_mapping);
vma = vdso_install_vvar_mapping(mm, addr);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);

View File

@@ -15,6 +15,10 @@ flags-y := -DBUILD_VDSO32 -m32 -mregparm=0
flags-$(CONFIG_X86_64) += -include $(src)/fake_32bit_build.h
flags-remove-y := -m64
# Checker flags
CHECKFLAGS := $(subst -m64,-m32,$(CHECKFLAGS))
CHECKFLAGS := $(subst -D__x86_64__,-D__i386__,$(CHECKFLAGS))
# The location of this include matters!
include $(src)/../common/Makefile.include

View File

@@ -56,11 +56,7 @@
#include <linux/sched/isolation.h>
#include <crypto/chacha.h>
#include <crypto/blake2s.h>
#ifdef CONFIG_VDSO_GETRANDOM
#include <vdso/getrandom.h>
#include <vdso/datapage.h>
#include <vdso/vsyscall.h>
#endif
#include <asm/archrandom.h>
#include <asm/processor.h>
#include <asm/irq.h>
@@ -269,7 +265,7 @@ static void crng_reseed(struct work_struct *work)
if (next_gen == ULONG_MAX)
++next_gen;
WRITE_ONCE(base_crng.generation, next_gen);
#ifdef CONFIG_VDSO_GETRANDOM
/* base_crng.generation's invalid value is ULONG_MAX, while
* vdso_k_rng_data->generation's invalid value is 0, so add one to the
* former to arrive at the latter. Use smp_store_release so that this
@@ -283,8 +279,9 @@ static void crng_reseed(struct work_struct *work)
* because the vDSO side only checks whether the value changed, without
* actually using or interpreting the value.
*/
smp_store_release((unsigned long *)&vdso_k_rng_data->generation, next_gen + 1);
#endif
if (IS_ENABLED(CONFIG_VDSO_GETRANDOM))
smp_store_release((unsigned long *)&vdso_k_rng_data->generation, next_gen + 1);
if (!static_branch_likely(&crng_is_ready))
crng_init = CRNG_READY;
spin_unlock_irqrestore(&base_crng.lock, flags);
@@ -734,9 +731,8 @@ static void __cold _credit_init_bits(size_t bits)
if (system_dfl_wq)
queue_work(system_dfl_wq, &set_ready);
atomic_notifier_call_chain(&random_ready_notifier, 0, NULL);
#ifdef CONFIG_VDSO_GETRANDOM
WRITE_ONCE(vdso_k_rng_data->is_ready, true);
#endif
if (IS_ENABLED(CONFIG_VDSO_GETRANDOM))
WRITE_ONCE(vdso_k_rng_data->is_ready, true);
wake_up_interruptible(&crng_init_wait);
kill_fasync(&fasync, SIGIO, POLL_IN);
pr_notice("crng init done\n");

View File

@@ -19,6 +19,15 @@
#error Inconsistent word size. Check asm/bitsperlong.h
#endif
#if __CHAR_BIT__ * __SIZEOF_LONG__ != __BITS_PER_LONG
#error Inconsistent word size. Check asm/bitsperlong.h
#endif
#ifndef __ASSEMBLER__
_Static_assert(sizeof(long) * 8 == __BITS_PER_LONG,
"Inconsistent word size. Check asm/bitsperlong.h");
#endif
#ifndef BITS_PER_LONG_LONG
#define BITS_PER_LONG_LONG 64
#endif

View File

@@ -25,8 +25,7 @@ struct clocksource_base;
struct clocksource;
struct module;
#if defined(CONFIG_ARCH_CLOCKSOURCE_DATA) || \
defined(CONFIG_GENERIC_GETTIMEOFDAY)
#if defined(CONFIG_GENERIC_GETTIMEOFDAY)
#include <asm/clocksource.h>
#endif
@@ -103,9 +102,6 @@ struct clocksource {
u32 shift;
u64 max_idle_ns;
u32 maxadj;
#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
struct arch_clocksource_data archdata;
#endif
u64 max_cycles;
u64 max_raw_delta;
const char *name;

View File

@@ -8,6 +8,7 @@
#include <linux/ns_common.h>
#include <linux/err.h>
#include <linux/time64.h>
#include <linux/cleanup.h>
struct user_namespace;
extern struct user_namespace init_user_ns;
@@ -25,7 +26,9 @@ struct time_namespace {
struct ucounts *ucounts;
struct ns_common ns;
struct timens_offsets offsets;
#ifdef CONFIG_TIME_NS_VDSO
struct page *vvar_page;
#endif
/* If set prevents changing offsets after any task joined namespace. */
bool frozen_offsets;
} __randomize_layout;
@@ -38,9 +41,6 @@ static inline struct time_namespace *to_time_ns(struct ns_common *ns)
return container_of(ns, struct time_namespace, ns);
}
void __init time_ns_init(void);
extern int vdso_join_timens(struct task_struct *task,
struct time_namespace *ns);
extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);
static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
{
@@ -53,7 +53,6 @@ struct time_namespace *copy_time_ns(u64 flags,
struct time_namespace *old_ns);
void free_time_ns(struct time_namespace *ns);
void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
struct page *find_timens_vvar_page(struct vm_area_struct *vma);
static inline void put_time_ns(struct time_namespace *ns)
{
@@ -117,17 +116,6 @@ static inline void __init time_ns_init(void)
{
}
static inline int vdso_join_timens(struct task_struct *task,
struct time_namespace *ns)
{
return 0;
}
static inline void timens_commit(struct task_struct *tsk,
struct time_namespace *ns)
{
}
static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
{
return NULL;
@@ -154,11 +142,6 @@ static inline void timens_on_fork(struct nsproxy *nsproxy,
return;
}
static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
return NULL;
}
static inline void timens_add_monotonic(struct timespec64 *ts) { }
static inline void timens_add_boottime(struct timespec64 *ts) { }
@@ -175,4 +158,20 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
}
#endif
#ifdef CONFIG_TIME_NS_VDSO
extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);
struct page *find_timens_vvar_page(struct vm_area_struct *vma);
#else /* !CONFIG_TIME_NS_VDSO */
static inline void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
{
}
static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
return NULL;
}
#endif /* CONFIG_TIME_NS_VDSO */
DEFINE_FREE(time_ns, struct time_namespace *, if (_T) put_time_ns(_T))
#endif /* _LINUX_TIMENS_H */

View File

@@ -2,9 +2,15 @@
#ifndef _LINUX_VDSO_DATASTORE_H
#define _LINUX_VDSO_DATASTORE_H
#ifdef CONFIG_HAVE_GENERIC_VDSO
#include <linux/mm_types.h>
extern const struct vm_special_mapping vdso_vvar_mapping;
struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr);
void __init vdso_setup_data_pages(void);
#else /* !CONFIG_HAVE_GENERIC_VDSO */
static inline void vdso_setup_data_pages(void) { }
#endif /* CONFIG_HAVE_GENERIC_VDSO */
#endif /* _LINUX_VDSO_DATASTORE_H */

View File

@@ -4,24 +4,16 @@
#ifndef __ASSEMBLY__
#include <linux/compiler.h>
#include <linux/types.h>
#include <uapi/linux/bits.h>
#include <uapi/linux/time.h>
#include <uapi/linux/types.h>
#include <uapi/asm-generic/errno-base.h>
#include <vdso/align.h>
#include <vdso/bits.h>
#include <vdso/cache.h>
#include <vdso/clocksource.h>
#include <vdso/ktime.h>
#include <vdso/limits.h>
#include <vdso/math64.h>
#include <vdso/page.h>
#include <vdso/processor.h>
#include <vdso/time.h>
#include <vdso/time32.h>
#include <vdso/time64.h>
#ifdef CONFIG_ARCH_HAS_VDSO_TIME_DATA
#include <asm/vdso/time_data.h>
@@ -80,8 +72,8 @@ struct vdso_timestamp {
* @mask: clocksource mask
* @mult: clocksource multiplier
* @shift: clocksource shift
* @basetime[clock_id]: basetime per clock_id
* @offset[clock_id]: time namespace offset per clock_id
* @basetime: basetime per clock_id
* @offset: time namespace offset per clock_id
*
* See also struct vdso_time_data for basic access and ordering information as
* struct vdso_clock is used there.
@@ -184,17 +176,6 @@ enum vdso_pages {
VDSO_NR_PAGES
};
/*
* The generic vDSO implementation requires that gettimeofday.h
* provides:
* - __arch_get_hw_counter(): to get the hw counter based on the
* clock_mode.
* - gettimeofday_fallback(): fallback for gettimeofday.
* - clock_gettime_fallback(): fallback for clock_gettime.
* - clock_getres_fallback(): fallback for clock_getres.
*/
#include <asm/vdso/gettimeofday.h>
#else /* !__ASSEMBLY__ */
#ifdef CONFIG_VDSO_GETRANDOM

View File

@@ -6,6 +6,13 @@
#include <asm/barrier.h>
#include <vdso/datapage.h>
#include <vdso/processor.h>
#include <vdso/clocksource.h>
static __always_inline bool vdso_is_timens_clock(const struct vdso_clock *vc)
{
return IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS;
}
static __always_inline u32 vdso_read_begin(const struct vdso_clock *vc)
{
@@ -18,6 +25,28 @@ static __always_inline u32 vdso_read_begin(const struct vdso_clock *vc)
return seq;
}
/*
* Variant of vdso_read_begin() to handle VDSO_CLOCKMODE_TIMENS.
*
* Time namespace enabled tasks have a special VVAR page installed which has
* vc->seq set to 1 and vc->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non
* time namespace affected tasks this does not affect performance because if
* vc->seq is odd, i.e. a concurrent update is in progress the extra check for
* vc->clock_mode is just a few extra instructions while spin waiting for
* vc->seq to become even again.
*/
static __always_inline bool vdso_read_begin_timens(const struct vdso_clock *vc, u32 *seq)
{
while (unlikely((*seq = READ_ONCE(vc->seq)) & 1)) {
if (vdso_is_timens_clock(vc))
return true;
cpu_relax();
}
smp_rmb();
return false;
}
static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc,
u32 start)
{
@@ -25,7 +54,7 @@ static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc,
smp_rmb();
seq = READ_ONCE(vc->seq);
return seq != start;
return unlikely(seq != start);
}
static __always_inline void vdso_write_seq_begin(struct vdso_clock *vc)

View File

@@ -1400,12 +1400,14 @@ config UTS_NS
config TIME_NS
bool "TIME namespace"
depends on GENERIC_GETTIMEOFDAY
default y
help
In this namespace boottime and monotonic clocks can be set.
The time will keep going with the same pace.
config TIME_NS_VDSO
def_bool TIME_NS && GENERIC_GETTIMEOFDAY
config IPC_NS
bool "IPC namespace"
depends on (SYSVIPC || POSIX_MQUEUE)

View File

@@ -106,6 +106,7 @@
#include <linux/ptdump.h>
#include <linux/time_namespace.h>
#include <linux/unaligned.h>
#include <linux/vdso_datastore.h>
#include <net/net_namespace.h>
#include <asm/io.h>
@@ -1127,6 +1128,7 @@ void start_kernel(void)
srcu_init();
hrtimers_init();
softirq_init();
vdso_setup_data_pages();
timekeeping_init();
time_init();

View File

@@ -9,10 +9,6 @@
config CLOCKSOURCE_WATCHDOG
bool
# Architecture has extra clocksource data
config ARCH_CLOCKSOURCE_DATA
bool
# Architecture has extra clocksource init called from registration
config ARCH_CLOCKSOURCE_INIT
bool

View File

@@ -30,5 +30,6 @@ obj-$(CONFIG_GENERIC_GETTIMEOFDAY) += vsyscall.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
obj-$(CONFIG_TIME_NS) += namespace.o
obj-$(CONFIG_TIME_NS_VDSO) += namespace_vdso.o
obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) += clocksource-wdtest.o
obj-$(CONFIG_TIME_KUNIT_TEST) += time_test.o

View File

@@ -18,8 +18,9 @@
#include <linux/cred.h>
#include <linux/err.h>
#include <linux/mm.h>
#include <linux/cleanup.h>
#include <vdso/datapage.h>
#include "namespace_internal.h"
ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
struct timens_offsets *ns_offsets)
@@ -93,8 +94,8 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
if (!ns)
goto fail_dec;
ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!ns->vvar_page)
err = timens_vdso_alloc_vvar_page(ns);
if (err)
goto fail_free;
err = ns_common_init(ns);
@@ -109,7 +110,7 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
return ns;
fail_free_page:
__free_page(ns->vvar_page);
timens_vdso_free_vvar_page(ns);
fail_free:
kfree(ns);
fail_dec:
@@ -138,117 +139,7 @@ struct time_namespace *copy_time_ns(u64 flags,
return clone_time_ns(user_ns, old_ns);
}
static struct timens_offset offset_from_ts(struct timespec64 off)
{
struct timens_offset ret;
ret.sec = off.tv_sec;
ret.nsec = off.tv_nsec;
return ret;
}
/*
* A time namespace VVAR page has the same layout as the VVAR page which
* contains the system wide VDSO data.
*
* For a normal task the VVAR pages are installed in the normal ordering:
* VVAR
* PVCLOCK
* HVCLOCK
* TIMENS <- Not really required
*
* Now for a timens task the pages are installed in the following order:
* TIMENS
* PVCLOCK
* HVCLOCK
* VVAR
*
* The check for vdso_clock->clock_mode is in the unlikely path of
* the seq begin magic. So for the non-timens case most of the time
* 'seq' is even, so the branch is not taken.
*
* If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
* for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the
* update to finish and for 'seq' to become even anyway.
*
* Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which
* enforces the time namespace handling path.
*/
static void timens_setup_vdso_clock_data(struct vdso_clock *vc,
struct time_namespace *ns)
{
struct timens_offset *offset = vc->offset;
struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
vc->seq = 1;
vc->clock_mode = VDSO_CLOCKMODE_TIMENS;
offset[CLOCK_MONOTONIC] = monotonic;
offset[CLOCK_MONOTONIC_RAW] = monotonic;
offset[CLOCK_MONOTONIC_COARSE] = monotonic;
offset[CLOCK_BOOTTIME] = boottime;
offset[CLOCK_BOOTTIME_ALARM] = boottime;
}
struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
if (likely(vma->vm_mm == current->mm))
return current->nsproxy->time_ns->vvar_page;
/*
* VM_PFNMAP | VM_IO protect .fault() handler from being called
* through interfaces like /proc/$pid/mem or
* process_vm_{readv,writev}() as long as there's no .access()
* in special_mapping_vmops().
* For more details check_vma_flags() and __access_remote_vm()
*/
WARN(1, "vvar_page accessed remotely");
return NULL;
}
/*
* Protects possibly multiple offsets writers racing each other
* and tasks entering the namespace.
*/
static DEFINE_MUTEX(offset_lock);
static void timens_set_vvar_page(struct task_struct *task,
struct time_namespace *ns)
{
struct vdso_time_data *vdata;
struct vdso_clock *vc;
unsigned int i;
if (ns == &init_time_ns)
return;
/* Fast-path, taken by every task in namespace except the first. */
if (likely(ns->frozen_offsets))
return;
mutex_lock(&offset_lock);
/* Nothing to do: vvar_page has already been initialized. */
if (ns->frozen_offsets)
goto out;
ns->frozen_offsets = true;
vdata = page_address(ns->vvar_page);
vc = vdata->clock_data;
for (i = 0; i < CS_BASES; i++)
timens_setup_vdso_clock_data(&vc[i], ns);
if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) {
for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++)
timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns);
}
out:
mutex_unlock(&offset_lock);
}
DEFINE_MUTEX(timens_offset_lock);
void free_time_ns(struct time_namespace *ns)
{
@@ -256,41 +147,39 @@ void free_time_ns(struct time_namespace *ns)
dec_time_namespaces(ns->ucounts);
put_user_ns(ns->user_ns);
ns_common_free(ns);
__free_page(ns->vvar_page);
timens_vdso_free_vvar_page(ns);
/* Concurrent nstree traversal depends on a grace period. */
kfree_rcu(ns, ns.ns_rcu);
}
static struct ns_common *timens_get(struct task_struct *task)
{
struct time_namespace *ns = NULL;
struct time_namespace *ns;
struct nsproxy *nsproxy;
task_lock(task);
guard(task_lock)(task);
nsproxy = task->nsproxy;
if (nsproxy) {
ns = nsproxy->time_ns;
get_time_ns(ns);
}
task_unlock(task);
if (!nsproxy)
return NULL;
return ns ? &ns->ns : NULL;
ns = nsproxy->time_ns;
get_time_ns(ns);
return &ns->ns;
}
static struct ns_common *timens_for_children_get(struct task_struct *task)
{
struct time_namespace *ns = NULL;
struct time_namespace *ns;
struct nsproxy *nsproxy;
task_lock(task);
guard(task_lock)(task);
nsproxy = task->nsproxy;
if (nsproxy) {
ns = nsproxy->time_ns_for_children;
get_time_ns(ns);
}
task_unlock(task);
if (!nsproxy)
return NULL;
return ns ? &ns->ns : NULL;
ns = nsproxy->time_ns_for_children;
get_time_ns(ns);
return &ns->ns;
}
static void timens_put(struct ns_common *ns)
@@ -298,12 +187,6 @@ static void timens_put(struct ns_common *ns)
put_time_ns(to_time_ns(ns));
}
void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
{
timens_set_vvar_page(tsk, ns);
vdso_join_timens(tsk, ns);
}
static int timens_install(struct nsset *nsset, struct ns_common *new)
{
struct nsproxy *nsproxy = nsset->nsproxy;
@@ -367,36 +250,33 @@ static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts)
void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m)
{
struct ns_common *ns;
struct time_namespace *time_ns;
struct time_namespace *time_ns __free(time_ns) = NULL;
struct ns_common *ns = timens_for_children_get(p);
ns = timens_for_children_get(p);
if (!ns)
return;
time_ns = to_time_ns(ns);
show_offset(m, CLOCK_MONOTONIC, &time_ns->offsets.monotonic);
show_offset(m, CLOCK_BOOTTIME, &time_ns->offsets.boottime);
put_time_ns(time_ns);
}
int proc_timens_set_offset(struct file *file, struct task_struct *p,
struct proc_timens_offset *offsets, int noffsets)
{
struct ns_common *ns;
struct time_namespace *time_ns;
struct time_namespace *time_ns __free(time_ns) = NULL;
struct ns_common *ns = timens_for_children_get(p);
struct timespec64 tp;
int i, err;
int i;
ns = timens_for_children_get(p);
if (!ns)
return -ESRCH;
time_ns = to_time_ns(ns);
if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) {
put_time_ns(time_ns);
if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME))
return -EPERM;
}
for (i = 0; i < noffsets; i++) {
struct proc_timens_offset *off = &offsets[i];
@@ -409,15 +289,12 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p,
ktime_get_boottime_ts64(&tp);
break;
default:
err = -EINVAL;
goto out;
return -EINVAL;
}
err = -ERANGE;
if (off->val.tv_sec > KTIME_SEC_MAX ||
off->val.tv_sec < -KTIME_SEC_MAX)
goto out;
return -ERANGE;
tp = timespec64_add(tp, off->val);
/*
@@ -425,16 +302,13 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p,
* still unreachable.
*/
if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2)
goto out;
return -ERANGE;
}
mutex_lock(&offset_lock);
if (time_ns->frozen_offsets) {
err = -EACCES;
goto out_unlock;
}
guard(mutex)(&timens_offset_lock);
if (time_ns->frozen_offsets)
return -EACCES;
err = 0;
/* Don't report errors after this line */
for (i = 0; i < noffsets; i++) {
struct proc_timens_offset *off = &offsets[i];
@@ -452,12 +326,7 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p,
*offset = off->val;
}
out_unlock:
mutex_unlock(&offset_lock);
out:
put_time_ns(time_ns);
return err;
return 0;
}
const struct proc_ns_operations timens_operations = {

View File

@@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _TIME_NAMESPACE_INTERNAL_H
#define _TIME_NAMESPACE_INTERNAL_H
#include <linux/mutex.h>
struct time_namespace;
/*
* Protects against multiple offset writers racing each other,
* and against tasks entering the namespace concurrently.
*/
extern struct mutex timens_offset_lock;
#ifdef CONFIG_TIME_NS_VDSO
int timens_vdso_alloc_vvar_page(struct time_namespace *ns);
void timens_vdso_free_vvar_page(struct time_namespace *ns);
#else /* !CONFIG_TIME_NS_VDSO */
static inline int timens_vdso_alloc_vvar_page(struct time_namespace *ns)
{
return 0;
}
static inline void timens_vdso_free_vvar_page(struct time_namespace *ns)
{
}
#endif /* CONFIG_TIME_NS_VDSO */
#endif /* _TIME_NAMESPACE_INTERNAL_H */

View File

@@ -0,0 +1,160 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Author: Andrei Vagin <avagin@openvz.org>
* Author: Dmitry Safonov <dima@arista.com>
*/
#include <linux/cleanup.h>
#include <linux/mm.h>
#include <linux/time_namespace.h>
#include <linux/time.h>
#include <linux/vdso_datastore.h>
#include <vdso/clocksource.h>
#include <vdso/datapage.h>
#include "namespace_internal.h"
static struct timens_offset offset_from_ts(struct timespec64 off)
{
struct timens_offset ret;
ret.sec = off.tv_sec;
ret.nsec = off.tv_nsec;
return ret;
}
/*
* A time namespace VVAR page has the same layout as the VVAR page which
* contains the system wide VDSO data.
*
* For a normal task the VVAR pages are installed in the normal ordering:
* VVAR
* PVCLOCK
* HVCLOCK
* TIMENS <- Not really required
*
* Now for a timens task the pages are installed in the following order:
* TIMENS
* PVCLOCK
* HVCLOCK
* VVAR
*
* The check for vdso_clock->clock_mode is in the unlikely path of
* the seq begin magic. So for the non-timens case most of the time
* 'seq' is even, so the branch is not taken.
*
* If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
* for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the
* update to finish and for 'seq' to become even anyway.
*
* Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which
* enforces the time namespace handling path.
*/
static void timens_setup_vdso_clock_data(struct vdso_clock *vc,
struct time_namespace *ns)
{
struct timens_offset *offset = vc->offset;
struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
vc->seq = 1;
vc->clock_mode = VDSO_CLOCKMODE_TIMENS;
offset[CLOCK_MONOTONIC] = monotonic;
offset[CLOCK_MONOTONIC_RAW] = monotonic;
offset[CLOCK_MONOTONIC_COARSE] = monotonic;
offset[CLOCK_BOOTTIME] = boottime;
offset[CLOCK_BOOTTIME_ALARM] = boottime;
}
struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
if (likely(vma->vm_mm == current->mm))
return current->nsproxy->time_ns->vvar_page;
/*
* VM_PFNMAP | VM_IO protect .fault() handler from being called
* through interfaces like /proc/$pid/mem or
* process_vm_{readv,writev}() as long as there's no .access()
* in special_mapping_vmops().
* For more details check_vma_flags() and __access_remote_vm()
*/
WARN(1, "vvar_page accessed remotely");
return NULL;
}
static void timens_set_vvar_page(struct task_struct *task,
struct time_namespace *ns)
{
struct vdso_time_data *vdata;
struct vdso_clock *vc;
unsigned int i;
if (ns == &init_time_ns)
return;
/* Fast-path, taken by every task in namespace except the first. */
if (likely(ns->frozen_offsets))
return;
guard(mutex)(&timens_offset_lock);
/* Nothing to do: vvar_page has already been initialized. */
if (ns->frozen_offsets)
return;
ns->frozen_offsets = true;
vdata = page_address(ns->vvar_page);
vc = vdata->clock_data;
for (i = 0; i < CS_BASES; i++)
timens_setup_vdso_clock_data(&vc[i], ns);
if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) {
for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++)
timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns);
}
}
/*
* The vvar page layout depends on whether a task belongs to the root or
* non-root time namespace. Whenever a task changes its namespace, the VVAR
* page tables are cleared and then they will be re-faulted with a
* corresponding layout.
* See also the comment near timens_setup_vdso_clock_data() for details.
*/
static int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
struct mm_struct *mm = task->mm;
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0);
guard(mmap_read_lock)(mm);
for_each_vma(vmi, vma) {
if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
zap_vma_pages(vma);
}
return 0;
}
void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
{
timens_set_vvar_page(tsk, ns);
vdso_join_timens(tsk, ns);
}
int timens_vdso_alloc_vvar_page(struct time_namespace *ns)
{
ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!ns->vvar_page)
return -ENOMEM;
return 0;
}
/*
 * Release the per-namespace vvar page.
 *
 * NOTE(review): __free_page() does not tolerate NULL — this assumes
 * callers only invoke it after timens_vdso_alloc_vvar_page() succeeded;
 * verify against the namespace teardown path.
 */
void timens_vdso_free_vvar_page(struct time_namespace *ns)
{
	__free_page(ns->vvar_page);
}

View File

@@ -1,64 +1,92 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/linkage.h>
#include <linux/mmap_lock.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/time_namespace.h>
#include <linux/types.h>
#include <linux/vdso_datastore.h>
#include <vdso/datapage.h>
/*
* The vDSO data page.
*/
static u8 vdso_initdata[VDSO_NR_PAGES * PAGE_SIZE] __aligned(PAGE_SIZE) __initdata = {};
#ifdef CONFIG_GENERIC_GETTIMEOFDAY
static union {
struct vdso_time_data data;
u8 page[PAGE_SIZE];
} vdso_time_data_store __page_aligned_data;
struct vdso_time_data *vdso_k_time_data = &vdso_time_data_store.data;
static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE);
struct vdso_time_data *vdso_k_time_data __refdata =
(void *)&vdso_initdata[VDSO_TIME_PAGE_OFFSET * PAGE_SIZE];
static_assert(sizeof(struct vdso_time_data) <= PAGE_SIZE);
#endif /* CONFIG_GENERIC_GETTIMEOFDAY */
#ifdef CONFIG_VDSO_GETRANDOM
static union {
struct vdso_rng_data data;
u8 page[PAGE_SIZE];
} vdso_rng_data_store __page_aligned_data;
struct vdso_rng_data *vdso_k_rng_data = &vdso_rng_data_store.data;
static_assert(sizeof(vdso_rng_data_store) == PAGE_SIZE);
struct vdso_rng_data *vdso_k_rng_data __refdata =
(void *)&vdso_initdata[VDSO_RNG_PAGE_OFFSET * PAGE_SIZE];
static_assert(sizeof(struct vdso_rng_data) <= PAGE_SIZE);
#endif /* CONFIG_VDSO_GETRANDOM */
#ifdef CONFIG_ARCH_HAS_VDSO_ARCH_DATA
static union {
struct vdso_arch_data data;
u8 page[VDSO_ARCH_DATA_SIZE];
} vdso_arch_data_store __page_aligned_data;
struct vdso_arch_data *vdso_k_arch_data = &vdso_arch_data_store.data;
struct vdso_arch_data *vdso_k_arch_data __refdata =
(void *)&vdso_initdata[VDSO_ARCH_PAGES_START * PAGE_SIZE];
#endif /* CONFIG_ARCH_HAS_VDSO_ARCH_DATA */
void __init vdso_setup_data_pages(void)
{
unsigned int order = get_order(VDSO_NR_PAGES * PAGE_SIZE);
struct page *pages;
/*
* Allocate the data pages dynamically. SPARC does not support mapping
* static pages to be mapped into userspace.
* It is also a requirement for mlockall() support.
*
* Do not use folios. In time namespaces the pages are mapped in a different order
* to userspace, which is not handled by the folio optimizations in finish_fault().
*/
pages = alloc_pages(GFP_KERNEL, order);
if (!pages)
panic("Unable to allocate VDSO storage pages");
/* The pages are mapped one-by-one into userspace and each one needs to be refcounted. */
split_page(pages, order);
/* Move the data already written by other subsystems to the new pages */
memcpy(page_address(pages), vdso_initdata, VDSO_NR_PAGES * PAGE_SIZE);
if (IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY))
vdso_k_time_data = page_address(pages + VDSO_TIME_PAGE_OFFSET);
if (IS_ENABLED(CONFIG_VDSO_GETRANDOM))
vdso_k_rng_data = page_address(pages + VDSO_RNG_PAGE_OFFSET);
if (IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA))
vdso_k_arch_data = page_address(pages + VDSO_ARCH_PAGES_START);
}
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *timens_page = find_timens_vvar_page(vma);
unsigned long addr, pfn;
vm_fault_t err;
struct page *page, *timens_page;
timens_page = find_timens_vvar_page(vma);
switch (vmf->pgoff) {
case VDSO_TIME_PAGE_OFFSET:
if (!IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY))
return VM_FAULT_SIGBUS;
pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data));
page = virt_to_page(vdso_k_time_data);
if (timens_page) {
/*
* Fault in VVAR page too, since it will be accessed
* to get clock data anyway.
*/
unsigned long addr;
vm_fault_t err;
addr = vmf->address + VDSO_TIMENS_PAGE_OFFSET * PAGE_SIZE;
err = vmf_insert_pfn(vma, addr, pfn);
err = vmf_insert_page(vma, addr, page);
if (unlikely(err & VM_FAULT_ERROR))
return err;
pfn = page_to_pfn(timens_page);
page = timens_page;
}
break;
case VDSO_TIMENS_PAGE_OFFSET:
@@ -71,24 +99,25 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
*/
if (!IS_ENABLED(CONFIG_TIME_NS) || !timens_page)
return VM_FAULT_SIGBUS;
pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data));
page = virt_to_page(vdso_k_time_data);
break;
case VDSO_RNG_PAGE_OFFSET:
if (!IS_ENABLED(CONFIG_VDSO_GETRANDOM))
return VM_FAULT_SIGBUS;
pfn = __phys_to_pfn(__pa_symbol(vdso_k_rng_data));
page = virt_to_page(vdso_k_rng_data);
break;
case VDSO_ARCH_PAGES_START ... VDSO_ARCH_PAGES_END:
if (!IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA))
return VM_FAULT_SIGBUS;
pfn = __phys_to_pfn(__pa_symbol(vdso_k_arch_data)) +
vmf->pgoff - VDSO_ARCH_PAGES_START;
page = virt_to_page(vdso_k_arch_data) + vmf->pgoff - VDSO_ARCH_PAGES_START;
break;
default:
return VM_FAULT_SIGBUS;
}
return vmf_insert_pfn(vma, vmf->address, pfn);
get_page(page);
vmf->page = page;
return 0;
}
const struct vm_special_mapping vdso_vvar_mapping = {
@@ -100,31 +129,6 @@ struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned
{
return _install_special_mapping(mm, addr, VDSO_NR_PAGES * PAGE_SIZE,
VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP |
VM_PFNMAP | VM_SEALED_SYSMAP,
VM_MIXEDMAP | VM_SEALED_SYSMAP,
&vdso_vvar_mapping);
}
#ifdef CONFIG_TIME_NS
/*
* The vvar page layout depends on whether a task belongs to the root or
* non-root time namespace. Whenever a task changes its namespace, the VVAR
* page tables are cleared and then they will be re-faulted with a
* corresponding layout.
* See also the comment near timens_setup_vdso_clock_data() for details.
*/
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
struct mm_struct *mm = task->mm;
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0);
mmap_read_lock(mm);
for_each_vma(vmi, vma) {
if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
zap_vma_pages(vma);
}
mmap_read_unlock(mm);
return 0;
}
#endif

View File

@@ -7,8 +7,11 @@
#include <linux/minmax.h>
#include <vdso/datapage.h>
#include <vdso/getrandom.h>
#include <vdso/limits.h>
#include <vdso/unaligned.h>
#include <asm/barrier.h>
#include <asm/vdso/getrandom.h>
#include <uapi/linux/errno.h>
#include <uapi/linux/mman.h>
#include <uapi/linux/random.h>

View File

@@ -3,8 +3,25 @@
* Generic userspace implementations of gettimeofday() and similar.
*/
#include <vdso/auxclock.h>
#include <vdso/clocksource.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
#include <vdso/ktime.h>
#include <vdso/limits.h>
#include <vdso/math64.h>
#include <vdso/time32.h>
#include <vdso/time64.h>
/*
* The generic vDSO implementation requires that gettimeofday.h
* provides:
* - __arch_get_hw_counter(): to get the hw counter based on the
* clock_mode.
* - gettimeofday_fallback(): fallback for gettimeofday.
* - clock_gettime_fallback(): fallback for clock_gettime.
* - clock_getres_fallback(): fallback for clock_getres.
*/
#include <asm/vdso/gettimeofday.h>
/* Bring in default accessors */
#include <vdso/vsyscall.h>
@@ -135,7 +152,7 @@ bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *
if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns))
return false;
} while (unlikely(vdso_read_retry(vc, seq)));
} while (vdso_read_retry(vc, seq));
/* Add the namespace offset */
sec += offs->sec;
@@ -158,28 +175,12 @@ bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc,
return false;
do {
/*
* Open coded function vdso_read_begin() to handle
* VDSO_CLOCKMODE_TIMENS. Time namespace enabled tasks have a
* special VVAR page installed which has vc->seq set to 1 and
* vc->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non time
* namespace affected tasks this does not affect performance
* because if vc->seq is odd, i.e. a concurrent update is in
* progress the extra check for vc->clock_mode is just a few
* extra instructions while spin waiting for vc->seq to become
* even again.
*/
while (unlikely((seq = READ_ONCE(vc->seq)) & 1)) {
if (IS_ENABLED(CONFIG_TIME_NS) &&
vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
return do_hres_timens(vd, vc, clk, ts);
cpu_relax();
}
smp_rmb();
if (vdso_read_begin_timens(vc, &seq))
return do_hres_timens(vd, vc, clk, ts);
if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns))
return false;
} while (unlikely(vdso_read_retry(vc, seq)));
} while (vdso_read_retry(vc, seq));
vdso_set_timespec(ts, sec, ns);
@@ -204,7 +205,7 @@ bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock
seq = vdso_read_begin(vc);
sec = vdso_ts->sec;
nsec = vdso_ts->nsec;
} while (unlikely(vdso_read_retry(vc, seq)));
} while (vdso_read_retry(vc, seq));
/* Add the namespace offset */
sec += offs->sec;
@@ -223,21 +224,12 @@ bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc,
u32 seq;
do {
/*
* Open coded function vdso_read_begin() to handle
* VDSO_CLOCK_TIMENS. See comment in do_hres().
*/
while ((seq = READ_ONCE(vc->seq)) & 1) {
if (IS_ENABLED(CONFIG_TIME_NS) &&
vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
return do_coarse_timens(vd, vc, clk, ts);
cpu_relax();
}
smp_rmb();
if (vdso_read_begin_timens(vc, &seq))
return do_coarse_timens(vd, vc, clk, ts);
ts->tv_sec = vdso_ts->sec;
ts->tv_nsec = vdso_ts->nsec;
} while (unlikely(vdso_read_retry(vc, seq)));
} while (vdso_read_retry(vc, seq));
return true;
}
@@ -256,20 +248,12 @@ bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_ti
vc = &vd->aux_clock_data[idx];
do {
/*
* Open coded function vdso_read_begin() to handle
* VDSO_CLOCK_TIMENS. See comment in do_hres().
*/
while ((seq = READ_ONCE(vc->seq)) & 1) {
if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) {
vd = __arch_get_vdso_u_timens_data(vd);
vc = &vd->aux_clock_data[idx];
/* Re-read from the real time data page */
continue;
}
cpu_relax();
if (vdso_read_begin_timens(vc, &seq)) {
vd = __arch_get_vdso_u_timens_data(vd);
vc = &vd->aux_clock_data[idx];
/* Re-read from the real time data page */
continue;
}
smp_rmb();
/* Auxclock disabled? */
if (vc->clock_mode == VDSO_CLOCKMODE_NONE)
@@ -277,7 +261,7 @@ bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_ti
if (!vdso_get_timestamp(vd, vc, VDSO_BASE_AUX, &sec, &ns))
return false;
} while (unlikely(vdso_read_retry(vc, seq)));
} while (vdso_read_retry(vc, seq));
vdso_set_timespec(ts, sec, ns);
@@ -313,7 +297,7 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock,
return do_hres(vd, vc, clock, ts);
}
static __maybe_unused int
static int
__cvdso_clock_gettime_data(const struct vdso_time_data *vd, clockid_t clock,
struct __kernel_timespec *ts)
{
@@ -333,7 +317,7 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
}
#ifdef BUILD_VDSO32
static __maybe_unused int
static int
__cvdso_clock_gettime32_data(const struct vdso_time_data *vd, clockid_t clock,
struct old_timespec32 *res)
{
@@ -359,7 +343,7 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
}
#endif /* BUILD_VDSO32 */
static __maybe_unused int
static int
__cvdso_gettimeofday_data(const struct vdso_time_data *vd,
struct __kernel_old_timeval *tv, struct timezone *tz)
{
@@ -376,8 +360,7 @@ __cvdso_gettimeofday_data(const struct vdso_time_data *vd,
}
if (unlikely(tz != NULL)) {
if (IS_ENABLED(CONFIG_TIME_NS) &&
vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
if (vdso_is_timens_clock(vc))
vd = __arch_get_vdso_u_timens_data(vd);
tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
@@ -394,14 +377,13 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
}
#ifdef VDSO_HAS_TIME
static __maybe_unused __kernel_old_time_t
static __kernel_old_time_t
__cvdso_time_data(const struct vdso_time_data *vd, __kernel_old_time_t *time)
{
const struct vdso_clock *vc = vd->clock_data;
__kernel_old_time_t t;
if (IS_ENABLED(CONFIG_TIME_NS) &&
vc->clock_mode == VDSO_CLOCKMODE_TIMENS) {
if (vdso_is_timens_clock(vc)) {
vd = __arch_get_vdso_u_timens_data(vd);
vc = vd->clock_data;
}
@@ -432,8 +414,7 @@ bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t cloc
if (!vdso_clockid_valid(clock))
return false;
if (IS_ENABLED(CONFIG_TIME_NS) &&
vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
if (vdso_is_timens_clock(vc))
vd = __arch_get_vdso_u_timens_data(vd);
/*
@@ -464,7 +445,7 @@ bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t cloc
return true;
}
static __maybe_unused
static
int __cvdso_clock_getres_data(const struct vdso_time_data *vd, clockid_t clock,
struct __kernel_timespec *res)
{
@@ -484,7 +465,7 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
}
#ifdef BUILD_VDSO32
static __maybe_unused int
static int
__cvdso_clock_getres_time32_data(const struct vdso_time_data *vd, clockid_t clock,
struct old_timespec32 *res)
{

View File

@@ -19,8 +19,6 @@ endif
include ../lib.mk
CFLAGS += $(TOOLS_INCLUDES)
CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tables \
-fno-stack-protector -include $(top_srcdir)/tools/include/nolibc/nolibc.h \
-I$(top_srcdir)/tools/include/nolibc/ $(KHDR_INCLUDES)
@@ -28,13 +26,11 @@ CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tab
$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
$(OUTPUT)/vdso_test_correctness: parse_vdso.c vdso_test_correctness.c
$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c | headers
$(OUTPUT)/vdso_standalone_test_x86: CFLAGS:=$(CFLAGS_NOLIBC) $(CFLAGS)
$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c
$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl
$(OUTPUT)/vdso_test_getrandom: parse_vdso.c
$(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \
$(KHDR_INCLUDES) \

View File

@@ -19,8 +19,7 @@
#include <stdint.h>
#include <string.h>
#include <limits.h>
#include <linux/auxvec.h>
#include <linux/elf.h>
#include <elf.h>
#include "parse_vdso.h"

View File

@@ -11,28 +11,22 @@
#include <time.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/auxv.h>
#include <sys/syscall.h>
#include <dlfcn.h>
#include <string.h>
#include <errno.h>
#include <sched.h>
#include <stdbool.h>
#include <limits.h>
#include "parse_vdso.h"
#include "vdso_config.h"
#include "vdso_call.h"
#include "kselftest.h"
static const char *version;
static const char **name;
#ifndef SYS_getcpu
# ifdef __x86_64__
# define SYS_getcpu 309
# else
# define SYS_getcpu 318
# endif
#endif
#ifndef __NR_clock_gettime64
#define __NR_clock_gettime64 403
#endif
@@ -61,6 +55,10 @@ typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
vgtod_t vdso_gettimeofday;
typedef time_t (*vtime_t)(__kernel_time_t *tloc);
vtime_t vdso_time;
typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
getcpu_t vgetcpu;
@@ -110,42 +108,39 @@ static void *vsyscall_getcpu(void)
static void fill_function_pointers(void)
{
void *vdso = dlopen("linux-vdso.so.1",
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
if (!vdso)
vdso = dlopen("linux-gate.so.1",
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
if (!vdso)
vdso = dlopen("linux-vdso32.so.1",
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
if (!vdso)
vdso = dlopen("linux-vdso64.so.1",
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
if (!vdso) {
unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
if (!sysinfo_ehdr) {
printf("[WARN]\tfailed to find vDSO\n");
return;
}
vdso_getcpu = (getcpu_t)dlsym(vdso, name[4]);
vdso_init_from_sysinfo_ehdr(sysinfo_ehdr);
vdso_getcpu = (getcpu_t)vdso_sym(version, name[4]);
if (!vdso_getcpu)
printf("Warning: failed to find getcpu in vDSO\n");
vgetcpu = (getcpu_t) vsyscall_getcpu();
vdso_clock_gettime = (vgettime_t)dlsym(vdso, name[1]);
vdso_clock_gettime = (vgettime_t)vdso_sym(version, name[1]);
if (!vdso_clock_gettime)
printf("Warning: failed to find clock_gettime in vDSO\n");
#if defined(VDSO_32BIT)
vdso_clock_gettime64 = (vgettime64_t)dlsym(vdso, name[5]);
vdso_clock_gettime64 = (vgettime64_t)vdso_sym(version, name[5]);
if (!vdso_clock_gettime64)
printf("Warning: failed to find clock_gettime64 in vDSO\n");
#endif
vdso_gettimeofday = (vgtod_t)dlsym(vdso, name[0]);
vdso_gettimeofday = (vgtod_t)vdso_sym(version, name[0]);
if (!vdso_gettimeofday)
printf("Warning: failed to find gettimeofday in vDSO\n");
vdso_time = (vtime_t)vdso_sym(version, name[2]);
if (!vdso_time)
printf("Warning: failed to find time in vDSO\n");
}
static long sys_getcpu(unsigned * cpu, unsigned * node,
@@ -169,6 +164,16 @@ static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
return syscall(__NR_gettimeofday, tv, tz);
}
static inline __kernel_old_time_t sys_time(__kernel_old_time_t *tloc)
{
#ifdef __NR_time
return syscall(__NR_time, tloc);
#else
errno = ENOSYS;
return -1;
#endif
}
static void test_getcpu(void)
{
printf("[RUN]\tTesting getcpu...\n");
@@ -412,10 +417,10 @@ static void test_gettimeofday(void)
return;
}
printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
(unsigned long long)start.tv_sec, start.tv_usec,
(unsigned long long)vdso.tv_sec, vdso.tv_usec,
(unsigned long long)end.tv_sec, end.tv_usec);
printf("\t%llu.%06lld %llu.%06lld %llu.%06lld\n",
(unsigned long long)start.tv_sec, (long long)start.tv_usec,
(unsigned long long)vdso.tv_sec, (long long)vdso.tv_usec,
(unsigned long long)end.tv_sec, (long long)end.tv_usec);
if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
printf("[FAIL]\tTimes are out of sequence\n");
@@ -435,8 +440,56 @@ static void test_gettimeofday(void)
VDSO_CALL(vdso_gettimeofday, 2, &vdso, NULL);
}
static void test_time(void)
{
__kernel_old_time_t start, end, vdso_ret, vdso_param;
if (!vdso_time)
return;
printf("[RUN]\tTesting time...\n");
if (sys_time(&start) < 0) {
if (errno == -ENOSYS) {
printf("[SKIP]\tNo time() support\n");
} else {
printf("[FAIL]\tsys_time failed (%d)\n", errno);
nerrs++;
}
return;
}
vdso_ret = VDSO_CALL(vdso_time, 1, &vdso_param);
end = sys_time(NULL);
if (vdso_ret < 0 || end < 0) {
printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
(int)vdso_ret, errno);
nerrs++;
return;
}
printf("\t%lld %lld %lld\n",
(long long)start,
(long long)vdso_ret,
(long long)end);
if (vdso_ret != vdso_param) {
printf("[FAIL]\tinconsistent return values: %lld %lld\n",
(long long)vdso_ret, (long long)vdso_param);
nerrs++;
return;
}
if (!(start <= vdso_ret) || !(vdso_ret <= end)) {
printf("[FAIL]\tTimes are out of sequence\n");
nerrs++;
}
}
int main(int argc, char **argv)
{
version = versions[VDSO_VERSION];
name = (const char **)&names[VDSO_NAMES];
fill_function_pointers();
@@ -444,6 +497,7 @@ int main(int argc, char **argv)
test_clock_gettime();
test_clock_gettime64();
test_gettimeofday();
test_time();
/*
* Test getcpu() last so that, if something goes wrong setting affinity,

View File

@@ -11,10 +11,8 @@
*/
#include <stdio.h>
#ifndef NOLIBC
#include <sys/auxv.h>
#include <sys/time.h>
#endif
#include "kselftest.h"
#include "parse_vdso.h"