mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'timers-vdso-2026-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull vdso updates from Thomas Gleixner: - Make the handling of compat functions consistent and more robust - Rework the underlying data store so that it is dynamically allocated, which allows the conversion of the last holdout SPARC64 to the generic VDSO implementation - Rework the SPARC64 VDSO to utilize the generic implementation - Mop up the leftovers of the non-generic VDSO support in the core code - Expand the VDSO selftests and make them more robust - Allow time namespaces to be enabled independently of the generic VDSO support, which was not possible before due to SPARC64 not using it - Various cleanups and improvements in the related code * tag 'timers-vdso-2026-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (51 commits) timens: Use task_lock guard in timens_get*() timens: Use mutex guard in proc_timens_set_offset() timens: Simplify some calls to put_time_ns() timens: Add a __free() wrapper for put_time_ns() timens: Remove dependency on the vDSO vdso/timens: Move functions to new file selftests: vDSO: vdso_test_correctness: Add a test for time() selftests: vDSO: vdso_test_correctness: Use facilities from parse_vdso.c selftests: vDSO: vdso_test_correctness: Handle different tv_usec types selftests: vDSO: vdso_test_correctness: Drop SYS_getcpu fallbacks selftests: vDSO: vdso_test_gettimeofday: Remove nolibc checks Revert "selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers" random: vDSO: Remove ifdeffery random: vDSO: Trim vDSO includes vdso/datapage: Trim down unnecessary includes vdso/datapage: Remove inclusion of gettimeofday.h vdso/helpers: Explicitly include vdso/processor.h vdso/gettimeofday: Add explicit includes random: vDSO: Add explicit includes MIPS: vdso: Explicitly include asm/vdso/vdso.h ...
This commit is contained in:
@@ -10796,6 +10796,7 @@ S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso
|
||||
F: include/asm-generic/vdso/vsyscall.h
|
||||
F: include/vdso/
|
||||
F: kernel/time/namespace_vdso.c
|
||||
F: kernel/time/vsyscall.c
|
||||
F: lib/vdso/
|
||||
F: tools/testing/selftests/vDSO/
|
||||
@@ -21042,6 +21043,7 @@ F: include/trace/events/timer*
|
||||
F: kernel/time/itimer.c
|
||||
F: kernel/time/posix-*
|
||||
F: kernel/time/namespace.c
|
||||
F: kernel/time/namespace_vdso.c
|
||||
|
||||
POWER MANAGEMENT CORE
|
||||
M: "Rafael J. Wysocki" <rafael@kernel.org>
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#include <asm/errno.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/vdso/cp15.h>
|
||||
#include <vdso/clocksource.h>
|
||||
#include <vdso/time32.h>
|
||||
#include <uapi/linux/time.h>
|
||||
|
||||
#define VDSO_HAS_CLOCK_GETRES 1
|
||||
|
||||
@@ -7,6 +7,9 @@
|
||||
|
||||
#ifndef __ASSEMBLER__
|
||||
|
||||
#include <vdso/clocksource.h>
|
||||
#include <vdso/time32.h>
|
||||
|
||||
#include <asm/barrier.h>
|
||||
#include <asm/unistd_compat_32.h>
|
||||
#include <asm/errno.h>
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
|
||||
#ifndef __ASSEMBLER__
|
||||
|
||||
#include <vdso/clocksource.h>
|
||||
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/arch_timer.h>
|
||||
#include <asm/barrier.h>
|
||||
|
||||
@@ -52,6 +52,7 @@
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/unwind.h>
|
||||
#include <asm/vdso.h>
|
||||
#include <asm/vdso/vdso.h>
|
||||
|
||||
#ifdef CONFIG_STACKPROTECTOR
|
||||
#include <linux/stackprotector.h>
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <asm/vdso.h>
|
||||
#include <asm/vdso/vdso.h>
|
||||
#include <vdso/helpers.h>
|
||||
#include <vdso/vsyscall.h>
|
||||
#include <vdso/datapage.h>
|
||||
|
||||
@@ -4,6 +4,9 @@
|
||||
* Author: Alex Smith <alex.smith@imgtec.com>
|
||||
*/
|
||||
|
||||
#ifndef __ASM_VDSO_VDSO_H
|
||||
#define __ASM_VDSO_VDSO_H
|
||||
|
||||
#include <asm/sgidefs.h>
|
||||
#include <vdso/page.h>
|
||||
|
||||
@@ -70,3 +73,5 @@ static inline void __iomem *get_gic(const struct vdso_time_data *data)
|
||||
#endif /* CONFIG_CLKSRC_MIPS_GIC */
|
||||
|
||||
#endif /* __ASSEMBLER__ */
|
||||
|
||||
#endif /* __ASM_VDSO_VDSO_H */
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include <asm/mips-cps.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/vdso.h>
|
||||
#include <asm/vdso/vdso.h>
|
||||
#include <vdso/helpers.h>
|
||||
#include <vdso/vsyscall.h>
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <asm/barrier.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <uapi/linux/time.h>
|
||||
#include <vdso/time32.h>
|
||||
|
||||
#define VDSO_HAS_CLOCK_GETRES 1
|
||||
|
||||
|
||||
@@ -4,6 +4,9 @@
|
||||
|
||||
#ifndef __ASSEMBLER__
|
||||
|
||||
#include <asm/cputable.h>
|
||||
#include <asm/feature-fixups.h>
|
||||
|
||||
/* Macros for adjusting thread priority (hardware multi-threading) */
|
||||
#ifdef CONFIG_PPC64
|
||||
#define HMT_very_low() asm volatile("or 31, 31, 31 # very low priority")
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#undef __powerpc64__
|
||||
#include <linux/audit_arch.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/unistd_32.h>
|
||||
|
||||
#include "audit_32.h"
|
||||
|
||||
|
||||
@@ -12,8 +12,7 @@ LD_BFD := elf64-s390
|
||||
KBUILD_LDFLAGS := -m elf64_s390
|
||||
KBUILD_AFLAGS_MODULE += -fPIC
|
||||
KBUILD_CFLAGS_MODULE += -fPIC
|
||||
KBUILD_AFLAGS += -m64
|
||||
KBUILD_CFLAGS += -m64
|
||||
KBUILD_CPPFLAGS += -m64
|
||||
KBUILD_CFLAGS += -fPIC
|
||||
LDFLAGS_vmlinux := $(call ld-option,-no-pie)
|
||||
extra_tools := relocs
|
||||
|
||||
@@ -104,7 +104,6 @@ config SPARC64
|
||||
select ARCH_USE_QUEUED_RWLOCKS
|
||||
select ARCH_USE_QUEUED_SPINLOCKS
|
||||
select GENERIC_TIME_VSYSCALL
|
||||
select ARCH_CLOCKSOURCE_DATA
|
||||
select ARCH_HAS_PTE_SPECIAL
|
||||
select PCI_DOMAINS if PCI
|
||||
select ARCH_HAS_GIGANTIC_PAGE
|
||||
@@ -115,6 +114,8 @@ config SPARC64
|
||||
select ARCH_SUPPORTS_SCHED_SMT if SMP
|
||||
select ARCH_SUPPORTS_SCHED_MC if SMP
|
||||
select ARCH_HAS_LAZY_MMU_MODE
|
||||
select HAVE_GENERIC_VDSO
|
||||
select GENERIC_GETTIMEOFDAY
|
||||
|
||||
config ARCH_PROC_KCORE_TEXT
|
||||
def_bool y
|
||||
|
||||
@@ -5,13 +5,4 @@
|
||||
#ifndef _ASM_SPARC_CLOCKSOURCE_H
|
||||
#define _ASM_SPARC_CLOCKSOURCE_H
|
||||
|
||||
/* VDSO clocksources */
|
||||
#define VCLOCK_NONE 0 /* Nothing userspace can do. */
|
||||
#define VCLOCK_TICK 1 /* Use %tick. */
|
||||
#define VCLOCK_STICK 2 /* Use %stick. */
|
||||
|
||||
struct arch_clocksource_data {
|
||||
int vclock_mode;
|
||||
};
|
||||
|
||||
#endif /* _ASM_SPARC_CLOCKSOURCE_H */
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef ___ASM_SPARC_PROCESSOR_H
|
||||
#define ___ASM_SPARC_PROCESSOR_H
|
||||
|
||||
#include <asm/vdso/processor.h>
|
||||
|
||||
#if defined(__sparc__) && defined(__arch64__)
|
||||
#include <asm/processor_64.h>
|
||||
#else
|
||||
|
||||
@@ -91,8 +91,6 @@ unsigned long __get_wchan(struct task_struct *);
|
||||
extern struct task_struct *last_task_used_math;
|
||||
int do_mathemu(struct pt_regs *regs, struct task_struct *fpt);
|
||||
|
||||
#define cpu_relax() barrier()
|
||||
|
||||
extern void (*sparc_idle)(void);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -182,31 +182,6 @@ unsigned long __get_wchan(struct task_struct *task);
|
||||
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->tpc)
|
||||
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->u_regs[UREG_FP])
|
||||
|
||||
/* Please see the commentary in asm/backoff.h for a description of
|
||||
* what these instructions are doing and how they have been chosen.
|
||||
* To make a long story short, we are trying to yield the current cpu
|
||||
* strand during busy loops.
|
||||
*/
|
||||
#ifdef BUILD_VDSO
|
||||
#define cpu_relax() asm volatile("\n99:\n\t" \
|
||||
"rd %%ccr, %%g0\n\t" \
|
||||
"rd %%ccr, %%g0\n\t" \
|
||||
"rd %%ccr, %%g0\n\t" \
|
||||
::: "memory")
|
||||
#else /* ! BUILD_VDSO */
|
||||
#define cpu_relax() asm volatile("\n99:\n\t" \
|
||||
"rd %%ccr, %%g0\n\t" \
|
||||
"rd %%ccr, %%g0\n\t" \
|
||||
"rd %%ccr, %%g0\n\t" \
|
||||
".section .pause_3insn_patch,\"ax\"\n\t"\
|
||||
".word 99b\n\t" \
|
||||
"wr %%g0, 128, %%asr27\n\t" \
|
||||
"nop\n\t" \
|
||||
"nop\n\t" \
|
||||
".previous" \
|
||||
::: "memory")
|
||||
#endif
|
||||
|
||||
/* Prefetch support. This is tuned for UltraSPARC-III and later.
|
||||
* UltraSPARC-I will treat these as nops, and UltraSPARC-II has
|
||||
* a shallower prefetch queue than later chips.
|
||||
|
||||
@@ -8,8 +8,6 @@
|
||||
struct vdso_image {
|
||||
void *data;
|
||||
unsigned long size; /* Always a multiple of PAGE_SIZE */
|
||||
|
||||
long sym_vvar_start; /* Negative offset to the vvar area */
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SPARC64
|
||||
|
||||
10
arch/sparc/include/asm/vdso/clocksource.h
Normal file
10
arch/sparc/include/asm/vdso/clocksource.h
Normal file
@@ -0,0 +1,10 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __ASM_VDSO_CLOCKSOURCE_H
|
||||
#define __ASM_VDSO_CLOCKSOURCE_H
|
||||
|
||||
/* VDSO clocksources */
|
||||
#define VDSO_ARCH_CLOCKMODES \
|
||||
VDSO_CLOCKMODE_TICK, \
|
||||
VDSO_CLOCKMODE_STICK
|
||||
|
||||
#endif /* __ASM_VDSO_CLOCKSOURCE_H */
|
||||
184
arch/sparc/include/asm/vdso/gettimeofday.h
Normal file
184
arch/sparc/include/asm/vdso/gettimeofday.h
Normal file
@@ -0,0 +1,184 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright 2006 Andi Kleen, SUSE Labs.
|
||||
*/
|
||||
|
||||
#ifndef _ASM_SPARC_VDSO_GETTIMEOFDAY_H
|
||||
#define _ASM_SPARC_VDSO_GETTIMEOFDAY_H
|
||||
|
||||
#include <uapi/linux/time.h>
|
||||
#include <uapi/linux/unistd.h>
|
||||
|
||||
#include <vdso/align.h>
|
||||
#include <vdso/clocksource.h>
|
||||
#include <vdso/datapage.h>
|
||||
#include <vdso/page.h>
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#ifdef CONFIG_SPARC64
|
||||
/*
 * vread_tick() - 64-bit build: read the %tick register.
 *
 * Issues a single "rd %tick" into a 64-bit output register and returns
 * that value unchanged. The asm is volatile so every call performs a
 * fresh read.
 */
static __always_inline u64 vread_tick(void)
|
||||
{
|
||||
u64 ret;
|
||||
|
||||
__asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * vread_tick_stick() - 64-bit build: read ancillary state register 24.
 *
 * Issues a single "rd %asr24" into a 64-bit output register and returns
 * that value unchanged.
 * NOTE(review): %asr24 is presumably the %stick (system tick) register,
 * matching the STICK clock mode this helper is named after - confirm
 * against the CPU manual.
 */
static __always_inline u64 vread_tick_stick(void)
|
||||
{
|
||||
u64 ret;
|
||||
|
||||
__asm__ __volatile__("rd %%asr24, %0" : "=r" (ret));
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
/*
 * vdso_shift_ns() - 32-bit build: logical right shift of a 64-bit value.
 * @val: 64-bit value, accessed as a high (%H1) / low (%L1) register pair
 * @amt: shift count
 *
 * Instruction by instruction:
 *   sllx %H1, 32, %g1   - move the high word into the upper half of %g1
 *   srl  %L1, 0, %L1    - zero-extend the low word in place
 *   or   %g1, %L1, %g1  - %g1 now holds the full 64-bit value
 *   srlx %g1, %2, %L0   - 64-bit logical shift right by @amt -> low result
 *   srlx %L0, 32, %H0   - upper 32 bits of the result -> high result
 *
 * %g1 is used as scratch and is listed as clobbered.
 *
 * Return: @val >> @amt as a 64-bit value.
 */
static __always_inline u64 vdso_shift_ns(u64 val, u32 amt)
|
||||
{
|
||||
u64 ret;
|
||||
|
||||
__asm__ __volatile__("sllx %H1, 32, %%g1\n\t"
|
||||
"srl %L1, 0, %L1\n\t"
|
||||
"or %%g1, %L1, %%g1\n\t"
|
||||
"srlx %%g1, %2, %L0\n\t"
|
||||
"srlx %L0, 32, %H0"
|
||||
: "=r" (ret)
|
||||
: "r" (val), "r" (amt)
|
||||
: "g1");
|
||||
return ret;
|
||||
}
|
||||
/*
 * Self-referential define.
 * NOTE(review): presumably lets the generic vDSO code detect that this
 * architecture supplies its own vdso_shift_ns() - confirm in lib/vdso.
 */
#define vdso_shift_ns vdso_shift_ns
|
||||
|
||||
/*
 * vread_tick() - 32-bit build: read the %tick register.
 *
 * The 64-bit result variable is pinned to the register pair starting at
 * %o4. "rd %tick" places the counter in the low register of the pair
 * (%L0); "srlx %L0, 32, %H0" then copies its upper 32 bits into the high
 * register (%H0), so the pair forms the full 64-bit value.
 */
static __always_inline u64 vread_tick(void)
|
||||
{
|
||||
register unsigned long long ret asm("o4");
|
||||
|
||||
__asm__ __volatile__("rd %%tick, %L0\n\t"
|
||||
"srlx %L0, 32, %H0"
|
||||
: "=r" (ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * vread_tick_stick() - 32-bit build: read ancillary state register 24.
 *
 * Same shape as the 32-bit vread_tick(): the result is pinned to the
 * register pair starting at %o4, "rd %asr24" fills the low register
 * (%L0) and "srlx %L0, 32, %H0" extracts the upper 32 bits into the high
 * register (%H0).
 * NOTE(review): %asr24 is presumably the %stick register - confirm.
 */
static __always_inline u64 vread_tick_stick(void)
|
||||
{
|
||||
register unsigned long long ret asm("o4");
|
||||
|
||||
__asm__ __volatile__("rd %%asr24, %L0\n\t"
|
||||
"srlx %L0, 32, %H0"
|
||||
: "=r" (ret));
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * __arch_get_hw_counter() - read the hardware counter for a clock mode.
 * @clock_mode: vDSO clock mode of the current clocksource
 * @vd: vDSO time data; not used by this implementation
 *
 * VDSO_CLOCKMODE_STICK (marked as the likely case) is served by
 * vread_tick_stick(); every other mode value falls through to
 * vread_tick().
 *
 * Return: the raw counter value.
 */
static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd)
|
||||
{
|
||||
if (likely(clock_mode == VDSO_CLOCKMODE_STICK))
|
||||
return vread_tick_stick();
|
||||
else
|
||||
return vread_tick();
|
||||
}
|
||||
|
||||
/*
 * SYSCALL_STRING - asm template used by the syscall fallbacks below.
 *
 * The two variants differ only in the software trap number: "ta 0x6d"
 * when CONFIG_SPARC64 is set, "ta 0x10" otherwise. After the trap,
 * "bcs,a 1f" is an annulled branch on carry-set whose delay slot
 * ("sub %g0, %o0, %o0") executes only when the branch is taken, so %o0 is
 * negated exactly when the carry flag is set and left untouched otherwise.
 * NOTE(review): carry-set presumably signals a failed syscall with a
 * positive errno in %o0, making the result a negative errno - confirm
 * against the sparc syscall ABI.
 */
#ifdef CONFIG_SPARC64
|
||||
|
||||
#define SYSCALL_STRING \
|
||||
"ta 0x6d;" \
|
||||
"bcs,a 1f;" \
|
||||
" sub %%g0, %%o0, %%o0;" \
|
||||
"1:"
|
||||
#else
|
||||
#define SYSCALL_STRING \
|
||||
"ta 0x10;" \
|
||||
"bcs,a 1f;" \
|
||||
" sub %%g0, %%o0, %%o0;" \
|
||||
"1:"
|
||||
#endif
|
||||
|
||||
/*
 * SYSCALL_CLOBBERS - clobber list for the SYSCALL_STRING asm statements.
 *
 * Names f0-f31 individually, then the even-numbered registers f32-f62,
 * plus the condition codes ("cc") and "memory", so the compiler keeps no
 * value cached in any of them across the trap.
 */
#define SYSCALL_CLOBBERS \
|
||||
"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
|
||||
"f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \
|
||||
"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \
|
||||
"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \
|
||||
"f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", \
|
||||
"f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", \
|
||||
"cc", "memory"
|
||||
|
||||
#ifdef CONFIG_SPARC64
|
||||
|
||||
/*
 * clock_gettime_fallback() - CONFIG_SPARC64 variant: issue the real
 * clock_gettime system call.
 * @clock: clock id, passed in %o0
 * @ts: destination timespec, passed in %o1
 *
 * Loads __NR_clock_gettime into %g1 and executes SYSCALL_STRING. %o0 is
 * both the first argument and the result register.
 *
 * Return: the value left in %o0 by SYSCALL_STRING.
 */
static __always_inline
|
||||
long clock_gettime_fallback(clockid_t clock, struct __kernel_timespec *ts)
|
||||
{
|
||||
register long num __asm__("g1") = __NR_clock_gettime;
|
||||
register long o0 __asm__("o0") = clock;
|
||||
register long o1 __asm__("o1") = (long) ts;
|
||||
|
||||
__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
|
||||
"0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
|
||||
return o0;
|
||||
}
|
||||
|
||||
#else /* !CONFIG_SPARC64 */
|
||||
|
||||
/*
 * clock_gettime_fallback() - !CONFIG_SPARC64 variant: issue the
 * clock_gettime64 system call for a 64-bit struct __kernel_timespec.
 * @clock: clock id, passed in %o0
 * @ts: destination timespec, passed in %o1
 *
 * Identical to the SPARC64 variant except that the syscall number placed
 * in %g1 is __NR_clock_gettime64.
 *
 * Return: the value left in %o0 by SYSCALL_STRING.
 */
static __always_inline
|
||||
long clock_gettime_fallback(clockid_t clock, struct __kernel_timespec *ts)
|
||||
{
|
||||
register long num __asm__("g1") = __NR_clock_gettime64;
|
||||
register long o0 __asm__("o0") = clock;
|
||||
register long o1 __asm__("o1") = (long) ts;
|
||||
|
||||
__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
|
||||
"0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
|
||||
return o0;
|
||||
}
|
||||
|
||||
/*
 * clock_gettime32_fallback() - !CONFIG_SPARC64 only: issue the
 * clock_gettime system call for a struct old_timespec32.
 * @clock: clock id, passed in %o0
 * @ts: destination 32-bit timespec, passed in %o1
 *
 * Uses syscall number __NR_clock_gettime in %g1 (the 64-bit-time variant
 * above uses __NR_clock_gettime64 instead).
 *
 * Return: the value left in %o0 by SYSCALL_STRING.
 */
static __always_inline
|
||||
long clock_gettime32_fallback(clockid_t clock, struct old_timespec32 *ts)
|
||||
{
|
||||
register long num __asm__("g1") = __NR_clock_gettime;
|
||||
register long o0 __asm__("o0") = clock;
|
||||
register long o1 __asm__("o1") = (long) ts;
|
||||
|
||||
__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
|
||||
"0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
|
||||
return o0;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SPARC64 */
|
||||
|
||||
/*
 * gettimeofday_fallback() - issue the real gettimeofday system call.
 * @tv: destination timeval, passed in %o0
 * @tz: destination timezone, passed in %o1
 *
 * Loads __NR_gettimeofday into %g1 and executes SYSCALL_STRING. This
 * definition sits outside the CONFIG_SPARC64 conditional, so it is shared
 * by both builds.
 *
 * Return: the value left in %o0 by SYSCALL_STRING.
 */
static __always_inline
|
||||
long gettimeofday_fallback(struct __kernel_old_timeval *tv, struct timezone *tz)
|
||||
{
|
||||
register long num __asm__("g1") = __NR_gettimeofday;
|
||||
register long o0 __asm__("o0") = (long) tv;
|
||||
register long o1 __asm__("o1") = (long) tz;
|
||||
|
||||
__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
|
||||
"0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
|
||||
return o0;
|
||||
}
|
||||
|
||||
/*
 * __arch_get_vdso_u_time_data() - compute the address of vdso_u_time_data.
 *
 * A 32-bit word holding "vdso_u_time_data - ." is embedded in the
 * instruction stream. The call instruction leaves its own address in %o7;
 * adding the distance between labels 1 and 2 makes %o7 point at the
 * embedded word. That word is loaded sign-extended (ldsw) and added to
 * its own address, yielding the absolute address of the symbol.
 * %o7 is declared clobbered.
 *
 * Return: pointer to the vDSO time data.
 */
static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void)
|
||||
{
|
||||
unsigned long ret;
|
||||
|
||||
/*
|
||||
* SPARC does not support native PC-relative code relocations.
|
||||
* Calculate the address manually, works for 32 and 64 bit code.
|
||||
*/
|
||||
__asm__ __volatile__(
|
||||
"1:\n"
|
||||
"call 3f\n" // Jump over the embedded data and set up %o7
|
||||
"nop\n" // Delay slot
|
||||
"2:\n"
|
||||
".word vdso_u_time_data - .\n" // Embedded offset to external symbol
|
||||
"3:\n"
|
||||
"add %%o7, 2b - 1b, %%o7\n" // Point %o7 to the embedded offset
|
||||
"ldsw [%%o7], %0\n" // Load the offset
|
||||
"add %0, %%o7, %0\n" // Calculate the absolute address
|
||||
: "=r" (ret)
|
||||
:
|
||||
: "o7");
|
||||
|
||||
return (const struct vdso_time_data *)ret;
|
||||
}
|
||||
/*
 * Self-referential define.
 * NOTE(review): presumably tells the generic vDSO headers that this
 * architecture overrides the default accessor - confirm in include/vdso.
 */
#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data
|
||||
|
||||
#endif /* _ASM_SPARC_VDSO_GETTIMEOFDAY_H */
|
||||
41
arch/sparc/include/asm/vdso/processor.h
Normal file
41
arch/sparc/include/asm/vdso/processor.h
Normal file
@@ -0,0 +1,41 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef _ASM_SPARC_VDSO_PROCESSOR_H
|
||||
#define _ASM_SPARC_VDSO_PROCESSOR_H
|
||||
|
||||
#include <linux/compiler.h>
|
||||
|
||||
#if defined(__arch64__)
|
||||
|
||||
/* Please see the commentary in asm/backoff.h for a description of
|
||||
* what these instructions are doing and how they have been chosen.
|
||||
* To make a long story short, we are trying to yield the current cpu
|
||||
* strand during busy loops.
|
||||
*/
|
||||
/*
 * vDSO build: cpu_relax() is three back-to-back "rd %ccr, %g0" reads with
 * a "memory" clobber. The local label 99 is emitted but not referenced in
 * this variant, and nothing is recorded in any patch section.
 */
#ifdef BUILD_VDSO
|
||||
#define cpu_relax() asm volatile("\n99:\n\t" \
|
||||
"rd %%ccr, %%g0\n\t" \
|
||||
"rd %%ccr, %%g0\n\t" \
|
||||
"rd %%ccr, %%g0\n\t" \
|
||||
::: "memory")
|
||||
#else /* ! BUILD_VDSO */
|
||||
/*
 * Non-vDSO build: the same three instructions, plus an entry emitted into
 * the .pause_3insn_patch section consisting of the address of label 99
 * followed by three replacement instructions ("wr %g0, 128, %asr27" and
 * two nops).
 * NOTE(review): presumably the kernel patches the replacement over the
 * three "rd %ccr" instructions at boot on CPUs that support it - confirm
 * in the patching code.
 */
#define cpu_relax() asm volatile("\n99:\n\t" \
|
||||
"rd %%ccr, %%g0\n\t" \
|
||||
"rd %%ccr, %%g0\n\t" \
|
||||
"rd %%ccr, %%g0\n\t" \
|
||||
".section .pause_3insn_patch,\"ax\"\n\t"\
|
||||
".word 99b\n\t" \
|
||||
"wr %%g0, 128, %%asr27\n\t" \
|
||||
"nop\n\t" \
|
||||
"nop\n\t" \
|
||||
".previous" \
|
||||
::: "memory")
|
||||
#endif /* BUILD_VDSO */
|
||||
|
||||
#else /* ! __arch64__ */
|
||||
|
||||
/* Without __arch64__, cpu_relax() is only a compiler barrier. */
#define cpu_relax() barrier()
|
||||
|
||||
#endif /* __arch64__ */
|
||||
|
||||
#endif /* _ASM_SPARC_VDSO_PROCESSOR_H */
|
||||
10
arch/sparc/include/asm/vdso/vsyscall.h
Normal file
10
arch/sparc/include/asm/vdso/vsyscall.h
Normal file
@@ -0,0 +1,10 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef _ASM_SPARC_VDSO_VSYSCALL_H
|
||||
#define _ASM_SPARC_VDSO_VSYSCALL_H
|
||||
|
||||
#define __VDSO_PAGES 4
|
||||
|
||||
#include <asm-generic/vdso/vsyscall.h>
|
||||
|
||||
#endif /* _ASM_SPARC_VDSO_VSYSCALL_H */
|
||||
@@ -1,75 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ASM_SPARC_VVAR_DATA_H
|
||||
#define _ASM_SPARC_VVAR_DATA_H
|
||||
|
||||
#include <asm/clocksource.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/barrier.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
struct vvar_data {
|
||||
unsigned int seq;
|
||||
|
||||
int vclock_mode;
|
||||
struct { /* extract of a clocksource struct */
|
||||
u64 cycle_last;
|
||||
u64 mask;
|
||||
int mult;
|
||||
int shift;
|
||||
} clock;
|
||||
/* open coded 'struct timespec' */
|
||||
u64 wall_time_sec;
|
||||
u64 wall_time_snsec;
|
||||
u64 monotonic_time_snsec;
|
||||
u64 monotonic_time_sec;
|
||||
u64 monotonic_time_coarse_sec;
|
||||
u64 monotonic_time_coarse_nsec;
|
||||
u64 wall_time_coarse_sec;
|
||||
u64 wall_time_coarse_nsec;
|
||||
|
||||
int tz_minuteswest;
|
||||
int tz_dsttime;
|
||||
};
|
||||
|
||||
extern struct vvar_data *vvar_data;
|
||||
extern int vdso_fix_stick;
|
||||
|
||||
static inline unsigned int vvar_read_begin(const struct vvar_data *s)
|
||||
{
|
||||
unsigned int ret;
|
||||
|
||||
repeat:
|
||||
ret = READ_ONCE(s->seq);
|
||||
if (unlikely(ret & 1)) {
|
||||
cpu_relax();
|
||||
goto repeat;
|
||||
}
|
||||
smp_rmb(); /* Finish all reads before we return seq */
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int vvar_read_retry(const struct vvar_data *s,
|
||||
unsigned int start)
|
||||
{
|
||||
smp_rmb(); /* Finish all reads before checking the value of seq */
|
||||
return unlikely(s->seq != start);
|
||||
}
|
||||
|
||||
static inline void vvar_write_begin(struct vvar_data *s)
|
||||
{
|
||||
++s->seq;
|
||||
smp_wmb(); /* Makes sure that increment of seq is reflected */
|
||||
}
|
||||
|
||||
static inline void vvar_write_end(struct vvar_data *s)
|
||||
{
|
||||
smp_wmb(); /* Makes the value of seq current before we increment */
|
||||
++s->seq;
|
||||
}
|
||||
|
||||
|
||||
#endif /* _ASM_SPARC_VVAR_DATA_H */
|
||||
@@ -41,7 +41,6 @@ obj-$(CONFIG_SPARC32) += systbls_32.o
|
||||
obj-y += time_$(BITS).o
|
||||
obj-$(CONFIG_SPARC32) += windows.o
|
||||
obj-y += cpu.o
|
||||
obj-$(CONFIG_SPARC64) += vdso.o
|
||||
obj-$(CONFIG_SPARC32) += devices.o
|
||||
obj-y += ptrace_$(BITS).o
|
||||
obj-y += unaligned_$(BITS).o
|
||||
|
||||
@@ -838,14 +838,14 @@ void __init time_init_early(void)
|
||||
if (tlb_type == spitfire) {
|
||||
if (is_hummingbird()) {
|
||||
init_tick_ops(&hbtick_operations);
|
||||
clocksource_tick.archdata.vclock_mode = VCLOCK_NONE;
|
||||
clocksource_tick.vdso_clock_mode = VDSO_CLOCKMODE_NONE;
|
||||
} else {
|
||||
init_tick_ops(&tick_operations);
|
||||
clocksource_tick.archdata.vclock_mode = VCLOCK_TICK;
|
||||
clocksource_tick.vdso_clock_mode = VDSO_CLOCKMODE_TICK;
|
||||
}
|
||||
} else {
|
||||
init_tick_ops(&stick_operations);
|
||||
clocksource_tick.archdata.vclock_mode = VCLOCK_STICK;
|
||||
clocksource_tick.vdso_clock_mode = VDSO_CLOCKMODE_STICK;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
|
||||
* Copyright 2003 Andi Kleen, SuSE Labs.
|
||||
*
|
||||
* Thanks to hpa@transmeta.com for some useful hint.
|
||||
* Special thanks to Ingo Molnar for his early experience with
|
||||
* a different vsyscall implementation for Linux/IA32 and for the name.
|
||||
*/
|
||||
|
||||
#include <linux/time.h>
|
||||
#include <linux/timekeeper_internal.h>
|
||||
|
||||
#include <asm/vvar.h>
|
||||
|
||||
void update_vsyscall_tz(void)
|
||||
{
|
||||
if (unlikely(vvar_data == NULL))
|
||||
return;
|
||||
|
||||
vvar_data->tz_minuteswest = sys_tz.tz_minuteswest;
|
||||
vvar_data->tz_dsttime = sys_tz.tz_dsttime;
|
||||
}
|
||||
|
||||
void update_vsyscall(struct timekeeper *tk)
|
||||
{
|
||||
struct vvar_data *vdata = vvar_data;
|
||||
|
||||
if (unlikely(vdata == NULL))
|
||||
return;
|
||||
|
||||
vvar_write_begin(vdata);
|
||||
vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
|
||||
vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
|
||||
vdata->clock.mask = tk->tkr_mono.mask;
|
||||
vdata->clock.mult = tk->tkr_mono.mult;
|
||||
vdata->clock.shift = tk->tkr_mono.shift;
|
||||
|
||||
vdata->wall_time_sec = tk->xtime_sec;
|
||||
vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
|
||||
|
||||
vdata->monotonic_time_sec = tk->xtime_sec +
|
||||
tk->wall_to_monotonic.tv_sec;
|
||||
vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec +
|
||||
(tk->wall_to_monotonic.tv_nsec <<
|
||||
tk->tkr_mono.shift);
|
||||
|
||||
while (vdata->monotonic_time_snsec >=
|
||||
(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
|
||||
vdata->monotonic_time_snsec -=
|
||||
((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
|
||||
vdata->monotonic_time_sec++;
|
||||
}
|
||||
|
||||
vdata->wall_time_coarse_sec = tk->xtime_sec;
|
||||
vdata->wall_time_coarse_nsec =
|
||||
(long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
|
||||
|
||||
vdata->monotonic_time_coarse_sec =
|
||||
vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
|
||||
vdata->monotonic_time_coarse_nsec =
|
||||
vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
|
||||
|
||||
while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
|
||||
vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
|
||||
vdata->monotonic_time_coarse_sec++;
|
||||
}
|
||||
|
||||
vvar_write_end(vdata);
|
||||
}
|
||||
@@ -3,6 +3,9 @@
|
||||
# Building vDSO images for sparc.
|
||||
#
|
||||
|
||||
# Include the generic Makefile to check the built vDSO:
|
||||
include $(srctree)/lib/vdso/Makefile.include
|
||||
|
||||
# files to link into the vdso
|
||||
vobjs-y := vdso-note.o vclock_gettime.o
|
||||
|
||||
@@ -90,6 +93,9 @@ KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
|
||||
KBUILD_CFLAGS_32 += -mv8plus
|
||||
$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
|
||||
|
||||
CHECKFLAGS_32 := $(filter-out -m64 -D__sparc_v9__ -D__arch64__, $(CHECKFLAGS)) -m32
|
||||
$(obj)/vdso32.so.dbg: CHECKFLAGS = $(CHECKFLAGS_32)
|
||||
|
||||
$(obj)/vdso32.so.dbg: FORCE \
|
||||
$(obj)/vdso32/vdso32.lds \
|
||||
$(obj)/vdso32/vclock_gettime.o \
|
||||
@@ -102,6 +108,7 @@ $(obj)/vdso32.so.dbg: FORCE \
|
||||
quiet_cmd_vdso = VDSO $@
|
||||
cmd_vdso = $(LD) -nostdlib -o $@ \
|
||||
$(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
|
||||
-T $(filter %.lds,$^) $(filter %.o,$^)
|
||||
-T $(filter %.lds,$^) $(filter %.o,$^); \
|
||||
$(cmd_vdso_check)
|
||||
|
||||
VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 -Bsymbolic --no-undefined
|
||||
VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 -Bsymbolic --no-undefined -z noexecstack
|
||||
|
||||
@@ -12,382 +12,48 @@
|
||||
* Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/string.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/timex.h>
|
||||
#include <asm/clocksource.h>
|
||||
#include <asm/vvar.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <vdso/gettime.h>
|
||||
|
||||
#include <asm/vdso/gettimeofday.h>
|
||||
|
||||
#include "../../../../lib/vdso/gettimeofday.c"
|
||||
|
||||
/*
 * __vdso_gettimeofday() - vDSO entry point for gettimeofday.
 * @tv: destination timeval
 * @tz: destination timezone
 *
 * Forwards both arguments unchanged to __cvdso_gettimeofday() and returns
 * its result.
 * NOTE(review): __cvdso_gettimeofday() presumably comes from the
 * lib/vdso/gettimeofday.c include in this file - confirm.
 */
int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
|
||||
{
|
||||
return __cvdso_gettimeofday(tv, tz);
|
||||
}
|
||||
|
||||
/* gettimeofday is exported as a weak alias of __vdso_gettimeofday. */
int gettimeofday(struct __kernel_old_timeval *, struct timezone *)
|
||||
__weak __alias(__vdso_gettimeofday);
|
||||
|
||||
#if defined(CONFIG_SPARC64)
|
||||
/*
 * __vdso_clock_gettime() - vDSO entry point for clock_gettime, variant
 * taking a struct __kernel_timespec.
 * @clock: clock id
 * @ts: destination timespec
 *
 * Forwards both arguments unchanged to __cvdso_clock_gettime() and
 * returns its result.
 */
int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
|
||||
{
|
||||
return __cvdso_clock_gettime(clock, ts);
|
||||
}
|
||||
|
||||
/* clock_gettime is exported as a weak alias of __vdso_clock_gettime. */
int clock_gettime(clockid_t, struct __kernel_timespec *)
|
||||
__weak __alias(__vdso_clock_gettime);
|
||||
|
||||
#ifdef CONFIG_SPARC64
|
||||
#define SYSCALL_STRING \
|
||||
"ta 0x6d;" \
|
||||
"bcs,a 1f;" \
|
||||
" sub %%g0, %%o0, %%o0;" \
|
||||
"1:"
|
||||
#else
|
||||
#define SYSCALL_STRING \
|
||||
"ta 0x10;" \
|
||||
"bcs,a 1f;" \
|
||||
" sub %%g0, %%o0, %%o0;" \
|
||||
"1:"
|
||||
|
||||
/*
 * __vdso_clock_gettime() - vDSO entry point for clock_gettime, variant
 * taking a struct old_timespec32.
 * @clock: clock id
 * @ts: destination 32-bit timespec
 *
 * Forwards both arguments unchanged to __cvdso_clock_gettime32() and
 * returns its result.
 */
int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts)
|
||||
{
|
||||
return __cvdso_clock_gettime32(clock, ts);
|
||||
}
|
||||
|
||||
/* clock_gettime is exported as a weak alias of __vdso_clock_gettime. */
int clock_gettime(clockid_t, struct old_timespec32 *)
|
||||
__weak __alias(__vdso_clock_gettime);
|
||||
|
||||
/*
 * __vdso_clock_gettime64() - vDSO entry point for clock_gettime64.
 * @clock: clock id
 * @ts: destination struct __kernel_timespec
 *
 * Forwards both arguments unchanged to __cvdso_clock_gettime() and
 * returns its result.
 */
int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts)
|
||||
{
|
||||
return __cvdso_clock_gettime(clock, ts);
|
||||
}
|
||||
|
||||
/* clock_gettime64 is exported as a weak alias of __vdso_clock_gettime64. */
int clock_gettime64(clockid_t, struct __kernel_timespec *)
|
||||
__weak __alias(__vdso_clock_gettime64);
|
||||
|
||||
#endif
|
||||
|
||||
#define SYSCALL_CLOBBERS \
|
||||
"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
|
||||
"f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \
|
||||
"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \
|
||||
"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \
|
||||
"f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", \
|
||||
"f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", \
|
||||
"cc", "memory"
|
||||
|
||||
/*
|
||||
* Compute the vvar page's address in the process address space, and return it
|
||||
* as a pointer to the vvar_data.
|
||||
*/
|
||||
notrace static __always_inline struct vvar_data *get_vvar_data(void)
|
||||
{
|
||||
unsigned long ret;
|
||||
|
||||
/*
|
||||
* vdso data page is the first vDSO page so grab the PC
|
||||
* and move up a page to get to the data page.
|
||||
*/
|
||||
__asm__("rd %%pc, %0" : "=r" (ret));
|
||||
ret &= ~(8192 - 1);
|
||||
ret -= 8192;
|
||||
|
||||
return (struct vvar_data *) ret;
|
||||
}
|
||||
|
||||
notrace static long vdso_fallback_gettime(long clock, struct __kernel_old_timespec *ts)
|
||||
{
|
||||
register long num __asm__("g1") = __NR_clock_gettime;
|
||||
register long o0 __asm__("o0") = clock;
|
||||
register long o1 __asm__("o1") = (long) ts;
|
||||
|
||||
__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
|
||||
"0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
|
||||
return o0;
|
||||
}
|
||||
|
||||
notrace static long vdso_fallback_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
|
||||
{
|
||||
register long num __asm__("g1") = __NR_gettimeofday;
|
||||
register long o0 __asm__("o0") = (long) tv;
|
||||
register long o1 __asm__("o1") = (long) tz;
|
||||
|
||||
__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
|
||||
"0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
|
||||
return o0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SPARC64
|
||||
notrace static __always_inline u64 __shr64(u64 val, int amt)
|
||||
{
|
||||
return val >> amt;
|
||||
}
|
||||
|
||||
notrace static __always_inline u64 vread_tick(void)
|
||||
{
|
||||
u64 ret;
|
||||
|
||||
__asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
notrace static __always_inline u64 vread_tick_stick(void)
|
||||
{
|
||||
u64 ret;
|
||||
|
||||
__asm__ __volatile__("rd %%asr24, %0" : "=r" (ret));
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
notrace static __always_inline u64 __shr64(u64 val, int amt)
|
||||
{
|
||||
u64 ret;
|
||||
|
||||
__asm__ __volatile__("sllx %H1, 32, %%g1\n\t"
|
||||
"srl %L1, 0, %L1\n\t"
|
||||
"or %%g1, %L1, %%g1\n\t"
|
||||
"srlx %%g1, %2, %L0\n\t"
|
||||
"srlx %L0, 32, %H0"
|
||||
: "=r" (ret)
|
||||
: "r" (val), "r" (amt)
|
||||
: "g1");
|
||||
return ret;
|
||||
}
|
||||
|
||||
notrace static __always_inline u64 vread_tick(void)
|
||||
{
|
||||
register unsigned long long ret asm("o4");
|
||||
|
||||
__asm__ __volatile__("rd %%tick, %L0\n\t"
|
||||
"srlx %L0, 32, %H0"
|
||||
: "=r" (ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
notrace static __always_inline u64 vread_tick_stick(void)
|
||||
{
|
||||
register unsigned long long ret asm("o4");
|
||||
|
||||
__asm__ __volatile__("rd %%asr24, %L0\n\t"
|
||||
"srlx %L0, 32, %H0"
|
||||
: "=r" (ret));
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
notrace static __always_inline u64 vgetsns(struct vvar_data *vvar)
|
||||
{
|
||||
u64 v;
|
||||
u64 cycles;
|
||||
|
||||
cycles = vread_tick();
|
||||
v = (cycles - vvar->clock.cycle_last) & vvar->clock.mask;
|
||||
return v * vvar->clock.mult;
|
||||
}
|
||||
|
||||
notrace static __always_inline u64 vgetsns_stick(struct vvar_data *vvar)
|
||||
{
|
||||
u64 v;
|
||||
u64 cycles;
|
||||
|
||||
cycles = vread_tick_stick();
|
||||
v = (cycles - vvar->clock.cycle_last) & vvar->clock.mask;
|
||||
return v * vvar->clock.mult;
|
||||
}
|
||||
|
||||
notrace static __always_inline int do_realtime(struct vvar_data *vvar,
|
||||
struct __kernel_old_timespec *ts)
|
||||
{
|
||||
unsigned long seq;
|
||||
u64 ns;
|
||||
|
||||
do {
|
||||
seq = vvar_read_begin(vvar);
|
||||
ts->tv_sec = vvar->wall_time_sec;
|
||||
ns = vvar->wall_time_snsec;
|
||||
ns += vgetsns(vvar);
|
||||
ns = __shr64(ns, vvar->clock.shift);
|
||||
} while (unlikely(vvar_read_retry(vvar, seq)));
|
||||
|
||||
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
|
||||
ts->tv_nsec = ns;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
notrace static __always_inline int do_realtime_stick(struct vvar_data *vvar,
|
||||
struct __kernel_old_timespec *ts)
|
||||
{
|
||||
unsigned long seq;
|
||||
u64 ns;
|
||||
|
||||
do {
|
||||
seq = vvar_read_begin(vvar);
|
||||
ts->tv_sec = vvar->wall_time_sec;
|
||||
ns = vvar->wall_time_snsec;
|
||||
ns += vgetsns_stick(vvar);
|
||||
ns = __shr64(ns, vvar->clock.shift);
|
||||
} while (unlikely(vvar_read_retry(vvar, seq)));
|
||||
|
||||
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
|
||||
ts->tv_nsec = ns;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
notrace static __always_inline int do_monotonic(struct vvar_data *vvar,
|
||||
struct __kernel_old_timespec *ts)
|
||||
{
|
||||
unsigned long seq;
|
||||
u64 ns;
|
||||
|
||||
do {
|
||||
seq = vvar_read_begin(vvar);
|
||||
ts->tv_sec = vvar->monotonic_time_sec;
|
||||
ns = vvar->monotonic_time_snsec;
|
||||
ns += vgetsns(vvar);
|
||||
ns = __shr64(ns, vvar->clock.shift);
|
||||
} while (unlikely(vvar_read_retry(vvar, seq)));
|
||||
|
||||
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
|
||||
ts->tv_nsec = ns;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
notrace static __always_inline int do_monotonic_stick(struct vvar_data *vvar,
|
||||
struct __kernel_old_timespec *ts)
|
||||
{
|
||||
unsigned long seq;
|
||||
u64 ns;
|
||||
|
||||
do {
|
||||
seq = vvar_read_begin(vvar);
|
||||
ts->tv_sec = vvar->monotonic_time_sec;
|
||||
ns = vvar->monotonic_time_snsec;
|
||||
ns += vgetsns_stick(vvar);
|
||||
ns = __shr64(ns, vvar->clock.shift);
|
||||
} while (unlikely(vvar_read_retry(vvar, seq)));
|
||||
|
||||
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
|
||||
ts->tv_nsec = ns;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
notrace static int do_realtime_coarse(struct vvar_data *vvar,
|
||||
struct __kernel_old_timespec *ts)
|
||||
{
|
||||
unsigned long seq;
|
||||
|
||||
do {
|
||||
seq = vvar_read_begin(vvar);
|
||||
ts->tv_sec = vvar->wall_time_coarse_sec;
|
||||
ts->tv_nsec = vvar->wall_time_coarse_nsec;
|
||||
} while (unlikely(vvar_read_retry(vvar, seq)));
|
||||
return 0;
|
||||
}
|
||||
|
||||
notrace static int do_monotonic_coarse(struct vvar_data *vvar,
|
||||
struct __kernel_old_timespec *ts)
|
||||
{
|
||||
unsigned long seq;
|
||||
|
||||
do {
|
||||
seq = vvar_read_begin(vvar);
|
||||
ts->tv_sec = vvar->monotonic_time_coarse_sec;
|
||||
ts->tv_nsec = vvar->monotonic_time_coarse_nsec;
|
||||
} while (unlikely(vvar_read_retry(vvar, seq)));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
notrace int
|
||||
__vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts)
|
||||
{
|
||||
struct vvar_data *vvd = get_vvar_data();
|
||||
|
||||
switch (clock) {
|
||||
case CLOCK_REALTIME:
|
||||
if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
|
||||
break;
|
||||
return do_realtime(vvd, ts);
|
||||
case CLOCK_MONOTONIC:
|
||||
if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
|
||||
break;
|
||||
return do_monotonic(vvd, ts);
|
||||
case CLOCK_REALTIME_COARSE:
|
||||
return do_realtime_coarse(vvd, ts);
|
||||
case CLOCK_MONOTONIC_COARSE:
|
||||
return do_monotonic_coarse(vvd, ts);
|
||||
}
|
||||
/*
|
||||
* Unknown clock ID ? Fall back to the syscall.
|
||||
*/
|
||||
return vdso_fallback_gettime(clock, ts);
|
||||
}
|
||||
int
|
||||
clock_gettime(clockid_t, struct __kernel_old_timespec *)
|
||||
__attribute__((weak, alias("__vdso_clock_gettime")));
|
||||
|
||||
notrace int
|
||||
__vdso_clock_gettime_stick(clockid_t clock, struct __kernel_old_timespec *ts)
|
||||
{
|
||||
struct vvar_data *vvd = get_vvar_data();
|
||||
|
||||
switch (clock) {
|
||||
case CLOCK_REALTIME:
|
||||
if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
|
||||
break;
|
||||
return do_realtime_stick(vvd, ts);
|
||||
case CLOCK_MONOTONIC:
|
||||
if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
|
||||
break;
|
||||
return do_monotonic_stick(vvd, ts);
|
||||
case CLOCK_REALTIME_COARSE:
|
||||
return do_realtime_coarse(vvd, ts);
|
||||
case CLOCK_MONOTONIC_COARSE:
|
||||
return do_monotonic_coarse(vvd, ts);
|
||||
}
|
||||
/*
|
||||
* Unknown clock ID ? Fall back to the syscall.
|
||||
*/
|
||||
return vdso_fallback_gettime(clock, ts);
|
||||
}
|
||||
|
||||
notrace int
|
||||
__vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
|
||||
{
|
||||
struct vvar_data *vvd = get_vvar_data();
|
||||
|
||||
if (likely(vvd->vclock_mode != VCLOCK_NONE)) {
|
||||
if (likely(tv != NULL)) {
|
||||
union tstv_t {
|
||||
struct __kernel_old_timespec ts;
|
||||
struct __kernel_old_timeval tv;
|
||||
} *tstv = (union tstv_t *) tv;
|
||||
do_realtime(vvd, &tstv->ts);
|
||||
/*
|
||||
* Assign before dividing to ensure that the division is
|
||||
* done in the type of tv_usec, not tv_nsec.
|
||||
*
|
||||
* There cannot be > 1 billion usec in a second:
|
||||
* do_realtime() has already distributed such overflow
|
||||
* into tv_sec. So we can assign it to an int safely.
|
||||
*/
|
||||
tstv->tv.tv_usec = tstv->ts.tv_nsec;
|
||||
tstv->tv.tv_usec /= 1000;
|
||||
}
|
||||
if (unlikely(tz != NULL)) {
|
||||
/* Avoid memcpy. Some old compilers fail to inline it */
|
||||
tz->tz_minuteswest = vvd->tz_minuteswest;
|
||||
tz->tz_dsttime = vvd->tz_dsttime;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
return vdso_fallback_gettimeofday(tv, tz);
|
||||
}
|
||||
int
|
||||
gettimeofday(struct __kernel_old_timeval *, struct timezone *)
|
||||
__attribute__((weak, alias("__vdso_gettimeofday")));
|
||||
|
||||
notrace int
|
||||
__vdso_gettimeofday_stick(struct __kernel_old_timeval *tv, struct timezone *tz)
|
||||
{
|
||||
struct vvar_data *vvd = get_vvar_data();
|
||||
|
||||
if (likely(vvd->vclock_mode != VCLOCK_NONE)) {
|
||||
if (likely(tv != NULL)) {
|
||||
union tstv_t {
|
||||
struct __kernel_old_timespec ts;
|
||||
struct __kernel_old_timeval tv;
|
||||
} *tstv = (union tstv_t *) tv;
|
||||
do_realtime_stick(vvd, &tstv->ts);
|
||||
/*
|
||||
* Assign before dividing to ensure that the division is
|
||||
* done in the type of tv_usec, not tv_nsec.
|
||||
*
|
||||
* There cannot be > 1 billion usec in a second:
|
||||
* do_realtime() has already distributed such overflow
|
||||
* into tv_sec. So we can assign it to an int safely.
|
||||
*/
|
||||
tstv->tv.tv_usec = tstv->ts.tv_nsec;
|
||||
tstv->tv.tv_usec /= 1000;
|
||||
}
|
||||
if (unlikely(tz != NULL)) {
|
||||
/* Avoid memcpy. Some old compilers fail to inline it */
|
||||
tz->tz_minuteswest = vvd->tz_minuteswest;
|
||||
tz->tz_dsttime = vvd->tz_dsttime;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
return vdso_fallback_gettimeofday(tv, tz);
|
||||
}
|
||||
|
||||
@@ -4,15 +4,9 @@
|
||||
* This script controls its layout.
|
||||
*/
|
||||
|
||||
#if defined(BUILD_VDSO64)
|
||||
# define SHDR_SIZE 64
|
||||
#elif defined(BUILD_VDSO32)
|
||||
# define SHDR_SIZE 40
|
||||
#else
|
||||
# error unknown VDSO target
|
||||
#endif
|
||||
|
||||
#define NUM_FAKE_SHDRS 7
|
||||
#include <vdso/datapage.h>
|
||||
#include <vdso/page.h>
|
||||
#include <asm/vdso/vsyscall.h>
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
@@ -23,8 +17,7 @@ SECTIONS
|
||||
* segment. Page size is 8192 for both 64-bit and 32-bit vdso binaries
|
||||
*/
|
||||
|
||||
vvar_start = . -8192;
|
||||
vvar_data = vvar_start;
|
||||
VDSO_VVAR_SYMS
|
||||
|
||||
. = SIZEOF_HEADERS;
|
||||
|
||||
@@ -47,19 +40,8 @@ SECTIONS
|
||||
*(.bss*)
|
||||
*(.dynbss*)
|
||||
*(.gnu.linkonce.b.*)
|
||||
|
||||
/*
|
||||
* Ideally this would live in a C file: kept in here for
|
||||
* compatibility with x86-64.
|
||||
*/
|
||||
VDSO_FAKE_SECTION_TABLE_START = .;
|
||||
. = . + NUM_FAKE_SHDRS * SHDR_SIZE;
|
||||
VDSO_FAKE_SECTION_TABLE_END = .;
|
||||
} :text
|
||||
|
||||
.fake_shstrtab : { *(.fake_shstrtab) } :text
|
||||
|
||||
|
||||
.note : { *(.note.*) } :text :note
|
||||
|
||||
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
|
||||
|
||||
@@ -18,10 +18,8 @@ VERSION {
|
||||
global:
|
||||
clock_gettime;
|
||||
__vdso_clock_gettime;
|
||||
__vdso_clock_gettime_stick;
|
||||
gettimeofday;
|
||||
__vdso_gettimeofday;
|
||||
__vdso_gettimeofday_stick;
|
||||
local: *;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -58,28 +58,6 @@
|
||||
|
||||
const char *outfilename;
|
||||
|
||||
/* Symbols that we need in vdso2c; the values index required_syms[]. */
enum {
	sym_vvar_start,
	sym_VDSO_FAKE_SECTION_TABLE_START,
	sym_VDSO_FAKE_SECTION_TABLE_END,
};

/*
 * One symbol looked up in the vDSO image.
 *
 * @name:   symbol name as it appears in the image's symbol table
 * @export: non-zero if the symbol's value is written into the generated
 *          vdso_image initializer as a ".sym_<name>" member
 */
struct vdso_sym {
	const char *name;
	int export;
};

struct vdso_sym required_syms[] = {
	[sym_vvar_start]                    = { "vvar_start", 1 },
	[sym_VDSO_FAKE_SECTION_TABLE_START] = { "VDSO_FAKE_SECTION_TABLE_START", 0 },
	[sym_VDSO_FAKE_SECTION_TABLE_END]   = { "VDSO_FAKE_SECTION_TABLE_END", 0 },
};
|
||||
|
||||
__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
|
||||
static void fail(const char *format, ...)
|
||||
{
|
||||
@@ -119,8 +97,6 @@ static void fail(const char *format, ...)
|
||||
#define PUT_BE(x, val) \
|
||||
PBE(x, val, 64, PBE(x, val, 32, PBE(x, val, 16, LAST_PBE(x, val))))
|
||||
|
||||
#define NSYMS ARRAY_SIZE(required_syms)
|
||||
|
||||
#define BITSFUNC3(name, bits, suffix) name##bits##suffix
|
||||
#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
|
||||
#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )
|
||||
|
||||
@@ -17,11 +17,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
|
||||
unsigned long mapping_size;
|
||||
int i;
|
||||
unsigned long j;
|
||||
ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr;
|
||||
ELF(Shdr) *symtab_hdr = NULL;
|
||||
ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
|
||||
ELF(Dyn) *dyn = 0, *dyn_end = 0;
|
||||
INT_BITS syms[NSYMS] = {};
|
||||
|
||||
ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_BE(&hdr->e_phoff));
|
||||
|
||||
/* Walk the segment table. */
|
||||
@@ -72,42 +70,6 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
|
||||
if (!symtab_hdr)
|
||||
fail("no symbol table\n");
|
||||
|
||||
strtab_hdr = raw_addr + GET_BE(&hdr->e_shoff) +
|
||||
GET_BE(&hdr->e_shentsize) * GET_BE(&symtab_hdr->sh_link);
|
||||
|
||||
/* Walk the symbol table */
|
||||
for (i = 0;
|
||||
i < GET_BE(&symtab_hdr->sh_size) / GET_BE(&symtab_hdr->sh_entsize);
|
||||
i++) {
|
||||
int k;
|
||||
|
||||
ELF(Sym) *sym = raw_addr + GET_BE(&symtab_hdr->sh_offset) +
|
||||
GET_BE(&symtab_hdr->sh_entsize) * i;
|
||||
const char *name = raw_addr + GET_BE(&strtab_hdr->sh_offset) +
|
||||
GET_BE(&sym->st_name);
|
||||
|
||||
for (k = 0; k < NSYMS; k++) {
|
||||
if (!strcmp(name, required_syms[k].name)) {
|
||||
if (syms[k]) {
|
||||
fail("duplicate symbol %s\n",
|
||||
required_syms[k].name);
|
||||
}
|
||||
|
||||
/*
|
||||
* Careful: we use negative addresses, but
|
||||
* st_value is unsigned, so we rely
|
||||
* on syms[k] being a signed type of the
|
||||
* correct width.
|
||||
*/
|
||||
syms[k] = GET_BE(&sym->st_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate mapping addresses. */
|
||||
if (syms[sym_vvar_start] % 8192)
|
||||
fail("vvar_begin must be a multiple of 8192\n");
|
||||
|
||||
if (!name) {
|
||||
fwrite(stripped_addr, stripped_len, 1, outfile);
|
||||
return;
|
||||
@@ -133,10 +95,5 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
|
||||
fprintf(outfile, "const struct vdso_image %s_builtin = {\n", name);
|
||||
fprintf(outfile, "\t.data = raw_data,\n");
|
||||
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
|
||||
for (i = 0; i < NSYMS; i++) {
|
||||
if (required_syms[i].export && syms[i])
|
||||
fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
|
||||
required_syms[i].name, (int64_t)syms[i]);
|
||||
}
|
||||
fprintf(outfile, "};\n");
|
||||
}
|
||||
|
||||
@@ -17,10 +17,10 @@ VERSION {
|
||||
global:
|
||||
clock_gettime;
|
||||
__vdso_clock_gettime;
|
||||
__vdso_clock_gettime_stick;
|
||||
clock_gettime64;
|
||||
__vdso_clock_gettime64;
|
||||
gettimeofday;
|
||||
__vdso_gettimeofday;
|
||||
__vdso_gettimeofday_stick;
|
||||
local: *;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -16,17 +16,16 @@
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/elf.h>
|
||||
#include <linux/vdso_datastore.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/spitfire.h>
|
||||
#include <asm/vdso.h>
|
||||
#include <asm/vvar.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
unsigned int __read_mostly vdso_enabled = 1;
|
||||
#include <vdso/datapage.h>
|
||||
#include <asm/vdso/vsyscall.h>
|
||||
|
||||
static struct vm_special_mapping vvar_mapping = {
|
||||
.name = "[vvar]"
|
||||
};
|
||||
unsigned int __read_mostly vdso_enabled = 1;
|
||||
|
||||
#ifdef CONFIG_SPARC64
|
||||
static struct vm_special_mapping vdso_mapping64 = {
|
||||
@@ -40,207 +39,8 @@ static struct vm_special_mapping vdso_mapping32 = {
|
||||
};
|
||||
#endif
|
||||
|
||||
struct vvar_data *vvar_data;
|
||||
|
||||
struct vdso_elfinfo32 {
|
||||
Elf32_Ehdr *hdr;
|
||||
Elf32_Sym *dynsym;
|
||||
unsigned long dynsymsize;
|
||||
const char *dynstr;
|
||||
unsigned long text;
|
||||
};
|
||||
|
||||
struct vdso_elfinfo64 {
|
||||
Elf64_Ehdr *hdr;
|
||||
Elf64_Sym *dynsym;
|
||||
unsigned long dynsymsize;
|
||||
const char *dynstr;
|
||||
unsigned long text;
|
||||
};
|
||||
|
||||
struct vdso_elfinfo {
|
||||
union {
|
||||
struct vdso_elfinfo32 elf32;
|
||||
struct vdso_elfinfo64 elf64;
|
||||
} u;
|
||||
};
|
||||
|
||||
static void *one_section64(struct vdso_elfinfo64 *e, const char *name,
|
||||
unsigned long *size)
|
||||
{
|
||||
const char *snames;
|
||||
Elf64_Shdr *shdrs;
|
||||
unsigned int i;
|
||||
|
||||
shdrs = (void *)e->hdr + e->hdr->e_shoff;
|
||||
snames = (void *)e->hdr + shdrs[e->hdr->e_shstrndx].sh_offset;
|
||||
for (i = 1; i < e->hdr->e_shnum; i++) {
|
||||
if (!strcmp(snames+shdrs[i].sh_name, name)) {
|
||||
if (size)
|
||||
*size = shdrs[i].sh_size;
|
||||
return (void *)e->hdr + shdrs[i].sh_offset;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int find_sections64(const struct vdso_image *image, struct vdso_elfinfo *_e)
|
||||
{
|
||||
struct vdso_elfinfo64 *e = &_e->u.elf64;
|
||||
|
||||
e->hdr = image->data;
|
||||
e->dynsym = one_section64(e, ".dynsym", &e->dynsymsize);
|
||||
e->dynstr = one_section64(e, ".dynstr", NULL);
|
||||
|
||||
if (!e->dynsym || !e->dynstr) {
|
||||
pr_err("VDSO64: Missing symbol sections.\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static Elf64_Sym *find_sym64(const struct vdso_elfinfo64 *e, const char *name)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < (e->dynsymsize / sizeof(Elf64_Sym)); i++) {
|
||||
Elf64_Sym *s = &e->dynsym[i];
|
||||
if (s->st_name == 0)
|
||||
continue;
|
||||
if (!strcmp(e->dynstr + s->st_name, name))
|
||||
return s;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int patchsym64(struct vdso_elfinfo *_e, const char *orig,
|
||||
const char *new)
|
||||
{
|
||||
struct vdso_elfinfo64 *e = &_e->u.elf64;
|
||||
Elf64_Sym *osym = find_sym64(e, orig);
|
||||
Elf64_Sym *nsym = find_sym64(e, new);
|
||||
|
||||
if (!nsym || !osym) {
|
||||
pr_err("VDSO64: Missing symbols.\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
osym->st_value = nsym->st_value;
|
||||
osym->st_size = nsym->st_size;
|
||||
osym->st_info = nsym->st_info;
|
||||
osym->st_other = nsym->st_other;
|
||||
osym->st_shndx = nsym->st_shndx;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *one_section32(struct vdso_elfinfo32 *e, const char *name,
|
||||
unsigned long *size)
|
||||
{
|
||||
const char *snames;
|
||||
Elf32_Shdr *shdrs;
|
||||
unsigned int i;
|
||||
|
||||
shdrs = (void *)e->hdr + e->hdr->e_shoff;
|
||||
snames = (void *)e->hdr + shdrs[e->hdr->e_shstrndx].sh_offset;
|
||||
for (i = 1; i < e->hdr->e_shnum; i++) {
|
||||
if (!strcmp(snames+shdrs[i].sh_name, name)) {
|
||||
if (size)
|
||||
*size = shdrs[i].sh_size;
|
||||
return (void *)e->hdr + shdrs[i].sh_offset;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int find_sections32(const struct vdso_image *image, struct vdso_elfinfo *_e)
|
||||
{
|
||||
struct vdso_elfinfo32 *e = &_e->u.elf32;
|
||||
|
||||
e->hdr = image->data;
|
||||
e->dynsym = one_section32(e, ".dynsym", &e->dynsymsize);
|
||||
e->dynstr = one_section32(e, ".dynstr", NULL);
|
||||
|
||||
if (!e->dynsym || !e->dynstr) {
|
||||
pr_err("VDSO32: Missing symbol sections.\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static Elf32_Sym *find_sym32(const struct vdso_elfinfo32 *e, const char *name)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < (e->dynsymsize / sizeof(Elf32_Sym)); i++) {
|
||||
Elf32_Sym *s = &e->dynsym[i];
|
||||
if (s->st_name == 0)
|
||||
continue;
|
||||
if (!strcmp(e->dynstr + s->st_name, name))
|
||||
return s;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int patchsym32(struct vdso_elfinfo *_e, const char *orig,
|
||||
const char *new)
|
||||
{
|
||||
struct vdso_elfinfo32 *e = &_e->u.elf32;
|
||||
Elf32_Sym *osym = find_sym32(e, orig);
|
||||
Elf32_Sym *nsym = find_sym32(e, new);
|
||||
|
||||
if (!nsym || !osym) {
|
||||
pr_err("VDSO32: Missing symbols.\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
osym->st_value = nsym->st_value;
|
||||
osym->st_size = nsym->st_size;
|
||||
osym->st_info = nsym->st_info;
|
||||
osym->st_other = nsym->st_other;
|
||||
osym->st_shndx = nsym->st_shndx;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int find_sections(const struct vdso_image *image, struct vdso_elfinfo *e,
|
||||
bool elf64)
|
||||
{
|
||||
if (elf64)
|
||||
return find_sections64(image, e);
|
||||
else
|
||||
return find_sections32(image, e);
|
||||
}
|
||||
|
||||
/*
 * Redirect symbol @orig to @new_target using the 64-bit or 32-bit patcher
 * according to @elf64.  Returns whatever the selected helper returns.
 */
static int patch_one_symbol(struct vdso_elfinfo *e, const char *orig,
			    const char *new_target, bool elf64)
{
	return elf64 ? patchsym64(e, orig, new_target)
		     : patchsym32(e, orig, new_target);
}
|
||||
|
||||
/*
 * Redirect the exported time functions of @image to their *_stick variants.
 *
 * Locates the dynamic symbol sections of the image, then rewrites the
 * symbol table entries of __vdso_gettimeofday and __vdso_clock_gettime so
 * that they carry the value/size/info of __vdso_gettimeofday_stick and
 * __vdso_clock_gettime_stick respectively.
 *
 * @image: vDSO image whose symbol table is patched in place
 * @e:     scratch lookup state, filled in by find_sections()
 * @elf64: true for a 64-bit image, false for a 32-bit one
 *
 * Returns 0 on success or the first error reported by a helper.
 */
static int stick_patch(const struct vdso_image *image, struct vdso_elfinfo *e, bool elf64)
{
	int err;

	err = find_sections(image, e, elf64);
	if (err)
		return err;

	err = patch_one_symbol(e,
			       "__vdso_gettimeofday",
			       "__vdso_gettimeofday_stick", elf64);
	if (err)
		return err;

	/* Last step: its result is the overall result. */
	return patch_one_symbol(e,
				"__vdso_clock_gettime",
				"__vdso_clock_gettime_stick", elf64);
}
|
||||
|
||||
/*
|
||||
* Allocate pages for the vdso and vvar, and copy in the vdso text from the
|
||||
* Allocate pages for the vdso and copy in the vdso text from the
|
||||
* kernel image.
|
||||
*/
|
||||
static int __init init_vdso_image(const struct vdso_image *image,
|
||||
@@ -248,16 +48,8 @@ static int __init init_vdso_image(const struct vdso_image *image,
|
||||
bool elf64)
|
||||
{
|
||||
int cnpages = (image->size) / PAGE_SIZE;
|
||||
struct page *dp, **dpp = NULL;
|
||||
struct page *cp, **cpp = NULL;
|
||||
struct vdso_elfinfo ei;
|
||||
int i, dnpages = 0;
|
||||
|
||||
if (tlb_type != spitfire) {
|
||||
int err = stick_patch(image, &ei, elf64);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
int i;
|
||||
|
||||
/*
|
||||
* First, the vdso text. This is initialized data, an integral number of
|
||||
@@ -280,31 +72,6 @@ static int __init init_vdso_image(const struct vdso_image *image,
|
||||
copy_page(page_address(cp), image->data + i * PAGE_SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now the vvar page. This is uninitialized data.
|
||||
*/
|
||||
|
||||
if (vvar_data == NULL) {
|
||||
dnpages = (sizeof(struct vvar_data) / PAGE_SIZE) + 1;
|
||||
if (WARN_ON(dnpages != 1))
|
||||
goto oom;
|
||||
dpp = kzalloc_objs(struct page *, dnpages);
|
||||
vvar_mapping.pages = dpp;
|
||||
|
||||
if (!dpp)
|
||||
goto oom;
|
||||
|
||||
dp = alloc_page(GFP_KERNEL);
|
||||
if (!dp)
|
||||
goto oom;
|
||||
|
||||
dpp[0] = dp;
|
||||
vvar_data = page_address(dp);
|
||||
memset(vvar_data, 0, PAGE_SIZE);
|
||||
|
||||
vvar_data->seq = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
oom:
|
||||
if (cpp != NULL) {
|
||||
@@ -316,15 +83,6 @@ static int __init init_vdso_image(const struct vdso_image *image,
|
||||
vdso_mapping->pages = NULL;
|
||||
}
|
||||
|
||||
if (dpp != NULL) {
|
||||
for (i = 0; i < dnpages; i++) {
|
||||
if (dpp[i] != NULL)
|
||||
__free_page(dpp[i]);
|
||||
}
|
||||
kfree(dpp);
|
||||
vvar_mapping.pages = NULL;
|
||||
}
|
||||
|
||||
pr_warn("Cannot allocate vdso\n");
|
||||
vdso_enabled = 0;
|
||||
return -ENOMEM;
|
||||
@@ -359,9 +117,12 @@ static unsigned long vdso_addr(unsigned long start, unsigned int len)
|
||||
return start + (offset << PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static_assert(VDSO_NR_PAGES == __VDSO_PAGES);
|
||||
|
||||
static int map_vdso(const struct vdso_image *image,
|
||||
struct vm_special_mapping *vdso_mapping)
|
||||
{
|
||||
const size_t area_size = image->size + VDSO_NR_PAGES * PAGE_SIZE;
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long text_start, addr = 0;
|
||||
@@ -374,23 +135,20 @@ static int map_vdso(const struct vdso_image *image,
|
||||
* region is free.
|
||||
*/
|
||||
if (current->flags & PF_RANDOMIZE) {
|
||||
addr = get_unmapped_area(NULL, 0,
|
||||
image->size - image->sym_vvar_start,
|
||||
0, 0);
|
||||
addr = get_unmapped_area(NULL, 0, area_size, 0, 0);
|
||||
if (IS_ERR_VALUE(addr)) {
|
||||
ret = addr;
|
||||
goto up_fail;
|
||||
}
|
||||
addr = vdso_addr(addr, image->size - image->sym_vvar_start);
|
||||
addr = vdso_addr(addr, area_size);
|
||||
}
|
||||
addr = get_unmapped_area(NULL, addr,
|
||||
image->size - image->sym_vvar_start, 0, 0);
|
||||
addr = get_unmapped_area(NULL, addr, area_size, 0, 0);
|
||||
if (IS_ERR_VALUE(addr)) {
|
||||
ret = addr;
|
||||
goto up_fail;
|
||||
}
|
||||
|
||||
text_start = addr - image->sym_vvar_start;
|
||||
text_start = addr + VDSO_NR_PAGES * PAGE_SIZE;
|
||||
current->mm->context.vdso = (void __user *)text_start;
|
||||
|
||||
/*
|
||||
@@ -408,11 +166,7 @@ static int map_vdso(const struct vdso_image *image,
|
||||
goto up_fail;
|
||||
}
|
||||
|
||||
vma = _install_special_mapping(mm,
|
||||
addr,
|
||||
-image->sym_vvar_start,
|
||||
VM_READ|VM_MAYREAD,
|
||||
&vvar_mapping);
|
||||
vma = vdso_install_vvar_mapping(mm, addr);
|
||||
|
||||
if (IS_ERR(vma)) {
|
||||
ret = PTR_ERR(vma);
|
||||
|
||||
@@ -15,6 +15,10 @@ flags-y := -DBUILD_VDSO32 -m32 -mregparm=0
|
||||
flags-$(CONFIG_X86_64) += -include $(src)/fake_32bit_build.h
|
||||
flags-remove-y := -m64
|
||||
|
||||
# Checker flags
|
||||
CHECKFLAGS := $(subst -m64,-m32,$(CHECKFLAGS))
|
||||
CHECKFLAGS := $(subst -D__x86_64__,-D__i386__,$(CHECKFLAGS))
|
||||
|
||||
# The location of this include matters!
|
||||
include $(src)/../common/Makefile.include
|
||||
|
||||
|
||||
@@ -56,11 +56,7 @@
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <crypto/chacha.h>
|
||||
#include <crypto/blake2s.h>
|
||||
#ifdef CONFIG_VDSO_GETRANDOM
|
||||
#include <vdso/getrandom.h>
|
||||
#include <vdso/datapage.h>
|
||||
#include <vdso/vsyscall.h>
|
||||
#endif
|
||||
#include <asm/archrandom.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/irq.h>
|
||||
@@ -269,7 +265,7 @@ static void crng_reseed(struct work_struct *work)
|
||||
if (next_gen == ULONG_MAX)
|
||||
++next_gen;
|
||||
WRITE_ONCE(base_crng.generation, next_gen);
|
||||
#ifdef CONFIG_VDSO_GETRANDOM
|
||||
|
||||
/* base_crng.generation's invalid value is ULONG_MAX, while
|
||||
* vdso_k_rng_data->generation's invalid value is 0, so add one to the
|
||||
* former to arrive at the latter. Use smp_store_release so that this
|
||||
@@ -283,8 +279,9 @@ static void crng_reseed(struct work_struct *work)
|
||||
* because the vDSO side only checks whether the value changed, without
|
||||
* actually using or interpreting the value.
|
||||
*/
|
||||
smp_store_release((unsigned long *)&vdso_k_rng_data->generation, next_gen + 1);
|
||||
#endif
|
||||
if (IS_ENABLED(CONFIG_VDSO_GETRANDOM))
|
||||
smp_store_release((unsigned long *)&vdso_k_rng_data->generation, next_gen + 1);
|
||||
|
||||
if (!static_branch_likely(&crng_is_ready))
|
||||
crng_init = CRNG_READY;
|
||||
spin_unlock_irqrestore(&base_crng.lock, flags);
|
||||
@@ -734,9 +731,8 @@ static void __cold _credit_init_bits(size_t bits)
|
||||
if (system_dfl_wq)
|
||||
queue_work(system_dfl_wq, &set_ready);
|
||||
atomic_notifier_call_chain(&random_ready_notifier, 0, NULL);
|
||||
#ifdef CONFIG_VDSO_GETRANDOM
|
||||
WRITE_ONCE(vdso_k_rng_data->is_ready, true);
|
||||
#endif
|
||||
if (IS_ENABLED(CONFIG_VDSO_GETRANDOM))
|
||||
WRITE_ONCE(vdso_k_rng_data->is_ready, true);
|
||||
wake_up_interruptible(&crng_init_wait);
|
||||
kill_fasync(&fasync, SIGIO, POLL_IN);
|
||||
pr_notice("crng init done\n");
|
||||
|
||||
@@ -19,6 +19,15 @@
|
||||
#error Inconsistent word size. Check asm/bitsperlong.h
|
||||
#endif
|
||||
|
||||
#if __CHAR_BIT__ * __SIZEOF_LONG__ != __BITS_PER_LONG
|
||||
#error Inconsistent word size. Check asm/bitsperlong.h
|
||||
#endif
|
||||
|
||||
#ifndef __ASSEMBLER__
|
||||
_Static_assert(sizeof(long) * 8 == __BITS_PER_LONG,
|
||||
"Inconsistent word size. Check asm/bitsperlong.h");
|
||||
#endif
|
||||
|
||||
#ifndef BITS_PER_LONG_LONG
|
||||
#define BITS_PER_LONG_LONG 64
|
||||
#endif
|
||||
|
||||
@@ -25,8 +25,7 @@ struct clocksource_base;
|
||||
struct clocksource;
|
||||
struct module;
|
||||
|
||||
#if defined(CONFIG_ARCH_CLOCKSOURCE_DATA) || \
|
||||
defined(CONFIG_GENERIC_GETTIMEOFDAY)
|
||||
#if defined(CONFIG_GENERIC_GETTIMEOFDAY)
|
||||
#include <asm/clocksource.h>
|
||||
#endif
|
||||
|
||||
@@ -103,9 +102,6 @@ struct clocksource {
|
||||
u32 shift;
|
||||
u64 max_idle_ns;
|
||||
u32 maxadj;
|
||||
#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
|
||||
struct arch_clocksource_data archdata;
|
||||
#endif
|
||||
u64 max_cycles;
|
||||
u64 max_raw_delta;
|
||||
const char *name;
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <linux/ns_common.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/time64.h>
|
||||
#include <linux/cleanup.h>
|
||||
|
||||
struct user_namespace;
|
||||
extern struct user_namespace init_user_ns;
|
||||
@@ -25,7 +26,9 @@ struct time_namespace {
|
||||
struct ucounts *ucounts;
|
||||
struct ns_common ns;
|
||||
struct timens_offsets offsets;
|
||||
#ifdef CONFIG_TIME_NS_VDSO
|
||||
struct page *vvar_page;
|
||||
#endif
|
||||
/* If set prevents changing offsets after any task joined namespace. */
|
||||
bool frozen_offsets;
|
||||
} __randomize_layout;
|
||||
@@ -38,9 +41,6 @@ static inline struct time_namespace *to_time_ns(struct ns_common *ns)
|
||||
return container_of(ns, struct time_namespace, ns);
|
||||
}
|
||||
void __init time_ns_init(void);
|
||||
extern int vdso_join_timens(struct task_struct *task,
|
||||
struct time_namespace *ns);
|
||||
extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);
|
||||
|
||||
static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
|
||||
{
|
||||
@@ -53,7 +53,6 @@ struct time_namespace *copy_time_ns(u64 flags,
|
||||
struct time_namespace *old_ns);
|
||||
void free_time_ns(struct time_namespace *ns);
|
||||
void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
|
||||
struct page *find_timens_vvar_page(struct vm_area_struct *vma);
|
||||
|
||||
static inline void put_time_ns(struct time_namespace *ns)
|
||||
{
|
||||
@@ -117,17 +116,6 @@ static inline void __init time_ns_init(void)
|
||||
{
|
||||
}
|
||||
|
||||
/* No-op stub: reports success without doing anything. */
static inline int vdso_join_timens(struct task_struct *task,
				   struct time_namespace *ns)
{
	return 0;
}
|
||||
|
||||
/* No-op stub. */
static inline void timens_commit(struct task_struct *tsk,
				 struct time_namespace *ns) { }
|
||||
|
||||
static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
|
||||
{
|
||||
return NULL;
|
||||
@@ -154,11 +142,6 @@ static inline void timens_on_fork(struct nsproxy *nsproxy,
|
||||
return;
|
||||
}
|
||||
|
||||
static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void timens_add_monotonic(struct timespec64 *ts) { }
|
||||
static inline void timens_add_boottime(struct timespec64 *ts) { }
|
||||
|
||||
@@ -175,4 +158,20 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TIME_NS_VDSO
|
||||
extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);
|
||||
struct page *find_timens_vvar_page(struct vm_area_struct *vma);
|
||||
#else /* !CONFIG_TIME_NS_VDSO */
|
||||
/* Without CONFIG_TIME_NS_VDSO there is nothing to commit. */
static inline void timens_commit(struct task_struct *tsk,
				 struct time_namespace *ns) { }
|
||||
|
||||
static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#endif /* CONFIG_TIME_NS_VDSO */
|
||||
|
||||
DEFINE_FREE(time_ns, struct time_namespace *, if (_T) put_time_ns(_T))
|
||||
|
||||
#endif /* _LINUX_TIMENS_H */
|
||||
|
||||
@@ -2,9 +2,15 @@
|
||||
#ifndef _LINUX_VDSO_DATASTORE_H
|
||||
#define _LINUX_VDSO_DATASTORE_H
|
||||
|
||||
#ifdef CONFIG_HAVE_GENERIC_VDSO
|
||||
#include <linux/mm_types.h>
|
||||
|
||||
extern const struct vm_special_mapping vdso_vvar_mapping;
|
||||
struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr);
|
||||
|
||||
void __init vdso_setup_data_pages(void);
|
||||
#else /* !CONFIG_HAVE_GENERIC_VDSO */
|
||||
static inline void vdso_setup_data_pages(void) { }
|
||||
#endif /* CONFIG_HAVE_GENERIC_VDSO */
|
||||
|
||||
#endif /* _LINUX_VDSO_DATASTORE_H */
|
||||
|
||||
@@ -4,24 +4,16 @@
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <uapi/linux/bits.h>
|
||||
#include <uapi/linux/time.h>
|
||||
#include <uapi/linux/types.h>
|
||||
#include <uapi/asm-generic/errno-base.h>
|
||||
|
||||
#include <vdso/align.h>
|
||||
#include <vdso/bits.h>
|
||||
#include <vdso/cache.h>
|
||||
#include <vdso/clocksource.h>
|
||||
#include <vdso/ktime.h>
|
||||
#include <vdso/limits.h>
|
||||
#include <vdso/math64.h>
|
||||
#include <vdso/page.h>
|
||||
#include <vdso/processor.h>
|
||||
#include <vdso/time.h>
|
||||
#include <vdso/time32.h>
|
||||
#include <vdso/time64.h>
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_VDSO_TIME_DATA
|
||||
#include <asm/vdso/time_data.h>
|
||||
@@ -80,8 +72,8 @@ struct vdso_timestamp {
|
||||
* @mask: clocksource mask
|
||||
* @mult: clocksource multiplier
|
||||
* @shift: clocksource shift
|
||||
* @basetime[clock_id]: basetime per clock_id
|
||||
* @offset[clock_id]: time namespace offset per clock_id
|
||||
* @basetime: basetime per clock_id
|
||||
* @offset: time namespace offset per clock_id
|
||||
*
|
||||
* See also struct vdso_time_data for basic access and ordering information as
|
||||
* struct vdso_clock is used there.
|
||||
@@ -184,17 +176,6 @@ enum vdso_pages {
|
||||
VDSO_NR_PAGES
|
||||
};
|
||||
|
||||
/*
|
||||
* The generic vDSO implementation requires that gettimeofday.h
|
||||
* provides:
|
||||
* - __arch_get_hw_counter(): to get the hw counter based on the
|
||||
* clock_mode.
|
||||
* - gettimeofday_fallback(): fallback for gettimeofday.
|
||||
* - clock_gettime_fallback(): fallback for clock_gettime.
|
||||
* - clock_getres_fallback(): fallback for clock_getres.
|
||||
*/
|
||||
#include <asm/vdso/gettimeofday.h>
|
||||
|
||||
#else /* !__ASSEMBLY__ */
|
||||
|
||||
#ifdef CONFIG_VDSO_GETRANDOM
|
||||
|
||||
@@ -6,6 +6,13 @@
|
||||
|
||||
#include <asm/barrier.h>
|
||||
#include <vdso/datapage.h>
|
||||
#include <vdso/processor.h>
|
||||
#include <vdso/clocksource.h>
|
||||
|
||||
/*
 * Tell whether @vc is marked with VDSO_CLOCKMODE_TIMENS.
 *
 * Always false when CONFIG_TIME_NS is disabled; in that case
 * vc->clock_mode is not examined.
 */
static __always_inline bool vdso_is_timens_clock(const struct vdso_clock *vc)
{
	if (!IS_ENABLED(CONFIG_TIME_NS))
		return false;

	return vc->clock_mode == VDSO_CLOCKMODE_TIMENS;
}
|
||||
|
||||
static __always_inline u32 vdso_read_begin(const struct vdso_clock *vc)
|
||||
{
|
||||
@@ -18,6 +25,28 @@ static __always_inline u32 vdso_read_begin(const struct vdso_clock *vc)
|
||||
return seq;
|
||||
}
|
||||
|
||||
/*
|
||||
* Variant of vdso_read_begin() to handle VDSO_CLOCKMODE_TIMENS.
|
||||
*
|
||||
* Time namespace enabled tasks have a special VVAR page installed which has
|
||||
* vc->seq set to 1 and vc->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non
|
||||
* time namespace affected tasks this does not affect performance because if
|
||||
* vc->seq is odd, i.e. a concurrent update is in progress the extra check for
|
||||
* vc->clock_mode is just a few extra instructions while spin waiting for
|
||||
* vc->seq to become even again.
|
||||
*/
|
||||
static __always_inline bool vdso_read_begin_timens(const struct vdso_clock *vc, u32 *seq)
|
||||
{
|
||||
while (unlikely((*seq = READ_ONCE(vc->seq)) & 1)) {
|
||||
if (vdso_is_timens_clock(vc))
|
||||
return true;
|
||||
cpu_relax();
|
||||
}
|
||||
smp_rmb();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc,
|
||||
u32 start)
|
||||
{
|
||||
@@ -25,7 +54,7 @@ static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc,
|
||||
|
||||
smp_rmb();
|
||||
seq = READ_ONCE(vc->seq);
|
||||
return seq != start;
|
||||
return unlikely(seq != start);
|
||||
}
|
||||
|
||||
static __always_inline void vdso_write_seq_begin(struct vdso_clock *vc)
|
||||
|
||||
@@ -1400,12 +1400,14 @@ config UTS_NS
|
||||
|
||||
config TIME_NS
|
||||
bool "TIME namespace"
|
||||
depends on GENERIC_GETTIMEOFDAY
|
||||
default y
|
||||
help
|
||||
In this namespace boottime and monotonic clocks can be set.
|
||||
The time will keep going with the same pace.
|
||||
|
||||
config TIME_NS_VDSO
|
||||
def_bool TIME_NS && GENERIC_GETTIMEOFDAY
|
||||
|
||||
config IPC_NS
|
||||
bool "IPC namespace"
|
||||
depends on (SYSVIPC || POSIX_MQUEUE)
|
||||
|
||||
@@ -106,6 +106,7 @@
|
||||
#include <linux/ptdump.h>
|
||||
#include <linux/time_namespace.h>
|
||||
#include <linux/unaligned.h>
|
||||
#include <linux/vdso_datastore.h>
|
||||
#include <net/net_namespace.h>
|
||||
|
||||
#include <asm/io.h>
|
||||
@@ -1127,6 +1128,7 @@ void start_kernel(void)
|
||||
srcu_init();
|
||||
hrtimers_init();
|
||||
softirq_init();
|
||||
vdso_setup_data_pages();
|
||||
timekeeping_init();
|
||||
time_init();
|
||||
|
||||
|
||||
@@ -9,10 +9,6 @@
|
||||
config CLOCKSOURCE_WATCHDOG
|
||||
bool
|
||||
|
||||
# Architecture has extra clocksource data
|
||||
config ARCH_CLOCKSOURCE_DATA
|
||||
bool
|
||||
|
||||
# Architecture has extra clocksource init called from registration
|
||||
config ARCH_CLOCKSOURCE_INIT
|
||||
bool
|
||||
|
||||
@@ -30,5 +30,6 @@ obj-$(CONFIG_GENERIC_GETTIMEOFDAY) += vsyscall.o
|
||||
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
|
||||
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
|
||||
obj-$(CONFIG_TIME_NS) += namespace.o
|
||||
obj-$(CONFIG_TIME_NS_VDSO) += namespace_vdso.o
|
||||
obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) += clocksource-wdtest.o
|
||||
obj-$(CONFIG_TIME_KUNIT_TEST) += time_test.o
|
||||
|
||||
@@ -18,8 +18,9 @@
|
||||
#include <linux/cred.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/cleanup.h>
|
||||
|
||||
#include <vdso/datapage.h>
|
||||
#include "namespace_internal.h"
|
||||
|
||||
ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
|
||||
struct timens_offsets *ns_offsets)
|
||||
@@ -93,8 +94,8 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
|
||||
if (!ns)
|
||||
goto fail_dec;
|
||||
|
||||
ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
|
||||
if (!ns->vvar_page)
|
||||
err = timens_vdso_alloc_vvar_page(ns);
|
||||
if (err)
|
||||
goto fail_free;
|
||||
|
||||
err = ns_common_init(ns);
|
||||
@@ -109,7 +110,7 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
|
||||
return ns;
|
||||
|
||||
fail_free_page:
|
||||
__free_page(ns->vvar_page);
|
||||
timens_vdso_free_vvar_page(ns);
|
||||
fail_free:
|
||||
kfree(ns);
|
||||
fail_dec:
|
||||
@@ -138,117 +139,7 @@ struct time_namespace *copy_time_ns(u64 flags,
|
||||
return clone_time_ns(user_ns, old_ns);
|
||||
}
|
||||
|
||||
static struct timens_offset offset_from_ts(struct timespec64 off)
|
||||
{
|
||||
struct timens_offset ret;
|
||||
|
||||
ret.sec = off.tv_sec;
|
||||
ret.nsec = off.tv_nsec;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* A time namespace VVAR page has the same layout as the VVAR page which
|
||||
* contains the system wide VDSO data.
|
||||
*
|
||||
* For a normal task the VVAR pages are installed in the normal ordering:
|
||||
* VVAR
|
||||
* PVCLOCK
|
||||
* HVCLOCK
|
||||
* TIMENS <- Not really required
|
||||
*
|
||||
* Now for a timens task the pages are installed in the following order:
|
||||
* TIMENS
|
||||
* PVCLOCK
|
||||
* HVCLOCK
|
||||
* VVAR
|
||||
*
|
||||
* The check for vdso_clock->clock_mode is in the unlikely path of
|
||||
* the seq begin magic. So for the non-timens case most of the time
|
||||
* 'seq' is even, so the branch is not taken.
|
||||
*
|
||||
* If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
|
||||
* for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the
|
||||
* update to finish and for 'seq' to become even anyway.
|
||||
*
|
||||
* Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which
|
||||
* enforces the time namespace handling path.
|
||||
*/
|
||||
static void timens_setup_vdso_clock_data(struct vdso_clock *vc,
|
||||
struct time_namespace *ns)
|
||||
{
|
||||
struct timens_offset *offset = vc->offset;
|
||||
struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
|
||||
struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
|
||||
|
||||
vc->seq = 1;
|
||||
vc->clock_mode = VDSO_CLOCKMODE_TIMENS;
|
||||
offset[CLOCK_MONOTONIC] = monotonic;
|
||||
offset[CLOCK_MONOTONIC_RAW] = monotonic;
|
||||
offset[CLOCK_MONOTONIC_COARSE] = monotonic;
|
||||
offset[CLOCK_BOOTTIME] = boottime;
|
||||
offset[CLOCK_BOOTTIME_ALARM] = boottime;
|
||||
}
|
||||
|
||||
struct page *find_timens_vvar_page(struct vm_area_struct *vma)
|
||||
{
|
||||
if (likely(vma->vm_mm == current->mm))
|
||||
return current->nsproxy->time_ns->vvar_page;
|
||||
|
||||
/*
|
||||
* VM_PFNMAP | VM_IO protect .fault() handler from being called
|
||||
* through interfaces like /proc/$pid/mem or
|
||||
* process_vm_{readv,writev}() as long as there's no .access()
|
||||
* in special_mapping_vmops().
|
||||
* For more details check_vma_flags() and __access_remote_vm()
|
||||
*/
|
||||
|
||||
WARN(1, "vvar_page accessed remotely");
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Protects possibly multiple offsets writers racing each other
|
||||
* and tasks entering the namespace.
|
||||
*/
|
||||
static DEFINE_MUTEX(offset_lock);
|
||||
|
||||
static void timens_set_vvar_page(struct task_struct *task,
|
||||
struct time_namespace *ns)
|
||||
{
|
||||
struct vdso_time_data *vdata;
|
||||
struct vdso_clock *vc;
|
||||
unsigned int i;
|
||||
|
||||
if (ns == &init_time_ns)
|
||||
return;
|
||||
|
||||
/* Fast-path, taken by every task in namespace except the first. */
|
||||
if (likely(ns->frozen_offsets))
|
||||
return;
|
||||
|
||||
mutex_lock(&offset_lock);
|
||||
/* Nothing to-do: vvar_page has been already initialized. */
|
||||
if (ns->frozen_offsets)
|
||||
goto out;
|
||||
|
||||
ns->frozen_offsets = true;
|
||||
vdata = page_address(ns->vvar_page);
|
||||
vc = vdata->clock_data;
|
||||
|
||||
for (i = 0; i < CS_BASES; i++)
|
||||
timens_setup_vdso_clock_data(&vc[i], ns);
|
||||
|
||||
if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) {
|
||||
for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++)
|
||||
timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns);
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&offset_lock);
|
||||
}
|
||||
DEFINE_MUTEX(timens_offset_lock);
|
||||
|
||||
void free_time_ns(struct time_namespace *ns)
|
||||
{
|
||||
@@ -256,41 +147,39 @@ void free_time_ns(struct time_namespace *ns)
|
||||
dec_time_namespaces(ns->ucounts);
|
||||
put_user_ns(ns->user_ns);
|
||||
ns_common_free(ns);
|
||||
__free_page(ns->vvar_page);
|
||||
timens_vdso_free_vvar_page(ns);
|
||||
/* Concurrent nstree traversal depends on a grace period. */
|
||||
kfree_rcu(ns, ns.ns_rcu);
|
||||
}
|
||||
|
||||
static struct ns_common *timens_get(struct task_struct *task)
|
||||
{
|
||||
struct time_namespace *ns = NULL;
|
||||
struct time_namespace *ns;
|
||||
struct nsproxy *nsproxy;
|
||||
|
||||
task_lock(task);
|
||||
guard(task_lock)(task);
|
||||
nsproxy = task->nsproxy;
|
||||
if (nsproxy) {
|
||||
ns = nsproxy->time_ns;
|
||||
get_time_ns(ns);
|
||||
}
|
||||
task_unlock(task);
|
||||
if (!nsproxy)
|
||||
return NULL;
|
||||
|
||||
return ns ? &ns->ns : NULL;
|
||||
ns = nsproxy->time_ns;
|
||||
get_time_ns(ns);
|
||||
return &ns->ns;
|
||||
}
|
||||
|
||||
static struct ns_common *timens_for_children_get(struct task_struct *task)
|
||||
{
|
||||
struct time_namespace *ns = NULL;
|
||||
struct time_namespace *ns;
|
||||
struct nsproxy *nsproxy;
|
||||
|
||||
task_lock(task);
|
||||
guard(task_lock)(task);
|
||||
nsproxy = task->nsproxy;
|
||||
if (nsproxy) {
|
||||
ns = nsproxy->time_ns_for_children;
|
||||
get_time_ns(ns);
|
||||
}
|
||||
task_unlock(task);
|
||||
if (!nsproxy)
|
||||
return NULL;
|
||||
|
||||
return ns ? &ns->ns : NULL;
|
||||
ns = nsproxy->time_ns_for_children;
|
||||
get_time_ns(ns);
|
||||
return &ns->ns;
|
||||
}
|
||||
|
||||
static void timens_put(struct ns_common *ns)
|
||||
@@ -298,12 +187,6 @@ static void timens_put(struct ns_common *ns)
|
||||
put_time_ns(to_time_ns(ns));
|
||||
}
|
||||
|
||||
void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
|
||||
{
|
||||
timens_set_vvar_page(tsk, ns);
|
||||
vdso_join_timens(tsk, ns);
|
||||
}
|
||||
|
||||
static int timens_install(struct nsset *nsset, struct ns_common *new)
|
||||
{
|
||||
struct nsproxy *nsproxy = nsset->nsproxy;
|
||||
@@ -367,36 +250,33 @@ static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts)
|
||||
|
||||
void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m)
|
||||
{
|
||||
struct ns_common *ns;
|
||||
struct time_namespace *time_ns;
|
||||
struct time_namespace *time_ns __free(time_ns) = NULL;
|
||||
struct ns_common *ns = timens_for_children_get(p);
|
||||
|
||||
ns = timens_for_children_get(p);
|
||||
if (!ns)
|
||||
return;
|
||||
|
||||
time_ns = to_time_ns(ns);
|
||||
|
||||
show_offset(m, CLOCK_MONOTONIC, &time_ns->offsets.monotonic);
|
||||
show_offset(m, CLOCK_BOOTTIME, &time_ns->offsets.boottime);
|
||||
put_time_ns(time_ns);
|
||||
}
|
||||
|
||||
int proc_timens_set_offset(struct file *file, struct task_struct *p,
|
||||
struct proc_timens_offset *offsets, int noffsets)
|
||||
{
|
||||
struct ns_common *ns;
|
||||
struct time_namespace *time_ns;
|
||||
struct time_namespace *time_ns __free(time_ns) = NULL;
|
||||
struct ns_common *ns = timens_for_children_get(p);
|
||||
struct timespec64 tp;
|
||||
int i, err;
|
||||
int i;
|
||||
|
||||
ns = timens_for_children_get(p);
|
||||
if (!ns)
|
||||
return -ESRCH;
|
||||
|
||||
time_ns = to_time_ns(ns);
|
||||
|
||||
if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) {
|
||||
put_time_ns(time_ns);
|
||||
if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME))
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
for (i = 0; i < noffsets; i++) {
|
||||
struct proc_timens_offset *off = &offsets[i];
|
||||
@@ -409,15 +289,12 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p,
|
||||
ktime_get_boottime_ts64(&tp);
|
||||
break;
|
||||
default:
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = -ERANGE;
|
||||
|
||||
if (off->val.tv_sec > KTIME_SEC_MAX ||
|
||||
off->val.tv_sec < -KTIME_SEC_MAX)
|
||||
goto out;
|
||||
return -ERANGE;
|
||||
|
||||
tp = timespec64_add(tp, off->val);
|
||||
/*
|
||||
@@ -425,16 +302,13 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p,
|
||||
* still unreachable.
|
||||
*/
|
||||
if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2)
|
||||
goto out;
|
||||
return -ERANGE;
|
||||
}
|
||||
|
||||
mutex_lock(&offset_lock);
|
||||
if (time_ns->frozen_offsets) {
|
||||
err = -EACCES;
|
||||
goto out_unlock;
|
||||
}
|
||||
guard(mutex)(&timens_offset_lock);
|
||||
if (time_ns->frozen_offsets)
|
||||
return -EACCES;
|
||||
|
||||
err = 0;
|
||||
/* Don't report errors after this line */
|
||||
for (i = 0; i < noffsets; i++) {
|
||||
struct proc_timens_offset *off = &offsets[i];
|
||||
@@ -452,12 +326,7 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p,
|
||||
*offset = off->val;
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&offset_lock);
|
||||
out:
|
||||
put_time_ns(time_ns);
|
||||
|
||||
return err;
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct proc_ns_operations timens_operations = {
|
||||
|
||||
28
kernel/time/namespace_internal.h
Normal file
28
kernel/time/namespace_internal.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _TIME_NAMESPACE_INTERNAL_H
|
||||
#define _TIME_NAMESPACE_INTERNAL_H
|
||||
|
||||
#include <linux/mutex.h>
|
||||
|
||||
struct time_namespace;
|
||||
|
||||
/*
|
||||
* Protects possibly multiple offsets writers racing each other
|
||||
* and tasks entering the namespace.
|
||||
*/
|
||||
extern struct mutex timens_offset_lock;
|
||||
|
||||
#ifdef CONFIG_TIME_NS_VDSO
|
||||
int timens_vdso_alloc_vvar_page(struct time_namespace *ns);
|
||||
void timens_vdso_free_vvar_page(struct time_namespace *ns);
|
||||
#else /* !CONFIG_TIME_NS_VDSO */
|
||||
/* Stub for !CONFIG_TIME_NS_VDSO: nothing is allocated, always reports success (0). */
static inline int timens_vdso_alloc_vvar_page(struct time_namespace *ns)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
/* Stub for !CONFIG_TIME_NS_VDSO: there is no VVAR page to free. */
static inline void timens_vdso_free_vvar_page(struct time_namespace *ns)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_TIME_NS_VDSO */
|
||||
|
||||
#endif /* _TIME_NAMESPACE_INTERNAL_H */
|
||||
160
kernel/time/namespace_vdso.c
Normal file
160
kernel/time/namespace_vdso.c
Normal file
@@ -0,0 +1,160 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Author: Andrei Vagin <avagin@openvz.org>
|
||||
* Author: Dmitry Safonov <dima@arista.com>
|
||||
*/
|
||||
|
||||
#include <linux/cleanup.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/time_namespace.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/vdso_datastore.h>
|
||||
|
||||
#include <vdso/clocksource.h>
|
||||
#include <vdso/datapage.h>
|
||||
|
||||
#include "namespace_internal.h"
|
||||
|
||||
/*
* Convert a timespec64 into a struct timens_offset by copying the seconds
* and nanoseconds fields. The result is returned by value.
*/
static struct timens_offset offset_from_ts(struct timespec64 off)
|
||||
{
|
||||
struct timens_offset ret;
|
||||
|
||||
ret.sec = off.tv_sec;
|
||||
ret.nsec = off.tv_nsec;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* A time namespace VVAR page has the same layout as the VVAR page which
|
||||
* contains the system wide VDSO data.
|
||||
*
|
||||
* For a normal task the VVAR pages are installed in the normal ordering:
|
||||
* VVAR
|
||||
* PVCLOCK
|
||||
* HVCLOCK
|
||||
* TIMENS <- Not really required
|
||||
*
|
||||
* Now for a timens task the pages are installed in the following order:
|
||||
* TIMENS
|
||||
* PVCLOCK
|
||||
* HVCLOCK
|
||||
* VVAR
|
||||
*
|
||||
* The check for vdso_clock->clock_mode is in the unlikely path of
|
||||
* the seq begin magic. So for the non-timens case most of the time
|
||||
* 'seq' is even, so the branch is not taken.
|
||||
*
|
||||
* If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
|
||||
* for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the
|
||||
* update to finish and for 'seq' to become even anyway.
|
||||
*
|
||||
* Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which
|
||||
* enforces the time namespace handling path.
|
||||
*/
|
||||
static void timens_setup_vdso_clock_data(struct vdso_clock *vc,
|
||||
struct time_namespace *ns)
|
||||
{
|
||||
struct timens_offset *offset = vc->offset;
|
||||
struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
|
||||
struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
|
||||
|
||||
/*
* An odd sequence count sends readers into the "update in progress" path,
* where clock_mode then selects the time namespace handling.
*/
vc->seq = 1;
|
||||
vc->clock_mode = VDSO_CLOCKMODE_TIMENS;
|
||||
/* The three monotonic clock ids share the namespace's monotonic offset. */
offset[CLOCK_MONOTONIC] = monotonic;
|
||||
offset[CLOCK_MONOTONIC_RAW] = monotonic;
|
||||
offset[CLOCK_MONOTONIC_COARSE] = monotonic;
|
||||
/* Both boottime clock ids share the namespace's boottime offset. */
offset[CLOCK_BOOTTIME] = boottime;
|
||||
offset[CLOCK_BOOTTIME_ALARM] = boottime;
|
||||
}
|
||||
|
||||
/*
* Return the time namespace VVAR page of the current task if @vma belongs to
* current's mm. Any other caller is unexpected: warn and return NULL.
*/
struct page *find_timens_vvar_page(struct vm_area_struct *vma)
|
||||
{
|
||||
if (likely(vma->vm_mm == current->mm))
|
||||
return current->nsproxy->time_ns->vvar_page;
|
||||
|
||||
/*
|
||||
* VM_PFNMAP | VM_IO protect .fault() handler from being called
|
||||
* through interfaces like /proc/$pid/mem or
|
||||
* process_vm_{readv,writev}() as long as there's no .access()
|
||||
* in special_mapping_vmops().
|
||||
* For more details see check_vma_flags() and __access_remote_vm().
|
||||
*/
|
||||
/*
* NOTE(review): the vvar mapping flags change from VM_PFNMAP to VM_MIXEDMAP
* elsewhere in this diff - confirm the statement above still holds with
* VM_IO alone.
*/
|
||||
WARN(1, "vvar_page accessed remotely");
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
* Fill the namespace's VVAR page from ns->offsets the first time this is
* called for @ns, and mark the offsets as frozen. Does nothing for
* init_time_ns or for a namespace that is already frozen.
* @task is not used in the body.
*/
static void timens_set_vvar_page(struct task_struct *task,
|
||||
struct time_namespace *ns)
|
||||
{
|
||||
struct vdso_time_data *vdata;
|
||||
struct vdso_clock *vc;
|
||||
unsigned int i;
|
||||
|
||||
if (ns == &init_time_ns)
|
||||
return;
|
||||
|
||||
/* Fast-path, taken by every task in namespace except the first. */
|
||||
if (likely(ns->frozen_offsets))
|
||||
return;
|
||||
|
||||
/* Held until the function returns; re-check the flag under the lock. */
guard(mutex)(&timens_offset_lock);
|
||||
/* Nothing to do: vvar_page has already been initialized. */
|
||||
if (ns->frozen_offsets)
|
||||
return;
|
||||
|
||||
ns->frozen_offsets = true;
|
||||
vdata = page_address(ns->vvar_page);
|
||||
vc = vdata->clock_data;
|
||||
|
||||
/* Set up every clock base in the page. */
for (i = 0; i < CS_BASES; i++)
|
||||
timens_setup_vdso_clock_data(&vc[i], ns);
|
||||
|
||||
/* The auxiliary clock entries get the same setup when they are configured in. */
if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) {
|
||||
for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++)
|
||||
timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The vvar page layout depends on whether a task belongs to the root or
|
||||
* non-root time namespace. Whenever a task changes its namespace, the VVAR
|
||||
* page tables are cleared and then they will be re-faulted with a
|
||||
* corresponding layout.
|
||||
* See also the comment near timens_setup_vdso_clock_data() for details.
|
||||
*/
|
||||
/* Zap the pages of every VVAR special mapping in @task's mm. Always returns 0; @ns is not used. */
static int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
|
||||
{
|
||||
struct mm_struct *mm = task->mm;
|
||||
struct vm_area_struct *vma;
|
||||
VMA_ITERATOR(vmi, mm, 0);
|
||||
|
||||
/* Scope-based lock: held for the VMA walk and dropped on return. */
guard(mmap_read_lock)(mm);
|
||||
for_each_vma(vmi, vma) {
|
||||
if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
|
||||
zap_vma_pages(vma);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
* Apply time namespace @ns to @tsk on the vDSO side: set up the namespace's
* VVAR page if that has not happened yet, then zap the task's VVAR mappings.
* The return value of vdso_join_timens() is ignored.
*/
void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
|
||||
{
|
||||
timens_set_vvar_page(tsk, ns);
|
||||
vdso_join_timens(tsk, ns);
|
||||
}
|
||||
|
||||
/*
* Allocate one zeroed page for the namespace's VVAR data and store it in
* ns->vvar_page.
*
* Returns 0 on success, -ENOMEM if the allocation failed.
*/
int timens_vdso_alloc_vvar_page(struct time_namespace *ns)
|
||||
{
|
||||
ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
|
||||
if (!ns->vvar_page)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Release the page stored in ns->vvar_page. */
void timens_vdso_free_vvar_page(struct time_namespace *ns)
|
||||
{
|
||||
__free_page(ns->vvar_page);
|
||||
}
|
||||
@@ -1,64 +1,92 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/mmap_lock.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/time_namespace.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/vdso_datastore.h>
|
||||
#include <vdso/datapage.h>
|
||||
|
||||
/*
|
||||
* The vDSO data page.
|
||||
*/
|
||||
static u8 vdso_initdata[VDSO_NR_PAGES * PAGE_SIZE] __aligned(PAGE_SIZE) __initdata = {};
|
||||
|
||||
#ifdef CONFIG_GENERIC_GETTIMEOFDAY
|
||||
static union {
|
||||
struct vdso_time_data data;
|
||||
u8 page[PAGE_SIZE];
|
||||
} vdso_time_data_store __page_aligned_data;
|
||||
struct vdso_time_data *vdso_k_time_data = &vdso_time_data_store.data;
|
||||
static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE);
|
||||
struct vdso_time_data *vdso_k_time_data __refdata =
|
||||
(void *)&vdso_initdata[VDSO_TIME_PAGE_OFFSET * PAGE_SIZE];
|
||||
|
||||
static_assert(sizeof(struct vdso_time_data) <= PAGE_SIZE);
|
||||
#endif /* CONFIG_GENERIC_GETTIMEOFDAY */
|
||||
|
||||
#ifdef CONFIG_VDSO_GETRANDOM
|
||||
static union {
|
||||
struct vdso_rng_data data;
|
||||
u8 page[PAGE_SIZE];
|
||||
} vdso_rng_data_store __page_aligned_data;
|
||||
struct vdso_rng_data *vdso_k_rng_data = &vdso_rng_data_store.data;
|
||||
static_assert(sizeof(vdso_rng_data_store) == PAGE_SIZE);
|
||||
struct vdso_rng_data *vdso_k_rng_data __refdata =
|
||||
(void *)&vdso_initdata[VDSO_RNG_PAGE_OFFSET * PAGE_SIZE];
|
||||
|
||||
static_assert(sizeof(struct vdso_rng_data) <= PAGE_SIZE);
|
||||
#endif /* CONFIG_VDSO_GETRANDOM */
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_VDSO_ARCH_DATA
|
||||
static union {
|
||||
struct vdso_arch_data data;
|
||||
u8 page[VDSO_ARCH_DATA_SIZE];
|
||||
} vdso_arch_data_store __page_aligned_data;
|
||||
struct vdso_arch_data *vdso_k_arch_data = &vdso_arch_data_store.data;
|
||||
struct vdso_arch_data *vdso_k_arch_data __refdata =
|
||||
(void *)&vdso_initdata[VDSO_ARCH_PAGES_START * PAGE_SIZE];
|
||||
#endif /* CONFIG_ARCH_HAS_VDSO_ARCH_DATA */
|
||||
|
||||
/*
* Replace the static __initdata vDSO storage with dynamically allocated
* pages: allocate them, copy over what has been written so far and repoint
* the vdso_k_*_data pointers at the new pages. Panics if the allocation
* fails.
*/
void __init vdso_setup_data_pages(void)
|
||||
{
|
||||
unsigned int order = get_order(VDSO_NR_PAGES * PAGE_SIZE);
|
||||
struct page *pages;
|
||||
|
||||
/*
|
||||
* Allocate the data pages dynamically. SPARC does not support mapping
|
||||
* static pages into userspace.
|
||||
* It is also a requirement for mlockall() support.
|
||||
*
|
||||
* Do not use folios. In time namespaces the pages are mapped in a different order
|
||||
* to userspace, which is not handled by the folio optimizations in finish_fault().
|
||||
*/
|
||||
/*
* NOTE(review): the allocation is sized by order, so if VDSO_NR_PAGES is not
* a power of two it contains more pages than are used below - confirm the
* surplus pages are intentionally left allocated.
*/
pages = alloc_pages(GFP_KERNEL, order);
|
||||
if (!pages)
|
||||
panic("Unable to allocate VDSO storage pages");
|
||||
|
||||
/* The pages are mapped one-by-one into userspace and each one needs to be refcounted. */
|
||||
split_page(pages, order);
|
||||
|
||||
/* Move the data already written by other subsystems to the new pages */
|
||||
memcpy(page_address(pages), vdso_initdata, VDSO_NR_PAGES * PAGE_SIZE);
|
||||
|
||||
/* Repoint each configured data pointer from vdso_initdata to its new page. */
if (IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY))
|
||||
vdso_k_time_data = page_address(pages + VDSO_TIME_PAGE_OFFSET);
|
||||
|
||||
if (IS_ENABLED(CONFIG_VDSO_GETRANDOM))
|
||||
vdso_k_rng_data = page_address(pages + VDSO_RNG_PAGE_OFFSET);
|
||||
|
||||
if (IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA))
|
||||
vdso_k_arch_data = page_address(pages + VDSO_ARCH_PAGES_START);
|
||||
}
|
||||
|
||||
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
|
||||
struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
struct page *timens_page = find_timens_vvar_page(vma);
|
||||
unsigned long addr, pfn;
|
||||
vm_fault_t err;
|
||||
struct page *page, *timens_page;
|
||||
|
||||
timens_page = find_timens_vvar_page(vma);
|
||||
|
||||
switch (vmf->pgoff) {
|
||||
case VDSO_TIME_PAGE_OFFSET:
|
||||
if (!IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY))
|
||||
return VM_FAULT_SIGBUS;
|
||||
pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data));
|
||||
page = virt_to_page(vdso_k_time_data);
|
||||
if (timens_page) {
|
||||
/*
|
||||
* Fault in VVAR page too, since it will be accessed
|
||||
* to get clock data anyway.
|
||||
*/
|
||||
unsigned long addr;
|
||||
vm_fault_t err;
|
||||
|
||||
addr = vmf->address + VDSO_TIMENS_PAGE_OFFSET * PAGE_SIZE;
|
||||
err = vmf_insert_pfn(vma, addr, pfn);
|
||||
err = vmf_insert_page(vma, addr, page);
|
||||
if (unlikely(err & VM_FAULT_ERROR))
|
||||
return err;
|
||||
pfn = page_to_pfn(timens_page);
|
||||
page = timens_page;
|
||||
}
|
||||
break;
|
||||
case VDSO_TIMENS_PAGE_OFFSET:
|
||||
@@ -71,24 +99,25 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_TIME_NS) || !timens_page)
|
||||
return VM_FAULT_SIGBUS;
|
||||
pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data));
|
||||
page = virt_to_page(vdso_k_time_data);
|
||||
break;
|
||||
case VDSO_RNG_PAGE_OFFSET:
|
||||
if (!IS_ENABLED(CONFIG_VDSO_GETRANDOM))
|
||||
return VM_FAULT_SIGBUS;
|
||||
pfn = __phys_to_pfn(__pa_symbol(vdso_k_rng_data));
|
||||
page = virt_to_page(vdso_k_rng_data);
|
||||
break;
|
||||
case VDSO_ARCH_PAGES_START ... VDSO_ARCH_PAGES_END:
|
||||
if (!IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA))
|
||||
return VM_FAULT_SIGBUS;
|
||||
pfn = __phys_to_pfn(__pa_symbol(vdso_k_arch_data)) +
|
||||
vmf->pgoff - VDSO_ARCH_PAGES_START;
|
||||
page = virt_to_page(vdso_k_arch_data) + vmf->pgoff - VDSO_ARCH_PAGES_START;
|
||||
break;
|
||||
default:
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
return vmf_insert_pfn(vma, vmf->address, pfn);
|
||||
get_page(page);
|
||||
vmf->page = page;
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct vm_special_mapping vdso_vvar_mapping = {
|
||||
@@ -100,31 +129,6 @@ struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned
|
||||
{
|
||||
return _install_special_mapping(mm, addr, VDSO_NR_PAGES * PAGE_SIZE,
|
||||
VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP |
|
||||
VM_PFNMAP | VM_SEALED_SYSMAP,
|
||||
VM_MIXEDMAP | VM_SEALED_SYSMAP,
|
||||
&vdso_vvar_mapping);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TIME_NS
|
||||
/*
|
||||
* The vvar page layout depends on whether a task belongs to the root or
|
||||
* non-root time namespace. Whenever a task changes its namespace, the VVAR
|
||||
* page tables are cleared and then they will be re-faulted with a
|
||||
* corresponding layout.
|
||||
* See also the comment near timens_setup_vdso_clock_data() for details.
|
||||
*/
|
||||
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
|
||||
{
|
||||
struct mm_struct *mm = task->mm;
|
||||
struct vm_area_struct *vma;
|
||||
VMA_ITERATOR(vmi, mm, 0);
|
||||
|
||||
mmap_read_lock(mm);
|
||||
for_each_vma(vmi, vma) {
|
||||
if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
|
||||
zap_vma_pages(vma);
|
||||
}
|
||||
mmap_read_unlock(mm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -7,8 +7,11 @@
|
||||
#include <linux/minmax.h>
|
||||
#include <vdso/datapage.h>
|
||||
#include <vdso/getrandom.h>
|
||||
#include <vdso/limits.h>
|
||||
#include <vdso/unaligned.h>
|
||||
#include <asm/barrier.h>
|
||||
#include <asm/vdso/getrandom.h>
|
||||
#include <uapi/linux/errno.h>
|
||||
#include <uapi/linux/mman.h>
|
||||
#include <uapi/linux/random.h>
|
||||
|
||||
|
||||
@@ -3,8 +3,25 @@
|
||||
* Generic userspace implementations of gettimeofday() and similar.
|
||||
*/
|
||||
#include <vdso/auxclock.h>
|
||||
#include <vdso/clocksource.h>
|
||||
#include <vdso/datapage.h>
|
||||
#include <vdso/helpers.h>
|
||||
#include <vdso/ktime.h>
|
||||
#include <vdso/limits.h>
|
||||
#include <vdso/math64.h>
|
||||
#include <vdso/time32.h>
|
||||
#include <vdso/time64.h>
|
||||
|
||||
/*
|
||||
* The generic vDSO implementation requires that gettimeofday.h
|
||||
* provides:
|
||||
* - __arch_get_hw_counter(): to get the hw counter based on the
|
||||
* clock_mode.
|
||||
* - gettimeofday_fallback(): fallback for gettimeofday.
|
||||
* - clock_gettime_fallback(): fallback for clock_gettime.
|
||||
* - clock_getres_fallback(): fallback for clock_getres.
|
||||
*/
|
||||
#include <asm/vdso/gettimeofday.h>
|
||||
|
||||
/* Bring in default accessors */
|
||||
#include <vdso/vsyscall.h>
|
||||
@@ -135,7 +152,7 @@ bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *
|
||||
|
||||
if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns))
|
||||
return false;
|
||||
} while (unlikely(vdso_read_retry(vc, seq)));
|
||||
} while (vdso_read_retry(vc, seq));
|
||||
|
||||
/* Add the namespace offset */
|
||||
sec += offs->sec;
|
||||
@@ -158,28 +175,12 @@ bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc,
|
||||
return false;
|
||||
|
||||
do {
|
||||
/*
|
||||
* Open coded function vdso_read_begin() to handle
|
||||
* VDSO_CLOCKMODE_TIMENS. Time namespace enabled tasks have a
|
||||
* special VVAR page installed which has vc->seq set to 1 and
|
||||
* vc->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non time
|
||||
* namespace affected tasks this does not affect performance
|
||||
* because if vc->seq is odd, i.e. a concurrent update is in
|
||||
* progress the extra check for vc->clock_mode is just a few
|
||||
* extra instructions while spin waiting for vc->seq to become
|
||||
* even again.
|
||||
*/
|
||||
while (unlikely((seq = READ_ONCE(vc->seq)) & 1)) {
|
||||
if (IS_ENABLED(CONFIG_TIME_NS) &&
|
||||
vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
|
||||
return do_hres_timens(vd, vc, clk, ts);
|
||||
cpu_relax();
|
||||
}
|
||||
smp_rmb();
|
||||
if (vdso_read_begin_timens(vc, &seq))
|
||||
return do_hres_timens(vd, vc, clk, ts);
|
||||
|
||||
if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns))
|
||||
return false;
|
||||
} while (unlikely(vdso_read_retry(vc, seq)));
|
||||
} while (vdso_read_retry(vc, seq));
|
||||
|
||||
vdso_set_timespec(ts, sec, ns);
|
||||
|
||||
@@ -204,7 +205,7 @@ bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock
|
||||
seq = vdso_read_begin(vc);
|
||||
sec = vdso_ts->sec;
|
||||
nsec = vdso_ts->nsec;
|
||||
} while (unlikely(vdso_read_retry(vc, seq)));
|
||||
} while (vdso_read_retry(vc, seq));
|
||||
|
||||
/* Add the namespace offset */
|
||||
sec += offs->sec;
|
||||
@@ -223,21 +224,12 @@ bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc,
|
||||
u32 seq;
|
||||
|
||||
do {
|
||||
/*
|
||||
* Open coded function vdso_read_begin() to handle
|
||||
* VDSO_CLOCK_TIMENS. See comment in do_hres().
|
||||
*/
|
||||
while ((seq = READ_ONCE(vc->seq)) & 1) {
|
||||
if (IS_ENABLED(CONFIG_TIME_NS) &&
|
||||
vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
|
||||
return do_coarse_timens(vd, vc, clk, ts);
|
||||
cpu_relax();
|
||||
}
|
||||
smp_rmb();
|
||||
if (vdso_read_begin_timens(vc, &seq))
|
||||
return do_coarse_timens(vd, vc, clk, ts);
|
||||
|
||||
ts->tv_sec = vdso_ts->sec;
|
||||
ts->tv_nsec = vdso_ts->nsec;
|
||||
} while (unlikely(vdso_read_retry(vc, seq)));
|
||||
} while (vdso_read_retry(vc, seq));
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -256,20 +248,12 @@ bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_ti
|
||||
vc = &vd->aux_clock_data[idx];
|
||||
|
||||
do {
|
||||
/*
|
||||
* Open coded function vdso_read_begin() to handle
|
||||
* VDSO_CLOCK_TIMENS. See comment in do_hres().
|
||||
*/
|
||||
while ((seq = READ_ONCE(vc->seq)) & 1) {
|
||||
if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) {
|
||||
vd = __arch_get_vdso_u_timens_data(vd);
|
||||
vc = &vd->aux_clock_data[idx];
|
||||
/* Re-read from the real time data page */
|
||||
continue;
|
||||
}
|
||||
cpu_relax();
|
||||
if (vdso_read_begin_timens(vc, &seq)) {
|
||||
vd = __arch_get_vdso_u_timens_data(vd);
|
||||
vc = &vd->aux_clock_data[idx];
|
||||
/* Re-read from the real time data page */
|
||||
continue;
|
||||
}
|
||||
smp_rmb();
|
||||
|
||||
/* Auxclock disabled? */
|
||||
if (vc->clock_mode == VDSO_CLOCKMODE_NONE)
|
||||
@@ -277,7 +261,7 @@ bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_ti
|
||||
|
||||
if (!vdso_get_timestamp(vd, vc, VDSO_BASE_AUX, &sec, &ns))
|
||||
return false;
|
||||
} while (unlikely(vdso_read_retry(vc, seq)));
|
||||
} while (vdso_read_retry(vc, seq));
|
||||
|
||||
vdso_set_timespec(ts, sec, ns);
|
||||
|
||||
@@ -313,7 +297,7 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock,
|
||||
return do_hres(vd, vc, clock, ts);
|
||||
}
|
||||
|
||||
static __maybe_unused int
|
||||
static int
|
||||
__cvdso_clock_gettime_data(const struct vdso_time_data *vd, clockid_t clock,
|
||||
struct __kernel_timespec *ts)
|
||||
{
|
||||
@@ -333,7 +317,7 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
|
||||
}
|
||||
|
||||
#ifdef BUILD_VDSO32
|
||||
static __maybe_unused int
|
||||
static int
|
||||
__cvdso_clock_gettime32_data(const struct vdso_time_data *vd, clockid_t clock,
|
||||
struct old_timespec32 *res)
|
||||
{
|
||||
@@ -359,7 +343,7 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
|
||||
}
|
||||
#endif /* BUILD_VDSO32 */
|
||||
|
||||
static __maybe_unused int
|
||||
static int
|
||||
__cvdso_gettimeofday_data(const struct vdso_time_data *vd,
|
||||
struct __kernel_old_timeval *tv, struct timezone *tz)
|
||||
{
|
||||
@@ -376,8 +360,7 @@ __cvdso_gettimeofday_data(const struct vdso_time_data *vd,
|
||||
}
|
||||
|
||||
if (unlikely(tz != NULL)) {
|
||||
if (IS_ENABLED(CONFIG_TIME_NS) &&
|
||||
vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
|
||||
if (vdso_is_timens_clock(vc))
|
||||
vd = __arch_get_vdso_u_timens_data(vd);
|
||||
|
||||
tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
|
||||
@@ -394,14 +377,13 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
|
||||
}
|
||||
|
||||
#ifdef VDSO_HAS_TIME
|
||||
static __maybe_unused __kernel_old_time_t
|
||||
static __kernel_old_time_t
|
||||
__cvdso_time_data(const struct vdso_time_data *vd, __kernel_old_time_t *time)
|
||||
{
|
||||
const struct vdso_clock *vc = vd->clock_data;
|
||||
__kernel_old_time_t t;
|
||||
|
||||
if (IS_ENABLED(CONFIG_TIME_NS) &&
|
||||
vc->clock_mode == VDSO_CLOCKMODE_TIMENS) {
|
||||
if (vdso_is_timens_clock(vc)) {
|
||||
vd = __arch_get_vdso_u_timens_data(vd);
|
||||
vc = vd->clock_data;
|
||||
}
|
||||
@@ -432,8 +414,7 @@ bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t cloc
|
||||
if (!vdso_clockid_valid(clock))
|
||||
return false;
|
||||
|
||||
if (IS_ENABLED(CONFIG_TIME_NS) &&
|
||||
vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
|
||||
if (vdso_is_timens_clock(vc))
|
||||
vd = __arch_get_vdso_u_timens_data(vd);
|
||||
|
||||
/*
|
||||
@@ -464,7 +445,7 @@ bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t cloc
|
||||
return true;
|
||||
}
|
||||
|
||||
static __maybe_unused
|
||||
static
|
||||
int __cvdso_clock_getres_data(const struct vdso_time_data *vd, clockid_t clock,
|
||||
struct __kernel_timespec *res)
|
||||
{
|
||||
@@ -484,7 +465,7 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
|
||||
}
|
||||
|
||||
#ifdef BUILD_VDSO32
|
||||
static __maybe_unused int
|
||||
static int
|
||||
__cvdso_clock_getres_time32_data(const struct vdso_time_data *vd, clockid_t clock,
|
||||
struct old_timespec32 *res)
|
||||
{
|
||||
|
||||
@@ -19,8 +19,6 @@ endif
|
||||
|
||||
include ../lib.mk
|
||||
|
||||
CFLAGS += $(TOOLS_INCLUDES)
|
||||
|
||||
CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tables \
|
||||
-fno-stack-protector -include $(top_srcdir)/tools/include/nolibc/nolibc.h \
|
||||
-I$(top_srcdir)/tools/include/nolibc/ $(KHDR_INCLUDES)
|
||||
@@ -28,13 +26,11 @@ CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tab
|
||||
$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
|
||||
$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
|
||||
$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
|
||||
$(OUTPUT)/vdso_test_correctness: parse_vdso.c vdso_test_correctness.c
|
||||
|
||||
$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c | headers
|
||||
$(OUTPUT)/vdso_standalone_test_x86: CFLAGS:=$(CFLAGS_NOLIBC) $(CFLAGS)
|
||||
|
||||
$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c
|
||||
$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl
|
||||
|
||||
$(OUTPUT)/vdso_test_getrandom: parse_vdso.c
|
||||
$(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \
|
||||
$(KHDR_INCLUDES) \
|
||||
|
||||
@@ -19,8 +19,7 @@
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <linux/auxvec.h>
|
||||
#include <linux/elf.h>
|
||||
#include <elf.h>
|
||||
|
||||
#include "parse_vdso.h"
|
||||
|
||||
|
||||
@@ -11,28 +11,22 @@
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/auxv.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <dlfcn.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <sched.h>
|
||||
#include <stdbool.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "parse_vdso.h"
|
||||
#include "vdso_config.h"
|
||||
#include "vdso_call.h"
|
||||
#include "kselftest.h"
|
||||
|
||||
static const char *version;
|
||||
static const char **name;
|
||||
|
||||
#ifndef SYS_getcpu
|
||||
# ifdef __x86_64__
|
||||
# define SYS_getcpu 309
|
||||
# else
|
||||
# define SYS_getcpu 318
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef __NR_clock_gettime64
|
||||
#define __NR_clock_gettime64 403
|
||||
#endif
|
||||
@@ -61,6 +55,10 @@ typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
|
||||
|
||||
vgtod_t vdso_gettimeofday;
|
||||
|
||||
typedef time_t (*vtime_t)(__kernel_time_t *tloc);
|
||||
|
||||
vtime_t vdso_time;
|
||||
|
||||
typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
|
||||
|
||||
getcpu_t vgetcpu;
|
||||
@@ -110,42 +108,39 @@ static void *vsyscall_getcpu(void)
|
||||
|
||||
static void fill_function_pointers(void)
|
||||
{
|
||||
void *vdso = dlopen("linux-vdso.so.1",
|
||||
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
|
||||
if (!vdso)
|
||||
vdso = dlopen("linux-gate.so.1",
|
||||
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
|
||||
if (!vdso)
|
||||
vdso = dlopen("linux-vdso32.so.1",
|
||||
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
|
||||
if (!vdso)
|
||||
vdso = dlopen("linux-vdso64.so.1",
|
||||
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
|
||||
if (!vdso) {
|
||||
unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
|
||||
|
||||
if (!sysinfo_ehdr) {
|
||||
printf("[WARN]\tfailed to find vDSO\n");
|
||||
return;
|
||||
}
|
||||
|
||||
vdso_getcpu = (getcpu_t)dlsym(vdso, name[4]);
|
||||
vdso_init_from_sysinfo_ehdr(sysinfo_ehdr);
|
||||
|
||||
vdso_getcpu = (getcpu_t)vdso_sym(version, name[4]);
|
||||
if (!vdso_getcpu)
|
||||
printf("Warning: failed to find getcpu in vDSO\n");
|
||||
|
||||
vgetcpu = (getcpu_t) vsyscall_getcpu();
|
||||
|
||||
vdso_clock_gettime = (vgettime_t)dlsym(vdso, name[1]);
|
||||
vdso_clock_gettime = (vgettime_t)vdso_sym(version, name[1]);
|
||||
if (!vdso_clock_gettime)
|
||||
printf("Warning: failed to find clock_gettime in vDSO\n");
|
||||
|
||||
#if defined(VDSO_32BIT)
|
||||
vdso_clock_gettime64 = (vgettime64_t)dlsym(vdso, name[5]);
|
||||
vdso_clock_gettime64 = (vgettime64_t)vdso_sym(version, name[5]);
|
||||
if (!vdso_clock_gettime64)
|
||||
printf("Warning: failed to find clock_gettime64 in vDSO\n");
|
||||
#endif
|
||||
|
||||
vdso_gettimeofday = (vgtod_t)dlsym(vdso, name[0]);
|
||||
vdso_gettimeofday = (vgtod_t)vdso_sym(version, name[0]);
|
||||
if (!vdso_gettimeofday)
|
||||
printf("Warning: failed to find gettimeofday in vDSO\n");
|
||||
|
||||
vdso_time = (vtime_t)vdso_sym(version, name[2]);
|
||||
if (!vdso_time)
|
||||
printf("Warning: failed to find time in vDSO\n");
|
||||
|
||||
}
|
||||
|
||||
static long sys_getcpu(unsigned * cpu, unsigned * node,
|
||||
@@ -169,6 +164,16 @@ static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
|
||||
return syscall(__NR_gettimeofday, tv, tz);
|
||||
}
|
||||
|
||||
/*
 * Thin wrapper around the raw time() system call.
 *
 * When the architecture's headers do not provide __NR_time, no syscall is
 * attempted: errno is set to ENOSYS and -1 is returned so callers can tell
 * "not available" apart from other failures.
 */
static inline __kernel_old_time_t sys_time(__kernel_old_time_t *tloc)
{
#ifndef __NR_time
	/* No time() syscall number on this architecture. */
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_time, tloc);
#endif
}
|
||||
|
||||
static void test_getcpu(void)
|
||||
{
|
||||
printf("[RUN]\tTesting getcpu...\n");
|
||||
@@ -412,10 +417,10 @@ static void test_gettimeofday(void)
|
||||
return;
|
||||
}
|
||||
|
||||
printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
|
||||
(unsigned long long)start.tv_sec, start.tv_usec,
|
||||
(unsigned long long)vdso.tv_sec, vdso.tv_usec,
|
||||
(unsigned long long)end.tv_sec, end.tv_usec);
|
||||
printf("\t%llu.%06lld %llu.%06lld %llu.%06lld\n",
|
||||
(unsigned long long)start.tv_sec, (long long)start.tv_usec,
|
||||
(unsigned long long)vdso.tv_sec, (long long)vdso.tv_usec,
|
||||
(unsigned long long)end.tv_sec, (long long)end.tv_usec);
|
||||
|
||||
if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
|
||||
printf("[FAIL]\tTimes are out of sequence\n");
|
||||
@@ -435,8 +440,56 @@ static void test_gettimeofday(void)
|
||||
VDSO_CALL(vdso_gettimeofday, 2, &vdso, NULL);
|
||||
}
|
||||
|
||||
static void test_time(void)
|
||||
{
|
||||
__kernel_old_time_t start, end, vdso_ret, vdso_param;
|
||||
|
||||
if (!vdso_time)
|
||||
return;
|
||||
|
||||
printf("[RUN]\tTesting time...\n");
|
||||
|
||||
if (sys_time(&start) < 0) {
|
||||
if (errno == -ENOSYS) {
|
||||
printf("[SKIP]\tNo time() support\n");
|
||||
} else {
|
||||
printf("[FAIL]\tsys_time failed (%d)\n", errno);
|
||||
nerrs++;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
vdso_ret = VDSO_CALL(vdso_time, 1, &vdso_param);
|
||||
end = sys_time(NULL);
|
||||
|
||||
if (vdso_ret < 0 || end < 0) {
|
||||
printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
|
||||
(int)vdso_ret, errno);
|
||||
nerrs++;
|
||||
return;
|
||||
}
|
||||
|
||||
printf("\t%lld %lld %lld\n",
|
||||
(long long)start,
|
||||
(long long)vdso_ret,
|
||||
(long long)end);
|
||||
|
||||
if (vdso_ret != vdso_param) {
|
||||
printf("[FAIL]\tinconsistent return values: %lld %lld\n",
|
||||
(long long)vdso_ret, (long long)vdso_param);
|
||||
nerrs++;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!(start <= vdso_ret) || !(vdso_ret <= end)) {
|
||||
printf("[FAIL]\tTimes are out of sequence\n");
|
||||
nerrs++;
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
version = versions[VDSO_VERSION];
|
||||
name = (const char **)&names[VDSO_NAMES];
|
||||
|
||||
fill_function_pointers();
|
||||
@@ -444,6 +497,7 @@ int main(int argc, char **argv)
|
||||
test_clock_gettime();
|
||||
test_clock_gettime64();
|
||||
test_gettimeofday();
|
||||
test_time();
|
||||
|
||||
/*
|
||||
* Test getcpu() last so that, if something goes wrong setting affinity,
|
||||
|
||||
@@ -11,10 +11,8 @@
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#ifndef NOLIBC
|
||||
#include <sys/auxv.h>
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
#include "kselftest.h"
|
||||
#include "parse_vdso.h"
|
||||
|
||||
Reference in New Issue
Block a user