From 3b5584afeef05319ade0fbf5f634a64fd3e5772b Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 30 Aug 2019 14:58:55 +0100 Subject: arm64: compat: vdso: Expose BUILD_VDSO32 clock_gettime32 and clock_getres_time32 should be compiled only with the 32 bit vdso library. Expose BUILD_VDSO32 when arm64 compat is compiled, to provide an indication to the generic library to include these symbols. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20190830135902.20861-2-vincenzo.frascino@arm.com --- arch/arm64/include/asm/vdso/compat_gettimeofday.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index c50ee1b7d5cd..fe7afe0f1a3d 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -17,6 +17,7 @@ #define VDSO_HAS_CLOCK_GETRES 1 #define VDSO_HAS_32BIT_FALLBACK 1 +#define BUILD_VDSO32 1 static __always_inline int gettimeofday_fallback(struct __kernel_old_timeval *_tv, -- cgit v1.2.3-59-g8ed1b From 715f23b6104aa297feea20d4f200ca81941e23de Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Jan 2020 09:41:09 +0100 Subject: ARM: vdso: Set BUILD_VDSO32 and provide 32bit fallbacks Setting BUILD_VDSO32 is required to expose the legacy 32bit interfaces in the generic VDSO code which are going to be hidden behind an #ifdef BUILD_VDSO32. The 32bit fallbacks are necessary to remove the existing VDSO_HAS_32BIT_FALLBACK hackery. Signed-off-by: Thomas Gleixner Tested-by: Vincenzo Frascino Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/87tv4zq9dc.fsf@nanos.tec.linutronix.de --- arch/arm/include/asm/vdso/gettimeofday.h | 36 ++++++++++++++++++++++++++++++++ arch/arm/vdso/Makefile | 2 +- 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h index 0ad2429c324f..fe6e1f65932d 100644 --- a/arch/arm/include/asm/vdso/gettimeofday.h +++ b/arch/arm/include/asm/vdso/gettimeofday.h @@ -52,6 +52,24 @@ static __always_inline long clock_gettime_fallback( return ret; } +static __always_inline long clock_gettime32_fallback( + clockid_t _clkid, + struct old_timespec32 *_ts) +{ + register struct old_timespec32 *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_clock_gettime; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + static __always_inline int clock_getres_fallback( clockid_t _clkid, struct __kernel_timespec *_ts) @@ -70,6 +88,24 @@ static __always_inline int clock_getres_fallback( return ret; } +static __always_inline int clock_getres32_fallback( + clockid_t _clkid, + struct old_timespec32 *_ts) +{ + register struct old_timespec32 *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_clock_getres; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + static __always_inline u64 __arch_get_hw_counter(int clock_mode) { #ifdef CONFIG_ARM_ARCH_TIMER diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 0fda344beb0b..1babb392e70a 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -14,7 +14,7 @@ targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector -ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO32 ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ -- cgit v1.2.3-59-g8ed1b From 972188f3a2dac07a6f000a4418776f446259fc87 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 30 Aug 2019 14:59:00 +0100 Subject: arm64: compat: vdso: Remove unused VDSO_HAS_32BIT_FALLBACK VDSO_HAS_32BIT_FALLBACK has been removed from the core since the architectures that support the generic vDSO library have been converted to support the 32 bit fallbacks. Remove unused VDSO_HAS_32BIT_FALLBACK from arm64 compat vdso. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20190830135902.20861-7-vincenzo.frascino@arm.com --- arch/arm64/include/asm/vdso/compat_gettimeofday.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index fe7afe0f1a3d..537b1e695365 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -16,7 +16,6 @@ #define VDSO_HAS_CLOCK_GETRES 1 -#define VDSO_HAS_32BIT_FALLBACK 1 #define BUILD_VDSO32 1 static __always_inline -- cgit v1.2.3-59-g8ed1b From de0209f53aba44d9b57d0739a076dfb2db767584 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 30 Aug 2019 14:59:01 +0100 Subject: mips: vdso: Remove unused VDSO_HAS_32BIT_FALLBACK VDSO_HAS_32BIT_FALLBACK has been removed from the core since the architectures that support the generic vDSO library have been converted to support the 32 bit fallbacks. Remove unused VDSO_HAS_32BIT_FALLBACK from mips vdso. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Acked-by: Paul Burton Link: https://lore.kernel.org/r/20190830135902.20861-8-vincenzo.frascino@arm.com --- arch/mips/include/asm/vdso/gettimeofday.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h index 0ae9b4cbc153..a58687e26c5d 100644 --- a/arch/mips/include/asm/vdso/gettimeofday.h +++ b/arch/mips/include/asm/vdso/gettimeofday.h @@ -96,8 +96,6 @@ static __always_inline int clock_getres_fallback( #if _MIPS_SIM != _MIPS_SIM_ABI64 -#define VDSO_HAS_32BIT_FALLBACK 1 - static __always_inline long clock_gettime32_fallback( clockid_t _clkid, struct old_timespec32 *_ts) -- cgit v1.2.3-59-g8ed1b From 0b5c12332db5b71c6db0c102b3e6acc7c7c6a54d Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 30 Aug 2019 14:59:02 +0100 Subject: x86/vdso: Remove unused VDSO_HAS_32BIT_FALLBACK VDSO_HAS_32BIT_FALLBACK has been removed from the core since the architectures that support the generic vDSO library have been converted to support the 32 bit fallbacks. Remove unused VDSO_HAS_32BIT_FALLBACK from x86 vdso. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20190830135902.20861-9-vincenzo.frascino@arm.com --- arch/x86/include/asm/vdso/gettimeofday.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index e9ee139cf29e..52c3bcd672cf 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -96,8 +96,6 @@ long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) #else -#define VDSO_HAS_32BIT_FALLBACK 1 - static __always_inline long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { -- cgit v1.2.3-59-g8ed1b From 6f74acfde20af1eb2178d0bd846bfd8f50b3be32 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 12 Nov 2019 01:27:08 +0000 Subject: x86/vdso: Restrict splitting VVAR VMA Forbid splitting VVAR VMA resulting in a stricter ABI and reducing the amount of corner-cases to consider while working further on VDSO time namespace support. As the offset from timens to VVAR page is computed compile-time, the pages in VVAR should stay together and not being partically mremap()'ed. Co-developed-by: Andrei Vagin Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-20-dima@arista.com --- arch/x86/entry/vdso/vma.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index f5937742b290..76cbe54e0c39 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -84,6 +84,18 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } +static int vvar_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + const struct vdso_image *image = new_vma->vm_mm->context.vdso_image; + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + + if (new_size != -image->sym_vvar_start) + return -EINVAL; + + return 0; +} + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { @@ -136,6 +148,7 @@ static const struct vm_special_mapping vdso_mapping = { static const struct vm_special_mapping vvar_mapping = { .name = "[vvar]", .fault = vvar_fault, + .mremap = vvar_mremap, }; /* -- cgit v1.2.3-59-g8ed1b From 64b302ab66c5965702693e79690823ca120288b9 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 12 Nov 2019 01:27:10 +0000 Subject: x86/vdso: Provide vdso_data offset on vvar_page VDSO support for time namespaces needs to set up a page with the same layout as VVAR. That timens page will be placed on position of VVAR page inside namespace. That page has vdso_data->seq set to 1 to enforce the slow path and vdso_data->clock_mode set to VCLOCK_TIMENS to enforce the time namespace handling path. To prepare the time namespace page the kernel needs to know the vdso_data offset. Provide arch_get_vdso_data() helper for locating vdso_data on VVAR page. Co-developed-by: Andrei Vagin Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-22-dima@arista.com --- arch/x86/entry/vdso/vdso-layout.lds.S | 2 -- arch/x86/entry/vdso/vma.c | 11 +++++++++++ arch/x86/include/asm/vvar.h | 8 ++++---- arch/x86/kernel/vmlinux.lds.S | 4 +--- include/linux/time_namespace.h | 1 + 5 files changed, 17 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index 93c6dc7812d0..2330daad67c3 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -21,9 +21,7 @@ SECTIONS /* Place all vvars at the offsets in asm/vvar.h. */ #define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset; -#define __VVAR_KERNEL_LDS #include -#undef __VVAR_KERNEL_LDS #undef EMIT_VVAR pvclock_page = vvar_start + PAGE_SIZE; diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 76cbe54e0c39..04e3498c6c41 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -24,6 +24,17 @@ #include #include +#undef _ASM_X86_VVAR_H +#define EMIT_VVAR(name, offset) \ + const size_t name ## _offset = offset; +#include + +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page + _vdso_data_offset); +} +#undef EMIT_VVAR + #if defined(CONFIG_X86_64) unsigned int __read_mostly vdso64_enabled = 1; #endif diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index 32f5d9a0b90e..ff2de3025388 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -19,10 +19,10 @@ #ifndef _ASM_X86_VVAR_H #define _ASM_X86_VVAR_H -#if defined(__VVAR_KERNEL_LDS) - -/* The kernel linker script defines its own magic to put vvars in the - * right place. +#ifdef EMIT_VVAR +/* + * EMIT_VVAR() is used by the kernel linker script to put vvars in the + * right place. Also, it's used by kernel code to import offsets values. */ #define DECLARE_VVAR(offset, type, name) \ EMIT_VVAR(name, offset) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3a1a819da137..e3296aa028fe 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -193,12 +193,10 @@ SECTIONS __vvar_beginning_hack = .; /* Place all vvars at the offsets in asm/vvar.h. */ -#define EMIT_VVAR(name, offset) \ +#define EMIT_VVAR(name, offset) \ . = __vvar_beginning_hack + offset; \ *(.vvar_ ## name) -#define __VVAR_KERNEL_LDS #include -#undef __VVAR_KERNEL_LDS #undef EMIT_VVAR /* diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 34ee110b5c35..063a343d1d78 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -39,6 +39,7 @@ struct time_namespace *copy_time_ns(unsigned long flags, struct time_namespace *old_ns); void free_time_ns(struct kref *kref); int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); +struct vdso_data *arch_get_vdso_data(void *vvar_page); static inline void put_time_ns(struct time_namespace *ns) { -- cgit v1.2.3-59-g8ed1b From 550a77a74c87ecfdadc2214fef4b25ff125f65ab Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 12 Nov 2019 01:27:11 +0000 Subject: x86/vdso: Add time napespace page To support time namespaces in the VDSO with a minimal impact on regular non time namespace affected tasks, the namespace handling needs to be hidden in a slow path. The most obvious place is vdso_seq_begin(). If a task belongs to a time namespace then the VVAR page which contains the system wide VDSO data is replaced with a namespace specific page which has the same layout as the VVAR page. That page has vdso_data->seq set to 1 to enforce the slow path and vdso_data->clock_mode set to VCLOCK_TIMENS to enforce the time namespace handling path. The extra check in the case that vdso_data->seq is odd, e.g. a concurrent update of the VDSO data is in progress, is not really affecting regular tasks which are not part of a time namespace as the task is spin waiting for the update to finish and vdso_data->seq to become even again. If a time namespace task hits that code path, it invokes the corresponding time getter function which retrieves the real VVAR page, reads host time and then adds the offset for the requested clock which is stored in the special VVAR page. Allocate the time namespace page among VVAR pages and place vdso_data on it. Provide __arch_get_timens_vdso_data() helper for VDSO code to get the code-relative position of VVARs on that special page. Co-developed-by: Andrei Vagin Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-23-dima@arista.com --- arch/x86/Kconfig | 1 + arch/x86/entry/vdso/vdso-layout.lds.S | 11 +++++++++-- arch/x86/entry/vdso/vdso2c.c | 3 +++ arch/x86/include/asm/vdso.h | 1 + arch/x86/include/asm/vdso/gettimeofday.h | 8 ++++++++ arch/x86/include/asm/vvar.h | 5 ++++- 6 files changed, 26 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5e8949953660..a2488c372fa1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -124,6 +124,7 @@ config X86 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY + select GENERIC_VDSO_TIME_NS select GUP_GET_PTE_LOW_HIGH if X86_PAE select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index 2330daad67c3..ea7e0155c604 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -16,8 +16,8 @@ SECTIONS * segment. */ - vvar_start = . - 3 * PAGE_SIZE; - vvar_page = vvar_start; + vvar_start = . - 4 * PAGE_SIZE; + vvar_page = vvar_start; /* Place all vvars at the offsets in asm/vvar.h. */ #define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset; @@ -26,6 +26,13 @@ SECTIONS pvclock_page = vvar_start + PAGE_SIZE; hvclock_page = vvar_start + 2 * PAGE_SIZE; + timens_page = vvar_start + 3 * PAGE_SIZE; + +#undef _ASM_X86_VVAR_H + /* Place all vvars in timens too at the offsets in asm/vvar.h. */ +#define EMIT_VVAR(name, offset) timens_ ## name = timens_page + offset; +#include +#undef EMIT_VVAR . = SIZEOF_HEADERS; diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index 3a4d8d4d39f8..3842873b3ae3 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -75,12 +75,14 @@ enum { sym_vvar_page, sym_pvclock_page, sym_hvclock_page, + sym_timens_page, }; const int special_pages[] = { sym_vvar_page, sym_pvclock_page, sym_hvclock_page, + sym_timens_page, }; struct vdso_sym { @@ -93,6 +95,7 @@ struct vdso_sym required_syms[] = { [sym_vvar_page] = {"vvar_page", true}, [sym_pvclock_page] = {"pvclock_page", true}, [sym_hvclock_page] = {"hvclock_page", true}, + [sym_timens_page] = {"timens_page", true}, {"VDSO32_NOTE_MASK", true}, {"__kernel_vsyscall", true}, {"__kernel_sigreturn", true}, diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 230474e2ddb5..bbcdc7b8f963 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -21,6 +21,7 @@ struct vdso_image { long sym_vvar_page; long sym_pvclock_page; long sym_hvclock_page; + long sym_timens_page; long sym_VDSO32_NOTE_MASK; long sym___kernel_sigreturn; long sym___kernel_rt_sigreturn; diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 52c3bcd672cf..6ee1f7dba34b 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -21,6 +21,7 @@ #include #define __vdso_data (VVAR(_vdso_data)) +#define __timens_vdso_data (TIMENS(_vdso_data)) #define VDSO_HAS_TIME 1 @@ -56,6 +57,13 @@ extern struct ms_hyperv_tsc_page hvclock_page __attribute__((visibility("hidden"))); #endif +#ifdef CONFIG_TIME_NS +static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + return __timens_vdso_data; +} +#endif + #ifndef BUILD_VDSO32 static __always_inline diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index ff2de3025388..183e98e49ab9 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -33,9 +33,12 @@ extern char __vvar_page; #define DECLARE_VVAR(offset, type, name) \ extern type vvar_ ## name[CS_BASES] \ - __attribute__((visibility("hidden"))); + __attribute__((visibility("hidden"))); \ + extern type timens_ ## name[CS_BASES] \ + __attribute__((visibility("hidden"))); \ #define VVAR(name) (vvar_ ## name) +#define TIMENS(name) (timens_ ## name) #define DEFINE_VVAR(type, name) \ type name[CS_BASES] \ -- cgit v1.2.3-59-g8ed1b From af34ebeb866fafc0a9a09dda51c52ccec007ace0 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 12 Nov 2019 01:27:13 +0000 Subject: x86/vdso: Handle faults on timens page If a task belongs to a time namespace then the VVAR page which contains the system wide VDSO data is replaced with a namespace specific page which has the same layout as the VVAR page. Co-developed-by: Andrei Vagin Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-25-dima@arista.com --- arch/x86/entry/vdso/vma.c | 54 +++++++++++++++++++++++++++++++++++++++++++++-- mm/mmap.c | 2 ++ 2 files changed, 54 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 04e3498c6c41..e5f336112cb1 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -14,11 +14,14 @@ #include #include #include +#include + #include #include #include #include #include +#include #include #include #include @@ -107,10 +110,36 @@ static int vvar_mremap(const struct vm_special_mapping *sm, return 0; } +#ifdef CONFIG_TIME_NS +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops(). + * For more details check_vma_flags() and __access_remote_vm() + */ + + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} +#else +static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { const struct vdso_image *image = vma->vm_mm->context.vdso_image; + unsigned long pfn; long sym_offset; if (!image) @@ -130,8 +159,21 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, return VM_FAULT_SIGBUS; if (sym_offset == image->sym_vvar_page) { - return vmf_insert_pfn(vma, vmf->address, - __pa_symbol(&__vvar_page) >> PAGE_SHIFT); + struct page *timens_page = find_timens_vvar_page(vma); + + pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT; + + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the sym_vvar_page offset and + * the real VVAR page is mapped with the sym_timens_page + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (timens_page) + pfn = page_to_pfn(timens_page); + + return vmf_insert_pfn(vma, vmf->address, pfn); } else if (sym_offset == image->sym_pvclock_page) { struct pvclock_vsyscall_time_info *pvti = pvclock_get_pvti_cpu0_va(); @@ -146,6 +188,14 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK)) return vmf_insert_pfn(vma, vmf->address, virt_to_phys(tsc_pg) >> PAGE_SHIFT); + } else if (sym_offset == image->sym_timens_page) { + struct page *timens_page = find_timens_vvar_page(vma); + + if (!timens_page) + return VM_FAULT_SIGBUS; + + pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT; + return vmf_insert_pfn(vma, vmf->address, pfn); } return VM_FAULT_SIGBUS; diff --git a/mm/mmap.c b/mm/mmap.c index 9c648524e4dc..60c17d3c8762 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3342,6 +3342,8 @@ static const struct vm_operations_struct special_mapping_vmops = { .fault = special_mapping_fault, .mremap = special_mapping_mremap, .name = special_mapping_name, + /* vDSO code relies that VVAR can't be accessed remotely */ + .access = NULL, }; static const struct vm_operations_struct legacy_special_mapping_vmops = { -- cgit v1.2.3-59-g8ed1b From e6b28ec65b6d433624a2c290073bc356c4fce914 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 12 Nov 2019 01:27:14 +0000 Subject: x86/vdso: On timens page fault prefault also VVAR page As timens page has offsets to data on VVAR page VVAR is going to be accessed shortly. Set it up with timens in one page fault as optimization. Suggested-by: Thomas Gleixner Co-developed-by: Andrei Vagin Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-26-dima@arista.com --- arch/x86/entry/vdso/vma.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index e5f336112cb1..d2fd8a57af7d 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -170,8 +170,23 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, * offset. * See also the comment near timens_setup_vdso_data(). */ - if (timens_page) + if (timens_page) { + unsigned long addr; + vm_fault_t err; + + /* + * Optimization: inside time namespace pre-fault + * VVAR page too. As on timens page there are only + * offsets for clocks on VVAR, it'll be faulted + * shortly by VDSO code. + */ + addr = vmf->address + (image->sym_timens_page - sym_offset); + err = vmf_insert_pfn(vma, addr, pfn); + if (unlikely(err & VM_FAULT_ERROR)) + return err; + pfn = page_to_pfn(timens_page); + } return vmf_insert_pfn(vma, vmf->address, pfn); } else if (sym_offset == image->sym_pvclock_page) { -- cgit v1.2.3-59-g8ed1b From 70ddf65184ec1e8989322f35193e4fde7377f0cc Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 12 Nov 2019 01:27:15 +0000 Subject: x86/vdso: Zap vvar pages when switching to a time namespace The VVAR page layout depends on whether a task belongs to the root or non-root time namespace. Whenever a task changes its namespace, the VVAR page tables are cleared and then they will be re-faulted with a corresponding layout. Co-developed-by: Andrei Vagin Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-27-dima@arista.com --- arch/x86/entry/vdso/vma.c | 27 +++++++++++++++++++++++++++ include/linux/time_namespace.h | 9 +++++++++ kernel/time/namespace.c | 10 ++++++++++ 3 files changed, 46 insertions(+) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index d2fd8a57af7d..c1b8496b5606 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -51,6 +51,7 @@ void __init init_vdso_image(const struct vdso_image *image) image->alt_len)); } +static const struct vm_special_mapping vvar_mapping; struct linux_binprm; static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, @@ -128,6 +129,32 @@ static struct page *find_timens_vvar_page(struct vm_area_struct *vma) return NULL; } + +/* + * The vvar page layout depends on whether a task belongs to the root or + * non-root time namespace. Whenever a task changes its namespace, the VVAR + * page tables are cleared and then they will re-faulted with a + * corresponding layout. + * See also the comment near timens_setup_vdso_data() for details. + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + if (down_write_killable(&mm->mmap_sem)) + return -EINTR; + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (vma_is_special_mapping(vma, &vvar_mapping)) + zap_page_range(vma, vma->vm_start, size); + } + + up_write(&mm->mmap_sem); + return 0; +} #else static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) { diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 6b7767f7df4a..04a2ba8b8a06 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -31,6 +31,9 @@ struct time_namespace { extern struct time_namespace init_time_ns; #ifdef CONFIG_TIME_NS +extern int vdso_join_timens(struct task_struct *task, + struct time_namespace *ns); + static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { kref_get(&ns->kref); @@ -77,6 +80,12 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) } #else +static inline int vdso_join_timens(struct task_struct *task, + struct time_namespace *ns) +{ + return 0; +} + static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { return NULL; diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index d705c15d0273..0732964803b9 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -281,6 +281,7 @@ static void timens_put(struct ns_common *ns) static int timens_install(struct nsproxy *nsproxy, struct ns_common *new) { struct time_namespace *ns = to_time_ns(new); + int err; if (!current_is_single_threaded()) return -EUSERS; @@ -291,6 +292,10 @@ static int timens_install(struct nsproxy *nsproxy, struct ns_common *new) timens_set_vvar_page(current, ns); + err = vdso_join_timens(current, ns); + if (err) + return err; + get_time_ns(ns); put_time_ns(nsproxy->time_ns); nsproxy->time_ns = ns; @@ -305,6 +310,7 @@ int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk) { struct ns_common *nsc = &nsproxy->time_ns_for_children->ns; struct time_namespace *ns = to_time_ns(nsc); + int err; /* create_new_namespaces() already incremented the ref counter */ if (nsproxy->time_ns == nsproxy->time_ns_for_children) @@ -312,6 +318,10 @@ int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk) timens_set_vvar_page(tsk, ns); + err = vdso_join_timens(tsk, ns); + if (err) + return err; + get_time_ns(ns); put_time_ns(nsproxy->time_ns); nsproxy->time_ns = ns; -- cgit v1.2.3-59-g8ed1b From 99570c3da96a0f7aa11c6ad4981776f3adabf3b5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Jan 2020 20:43:41 +0100 Subject: MIPS: vdso: Define BUILD_VDSO32 when building a 32bit kernel The confinement of the 32bit specific VDSO functions missed to define BUILD_VDSO32 when building a 32bit MIPS kernel: arch/mips/vdso/vgettimeofday.c: In function __vdso_clock_gettime: arch/mips/vdso/vgettimeofday.c:17:9: error: implicit declaration of function __cvdso_clock_gettime32 arch/mips/vdso/vgettimeofday.c: In function __vdso_clock_getres: arch/mips/vdso/vgettimeofday.c:39:9: error: implicit declaration of function __cvdso_clock_getres_time32 Force the define for 32bit builds in the VDSO Makefile. Fixes: bf279849ad59 ("lib/vdso: Build 32 bit specific functions in the right context") Reported-by: kbuild test robot Signed-off-by: Thomas Gleixner Acked-by: Paul Burton Link: https://lore.kernel.org/r/87d0bjfaqa.fsf@nanos.tec.linutronix.de --- arch/mips/vdso/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index e05938997e69..b2a2e032dc99 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -18,6 +18,10 @@ ccflags-vdso := \ $(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \ -D__VDSO__ +ifndef CONFIG_64BIT +ccflags-vdso += -DBUILD_VDSO32 +endif + ifdef CONFIG_CC_IS_CLANG ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) endif -- cgit v1.2.3-59-g8ed1b