aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-01-27 16:47:05 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-01-27 16:47:05 -0800
commite279160f491392f1345f6eb4b0eeec5a6a2ecdd7 (patch)
tree018410c41f919774f70e6350fc251556bf789944 /arch
parentMerge tag 'core-debugobjects-2020-01-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parentalarmtimer: Make alarmtimer_get_rtcdev() a stub when CONFIG_RTC_CLASS=n (diff)
downloadlinux-dev-e279160f491392f1345f6eb4b0eeec5a6a2ecdd7.tar.xz
linux-dev-e279160f491392f1345f6eb4b0eeec5a6a2ecdd7.zip
Merge tag 'timers-core-2020-01-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer updates from Thomas Gleixner: "The timekeeping and timers departement provides: - Time namespace support: If a container migrates from one host to another then it expects that clocks based on MONOTONIC and BOOTTIME are not subject to disruption. Due to different boot time and non-suspended runtime these clocks can differ significantly on two hosts, in the worst case time goes backwards which is a violation of the POSIX requirements. The time namespace addresses this problem. It allows to set offsets for clock MONOTONIC and BOOTTIME once after creation and before tasks are associated with the namespace. These offsets are taken into account by timers and timekeeping including the VDSO. Offsets for wall clock based clocks (REALTIME/TAI) are not provided by this mechanism. While in theory possible, the overhead and code complexity would be immense and not justified by the esoteric potential use cases which were discussed at Plumbers '18. The overhead for tasks in the root namespace (ie where host time offsets = 0) is in the noise and great effort was made to ensure that especially in the VDSO. If time namespace is disabled in the kernel configuration the code is compiled out. Kudos to Andrei Vagin and Dmitry Sofanov who implemented this feature and kept on for more than a year addressing review comments, finding better solutions. A pleasant experience. - Overhaul of the alarmtimer device dependency handling to ensure that the init/suspend/resume ordering is correct. - A new clocksource/event driver for Microchip PIT64 - Suspend/resume support for the Hyper-V clocksource - The usual pile of fixes, updates and improvements mostly in the driver code" * tag 'timers-core-2020-01-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (71 commits) alarmtimer: Make alarmtimer_get_rtcdev() a stub when CONFIG_RTC_CLASS=n alarmtimer: Use wakeup source from alarmtimer platform device alarmtimer: Make alarmtimer platform device child of RTC device alarmtimer: Update alarmtimer_get_rtcdev() docs to reflect reality hrtimer: Add missing sparse annotation for __run_timer() lib/vdso: Only read hrtimer_res when needed in __cvdso_clock_getres() MIPS: vdso: Define BUILD_VDSO32 when building a 32bit kernel clocksource/drivers/hyper-v: Set TSC clocksource as default w/ InvariantTSC clocksource/drivers/hyper-v: Untangle stimers and timesync from clocksources clocksource/drivers/timer-microchip-pit64b: Fix sparse warning clocksource/drivers/exynos_mct: Rename Exynos to lowercase clocksource/drivers/timer-ti-dm: Fix uninitialized pointer access clocksource/drivers/timer-ti-dm: Switch to platform_get_irq clocksource/drivers/timer-ti-dm: Convert to devm_platform_ioremap_resource clocksource/drivers/em_sti: Fix variable declaration in em_sti_probe clocksource/drivers/em_sti: Convert to devm_platform_ioremap_resource clocksource/drivers/bcm2835_timer: Fix memory leak of timer clocksource/drivers/cadence-ttc: Use ttc driver as platform driver clocksource/drivers/timer-microchip-pit64b: Add Microchip PIT64B support clocksource/drivers/hyper-v: Reserve PAGE_SIZE space for tsc page ...
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/include/asm/vdso/gettimeofday.h36
-rw-r--r--arch/arm/vdso/Makefile2
-rw-r--r--arch/arm64/include/asm/vdso/compat_gettimeofday.h2
-rw-r--r--arch/mips/include/asm/vdso/gettimeofday.h2
-rw-r--r--arch/mips/vdso/Makefile4
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/entry/vdso/vdso-layout.lds.S13
-rw-r--r--arch/x86/entry/vdso/vdso2c.c3
-rw-r--r--arch/x86/entry/vdso/vma.c120
-rw-r--r--arch/x86/include/asm/vdso.h1
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h10
-rw-r--r--arch/x86/include/asm/vvar.h13
-rw-r--r--arch/x86/kernel/vmlinux.lds.S4
13 files changed, 191 insertions, 20 deletions
diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h
index 0ad2429c324f..fe6e1f65932d 100644
--- a/arch/arm/include/asm/vdso/gettimeofday.h
+++ b/arch/arm/include/asm/vdso/gettimeofday.h
@@ -52,6 +52,24 @@ static __always_inline long clock_gettime_fallback(
return ret;
}
+static __always_inline long clock_gettime32_fallback(
+ clockid_t _clkid,
+ struct old_timespec32 *_ts)
+{
+ register struct old_timespec32 *ts asm("r1") = _ts;
+ register clockid_t clkid asm("r0") = _clkid;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_clock_gettime;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
static __always_inline int clock_getres_fallback(
clockid_t _clkid,
struct __kernel_timespec *_ts)
@@ -70,6 +88,24 @@ static __always_inline int clock_getres_fallback(
return ret;
}
+static __always_inline int clock_getres32_fallback(
+ clockid_t _clkid,
+ struct old_timespec32 *_ts)
+{
+ register struct old_timespec32 *ts asm("r1") = _ts;
+ register clockid_t clkid asm("r0") = _clkid;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_clock_getres;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
static __always_inline u64 __arch_get_hw_counter(int clock_mode)
{
#ifdef CONFIG_ARM_ARCH_TIMER
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 0fda344beb0b..1babb392e70a 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -14,7 +14,7 @@ targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector
-ccflags-y += -DDISABLE_BRANCH_PROFILING
+ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO32
ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8
ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
index c50ee1b7d5cd..537b1e695365 100644
--- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -16,7 +16,7 @@
#define VDSO_HAS_CLOCK_GETRES 1
-#define VDSO_HAS_32BIT_FALLBACK 1
+#define BUILD_VDSO32 1
static __always_inline
int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h
index 0ae9b4cbc153..a58687e26c5d 100644
--- a/arch/mips/include/asm/vdso/gettimeofday.h
+++ b/arch/mips/include/asm/vdso/gettimeofday.h
@@ -96,8 +96,6 @@ static __always_inline int clock_getres_fallback(
#if _MIPS_SIM != _MIPS_SIM_ABI64
-#define VDSO_HAS_32BIT_FALLBACK 1
-
static __always_inline long clock_gettime32_fallback(
clockid_t _clkid,
struct old_timespec32 *_ts)
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index e05938997e69..b2a2e032dc99 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -18,6 +18,10 @@ ccflags-vdso := \
$(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \
-D__VDSO__
+ifndef CONFIG_64BIT
+ccflags-vdso += -DBUILD_VDSO32
+endif
+
ifdef CONFIG_CC_IS_CLANG
ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS))
endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5e8949953660..a2488c372fa1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -124,6 +124,7 @@ config X86
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
select GENERIC_GETTIMEOFDAY
+ select GENERIC_VDSO_TIME_NS
select GUP_GET_PTE_LOW_HIGH if X86_PAE
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
select HAVE_ACPI_APEI if ACPI
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index 93c6dc7812d0..ea7e0155c604 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -16,18 +16,23 @@ SECTIONS
* segment.
*/
- vvar_start = . - 3 * PAGE_SIZE;
- vvar_page = vvar_start;
+ vvar_start = . - 4 * PAGE_SIZE;
+ vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */
#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
-#define __VVAR_KERNEL_LDS
#include <asm/vvar.h>
-#undef __VVAR_KERNEL_LDS
#undef EMIT_VVAR
pvclock_page = vvar_start + PAGE_SIZE;
hvclock_page = vvar_start + 2 * PAGE_SIZE;
+ timens_page = vvar_start + 3 * PAGE_SIZE;
+
+#undef _ASM_X86_VVAR_H
+ /* Place all vvars in timens too at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) timens_ ## name = timens_page + offset;
+#include <asm/vvar.h>
+#undef EMIT_VVAR
. = SIZEOF_HEADERS;
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 3a4d8d4d39f8..3842873b3ae3 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -75,12 +75,14 @@ enum {
sym_vvar_page,
sym_pvclock_page,
sym_hvclock_page,
+ sym_timens_page,
};
const int special_pages[] = {
sym_vvar_page,
sym_pvclock_page,
sym_hvclock_page,
+ sym_timens_page,
};
struct vdso_sym {
@@ -93,6 +95,7 @@ struct vdso_sym required_syms[] = {
[sym_vvar_page] = {"vvar_page", true},
[sym_pvclock_page] = {"pvclock_page", true},
[sym_hvclock_page] = {"hvclock_page", true},
+ [sym_timens_page] = {"timens_page", true},
{"VDSO32_NOTE_MASK", true},
{"__kernel_vsyscall", true},
{"__kernel_sigreturn", true},
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index f5937742b290..c1b8496b5606 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -14,16 +14,30 @@
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
+#include <linux/time_namespace.h>
+
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
+#include <asm/tlb.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
#include <clocksource/hyperv_timer.h>
+#undef _ASM_X86_VVAR_H
+#define EMIT_VVAR(name, offset) \
+ const size_t name ## _offset = offset;
+#include <asm/vvar.h>
+
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+ return (struct vdso_data *)(vvar_page + _vdso_data_offset);
+}
+#undef EMIT_VVAR
+
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif
@@ -37,6 +51,7 @@ void __init init_vdso_image(const struct vdso_image *image)
image->alt_len));
}
+static const struct vm_special_mapping vvar_mapping;
struct linux_binprm;
static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
@@ -84,10 +99,74 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
return 0;
}
+static int vvar_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ const struct vdso_image *image = new_vma->vm_mm->context.vdso_image;
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+
+ if (new_size != -image->sym_vvar_start)
+ return -EINVAL;
+
+ return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ if (likely(vma->vm_mm == current->mm))
+ return current->nsproxy->time_ns->vvar_page;
+
+ /*
+ * VM_PFNMAP | VM_IO protect .fault() handler from being called
+ * through interfaces like /proc/$pid/mem or
+ * process_vm_{readv,writev}() as long as there's no .access()
+ * in special_mapping_vmops().
+ * For more details check_vma_flags() and __access_remote_vm()
+ */
+
+ WARN(1, "vvar_page accessed remotely");
+
+ return NULL;
+}
+
+/*
+ * The vvar page layout depends on whether a task belongs to the root or
+ * non-root time namespace. Whenever a task changes its namespace, the VVAR
+ * page tables are cleared and then they will re-faulted with a
+ * corresponding layout.
+ * See also the comment near timens_setup_vdso_data() for details.
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
+
+ if (down_write_killable(&mm->mmap_sem))
+ return -EINTR;
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ unsigned long size = vma->vm_end - vma->vm_start;
+
+ if (vma_is_special_mapping(vma, &vvar_mapping))
+ zap_page_range(vma, vma->vm_start, size);
+ }
+
+ up_write(&mm->mmap_sem);
+ return 0;
+}
+#else
+static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+#endif
+
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+ unsigned long pfn;
long sym_offset;
if (!image)
@@ -107,8 +186,36 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
return VM_FAULT_SIGBUS;
if (sym_offset == image->sym_vvar_page) {
- return vmf_insert_pfn(vma, vmf->address,
- __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+ struct page *timens_page = find_timens_vvar_page(vma);
+
+ pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
+
+ /*
+ * If a task belongs to a time namespace then a namespace
+ * specific VVAR is mapped with the sym_vvar_page offset and
+ * the real VVAR page is mapped with the sym_timens_page
+ * offset.
+ * See also the comment near timens_setup_vdso_data().
+ */
+ if (timens_page) {
+ unsigned long addr;
+ vm_fault_t err;
+
+ /*
+ * Optimization: inside time namespace pre-fault
+ * VVAR page too. As on timens page there are only
+ * offsets for clocks on VVAR, it'll be faulted
+ * shortly by VDSO code.
+ */
+ addr = vmf->address + (image->sym_timens_page - sym_offset);
+ err = vmf_insert_pfn(vma, addr, pfn);
+ if (unlikely(err & VM_FAULT_ERROR))
+ return err;
+
+ pfn = page_to_pfn(timens_page);
+ }
+
+ return vmf_insert_pfn(vma, vmf->address, pfn);
} else if (sym_offset == image->sym_pvclock_page) {
struct pvclock_vsyscall_time_info *pvti =
pvclock_get_pvti_cpu0_va();
@@ -123,6 +230,14 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
return vmf_insert_pfn(vma, vmf->address,
virt_to_phys(tsc_pg) >> PAGE_SHIFT);
+ } else if (sym_offset == image->sym_timens_page) {
+ struct page *timens_page = find_timens_vvar_page(vma);
+
+ if (!timens_page)
+ return VM_FAULT_SIGBUS;
+
+ pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
+ return vmf_insert_pfn(vma, vmf->address, pfn);
}
return VM_FAULT_SIGBUS;
@@ -136,6 +251,7 @@ static const struct vm_special_mapping vdso_mapping = {
static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
.fault = vvar_fault,
+ .mremap = vvar_mremap,
};
/*
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 230474e2ddb5..bbcdc7b8f963 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -21,6 +21,7 @@ struct vdso_image {
long sym_vvar_page;
long sym_pvclock_page;
long sym_hvclock_page;
+ long sym_timens_page;
long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index e9ee139cf29e..6ee1f7dba34b 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -21,6 +21,7 @@
#include <clocksource/hyperv_timer.h>
#define __vdso_data (VVAR(_vdso_data))
+#define __timens_vdso_data (TIMENS(_vdso_data))
#define VDSO_HAS_TIME 1
@@ -56,6 +57,13 @@ extern struct ms_hyperv_tsc_page hvclock_page
__attribute__((visibility("hidden")));
#endif
+#ifdef CONFIG_TIME_NS
+static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+{
+ return __timens_vdso_data;
+}
+#endif
+
#ifndef BUILD_VDSO32
static __always_inline
@@ -96,8 +104,6 @@ long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
#else
-#define VDSO_HAS_32BIT_FALLBACK 1
-
static __always_inline
long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
{
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 32f5d9a0b90e..183e98e49ab9 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -19,10 +19,10 @@
#ifndef _ASM_X86_VVAR_H
#define _ASM_X86_VVAR_H
-#if defined(__VVAR_KERNEL_LDS)
-
-/* The kernel linker script defines its own magic to put vvars in the
- * right place.
+#ifdef EMIT_VVAR
+/*
+ * EMIT_VVAR() is used by the kernel linker script to put vvars in the
+ * right place. Also, it's used by kernel code to import offsets values.
*/
#define DECLARE_VVAR(offset, type, name) \
EMIT_VVAR(name, offset)
@@ -33,9 +33,12 @@ extern char __vvar_page;
#define DECLARE_VVAR(offset, type, name) \
extern type vvar_ ## name[CS_BASES] \
- __attribute__((visibility("hidden")));
+ __attribute__((visibility("hidden"))); \
+ extern type timens_ ## name[CS_BASES] \
+ __attribute__((visibility("hidden"))); \
#define VVAR(name) (vvar_ ## name)
+#define TIMENS(name) (timens_ ## name)
#define DEFINE_VVAR(type, name) \
type name[CS_BASES] \
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3a1a819da137..e3296aa028fe 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -193,12 +193,10 @@ SECTIONS
__vvar_beginning_hack = .;
/* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) \
+#define EMIT_VVAR(name, offset) \
. = __vvar_beginning_hack + offset; \
*(.vvar_ ## name)
-#define __VVAR_KERNEL_LDS
#include <asm/vvar.h>
-#undef __VVAR_KERNEL_LDS
#undef EMIT_VVAR
/*