From 00b26474c2f1613d7ab894c525f775c67c8a9e8f Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:29 +0100 Subject: lib/vdso: Provide generic VDSO implementation In the last few years the kernel gained quite some architecture specific vdso implementations which contain very similar code. Introduce a generic VDSO implementation of gettimeofday() which will be shareable between architectures once they are converted over. The implementation is based on the current x86 VDSO code. [ tglx: Massaged changelog and made the kernel doc tabular ] Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-3-vincenzo.frascino@arm.com --- lib/Kconfig | 5 ++ lib/vdso/Kconfig | 36 ++++++++ lib/vdso/Makefile | 22 +++++ lib/vdso/gettimeofday.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 287 insertions(+) create mode 100644 lib/vdso/Kconfig create mode 100644 lib/vdso/Makefile create mode 100644 lib/vdso/gettimeofday.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 90623a0e1942..8c8eefc5e54c 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -576,6 +576,11 @@ config OID_REGISTRY config UCS2_STRING tristate +# +# generic vdso +# +source "lib/vdso/Kconfig" + source "lib/fonts/Kconfig" config SG_SPLIT diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig new file mode 100644 index 000000000000..cc00364bd2c2 --- /dev/null +++ b/lib/vdso/Kconfig @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0 + +config HAVE_GENERIC_VDSO + bool + +if HAVE_GENERIC_VDSO + +config GENERIC_GETTIMEOFDAY + bool + help + This is a generic implementation of gettimeofday vdso. + Each architecture that enables this feature has to + provide the fallback implementation. + +config GENERIC_VDSO_32 + bool + depends on GENERIC_GETTIMEOFDAY && !64BIT + help + This config option helps to avoid possible performance issues + in 32 bit only architectures. + +config GENERIC_COMPAT_VDSO + bool + help + This config option enables the compat VDSO layer. + +config CROSS_COMPILE_COMPAT_VDSO + string "32 bit Toolchain prefix for compat vDSO" + default "" + depends on GENERIC_COMPAT_VDSO + help + Defines the cross-compiler prefix for compiling compat vDSO. + If a 64 bit compiler (i.e. x86_64) can compile the VDSO for + 32 bit, it does not need to define this parameter. + +endif diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile new file mode 100644 index 000000000000..c415a685d61b --- /dev/null +++ b/lib/vdso/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 + +GENERIC_VDSO_MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) +GENERIC_VDSO_DIR := $(dir $(GENERIC_VDSO_MK_PATH)) + +c-gettimeofday-$(CONFIG_GENERIC_GETTIMEOFDAY) := $(addprefix $(GENERIC_VDSO_DIR), gettimeofday.c) + +# This cmd checks that the vdso library does not contain absolute relocation +# It has to be called after the linking of the vdso library and requires it +# as a parameter. +# +# $(ARCH_REL_TYPE_ABS) is defined in the arch specific makefile and corresponds +# to the absolute relocation types printed by "objdump -R" and accepted by the +# dynamic linker. +ifndef ARCH_REL_TYPE_ABS +$(error ARCH_REL_TYPE_ABS is not set) +endif + +quiet_cmd_vdso_check = VDSOCHK $@ + cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \ + then (echo >&2 "$@: dynamic relocations are not supported"; \ + rm -f $@; /bin/false); fi diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c new file mode 100644 index 000000000000..767d3a0bcb06 --- /dev/null +++ b/lib/vdso/gettimeofday.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic userspace implementations of gettimeofday() and similar. + */ +#include +#include +#include +#include +#include +#include +#include + +/* + * The generic vDSO implementation requires that gettimeofday.h + * provides: + * - __arch_get_vdso_data(): to get the vdso datapage. + * - __arch_get_hw_counter(): to get the hw counter based on the + * clock_mode. + * - gettimeofday_fallback(): fallback for gettimeofday. + * - clock_gettime_fallback(): fallback for clock_gettime. + * - clock_getres_fallback(): fallback for clock_getres. + */ +#include + +static int do_hres(const struct vdso_data *vd, clockid_t clk, + struct __kernel_timespec *ts) +{ + const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; + u64 cycles, last, sec, ns; + u32 seq; + + do { + seq = vdso_read_begin(vd); + cycles = __arch_get_hw_counter(vd->clock_mode) & + vd->mask; + ns = vdso_ts->nsec; + last = vd->cycle_last; + if (unlikely((s64)cycles < 0)) + return clock_gettime_fallback(clk, ts); + if (cycles > last) + ns += (cycles - last) * vd->mult; + ns >>= vd->shift; + sec = vdso_ts->sec; + } while (unlikely(vdso_read_retry(vd, seq))); + + /* + * Do this outside the loop: a race inside the loop could result + * in __iter_div_u64_rem() being extremely slow. + */ + ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; + + return 0; +} + +static void do_coarse(const struct vdso_data *vd, clockid_t clk, + struct __kernel_timespec *ts) +{ + const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; + u32 seq; + + do { + seq = vdso_read_begin(vd); + ts->tv_sec = vdso_ts->sec; + ts->tv_nsec = vdso_ts->nsec; + } while (unlikely(vdso_read_retry(vd, seq))); +} + +static __maybe_unused int +__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +{ + const struct vdso_data *vd = __arch_get_vdso_data(); + u32 msk; + + /* Check for negative values or invalid clocks */ + if (unlikely((u32) clock >= MAX_CLOCKS)) + goto fallback; + + /* + * Convert the clockid to a bitmask and use it to check which + * clocks are handled in the VDSO directly. + */ + msk = 1U << clock; + if (likely(msk & VDSO_HRES)) { + return do_hres(&vd[CS_HRES_COARSE], clock, ts); + } else if (msk & VDSO_COARSE) { + do_coarse(&vd[CS_HRES_COARSE], clock, ts); + return 0; + } else if (msk & VDSO_RAW) { + return do_hres(&vd[CS_RAW], clock, ts); + } + +fallback: + return clock_gettime_fallback(clock, ts); +} + +static __maybe_unused int +__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) +{ + struct __kernel_timespec ts; + int ret; + + if (res == NULL) + goto fallback; + + ret = __cvdso_clock_gettime(clock, &ts); + + if (ret == 0) { + res->tv_sec = ts.tv_sec; + res->tv_nsec = ts.tv_nsec; + } + + return ret; + +fallback: + return clock_gettime_fallback(clock, (struct __kernel_timespec *)res); +} + +static __maybe_unused int +__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) +{ + const struct vdso_data *vd = __arch_get_vdso_data(); + + if (likely(tv != NULL)) { + struct __kernel_timespec ts; + + if (do_hres(&vd[CS_HRES_COARSE], CLOCK_REALTIME, &ts)) + return gettimeofday_fallback(tv, tz); + + tv->tv_sec = ts.tv_sec; + tv->tv_usec = (u32)ts.tv_nsec / NSEC_PER_USEC; + } + + if (unlikely(tz != NULL)) { + tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest; + tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime; + } + + return 0; +} + +#ifdef VDSO_HAS_TIME +static __maybe_unused time_t __cvdso_time(time_t *time) +{ + const struct vdso_data *vd = __arch_get_vdso_data(); + time_t t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); + + if (time) + *time = t; + + return t; +} +#endif /* VDSO_HAS_TIME */ + +#ifdef VDSO_HAS_CLOCK_GETRES +static __maybe_unused +int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) +{ + const struct vdso_data *vd = __arch_get_vdso_data(); + u64 ns; + u32 msk; + u64 hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res); + + /* Check for negative values or invalid clocks */ + if (unlikely((u32) clock >= MAX_CLOCKS)) + goto fallback; + + /* + * Convert the clockid to a bitmask and use it to check which + * clocks are handled in the VDSO directly. + */ + msk = 1U << clock; + if (msk & VDSO_HRES) { + /* + * Preserves the behaviour of posix_get_hrtimer_res(). + */ + ns = hrtimer_res; + } else if (msk & VDSO_COARSE) { + /* + * Preserves the behaviour of posix_get_coarse_res(). + */ + ns = LOW_RES_NSEC; + } else if (msk & VDSO_RAW) { + /* + * Preserves the behaviour of posix_get_hrtimer_res(). + */ + ns = hrtimer_res; + } else { + goto fallback; + } + + if (res) { + res->tv_sec = 0; + res->tv_nsec = ns; + } + + return 0; + +fallback: + return clock_getres_fallback(clock, res); +} + +static __maybe_unused int +__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res) +{ + struct __kernel_timespec ts; + int ret; + + if (res == NULL) + goto fallback; + + ret = __cvdso_clock_getres(clock, &ts); + + if (ret == 0) { + res->tv_sec = ts.tv_sec; + res->tv_nsec = ts.tv_nsec; + } + + return ret; + +fallback: + return clock_getres_fallback(clock, (struct __kernel_timespec *)res); +} +#endif /* VDSO_HAS_CLOCK_GETRES */ -- cgit v1.2.3-59-g8ed1b From 629fdf77ac4584b73bf3a7a07f5fc5ab0d27afdc Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:36 +0100 Subject: lib/vdso: Add compat support Some 64 bit architectures have support for 32 bit applications that require a separate version of the vDSOs. Add support to the generic code for compat fallback functions. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-10-vincenzo.frascino@arm.com --- lib/vdso/gettimeofday.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 767d3a0bcb06..ef28cc5d7bff 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -20,7 +20,11 @@ * - clock_gettime_fallback(): fallback for clock_gettime. * - clock_getres_fallback(): fallback for clock_getres. */ +#ifdef ENABLE_COMPAT_VDSO +#include +#else #include +#endif /* ENABLE_COMPAT_VDSO */ static int do_hres(const struct vdso_data *vd, clockid_t clk, struct __kernel_timespec *ts) -- cgit v1.2.3-59-g8ed1b From 9d90b93bf325e015bbae31b83f16da5e4e17effa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Jun 2019 12:02:00 +0200 Subject: lib/vdso: Make delta calculation work correctly The x86 vdso implementation on which the generic vdso library is based on has subtle (unfortunately undocumented) twists: 1) The code assumes that the clocksource mask is U64_MAX which means that no bits are masked. Which is true for any valid x86 VDSO clocksource. Stupidly it still did the mask operation for no reason and at the wrong place right after reading the clocksource. 2) It contains a sanity check to catch the case where slightly unsynchronized TSC values can be observed which would cause the delta calculation to make a huge jump. It therefore checks whether the current TSC value is larger than the value on which the current conversion is based on. If it's not larger the base value is used to prevent time jumps. #1 Is not only stupid for the X86 case because it does the masking for no reason it is also completely wrong for clocksources with a smaller mask which can legitimately wrap around during a conversion period. The core timekeeping code does it correct by applying the mask after the delta calculation: (now - base) & mask #2 is equally broken for clocksources which have smaller masks and can wrap around during a conversion period because there the now > base check is just wrong and causes stale time stamps and time going backwards issues. Unbreak it by: 1) Removing the mask operation from the clocksource read which makes the fallback detection work for all clocksources 2) Replacing the conditional delta calculation with a overrideable inline function. #2 could reuse clocksource_delta() from the timekeeping code but that results in a significant performance hit for the x86 VSDO. The timekeeping core code must have the non optimized version as it has to operate correctly with clocksources which have smaller masks as well to handle the case where TSC is discarded as timekeeper clocksource and replaced by HPET or pmtimer. For the VDSO there is no replacement clocksource. If TSC is unusable the syscall is enforced which does the right thing. To accommodate to the needs of various architectures provide an override-able inline function which defaults to the regular delta calculation with masking: (now - base) & mask Override it for x86 with the non-masking and checking version. This unbreaks the ARM64 syscall fallback operation, allows to use clocksources with arbitrary width and preserves the performance optimization for x86. Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Cc: linux-arch@vger.kernel.org Cc: LAK Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: catalin.marinas@arm.com Cc: Will Deacon Cc: Arnd Bergmann Cc: linux@armlinux.org.uk Cc: Ralf Baechle Cc: paul.burton@mips.com Cc: Daniel Lezcano Cc: salyzyn@android.com Cc: pcc@google.com Cc: shuah@kernel.org Cc: 0x7f454c46@gmail.com Cc: linux@rasmusvillemoes.dk Cc: huw@codeweavers.com Cc: sthotton@marvell.com Cc: andre.przywara@arm.com Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1906261159230.32342@nanos.tec.linutronix.de --- arch/x86/include/asm/vdso/gettimeofday.h | 27 +++++++++++++++++++++++++++ lib/vdso/gettimeofday.c | 19 +++++++++++++++---- 2 files changed, 42 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 5b63f1f78a1f..a14039a59abd 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -229,6 +229,33 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) return __vdso_data; } +/* + * x86 specific delta calculation. + * + * The regular implementation assumes that clocksource reads are globally + * monotonic. The TSC can be slightly off across sockets which can cause + * the regular delta calculation (@cycles - @last) to return a huge time + * jump. + * + * Therefore it needs to be verified that @cycles are greater than + * @last. If not then use @last, which is the base time of the current + * conversion period. + * + * This variant also removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. + */ +static __always_inline +u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + if (cycles > last) + return (cycles - last) * mult; + return 0; +} +#define vdso_calc_delta vdso_calc_delta + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index ef28cc5d7bff..2d1c1f241fd9 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -26,6 +26,18 @@ #include #endif /* ENABLE_COMPAT_VDSO */ +#ifndef vdso_calc_delta +/* + * Default implementation which works for all sane clocksources. That + * obviously excludes x86/TSC. + */ +static __always_inline +u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + return ((cycles - last) & mask) * mult; +} +#endif + static int do_hres(const struct vdso_data *vd, clockid_t clk, struct __kernel_timespec *ts) { @@ -35,14 +47,13 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk, do { seq = vdso_read_begin(vd); - cycles = __arch_get_hw_counter(vd->clock_mode) & - vd->mask; + cycles = __arch_get_hw_counter(vd->clock_mode); ns = vdso_ts->nsec; last = vd->cycle_last; if (unlikely((s64)cycles < 0)) return clock_gettime_fallback(clk, ts); - if (cycles > last) - ns += (cycles - last) * vd->mult; + + ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); ns >>= vd->shift; sec = vdso_ts->sec; } while (unlikely(vdso_read_retry(vd, seq))); -- cgit v1.2.3-59-g8ed1b