From 5d51bee725cc1497352d6b0b604e42a90c680540 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2020 13:38:55 +0100 Subject: clocksource: Add common vdso clock mode storage All architectures which use the generic VDSO code have their own storage for the VDSO clock mode. That's pointless and just requires duplicate code. Provide generic storage for it. The new Kconfig symbol is intermediate and will be removed once all architectures are converted over. Signed-off-by: Thomas Gleixner Tested-by: Vincenzo Frascino Reviewed-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/20200207124403.028046322@linutronix.de --- kernel/time/clocksource.c | 9 +++++++++ kernel/time/vsyscall.c | 10 ++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 428beb69426a..7cb09c4cf21c 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -928,6 +928,15 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) clocksource_arch_init(cs); +#ifdef CONFIG_GENERIC_VDSO_CLOCK_MODE + if (cs->vdso_clock_mode < 0 || + cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) { + pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n", + cs->name, cs->vdso_clock_mode); + cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE; + } +#endif + /* Initialize mult/shift and max_idle_ns */ __clocksource_update_freq_scale(cs, scale, freq); diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 9577c89179cd..f9a5178c69bb 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -71,13 +71,19 @@ void update_vsyscall(struct timekeeper *tk) { struct vdso_data *vdata = __arch_get_k_vdso_data(); struct vdso_timestamp *vdso_ts; + s32 clock_mode; u64 nsec; /* copy vsyscall data */ vdso_write_begin(vdata); - vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk); - vdata[CS_RAW].clock_mode = __arch_get_clock_mode(tk); +#ifdef CONFIG_GENERIC_VDSO_CLOCK_MODE + clock_mode = tk->tkr_mono.clock->vdso_clock_mode; +#else + clock_mode = __arch_get_clock_mode(tk); +#endif + vdata[CS_HRES_COARSE].clock_mode = clock_mode; + vdata[CS_RAW].clock_mode = clock_mode; /* CLOCK_REALTIME also required for time() */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME]; -- cgit v1.2.3-59-g8ed1b From f86fd32db706613fe8d0104057efa6e83e0d7e8f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2020 13:38:59 +0100 Subject: lib/vdso: Cleanup clock mode storage leftovers Now that all architectures are converted to use the generic storage the helpers and conditionals can be removed. 
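For reference, the core of a converted architecture comes down to two small pieces: its architecture specific clock modes fed into the generic enum, and the mode tag on its clocksource. The sketch below uses x86 purely as an illustration; it is abbreviated, is not a hunk from this series, and the fields other than vdso_clock_mode are shown only for context.

/* arch/x86/include/asm/vdso/clocksource.h -- arch modes slotted into the generic enum (illustrative) */
#define VDSO_ARCH_CLOCKMODES	\
	VDSO_CLOCKMODE_TSC,	\
	VDSO_CLOCKMODE_PVCLOCK,	\
	VDSO_CLOCKMODE_HVCLOCK

/* arch/x86/kernel/tsc.c -- the clocksource advertises its VDSO clock mode (abbreviated) */
static struct clocksource clocksource_tsc = {
	.name			= "tsc",
	.read			= read_tsc,
	.mask			= CLOCKSOURCE_MASK(64),
	.flags			= CLOCK_SOURCE_IS_CONTINUOUS,
	.vdso_clock_mode	= VDSO_CLOCKMODE_TSC,
	/* ... remaining fields unchanged ... */
};
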
Signed-off-by: Thomas Gleixner Tested-by: Vincenzo Frascino Reviewed-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/20200207124403.470699892@linutronix.de --- arch/arm/mm/Kconfig | 1 - arch/arm64/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/x86/Kconfig | 1 - include/asm-generic/vdso/vsyscall.h | 7 ------- include/linux/clocksource.h | 4 ++-- kernel/time/vsyscall.c | 4 ---- lib/vdso/Kconfig | 3 --- lib/vdso/gettimeofday.c | 17 +++++------------ 9 files changed, 7 insertions(+), 32 deletions(-) (limited to 'kernel') diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 865e888bb84f..65e4482e3849 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -900,7 +900,6 @@ config VDSO select GENERIC_TIME_VSYSCALL select GENERIC_VDSO_32 select GENERIC_GETTIMEOFDAY - select GENERIC_VDSO_CLOCK_MODE help Place in the process address space an ELF shared object providing fast implementations of gettimeofday and diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7809d4976269..c6c32fb7f546 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -110,7 +110,6 @@ config ARM64 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY - select GENERIC_VDSO_CLOCK_MODE select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_PCI diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 23b5c0578776..654369a7209d 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -37,7 +37,6 @@ config MIPS select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL - select GENERIC_VDSO_CLOCK_MODE select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT select HANDLE_DOMAIN_IRQ select HAVE_ARCH_COMPILER_H diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 698e9c835cc5..8b995db3d10f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -125,7 +125,6 @@ config X86 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY - select GENERIC_VDSO_CLOCK_MODE select GENERIC_VDSO_TIME_NS select GUP_GET_PTE_LOW_HIGH if X86_PAE select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index cec543d9e87b..4a28797495d7 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -18,13 +18,6 @@ static __always_inline bool __arch_update_vdso_data(void) } #endif /* __arch_update_vdso_data */ -#ifndef __arch_get_clock_mode -static __always_inline int __arch_get_clock_mode(struct timekeeper *tk) -{ - return 0; -} -#endif /* __arch_get_clock_mode */ - #ifndef __arch_update_vsyscall static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 6d5ed1b4d24d..7fefe0b21a14 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -24,13 +24,13 @@ struct clocksource; struct module; #if defined(CONFIG_ARCH_CLOCKSOURCE_DATA) || \ - defined(CONFIG_GENERIC_VDSO_CLOCK_MODE) + defined(CONFIG_GENERIC_GETTIMEOFDAY) #include #endif enum vdso_clock_mode { VDSO_CLOCKMODE_NONE, -#ifdef CONFIG_GENERIC_VDSO_CLOCK_MODE +#ifdef CONFIG_GENERIC_GETTIMEOFDAY VDSO_ARCH_CLOCKMODES, #endif VDSO_CLOCKMODE_MAX, diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index f9a5178c69bb..d31a5ef4ade5 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -77,11 +77,7 @@ void update_vsyscall(struct timekeeper *tk) /* copy vsyscall data */ vdso_write_begin(vdata); -#ifdef 
CONFIG_GENERIC_VDSO_CLOCK_MODE clock_mode = tk->tkr_mono.clock->vdso_clock_mode; -#else - clock_mode = __arch_get_clock_mode(tk); -#endif vdata[CS_HRES_COARSE].clock_mode = clock_mode; vdata[CS_RAW].clock_mode = clock_mode; diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig index d9f43c84fcc6..d883ac299508 100644 --- a/lib/vdso/Kconfig +++ b/lib/vdso/Kconfig @@ -30,7 +30,4 @@ config GENERIC_VDSO_TIME_NS Selected by architectures which support time namespaces in the VDSO -config GENERIC_VDSO_CLOCK_MODE - bool - endif diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 3f2d8b859130..00f8d1f1405b 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -65,16 +65,13 @@ static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, do { seq = vdso_read_begin(vd); - if (IS_ENABLED(CONFIG_GENERIC_VDSO_CLOCK_MODE) && - vd->clock_mode == VDSO_CLOCKMODE_NONE) + + if (unlikely(vd->clock_mode == VDSO_CLOCKMODE_NONE)) return -1; + cycles = __arch_get_hw_counter(vd->clock_mode); ns = vdso_ts->nsec; last = vd->cycle_last; - if (!IS_ENABLED(CONFIG_GENERIC_VDSO_CLOCK_MODE) && - unlikely((s64)cycles < 0)) - return -1; - ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); ns >>= vd->shift; sec = vdso_ts->sec; @@ -137,16 +134,12 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, } smp_rmb(); - if (IS_ENABLED(CONFIG_GENERIC_VDSO_CLOCK_MODE) && - vd->clock_mode == VDSO_CLOCKMODE_NONE) + if (unlikely(vd->clock_mode == VDSO_CLOCKMODE_NONE)) return -1; + cycles = __arch_get_hw_counter(vd->clock_mode); ns = vdso_ts->nsec; last = vd->cycle_last; - if (!IS_ENABLED(CONFIG_GENERIC_VDSO_CLOCK_MODE) && - unlikely((s64)cycles < 0)) - return -1; - ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); ns >>= vd->shift; sec = vdso_ts->sec; -- cgit v1.2.3-59-g8ed1b From c7a18100bdffdff440c7291db6e80863fab0461e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2020 13:39:00 +0100 Subject: lib/vdso: Avoid highres update if clocksource is not VDSO capable If the current clocksource is not VDSO capable there is no point in updating the high resolution parts of the VDSO data. Replace the architecture specific check with a check for a VDSO capable clocksource and skip the update if there is none. 
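Condensed, the update path after this change reads roughly as follows. This is a sketch assembled from the hunks in this and the preceding patches, with the coarse clock, CLOCK_REALTIME base and hrtimer_res updates elided; it is not verbatim kernel code.

void update_vsyscall(struct timekeeper *tk)
{
	struct vdso_data *vdata = __arch_get_k_vdso_data();
	s32 clock_mode = tk->tkr_mono.clock->vdso_clock_mode;

	vdso_write_begin(vdata);

	/* Both bases carry the mode of the current clocksource */
	vdata[CS_HRES_COARSE].clock_mode = clock_mode;
	vdata[CS_RAW].clock_mode = clock_mode;

	/* ... coarse clocks, CLOCK_REALTIME base and hrtimer_res updates elided ... */

	/* Only a VDSO capable clocksource needs the high resolution data */
	if (clock_mode != VDSO_CLOCKMODE_NONE)
		update_vdso_data(vdata, tk);

	__arch_update_vsyscall(vdata, tk);

	vdso_write_end(vdata);
}
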
Signed-off-by: Thomas Gleixner Tested-by: Vincenzo Frascino Reviewed-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/20200207124403.563379423@linutronix.de --- arch/arm/include/asm/vdso/vsyscall.h | 7 ------- include/asm-generic/vdso/vsyscall.h | 7 ------- kernel/time/vsyscall.c | 6 +++--- 3 files changed, 3 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/arch/arm/include/asm/vdso/vsyscall.h b/arch/arm/include/asm/vdso/vsyscall.h index 002f9edd8a2b..47e41ae8ccd0 100644 --- a/arch/arm/include/asm/vdso/vsyscall.h +++ b/arch/arm/include/asm/vdso/vsyscall.h @@ -21,13 +21,6 @@ struct vdso_data *__arm_get_k_vdso_data(void) } #define __arch_get_k_vdso_data __arm_get_k_vdso_data -static __always_inline -bool __arm_update_vdso_data(void) -{ - return cntvct_ok; -} -#define __arch_update_vdso_data __arm_update_vdso_data - static __always_inline void __arm_sync_vdso_data(struct vdso_data *vdata) { diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index 4a28797495d7..c835607f78ae 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -11,13 +11,6 @@ static __always_inline struct vdso_data *__arch_get_k_vdso_data(void) } #endif /* __arch_get_k_vdso_data */ -#ifndef __arch_update_vdso_data -static __always_inline bool __arch_update_vdso_data(void) -{ - return true; -} -#endif /* __arch_update_vdso_data */ - #ifndef __arch_update_vsyscall static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index d31a5ef4ade5..54ce6eb2ca36 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -105,10 +105,10 @@ void update_vsyscall(struct timekeeper *tk) WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution); /* - * Architectures can opt out of updating the high resolution part - * of the VDSO. + * If the current clocksource is not VDSO capable, then spare the + * update of the high reolution parts. */ - if (__arch_update_vdso_data()) + if (clock_mode != VDSO_CLOCKMODE_NONE) update_vdso_data(vdata, tk); __arch_update_vsyscall(vdata, tk); -- cgit v1.2.3-59-g8ed1b From 2d6b01bd88ccabba06d342ef80eaab6b39d12497 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2020 13:39:01 +0100 Subject: lib/vdso: Move VCLOCK_TIMENS to vdso_clock_modes Move the time namespace indicator clock mode to the other ones for consistency sake. 
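For context, the fast path dispatch which this indicator steers reads roughly as below once the rename is applied. This is condensed from the do_hres() hunks further down and is a sketch, not verbatim code.

	/* Inside do_hres(): open coded seqcount loop, sketch only */
	do {
		/*
		 * A task in a time namespace sees a special VVAR page with
		 * vd->seq set to 1 and clock_mode set to VDSO_CLOCKMODE_TIMENS,
		 * which diverts it to the namespace aware path.
		 */
		while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) {
			if (IS_ENABLED(CONFIG_TIME_NS) &&
			    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
				return do_hres_timens(vd, clk, ts);
			cpu_relax();
		}
		smp_rmb();

		/* ... regular high resolution read as in the hunks below ... */
	} while (unlikely(vdso_read_retry(vd, seq)));
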
Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/20200207124403.656097274@linutronix.de --- include/linux/clocksource.h | 3 +++ include/vdso/datapage.h | 2 -- kernel/time/namespace.c | 7 ++++--- lib/vdso/gettimeofday.c | 18 ++++++++++-------- 4 files changed, 17 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 7fefe0b21a14..02e3282719bd 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -34,6 +34,9 @@ enum vdso_clock_mode { VDSO_ARCH_CLOCKMODES, #endif VDSO_CLOCKMODE_MAX, + + /* Indicator for time namespace VDSO */ + VDSO_CLOCKMODE_TIMENS = INT_MAX }; /** diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index c5f347cc5e55..30c4cb0428d1 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -21,8 +21,6 @@ #define CS_RAW 1 #define CS_BASES (CS_RAW + 1) -#define VCLOCK_TIMENS UINT_MAX - /** * struct vdso_timestamp - basetime per clock_id * @sec: seconds diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 12858507d75a..e6ba064ce773 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -172,8 +173,8 @@ static struct timens_offset offset_from_ts(struct timespec64 off) * for vdso_data->clock_mode is a non-issue. The task is spin waiting for the * update to finish and for 'seq' to become even anyway. * - * Timens page has vdso_data->clock_mode set to VCLOCK_TIMENS which enforces - * the time namespace handling path. + * Timens page has vdso_data->clock_mode set to VDSO_CLOCKMODE_TIMENS which + * enforces the time namespace handling path. */ static void timens_setup_vdso_data(struct vdso_data *vdata, struct time_namespace *ns) @@ -183,7 +184,7 @@ static void timens_setup_vdso_data(struct vdso_data *vdata, struct timens_offset boottime = offset_from_ts(ns->offsets.boottime); vdata->seq = 1; - vdata->clock_mode = VCLOCK_TIMENS; + vdata->clock_mode = VDSO_CLOCKMODE_TIMENS; offset[CLOCK_MONOTONIC] = monotonic; offset[CLOCK_MONOTONIC_RAW] = monotonic; offset[CLOCK_MONOTONIC_COARSE] = monotonic; diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 00f8d1f1405b..a76ac8d17c5f 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -116,10 +116,10 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, do { /* - * Open coded to handle VCLOCK_TIMENS. Time namespace + * Open coded to handle VDSO_CLOCKMODE_TIMENS. Time namespace * enabled tasks have a special VVAR page installed which * has vd->seq set to 1 and vd->clock_mode set to - * VCLOCK_TIMENS. For non time namespace affected tasks + * VDSO_CLOCKMODE_TIMENS. For non time namespace affected tasks * this does not affect performance because if vd->seq is * odd, i.e. a concurrent update is in progress the extra * check for vd->clock_mode is just a few extra @@ -128,7 +128,7 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, */ while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) { if (IS_ENABLED(CONFIG_TIME_NS) && - vd->clock_mode == VCLOCK_TIMENS) + vd->clock_mode == VDSO_CLOCKMODE_TIMENS) return do_hres_timens(vd, clk, ts); cpu_relax(); } @@ -200,12 +200,12 @@ static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk, do { /* - * Open coded to handle VCLOCK_TIMENS. See comment in + * Open coded to handle VDSO_CLOCK_TIMENS. See comment in * do_hres(). 
*/ while ((seq = READ_ONCE(vd->seq)) & 1) { if (IS_ENABLED(CONFIG_TIME_NS) && - vd->clock_mode == VCLOCK_TIMENS) + vd->clock_mode == VDSO_CLOCKMODE_TIMENS) return do_coarse_timens(vd, clk, ts); cpu_relax(); } @@ -292,7 +292,7 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) if (unlikely(tz != NULL)) { if (IS_ENABLED(CONFIG_TIME_NS) && - vd->clock_mode == VCLOCK_TIMENS) + vd->clock_mode == VDSO_CLOCKMODE_TIMENS) vd = __arch_get_timens_vdso_data(); tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest; @@ -308,7 +308,8 @@ static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time const struct vdso_data *vd = __arch_get_vdso_data(); __kernel_old_time_t t; - if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VCLOCK_TIMENS) + if (IS_ENABLED(CONFIG_TIME_NS) && + vd->clock_mode == VDSO_CLOCKMODE_TIMENS) vd = __arch_get_timens_vdso_data(); t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); @@ -332,7 +333,8 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res) if (unlikely((u32) clock >= MAX_CLOCKS)) return -1; - if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VCLOCK_TIMENS) + if (IS_ENABLED(CONFIG_TIME_NS) && + vd->clock_mode == VDSO_CLOCKMODE_TIMENS) vd = __arch_get_timens_vdso_data(); /* -- cgit v1.2.3-59-g8ed1b From 6e317c32fd39a13e4854a27958d5e35d15d196be Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Sat, 18 Jan 2020 01:59:00 +0300 Subject: timer: Improve the comment describing schedule_timeout() When working commit 6dcd5d7a7a29c1e, a mistake was noticed by Linus: schedule_timeout() was called without setting the task state to anything particular. It calls the scheduler, but doesn't delay anything, because the task stays runnable. That happens because sched_submit_work() does nothing for tasks in TASK_RUNNING state. That turned out to be the intended behavior. Adding a WARN() is not useful as the task could be woken up right after setting the state and before reaching schedule_timeout(). Improve the comment about schedule_timeout() and describe that more explicitly. Signed-off-by: Alexander Popov Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200117225900.16340-1-alex.popov@linux.com --- kernel/time/timer.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 4820823515e9..cb34fac9d9f7 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1828,21 +1828,23 @@ static void process_timeout(struct timer_list *t) * schedule_timeout - sleep until timeout * @timeout: timeout value in jiffies * - * Make the current task sleep until @timeout jiffies have - * elapsed. The routine will return immediately unless - * the current task state has been set (see set_current_state()). + * Make the current task sleep until @timeout jiffies have elapsed. + * The function behavior depends on the current task state + * (see also set_current_state() description): * - * You can set the task state as follows - + * %TASK_RUNNING - the scheduler is called, but the task does not sleep + * at all. That happens because sched_submit_work() does nothing for + * tasks in %TASK_RUNNING state. * * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to * pass before the routine returns unless the current task is explicitly - * woken up, (e.g. by wake_up_process())". + * woken up, (e.g. by wake_up_process()). 
* * %TASK_INTERRUPTIBLE - the routine may return early if a signal is * delivered to the current task or the current task is explicitly woken * up. * - * The current task state is guaranteed to be TASK_RUNNING when this + * The current task state is guaranteed to be %TASK_RUNNING when this * routine returns. * * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule @@ -1850,7 +1852,7 @@ static void process_timeout(struct timer_list *t) * value will be %MAX_SCHEDULE_TIMEOUT. * * Returns 0 when the timer has expired otherwise the remaining time in - * jiffies will be returned. In all cases the return value is guaranteed + * jiffies will be returned. In all cases the return value is guaranteed * to be non-negative. */ signed long __sched schedule_timeout(signed long timeout) -- cgit v1.2.3-59-g8ed1b From 5fb1c2a5bbf79ccca8d17cf97f66085be5808027 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Sun, 16 Feb 2020 13:13:30 +0530 Subject: posix-timers: Pass lockdep expression to RCU lists head is traversed using hlist_for_each_entry_rcu outside an RCU read-side critical section but under the protection of hash_lock. Hence, add corresponding lockdep expression to silence false-positive lockdep warnings, and harden RCU lists. [ tglx: Removed the macro and put the condition right where it's used ] Signed-off-by: Amol Grover Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200216074330.GA14025@workstation-portable --- kernel/time/posix-timers.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index ff0eb30de346..07709ac30439 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -121,7 +121,8 @@ static struct k_itimer *__posix_timers_find(struct hlist_head *head, { struct k_itimer *timer; - hlist_for_each_entry_rcu(timer, head, t_hash) { + hlist_for_each_entry_rcu(timer, head, t_hash, + lockdep_is_held(&hash_lock)) { if ((timer->it_signal == sig) && (timer->it_id == id)) return timer; } -- cgit v1.2.3-59-g8ed1b From a2efdbf4fcb38ec1ae99c9d54d52c34fa867dc71 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Feb 2020 11:08:45 -0600 Subject: posix-cpu-timers: cpu_clock_sample_group() no longer needs siglock As of e78c3496790e ("time, signal: Protect resource use statistics with seqlock") cpu_clock_sample_group() no longer needs siglock protection so remove the stale comment. Signed-off-by: "Eric W. Biederman" Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/87eeuevduq.fsf@x220.int.ebiederm.org --- kernel/time/posix-cpu-timers.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 8ff6da77a01f..46cc188bf5ab 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -336,9 +336,7 @@ static void __thread_group_cputime(struct task_struct *tsk, u64 *samples) /* * Sample a process (thread group) clock for the given task clkid. If the * group's cputime accounting is already enabled, read the atomic - * store. Otherwise a full update is required. Task's sighand lock must be - * held to protect the task traversal on a full update. clkid is already - * validated. + * store. Otherwise a full update is required. clkid is already validated. 
*/ static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p, bool start) -- cgit v1.2.3-59-g8ed1b From 60f2ceaa8111d9ec51af87bde4a6809f2b3235e4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Feb 2020 11:09:19 -0600 Subject: posix-cpu-timers: Remove unnecessary locking around cpu_clock_sample_group As of e78c3496790e ("time, signal: Protect resource use statistics with seqlock") cpu_clock_sample_group no longers needs siglock protection. Unfortunately no one realized it at the time. Remove the extra locking that is for cpu_clock_sample_group and not for cpu_clock_sample. This significantly simplifies the code. Signed-off-by: "Eric W. Biederman" Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/878skmvdts.fsf@x220.int.ebiederm.org --- kernel/time/posix-cpu-timers.c | 66 ++++++++---------------------------------- 1 file changed, 12 insertions(+), 54 deletions(-) (limited to 'kernel') diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 46cc188bf5ab..40c2d8396bb9 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -718,31 +718,10 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp /* * Sample the clock to take the difference with the expiry time. */ - if (CPUCLOCK_PERTHREAD(timer->it_clock)) { + if (CPUCLOCK_PERTHREAD(timer->it_clock)) now = cpu_clock_sample(clkid, p); - } else { - struct sighand_struct *sighand; - unsigned long flags; - - /* - * Protect against sighand release/switch in exit/exec and - * also make timer sampling safe if it ends up calling - * thread_group_cputime(). - */ - sighand = lock_task_sighand(p, &flags); - if (unlikely(sighand == NULL)) { - /* - * The process has been reaped. - * We can't even collect a sample any more. - * Disarm the timer, nothing else to do. - */ - cpu_timer_setexpires(ctmr, 0); - return; - } else { - now = cpu_clock_sample_group(clkid, p, false); - unlock_task_sighand(p, &flags); - } - } + else + now = cpu_clock_sample_group(clkid, p, false); if (now < expires) { itp->it_value = ns_to_timespec64(expires - now); @@ -986,43 +965,22 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) /* * Fetch the current sample and update the timer's expiry time. */ - if (CPUCLOCK_PERTHREAD(timer->it_clock)) { + if (CPUCLOCK_PERTHREAD(timer->it_clock)) now = cpu_clock_sample(clkid, p); - bump_cpu_timer(timer, now); - if (unlikely(p->exit_state)) - return; - - /* Protect timer list r/w in arm_timer() */ - sighand = lock_task_sighand(p, &flags); - if (!sighand) - return; - } else { - /* - * Protect arm_timer() and timer sampling in case of call to - * thread_group_cputime(). - */ - sighand = lock_task_sighand(p, &flags); - if (unlikely(sighand == NULL)) { - /* - * The process has been reaped. - * We can't even collect a sample any more. - */ - cpu_timer_setexpires(ctmr, 0); - return; - } else if (unlikely(p->exit_state) && thread_group_empty(p)) { - /* If the process is dying, no need to rearm */ - goto unlock; - } + else now = cpu_clock_sample_group(clkid, p, true); - bump_cpu_timer(timer, now); - /* Leave the sighand locked for the call below. */ - } + + bump_cpu_timer(timer, now); + + /* Protect timer list r/w in arm_timer() */ + sighand = lock_task_sighand(p, &flags); + if (unlikely(sighand == NULL)) + return; /* * Now re-arm for the new expiry time. 
*/ arm_timer(timer); -unlock: unlock_task_sighand(p, &flags); } -- cgit v1.2.3-59-g8ed1b From beb41d9cbe4179058634e05d60235b6155c7b6c6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Feb 2020 11:09:46 -0600 Subject: posix-cpu-timers: Pass the task into arm_timer() The task has been already computed to take siglock before calling arm_timer. So pass the benefit of that labor into arm_timer(). Signed-off-by: "Eric W. Biederman" Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/8736auvdt1.fsf@x220.int.ebiederm.org --- kernel/time/posix-cpu-timers.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 40c2d8396bb9..ef936c5a910b 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -482,12 +482,11 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk) * Insert the timer on the appropriate list before any timers that * expire later. This must be called with the sighand lock held. */ -static void arm_timer(struct k_itimer *timer) +static void arm_timer(struct k_itimer *timer, struct task_struct *p) { int clkidx = CPUCLOCK_WHICH(timer->it_clock); struct cpu_timer *ctmr = &timer->it.cpu; u64 newexp = cpu_timer_getexpires(ctmr); - struct task_struct *p = ctmr->task; struct posix_cputimer_base *base; if (CPUCLOCK_PERTHREAD(timer->it_clock)) @@ -660,7 +659,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, */ cpu_timer_setexpires(ctmr, new_expires); if (new_expires != 0 && val < new_expires) { - arm_timer(timer); + arm_timer(timer, p); } unlock_task_sighand(p, &flags); @@ -980,7 +979,7 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) /* * Now re-arm for the new expiry time. */ - arm_timer(timer); + arm_timer(timer, p); unlock_task_sighand(p, &flags); } -- cgit v1.2.3-59-g8ed1b From 55e8c8eb2c7b6bf30e99423ccfe7ca032f498f59 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Feb 2020 11:11:06 -0600 Subject: posix-cpu-timers: Store a reference to a pid not a task posix cpu timers do not handle the death of a process well. This is most clearly seen when a multi-threaded process calls exec from a thread that is not the leader of the thread group. The posix cpu timer code continues to pin the old thread group leader and is unable to find the siglock from there. This results in posix_cpu_timer_del being unable to delete a timer, posix_cpu_timer_set being unable to set a timer. Further to compensate for the problems in posix_cpu_timer_del on a multi-threaded exec all timers that point at the multi-threaded task are stopped. The code for the timers fundamentally needs to check if the target process/thread is alive. This needs an extra level of indirection. This level of indirection is already available in struct pid. So replace cpu.task with cpu.pid to get the needed extra layer of indirection. In addition to handling things more cleanly this reduces the amount of memory a timer can pin when a process exits and then is reaped from a task_struct to the vastly smaller struct pid. Fixes: e0a70217107e ("posix-cpu-timers: workaround to suppress the problems with mt exec") Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/87wo86tz6d.fsf@x220.int.ebiederm.org --- include/linux/posix-timers.h | 2 +- kernel/time/posix-cpu-timers.c | 73 +++++++++++++++++++++++++++++++----------- 2 files changed, 56 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 3d10c84a97a9..e3f0f8585da4 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -69,7 +69,7 @@ static inline int clockid_to_fd(const clockid_t clk) struct cpu_timer { struct timerqueue_node node; struct timerqueue_head *head; - struct task_struct *task; + struct pid *pid; struct list_head elist; int firing; }; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index ef936c5a910b..6df468a622fe 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -118,6 +118,16 @@ static inline int validate_clock_permissions(const clockid_t clock) return __get_task_for_clock(clock, false, false) ? 0 : -EINVAL; } +static inline enum pid_type cpu_timer_pid_type(struct k_itimer *timer) +{ + return CPUCLOCK_PERTHREAD(timer->it_clock) ? PIDTYPE_PID : PIDTYPE_TGID; +} + +static inline struct task_struct *cpu_timer_task_rcu(struct k_itimer *timer) +{ + return pid_task(timer->it.cpu.pid, cpu_timer_pid_type(timer)); +} + /* * Update expiry time from increment, and increase overrun count, * given the current clock sample. @@ -391,7 +401,12 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer) new_timer->kclock = &clock_posix_cpu; timerqueue_init(&new_timer->it.cpu.node); - new_timer->it.cpu.task = p; + new_timer->it.cpu.pid = get_task_pid(p, cpu_timer_pid_type(new_timer)); + /* + * get_task_for_clock() took a reference on @p. Drop it as the timer + * holds a reference on the pid of @p. + */ + put_task_struct(p); return 0; } @@ -404,13 +419,15 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer) static int posix_cpu_timer_del(struct k_itimer *timer) { struct cpu_timer *ctmr = &timer->it.cpu; - struct task_struct *p = ctmr->task; struct sighand_struct *sighand; + struct task_struct *p; unsigned long flags; int ret = 0; - if (WARN_ON_ONCE(!p)) - return -EINVAL; + rcu_read_lock(); + p = cpu_timer_task_rcu(timer); + if (!p) + goto out; /* * Protect against sighand release/switch in exit/exec and process/ @@ -432,8 +449,10 @@ static int posix_cpu_timer_del(struct k_itimer *timer) unlock_task_sighand(p, &flags); } +out: + rcu_read_unlock(); if (!ret) - put_task_struct(p); + put_pid(ctmr->pid); return ret; } @@ -561,13 +580,21 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); u64 old_expires, new_expires, old_incr, val; struct cpu_timer *ctmr = &timer->it.cpu; - struct task_struct *p = ctmr->task; struct sighand_struct *sighand; + struct task_struct *p; unsigned long flags; int ret = 0; - if (WARN_ON_ONCE(!p)) - return -EINVAL; + rcu_read_lock(); + p = cpu_timer_task_rcu(timer); + if (!p) { + /* + * If p has just been reaped, we can no + * longer get any information about it at all. + */ + rcu_read_unlock(); + return -ESRCH; + } /* * Use the to_ktime conversion because that clamps the maximum @@ -584,8 +611,10 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, * If p has just been reaped, we can no * longer get any information about it at all. 
*/ - if (unlikely(sighand == NULL)) + if (unlikely(sighand == NULL)) { + rcu_read_unlock(); return -ESRCH; + } /* * Disarm any old timer after extracting its expiry time. @@ -690,6 +719,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, ret = 0; out: + rcu_read_unlock(); if (old) old->it_interval = ns_to_timespec64(old_incr); @@ -701,10 +731,12 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); struct cpu_timer *ctmr = &timer->it.cpu; u64 now, expires = cpu_timer_getexpires(ctmr); - struct task_struct *p = ctmr->task; + struct task_struct *p; - if (WARN_ON_ONCE(!p)) - return; + rcu_read_lock(); + p = cpu_timer_task_rcu(timer); + if (!p) + goto out; /* * Easy part: convert the reload time. @@ -712,7 +744,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp itp->it_interval = ktime_to_timespec64(timer->it_interval); if (!expires) - return; + goto out; /* * Sample the clock to take the difference with the expiry time. @@ -732,6 +764,8 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp itp->it_value.tv_nsec = 1; itp->it_value.tv_sec = 0; } +out: + rcu_read_unlock(); } #define MAX_COLLECTED 20 @@ -952,14 +986,15 @@ static void check_process_timers(struct task_struct *tsk, static void posix_cpu_timer_rearm(struct k_itimer *timer) { clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); - struct cpu_timer *ctmr = &timer->it.cpu; - struct task_struct *p = ctmr->task; + struct task_struct *p; struct sighand_struct *sighand; unsigned long flags; u64 now; - if (WARN_ON_ONCE(!p)) - return; + rcu_read_lock(); + p = cpu_timer_task_rcu(timer); + if (!p) + goto out; /* * Fetch the current sample and update the timer's expiry time. @@ -974,13 +1009,15 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) /* Protect timer list r/w in arm_timer() */ sighand = lock_task_sighand(p, &flags); if (unlikely(sighand == NULL)) - return; + goto out; /* * Now re-arm for the new expiry time. */ arm_timer(timer, p); unlock_task_sighand(p, &flags); +out: + rcu_read_unlock(); } /** -- cgit v1.2.3-59-g8ed1b From b95e31c07c5eb4f5c0769f12b38b0343d7961040 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Feb 2020 11:15:03 -0600 Subject: posix-cpu-timers: Stop disabling timers on mt-exec The reasons why the extra posix_cpu_timers_exit_group() invocation has been added are not entirely clear from the commit message. Today all that posix_cpu_timers_exit_group() does is stop timers that are tracking the task from firing. Every other operation on those timers is still allowed. The practical implication of this is posix_cpu_timer_del() which could not get the siglock after the thread group leader has exited (because sighand == NULL) would be able to run successfully because the timer was already dequeued. With that locking issue fixed there is no point in disabling all of the timers. So remove this ``tempoary'' hack. Fixes: e0a70217107e ("posix-cpu-timers: workaround to suppress the problems with mt exec") Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/87o8tityzs.fsf@x220.int.ebiederm.org --- kernel/exit.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 2833ffb0c211..df546315f699 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -103,17 +103,8 @@ static void __exit_signal(struct task_struct *tsk) #ifdef CONFIG_POSIX_TIMERS posix_cpu_timers_exit(tsk); - if (group_dead) { + if (group_dead) posix_cpu_timers_exit_group(tsk); - } else { - /* - * This can only happen if the caller is de_thread(). - * FIXME: this is the temporary hack, we should teach - * posix-cpu-timers to handle this case correctly. - */ - if (unlikely(has_group_leader_pid(tsk))) - posix_cpu_timers_exit_group(tsk); - } #endif if (group_dead) { -- cgit v1.2.3-59-g8ed1b From 4cbbc3a0eeed675449b1a4d080008927121f3da3 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 20 Jan 2020 18:05:23 +0800 Subject: timekeeping: Prevent 32bit truncation in scale64_check_overflow() While unlikely the divisor in scale64_check_overflow() could be >= 32bit in scale64_check_overflow(). do_div() truncates the divisor to 32bit at least on 32bit platforms. Use div64_u64() instead to avoid the truncation to 32-bit. [ tglx: Massaged changelog ] Signed-off-by: Wen Yang Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200120100523.45656-1-wenyang@linux.alibaba.com --- kernel/time/timekeeping.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index ca69290bee2a..4fc2af4367a7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1005,9 +1005,8 @@ static int scale64_check_overflow(u64 mult, u64 div, u64 *base) ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem))) return -EOVERFLOW; tmp *= mult; - rem *= mult; - do_div(rem, div); + rem = div64_u64(rem * mult, div); *base = tmp + rem; return 0; } -- cgit v1.2.3-59-g8ed1b From 38f7b0b1316d435f38ec3f2bb078897b7a1cfdea Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Thu, 30 Jan 2020 21:08:51 +0800 Subject: hrtimer: Cast explicitely to u32t in __ktime_divns() do_div() does a 64-by-32 division at least on 32bit platforms, while the divisor 'div' is explicitly casted to unsigned long, thus 64-bit on 64-bit platforms. The code already ensures that the divisor is less than 2^32. Hence the proper cast type is u32. Signed-off-by: Wen Yang Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200130130851.29204-1-wenyang@linux.alibaba.com --- kernel/time/hrtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 3a609e7344f3..d74fdcd3ced4 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -311,7 +311,7 @@ s64 __ktime_divns(const ktime_t kt, s64 div) div >>= 1; } tmp >>= sft; - do_div(tmp, (unsigned long) div); + do_div(tmp, (u32) div); return dclc < 0 ? -tmp : tmp; } EXPORT_SYMBOL_GPL(__ktime_divns); -- cgit v1.2.3-59-g8ed1b From d441dceb5dce71150f28add80d36d91bbfccba99 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 7 Feb 2020 14:39:29 -0500 Subject: tick/common: Make tick_periodic() check for missing ticks The tick_periodic() function is used at the beginning part of the bootup process for time keeping while the other clock sources are being initialized. The current code assumes that all the timer interrupts are handled in a timely manner with no missing ticks. 
That is not actually true. Some ticks are missed and there are some discrepancies between the tick time (jiffies) and the timestamp reported in the kernel log. Some systems, however, are more prone to missing ticks than the others. In the extreme case, the discrepancy can actually cause a soft lockup message to be printed by the watchdog kthread. For example, on a Cavium ThunderX2 Sabre arm64 system: [ 25.496379] watchdog: BUG: soft lockup - CPU#14 stuck for 22s! On that system, the missing ticks are especially prevalent during the smp_init() phase of the boot process. With an instrumented kernel, it was found that it took about 24s as reported by the timestamp for the tick to accumulate 4s of time. Investigation and bisection done by others seemed to point to the commit 73f381660959 ("arm64: Advertise mitigation of Spectre-v2, or lack thereof") as the culprit. It could also be a firmware issue as new firmware was promised that would fix the issue. To properly address this problem, stop assuming that there will be no missing tick in tick_periodic(). Modify it to follow the example of tick_do_update_jiffies64() by using another reference clock to check for missing ticks. Since the watchdog timer uses running_clock(), it is used here as the reference. With this applied, the soft lockup problem in the affected arm64 system is gone and tick time tracks much more closely to the timestamp time. Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200207193929.27308-1-longman@redhat.com --- kernel/time/tick-common.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 7e5d3524e924..cce4ed1515c7 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -84,12 +85,41 @@ int tick_is_oneshot_available(void) static void tick_periodic(int cpu) { if (tick_do_timer_cpu == cpu) { + /* + * Use running_clock() as reference to check for missing ticks. + */ + static ktime_t last_update; + ktime_t now; + int ticks = 1; + + now = ns_to_ktime(running_clock()); write_seqlock(&jiffies_lock); - /* Keep track of the next tick event */ - tick_next_period = ktime_add(tick_next_period, tick_period); + if (last_update) { + u64 delta = ktime_sub(now, last_update); - do_timer(1); + /* + * Check for eventually missed ticks + * + * There is likely a persistent delta between + * last_update and tick_next_period. So they are + * updated separately. + */ + if (delta >= 2 * tick_period) { + s64 period = ktime_to_ns(tick_period); + + ticks = ktime_divns(delta, period); + } + last_update = ktime_add(last_update, + ticks * tick_period); + } else { + last_update = now; + } + + /* Keep track of the next tick event */ + tick_next_period = ktime_add(tick_next_period, + ticks * tick_period); + do_timer(ticks); write_sequnlock(&jiffies_lock); update_wall_time(); } -- cgit v1.2.3-59-g8ed1b From 2c8bd58812ee3dbf0d78b566822f7eacd34bdd7b Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 9 Mar 2020 18:15:29 +0000 Subject: time/sched_clock: Expire timer in hardirq context To minimize latency, PREEMPT_RT kernels expires hrtimers in preemptible softirq context by default. This can be overriden by marking the timer's expiry with HRTIMER_MODE_HARD. 
sched_clock_timer is missing this annotation: if its callback is preempted and the duration of the preemption exceeds the wrap around time of the underlying clocksource, sched clock will get out of sync. Mark the sched_clock_timer for expiry in hard interrupt context. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200309181529.26558-1-a.darwish@linutronix.de --- kernel/time/sched_clock.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index e4332e3e2d56..fa3f800d7d76 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -208,7 +208,8 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) if (sched_clock_timer.function != NULL) { /* update timeout for clock wrap */ - hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); + hrtimer_start(&sched_clock_timer, cd.wrap_kt, + HRTIMER_MODE_REL_HARD); } r = rate; @@ -254,9 +255,9 @@ void __init generic_sched_clock_init(void) * Start the timer to keep sched_clock() properly updated and * sets the initial epoch. */ - hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); sched_clock_timer.function = sched_clock_poll; - hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); + hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL_HARD); } /* @@ -293,7 +294,7 @@ void sched_clock_resume(void) struct clock_read_data *rd = &cd.read_data[0]; rd->epoch_cyc = cd.actual_read_sched_clock(); - hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); + hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL_HARD); rd->read_sched_clock = cd.actual_read_sched_clock; } -- cgit v1.2.3-59-g8ed1b From 52da479a9aee630d2cdf37d05edfe5bcfff3e17f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Mar 2020 19:47:06 +0100 Subject: Revert "tick/common: Make tick_periodic() check for missing ticks" This reverts commit d441dceb5dce71150f28add80d36d91bbfccba99 due to boot failures. Reported-by: Qian Cai Signed-off-by: Thomas Gleixner Cc: Waiman Long --- kernel/time/tick-common.c | 36 +++--------------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index cce4ed1515c7..7e5d3524e924 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -85,41 +84,12 @@ int tick_is_oneshot_available(void) static void tick_periodic(int cpu) { if (tick_do_timer_cpu == cpu) { - /* - * Use running_clock() as reference to check for missing ticks. - */ - static ktime_t last_update; - ktime_t now; - int ticks = 1; - - now = ns_to_ktime(running_clock()); write_seqlock(&jiffies_lock); - if (last_update) { - u64 delta = ktime_sub(now, last_update); - - /* - * Check for eventually missed ticks - * - * There is likely a persistent delta between - * last_update and tick_next_period. So they are - * updated separately. 
- */ - if (delta >= 2 * tick_period) { - s64 period = ktime_to_ns(tick_period); - - ticks = ktime_divns(delta, period); - } - last_update = ktime_add(last_update, - ticks * tick_period); - } else { - last_update = now; - } - /* Keep track of the next tick event */ - tick_next_period = ktime_add(tick_next_period, - ticks * tick_period); - do_timer(ticks); + tick_next_period = ktime_add(tick_next_period, tick_period); + + do_timer(1); write_sequnlock(&jiffies_lock); update_wall_time(); } -- cgit v1.2.3-59-g8ed1b