From b5dee3130bb4014511f5d0dd46855ed843e3fdc8 Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Mon, 25 Feb 2019 20:36:41 +0800 Subject: PM / sleep: Refactor filesystems sync to reduce duplication Create a common helper to sync filesystems for system suspend and hibernation. Signed-off-by: Harry Pan Acked-by: Pavel Machek [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 5 +---- kernel/power/main.c | 9 +++++++++ kernel/power/suspend.c | 13 +++++-------- kernel/power/user.c | 5 +---- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index abef759de7c8..cc105ecd9c07 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -709,9 +708,7 @@ int hibernate(void) goto Exit; } - pr_info("Syncing filesystems ... \n"); - ksys_sync(); - pr_info("done.\n"); + ksys_sync_helper(); error = freeze_processes(); if (error) diff --git a/kernel/power/main.c b/kernel/power/main.c index 98e76cad128b..40472a7c5536 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "power.h" @@ -51,6 +52,14 @@ void unlock_system_sleep(void) } EXPORT_SYMBOL_GPL(unlock_system_sleep); +void ksys_sync_helper(void) +{ + pr_info("Syncing filesystems ... "); + ksys_sync(); + pr_cont("done.\n"); +} +EXPORT_SYMBOL_GPL(ksys_sync_helper); + /* Routines for PM-transition notifications */ static BLOCKING_NOTIFIER_HEAD(pm_chain_head); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 0bd595a0b610..e39059dea38b 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -568,13 +567,11 @@ static int enter_state(suspend_state_t state) if (state == PM_SUSPEND_TO_IDLE) s2idle_begin(); -#ifndef CONFIG_SUSPEND_SKIP_SYNC - trace_suspend_resume(TPS("sync_filesystems"), 0, true); - pr_info("Syncing filesystems ... "); - ksys_sync(); - pr_cont("done.\n"); - trace_suspend_resume(TPS("sync_filesystems"), 0, false); -#endif + if (!IS_ENABLED(CONFIG_SUSPEND_SKIP_SYNC)) { + trace_suspend_resume(TPS("sync_filesystems"), 0, true); + ksys_sync_helper(); + trace_suspend_resume(TPS("sync_filesystems"), 0, false); + } pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]); pm_suspend_clear_flags(); diff --git a/kernel/power/user.c b/kernel/power/user.c index 2d8b60a3c86b..cb24e840a3e6 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -10,7 +10,6 @@ */ #include -#include #include #include #include @@ -228,9 +227,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (data->frozen) break; - printk("Syncing filesystems ... "); - ksys_sync(); - printk("done.\n"); + ksys_sync_helper(); error = freeze_processes(); if (error) -- cgit v1.2.3-59-g8ed1b From c64546b17bc940643545dd34eac21f51764d633c Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Mon, 25 Feb 2019 20:36:43 +0800 Subject: PM / sleep: Measure the time of filesystems syncing Measure the filesystems sync time during system sleep more precisely. Among other things, this allows the pr_cont() to be dropped from ksys_sync_helper() and makes automatic system suspend and hibernation profiling somewhat more straightforward. Signed-off-by: Harry Pan [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index 40472a7c5536..4f43e724f6eb 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -54,9 +54,14 @@ EXPORT_SYMBOL_GPL(unlock_system_sleep); void ksys_sync_helper(void) { - pr_info("Syncing filesystems ... "); + ktime_t start; + long elapsed_msecs; + + start = ktime_get(); ksys_sync(); - pr_cont("done.\n"); + elapsed_msecs = ktime_to_ms(ktime_sub(ktime_get(), start)); + pr_info("Filesystems sync: %ld.%03ld seconds\n", + elapsed_msecs / MSEC_PER_SEC, elapsed_msecs % MSEC_PER_SEC); } EXPORT_SYMBOL_GPL(ksys_sync_helper); -- cgit v1.2.3-59-g8ed1b From 9eca544b1491df90ea7102a7ed14acc3c562d97b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 28 Mar 2019 11:33:21 +0100 Subject: cpufreq: schedutil: Simplify iowait boosting There is not reason for the minimum iowait boost value in the schedutil cpufreq governor to depend on the available range of CPU frequencies. In fact, that dependency is generally confusing, because it causes the iowait boost to behave somewhat differently on CPUs with the same maximum frequency and different minimum frequencies, for example. For this reason, replace the min field in struct sugov_cpu with a constant and choose its values to be 1/8 of SCHED_CAPACITY_SCALE (for consistency with the intel_pstate driver's internal governor). [Note that policy->cpuinfo.max_freq will not be a constant any more after a subsequent change, so this change is depended on by it.] Link: https://lore.kernel.org/lkml/20190305083202.GU32494@hirez.programming.kicks-ass.net/T/#ee20bdc98b7d89f6110c0d00e5c3ee8c2ced93c3d Suggested-by: Peter Zijlstra Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar --- kernel/sched/cpufreq_schedutil.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 5c41ea367422..b3a878aa593d 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -13,6 +13,8 @@ #include #include +#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8) + struct sugov_tunables { struct gov_attr_set attr_set; unsigned int rate_limit_us; @@ -51,7 +53,6 @@ struct sugov_cpu { u64 last_update; unsigned long bw_dl; - unsigned long min; unsigned long max; /* The field below is for single-CPU policies only: */ @@ -291,8 +292,8 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) * * The IO wait boost of a task is disabled after a tick since the last update * of a CPU. If a new IO wait boost is requested after more then a tick, then - * we enable the boost starting from the minimum frequency, which improves - * energy efficiency by ignoring sporadic wakeups from IO. + * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy + * efficiency by ignoring sporadic wakeups from IO. */ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, bool set_iowait_boost) @@ -303,7 +304,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, if (delta_ns <= TICK_NSEC) return false; - sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0; + sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0; sg_cpu->iowait_boost_pending = set_iowait_boost; return true; @@ -317,8 +318,9 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, * * Each time a task wakes up after an IO operation, the CPU utilization can be * boosted to a certain utilization which doubles at each "frequent and - * successive" wakeup from IO, ranging from the utilization of the minimum - * OPP to the utilization of the maximum OPP. + * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization + * of the maximum OPP. + * * To keep doubling, an IO boost has to be requested at least once per tick, * otherwise we restart from the utilization of the minimum OPP. */ @@ -349,7 +351,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, } /* First wakeup after IO: start with minimum boost */ - sg_cpu->iowait_boost = sg_cpu->min; + sg_cpu->iowait_boost = IOWAIT_BOOST_MIN; } /** @@ -389,7 +391,7 @@ static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, * No boost pending; reduce the boost value. */ sg_cpu->iowait_boost >>= 1; - if (sg_cpu->iowait_boost < sg_cpu->min) { + if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) { sg_cpu->iowait_boost = 0; return util; } @@ -826,9 +828,6 @@ static int sugov_start(struct cpufreq_policy *policy) memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->cpu = cpu; sg_cpu->sg_policy = sg_policy; - sg_cpu->min = - (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) / - policy->cpuinfo.max_freq; } for_each_cpu(cpu, policy->cpus) { -- cgit v1.2.3-59-g8ed1b From 6f9b83ac877fb5558d76b9f78590f3afd1bdf421 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:47 +0100 Subject: cpuidle: Export the next timer expiration for CPUs To be able to predict the sleep duration for a CPU entering idle, it is essential to know the expiration time of the next timer. Both the teo and the menu cpuidle governors already use this information for CPU idle state selection. Moving forward, a similar prediction needs to be made for a group of idle CPUs rather than for a single one and the following changes implement a new genpd governor for that purpose. In order to support that feature, add a new function called tick_nohz_get_next_hrtimer() that will return the next hrtimer expiration time of a given CPU to be invoked after deciding whether or not to stop the scheduler tick on that CPU. Make the cpuidle core call tick_nohz_get_next_hrtimer() right before invoking the ->enter() callback provided by the cpuidle driver for the given state and store its return value in the per-CPU struct cpuidle_device, so as to make it available to code outside of cpuidle. Note that at the point when cpuidle calls tick_nohz_get_next_hrtimer(), the governor's ->select() callback has already returned and indicated whether or not the tick should be stopped, so in fact the value returned by tick_nohz_get_next_hrtimer() always is the next hrtimer expiration time for the given CPU, possibly including the tick (if it hasn't been stopped). Co-developed-by: Lina Iyer Co-developed-by: Daniel Lezcano Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 19 +++++++++++++++++-- include/linux/cpuidle.h | 1 + include/linux/tick.h | 7 ++++++- kernel/time/tick-sched.c | 12 ++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 7f108309e871..0f4b7c45df3e 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -328,9 +328,23 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { + int ret = 0; + + /* + * Store the next hrtimer, which becomes either next tick or the next + * timer event, whatever expires first. Additionally, to make this data + * useful for consumers outside cpuidle, we rely on that the governor's + * ->select() callback have decided, whether to stop the tick or not. + */ + WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer()); + if (cpuidle_state_is_coupled(drv, index)) - return cpuidle_enter_state_coupled(dev, drv, index); - return cpuidle_enter_state(dev, drv, index); + ret = cpuidle_enter_state_coupled(dev, drv, index); + else + ret = cpuidle_enter_state(dev, drv, index); + + WRITE_ONCE(dev->next_hrtimer, 0); + return ret; } /** @@ -511,6 +525,7 @@ static void __cpuidle_device_init(struct cpuidle_device *dev) { memset(dev->states_usage, 0, sizeof(dev->states_usage)); dev->last_residency = 0; + dev->next_hrtimer = 0; } /** diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3b39472324a3..bb9a0db89f1a 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -83,6 +83,7 @@ struct cpuidle_device { unsigned int use_deepest_state:1; unsigned int poll_time_limit:1; unsigned int cpu; + ktime_t next_hrtimer; int last_residency; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; diff --git a/include/linux/tick.h b/include/linux/tick.h index 55388ab45fd4..8891b5ac3e40 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -122,6 +122,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern bool tick_nohz_idle_got_tick(void); +extern ktime_t tick_nohz_get_next_hrtimer(void); extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); @@ -145,7 +146,11 @@ static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } static inline bool tick_nohz_idle_got_tick(void) { return false; } - +static inline ktime_t tick_nohz_get_next_hrtimer(void) +{ + /* Next wake up is the tick period, assume it starts now */ + return ktime_add(ktime_get(), TICK_NSEC); +} static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { *delta_next = TICK_NSEC; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6fa52cd6df0b..8d18e03124ff 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1022,6 +1022,18 @@ bool tick_nohz_idle_got_tick(void) return false; } +/** + * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer + * or the tick, whatever that expires first. Note that, if the tick has been + * stopped, it returns the next hrtimer. + * + * Called from power state control code with interrupts disabled + */ +ktime_t tick_nohz_get_next_hrtimer(void) +{ + return __this_cpu_read(tick_cpu_device.evtdev)->next_event; +} + /** * tick_nohz_get_sleep_length - return the expected length of the current sleep * @delta_next: duration until the next event if the tick cannot be stopped -- cgit v1.2.3-59-g8ed1b