From da6043fe85eb5ec621e34a92540735dcebbea134 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 25 Sep 2019 15:39:12 +0100 Subject: PM / hibernate: memory_bm_find_bit(): Tighten node optimisation When looking for a bit by number we make use of the cached result from the preceding lookup to speed up operation. Firstly we check if the requested pfn is within the cached zone and if not lookup the new zone. We then check if the offset for that pfn falls within the existing cached node. This happens regardless of whether the node is within the zone we are now scanning. With certain memory layouts it is possible for this to false trigger creating a temporary alias for the pfn to a different bit. This leads the hibernation code to free memory which it was never allocated with the expected fallout. Ensure the zone we are scanning matches the cached zone before considering the cached node. Deep thanks go to Andrea for many, many, many hours of hacking and testing that went into cornering this bug. Reported-by: Andrea Righi Tested-by: Andrea Righi Signed-off-by: Andy Whitcroft Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 83105874f255..26b9168321e7 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -734,8 +734,15 @@ zone_found: * We have found the zone. Now walk the radix tree to find the leaf node * for our PFN. */ + + /* + * If the zone we wish to scan is the the current zone and the + * pfn falls into the current node then we do not need to walk + * the tree. + */ node = bm->cur.node; - if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) + if (zone == bm->cur.zone && + ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) goto node_found; node = zone->rtree; -- cgit v1.2.3-59-g8ed1b From c1d51f684c72b5eb2aecbbd47be3a2977a2dc903 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 7 Nov 2019 15:25:12 +0100 Subject: cpuidle: Use nanoseconds as the unit of time Currently, the cpuidle subsystem uses microseconds as the unit of time which (among other things) causes the idle loop to incur some integer division overhead for no clear benefit. In order to allow cpuidle to measure time in nanoseconds, add two new fields, exit_latency_ns and target_residency_ns, to represent the exit latency and target residency of an idle state in nanoseconds, respectively, to struct cpuidle_state and initialize them with the help of the corresponding values in microseconds provided by drivers. Additionally, change cpuidle_governor_latency_req() to return the idle state exit latency constraint in nanoseconds. Also meeasure idle state residency (last_residency_ns in struct cpuidle_device and time_ns in struct cpuidle_driver) in nanoseconds and update the cpuidle core and governors accordingly. However, the menu governor still computes typical intervals in microseconds to avoid integer overflows. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Doug Smythies Tested-by: Doug Smythies --- drivers/cpuidle/cpuidle.c | 36 +++++----- drivers/cpuidle/driver.c | 29 ++++++--- drivers/cpuidle/governor.c | 7 +- drivers/cpuidle/governors/haltpoll.c | 7 +- drivers/cpuidle/governors/ladder.c | 25 +++---- drivers/cpuidle/governors/menu.c | 123 ++++++++++++++++------------------- drivers/cpuidle/governors/teo.c | 76 ++++++++++------------ drivers/cpuidle/poll_state.c | 2 + drivers/cpuidle/sysfs.c | 20 +++++- include/linux/cpuidle.h | 8 ++- kernel/sched/idle.c | 2 +- 11 files changed, 174 insertions(+), 161 deletions(-) (limited to 'kernel') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 44ae39f2b47a..bf9b030cd7e1 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -75,24 +75,24 @@ int cpuidle_play_dead(void) static int find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev, - unsigned int max_latency, + u64 max_latency_ns, unsigned int forbidden_flags, bool s2idle) { - unsigned int latency_req = 0; + u64 latency_req = 0; int i, ret = 0; for (i = 1; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; if (dev->states_usage[i].disable || - s->exit_latency <= latency_req || - s->exit_latency > max_latency || + s->exit_latency_ns <= latency_req || + s->exit_latency_ns > max_latency_ns || (s->flags & forbidden_flags) || (s2idle && !s->enter_s2idle)) continue; - latency_req = s->exit_latency; + latency_req = s->exit_latency_ns; ret = i; } return ret; @@ -124,7 +124,7 @@ void cpuidle_use_deepest_state(bool enable) int cpuidle_find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - return find_deepest_state(drv, dev, UINT_MAX, 0, false); + return find_deepest_state(drv, dev, U64_MAX, 0, false); } #ifdef CONFIG_SUSPEND @@ -180,7 +180,7 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev) * that interrupts won't be enabled when it exits and allows the tick to * be frozen safely. */ - index = find_deepest_state(drv, dev, UINT_MAX, 0, true); + index = find_deepest_state(drv, dev, U64_MAX, 0, true); if (index > 0) enter_s2idle_proper(drv, dev, index); @@ -209,7 +209,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, * CPU as a broadcast timer, this call may fail if it is not available. */ if (broadcast && tick_broadcast_enter()) { - index = find_deepest_state(drv, dev, target_state->exit_latency, + index = find_deepest_state(drv, dev, target_state->exit_latency_ns, CPUIDLE_FLAG_TIMER_STOP, false); if (index < 0) { default_idle_call(); @@ -247,7 +247,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, local_irq_enable(); if (entered_state >= 0) { - s64 diff, delay = drv->states[entered_state].exit_latency; + s64 diff, delay = drv->states[entered_state].exit_latency_ns; int i; /* @@ -255,15 +255,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, * This can be moved to within driver enter routine, * but that results in multiple copies of same code. */ - diff = ktime_us_delta(time_end, time_start); - if (diff > INT_MAX) - diff = INT_MAX; + diff = ktime_sub(time_end, time_start); - dev->last_residency = (int)diff; - dev->states_usage[entered_state].time += dev->last_residency; + dev->last_residency_ns = diff; + dev->states_usage[entered_state].time_ns += diff; dev->states_usage[entered_state].usage++; - if (diff < drv->states[entered_state].target_residency) { + if (diff < drv->states[entered_state].target_residency_ns) { for (i = entered_state - 1; i >= 0; i--) { if (dev->states_usage[i].disable) continue; @@ -281,14 +279,14 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, * Update if a deeper state would have been a * better match for the observed idle duration. */ - if (diff - delay >= drv->states[i].target_residency) + if (diff - delay >= drv->states[i].target_residency_ns) dev->states_usage[entered_state].below++; break; } } } else { - dev->last_residency = 0; + dev->last_residency_ns = 0; } return entered_state; @@ -381,7 +379,7 @@ u64 cpuidle_poll_time(struct cpuidle_driver *drv, if (dev->states_usage[i].disable) continue; - limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC; + limit_ns = (u64)drv->states[i].target_residency_ns; } dev->poll_limit_ns = limit_ns; @@ -552,7 +550,7 @@ static void __cpuidle_unregister_device(struct cpuidle_device *dev) static void __cpuidle_device_init(struct cpuidle_device *dev) { memset(dev->states_usage, 0, sizeof(dev->states_usage)); - dev->last_residency = 0; + dev->last_residency_ns = 0; dev->next_hrtimer = 0; } diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 9db154224999..fcaf8b2bab96 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -165,16 +165,27 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv) if (!drv->cpumask) drv->cpumask = (struct cpumask *)cpu_possible_mask; - /* - * Look for the timer stop flag in the different states, so that we know - * if the broadcast timer has to be set up. The loop is in the reverse - * order, because usually one of the deeper states have this flag set. - */ - for (i = drv->state_count - 1; i >= 0 ; i--) { - if (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP) { + for (i = 0; i < drv->state_count; i++) { + struct cpuidle_state *s = &drv->states[i]; + + /* + * Look for the timer stop flag in the different states and if + * it is found, indicate that the broadcast timer has to be set + * up. + */ + if (s->flags & CPUIDLE_FLAG_TIMER_STOP) drv->bctimer = 1; - break; - } + + /* + * The core will use the target residency and exit latency + * values in nanoseconds, but allow drivers to provide them in + * microseconds too. + */ + if (s->target_residency > 0) + s->target_residency_ns = s->target_residency * NSEC_PER_USEC; + + if (s->exit_latency > 0) + s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC; } } diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index e9801f26c732..e48271e117a3 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -107,11 +107,14 @@ int cpuidle_register_governor(struct cpuidle_governor *gov) * cpuidle_governor_latency_req - Compute a latency constraint for CPU * @cpu: Target CPU */ -int cpuidle_governor_latency_req(unsigned int cpu) +s64 cpuidle_governor_latency_req(unsigned int cpu) { int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); struct device *device = get_cpu_device(cpu); int device_req = dev_pm_qos_raw_resume_latency(device); - return device_req < global_req ? device_req : global_req; + if (device_req > global_req) + device_req = global_req; + + return (s64)device_req * NSEC_PER_USEC; } diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c index 7a703d2e0064..cb2a96eafc02 100644 --- a/drivers/cpuidle/governors/haltpoll.c +++ b/drivers/cpuidle/governors/haltpoll.c @@ -49,7 +49,7 @@ static int haltpoll_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, bool *stop_tick) { - int latency_req = cpuidle_governor_latency_req(dev->cpu); + s64 latency_req = cpuidle_governor_latency_req(dev->cpu); if (!drv->state_count || latency_req == 0) { *stop_tick = false; @@ -75,10 +75,9 @@ static int haltpoll_select(struct cpuidle_driver *drv, return 0; } -static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us) +static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns) { unsigned int val; - u64 block_ns = block_us*NSEC_PER_USEC; /* Grow cpu_halt_poll_us if * cpu_halt_poll_us < block_ns < guest_halt_poll_us @@ -115,7 +114,7 @@ static void haltpoll_reflect(struct cpuidle_device *dev, int index) dev->last_state_idx = index; if (index != 0) - adjust_poll_limit(dev, dev->last_residency); + adjust_poll_limit(dev, dev->last_residency_ns); } /** diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index b0126b8c32fe..8e9058c4ea63 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -27,8 +27,8 @@ struct ladder_device_state { struct { u32 promotion_count; u32 demotion_count; - u32 promotion_time; - u32 demotion_time; + u64 promotion_time_ns; + u64 demotion_time_ns; } threshold; struct { int promotion_count; @@ -68,9 +68,10 @@ static int ladder_select_state(struct cpuidle_driver *drv, { struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); struct ladder_device_state *last_state; - int last_residency, last_idx = dev->last_state_idx; + int last_idx = dev->last_state_idx; int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0; - int latency_req = cpuidle_governor_latency_req(dev->cpu); + s64 latency_req = cpuidle_governor_latency_req(dev->cpu); + s64 last_residency; /* Special case when user has set very strict latency requirement */ if (unlikely(latency_req == 0)) { @@ -80,13 +81,13 @@ static int ladder_select_state(struct cpuidle_driver *drv, last_state = &ldev->states[last_idx]; - last_residency = dev->last_residency - drv->states[last_idx].exit_latency; + last_residency = dev->last_residency_ns - drv->states[last_idx].exit_latency_ns; /* consider promotion */ if (last_idx < drv->state_count - 1 && !dev->states_usage[last_idx + 1].disable && - last_residency > last_state->threshold.promotion_time && - drv->states[last_idx + 1].exit_latency <= latency_req) { + last_residency > last_state->threshold.promotion_time_ns && + drv->states[last_idx + 1].exit_latency_ns <= latency_req) { last_state->stats.promotion_count++; last_state->stats.demotion_count = 0; if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) { @@ -98,11 +99,11 @@ static int ladder_select_state(struct cpuidle_driver *drv, /* consider demotion */ if (last_idx > first_idx && (dev->states_usage[last_idx].disable || - drv->states[last_idx].exit_latency > latency_req)) { + drv->states[last_idx].exit_latency_ns > latency_req)) { int i; for (i = last_idx - 1; i > first_idx; i--) { - if (drv->states[i].exit_latency <= latency_req) + if (drv->states[i].exit_latency_ns <= latency_req) break; } ladder_do_selection(dev, ldev, last_idx, i); @@ -110,7 +111,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, } if (last_idx > first_idx && - last_residency < last_state->threshold.demotion_time) { + last_residency < last_state->threshold.demotion_time_ns) { last_state->stats.demotion_count++; last_state->stats.promotion_count = 0; if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) { @@ -150,9 +151,9 @@ static int ladder_enable_device(struct cpuidle_driver *drv, lstate->threshold.demotion_count = DEMOTION_COUNT; if (i < drv->state_count - 1) - lstate->threshold.promotion_time = state->exit_latency; + lstate->threshold.promotion_time_ns = state->exit_latency_ns; if (i > first_idx) - lstate->threshold.demotion_time = state->exit_latency; + lstate->threshold.demotion_time_ns = state->exit_latency_ns; } return 0; diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 38b2b72102a8..b0a7ad566081 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -19,22 +19,12 @@ #include #include -/* - * Please note when changing the tuning values: - * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of - * a scaling operation multiplication may overflow on 32 bit platforms. - * In that case, #define RESOLUTION as ULL to get 64 bit result: - * #define RESOLUTION 1024ULL - * - * The default values do not overflow. - */ #define BUCKETS 12 #define INTERVAL_SHIFT 3 #define INTERVALS (1UL << INTERVAL_SHIFT) #define RESOLUTION 1024 #define DECAY 8 -#define MAX_INTERESTING 50000 - +#define MAX_INTERESTING (50000 * NSEC_PER_USEC) /* * Concepts and ideas behind the menu governor @@ -120,14 +110,14 @@ struct menu_device { int needs_update; int tick_wakeup; - unsigned int next_timer_us; + u64 next_timer_ns; unsigned int bucket; unsigned int correction_factor[BUCKETS]; unsigned int intervals[INTERVALS]; int interval_ptr; }; -static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters) +static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters) { int bucket = 0; @@ -140,15 +130,15 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters if (nr_iowaiters) bucket = BUCKETS/2; - if (duration < 10) + if (duration_ns < 10ULL * NSEC_PER_USEC) return bucket; - if (duration < 100) + if (duration_ns < 100ULL * NSEC_PER_USEC) return bucket + 1; - if (duration < 1000) + if (duration_ns < 1000ULL * NSEC_PER_USEC) return bucket + 2; - if (duration < 10000) + if (duration_ns < 10000ULL * NSEC_PER_USEC) return bucket + 3; - if (duration < 100000) + if (duration_ns < 100000ULL * NSEC_PER_USEC) return bucket + 4; return bucket + 5; } @@ -276,13 +266,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, bool *stop_tick) { struct menu_device *data = this_cpu_ptr(&menu_devices); - int latency_req = cpuidle_governor_latency_req(dev->cpu); - int i; - int idx; - unsigned int interactivity_req; + s64 latency_req = cpuidle_governor_latency_req(dev->cpu); unsigned int predicted_us; + u64 predicted_ns; + u64 interactivity_req; unsigned long nr_iowaiters; ktime_t delta_next; + int i, idx; if (data->needs_update) { menu_update(drv, dev); @@ -290,14 +280,14 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, } /* determine the expected residency time, round up */ - data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next)); + data->next_timer_ns = tick_nohz_get_sleep_length(&delta_next); nr_iowaiters = nr_iowait_cpu(dev->cpu); - data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); + data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters); if (unlikely(drv->state_count <= 1 || latency_req == 0) || - ((data->next_timer_us < drv->states[1].target_residency || - latency_req < drv->states[1].exit_latency) && + ((data->next_timer_ns < drv->states[1].target_residency_ns || + latency_req < drv->states[1].exit_latency_ns) && !dev->states_usage[0].disable)) { /* * In this case state[0] will be used no matter what, so return @@ -308,18 +298,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, return 0; } - /* - * Force the result of multiplication to be 64 bits even if both - * operands are 32 bits. - * Make sure to round up for half microseconds. - */ - predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us * - data->correction_factor[data->bucket], - RESOLUTION * DECAY); - /* - * Use the lowest expected idle interval to pick the idle state. - */ - predicted_us = min(predicted_us, get_typical_interval(data, predicted_us)); + /* Round up the result for half microseconds. */ + predicted_us = div_u64(data->next_timer_ns * + data->correction_factor[data->bucket] + + (RESOLUTION * DECAY * NSEC_PER_USEC) / 2, + RESOLUTION * DECAY * NSEC_PER_USEC); + /* Use the lowest expected idle interval to pick the idle state. */ + predicted_ns = (u64)min(predicted_us, + get_typical_interval(data, predicted_us)) * + NSEC_PER_USEC; if (tick_nohz_tick_stopped()) { /* @@ -330,14 +317,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * the known time till the closest timer event for the idle * state selection. */ - if (predicted_us < TICK_USEC) - predicted_us = ktime_to_us(delta_next); + if (predicted_ns < TICK_NSEC) + predicted_ns = delta_next; } else { /* * Use the performance multiplier and the user-configurable * latency_req to determine the maximum exit latency. */ - interactivity_req = predicted_us / performance_multiplier(nr_iowaiters); + interactivity_req = div64_u64(predicted_ns, + performance_multiplier(nr_iowaiters)); if (latency_req > interactivity_req) latency_req = interactivity_req; } @@ -356,19 +344,19 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (idx == -1) idx = i; /* first enabled state */ - if (s->target_residency > predicted_us) { + if (s->target_residency_ns > predicted_ns) { /* * Use a physical idle state, not busy polling, unless * a timer is going to trigger soon enough. */ if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && - s->exit_latency <= latency_req && - s->target_residency <= data->next_timer_us) { - predicted_us = s->target_residency; + s->exit_latency_ns <= latency_req && + s->target_residency_ns <= data->next_timer_ns) { + predicted_ns = s->target_residency_ns; idx = i; break; } - if (predicted_us < TICK_USEC) + if (predicted_ns < TICK_NSEC) break; if (!tick_nohz_tick_stopped()) { @@ -378,7 +366,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * tick in that case and let the governor run * again in the next iteration of the loop. */ - predicted_us = drv->states[idx].target_residency; + predicted_ns = drv->states[idx].target_residency_ns; break; } @@ -388,13 +376,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * closest timer event, select this one to avoid getting * stuck in the shallow one for too long. */ - if (drv->states[idx].target_residency < TICK_USEC && - s->target_residency <= ktime_to_us(delta_next)) + if (drv->states[idx].target_residency_ns < TICK_NSEC && + s->target_residency_ns <= delta_next) idx = i; return idx; } - if (s->exit_latency > latency_req) + if (s->exit_latency_ns > latency_req) break; idx = i; @@ -408,12 +396,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * expected idle duration is shorter than the tick period length. */ if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) || - predicted_us < TICK_USEC) && !tick_nohz_tick_stopped()) { - unsigned int delta_next_us = ktime_to_us(delta_next); - + predicted_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) { *stop_tick = false; - if (idx > 0 && drv->states[idx].target_residency > delta_next_us) { + if (idx > 0 && drv->states[idx].target_residency_ns > delta_next) { /* * The tick is not going to be stopped and the target * residency of the state to be returned is not within @@ -425,7 +411,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, continue; idx = i; - if (drv->states[i].target_residency <= delta_next_us) + if (drv->states[i].target_residency_ns <= delta_next) break; } } @@ -461,7 +447,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) struct menu_device *data = this_cpu_ptr(&menu_devices); int last_idx = dev->last_state_idx; struct cpuidle_state *target = &drv->states[last_idx]; - unsigned int measured_us; + u64 measured_ns; unsigned int new_factor; /* @@ -479,7 +465,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * assume the state was never reached and the exit latency is 0. */ - if (data->tick_wakeup && data->next_timer_us > TICK_USEC) { + if (data->tick_wakeup && data->next_timer_ns > TICK_NSEC) { /* * The nohz code said that there wouldn't be any events within * the tick boundary (if the tick was stopped), but the idle @@ -489,7 +475,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * have been idle long (but not forever) to help the idle * duration predictor do a better job next time. */ - measured_us = 9 * MAX_INTERESTING / 10; + measured_ns = 9 * MAX_INTERESTING / 10; } else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) && dev->poll_time_limit) { /* @@ -499,28 +485,29 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * the CPU might have been woken up from idle by the next timer. * Assume that to be the case. */ - measured_us = data->next_timer_us; + measured_ns = data->next_timer_ns; } else { /* measured value */ - measured_us = dev->last_residency; + measured_ns = dev->last_residency_ns; /* Deduct exit latency */ - if (measured_us > 2 * target->exit_latency) - measured_us -= target->exit_latency; + if (measured_ns > 2 * target->exit_latency_ns) + measured_ns -= target->exit_latency_ns; else - measured_us /= 2; + measured_ns /= 2; } /* Make sure our coefficients do not exceed unity */ - if (measured_us > data->next_timer_us) - measured_us = data->next_timer_us; + if (measured_ns > data->next_timer_ns) + measured_ns = data->next_timer_ns; /* Update our correction ratio */ new_factor = data->correction_factor[data->bucket]; new_factor -= new_factor / DECAY; - if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING) - new_factor += RESOLUTION * measured_us / data->next_timer_us; + if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING) + new_factor += div64_u64(RESOLUTION * measured_ns, + data->next_timer_ns); else /* * we were idle so long that we count it as a perfect @@ -540,7 +527,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->correction_factor[data->bucket] = new_factor; /* update the repeating-pattern data */ - data->intervals[data->interval_ptr++] = measured_us; + data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns); if (data->interval_ptr >= INTERVALS) data->interval_ptr = 0; } diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 702d560eb347..ecbcfaefb0cd 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -104,7 +104,7 @@ struct teo_cpu { u64 sleep_length_ns; struct teo_idle_state states[CPUIDLE_STATE_MAX]; int interval_idx; - unsigned int intervals[INTERVALS]; + u64 intervals[INTERVALS]; }; static DEFINE_PER_CPU(struct teo_cpu, teo_cpus); @@ -117,9 +117,8 @@ static DEFINE_PER_CPU(struct teo_cpu, teo_cpus); static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); - unsigned int sleep_length_us = ktime_to_us(cpu_data->sleep_length_ns); int i, idx_hit = -1, idx_timer = -1; - unsigned int measured_us; + u64 measured_ns; if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) { /* @@ -127,23 +126,21 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * enough to the closest timer event expected at the idle state * selection time to be discarded. */ - measured_us = UINT_MAX; + measured_ns = U64_MAX; } else { - unsigned int lat; + u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns; - lat = drv->states[dev->last_state_idx].exit_latency; - - measured_us = ktime_to_us(cpu_data->time_span_ns); + measured_ns = cpu_data->time_span_ns; /* * The delay between the wakeup and the first instruction * executed by the CPU is not likely to be worst-case every * time, so take 1/2 of the exit latency as a very rough * approximation of the average of it. */ - if (measured_us >= lat) - measured_us -= lat / 2; + if (measured_ns >= lat_ns) + measured_ns -= lat_ns / 2; else - measured_us /= 2; + measured_ns /= 2; } /* @@ -155,9 +152,9 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT; - if (drv->states[i].target_residency <= sleep_length_us) { + if (drv->states[i].target_residency_ns <= cpu_data->sleep_length_ns) { idx_timer = i; - if (drv->states[i].target_residency <= measured_us) + if (drv->states[i].target_residency_ns <= measured_ns) idx_hit = i; } } @@ -193,7 +190,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * Save idle duration values corresponding to non-timer wakeups for * pattern detection. */ - cpu_data->intervals[cpu_data->interval_idx++] = measured_us; + cpu_data->intervals[cpu_data->interval_idx++] = measured_ns; if (cpu_data->interval_idx > INTERVALS) cpu_data->interval_idx = 0; } @@ -203,11 +200,11 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * @drv: cpuidle driver containing state data. * @dev: Target CPU. * @state_idx: Index of the capping idle state. - * @duration_us: Idle duration value to match. + * @duration_ns: Idle duration value to match. */ static int teo_find_shallower_state(struct cpuidle_driver *drv, struct cpuidle_device *dev, int state_idx, - unsigned int duration_us) + u64 duration_ns) { int i; @@ -216,7 +213,7 @@ static int teo_find_shallower_state(struct cpuidle_driver *drv, continue; state_idx = i; - if (drv->states[i].target_residency <= duration_us) + if (drv->states[i].target_residency_ns <= duration_ns) break; } return state_idx; @@ -232,8 +229,9 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, bool *stop_tick) { struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); - int latency_req = cpuidle_governor_latency_req(dev->cpu); - unsigned int duration_us, hits, misses, early_hits; + s64 latency_req = cpuidle_governor_latency_req(dev->cpu); + u64 duration_ns; + unsigned int hits, misses, early_hits; int max_early_idx, constraint_idx, idx, i; ktime_t delta_tick; @@ -244,8 +242,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, cpu_data->time_span_ns = local_clock(); - cpu_data->sleep_length_ns = tick_nohz_get_sleep_length(&delta_tick); - duration_us = ktime_to_us(cpu_data->sleep_length_ns); + duration_ns = tick_nohz_get_sleep_length(&delta_tick); + cpu_data->sleep_length_ns = duration_ns; hits = 0; misses = 0; @@ -262,7 +260,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * Ignore disabled states with target residencies beyond * the anticipated idle duration. */ - if (s->target_residency > duration_us) + if (s->target_residency_ns > duration_ns) continue; /* @@ -301,7 +299,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * shallow for that role. */ if (!(tick_nohz_tick_stopped() && - drv->states[idx].target_residency < TICK_USEC)) { + drv->states[idx].target_residency_ns < TICK_NSEC)) { early_hits = cpu_data->states[i].early_hits; max_early_idx = idx; } @@ -315,10 +313,10 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, misses = cpu_data->states[i].misses; } - if (s->target_residency > duration_us) + if (s->target_residency_ns > duration_ns) break; - if (s->exit_latency > latency_req && constraint_idx > i) + if (s->exit_latency_ns > latency_req && constraint_idx > i) constraint_idx = i; idx = i; @@ -327,7 +325,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (early_hits < cpu_data->states[i].early_hits && !(tick_nohz_tick_stopped() && - drv->states[i].target_residency < TICK_USEC)) { + drv->states[i].target_residency_ns < TICK_NSEC)) { early_hits = cpu_data->states[i].early_hits; max_early_idx = i; } @@ -343,7 +341,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, */ if (hits <= misses && max_early_idx >= 0) { idx = max_early_idx; - duration_us = drv->states[idx].target_residency; + duration_ns = drv->states[idx].target_residency_ns; } /* @@ -364,9 +362,9 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * the current expected idle duration value. */ for (i = 0; i < INTERVALS; i++) { - unsigned int val = cpu_data->intervals[i]; + u64 val = cpu_data->intervals[i]; - if (val >= duration_us) + if (val >= duration_ns) continue; count++; @@ -378,17 +376,17 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * values are in the interesting range. */ if (count > INTERVALS / 2) { - unsigned int avg_us = div64_u64(sum, count); + u64 avg_ns = div64_u64(sum, count); /* * Avoid spending too much time in an idle state that * would be too shallow. */ - if (!(tick_nohz_tick_stopped() && avg_us < TICK_USEC)) { - duration_us = avg_us; - if (drv->states[idx].target_residency > avg_us) + if (!(tick_nohz_tick_stopped() && avg_ns < TICK_NSEC)) { + duration_ns = avg_ns; + if (drv->states[idx].target_residency_ns > avg_ns) idx = teo_find_shallower_state(drv, dev, - idx, avg_us); + idx, avg_ns); } } } @@ -398,9 +396,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * expected idle duration is shorter than the tick period length. */ if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) || - duration_us < TICK_USEC) && !tick_nohz_tick_stopped()) { - unsigned int delta_tick_us = ktime_to_us(delta_tick); - + duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) { *stop_tick = false; /* @@ -409,8 +405,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * till the closest timer including the tick, try to correct * that. */ - if (idx > 0 && drv->states[idx].target_residency > delta_tick_us) - idx = teo_find_shallower_state(drv, dev, idx, delta_tick_us); + if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick) + idx = teo_find_shallower_state(drv, dev, idx, delta_tick); } return idx; @@ -454,7 +450,7 @@ static int teo_enable_device(struct cpuidle_driver *drv, memset(cpu_data, 0, sizeof(*cpu_data)); for (i = 0; i < INTERVALS; i++) - cpu_data->intervals[i] = UINT_MAX; + cpu_data->intervals[i] = U64_MAX; return 0; } diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index c8fa5f41dfc4..9f1ace9c53da 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -49,6 +49,8 @@ void cpuidle_poll_state_init(struct cpuidle_driver *drv) snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); state->exit_latency = 0; state->target_residency = 0; + state->exit_latency_ns = 0; + state->target_residency_ns = 0; state->power_usage = -1; state->enter = poll_idle; state->disabled = false; diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 9f3755ac8f87..38ef770be90d 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -273,16 +273,30 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ return sprintf(buf, "%s\n", state->_name);\ } -define_show_state_function(exit_latency) -define_show_state_function(target_residency) +#define define_show_state_time_function(_name) \ +static ssize_t show_state_##_name(struct cpuidle_state *state, \ + struct cpuidle_state_usage *state_usage, \ + char *buf) \ +{ \ + return sprintf(buf, "%llu\n", ktime_to_us(state->_name##_ns)); \ +} + +define_show_state_time_function(exit_latency) +define_show_state_time_function(target_residency) define_show_state_function(power_usage) define_show_state_ull_function(usage) -define_show_state_ull_function(time) define_show_state_str_function(name) define_show_state_str_function(desc) define_show_state_ull_function(above) define_show_state_ull_function(below) +static ssize_t show_state_time(struct cpuidle_state *state, + struct cpuidle_state_usage *state_usage, + char *buf) +{ + return sprintf(buf, "%llu\n", ktime_to_us(state_usage->time_ns)); +} + static ssize_t show_state_disable(struct cpuidle_state *state, struct cpuidle_state_usage *state_usage, char *buf) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index d23a3b1ddcf6..22602747f468 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -35,7 +35,7 @@ struct cpuidle_driver; struct cpuidle_state_usage { unsigned long long disable; unsigned long long usage; - unsigned long long time; /* in US */ + u64 time_ns; unsigned long long above; /* Number of times it's been too deep */ unsigned long long below; /* Number of times it's been too shallow */ #ifdef CONFIG_SUSPEND @@ -48,6 +48,8 @@ struct cpuidle_state { char name[CPUIDLE_NAME_LEN]; char desc[CPUIDLE_DESC_LEN]; + u64 exit_latency_ns; + u64 target_residency_ns; unsigned int flags; unsigned int exit_latency; /* in US */ int power_usage; /* in mW */ @@ -89,7 +91,7 @@ struct cpuidle_device { ktime_t next_hrtimer; int last_state_idx; - int last_residency; + u64 last_residency_ns; u64 poll_limit_ns; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; @@ -263,7 +265,7 @@ struct cpuidle_governor { #ifdef CONFIG_CPU_IDLE extern int cpuidle_register_governor(struct cpuidle_governor *gov); -extern int cpuidle_governor_latency_req(unsigned int cpu); +extern s64 cpuidle_governor_latency_req(unsigned int cpu); #else static inline int cpuidle_register_governor(struct cpuidle_governor *gov) {return 0;} diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 8dad5aa600ea..1aa260702b38 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -104,7 +104,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, * update no idle residency and return. */ if (current_clr_polling_and_test()) { - dev->last_residency = 0; + dev->last_residency_ns = 0; local_irq_enable(); return -EBUSY; } -- cgit v1.2.3-59-g8ed1b From c55b51a06b01d67a99457bb82a8c31081c7faa23 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sat, 16 Nov 2019 14:16:12 +0100 Subject: cpuidle: Allow idle injection to apply exit latency limit In some cases it may be useful to specify an exit latency limit for the idle state to be used during CPU idle time injection. Instead of duplicating the information in struct cpuidle_device or propagating the latency limit in the call stack, replace the use_deepest_state field with forced_latency_limit_ns to represent that limit, so that the deepest idle state with exit latency within that limit is forced (i.e. no governors) when it is set. A zero exit latency limit for forced idle means to use governors in the usual way (analogous to use_deepest_state equal to "false" before this change). Additionally, add play_idle_precise() taking two arguments, the duration of forced idle and the idle state exit latency limit, both in nanoseconds, and redefine play_idle() as a wrapper around that new function. This change is preparatory, no functional impact is expected. Suggested-by: Rafael J. Wysocki Signed-off-by: Daniel Lezcano [ rjw: Subject, changelog, cpuidle_use_deepest_state() kerneldoc, whitespace ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 13 +++++++------ include/linux/cpu.h | 7 ++++++- include/linux/cpuidle.h | 6 +++--- kernel/sched/idle.c | 14 +++++++------- 4 files changed, 23 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index bf9b030cd7e1..12077db1158e 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -99,20 +99,21 @@ static int find_deepest_state(struct cpuidle_driver *drv, } /** - * cpuidle_use_deepest_state - Set/clear governor override flag. - * @enable: New value of the flag. + * cpuidle_use_deepest_state - Set/unset governor override mode. + * @latency_limit_ns: Idle state exit latency limit (or no override if 0). * - * Set/unset the current CPU to use the deepest idle state (override governors - * going forward if set). + * If @latency_limit_ns is nonzero, set the current CPU to use the deepest idle + * state with exit latency within @latency_limit_ns (override governors going + * forward), or do not override governors if it is zero. */ -void cpuidle_use_deepest_state(bool enable) +void cpuidle_use_deepest_state(u64 latency_limit_ns) { struct cpuidle_device *dev; preempt_disable(); dev = cpuidle_get_device(); if (dev) - dev->use_deepest_state = enable; + dev->forced_idle_latency_limit_ns = latency_limit_ns; preempt_enable(); } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d0633ebdaa9c..cc03a7848b63 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -179,7 +179,12 @@ void arch_cpu_idle_dead(void); int cpu_report_state(int cpu); int cpu_check_up_prepare(int cpu); void cpu_set_state_online(int cpu); -void play_idle(unsigned long duration_us); +void play_idle_precise(u64 duration_ns, u64 latency_ns); + +static inline void play_idle(unsigned long duration_us) +{ + play_idle_precise(duration_us * NSEC_PER_USEC, U64_MAX); +} #ifdef CONFIG_HOTPLUG_CPU bool cpu_wait_death(unsigned int cpu, int seconds); diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index afb6a573b46d..72b26ff1de4b 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -85,7 +85,6 @@ struct cpuidle_driver_kobj; struct cpuidle_device { unsigned int registered:1; unsigned int enabled:1; - unsigned int use_deepest_state:1; unsigned int poll_time_limit:1; unsigned int cpu; ktime_t next_hrtimer; @@ -93,6 +92,7 @@ struct cpuidle_device { int last_state_idx; u64 last_residency_ns; u64 poll_limit_ns; + u64 forced_idle_latency_limit_ns; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; struct cpuidle_driver_kobj *kobj_driver; @@ -216,7 +216,7 @@ extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev); extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev); -extern void cpuidle_use_deepest_state(bool enable); +extern void cpuidle_use_deepest_state(u64 latency_limit_ns); #else static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev) @@ -224,7 +224,7 @@ static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return -ENODEV; } -static inline void cpuidle_use_deepest_state(bool enable) +static inline void cpuidle_use_deepest_state(u64 latency_limit_ns) { } #endif diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 1aa260702b38..cd05ffa0abfe 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -165,7 +165,7 @@ static void cpuidle_idle_call(void) * until a proper wakeup interrupt happens. */ - if (idle_should_enter_s2idle() || dev->use_deepest_state) { + if (idle_should_enter_s2idle() || dev->forced_idle_latency_limit_ns) { if (idle_should_enter_s2idle()) { rcu_idle_enter(); @@ -311,7 +311,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } -void play_idle(unsigned long duration_us) +void play_idle_precise(u64 duration_ns, u64 latency_ns) { struct idle_timer it; @@ -323,29 +323,29 @@ void play_idle(unsigned long duration_us) WARN_ON_ONCE(current->nr_cpus_allowed != 1); WARN_ON_ONCE(!(current->flags & PF_KTHREAD)); WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY)); - WARN_ON_ONCE(!duration_us); + WARN_ON_ONCE(!duration_ns); rcu_sleep_check(); preempt_disable(); current->flags |= PF_IDLE; - cpuidle_use_deepest_state(true); + cpuidle_use_deepest_state(latency_ns); it.done = 0; hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); it.timer.function = idle_inject_timer_fn; - hrtimer_start(&it.timer, ns_to_ktime(duration_us * NSEC_PER_USEC), + hrtimer_start(&it.timer, ns_to_ktime(duration_ns), HRTIMER_MODE_REL_PINNED); while (!READ_ONCE(it.done)) do_idle(); - cpuidle_use_deepest_state(false); + cpuidle_use_deepest_state(0); current->flags &= ~PF_IDLE; preempt_fold_need_resched(); preempt_enable(); } -EXPORT_SYMBOL_GPL(play_idle); +EXPORT_SYMBOL_GPL(play_idle_precise); void cpu_startup_entry(enum cpuhp_state state) { -- cgit v1.2.3-59-g8ed1b From 5aa9ba6312e36c18626e73506b92d1513d815435 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sat, 16 Nov 2019 14:16:13 +0100 Subject: cpuidle: Pass exit latency limit to cpuidle_use_deepest_state() Modify cpuidle_use_deepest_state() to take an additional exit latency limit argument to be passed to find_deepest_idle_state() and make cpuidle_idle_call() pass dev->forced_idle_latency_limit_ns to it for forced idle. Suggested-by: Rafael J. Wysocki Signed-off-by: Daniel Lezcano [ rjw: Rebase and rearrange code, subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 5 +++-- include/linux/cpuidle.h | 6 ++++-- kernel/sched/idle.c | 8 +++++++- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 12077db1158e..569dbac443bd 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -123,9 +123,10 @@ void cpuidle_use_deepest_state(u64 latency_limit_ns) * @dev: cpuidle device for the given CPU. */ int cpuidle_find_deepest_state(struct cpuidle_driver *drv, - struct cpuidle_device *dev) + struct cpuidle_device *dev, + u64 latency_limit_ns) { - return find_deepest_state(drv, dev, U64_MAX, 0, false); + return find_deepest_state(drv, dev, latency_limit_ns, 0, false); } #ifdef CONFIG_SUSPEND diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 72b26ff1de4b..2dbe46b7c213 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -213,13 +213,15 @@ static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; } #ifdef CONFIG_CPU_IDLE extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv, - struct cpuidle_device *dev); + struct cpuidle_device *dev, + u64 latency_limit_ns); extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev); extern void cpuidle_use_deepest_state(u64 latency_limit_ns); #else static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, - struct cpuidle_device *dev) + struct cpuidle_device *dev, + u64 latency_limit_ns) {return -ENODEV; } static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index cd05ffa0abfe..fc9604ddd802 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -166,6 +166,8 @@ static void cpuidle_idle_call(void) */ if (idle_should_enter_s2idle() || dev->forced_idle_latency_limit_ns) { + u64 max_latency_ns; + if (idle_should_enter_s2idle()) { rcu_idle_enter(); @@ -176,12 +178,16 @@ static void cpuidle_idle_call(void) } rcu_idle_exit(); + + max_latency_ns = U64_MAX; + } else { + max_latency_ns = dev->forced_idle_latency_limit_ns; } tick_nohz_idle_stop_tick(); rcu_idle_enter(); - next_state = cpuidle_find_deepest_state(drv, dev); + next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns); call_cpuidle(drv, dev, next_state); } else { bool stop_tick = true; -- cgit v1.2.3-59-g8ed1b