aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/cpuidle/governors
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpuidle/governors')
-rw-r--r--drivers/cpuidle/governors/Makefile1
-rw-r--r--drivers/cpuidle/governors/haltpoll.c150
-rw-r--r--drivers/cpuidle/governors/ladder.c21
-rw-r--r--drivers/cpuidle/governors/menu.c21
-rw-r--r--drivers/cpuidle/governors/teo.c60
5 files changed, 194 insertions, 59 deletions
diff --git a/drivers/cpuidle/governors/Makefile b/drivers/cpuidle/governors/Makefile
index 42f44cc610dd..63abb5393a4d 100644
--- a/drivers/cpuidle/governors/Makefile
+++ b/drivers/cpuidle/governors/Makefile
@@ -6,3 +6,4 @@
obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o
obj-$(CONFIG_CPU_IDLE_GOV_TEO) += teo.o
+obj-$(CONFIG_CPU_IDLE_GOV_HALTPOLL) += haltpoll.o
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c
new file mode 100644
index 000000000000..7a703d2e0064
--- /dev/null
+++ b/drivers/cpuidle/governors/haltpoll.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * haltpoll.c - haltpoll idle governor
+ *
+ * Copyright 2019 Red Hat, Inc. and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Authors: Marcelo Tosatti <mtosatti@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/time.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/kvm_para.h>
+
+static unsigned int guest_halt_poll_ns __read_mostly = 200000;
+module_param(guest_halt_poll_ns, uint, 0644);
+
+/* division factor to shrink halt_poll_ns */
+static unsigned int guest_halt_poll_shrink __read_mostly = 2;
+module_param(guest_halt_poll_shrink, uint, 0644);
+
+/* multiplication factor to grow per-cpu poll_limit_ns */
+static unsigned int guest_halt_poll_grow __read_mostly = 2;
+module_param(guest_halt_poll_grow, uint, 0644);
+
+/* value in us to start growing per-cpu halt_poll_ns */
+static unsigned int guest_halt_poll_grow_start __read_mostly = 50000;
+module_param(guest_halt_poll_grow_start, uint, 0644);
+
+/* allow shrinking guest halt poll */
+static bool guest_halt_poll_allow_shrink __read_mostly = true;
+module_param(guest_halt_poll_allow_shrink, bool, 0644);
+
+/**
+ * haltpoll_select - selects the next idle state to enter
+ * @drv: cpuidle driver containing state data
+ * @dev: the CPU
+ * @stop_tick: indication on whether or not to stop the tick
+ */
+static int haltpoll_select(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev,
+ bool *stop_tick)
+{
+ int latency_req = cpuidle_governor_latency_req(dev->cpu);
+
+ if (!drv->state_count || latency_req == 0) {
+ *stop_tick = false;
+ return 0;
+ }
+
+ if (dev->poll_limit_ns == 0)
+ return 1;
+
+ /* Last state was poll? */
+ if (dev->last_state_idx == 0) {
+ /* Halt if no event occurred on poll window */
+ if (dev->poll_time_limit == true)
+ return 1;
+
+ *stop_tick = false;
+ /* Otherwise, poll again */
+ return 0;
+ }
+
+ *stop_tick = false;
+ /* Last state was halt: poll */
+ return 0;
+}
+
+static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us)
+{
+ unsigned int val;
+ u64 block_ns = block_us*NSEC_PER_USEC;
+
+ /* Grow cpu_halt_poll_us if
+ * cpu_halt_poll_us < block_ns < guest_halt_poll_us
+ */
+ if (block_ns > dev->poll_limit_ns && block_ns <= guest_halt_poll_ns) {
+ val = dev->poll_limit_ns * guest_halt_poll_grow;
+
+ if (val < guest_halt_poll_grow_start)
+ val = guest_halt_poll_grow_start;
+ if (val > guest_halt_poll_ns)
+ val = guest_halt_poll_ns;
+
+ dev->poll_limit_ns = val;
+ } else if (block_ns > guest_halt_poll_ns &&
+ guest_halt_poll_allow_shrink) {
+ unsigned int shrink = guest_halt_poll_shrink;
+
+ val = dev->poll_limit_ns;
+ if (shrink == 0)
+ val = 0;
+ else
+ val /= shrink;
+ dev->poll_limit_ns = val;
+ }
+}
+
+/**
+ * haltpoll_reflect - update variables and update poll time
+ * @dev: the CPU
+ * @index: the index of actual entered state
+ */
+static void haltpoll_reflect(struct cpuidle_device *dev, int index)
+{
+ dev->last_state_idx = index;
+
+ if (index != 0)
+ adjust_poll_limit(dev, dev->last_residency);
+}
+
+/**
+ * haltpoll_enable_device - scans a CPU's states and does setup
+ * @drv: cpuidle driver
+ * @dev: the CPU
+ */
+static int haltpoll_enable_device(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev)
+{
+ dev->poll_limit_ns = 0;
+
+ return 0;
+}
+
+static struct cpuidle_governor haltpoll_governor = {
+ .name = "haltpoll",
+ .rating = 9,
+ .enable = haltpoll_enable_device,
+ .select = haltpoll_select,
+ .reflect = haltpoll_reflect,
+};
+
+static int __init init_haltpoll(void)
+{
+ if (kvm_para_available())
+ return cpuidle_register_governor(&haltpoll_governor);
+
+ return 0;
+}
+
+postcore_initcall(init_haltpoll);
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index f0dddc66af26..428eeb832fe7 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -38,7 +38,6 @@ struct ladder_device_state {
struct ladder_device {
struct ladder_device_state states[CPUIDLE_STATE_MAX];
- int last_state_idx;
};
static DEFINE_PER_CPU(struct ladder_device, ladder_devices);
@@ -49,12 +48,13 @@ static DEFINE_PER_CPU(struct ladder_device, ladder_devices);
* @old_idx: the current state index
* @new_idx: the new target state index
*/
-static inline void ladder_do_selection(struct ladder_device *ldev,
+static inline void ladder_do_selection(struct cpuidle_device *dev,
+ struct ladder_device *ldev,
int old_idx, int new_idx)
{
ldev->states[old_idx].stats.promotion_count = 0;
ldev->states[old_idx].stats.demotion_count = 0;
- ldev->last_state_idx = new_idx;
+ dev->last_state_idx = new_idx;
}
/**
@@ -68,13 +68,13 @@ static int ladder_select_state(struct cpuidle_driver *drv,
{
struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
struct ladder_device_state *last_state;
- int last_residency, last_idx = ldev->last_state_idx;
+ int last_residency, last_idx = dev->last_state_idx;
int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
int latency_req = cpuidle_governor_latency_req(dev->cpu);
/* Special case when user has set very strict latency requirement */
if (unlikely(latency_req == 0)) {
- ladder_do_selection(ldev, last_idx, 0);
+ ladder_do_selection(dev, ldev, last_idx, 0);
return 0;
}
@@ -91,7 +91,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
last_state->stats.promotion_count++;
last_state->stats.demotion_count = 0;
if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
- ladder_do_selection(ldev, last_idx, last_idx + 1);
+ ladder_do_selection(dev, ldev, last_idx, last_idx + 1);
return last_idx + 1;
}
}
@@ -107,7 +107,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
if (drv->states[i].exit_latency <= latency_req)
break;
}
- ladder_do_selection(ldev, last_idx, i);
+ ladder_do_selection(dev, ldev, last_idx, i);
return i;
}
@@ -116,7 +116,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
last_state->stats.demotion_count++;
last_state->stats.promotion_count = 0;
if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
- ladder_do_selection(ldev, last_idx, last_idx - 1);
+ ladder_do_selection(dev, ldev, last_idx, last_idx - 1);
return last_idx - 1;
}
}
@@ -139,7 +139,7 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
struct ladder_device_state *lstate;
struct cpuidle_state *state;
- ldev->last_state_idx = first_idx;
+ dev->last_state_idx = first_idx;
for (i = first_idx; i < drv->state_count; i++) {
state = &drv->states[i];
@@ -167,9 +167,8 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
*/
static void ladder_reflect(struct cpuidle_device *dev, int index)
{
- struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
if (index > 0)
- ldev->last_state_idx = index;
+ dev->last_state_idx = index;
}
static struct cpuidle_governor ladder_governor = {
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index e9a28c7846d6..e5a5d0c8d66b 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -117,7 +117,6 @@
*/
struct menu_device {
- int last_state_idx;
int needs_update;
int tick_wakeup;
@@ -302,9 +301,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
!drv->states[0].disabled && !dev->states_usage[0].disable)) {
/*
* In this case state[0] will be used no matter what, so return
- * it right away and keep the tick running.
+ * it right away and keep the tick running if state[0] is a
+ * polling one.
*/
- *stop_tick = false;
+ *stop_tick = !(drv->states[0].flags & CPUIDLE_FLAG_POLLING);
return 0;
}
@@ -395,16 +395,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
return idx;
}
- if (s->exit_latency > latency_req) {
- /*
- * If we break out of the loop for latency reasons, use
- * the target residency of the selected state as the
- * expected idle duration so that the tick is retained
- * as long as that target residency is low enough.
- */
- predicted_us = drv->states[idx].target_residency;
+ if (s->exit_latency > latency_req)
break;
- }
+
idx = i;
}
@@ -455,7 +448,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index)
{
struct menu_device *data = this_cpu_ptr(&menu_devices);
- data->last_state_idx = index;
+ dev->last_state_idx = index;
data->needs_update = 1;
data->tick_wakeup = tick_nohz_idle_got_tick();
}
@@ -468,7 +461,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index)
static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct menu_device *data = this_cpu_ptr(&menu_devices);
- int last_idx = data->last_state_idx;
+ int last_idx = dev->last_state_idx;
struct cpuidle_state *target = &drv->states[last_idx];
unsigned int measured_us;
unsigned int new_factor;
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 7d05efdbd3c6..b5a0e498f798 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -96,7 +96,6 @@ struct teo_idle_state {
* @time_span_ns: Time between idle state selection and post-wakeup update.
* @sleep_length_ns: Time till the closest timer event (at the selection time).
* @states: Idle states data corresponding to this CPU.
- * @last_state: Idle state entered by the CPU last time.
* @interval_idx: Index of the most recent saved idle interval.
* @intervals: Saved idle duration values.
*/
@@ -104,7 +103,6 @@ struct teo_cpu {
u64 time_span_ns;
u64 sleep_length_ns;
struct teo_idle_state states[CPUIDLE_STATE_MAX];
- int last_state;
int interval_idx;
unsigned int intervals[INTERVALS];
};
@@ -125,12 +123,15 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
/*
- * One of the safety nets has triggered or this was a timer
- * wakeup (or equivalent).
+ * One of the safety nets has triggered or the wakeup was close
+ * enough to the closest timer event expected at the idle state
+ * selection time to be discarded.
*/
- measured_us = sleep_length_us;
+ measured_us = UINT_MAX;
} else {
- unsigned int lat = drv->states[cpu_data->last_state].exit_latency;
+ unsigned int lat;
+
+ lat = drv->states[dev->last_state_idx].exit_latency;
measured_us = ktime_to_us(cpu_data->time_span_ns);
/*
@@ -189,15 +190,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
}
/*
- * If the total time span between idle state selection and the "reflect"
- * callback is greater than or equal to the sleep length determined at
- * the idle state selection time, the wakeup is likely to be due to a
- * timer event.
- */
- if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns)
- measured_us = UINT_MAX;
-
- /*
* Save idle duration values corresponding to non-timer wakeups for
* pattern detection.
*/
@@ -242,12 +234,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
int latency_req = cpuidle_governor_latency_req(dev->cpu);
unsigned int duration_us, count;
- int max_early_idx, idx, i;
+ int max_early_idx, constraint_idx, idx, i;
ktime_t delta_tick;
- if (cpu_data->last_state >= 0) {
+ if (dev->last_state_idx >= 0) {
teo_update(drv, dev);
- cpu_data->last_state = -1;
+ dev->last_state_idx = -1;
}
cpu_data->time_span_ns = local_clock();
@@ -257,6 +249,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
count = 0;
max_early_idx = -1;
+ constraint_idx = drv->state_count;
idx = -1;
for (i = 0; i < drv->state_count; i++) {
@@ -286,16 +279,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (s->target_residency > duration_us)
break;
- if (s->exit_latency > latency_req) {
- /*
- * If we break out of the loop for latency reasons, use
- * the target residency of the selected state as the
- * expected idle duration to avoid stopping the tick
- * as long as that target residency is low enough.
- */
- duration_us = drv->states[idx].target_residency;
- goto refine;
- }
+ if (s->exit_latency > latency_req && constraint_idx > i)
+ constraint_idx = i;
idx = i;
@@ -321,7 +306,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
duration_us = drv->states[idx].target_residency;
}
-refine:
+ /*
+ * If there is a latency constraint, it may be necessary to use a
+ * shallower idle state than the one selected so far.
+ */
+ if (constraint_idx < idx)
+ idx = constraint_idx;
+
if (idx < 0) {
idx = 0; /* No states enabled. Must use 0. */
} else if (idx > 0) {
@@ -331,13 +322,12 @@ refine:
/*
* Count and sum the most recent idle duration values less than
- * the target residency of the state selected so far, find the
- * max.
+ * the current expected idle duration value.
*/
for (i = 0; i < INTERVALS; i++) {
unsigned int val = cpu_data->intervals[i];
- if (val >= drv->states[idx].target_residency)
+ if (val >= duration_us)
continue;
count++;
@@ -356,8 +346,10 @@ refine:
* would be too shallow.
*/
if (!(tick_nohz_tick_stopped() && avg_us < TICK_USEC)) {
- idx = teo_find_shallower_state(drv, dev, idx, avg_us);
duration_us = avg_us;
+ if (drv->states[idx].target_residency > avg_us)
+ idx = teo_find_shallower_state(drv, dev,
+ idx, avg_us);
}
}
}
@@ -394,7 +386,7 @@ static void teo_reflect(struct cpuidle_device *dev, int state)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
- cpu_data->last_state = state;
+ dev->last_state_idx = state;
/*
* If the wakeup was not "natural", but triggered by one of the safety
* nets, assume that the CPU might have been idle for the entire sleep