aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/idle
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/idle')
-rw-r--r--drivers/idle/intel_idle.c873
1 files changed, 667 insertions, 206 deletions
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index d55606608ac8..cfeb24d40d37 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -2,12 +2,13 @@
/*
* intel_idle.c - native hardware idle loop for modern Intel processors
*
- * Copyright (c) 2013, Intel Corporation.
+ * Copyright (c) 2013 - 2020, Intel Corporation.
* Len Brown <len.brown@intel.com>
+ * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
*/
/*
- * intel_idle is a cpuidle driver that loads on specific Intel processors
+ * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
* in lieu of the legacy ACPI processor_idle driver. The intent is to
* make Linux more efficient on these processors, as intel_idle knows
* more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
@@ -19,17 +20,16 @@
* All CPUs have same idle states as boot CPU
*
* Chipset BM_STS (bus master status) bit is a NOP
- * for preventing entry into deep C-stats
+ * for preventing entry into deep C-states
+ *
+ * CPU will flush caches as needed when entering a C-state via MWAIT
+ * (in contrast to entering ACPI C3, in which case the WBINVD
+ * instruction needs to be executed to flush the caches)
*/
/*
* Known limitations
*
- * The driver currently initializes for_each_online_cpu() upon modprobe.
- * It it unaware of subsequent processors hot-added to the system.
- * This means that if you boot with maxcpus=n and later online
- * processors above n, those processors will use C1 only.
- *
* ACPI has a .suspend hack to turn off deep c-statees during suspend
* to avoid complications with the lapic timer workaround.
* Have not seen issues with suspend, but may need same workaround here.
@@ -37,7 +37,7 @@
*/
/* un-comment DEBUG to enable pr_debug() statements */
-#define DEBUG
+/* #define DEBUG */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -47,15 +47,18 @@
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
+#include <linux/sched/smt.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/nospec-branch.h>
#include <asm/mwait.h>
#include <asm/msr.h>
+#include <asm/fpu/api.h>
-#define INTEL_IDLE_VERSION "0.4.1"
+#define INTEL_IDLE_VERSION "0.5.1"
static struct cpuidle_driver intel_idle_driver = {
.name = "intel_idle",
@@ -64,12 +67,17 @@ static struct cpuidle_driver intel_idle_driver = {
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
static unsigned int disabled_states_mask;
+static unsigned int preferred_states_mask;
+
+static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
-static unsigned int mwait_substates;
+static unsigned long auto_demotion_disable_flags;
-#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
-/* Reliable LAPIC Timer States, bit 1 for C1 etc. */
-static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */
+static enum {
+ C1E_PROMOTION_PRESERVE,
+ C1E_PROMOTION_ENABLE,
+ C1E_PROMOTION_DISABLE
+} c1e_promotion = C1E_PROMOTION_PRESERVE;
struct idle_cpu {
struct cpuidle_state *state_table;
@@ -84,13 +92,16 @@ struct idle_cpu {
bool use_acpi;
};
-static const struct idle_cpu *icpu;
-static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
-static int intel_idle(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int index);
-static void intel_idle_s2idle(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int index);
-static struct cpuidle_state *cpuidle_state_table;
+static const struct idle_cpu *icpu __initdata;
+static struct cpuidle_state *cpuidle_state_table __initdata;
+
+static unsigned int mwait_substates __initdata;
+
+/*
+ * Enable interrupts before entering the C-state. On some platforms and for
+ * some C-states, this may measurably decrease interrupt latency.
+ */
+#define CPUIDLE_FLAG_IRQ_ENABLE BIT(14)
/*
* Enable this state by default even if the ACPI _CST does not list it.
@@ -98,12 +109,15 @@ static struct cpuidle_state *cpuidle_state_table;
#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)
/*
- * Set this flag for states where the HW flushes the TLB for us
- * and so we don't need cross-calls to keep it consistent.
- * If this flag is set, SW flushes the TLB, so even if the
- * HW doesn't do the flushing, this flag is safe to use.
+ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
+ * above.
+ */
+#define CPUIDLE_FLAG_IBRS BIT(16)
+
+/*
+ * Initialize large xstate for the C6-state entrance.
*/
-#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000
+#define CPUIDLE_FLAG_INIT_XSTATE BIT(17)
/*
* MWAIT takes an 8-bit "hint" in EAX "suggesting"
@@ -115,12 +129,114 @@ static struct cpuidle_state *cpuidle_state_table;
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
+static __always_inline int __intel_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ struct cpuidle_state *state = &drv->states[index];
+ unsigned long eax = flg2MWAIT(state->flags);
+ unsigned long ecx = 1; /* break on interrupt flag */
+
+ mwait_idle_with_hints(eax, ecx);
+
+ return index;
+}
+
+/**
+ * intel_idle - Ask the processor to enter the given idle state.
+ * @dev: cpuidle device of the target CPU.
+ * @drv: cpuidle driver (assumed to point to intel_idle_driver).
+ * @index: Target idle state index.
+ *
+ * Use the MWAIT instruction to notify the processor that the CPU represented by
+ * @dev is idle and it can try to enter the idle state corresponding to @index.
+ *
+ * If the local APIC timer is not known to be reliable in the target idle state,
+ * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
+ *
+ * Must be called under local_irq_disable().
+ */
+static __cpuidle int intel_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ return __intel_idle(dev, drv, index);
+}
+
+static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ int ret;
+
+ raw_local_irq_enable();
+ ret = __intel_idle(dev, drv, index);
+
+ /*
+ * The lockdep hardirqs state may be changed to 'on' with timer
+ * tick interrupt followed by __do_softirq(). Use local_irq_disable()
+ * to keep the hardirqs state correct.
+ */
+ local_irq_disable();
+
+ return ret;
+}
+
+static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ bool smt_active = sched_smt_active();
+ u64 spec_ctrl = spec_ctrl_current();
+ int ret;
+
+ if (smt_active)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+ ret = __intel_idle(dev, drv, index);
+
+ if (smt_active)
+ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
+
+ return ret;
+}
+
+static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ fpu_idle_fpregs();
+ return __intel_idle(dev, drv, index);
+}
+
+/**
+ * intel_idle_s2idle - Ask the processor to enter the given idle state.
+ * @dev: cpuidle device of the target CPU.
+ * @drv: cpuidle driver (assumed to point to intel_idle_driver).
+ * @index: Target idle state index.
+ *
+ * Use the MWAIT instruction to notify the processor that the CPU represented by
+ * @dev is idle and it can try to enter the idle state corresponding to @index.
+ *
+ * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
+ * scheduler tick and suspended scheduler clock on the target CPU.
+ */
+static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ unsigned long ecx = 1; /* break on interrupt flag */
+ struct cpuidle_state *state = &drv->states[index];
+ unsigned long eax = flg2MWAIT(state->flags);
+
+ if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
+ fpu_idle_fpregs();
+
+ mwait_idle_with_hints(eax, ecx);
+
+ return 0;
+}
+
/*
* States are indexed by the cstate number,
* which is also the index into the MWAIT hint array.
* Thus C0 is a dummy.
*/
-static struct cpuidle_state nehalem_cstates[] = {
+static struct cpuidle_state nehalem_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -157,7 +273,7 @@ static struct cpuidle_state nehalem_cstates[] = {
.enter = NULL }
};
-static struct cpuidle_state snb_cstates[] = {
+static struct cpuidle_state snb_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -202,7 +318,7 @@ static struct cpuidle_state snb_cstates[] = {
.enter = NULL }
};
-static struct cpuidle_state byt_cstates[] = {
+static struct cpuidle_state byt_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -247,7 +363,7 @@ static struct cpuidle_state byt_cstates[] = {
.enter = NULL }
};
-static struct cpuidle_state cht_cstates[] = {
+static struct cpuidle_state cht_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -292,7 +408,7 @@ static struct cpuidle_state cht_cstates[] = {
.enter = NULL }
};
-static struct cpuidle_state ivb_cstates[] = {
+static struct cpuidle_state ivb_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -337,7 +453,7 @@ static struct cpuidle_state ivb_cstates[] = {
.enter = NULL }
};
-static struct cpuidle_state ivt_cstates[] = {
+static struct cpuidle_state ivt_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -374,7 +490,7 @@ static struct cpuidle_state ivt_cstates[] = {
.enter = NULL }
};
-static struct cpuidle_state ivt_cstates_4s[] = {
+static struct cpuidle_state ivt_cstates_4s[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -411,7 +527,7 @@ static struct cpuidle_state ivt_cstates_4s[] = {
.enter = NULL }
};
-static struct cpuidle_state ivt_cstates_8s[] = {
+static struct cpuidle_state ivt_cstates_8s[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -448,7 +564,7 @@ static struct cpuidle_state ivt_cstates_8s[] = {
.enter = NULL }
};
-static struct cpuidle_state hsw_cstates[] = {
+static struct cpuidle_state hsw_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -516,7 +632,7 @@ static struct cpuidle_state hsw_cstates[] = {
{
.enter = NULL }
};
-static struct cpuidle_state bdw_cstates[] = {
+static struct cpuidle_state bdw_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -585,7 +701,7 @@ static struct cpuidle_state bdw_cstates[] = {
.enter = NULL }
};
-static struct cpuidle_state skl_cstates[] = {
+static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -613,7 +729,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 85,
.target_residency = 200,
.enter = &intel_idle,
@@ -621,7 +737,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C7s",
.desc = "MWAIT 0x33",
- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 124,
.target_residency = 800,
.enter = &intel_idle,
@@ -629,7 +745,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C8",
.desc = "MWAIT 0x40",
- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 200,
.target_residency = 800,
.enter = &intel_idle,
@@ -637,7 +753,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C9",
.desc = "MWAIT 0x50",
- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 480,
.target_residency = 5000,
.enter = &intel_idle,
@@ -645,7 +761,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C10",
.desc = "MWAIT 0x60",
- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 890,
.target_residency = 5000,
.enter = &intel_idle,
@@ -654,11 +770,11 @@ static struct cpuidle_state skl_cstates[] = {
.enter = NULL }
};
-static struct cpuidle_state skx_cstates[] = {
+static struct cpuidle_state skx_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
- .flags = MWAIT2flg(0x00),
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
.exit_latency = 2,
.target_residency = 2,
.enter = &intel_idle,
@@ -674,7 +790,7 @@ static struct cpuidle_state skx_cstates[] = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 133,
.target_residency = 600,
.enter = &intel_idle,
@@ -683,7 +799,211 @@ static struct cpuidle_state skx_cstates[] = {
.enter = NULL }
};
-static struct cpuidle_state atom_cstates[] = {
+static struct cpuidle_state icx_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 4,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 170,
+ .target_residency = 600,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
+/*
+ * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
+ * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
+ * But in this case there is effectively no C1, because C1 requests are
+ * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
+ * and C1E requests end up with C1, so there is effectively no C1E.
+ *
+ * By default we enable C1E and disable C1 by marking it with
+ * 'CPUIDLE_FLAG_UNUSABLE'.
+ */
+static struct cpuidle_state adl_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 220,
+ .target_residency = 600,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 280,
+ .target_residency = 800,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 680,
+ .target_residency = 2000,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state adl_l_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 170,
+ .target_residency = 500,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 200,
+ .target_residency = 600,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 230,
+ .target_residency = 700,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state adl_n_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 195,
+ .target_residency = 585,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 260,
+ .target_residency = 1040,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 660,
+ .target_residency = 1980,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state spr_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00),
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
+ CPUIDLE_FLAG_INIT_XSTATE,
+ .exit_latency = 290,
+ .target_residency = 800,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state atom_cstates[] __initdata = {
{
.name = "C1E",
.desc = "MWAIT 0x00",
@@ -719,7 +1039,7 @@ static struct cpuidle_state atom_cstates[] = {
{
.enter = NULL }
};
-static struct cpuidle_state tangier_cstates[] = {
+static struct cpuidle_state tangier_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -763,7 +1083,7 @@ static struct cpuidle_state tangier_cstates[] = {
{
.enter = NULL }
};
-static struct cpuidle_state avn_cstates[] = {
+static struct cpuidle_state avn_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -783,7 +1103,7 @@ static struct cpuidle_state avn_cstates[] = {
{
.enter = NULL }
};
-static struct cpuidle_state knl_cstates[] = {
+static struct cpuidle_state knl_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -804,7 +1124,7 @@ static struct cpuidle_state knl_cstates[] = {
.enter = NULL }
};
-static struct cpuidle_state bxt_cstates[] = {
+static struct cpuidle_state bxt_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -865,7 +1185,7 @@ static struct cpuidle_state bxt_cstates[] = {
.enter = NULL }
};
-static struct cpuidle_state dnv_cstates[] = {
+static struct cpuidle_state dnv_cstates[] __initdata = {
{
.name = "C1",
.desc = "MWAIT 0x00",
@@ -894,225 +1214,233 @@ static struct cpuidle_state dnv_cstates[] = {
.enter = NULL }
};
-/**
- * intel_idle
- * @dev: cpuidle_device
- * @drv: cpuidle driver
- * @index: index of cpuidle state
- *
- * Must be called under local_irq_disable().
- */
-static __cpuidle int intel_idle(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int index)
-{
- unsigned long ecx = 1; /* break on interrupt flag */
- struct cpuidle_state *state = &drv->states[index];
- unsigned long eax = flg2MWAIT(state->flags);
- unsigned int cstate;
- bool uninitialized_var(tick);
- int cpu = smp_processor_id();
-
- /*
- * leave_mm() to avoid costly and often unnecessary wakeups
- * for flushing the user TLB's associated with the active mm.
- */
- if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
- leave_mm(cpu);
-
- if (!static_cpu_has(X86_FEATURE_ARAT)) {
- cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
- MWAIT_CSTATE_MASK) + 1;
- tick = false;
- if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
- tick = true;
- tick_broadcast_enter();
- }
- }
-
- mwait_idle_with_hints(eax, ecx);
-
- if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
- tick_broadcast_exit();
-
- return index;
-}
-
-/**
- * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
- * @dev: cpuidle_device
- * @drv: cpuidle driver
- * @index: state index
+/*
+ * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
+ * C6, and this is indicated in the CPUID mwait leaf.
*/
-static void intel_idle_s2idle(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int index)
-{
- unsigned long ecx = 1; /* break on interrupt flag */
- unsigned long eax = flg2MWAIT(drv->states[index].flags);
-
- mwait_idle_with_hints(eax, ecx);
-}
+static struct cpuidle_state snr_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00),
+ .exit_latency = 2,
+ .target_residency = 2,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 15,
+ .target_residency = 25,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 130,
+ .target_residency = 500,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
-static const struct idle_cpu idle_cpu_nehalem = {
+static const struct idle_cpu idle_cpu_nehalem __initconst = {
.state_table = nehalem_cstates,
.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
.disable_promotion_to_c1e = true,
};
-static const struct idle_cpu idle_cpu_nhx = {
+static const struct idle_cpu idle_cpu_nhx __initconst = {
.state_table = nehalem_cstates,
.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
.disable_promotion_to_c1e = true,
.use_acpi = true,
};
-static const struct idle_cpu idle_cpu_atom = {
+static const struct idle_cpu idle_cpu_atom __initconst = {
.state_table = atom_cstates,
};
-static const struct idle_cpu idle_cpu_tangier = {
+static const struct idle_cpu idle_cpu_tangier __initconst = {
.state_table = tangier_cstates,
};
-static const struct idle_cpu idle_cpu_lincroft = {
+static const struct idle_cpu idle_cpu_lincroft __initconst = {
.state_table = atom_cstates,
.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};
-static const struct idle_cpu idle_cpu_snb = {
+static const struct idle_cpu idle_cpu_snb __initconst = {
.state_table = snb_cstates,
.disable_promotion_to_c1e = true,
};
-static const struct idle_cpu idle_cpu_snx = {
+static const struct idle_cpu idle_cpu_snx __initconst = {
.state_table = snb_cstates,
.disable_promotion_to_c1e = true,
.use_acpi = true,
};
-static const struct idle_cpu idle_cpu_byt = {
+static const struct idle_cpu idle_cpu_byt __initconst = {
.state_table = byt_cstates,
.disable_promotion_to_c1e = true,
.byt_auto_demotion_disable_flag = true,
};
-static const struct idle_cpu idle_cpu_cht = {
+static const struct idle_cpu idle_cpu_cht __initconst = {
.state_table = cht_cstates,
.disable_promotion_to_c1e = true,
.byt_auto_demotion_disable_flag = true,
};
-static const struct idle_cpu idle_cpu_ivb = {
+static const struct idle_cpu idle_cpu_ivb __initconst = {
.state_table = ivb_cstates,
.disable_promotion_to_c1e = true,
};
-static const struct idle_cpu idle_cpu_ivt = {
+static const struct idle_cpu idle_cpu_ivt __initconst = {
.state_table = ivt_cstates,
.disable_promotion_to_c1e = true,
.use_acpi = true,
};
-static const struct idle_cpu idle_cpu_hsw = {
+static const struct idle_cpu idle_cpu_hsw __initconst = {
.state_table = hsw_cstates,
.disable_promotion_to_c1e = true,
};
-static const struct idle_cpu idle_cpu_hsx = {
+static const struct idle_cpu idle_cpu_hsx __initconst = {
.state_table = hsw_cstates,
.disable_promotion_to_c1e = true,
.use_acpi = true,
};
-static const struct idle_cpu idle_cpu_bdw = {
+static const struct idle_cpu idle_cpu_bdw __initconst = {
.state_table = bdw_cstates,
.disable_promotion_to_c1e = true,
};
-static const struct idle_cpu idle_cpu_bdx = {
+static const struct idle_cpu idle_cpu_bdx __initconst = {
.state_table = bdw_cstates,
.disable_promotion_to_c1e = true,
.use_acpi = true,
};
-static const struct idle_cpu idle_cpu_skl = {
+static const struct idle_cpu idle_cpu_skl __initconst = {
.state_table = skl_cstates,
.disable_promotion_to_c1e = true,
};
-static const struct idle_cpu idle_cpu_skx = {
+static const struct idle_cpu idle_cpu_skx __initconst = {
.state_table = skx_cstates,
.disable_promotion_to_c1e = true,
.use_acpi = true,
};
-static const struct idle_cpu idle_cpu_avn = {
+static const struct idle_cpu idle_cpu_icx __initconst = {
+ .state_table = icx_cstates,
+ .disable_promotion_to_c1e = true,
+ .use_acpi = true,
+};
+
+static const struct idle_cpu idle_cpu_adl __initconst = {
+ .state_table = adl_cstates,
+};
+
+static const struct idle_cpu idle_cpu_adl_l __initconst = {
+ .state_table = adl_l_cstates,
+};
+
+static const struct idle_cpu idle_cpu_adl_n __initconst = {
+ .state_table = adl_n_cstates,
+};
+
+static const struct idle_cpu idle_cpu_spr __initconst = {
+ .state_table = spr_cstates,
+ .disable_promotion_to_c1e = true,
+ .use_acpi = true,
+};
+
+static const struct idle_cpu idle_cpu_avn __initconst = {
.state_table = avn_cstates,
.disable_promotion_to_c1e = true,
.use_acpi = true,
};
-static const struct idle_cpu idle_cpu_knl = {
+static const struct idle_cpu idle_cpu_knl __initconst = {
.state_table = knl_cstates,
.use_acpi = true,
};
-static const struct idle_cpu idle_cpu_bxt = {
+static const struct idle_cpu idle_cpu_bxt __initconst = {
.state_table = bxt_cstates,
.disable_promotion_to_c1e = true,
};
-static const struct idle_cpu idle_cpu_dnv = {
+static const struct idle_cpu idle_cpu_dnv __initconst = {
.state_table = dnv_cstates,
.disable_promotion_to_c1e = true,
.use_acpi = true,
};
+static const struct idle_cpu idle_cpu_snr __initconst = {
+ .state_table = snr_cstates,
+ .disable_promotion_to_c1e = true,
+ .use_acpi = true,
+};
+
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
- INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nhx),
- INTEL_CPU_FAM6(NEHALEM, idle_cpu_nehalem),
- INTEL_CPU_FAM6(NEHALEM_G, idle_cpu_nehalem),
- INTEL_CPU_FAM6(WESTMERE, idle_cpu_nehalem),
- INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nhx),
- INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nhx),
- INTEL_CPU_FAM6(ATOM_BONNELL, idle_cpu_atom),
- INTEL_CPU_FAM6(ATOM_BONNELL_MID, idle_cpu_lincroft),
- INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nhx),
- INTEL_CPU_FAM6(SANDYBRIDGE, idle_cpu_snb),
- INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snx),
- INTEL_CPU_FAM6(ATOM_SALTWELL, idle_cpu_atom),
- INTEL_CPU_FAM6(ATOM_SILVERMONT, idle_cpu_byt),
- INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, idle_cpu_tangier),
- INTEL_CPU_FAM6(ATOM_AIRMONT, idle_cpu_cht),
- INTEL_CPU_FAM6(IVYBRIDGE, idle_cpu_ivb),
- INTEL_CPU_FAM6(IVYBRIDGE_X, idle_cpu_ivt),
- INTEL_CPU_FAM6(HASWELL, idle_cpu_hsw),
- INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsx),
- INTEL_CPU_FAM6(HASWELL_L, idle_cpu_hsw),
- INTEL_CPU_FAM6(HASWELL_G, idle_cpu_hsw),
- INTEL_CPU_FAM6(ATOM_SILVERMONT_D, idle_cpu_avn),
- INTEL_CPU_FAM6(BROADWELL, idle_cpu_bdw),
- INTEL_CPU_FAM6(BROADWELL_G, idle_cpu_bdw),
- INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdx),
- INTEL_CPU_FAM6(BROADWELL_D, idle_cpu_bdx),
- INTEL_CPU_FAM6(SKYLAKE_L, idle_cpu_skl),
- INTEL_CPU_FAM6(SKYLAKE, idle_cpu_skl),
- INTEL_CPU_FAM6(KABYLAKE_L, idle_cpu_skl),
- INTEL_CPU_FAM6(KABYLAKE, idle_cpu_skl),
- INTEL_CPU_FAM6(SKYLAKE_X, idle_cpu_skx),
- INTEL_CPU_FAM6(XEON_PHI_KNL, idle_cpu_knl),
- INTEL_CPU_FAM6(XEON_PHI_KNM, idle_cpu_knl),
- INTEL_CPU_FAM6(ATOM_GOLDMONT, idle_cpu_bxt),
- INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, idle_cpu_bxt),
- INTEL_CPU_FAM6(ATOM_GOLDMONT_D, idle_cpu_dnv),
- INTEL_CPU_FAM6(ATOM_TREMONT_D, idle_cpu_dnv),
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx),
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem),
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem),
+ X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem),
+ X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx),
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft),
+ X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx),
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb),
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht),
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb),
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &idle_cpu_adl_n),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr),
{}
};
-#define INTEL_CPU_FAM6_MWAIT \
- { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_MWAIT, 0 }
-
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
- INTEL_CPU_FAM6_MWAIT,
+ X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
{}
};
@@ -1125,6 +1453,20 @@ static bool __init intel_idle_max_cstate_reached(int cstate)
return false;
}
+static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
+{
+ unsigned long eax = flg2MWAIT(state->flags);
+
+ if (boot_cpu_has(X86_FEATURE_ARAT))
+ return false;
+
+ /*
+ * Switch over to one-shot tick broadcast if the target C-state
+ * is deeper than C1.
+ */
+ return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
+}
+
#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
#include <acpi/processor.h>
@@ -1184,14 +1526,13 @@ static bool __init intel_idle_acpi_cst_extract(void)
if (!intel_idle_cst_usable())
continue;
- if (!acpi_processor_claim_cst_control()) {
- acpi_state_table.count = 0;
- return false;
- }
+ if (!acpi_processor_claim_cst_control())
+ break;
return true;
}
+ acpi_state_table.count = 0;
pr_debug("ACPI _CST not found or not usable\n");
return false;
}
@@ -1208,7 +1549,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
struct acpi_processor_cx *cx;
struct cpuidle_state *state;
- if (intel_idle_max_cstate_reached(cstate))
+ if (intel_idle_max_cstate_reached(cstate - 1))
break;
cx = &acpi_state_table.states[cstate];
@@ -1216,7 +1557,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
state = &drv->states[drv->state_count++];
snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
- strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
+ strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
state->exit_latency = cx->latency;
/*
* For C1-type C-states use the same number for both the exit
@@ -1238,6 +1579,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
if (disabled_states_mask & BIT(cstate))
state->flags |= CPUIDLE_FLAG_OFF;
+ if (intel_idle_state_needs_timer_stop(state))
+ state->flags |= CPUIDLE_FLAG_TIMER_STOP;
+
state->enter = intel_idle;
state->enter_s2idle = intel_idle_s2idle;
}
@@ -1273,11 +1617,11 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
-/*
- * ivt_idle_state_table_update(void)
+/**
+ * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
*
- * Tune IVT multi-socket targets
- * Assumption: num_sockets == (max_package_num + 1)
+ * Tune IVT multi-socket targets.
+ * Assumption: num_sockets == (max_package_num + 1).
*/
static void __init ivt_idle_state_table_update(void)
{
@@ -1323,11 +1667,11 @@ static unsigned long long __init irtl_2_usec(unsigned long long irtl)
return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
}
-/*
- * bxt_idle_state_table_update(void)
+/**
+ * bxt_idle_state_table_update - Fix up the Broxton idle states table.
*
- * On BXT, we trust the IRTL to show the definitive maximum latency
- * We use the same value for target_residency.
+ * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
+ * definitive maximum latency and use the same value for target_residency.
*/
static void __init bxt_idle_state_table_update(void)
{
@@ -1370,11 +1714,11 @@ static void __init bxt_idle_state_table_update(void)
}
}
-/*
- * sklh_idle_state_table_update(void)
+
+/**
+ * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
*
- * On SKL-H (model 0x5e) disable C8 and C9 if:
- * C10 is enabled and SGX disabled
+ * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
*/
static void __init sklh_idle_state_table_update(void)
{
@@ -1413,6 +1757,76 @@ static void __init sklh_idle_state_table_update(void)
skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */
}
+/**
+ * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
+ * idle states table.
+ */
+static void __init skx_idle_state_table_update(void)
+{
+ unsigned long long msr;
+
+ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+
+ /*
+ * 000b: C0/C1 (no package C-state support)
+ * 001b: C2
+ * 010b: C6 (non-retention)
+ * 011b: C6 (retention)
+ * 111b: No Package C state limits.
+ */
+ if ((msr & 0x7) < 2) {
+ /*
+ * Uses the CC6 + PC0 latency and 3 times of
+ * latency for target_residency if the PC6
+ * is disabled in BIOS. This is consistent
+ * with how intel_idle driver uses _CST
+ * to set the target_residency.
+ */
+ skx_cstates[2].exit_latency = 92;
+ skx_cstates[2].target_residency = 276;
+ }
+}
+
+/**
+ * adl_idle_state_table_update - Adjust AlderLake idle states table.
+ */
+static void __init adl_idle_state_table_update(void)
+{
+ /* Check if user prefers C1 over C1E. */
+ if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
+ cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+ cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
+
+ /* Disable C1E by clearing the "C1E promotion" bit. */
+ c1e_promotion = C1E_PROMOTION_DISABLE;
+ return;
+ }
+
+ /* Make sure C1E is enabled by default */
+ c1e_promotion = C1E_PROMOTION_ENABLE;
+}
+
+/**
+ * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
+ */
+static void __init spr_idle_state_table_update(void)
+{
+ unsigned long long msr;
+
+ /*
+ * By default, the C6 state assumes the worst-case scenario of package
+ * C6. However, if PC6 is disabled, we update the numbers to match
+ * core C6.
+ */
+ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+
+ /* Limit value 2 and above allow for PC6. */
+ if ((msr & 0x7) < 2) {
+ spr_cstates[2].exit_latency = 190;
+ spr_cstates[2].target_residency = 600;
+ }
+}
+
static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
{
unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
@@ -1444,6 +1858,17 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
case INTEL_FAM6_SKYLAKE:
sklh_idle_state_table_update();
break;
+ case INTEL_FAM6_SKYLAKE_X:
+ skx_idle_state_table_update();
+ break;
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ spr_idle_state_table_update();
+ break;
+ case INTEL_FAM6_ALDERLAKE:
+ case INTEL_FAM6_ALDERLAKE_L:
+ case INTEL_FAM6_ALDERLAKE_N:
+ adl_idle_state_table_update();
+ break;
}
for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
@@ -1470,12 +1895,27 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
/* Structure copy. */
drv->states[drv->state_count] = cpuidle_state_table[cstate];
+ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
+ drv->states[drv->state_count].enter = intel_idle_irq;
+
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
+ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
+ WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ drv->states[drv->state_count].enter = intel_idle_ibrs;
+ }
+
+ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_INIT_XSTATE)
+ drv->states[drv->state_count].enter = intel_idle_xstate;
+
if ((disabled_states_mask & BIT(drv->state_count)) ||
((icpu->use_acpi || force_use_acpi) &&
intel_idle_off_by_default(mwait_hint) &&
!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
+ if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
+ drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
+
drv->state_count++;
}
@@ -1485,9 +1925,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
}
}
-/*
- * intel_idle_cpuidle_driver_init()
- * allocate, initialize cpuidle_states
+/**
+ * intel_idle_cpuidle_driver_init - Create the list of available idle states.
+ * @drv: cpuidle driver structure to initialize.
*/
static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
{
@@ -1509,10 +1949,19 @@ static void auto_demotion_disable(void)
unsigned long long msr_bits;
rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
- msr_bits &= ~(icpu->auto_demotion_disable_flags);
+ msr_bits &= ~auto_demotion_disable_flags;
wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
}
+static void c1e_promotion_enable(void)
+{
+ unsigned long long msr_bits;
+
+ rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+ msr_bits |= 0x2;
+ wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+}
+
static void c1e_promotion_disable(void)
{
unsigned long long msr_bits;
@@ -1522,10 +1971,12 @@ static void c1e_promotion_disable(void)
wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}
-/*
- * intel_idle_cpu_init()
- * allocate, initialize, register cpuidle_devices
- * @cpu: cpu/core to initialize
+/**
+ * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
+ * @cpu: CPU to initialize.
+ *
+ * Register a cpuidle device object for @cpu and update its MSRs in accordance
+ * with the processor model flags.
*/
static int intel_idle_cpu_init(unsigned int cpu)
{
@@ -1539,13 +1990,12 @@ static int intel_idle_cpu_init(unsigned int cpu)
return -EIO;
}
- if (!icpu)
- return 0;
-
- if (icpu->auto_demotion_disable_flags)
+ if (auto_demotion_disable_flags)
auto_demotion_disable();
- if (icpu->disable_promotion_to_c1e)
+ if (c1e_promotion == C1E_PROMOTION_ENABLE)
+ c1e_promotion_enable();
+ else if (c1e_promotion == C1E_PROMOTION_DISABLE)
c1e_promotion_disable();
return 0;
@@ -1555,7 +2005,7 @@ static int intel_idle_cpu_online(unsigned int cpu)
{
struct cpuidle_device *dev;
- if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
+ if (!boot_cpu_has(X86_FEATURE_ARAT))
tick_broadcast_enable();
/*
@@ -1623,6 +2073,9 @@ static int __init intel_idle_init(void)
icpu = (const struct idle_cpu *)id->driver_data;
if (icpu) {
cpuidle_state_table = icpu->state_table;
+ auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
+ if (icpu->disable_promotion_to_c1e)
+ c1e_promotion = C1E_PROMOTION_DISABLE;
if (icpu->use_acpi || force_use_acpi)
intel_idle_acpi_cst_extract();
} else if (!intel_idle_acpi_cst_extract()) {
@@ -1646,16 +2099,13 @@ static int __init intel_idle_init(void)
goto init_driver_fail;
}
- if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */
- lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
-
retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
intel_idle_cpu_online, NULL);
if (retval < 0)
goto hp_setup_fail;
- pr_debug("lapic_timer_reliable_states 0x%x\n",
- lapic_timer_reliable_states);
+ pr_debug("Local APIC timer is reliable in %s\n",
+ boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
return 0;
@@ -1684,3 +2134,14 @@ module_param(max_cstate, int, 0444);
*/
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
+/*
+ * Some platforms come with mutually exclusive C-states, so that if one is
+ * enabled, the other C-states must not be used. Example: C1 and C1E on
+ * Sapphire Rapids platform. This parameter allows for selecting the
+ * preferred C-states among the groups of mutually exclusive C-states - the
+ * selected C-states will be registered, the other C-states from the mutually
+ * exclusive group won't be registered. If the platform has no mutually
+ * exclusive C-states, this parameter has no effect.
+ */
+module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
+MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");