Diffstat (limited to 'drivers/cpuidle')
-rw-r--r--   drivers/cpuidle/Kconfig                  |   9
-rw-r--r--   drivers/cpuidle/Kconfig.arm              |  36
-rw-r--r--   drivers/cpuidle/Kconfig.riscv            |  15
-rw-r--r--   drivers/cpuidle/Makefile                 |  12
-rw-r--r--   drivers/cpuidle/coupled.c                |   5
-rw-r--r--   drivers/cpuidle/cpuidle-arm.c            |   3
-rw-r--r--   drivers/cpuidle/cpuidle-at91.c           |   5
-rw-r--r--   drivers/cpuidle/cpuidle-big_little.c     |   3
-rw-r--r--   drivers/cpuidle/cpuidle-haltpoll.c       |  16
-rw-r--r--   drivers/cpuidle/cpuidle-kirkwood.c       |   5
-rw-r--r--   drivers/cpuidle/cpuidle-powernv.c        |  20
-rw-r--r--   drivers/cpuidle/cpuidle-psci-domain.c    | 213
-rw-r--r--   drivers/cpuidle/cpuidle-psci.c           | 270
-rw-r--r--   drivers/cpuidle/cpuidle-psci.h           |  22
-rw-r--r--   drivers/cpuidle/cpuidle-pseries.c        | 250
-rw-r--r--   drivers/cpuidle/cpuidle-qcom-spm.c       | 196
-rw-r--r--   drivers/cpuidle/cpuidle-riscv-sbi.c      | 633
-rw-r--r--   drivers/cpuidle/cpuidle-tegra.c          | 391
-rw-r--r--   drivers/cpuidle/cpuidle-ux500.c          |   2
-rw-r--r--   drivers/cpuidle/cpuidle.c                | 114
-rw-r--r--   drivers/cpuidle/driver.c                 |   4
-rw-r--r--   drivers/cpuidle/dt_idle_genpd.c          | 178
-rw-r--r--   drivers/cpuidle/dt_idle_genpd.h          |  50
-rw-r--r--   drivers/cpuidle/governor.c               |  13
-rw-r--r--   drivers/cpuidle/governors/haltpoll.c     |   3
-rw-r--r--   drivers/cpuidle/governors/menu.c         |  25
-rw-r--r--   drivers/cpuidle/governors/teo.c          | 502
-rw-r--r--   drivers/cpuidle/sysfs.c                  |  83
28 files changed, 2402 insertions(+), 676 deletions(-)
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index c0aeedd66f02..ff71dd662880 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -47,6 +47,10 @@ config CPU_IDLE_GOV_HALTPOLL
config DT_IDLE_STATES
bool
+config DT_IDLE_GENPD
+ depends on PM_GENERIC_DOMAINS_OF
+ bool
+
menu "ARM CPU Idle Drivers"
depends on ARM || ARM64
source "drivers/cpuidle/Kconfig.arm"
@@ -62,6 +66,11 @@ depends on PPC
source "drivers/cpuidle/Kconfig.powerpc"
endmenu
+menu "RISC-V CPU Idle Drivers"
+depends on RISCV
+source "drivers/cpuidle/Kconfig.riscv"
+endmenu
+
config HALTPOLL_CPUIDLE
tristate "Halt poll cpuidle driver"
depends on X86 && KVM_GUEST
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
index 62272ecfa771..747aa537389b 100644
--- a/drivers/cpuidle/Kconfig.arm
+++ b/drivers/cpuidle/Kconfig.arm
@@ -3,7 +3,8 @@
# ARM CPU Idle drivers
#
config ARM_CPUIDLE
- bool "Generic ARM/ARM64 CPU idle Driver"
+ bool "Generic ARM CPU idle Driver"
+ depends on ARM
select DT_IDLE_STATES
select CPU_IDLE_MULTIPLE_DRIVERS
help
@@ -23,6 +24,17 @@ config ARM_PSCI_CPUIDLE
It provides an idle driver that is capable of detecting and
managing idle states through the PSCI firmware interface.
+config ARM_PSCI_CPUIDLE_DOMAIN
+ bool "PSCI CPU idle Domain"
+ depends on ARM_PSCI_CPUIDLE
+ depends on PM_GENERIC_DOMAINS_OF
+ select DT_IDLE_GENPD
+ default y
+ help
+ Select this to enable the PSCI based CPUidle driver to use PM domains,
+ which is needed to support the hierarchical DT based layout of the
+ idle states.
+
config ARM_BIG_LITTLE_CPUIDLE
bool "Support for ARM big.LITTLE processors"
depends on ARCH_VEXPRESS_TC2_PM || ARCH_EXYNOS || COMPILE_TEST
@@ -86,3 +98,25 @@ config ARM_MVEBU_V7_CPUIDLE
depends on (ARCH_MVEBU || COMPILE_TEST) && !ARM64
help
Select this to enable cpuidle on Armada 370, 38x and XP processors.
+
+config ARM_TEGRA_CPUIDLE
+ bool "CPU Idle Driver for NVIDIA Tegra SoCs"
+ depends on (ARCH_TEGRA || COMPILE_TEST) && !ARM64 && MMU
+ select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
+ select ARM_CPU_SUSPEND
+ help
+ Select this to enable cpuidle for NVIDIA Tegra20/30/114/124 SoCs.
+
+config ARM_QCOM_SPM_CPUIDLE
+ bool "CPU Idle Driver for Qualcomm Subsystem Power Manager (SPM)"
+ depends on (ARCH_QCOM || COMPILE_TEST) && !ARM64 && MMU
+ select ARM_CPU_SUSPEND
+ select CPU_IDLE_MULTIPLE_DRIVERS
+ select DT_IDLE_STATES
+ select QCOM_SCM
+ select QCOM_SPM
+ help
+ Select this to enable cpuidle for Qualcomm processors.
+ The Subsystem Power Manager (SPM) controls low power modes for the
+ CPU and L2 cores. It interfaces with various system drivers to put
+ the cores in low power modes.
diff --git a/drivers/cpuidle/Kconfig.riscv b/drivers/cpuidle/Kconfig.riscv
new file mode 100644
index 000000000000..78518c26af74
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.riscv
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# RISC-V CPU Idle drivers
+#
+
+config RISCV_SBI_CPUIDLE
+ bool "RISC-V SBI CPU idle Driver"
+ depends on RISCV_SBI
+ select DT_IDLE_STATES
+ select CPU_IDLE_MULTIPLE_DRIVERS
+ select DT_IDLE_GENPD if PM_GENERIC_DOMAINS_OF
+ help
+ Select this option to enable RISC-V SBI firmware based CPU idle
+ driver for RISC-V systems. This driver also supports the hierarchical
+ DT based layout of the idle states.
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index cc8c769d7fa9..d103342b7cfc 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -6,6 +6,7 @@
obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o
+obj-$(CONFIG_DT_IDLE_GENPD) += dt_idle_genpd.o
obj-$(CONFIG_ARCH_HAS_CPU_RELAX) += poll_state.o
obj-$(CONFIG_HALTPOLL_CPUIDLE) += cpuidle-haltpoll.o
@@ -21,9 +22,10 @@ obj-$(CONFIG_ARM_U8500_CPUIDLE) += cpuidle-ux500.o
obj-$(CONFIG_ARM_AT91_CPUIDLE) += cpuidle-at91.o
obj-$(CONFIG_ARM_EXYNOS_CPUIDLE) += cpuidle-exynos.o
obj-$(CONFIG_ARM_CPUIDLE) += cpuidle-arm.o
-obj-$(CONFIG_ARM_PSCI_CPUIDLE) += cpuidle_psci.o
-cpuidle_psci-y := cpuidle-psci.o
-cpuidle_psci-$(CONFIG_PM_GENERIC_DOMAINS_OF) += cpuidle-psci-domain.o
+obj-$(CONFIG_ARM_PSCI_CPUIDLE) += cpuidle-psci.o
+obj-$(CONFIG_ARM_PSCI_CPUIDLE_DOMAIN) += cpuidle-psci-domain.o
+obj-$(CONFIG_ARM_TEGRA_CPUIDLE) += cpuidle-tegra.o
+obj-$(CONFIG_ARM_QCOM_SPM_CPUIDLE) += cpuidle-qcom-spm.o
###############################################################################
# MIPS drivers
@@ -33,3 +35,7 @@ obj-$(CONFIG_MIPS_CPS_CPUIDLE) += cpuidle-cps.o
# POWERPC drivers
obj-$(CONFIG_PSERIES_CPUIDLE) += cpuidle-pseries.o
obj-$(CONFIG_POWERNV_CPUIDLE) += cpuidle-powernv.o
+
+###############################################################################
+# RISC-V drivers
+obj-$(CONFIG_RISCV_SBI_CPUIDLE) += cpuidle-riscv-sbi.o
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index 04003b90dc49..9acde71558d5 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -54,7 +54,7 @@
* variable is not locked. It is only written from the cpu that
* it stores (or by the on/offlining cpu if that cpu is offline),
* and only read after all the cpus are ready for the coupled idle
- * state are are no longer updating it.
+ * state are no longer updating it.
*
* Three atomic counters are used. alive_count tracks the number
* of cpus in the coupled set that are currently or soon will be
@@ -674,8 +674,7 @@ have_coupled:
coupled->refcnt++;
csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu);
- csd->func = cpuidle_coupled_handle_poke;
- csd->info = (void *)(unsigned long)dev->cpu;
+ INIT_CSD(csd, cpuidle_coupled_handle_poke, (void *)(unsigned long)dev->cpu);
return 0;
}
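
The INIT_CSD() conversion above replaces two open-coded field assignments with the initializer helper from <linux/smp.h>. As a rough sketch (an approximation of the helper's shape, not the verbatim kernel macro), it boils down to a whole-struct initialization:

    /* Approximate shape of INIT_CSD(csd, func, info): */
    #define INIT_CSD_SKETCH(_csd, _func, _info)                     \
    do {                                                            \
            *(_csd) = (call_single_data_t){ .func = (_func),        \
                                            .info = (_info) };      \
    } while (0)

so the coupled driver's poke callback and its cpu argument still land in csd->func and csd->info exactly as before, with the advantage that the remaining members are not left stale.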
diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index 9e5156d39627..8c758920d699 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -8,6 +8,7 @@
#define pr_fmt(fmt) "CPUidle arm: " fmt
+#include <linux/cpu_cooling.h>
#include <linux/cpuidle.h>
#include <linux/cpumask.h>
#include <linux/cpu_pm.h>
@@ -124,6 +125,8 @@ static int __init arm_idle_init_cpu(int cpu)
if (ret)
goto out_kfree_drv;
+ cpuidle_cooling_register(drv);
+
return 0;
out_kfree_drv:
diff --git a/drivers/cpuidle/cpuidle-at91.c b/drivers/cpuidle/cpuidle-at91.c
index 9c5853b6ca4a..45ee8e1e71ae 100644
--- a/drivers/cpuidle/cpuidle-at91.c
+++ b/drivers/cpuidle/cpuidle-at91.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* based on arch/arm/mach-kirkwood/cpuidle.c
*
* CPU idle support for AT91 SoC
*
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- *
* The cpu idle uses wait-for-interrupt and RAM self refresh in order
* to implement two idle states -
* #1 wait-for-interrupt
diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c
index 7f8ddc04342d..abe51185f243 100644
--- a/drivers/cpuidle/cpuidle-big_little.c
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -155,8 +155,7 @@ static int __init bl_idle_driver_init(struct cpuidle_driver *drv, int part_id)
static const struct of_device_id compatible_machine_match[] = {
{ .compatible = "arm,vexpress,v2p-ca15_a7" },
- { .compatible = "samsung,exynos5420" },
- { .compatible = "samsung,exynos5800" },
+ { .compatible = "google,peach" },
{},
};
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index b0ce9bc78113..3a39a7f48b77 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -18,6 +18,10 @@
#include <linux/kvm_para.h>
#include <linux/cpuidle_haltpoll.h>
+static bool force __read_mostly;
+module_param(force, bool, 0444);
+MODULE_PARM_DESC(force, "Load unconditionally");
+
static struct cpuidle_device __percpu *haltpoll_cpuidle_devices;
static enum cpuhp_state haltpoll_hp_state;
@@ -90,6 +94,11 @@ static void haltpoll_uninit(void)
haltpoll_cpuidle_devices = NULL;
}
+static bool haltpoll_want(void)
+{
+ return kvm_para_has_hint(KVM_HINTS_REALTIME) || force;
+}
+
static int __init haltpoll_init(void)
{
int ret;
@@ -99,12 +108,11 @@ static int __init haltpoll_init(void)
if (boot_option_idle_override != IDLE_NO_OVERRIDE)
return -ENODEV;
- cpuidle_poll_state_init(drv);
-
- if (!kvm_para_available() ||
- !kvm_para_has_hint(KVM_HINTS_REALTIME))
+ if (!kvm_para_available() || !haltpoll_want())
return -ENODEV;
+ cpuidle_poll_state_init(drv);
+
ret = cpuidle_register_driver(drv);
if (ret < 0)
return ret;
diff --git a/drivers/cpuidle/cpuidle-kirkwood.c b/drivers/cpuidle/cpuidle-kirkwood.c
index 511c4f46027a..13bf743f885b 100644
--- a/drivers/cpuidle/cpuidle-kirkwood.c
+++ b/drivers/cpuidle/cpuidle-kirkwood.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* CPU idle Marvell Kirkwood SoCs
*
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- *
* The cpu idle uses wait-for-interrupt and DDR self refresh in order
* to implement two idle states -
* #1 wait-for-interrupt
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 1b299e801f74..0b5461b3d7dd 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -141,7 +141,7 @@ static int stop_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
- power9_idle_type(stop_psscr_table[index].val,
+ arch300_idle_type(stop_psscr_table[index].val,
stop_psscr_table[index].mask);
return index;
}
@@ -233,8 +233,8 @@ static inline void add_powernv_state(int index, const char *name,
unsigned int exit_latency,
u64 psscr_val, u64 psscr_mask)
{
- strlcpy(powernv_states[index].name, name, CPUIDLE_NAME_LEN);
- strlcpy(powernv_states[index].desc, name, CPUIDLE_NAME_LEN);
+ strscpy(powernv_states[index].name, name, CPUIDLE_NAME_LEN);
+ strscpy(powernv_states[index].desc, name, CPUIDLE_NAME_LEN);
powernv_states[index].flags = flags;
powernv_states[index].target_residency = target_residency;
powernv_states[index].exit_latency = exit_latency;
@@ -244,20 +244,6 @@ static inline void add_powernv_state(int index, const char *name,
stop_psscr_table[index].mask = psscr_mask;
}
-/*
- * Returns 0 if prop1_len == prop2_len. Else returns -1
- */
-static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
- const char *prop2, int prop2_len)
-{
- if (prop1_len == prop2_len)
- return 0;
-
- pr_warn("cpuidle-powernv: array sizes don't match for %s and %s\n",
- prop1, prop2);
- return -1;
-}
-
extern u32 pnv_get_supported_cpuidle_states(void);
static int powernv_add_idle_states(void)
{
diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index 423f03bbeb74..821984947ed9 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -12,6 +12,7 @@
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/kernel.h>
+#include <linux/platform_device.h>
#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
#include <linux/psci.h>
@@ -26,7 +27,7 @@ struct psci_pd_provider {
};
static LIST_HEAD(psci_pd_providers);
-static bool osi_mode_enabled __initdata;
+static bool psci_pd_allow_domain_state;
static int psci_pd_power_off(struct generic_pm_domain *pd)
{
@@ -36,6 +37,9 @@ static int psci_pd_power_off(struct generic_pm_domain *pd)
if (!state->data)
return 0;
+ if (!psci_pd_allow_domain_state)
+ return -EBUSY;
+
/* OSI mode is enabled, set the corresponding domain state. */
pd_state = state->data;
psci_set_domain_state(*pd_state);
@@ -43,73 +47,14 @@ static int psci_pd_power_off(struct generic_pm_domain *pd)
return 0;
}
-static int __init psci_pd_parse_state_nodes(struct genpd_power_state *states,
- int state_count)
-{
- int i, ret;
- u32 psci_state, *psci_state_buf;
-
- for (i = 0; i < state_count; i++) {
- ret = psci_dt_parse_state_node(to_of_node(states[i].fwnode),
- &psci_state);
- if (ret)
- goto free_state;
-
- psci_state_buf = kmalloc(sizeof(u32), GFP_KERNEL);
- if (!psci_state_buf) {
- ret = -ENOMEM;
- goto free_state;
- }
- *psci_state_buf = psci_state;
- states[i].data = psci_state_buf;
- }
-
- return 0;
-
-free_state:
- i--;
- for (; i >= 0; i--)
- kfree(states[i].data);
- return ret;
-}
-
-static int __init psci_pd_parse_states(struct device_node *np,
- struct genpd_power_state **states, int *state_count)
-{
- int ret;
-
- /* Parse the domain idle states. */
- ret = of_genpd_parse_idle_states(np, states, state_count);
- if (ret)
- return ret;
-
- /* Fill out the PSCI specifics for each found state. */
- ret = psci_pd_parse_state_nodes(*states, *state_count);
- if (ret)
- kfree(*states);
-
- return ret;
-}
-
-static void psci_pd_free_states(struct genpd_power_state *states,
- unsigned int state_count)
-{
- int i;
-
- for (i = 0; i < state_count; i++)
- kfree(states[i].data);
- kfree(states);
-}
-
-static int __init psci_pd_init(struct device_node *np)
+static int psci_pd_init(struct device_node *np, bool use_osi)
{
struct generic_pm_domain *pd;
struct psci_pd_provider *pd_provider;
struct dev_power_governor *pd_gov;
- struct genpd_power_state *states = NULL;
- int ret = -ENOMEM, state_count = 0;
+ int ret = -ENOMEM;
- pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ pd = dt_idle_pd_alloc(np, psci_dt_parse_state_node);
if (!pd)
goto out;
@@ -117,33 +62,20 @@ static int __init psci_pd_init(struct device_node *np)
if (!pd_provider)
goto free_pd;
- pd->name = kasprintf(GFP_KERNEL, "%pOF", np);
- if (!pd->name)
- goto free_pd_prov;
-
- /*
- * Parse the domain idle states and let genpd manage the state selection
- * for those being compatible with "domain-idle-state".
- */
- ret = psci_pd_parse_states(np, &states, &state_count);
- if (ret)
- goto free_name;
-
- pd->free_states = psci_pd_free_states;
- pd->name = kbasename(pd->name);
- pd->power_off = psci_pd_power_off;
- pd->states = states;
- pd->state_count = state_count;
pd->flags |= GENPD_FLAG_IRQ_SAFE | GENPD_FLAG_CPU_DOMAIN;
+ /* Allow power off when OSI has been successfully enabled. */
+ if (use_osi)
+ pd->power_off = psci_pd_power_off;
+ else
+ pd->flags |= GENPD_FLAG_ALWAYS_ON;
+
/* Use governor for CPU PM domains if it has some states to manage. */
- pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
+ pd_gov = pd->states ? &pm_domain_cpu_gov : NULL;
ret = pm_genpd_init(pd, pd_gov, false);
- if (ret) {
- psci_pd_free_states(states, state_count);
- goto free_name;
- }
+ if (ret)
+ goto free_pd_prov;
ret = of_genpd_add_provider_simple(np, pd);
if (ret)
@@ -157,18 +89,16 @@ static int __init psci_pd_init(struct device_node *np)
remove_pd:
pm_genpd_remove(pd);
-free_name:
- kfree(pd->name);
free_pd_prov:
kfree(pd_provider);
free_pd:
- kfree(pd);
+ dt_idle_pd_free(pd);
out:
pr_err("failed to init PM domain ret=%d %pOF\n", ret, np);
return ret;
}
-static void __init psci_pd_remove(void)
+static void psci_pd_remove(void)
{
struct psci_pd_provider *pd_provider, *it;
struct generic_pm_domain *genpd;
@@ -186,59 +116,46 @@ static void __init psci_pd_remove(void)
}
}
-static int __init psci_pd_init_topology(struct device_node *np, bool add)
+static bool psci_pd_try_set_osi_mode(void)
{
- struct device_node *node;
- struct of_phandle_args child, parent;
int ret;
- for_each_child_of_node(np, node) {
- if (of_parse_phandle_with_args(node, "power-domains",
- "#power-domain-cells", 0, &parent))
- continue;
-
- child.np = node;
- child.args_count = 0;
-
- ret = add ? of_genpd_add_subdomain(&parent, &child) :
- of_genpd_remove_subdomain(&parent, &child);
- of_node_put(parent.np);
- if (ret) {
- of_node_put(node);
- return ret;
- }
- }
+ if (!psci_has_osi_support())
+ return false;
- return 0;
-}
+ ret = psci_set_osi_mode(true);
+ if (ret)
+ return false;
-static int __init psci_pd_add_topology(struct device_node *np)
-{
- return psci_pd_init_topology(np, true);
+ return true;
}
-static void __init psci_pd_remove_topology(struct device_node *np)
+static void psci_cpuidle_domain_sync_state(struct device *dev)
{
- psci_pd_init_topology(np, false);
+ /*
+ * All devices have now been attached/probed to the PM domain topology,
+ * hence it's fine to allow domain states to be picked.
+ */
+ psci_pd_allow_domain_state = true;
}
-static const struct of_device_id psci_of_match[] __initconst = {
+static const struct of_device_id psci_of_match[] = {
{ .compatible = "arm,psci-1.0" },
{}
};
-static int __init psci_idle_init_domains(void)
+static int psci_cpuidle_domain_probe(struct platform_device *pdev)
{
- struct device_node *np = of_find_matching_node(NULL, psci_of_match);
+ struct device_node *np = pdev->dev.of_node;
struct device_node *node;
+ bool use_osi;
int ret = 0, pd_count = 0;
if (!np)
return -ENODEV;
- /* Currently limit the hierarchical topology to be used in OSI mode. */
- if (!psci_has_osi_support())
- goto out;
+ /* If OSI mode is supported, let's try to enable it. */
+ use_osi = psci_pd_try_set_osi_mode();
/*
* Parse child nodes for the "#power-domain-cells" property and
@@ -248,7 +165,7 @@ static int __init psci_idle_init_domains(void)
if (!of_find_property(node, "#power-domain-cells", NULL))
continue;
- ret = psci_pd_init(node);
+ ret = psci_pd_init(node, use_osi);
if (ret)
goto put_node;
@@ -257,52 +174,38 @@ static int __init psci_idle_init_domains(void)
/* Bail out if not using the hierarchical CPU topology. */
if (!pd_count)
- goto out;
+ goto no_pd;
/* Link genpd masters/subdomains to model the CPU topology. */
- ret = psci_pd_add_topology(np);
+ ret = dt_idle_pd_init_topology(np);
if (ret)
goto remove_pd;
- /* Try to enable OSI mode. */
- ret = psci_set_osi_mode();
- if (ret) {
- pr_warn("failed to enable OSI mode: %d\n", ret);
- psci_pd_remove_topology(np);
- goto remove_pd;
- }
-
- osi_mode_enabled = true;
- of_node_put(np);
pr_info("Initialized CPU PM domain topology\n");
- return pd_count;
+ return 0;
put_node:
of_node_put(node);
remove_pd:
- if (pd_count)
- psci_pd_remove();
+ psci_pd_remove();
pr_err("failed to create CPU PM domains ret=%d\n", ret);
-out:
- of_node_put(np);
+no_pd:
+ if (use_osi)
+ psci_set_osi_mode(false);
return ret;
}
-subsys_initcall(psci_idle_init_domains);
-struct device __init *psci_dt_attach_cpu(int cpu)
-{
- struct device *dev;
-
- if (!osi_mode_enabled)
- return NULL;
-
- dev = dev_pm_domain_attach_by_name(get_cpu_device(cpu), "psci");
- if (IS_ERR_OR_NULL(dev))
- return dev;
-
- pm_runtime_irq_safe(dev);
- if (cpu_online(cpu))
- pm_runtime_get_sync(dev);
+static struct platform_driver psci_cpuidle_domain_driver = {
+ .probe = psci_cpuidle_domain_probe,
+ .driver = {
+ .name = "psci-cpuidle-domain",
+ .of_match_table = psci_of_match,
+ .sync_state = psci_cpuidle_domain_sync_state,
+ },
+};
- return dev;
+static int __init psci_idle_init_domains(void)
+{
+ return platform_driver_register(&psci_cpuidle_domain_driver);
}
+subsys_initcall(psci_idle_init_domains);
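
The move from a bare initcall to a platform driver hinges on the driver core's sync_state() mechanism: psci_cpuidle_domain_sync_state() runs once all consumers of the domain provider have probed, and only then is psci_pd_allow_domain_state flipped so psci_pd_power_off() stops returning -EBUSY. A minimal sketch of the pattern, with hypothetical names:

    static bool provider_ready;   /* plays the role of psci_pd_allow_domain_state */

    static void example_sync_state(struct device *dev)
    {
            /*
             * Invoked by the driver core after every known consumer of
             * this device has probed (or once it stops waiting for them
             * late in boot), so deeper states are safe to use from here.
             */
            provider_ready = true;
    }

    static struct platform_driver example_driver = {
            .driver = {
                    .name = "example-provider",
                    .sync_state = example_sync_state,
            },
    };

Until that point every power_off attempt fails with -EBUSY, which keeps the domain out of deep states while consumers are still probing.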
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index edd7a54ef0d3..57bc3e3ae391 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -9,6 +9,7 @@
#define pr_fmt(fmt) "CPUidle PSCI: " fmt
#include <linux/cpuhotplug.h>
+#include <linux/cpu_cooling.h>
#include <linux/cpuidle.h>
#include <linux/cpumask.h>
#include <linux/cpu_pm.h>
@@ -16,9 +17,13 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
+#include <linux/platform_device.h>
#include <linux/psci.h>
+#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/syscore_ops.h>
#include <asm/cpuidle.h>
@@ -32,7 +37,7 @@ struct psci_cpuidle_data {
static DEFINE_PER_CPU_READ_MOSTLY(struct psci_cpuidle_data, psci_cpuidle_data);
static DEFINE_PER_CPU(u32, domain_state);
-static bool psci_cpuidle_use_cpuhp __initdata;
+static bool psci_cpuidle_use_cpuhp;
void psci_set_domain_state(u32 state)
{
@@ -49,8 +54,9 @@ static inline int psci_enter_state(int idx, u32 state)
return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, idx, state);
}
-static int psci_enter_domain_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx,
+ bool s2idle)
{
struct psci_cpuidle_data *data = this_cpu_ptr(&psci_cpuidle_data);
u32 *states = data->psci_states;
@@ -58,22 +64,51 @@ static int psci_enter_domain_idle_state(struct cpuidle_device *dev,
u32 state;
int ret;
+ ret = cpu_pm_enter();
+ if (ret)
+ return -1;
+
/* Do runtime PM to manage a hierarchical CPU topology. */
- pm_runtime_put_sync_suspend(pd_dev);
+ ct_irq_enter_irqson();
+ if (s2idle)
+ dev_pm_genpd_suspend(pd_dev);
+ else
+ pm_runtime_put_sync_suspend(pd_dev);
+ ct_irq_exit_irqson();
state = psci_get_domain_state();
if (!state)
state = states[idx];
- ret = psci_enter_state(idx, state);
+ ret = psci_cpu_suspend_enter(state) ? -1 : idx;
+
+ ct_irq_enter_irqson();
+ if (s2idle)
+ dev_pm_genpd_resume(pd_dev);
+ else
+ pm_runtime_get_sync(pd_dev);
+ ct_irq_exit_irqson();
- pm_runtime_get_sync(pd_dev);
+ cpu_pm_exit();
/* Clear the domain state to start fresh when back from idle. */
psci_set_domain_state(0);
return ret;
}
+static int psci_enter_domain_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
+{
+ return __psci_enter_domain_idle_state(dev, drv, idx, false);
+}
+
+static int psci_enter_s2idle_domain_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int idx)
+{
+ return __psci_enter_domain_idle_state(dev, drv, idx, true);
+}
+
static int psci_idle_cpuhp_up(unsigned int cpu)
{
struct device *pd_dev = __this_cpu_read(psci_cpuidle_data.dev);
@@ -97,13 +132,58 @@ static int psci_idle_cpuhp_down(unsigned int cpu)
return 0;
}
-static void __init psci_idle_init_cpuhp(void)
+static void psci_idle_syscore_switch(bool suspend)
+{
+ bool cleared = false;
+ struct device *dev;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ dev = per_cpu_ptr(&psci_cpuidle_data, cpu)->dev;
+
+ if (dev && suspend) {
+ dev_pm_genpd_suspend(dev);
+ } else if (dev) {
+ dev_pm_genpd_resume(dev);
+
+ /* Account for userspace having offlined a CPU. */
+ if (pm_runtime_status_suspended(dev))
+ pm_runtime_set_active(dev);
+
+ /* Clear domain state to re-start fresh. */
+ if (!cleared) {
+ psci_set_domain_state(0);
+ cleared = true;
+ }
+ }
+ }
+}
+
+static int psci_idle_syscore_suspend(void)
+{
+ psci_idle_syscore_switch(true);
+ return 0;
+}
+
+static void psci_idle_syscore_resume(void)
+{
+ psci_idle_syscore_switch(false);
+}
+
+static struct syscore_ops psci_idle_syscore_ops = {
+ .suspend = psci_idle_syscore_suspend,
+ .resume = psci_idle_syscore_resume,
+};
+
+static void psci_idle_init_cpuhp(void)
{
int err;
if (!psci_cpuidle_use_cpuhp)
return;
+ register_syscore_ops(&psci_idle_syscore_ops);
+
err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING,
"cpuidle/psci:online",
psci_idle_cpuhp_up,
@@ -120,30 +200,13 @@ static int psci_enter_idle_state(struct cpuidle_device *dev,
return psci_enter_state(idx, state[idx]);
}
-static struct cpuidle_driver psci_idle_driver __initdata = {
- .name = "psci_idle",
- .owner = THIS_MODULE,
- /*
- * PSCI idle states relies on architectural WFI to
- * be represented as state index 0.
- */
- .states[0] = {
- .enter = psci_enter_idle_state,
- .exit_latency = 1,
- .target_residency = 1,
- .power_usage = UINT_MAX,
- .name = "WFI",
- .desc = "ARM WFI",
- }
-};
-
-static const struct of_device_id psci_idle_state_match[] __initconst = {
+static const struct of_device_id psci_idle_state_match[] = {
{ .compatible = "arm,idle-state",
.data = psci_enter_idle_state },
{ },
};
-int __init psci_dt_parse_state_node(struct device_node *np, u32 *state)
+int psci_dt_parse_state_node(struct device_node *np, u32 *state)
{
int err = of_property_read_u32(np, "arm,psci-suspend-param", state);
@@ -160,9 +223,33 @@ int __init psci_dt_parse_state_node(struct device_node *np, u32 *state)
return 0;
}
-static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv,
- struct device_node *cpu_node,
- unsigned int state_count, int cpu)
+static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv,
+ struct psci_cpuidle_data *data,
+ unsigned int state_count, int cpu)
+{
+ /* Currently limit the hierarchical topology to be used in OSI mode. */
+ if (!psci_has_osi_support())
+ return 0;
+
+ data->dev = psci_dt_attach_cpu(cpu);
+ if (IS_ERR_OR_NULL(data->dev))
+ return PTR_ERR_OR_ZERO(data->dev);
+
+ /*
+ * Using the deepest state for the CPU to trigger a potential selection
+ * of a shared state for the domain, assumes the domain states are all
+ * deeper states.
+ */
+ drv->states[state_count - 1].enter = psci_enter_domain_idle_state;
+ drv->states[state_count - 1].enter_s2idle = psci_enter_s2idle_domain_idle_state;
+ psci_cpuidle_use_cpuhp = true;
+
+ return 0;
+}
+
+static int psci_dt_cpu_init_idle(struct device *dev, struct cpuidle_driver *drv,
+ struct device_node *cpu_node,
+ unsigned int state_count, int cpu)
{
int i, ret = 0;
u32 *psci_states;
@@ -170,7 +257,8 @@ static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv,
struct psci_cpuidle_data *data = per_cpu_ptr(&psci_cpuidle_data, cpu);
state_count++; /* Add WFI state too */
- psci_states = kcalloc(state_count, sizeof(*psci_states), GFP_KERNEL);
+ psci_states = devm_kcalloc(dev, state_count, sizeof(*psci_states),
+ GFP_KERNEL);
if (!psci_states)
return -ENOMEM;
@@ -183,47 +271,26 @@ static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv,
of_node_put(state_node);
if (ret)
- goto free_mem;
+ return ret;
pr_debug("psci-power-state %#x index %d\n", psci_states[i], i);
}
- if (i != state_count) {
- ret = -ENODEV;
- goto free_mem;
- }
-
- /* Currently limit the hierarchical topology to be used in OSI mode. */
- if (psci_has_osi_support()) {
- data->dev = psci_dt_attach_cpu(cpu);
- if (IS_ERR(data->dev)) {
- ret = PTR_ERR(data->dev);
- goto free_mem;
- }
+ if (i != state_count)
+ return -ENODEV;
- /*
- * Using the deepest state for the CPU to trigger a potential
- * selection of a shared state for the domain, assumes the
- * domain states are all deeper states.
- */
- if (data->dev) {
- drv->states[state_count - 1].enter =
- psci_enter_domain_idle_state;
- psci_cpuidle_use_cpuhp = true;
- }
- }
+ /* Initialize optional data, used for the hierarchical topology. */
+ ret = psci_dt_cpu_init_topology(drv, data, state_count, cpu);
+ if (ret < 0)
+ return ret;
/* Idle states parsed correctly, store them in the per-cpu struct. */
data->psci_states = psci_states;
return 0;
-
-free_mem:
- kfree(psci_states);
- return ret;
}
-static __init int psci_cpu_init_idle(struct cpuidle_driver *drv,
- unsigned int cpu, unsigned int state_count)
+static int psci_cpu_init_idle(struct device *dev, struct cpuidle_driver *drv,
+ unsigned int cpu, unsigned int state_count)
{
struct device_node *cpu_node;
int ret;
@@ -239,14 +306,22 @@ static __init int psci_cpu_init_idle(struct cpuidle_driver *drv,
if (!cpu_node)
return -ENODEV;
- ret = psci_dt_cpu_init_idle(drv, cpu_node, state_count, cpu);
+ ret = psci_dt_cpu_init_idle(dev, drv, cpu_node, state_count, cpu);
of_node_put(cpu_node);
return ret;
}
-static int __init psci_idle_init_cpu(int cpu)
+static void psci_cpu_deinit_idle(int cpu)
+{
+ struct psci_cpuidle_data *data = per_cpu_ptr(&psci_cpuidle_data, cpu);
+
+ psci_dt_detach_cpu(data->dev);
+ psci_cpuidle_use_cpuhp = false;
+}
+
+static int psci_idle_init_cpu(struct device *dev, int cpu)
{
struct cpuidle_driver *drv;
struct device_node *cpu_node;
@@ -269,17 +344,26 @@ static int __init psci_idle_init_cpu(int cpu)
if (ret)
return ret;
- drv = kmemdup(&psci_idle_driver, sizeof(*drv), GFP_KERNEL);
+ drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
if (!drv)
return -ENOMEM;
+ drv->name = "psci_idle";
+ drv->owner = THIS_MODULE;
drv->cpumask = (struct cpumask *)cpumask_of(cpu);
/*
- * Initialize idle states data, starting at index 1, since
- * by default idle state 0 is the quiescent state reached
- * by the cpu by executing the wfi instruction.
- *
+ * PSCI idle states rely on architectural WFI to be represented as
+ * state index 0.
+ */
+ drv->states[0].enter = psci_enter_idle_state;
+ drv->states[0].exit_latency = 1;
+ drv->states[0].target_residency = 1;
+ drv->states[0].power_usage = UINT_MAX;
+ strcpy(drv->states[0].name, "WFI");
+ strcpy(drv->states[0].desc, "ARM WFI");
+
+ /*
* If no DT idle states are detected (ret == 0) let the driver
* initialization fail accordingly since there is no reason to
* initialize the idle driver if only wfi is supported, the
@@ -287,46 +371,45 @@ static int __init psci_idle_init_cpu(int cpu)
* on idle entry.
*/
ret = dt_init_idle_driver(drv, psci_idle_state_match, 1);
- if (ret <= 0) {
- ret = ret ? : -ENODEV;
- goto out_kfree_drv;
- }
+ if (ret <= 0)
+ return ret ? : -ENODEV;
/*
* Initialize PSCI idle states.
*/
- ret = psci_cpu_init_idle(drv, cpu, ret);
+ ret = psci_cpu_init_idle(dev, drv, cpu, ret);
if (ret) {
pr_err("CPU %d failed to PSCI idle\n", cpu);
- goto out_kfree_drv;
+ return ret;
}
ret = cpuidle_register(drv, NULL);
if (ret)
- goto out_kfree_drv;
+ goto deinit;
- return 0;
+ cpuidle_cooling_register(drv);
-out_kfree_drv:
- kfree(drv);
+ return 0;
+deinit:
+ psci_cpu_deinit_idle(cpu);
return ret;
}
/*
- * psci_idle_init - Initializes PSCI cpuidle driver
+ * psci_idle_probe - Initializes PSCI cpuidle driver
*
* Initializes PSCI cpuidle driver for all CPUs, if any CPU fails
* to register cpuidle driver then rollback to cancel all CPUs
* registration.
*/
-static int __init psci_idle_init(void)
+static int psci_cpuidle_probe(struct platform_device *pdev)
{
int cpu, ret;
struct cpuidle_driver *drv;
struct cpuidle_device *dev;
for_each_possible_cpu(cpu) {
- ret = psci_idle_init_cpu(cpu);
+ ret = psci_idle_init_cpu(&pdev->dev, cpu);
if (ret)
goto out_fail;
}
@@ -339,9 +422,34 @@ out_fail:
dev = per_cpu(cpuidle_devices, cpu);
drv = cpuidle_get_cpu_driver(dev);
cpuidle_unregister(drv);
- kfree(drv);
+ psci_cpu_deinit_idle(cpu);
}
return ret;
}
+
+static struct platform_driver psci_cpuidle_driver = {
+ .probe = psci_cpuidle_probe,
+ .driver = {
+ .name = "psci-cpuidle",
+ },
+};
+
+static int __init psci_idle_init(void)
+{
+ struct platform_device *pdev;
+ int ret;
+
+ ret = platform_driver_register(&psci_cpuidle_driver);
+ if (ret)
+ return ret;
+
+ pdev = platform_device_register_simple("psci-cpuidle", -1, NULL, 0);
+ if (IS_ERR(pdev)) {
+ platform_driver_unregister(&psci_cpuidle_driver);
+ return PTR_ERR(pdev);
+ }
+
+ return 0;
+}
device_initcall(psci_idle_init);
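
Note the division of labor in the idle-entry paths above: psci_enter_state() uses the CPU_PM_CPU_IDLE_ENTER_PARAM() helper, while __psci_enter_domain_idle_state() open-codes the same bracketing so the runtime-PM and genpd calls can be slotted between the CPU PM notifiers and the firmware call. Roughly (a sketch of the helper's shape, not the verbatim <linux/cpuidle.h> definition):

    static int cpu_pm_idle_enter_sketch(int (*fw_enter)(u32), int idx, u32 param)
    {
            int ret = cpu_pm_enter();       /* CPU_PM_ENTER notifiers */

            if (ret)
                    return -1;              /* abort, stay shallow */

            ret = fw_enter(param) ? -1 : idx;

            cpu_pm_exit();                  /* CPU_PM_EXIT notifiers */
            return ret;
    }

which is exactly the ret = cpu_pm_enter() ... cpu_pm_exit() skeleton visible in __psci_enter_domain_idle_state().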
diff --git a/drivers/cpuidle/cpuidle-psci.h b/drivers/cpuidle/cpuidle-psci.h
index 7299a04dd467..4e132640ed64 100644
--- a/drivers/cpuidle/cpuidle-psci.h
+++ b/drivers/cpuidle/cpuidle-psci.h
@@ -3,15 +3,29 @@
#ifndef __CPUIDLE_PSCI_H
#define __CPUIDLE_PSCI_H
+struct device;
struct device_node;
void psci_set_domain_state(u32 state);
-int __init psci_dt_parse_state_node(struct device_node *np, u32 *state);
+int psci_dt_parse_state_node(struct device_node *np, u32 *state);
+
+#ifdef CONFIG_ARM_PSCI_CPUIDLE_DOMAIN
+
+#include "dt_idle_genpd.h"
+
+static inline struct device *psci_dt_attach_cpu(int cpu)
+{
+ return dt_idle_attach_cpu(cpu, "psci");
+}
+
+static inline void psci_dt_detach_cpu(struct device *dev)
+{
+ dt_idle_detach_cpu(dev);
+}
-#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
-struct device __init *psci_dt_attach_cpu(int cpu);
#else
-static inline struct device __init *psci_dt_attach_cpu(int cpu) { return NULL; }
+static inline struct device *psci_dt_attach_cpu(int cpu) { return NULL; }
+static inline void psci_dt_detach_cpu(struct device *dev) { }
#endif
#endif /* __CPUIDLE_PSCI_H */
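
The header keeps callers free of #ifdefs by pairing the real wrappers with empty static inline stubs. Reduced to its essentials (hypothetical names, same shape as above):

    #ifdef CONFIG_FEATURE
    struct device *feature_attach_cpu(int cpu);
    void feature_detach_cpu(struct device *dev);
    #else
    static inline struct device *feature_attach_cpu(int cpu) { return NULL; }
    static inline void feature_detach_cpu(struct device *dev) { }
    #endif

so psci_dt_attach_cpu() returning NULL in the stub case simply means "no PM domain", which the IS_ERR_OR_NULL() checks in cpuidle-psci.c already handle.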
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 74c247972bb3..7e7ab5597d7a 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -19,9 +19,11 @@
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/runlatch.h>
+#include <asm/idle.h>
#include <asm/plpar_wrappers.h>
+#include <asm/rtas.h>
-struct cpuidle_driver pseries_idle_driver = {
+static struct cpuidle_driver pseries_idle_driver = {
.name = "pseries_idle",
.owner = THIS_MODULE,
};
@@ -31,39 +33,15 @@ static struct cpuidle_state *cpuidle_state_table __read_mostly;
static u64 snooze_timeout __read_mostly;
static bool snooze_timeout_en __read_mostly;
-static inline void idle_loop_prolog(unsigned long *in_purr)
-{
- ppc64_runlatch_off();
- *in_purr = mfspr(SPRN_PURR);
- /*
- * Indicate to the HV that we are idle. Now would be
- * a good time to find other work to dispatch.
- */
- get_lppaca()->idle = 1;
-}
-
-static inline void idle_loop_epilog(unsigned long in_purr)
-{
- u64 wait_cycles;
-
- wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles);
- wait_cycles += mfspr(SPRN_PURR) - in_purr;
- get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
- get_lppaca()->idle = 0;
-
- ppc64_runlatch_on();
-}
-
static int snooze_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
- unsigned long in_purr;
u64 snooze_exit_time;
set_thread_flag(TIF_POLLING_NRFLAG);
- idle_loop_prolog(&in_purr);
+ pseries_idle_prolog();
local_irq_enable();
snooze_exit_time = get_tb() + snooze_timeout;
@@ -87,7 +65,7 @@ static int snooze_loop(struct cpuidle_device *dev,
local_irq_disable();
- idle_loop_epilog(in_purr);
+ pseries_idle_epilog();
return index;
}
@@ -109,22 +87,152 @@ static void check_and_cede_processor(void)
}
}
+/*
+ * XCEDE: Extended CEDE states discovered through the
+ * "ibm,get-systems-parameter" RTAS call with the token
+ * CEDE_LATENCY_TOKEN
+ */
+
+/*
+ * Section 7.3.16 System Parameters Option of PAPR version 2.8.1 has a
+ * table with all the parameters to ibm,get-system-parameter.
+ * CEDE_LATENCY_TOKEN corresponds to the token value for Cede Latency
+ * Settings Information.
+ */
+#define CEDE_LATENCY_TOKEN 45
+
+/*
+ * If the platform supports the cede latency settings information system
+ * parameter it must provide the following information in the NULL terminated
+ * parameter string:
+ *
+ * a. The first byte is the length “N” of each cede latency setting record minus
+ * one (zero indicates a length of 1 byte).
+ *
+ * b. For each supported cede latency setting a cede latency setting record
+ * consisting of the first “N” bytes as per the following table.
+ *
+ * -----------------------------
+ * | Field | Field |
+ * | Name | Length |
+ * -----------------------------
+ * | Cede Latency | 1 Byte |
+ * | Specifier Value | |
+ * -----------------------------
+ * | Maximum wakeup | |
+ * | latency in | 8 Bytes |
+ * | tb-ticks | |
+ * -----------------------------
+ * | Responsive to | |
+ * | external | 1 Byte |
+ * | interrupts | |
+ * -----------------------------
+ *
+ * This version has cede latency record size = 10.
+ *
+ * The structure xcede_latency_payload represents a) and b) with
+ * xcede_latency_record representing the table in b).
+ *
+ * xcede_latency_parameter is what gets returned by
+ * the ibm,get-system-parameter RTAS call when made with
+ * CEDE_LATENCY_TOKEN.
+ *
+ * These structures are only used to represent the data obtained by the RTAS
+ * call. The data is in big-endian.
+ */
+struct xcede_latency_record {
+ u8 hint;
+ __be64 latency_ticks;
+ u8 wake_on_irqs;
+} __packed;
+
+// Make space for 16 records, which "should be enough".
+struct xcede_latency_payload {
+ u8 record_size;
+ struct xcede_latency_record records[16];
+} __packed;
+
+struct xcede_latency_parameter {
+ __be16 payload_size;
+ struct xcede_latency_payload payload;
+ u8 null_char;
+} __packed;
+
+static unsigned int nr_xcede_records;
+static struct xcede_latency_parameter xcede_latency_parameter __initdata;
+
+static int __init parse_cede_parameters(void)
+{
+ struct xcede_latency_payload *payload;
+ u32 total_xcede_records_size;
+ u8 xcede_record_size;
+ u16 payload_size;
+ int ret, i;
+
+ ret = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+ NULL, CEDE_LATENCY_TOKEN, __pa(&xcede_latency_parameter),
+ sizeof(xcede_latency_parameter));
+ if (ret) {
+ pr_err("xcede: Error parsing CEDE_LATENCY_TOKEN\n");
+ return ret;
+ }
+
+ payload_size = be16_to_cpu(xcede_latency_parameter.payload_size);
+ payload = &xcede_latency_parameter.payload;
+
+ xcede_record_size = payload->record_size + 1;
+
+ if (xcede_record_size != sizeof(struct xcede_latency_record)) {
+ pr_err("xcede: Expected record-size %lu. Observed size %u.\n",
+ sizeof(struct xcede_latency_record), xcede_record_size);
+ return -EINVAL;
+ }
+
+ pr_info("xcede: xcede_record_size = %d\n", xcede_record_size);
+
+ /*
+ * Since the payload_size includes the trailing NULL byte and the
+ * leading record_size byte, the remaining bytes correspond to the
+ * array of all cede_latency setting records.
+ */
+ total_xcede_records_size = payload_size - 2;
+ nr_xcede_records = total_xcede_records_size / xcede_record_size;
+
+ for (i = 0; i < nr_xcede_records; i++) {
+ struct xcede_latency_record *record = &payload->records[i];
+ u64 latency_ticks = be64_to_cpu(record->latency_ticks);
+ u8 wake_on_irqs = record->wake_on_irqs;
+ u8 hint = record->hint;
+
+ pr_info("xcede: Record %d : hint = %u, latency = 0x%llx tb ticks, Wake-on-irq = %u\n",
+ i, hint, latency_ticks, wake_on_irqs);
+ }
+
+ return 0;
+}
+
+#define NR_DEDICATED_STATES 2 /* snooze, CEDE */
+static u8 cede_latency_hint[NR_DEDICATED_STATES];
+
static int dedicated_cede_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
- unsigned long in_purr;
+ u8 old_latency_hint;
- idle_loop_prolog(&in_purr);
+ pseries_idle_prolog();
get_lppaca()->donate_dedicated_cpu = 1;
+ old_latency_hint = get_lppaca()->cede_latency_hint;
+ get_lppaca()->cede_latency_hint = cede_latency_hint[index];
HMT_medium();
check_and_cede_processor();
local_irq_disable();
get_lppaca()->donate_dedicated_cpu = 0;
+ get_lppaca()->cede_latency_hint = old_latency_hint;
- idle_loop_epilog(in_purr);
+ pseries_idle_epilog();
return index;
}
@@ -133,9 +241,8 @@ static int shared_cede_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
- unsigned long in_purr;
- idle_loop_prolog(&in_purr);
+ pseries_idle_prolog();
/*
* Yield the processor to the hypervisor. We return if
@@ -147,7 +254,7 @@ static int shared_cede_loop(struct cpuidle_device *dev,
check_and_cede_processor();
local_irq_disable();
- idle_loop_epilog(in_purr);
+ pseries_idle_epilog();
return index;
}
@@ -155,7 +262,7 @@ static int shared_cede_loop(struct cpuidle_device *dev,
/*
* States for dedicated partition case.
*/
-static struct cpuidle_state dedicated_states[] = {
+static struct cpuidle_state dedicated_states[NR_DEDICATED_STATES] = {
{ /* Snooze */
.name = "snooze",
.desc = "snooze",
@@ -236,11 +343,67 @@ static int pseries_cpuidle_driver_init(void)
return 0;
}
+static void __init fixup_cede0_latency(void)
+{
+ struct xcede_latency_payload *payload;
+ u64 min_xcede_latency_us = UINT_MAX;
+ int i;
+
+ if (parse_cede_parameters())
+ return;
+
+ pr_info("cpuidle: Skipping the %d Extended CEDE idle states\n",
+ nr_xcede_records);
+
+ payload = &xcede_latency_parameter.payload;
+
+ /*
+ * The CEDE idle state maps to CEDE(0). While the hypervisor
+ * does not advertise CEDE(0) exit latency values, it does
+ * advertise the latency values of the extended CEDE states.
+ * We use the lowest advertised exit latency value as a proxy
+ * for the exit latency of CEDE(0).
+ */
+ for (i = 0; i < nr_xcede_records; i++) {
+ struct xcede_latency_record *record = &payload->records[i];
+ u8 hint = record->hint;
+ u64 latency_tb = be64_to_cpu(record->latency_ticks);
+ u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC);
+
+ /*
+ * We expect the exit latency of an extended CEDE
+ * state to be non-zero, since it takes at least
+ * a few nanoseconds to wake up the idle CPU and
+ * dispatch the virtual processor into the Linux
+ * Guest.
+ *
+ * So we consider only non-zero value for performing
+ * the fixup of CEDE(0) latency.
+ */
+ if (latency_us == 0) {
+ pr_warn("cpuidle: Skipping xcede record %d [hint=%d]. Exit latency = 0us\n",
+ i, hint);
+ continue;
+ }
+
+ if (latency_us < min_xcede_latency_us)
+ min_xcede_latency_us = latency_us;
+ }
+
+ if (min_xcede_latency_us != UINT_MAX) {
+ dedicated_states[1].exit_latency = min_xcede_latency_us;
+ dedicated_states[1].target_residency = 10 * (min_xcede_latency_us);
+ pr_info("cpuidle: Fixed up CEDE exit latency to %llu us\n",
+ min_xcede_latency_us);
+ }
+
+}
+
/*
* pseries_idle_probe()
* Choose state table for shared versus dedicated partition
*/
-static int pseries_idle_probe(void)
+static int __init pseries_idle_probe(void)
{
if (cpuidle_disable != IDLE_NO_OVERRIDE)
@@ -257,8 +420,23 @@ static int pseries_idle_probe(void)
cpuidle_state_table = shared_states;
max_idle_state = ARRAY_SIZE(shared_states);
} else {
+ /*
+ * Use firmware provided latency values
+ * starting with POWER10 platforms. In the
+ * case that we are running on a POWER10
+ * platform but in an earlier compat mode, we
+ * can still use the firmware provided values.
+ *
+ * However, on platforms prior to POWER10, we
+ * cannot rely on the accuracy of the firmware
+ * provided latency values. On such platforms,
+ * go with the conservative default estimate
+ * of 10us.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31) || pvr_version_is(PVR_POWER10))
+ fixup_cede0_latency();
cpuidle_state_table = dedicated_states;
- max_idle_state = ARRAY_SIZE(dedicated_states);
+ max_idle_state = NR_DEDICATED_STATES;
}
} else
return -ENODEV;
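
To make the XCEDE payload arithmetic concrete, here is a worked example with fabricated values (real buffers come from the hypervisor): a record_size byte of 9 means 10-byte records, and a payload_size of 22 covers the record_size byte, two records and the trailing NUL:

    /* sketch with made-up numbers; mirrors parse_cede_parameters() */
    u8  record_size_byte  = 9;                     /* length minus one */
    u16 payload_size      = 22;                    /* 1 + 2*10 + 1     */
    u8  xcede_record_size = record_size_byte + 1;  /* = 10             */
    u32 total             = payload_size - 2;      /* strip size + NUL */
    unsigned int nr_xcede_records = total / xcede_record_size;  /* = 2 */

Each 10-byte record then decodes as 1 byte of hint, 8 big-endian bytes of wakeup latency in tb-ticks and 1 wake-on-irq byte, matching struct xcede_latency_record. For the latency fixup, assuming the usual 512 MHz POWER timebase, a record advertising 5120 tb-ticks converts via tb_to_ns() to 10000 ns, i.e. a CEDE(0) exit latency of 10 us.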
diff --git a/drivers/cpuidle/cpuidle-qcom-spm.c b/drivers/cpuidle/cpuidle-qcom-spm.c
new file mode 100644
index 000000000000..beedf22cbe78
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-qcom-spm.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2011-2014, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2014,2015, Linaro Ltd.
+ *
+ * SAW power controller driver
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+#include <linux/qcom_scm.h>
+#include <soc/qcom/spm.h>
+
+#include <asm/proc-fns.h>
+#include <asm/suspend.h>
+
+#include "dt_idle_states.h"
+
+struct cpuidle_qcom_spm_data {
+ struct cpuidle_driver cpuidle_driver;
+ struct spm_driver_data *spm;
+};
+
+static int qcom_pm_collapse(unsigned long int unused)
+{
+ qcom_scm_cpu_power_down(QCOM_SCM_CPU_PWR_DOWN_L2_ON);
+
+ /*
+ * Returns here only if there was a pending interrupt and we did not
+ * power down as a result.
+ */
+ return -1;
+}
+
+static int qcom_cpu_spc(struct spm_driver_data *drv)
+{
+ int ret;
+
+ spm_set_low_power_mode(drv, PM_SLEEP_MODE_SPC);
+ ret = cpu_suspend(0, qcom_pm_collapse);
+ /*
+ * ARM common code executes WFI without calling into our driver and
+ * if the SPM mode is not reset, then we may accidently power down the
+ * cpu when we intended only to gate the cpu clock.
+ * Ensure the state is set to standby before returning.
+ */
+ spm_set_low_power_mode(drv, PM_SLEEP_MODE_STBY);
+
+ return ret;
+}
+
+static int spm_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
+{
+ struct cpuidle_qcom_spm_data *data = container_of(drv, struct cpuidle_qcom_spm_data,
+ cpuidle_driver);
+
+ return CPU_PM_CPU_IDLE_ENTER_PARAM(qcom_cpu_spc, idx, data->spm);
+}
+
+static struct cpuidle_driver qcom_spm_idle_driver = {
+ .name = "qcom_spm",
+ .owner = THIS_MODULE,
+ .states[0] = {
+ .enter = spm_enter_idle_state,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .power_usage = UINT_MAX,
+ .name = "WFI",
+ .desc = "ARM WFI",
+ }
+};
+
+static const struct of_device_id qcom_idle_state_match[] = {
+ { .compatible = "qcom,idle-state-spc", .data = spm_enter_idle_state },
+ { },
+};
+
+static int spm_cpuidle_register(struct device *cpuidle_dev, int cpu)
+{
+ struct platform_device *pdev = NULL;
+ struct device_node *cpu_node, *saw_node;
+ struct cpuidle_qcom_spm_data *data = NULL;
+ int ret;
+
+ cpu_node = of_cpu_device_node_get(cpu);
+ if (!cpu_node)
+ return -ENODEV;
+
+ saw_node = of_parse_phandle(cpu_node, "qcom,saw", 0);
+ if (!saw_node)
+ return -ENODEV;
+
+ pdev = of_find_device_by_node(saw_node);
+ of_node_put(saw_node);
+ of_node_put(cpu_node);
+ if (!pdev)
+ return -ENODEV;
+
+ data = devm_kzalloc(cpuidle_dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->spm = dev_get_drvdata(&pdev->dev);
+ if (!data->spm)
+ return -EINVAL;
+
+ data->cpuidle_driver = qcom_spm_idle_driver;
+ data->cpuidle_driver.cpumask = (struct cpumask *)cpumask_of(cpu);
+
+ ret = dt_init_idle_driver(&data->cpuidle_driver,
+ qcom_idle_state_match, 1);
+ if (ret <= 0)
+ return ret ? : -ENODEV;
+
+ return cpuidle_register(&data->cpuidle_driver, NULL);
+}
+
+static int spm_cpuidle_drv_probe(struct platform_device *pdev)
+{
+ int cpu, ret;
+
+ if (!qcom_scm_is_available())
+ return -EPROBE_DEFER;
+
+ ret = qcom_scm_set_warm_boot_addr(cpu_resume_arm);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "set warm boot addr failed");
+
+ for_each_possible_cpu(cpu) {
+ ret = spm_cpuidle_register(&pdev->dev, cpu);
+ if (ret && ret != -ENODEV) {
+ dev_err(&pdev->dev,
+ "Cannot register for CPU%d: %d\n", cpu, ret);
+ }
+ }
+
+ return 0;
+}
+
+static struct platform_driver spm_cpuidle_driver = {
+ .probe = spm_cpuidle_drv_probe,
+ .driver = {
+ .name = "qcom-spm-cpuidle",
+ .suppress_bind_attrs = true,
+ },
+};
+
+static bool __init qcom_spm_find_any_cpu(void)
+{
+ struct device_node *cpu_node, *saw_node;
+
+ for_each_of_cpu_node(cpu_node) {
+ saw_node = of_parse_phandle(cpu_node, "qcom,saw", 0);
+ if (of_device_is_available(saw_node)) {
+ of_node_put(saw_node);
+ of_node_put(cpu_node);
+ return true;
+ }
+ of_node_put(saw_node);
+ }
+ return false;
+}
+
+static int __init qcom_spm_cpuidle_init(void)
+{
+ struct platform_device *pdev;
+ int ret;
+
+ ret = platform_driver_register(&spm_cpuidle_driver);
+ if (ret)
+ return ret;
+
+	/* Make sure there is at least one CPU actually managed by the SPM */
+ if (!qcom_spm_find_any_cpu())
+ return 0;
+
+ pdev = platform_device_register_simple("qcom-spm-cpuidle",
+ -1, NULL, 0);
+ if (IS_ERR(pdev)) {
+ platform_driver_unregister(&spm_cpuidle_driver);
+ return PTR_ERR(pdev);
+ }
+
+ return 0;
+}
+device_initcall(qcom_spm_cpuidle_init);
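
spm_enter_idle_state() recovers its private data without any global lookup by embedding struct cpuidle_driver inside struct cpuidle_qcom_spm_data and walking back with container_of(). The generic shape of the trick, as a sketch with hypothetical names:

    struct wrapper {
            struct cpuidle_driver drv;      /* embedded by value */
            void *backend;                  /* e.g. the SPM handle */
    };

    static int enter_cb(struct cpuidle_device *dev,
                        struct cpuidle_driver *drv, int idx)
    {
            /* cpuidle passes only drv; container_of() finds the wrapper */
            struct wrapper *w = container_of(drv, struct wrapper, drv);

            return do_backend_idle(w->backend, idx);  /* hypothetical helper */
    }

This works because each CPU gets its own devm-allocated wrapper, so the embedded driver's address uniquely identifies the per-CPU instance.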
diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c
new file mode 100644
index 000000000000..05fe2902df9a
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -0,0 +1,633 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RISC-V SBI CPU idle driver.
+ *
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (c) 2022 Ventana Micro Systems Inc.
+ */
+
+#define pr_fmt(fmt) "cpuidle-riscv-sbi: " fmt
+
+#include <linux/cpuidle.h>
+#include <linux/cpumask.h>
+#include <linux/cpu_pm.h>
+#include <linux/cpu_cooling.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+#include <asm/cpuidle.h>
+#include <asm/sbi.h>
+#include <asm/smp.h>
+#include <asm/suspend.h>
+
+#include "dt_idle_states.h"
+#include "dt_idle_genpd.h"
+
+struct sbi_cpuidle_data {
+ u32 *states;
+ struct device *dev;
+};
+
+struct sbi_domain_state {
+ bool available;
+ u32 state;
+};
+
+static DEFINE_PER_CPU_READ_MOSTLY(struct sbi_cpuidle_data, sbi_cpuidle_data);
+static DEFINE_PER_CPU(struct sbi_domain_state, domain_state);
+static bool sbi_cpuidle_use_osi;
+static bool sbi_cpuidle_use_cpuhp;
+static bool sbi_cpuidle_pd_allow_domain_state;
+
+static inline void sbi_set_domain_state(u32 state)
+{
+ struct sbi_domain_state *data = this_cpu_ptr(&domain_state);
+
+ data->available = true;
+ data->state = state;
+}
+
+static inline u32 sbi_get_domain_state(void)
+{
+ struct sbi_domain_state *data = this_cpu_ptr(&domain_state);
+
+ return data->state;
+}
+
+static inline void sbi_clear_domain_state(void)
+{
+ struct sbi_domain_state *data = this_cpu_ptr(&domain_state);
+
+ data->available = false;
+}
+
+static inline bool sbi_is_domain_state_available(void)
+{
+ struct sbi_domain_state *data = this_cpu_ptr(&domain_state);
+
+ return data->available;
+}
+
+static int sbi_suspend_finisher(unsigned long suspend_type,
+ unsigned long resume_addr,
+ unsigned long opaque)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_SUSPEND,
+ suspend_type, resume_addr, opaque, 0, 0, 0);
+
+ return (ret.error) ? sbi_err_map_linux_errno(ret.error) : 0;
+}
+
+static int sbi_suspend(u32 state)
+{
+ if (state & SBI_HSM_SUSP_NON_RET_BIT)
+ return cpu_suspend(state, sbi_suspend_finisher);
+ else
+ return sbi_suspend_finisher(state, 0, 0);
+}
+
+static int sbi_cpuidle_enter_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
+{
+ u32 *states = __this_cpu_read(sbi_cpuidle_data.states);
+ u32 state = states[idx];
+
+ if (state & SBI_HSM_SUSP_NON_RET_BIT)
+ return CPU_PM_CPU_IDLE_ENTER_PARAM(sbi_suspend, idx, state);
+ else
+ return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM(sbi_suspend,
+ idx, state);
+}
+
+static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx,
+ bool s2idle)
+{
+ struct sbi_cpuidle_data *data = this_cpu_ptr(&sbi_cpuidle_data);
+ u32 *states = data->states;
+ struct device *pd_dev = data->dev;
+ u32 state;
+ int ret;
+
+ ret = cpu_pm_enter();
+ if (ret)
+ return -1;
+
+	/* Do runtime PM to manage a hierarchical CPU topology. */
+ ct_irq_enter_irqson();
+ if (s2idle)
+ dev_pm_genpd_suspend(pd_dev);
+ else
+ pm_runtime_put_sync_suspend(pd_dev);
+ ct_irq_exit_irqson();
+
+ if (sbi_is_domain_state_available())
+ state = sbi_get_domain_state();
+ else
+ state = states[idx];
+
+ ret = sbi_suspend(state) ? -1 : idx;
+
+ ct_irq_enter_irqson();
+ if (s2idle)
+ dev_pm_genpd_resume(pd_dev);
+ else
+ pm_runtime_get_sync(pd_dev);
+ ct_irq_exit_irqson();
+
+ cpu_pm_exit();
+
+ /* Clear the domain state to start fresh when back from idle. */
+ sbi_clear_domain_state();
+ return ret;
+}
+
+static int sbi_enter_domain_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
+{
+ return __sbi_enter_domain_idle_state(dev, drv, idx, false);
+}
+
+static int sbi_enter_s2idle_domain_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int idx)
+{
+ return __sbi_enter_domain_idle_state(dev, drv, idx, true);
+}
+
+static int sbi_cpuidle_cpuhp_up(unsigned int cpu)
+{
+ struct device *pd_dev = __this_cpu_read(sbi_cpuidle_data.dev);
+
+ if (pd_dev)
+ pm_runtime_get_sync(pd_dev);
+
+ return 0;
+}
+
+static int sbi_cpuidle_cpuhp_down(unsigned int cpu)
+{
+ struct device *pd_dev = __this_cpu_read(sbi_cpuidle_data.dev);
+
+ if (pd_dev) {
+ pm_runtime_put_sync(pd_dev);
+ /* Clear domain state to start fresh at next online. */
+ sbi_clear_domain_state();
+ }
+
+ return 0;
+}
+
+static void sbi_idle_init_cpuhp(void)
+{
+ int err;
+
+ if (!sbi_cpuidle_use_cpuhp)
+ return;
+
+ err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING,
+ "cpuidle/sbi:online",
+ sbi_cpuidle_cpuhp_up,
+ sbi_cpuidle_cpuhp_down);
+ if (err)
+ pr_warn("Failed %d while setup cpuhp state\n", err);
+}
+
+static const struct of_device_id sbi_cpuidle_state_match[] = {
+ { .compatible = "riscv,idle-state",
+ .data = sbi_cpuidle_enter_state },
+ { },
+};
+
+static bool sbi_suspend_state_is_valid(u32 state)
+{
+ if (state > SBI_HSM_SUSPEND_RET_DEFAULT &&
+ state < SBI_HSM_SUSPEND_RET_PLATFORM)
+ return false;
+ if (state > SBI_HSM_SUSPEND_NON_RET_DEFAULT &&
+ state < SBI_HSM_SUSPEND_NON_RET_PLATFORM)
+ return false;
+ return true;
+}
+
+static int sbi_dt_parse_state_node(struct device_node *np, u32 *state)
+{
+ int err = of_property_read_u32(np, "riscv,sbi-suspend-param", state);
+
+ if (err) {
+ pr_warn("%pOF missing riscv,sbi-suspend-param property\n", np);
+ return err;
+ }
+
+ if (!sbi_suspend_state_is_valid(*state)) {
+ pr_warn("Invalid SBI suspend state %#x\n", *state);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int sbi_dt_cpu_init_topology(struct cpuidle_driver *drv,
+ struct sbi_cpuidle_data *data,
+ unsigned int state_count, int cpu)
+{
+ /* Currently limit the hierarchical topology to be used in OSI mode. */
+ if (!sbi_cpuidle_use_osi)
+ return 0;
+
+ data->dev = dt_idle_attach_cpu(cpu, "sbi");
+ if (IS_ERR_OR_NULL(data->dev))
+ return PTR_ERR_OR_ZERO(data->dev);
+
+ /*
+ * Using the deepest state for the CPU to trigger a potential selection
+ * of a shared state for the domain, assumes the domain states are all
+ * deeper states.
+ */
+ drv->states[state_count - 1].enter = sbi_enter_domain_idle_state;
+ drv->states[state_count - 1].enter_s2idle =
+ sbi_enter_s2idle_domain_idle_state;
+ sbi_cpuidle_use_cpuhp = true;
+
+ return 0;
+}
+
+static int sbi_cpuidle_dt_init_states(struct device *dev,
+ struct cpuidle_driver *drv,
+ unsigned int cpu,
+ unsigned int state_count)
+{
+ struct sbi_cpuidle_data *data = per_cpu_ptr(&sbi_cpuidle_data, cpu);
+ struct device_node *state_node;
+ struct device_node *cpu_node;
+ u32 *states;
+ int i, ret;
+
+ cpu_node = of_cpu_device_node_get(cpu);
+ if (!cpu_node)
+ return -ENODEV;
+
+ states = devm_kcalloc(dev, state_count, sizeof(*states), GFP_KERNEL);
+ if (!states) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ /* Parse SBI specific details from state DT nodes */
+ for (i = 1; i < state_count; i++) {
+ state_node = of_get_cpu_state_node(cpu_node, i - 1);
+ if (!state_node)
+ break;
+
+ ret = sbi_dt_parse_state_node(state_node, &states[i]);
+ of_node_put(state_node);
+
+		if (ret)
+			goto fail;
+
+ pr_debug("sbi-state %#x index %d\n", states[i], i);
+ }
+ if (i != state_count) {
+ ret = -ENODEV;
+ goto fail;
+ }
+
+ /* Initialize optional data, used for the hierarchical topology. */
+ ret = sbi_dt_cpu_init_topology(drv, data, state_count, cpu);
+	if (ret < 0)
+		goto fail;
+
+ /* Store states in the per-cpu struct. */
+ data->states = states;
+
+fail:
+ of_node_put(cpu_node);
+
+ return ret;
+}
+
+static void sbi_cpuidle_deinit_cpu(int cpu)
+{
+ struct sbi_cpuidle_data *data = per_cpu_ptr(&sbi_cpuidle_data, cpu);
+
+ dt_idle_detach_cpu(data->dev);
+ sbi_cpuidle_use_cpuhp = false;
+}
+
+static int sbi_cpuidle_init_cpu(struct device *dev, int cpu)
+{
+ struct cpuidle_driver *drv;
+ unsigned int state_count = 0;
+ int ret = 0;
+
+ drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
+ if (!drv)
+ return -ENOMEM;
+
+ drv->name = "sbi_cpuidle";
+ drv->owner = THIS_MODULE;
+ drv->cpumask = (struct cpumask *)cpumask_of(cpu);
+
+ /* RISC-V architectural WFI to be represented as state index 0. */
+ drv->states[0].enter = sbi_cpuidle_enter_state;
+ drv->states[0].exit_latency = 1;
+ drv->states[0].target_residency = 1;
+ drv->states[0].power_usage = UINT_MAX;
+ strcpy(drv->states[0].name, "WFI");
+ strcpy(drv->states[0].desc, "RISC-V WFI");
+
+ /*
+ * If no DT idle states are detected (ret == 0) let the driver
+ * initialization fail accordingly since there is no reason to
+ * initialize the idle driver if only wfi is supported, the
+	 * default architectural back-end already executes wfi
+ * on idle entry.
+ */
+ ret = dt_init_idle_driver(drv, sbi_cpuidle_state_match, 1);
+ if (ret <= 0) {
+ pr_debug("HART%ld: failed to parse DT idle states\n",
+ cpuid_to_hartid_map(cpu));
+ return ret ? : -ENODEV;
+ }
+ state_count = ret + 1; /* Include WFI state as well */
+
+ /* Initialize idle states from DT. */
+ ret = sbi_cpuidle_dt_init_states(dev, drv, cpu, state_count);
+ if (ret) {
+ pr_err("HART%ld: failed to init idle states\n",
+ cpuid_to_hartid_map(cpu));
+ return ret;
+ }
+
+ ret = cpuidle_register(drv, NULL);
+ if (ret)
+ goto deinit;
+
+ cpuidle_cooling_register(drv);
+
+ return 0;
+deinit:
+ sbi_cpuidle_deinit_cpu(cpu);
+ return ret;
+}
+
+static void sbi_cpuidle_domain_sync_state(struct device *dev)
+{
+ /*
+ * All devices have now been attached/probed to the PM domain
+ * topology, hence it's fine to allow domain states to be picked.
+ */
+ sbi_cpuidle_pd_allow_domain_state = true;
+}
+
+#ifdef CONFIG_DT_IDLE_GENPD
+
+static int sbi_cpuidle_pd_power_off(struct generic_pm_domain *pd)
+{
+ struct genpd_power_state *state = &pd->states[pd->state_idx];
+ u32 *pd_state;
+
+ if (!state->data)
+ return 0;
+
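+	/*
+	 * Domain states must not be entered before sync_state() has run,
+	 * i.e. before all consumers of the PM domain topology have been
+	 * attached; see sbi_cpuidle_domain_sync_state().
+	 */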
+ if (!sbi_cpuidle_pd_allow_domain_state)
+ return -EBUSY;
+
+ /* OSI mode is enabled, set the corresponding domain state. */
+ pd_state = state->data;
+ sbi_set_domain_state(*pd_state);
+
+ return 0;
+}
+
+struct sbi_pd_provider {
+ struct list_head link;
+ struct device_node *node;
+};
+
+static LIST_HEAD(sbi_pd_providers);
+
+static int sbi_pd_init(struct device_node *np)
+{
+ struct generic_pm_domain *pd;
+ struct sbi_pd_provider *pd_provider;
+ struct dev_power_governor *pd_gov;
+ int ret = -ENOMEM;
+
+ pd = dt_idle_pd_alloc(np, sbi_dt_parse_state_node);
+ if (!pd)
+ goto out;
+
+ pd_provider = kzalloc(sizeof(*pd_provider), GFP_KERNEL);
+ if (!pd_provider)
+ goto free_pd;
+
+ pd->flags |= GENPD_FLAG_IRQ_SAFE | GENPD_FLAG_CPU_DOMAIN;
+
+ /* Allow power off when OSI is available. */
+ if (sbi_cpuidle_use_osi)
+ pd->power_off = sbi_cpuidle_pd_power_off;
+ else
+ pd->flags |= GENPD_FLAG_ALWAYS_ON;
+
+ /* Use governor for CPU PM domains if it has some states to manage. */
+ pd_gov = pd->states ? &pm_domain_cpu_gov : NULL;
+
+ ret = pm_genpd_init(pd, pd_gov, false);
+ if (ret)
+ goto free_pd_prov;
+
+ ret = of_genpd_add_provider_simple(np, pd);
+ if (ret)
+ goto remove_pd;
+
+ pd_provider->node = of_node_get(np);
+ list_add(&pd_provider->link, &sbi_pd_providers);
+
+ pr_debug("init PM domain %s\n", pd->name);
+ return 0;
+
+remove_pd:
+ pm_genpd_remove(pd);
+free_pd_prov:
+ kfree(pd_provider);
+free_pd:
+ dt_idle_pd_free(pd);
+out:
+ pr_err("failed to init PM domain ret=%d %pOF\n", ret, np);
+ return ret;
+}
+
+static void sbi_pd_remove(void)
+{
+ struct sbi_pd_provider *pd_provider, *it;
+ struct generic_pm_domain *genpd;
+
+ list_for_each_entry_safe(pd_provider, it, &sbi_pd_providers, link) {
+ of_genpd_del_provider(pd_provider->node);
+
+ genpd = of_genpd_remove_last(pd_provider->node);
+ if (!IS_ERR(genpd))
+ kfree(genpd);
+
+ of_node_put(pd_provider->node);
+ list_del(&pd_provider->link);
+ kfree(pd_provider);
+ }
+}
+
+static int sbi_genpd_probe(struct device_node *np)
+{
+ struct device_node *node;
+ int ret = 0, pd_count = 0;
+
+ if (!np)
+ return -ENODEV;
+
+ /*
+ * Parse child nodes for the "#power-domain-cells" property and
+ * initialize a genpd/genpd-of-provider pair when it's found.
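+	 *
+	 * A minimal sketch of such a provider node (the names and the
+	 * idle-state phandle are illustrative only, not mandated by this
+	 * driver):
+	 *
+	 *	power-domains {
+	 *		CPU_PD0: cpu-pd0 {
+	 *			#power-domain-cells = <0>;
+	 *			domain-idle-states = <&CLUSTER_SLEEP_0>;
+	 *		};
+	 *	};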
+ */
+ for_each_child_of_node(np, node) {
+ if (!of_find_property(node, "#power-domain-cells", NULL))
+ continue;
+
+ ret = sbi_pd_init(node);
+ if (ret)
+ goto put_node;
+
+ pd_count++;
+ }
+
+ /* Bail out if not using the hierarchical CPU topology. */
+ if (!pd_count)
+ goto no_pd;
+
+ /* Link genpd masters/subdomains to model the CPU topology. */
+ ret = dt_idle_pd_init_topology(np);
+ if (ret)
+ goto remove_pd;
+
+ return 0;
+
+put_node:
+ of_node_put(node);
+remove_pd:
+ sbi_pd_remove();
+ pr_err("failed to create CPU PM domains ret=%d\n", ret);
+no_pd:
+ return ret;
+}
+
+#else
+
+static inline int sbi_genpd_probe(struct device_node *np)
+{
+ return 0;
+}
+
+#endif
+
+static int sbi_cpuidle_probe(struct platform_device *pdev)
+{
+ int cpu, ret;
+ struct cpuidle_driver *drv;
+ struct cpuidle_device *dev;
+ struct device_node *np, *pds_node;
+
+ /* Detect OSI support based on CPU DT nodes */
+ sbi_cpuidle_use_osi = true;
+ for_each_possible_cpu(cpu) {
+ np = of_cpu_device_node_get(cpu);
+ if (np &&
+ of_find_property(np, "power-domains", NULL) &&
+ of_find_property(np, "power-domain-names", NULL)) {
+ continue;
+ } else {
+ sbi_cpuidle_use_osi = false;
+ break;
+ }
+ }
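+	/*
+	 * An illustrative sketch of the per-CPU layout checked for above
+	 * (the label is arbitrary); each CPU node references a PM domain
+	 * under the "sbi" name that dt_idle_attach_cpu() later looks up:
+	 *
+	 *	cpu@0 {
+	 *		...
+	 *		power-domains = <&CPU_PD0>;
+	 *		power-domain-names = "sbi";
+	 *	};
+	 */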
+
+ /* Populate generic power domains from DT nodes */
+ pds_node = of_find_node_by_path("/cpus/power-domains");
+ if (pds_node) {
+ ret = sbi_genpd_probe(pds_node);
+ of_node_put(pds_node);
+ if (ret)
+ return ret;
+ }
+
+ /* Initialize CPU idle driver for each CPU */
+ for_each_possible_cpu(cpu) {
+ ret = sbi_cpuidle_init_cpu(&pdev->dev, cpu);
+ if (ret) {
+ pr_debug("HART%ld: idle driver init failed\n",
+ cpuid_to_hartid_map(cpu));
+ goto out_fail;
+ }
+ }
+
+	/* Set up CPU hotplug notifiers */
+ sbi_idle_init_cpuhp();
+
+ pr_info("idle driver registered for all CPUs\n");
+
+ return 0;
+
+out_fail:
+ while (--cpu >= 0) {
+ dev = per_cpu(cpuidle_devices, cpu);
+ drv = cpuidle_get_cpu_driver(dev);
+ cpuidle_unregister(drv);
+ sbi_cpuidle_deinit_cpu(cpu);
+ }
+
+ return ret;
+}
+
+static struct platform_driver sbi_cpuidle_driver = {
+ .probe = sbi_cpuidle_probe,
+ .driver = {
+ .name = "sbi-cpuidle",
+ .sync_state = sbi_cpuidle_domain_sync_state,
+ },
+};
+
+static int __init sbi_cpuidle_init(void)
+{
+ int ret;
+ struct platform_device *pdev;
+
+ /*
+ * The SBI HSM suspend function is only available when:
+ * 1) SBI version is 0.3 or higher
+ * 2) SBI HSM extension is available
+ */
+ if ((sbi_spec_version < sbi_mk_version(0, 3)) ||
+ sbi_probe_extension(SBI_EXT_HSM) <= 0) {
+ pr_info("HSM suspend not available\n");
+ return 0;
+ }
+
+ ret = platform_driver_register(&sbi_cpuidle_driver);
+ if (ret)
+ return ret;
+
+ pdev = platform_device_register_simple("sbi-cpuidle",
+ -1, NULL, 0);
+ if (IS_ERR(pdev)) {
+ platform_driver_unregister(&sbi_cpuidle_driver);
+ return PTR_ERR(pdev);
+ }
+
+ return 0;
+}
+device_initcall(sbi_cpuidle_init);
diff --git a/drivers/cpuidle/cpuidle-tegra.c b/drivers/cpuidle/cpuidle-tegra.c
new file mode 100644
index 000000000000..9845629aeb6d
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-tegra.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CPU idle driver for Tegra CPUs
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ * Copyright (c) 2011 Google, Inc.
+ * Author: Colin Cross <ccross@android.com>
+ * Gary King <gking@nvidia.com>
+ *
+ * Rework for 3.3 by Peter De Schrijver <pdeschrijver@nvidia.com>
+ *
+ * Tegra20/124 driver unification by Dmitry Osipenko <digetx@gmail.com>
+ */
+
+#define pr_fmt(fmt) "tegra-cpuidle: " fmt
+
+#include <linux/atomic.h>
+#include <linux/cpuidle.h>
+#include <linux/cpumask.h>
+#include <linux/cpu_pm.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#include <linux/clk/tegra.h>
+#include <linux/firmware/trusted_foundations.h>
+
+#include <soc/tegra/cpuidle.h>
+#include <soc/tegra/flowctrl.h>
+#include <soc/tegra/fuse.h>
+#include <soc/tegra/irq.h>
+#include <soc/tegra/pm.h>
+#include <soc/tegra/pmc.h>
+
+#include <asm/cpuidle.h>
+#include <asm/firmware.h>
+#include <asm/smp_plat.h>
+#include <asm/suspend.h>
+
+enum tegra_state {
+ TEGRA_C1,
+ TEGRA_C7,
+ TEGRA_CC6,
+ TEGRA_STATE_COUNT,
+};
+
+static atomic_t tegra_idle_barrier;
+static atomic_t tegra_abort_flag;
+
+static void tegra_cpuidle_report_cpus_state(void)
+{
+ unsigned long cpu, lcpu, csr;
+
+ for_each_cpu(lcpu, cpu_possible_mask) {
+ cpu = cpu_logical_map(lcpu);
+ csr = flowctrl_read_cpu_csr(cpu);
+
+ pr_err("cpu%lu: online=%d flowctrl_csr=0x%08lx\n",
+ cpu, cpu_online(lcpu), csr);
+ }
+}
+
+static int tegra_cpuidle_wait_for_secondary_cpus_parking(void)
+{
+ unsigned int retries = 3;
+
+ while (retries--) {
+ unsigned int delay_us = 10;
+ unsigned int timeout_us = 500 * 1000 / delay_us;
+
+ /*
+		 * The primary CPU0 core shall wait for the secondaries to
+		 * shut down in order to power off the CPU cluster safely.
+		 * The timeout value depends on the current CPU frequency;
+		 * parking takes about 40-150us on average and over 1000us
+		 * in a worst-case scenario.
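+		 *
+		 * With a 10 usec polling delay and a 500 msec limit per
+		 * attempt, up to three retries are made here, i.e. about
+		 * 1.5 sec in total before giving up.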
+ */
+ do {
+ if (tegra_cpu_rail_off_ready())
+ return 0;
+
+ udelay(delay_us);
+
+ } while (timeout_us--);
+
+ pr_err("secondary CPU taking too long to park\n");
+
+ tegra_cpuidle_report_cpus_state();
+ }
+
+ pr_err("timed out waiting secondaries to park\n");
+
+ return -ETIMEDOUT;
+}
+
+static void tegra_cpuidle_unpark_secondary_cpus(void)
+{
+ unsigned int cpu, lcpu;
+
+ for_each_cpu(lcpu, cpu_online_mask) {
+ cpu = cpu_logical_map(lcpu);
+
+ if (cpu > 0) {
+ tegra_enable_cpu_clock(cpu);
+ tegra_cpu_out_of_reset(cpu);
+ flowctrl_write_cpu_halt(cpu, 0);
+ }
+ }
+}
+
+static int tegra_cpuidle_cc6_enter(unsigned int cpu)
+{
+ int ret;
+
+ if (cpu > 0) {
+ ret = cpu_suspend(cpu, tegra_pm_park_secondary_cpu);
+ } else {
+ ret = tegra_cpuidle_wait_for_secondary_cpus_parking();
+ if (!ret)
+ ret = tegra_pm_enter_lp2();
+
+ tegra_cpuidle_unpark_secondary_cpus();
+ }
+
+ return ret;
+}
+
+static int tegra_cpuidle_c7_enter(void)
+{
+ int err;
+
+ err = call_firmware_op(prepare_idle, TF_PM_MODE_LP2_NOFLUSH_L2);
+ if (err && err != -ENOSYS)
+ return err;
+
+ return cpu_suspend(0, tegra30_pm_secondary_cpu_suspend);
+}
+
+static int tegra_cpuidle_coupled_barrier(struct cpuidle_device *dev)
+{
+ if (tegra_pending_sgi()) {
+ /*
+		 * The CPU got a local interrupt that will be lost after the
+		 * GIC's shutdown because the GIC driver doesn't save/restore
+		 * the pending SGI state across CPU cluster PM. Abort and
+		 * retry next time.
+ */
+ atomic_set(&tegra_abort_flag, 1);
+ }
+
+ cpuidle_coupled_parallel_barrier(dev, &tegra_idle_barrier);
+
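+	/*
+	 * The first barrier guarantees that, by the time any CPU checks
+	 * the abort flag, all CPUs that could set it have done so. The
+	 * second barrier ensures that every CPU has seen the flag before
+	 * it gets cleared and the entry attempt is backed out of.
+	 */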
+ if (atomic_read(&tegra_abort_flag)) {
+ cpuidle_coupled_parallel_barrier(dev, &tegra_idle_barrier);
+ atomic_set(&tegra_abort_flag, 0);
+ return -EINTR;
+ }
+
+ return 0;
+}
+
+static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
+ int index, unsigned int cpu)
+{
+ int err;
+
+ /*
+ * CC6 state is the "CPU cluster power-off" state. In order to
+	 * enter this state, the secondary CPU cores first need to be
+	 * parked in offline mode; the last CPU should then clean out
+ * remaining dirty cache lines into DRAM and trigger Flow Controller
+ * logic that turns off the cluster's power domain (which includes
+ * CPU cores, GIC and L2 cache).
+ */
+ if (index == TEGRA_CC6) {
+ err = tegra_cpuidle_coupled_barrier(dev);
+ if (err)
+ return err;
+ }
+
+ local_fiq_disable();
+ RCU_NONIDLE(tegra_pm_set_cpu_in_lp2());
+ cpu_pm_enter();
+
+ switch (index) {
+ case TEGRA_C7:
+ err = tegra_cpuidle_c7_enter();
+ break;
+
+ case TEGRA_CC6:
+ err = tegra_cpuidle_cc6_enter(cpu);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ cpu_pm_exit();
+ RCU_NONIDLE(tegra_pm_clear_cpu_in_lp2());
+ local_fiq_enable();
+
+ return err ?: index;
+}
+
+static int tegra_cpuidle_adjust_state_index(int index, unsigned int cpu)
+{
+ /*
+	 * On Tegra30, CPU0 can't be power-gated separately from the
+	 * secondary cores because power-gating it gates the whole CPU
+	 * cluster.
+ */
+ if (cpu > 0 || index != TEGRA_C7 || tegra_get_chip_id() != TEGRA30)
+ return index;
+
+	/*
+	 * Put CPU0 into C1 if C7 is requested while secondary CPUs are
+	 * online or system suspend support is disabled; otherwise promote
+	 * the request to the cluster power-off state CC6.
+	 */
+ if (!IS_ENABLED(CONFIG_PM_SLEEP) || num_online_cpus() > 1)
+ index = TEGRA_C1;
+ else
+ index = TEGRA_CC6;
+
+ return index;
+}
+
+static int tegra_cpuidle_enter(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ unsigned int cpu = cpu_logical_map(dev->cpu);
+ int ret;
+
+ index = tegra_cpuidle_adjust_state_index(index, cpu);
+ if (dev->states_usage[index].disable)
+ return -1;
+
+ if (index == TEGRA_C1)
+ ret = arm_cpuidle_simple_enter(dev, drv, index);
+ else
+ ret = tegra_cpuidle_state_enter(dev, index, cpu);
+
+ if (ret < 0) {
+ if (ret != -EINTR || index != TEGRA_CC6)
+ pr_err_once("failed to enter state %d err: %d\n",
+ index, ret);
+ index = -1;
+ } else {
+ index = ret;
+ }
+
+ return index;
+}
+
+static int tegra114_enter_s2idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ tegra_cpuidle_enter(dev, drv, index);
+
+ return 0;
+}
+
+/*
+ * Previous versions of the Tegra CPUIDLE driver used a different "legacy"
+ * terminology for naming the idle states, while this driver uses the new
+ * terminology.
+ *
+ * Mapping of the old terms into the new ones:
+ *
+ * Old | New
+ * ---------
+ * LP3 | C1 (CPU core clock gating)
+ * LP2 | C7 (CPU core power gating)
+ * LP2 | CC6 (CPU cluster power gating)
+ *
+ * Note that the older CPUIDLE driver versions didn't explicitly
+ * differentiate the LP2 states because those states either used the same
+ * code path or CC6 wasn't supported.
+ */
+static struct cpuidle_driver tegra_idle_driver = {
+ .name = "tegra_idle",
+ .states = {
+ [TEGRA_C1] = ARM_CPUIDLE_WFI_STATE_PWR(600),
+ [TEGRA_C7] = {
+ .enter = tegra_cpuidle_enter,
+ .exit_latency = 2000,
+ .target_residency = 2200,
+ .power_usage = 100,
+ .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .name = "C7",
+ .desc = "CPU core powered off",
+ },
+ [TEGRA_CC6] = {
+ .enter = tegra_cpuidle_enter,
+ .exit_latency = 5000,
+ .target_residency = 10000,
+ .power_usage = 0,
+ .flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_COUPLED,
+ .name = "CC6",
+ .desc = "CPU cluster powered off",
+ },
+ },
+ .state_count = TEGRA_STATE_COUNT,
+ .safe_state_index = TEGRA_C1,
+};
+
+static inline void tegra_cpuidle_disable_state(enum tegra_state state)
+{
+ cpuidle_driver_state_disabled(&tegra_idle_driver, state, true);
+}
+
+/*
+ * Tegra20 HW appears to have a bug such that PCIe device interrupts, whether
+ * they are legacy IRQs or MSI, are lost when CC6 is enabled. To work around
+ * this, simply disable CC6 if the PCI driver and DT node are both enabled.
+ */
+void tegra_cpuidle_pcie_irqs_in_use(void)
+{
+ struct cpuidle_state *state_cc6 = &tegra_idle_driver.states[TEGRA_CC6];
+
+ if ((state_cc6->flags & CPUIDLE_FLAG_UNUSABLE) ||
+ tegra_get_chip_id() != TEGRA20)
+ return;
+
+ pr_info("disabling CC6 state, since PCIe IRQs are in use\n");
+ tegra_cpuidle_disable_state(TEGRA_CC6);
+}
+
+static void tegra_cpuidle_setup_tegra114_c7_state(void)
+{
+ struct cpuidle_state *s = &tegra_idle_driver.states[TEGRA_C7];
+
+ s->enter_s2idle = tegra114_enter_s2idle;
+ s->target_residency = 1000;
+ s->exit_latency = 500;
+}
+
+static int tegra_cpuidle_probe(struct platform_device *pdev)
+{
+ if (tegra_pmc_get_suspend_mode() == TEGRA_SUSPEND_NOT_READY)
+ return -EPROBE_DEFER;
+
+ /* LP2 could be disabled in device-tree */
+ if (tegra_pmc_get_suspend_mode() < TEGRA_SUSPEND_LP2)
+ tegra_cpuidle_disable_state(TEGRA_CC6);
+
+ /*
+ * Required suspend-resume functionality, which is provided by the
+ * Tegra-arch core and PMC driver, is unavailable if PM-sleep option
+ * is disabled.
+ */
+ if (!IS_ENABLED(CONFIG_PM_SLEEP)) {
+ tegra_cpuidle_disable_state(TEGRA_C7);
+ tegra_cpuidle_disable_state(TEGRA_CC6);
+ }
+
+ /*
+ * Generic WFI state (also known as C1 or LP3) and the coupled CPU
+ * cluster power-off (CC6 or LP2) states are common for all Tegra SoCs.
+ */
+ switch (tegra_get_chip_id()) {
+ case TEGRA20:
+		/* Tegra20 isn't capable of powering off individual CPU cores */
+ tegra_cpuidle_disable_state(TEGRA_C7);
+ break;
+
+ case TEGRA30:
+ break;
+
+ case TEGRA114:
+ case TEGRA124:
+ tegra_cpuidle_setup_tegra114_c7_state();
+
+ /* coupled CC6 (LP2) state isn't implemented yet */
+ tegra_cpuidle_disable_state(TEGRA_CC6);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return cpuidle_register(&tegra_idle_driver, cpu_possible_mask);
+}
+
+static struct platform_driver tegra_cpuidle_driver = {
+ .probe = tegra_cpuidle_probe,
+ .driver = {
+ .name = "tegra-cpuidle",
+ },
+};
+builtin_platform_driver(tegra_cpuidle_driver);
diff --git a/drivers/cpuidle/cpuidle-ux500.c b/drivers/cpuidle/cpuidle-ux500.c
index a2d34be17a09..f7d778580e9b 100644
--- a/drivers/cpuidle/cpuidle-ux500.c
+++ b/drivers/cpuidle/cpuidle-ux500.c
@@ -117,7 +117,7 @@ static int dbx500_cpuidle_probe(struct platform_device *pdev)
static struct platform_driver dbx500_cpuidle_plat_driver = {
.driver = {
- .name = "cpuidle-dbx500",
+ .name = "db8500-cpuidle",
},
.probe = dbx500_cpuidle_probe,
};
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index de81298051b3..6eceb1988243 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -8,6 +8,7 @@
* This code is licenced under the GPL.
*/
+#include "linux/percpu-defs.h"
#include <linux/clockchips.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
@@ -22,6 +23,8 @@
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/tick.h>
+#include <linux/mmu_context.h>
+#include <linux/context_tracking.h>
#include <trace/events/power.h>
#include "cpuidle.h"
@@ -137,29 +140,25 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index)
{
ktime_t time_start, time_end;
+ struct cpuidle_state *target_state = &drv->states[index];
time_start = ns_to_ktime(local_clock());
- /*
- * trace_suspend_resume() called by tick_freeze() for the last CPU
- * executing it contains RCU usage regarded as invalid in the idle
- * context, so tell RCU about that.
- */
- RCU_NONIDLE(tick_freeze());
+ tick_freeze();
/*
* The state used here cannot be a "coupled" one, because the "coupled"
* cpuidle mechanism enables interrupts and doing that with timekeeping
* suspended is generally unsafe.
*/
stop_critical_timings();
- drv->states[index].enter_s2idle(dev, drv, index);
- WARN_ON(!irqs_disabled());
- /*
- * timekeeping_resume() that will be called by tick_unfreeze() for the
- * first CPU executing it calls functions containing RCU read-side
- * critical sections, so tell RCU about that.
- */
- RCU_NONIDLE(tick_unfreeze());
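+	/*
+	 * Tell the context tracking (RCU) code that the CPU is about to go
+	 * idle, unless the idle state handles that itself, as indicated by
+	 * CPUIDLE_FLAG_RCU_IDLE.
+	 */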
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+ ct_idle_enter();
+ target_state->enter_s2idle(dev, drv, index);
+ if (WARN_ON_ONCE(!irqs_disabled()))
+ local_irq_disable();
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+ ct_idle_exit();
+ tick_unfreeze();
start_critical_timings();
time_end = ns_to_ktime(local_clock());
@@ -186,9 +185,10 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* be frozen safely.
*/
index = find_deepest_state(drv, dev, U64_MAX, 0, true);
- if (index > 0)
+ if (index > 0) {
enter_s2idle_proper(drv, dev, index);
-
+ local_irq_enable();
+ }
return index;
}
#endif /* CONFIG_SUSPEND */
@@ -224,19 +224,26 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
broadcast = false;
}
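+	/*
+	 * A state flagged as TLB_FLUSHED flushes the TLB anyway, so switch
+	 * this CPU away from the current user mm up front; it then no
+	 * longer needs to receive TLB shootdown IPIs while idle.
+	 */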
+ if (target_state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
+ leave_mm(dev->cpu);
+
/* Take note of the planned idle state. */
sched_idle_set_state(target_state);
- trace_cpu_idle_rcuidle(index, dev->cpu);
+ trace_cpu_idle(index, dev->cpu);
time_start = ns_to_ktime(local_clock());
stop_critical_timings();
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+ ct_idle_enter();
entered_state = target_state->enter(dev, drv, index);
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+ ct_idle_exit();
start_critical_timings();
sched_clock_idle_wakeup_event();
time_end = ns_to_ktime(local_clock());
- trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
+ trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);
/* The cpu is no longer idle or about to enter idle. */
sched_idle_set_state(NULL);
@@ -273,6 +280,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
/* Shallower states are enabled, so update. */
dev->states_usage[entered_state].above++;
+ trace_cpu_idle_miss(dev->cpu, entered_state, false);
break;
}
} else if (diff > delay) {
@@ -284,14 +292,17 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
* Update if a deeper state would have been a
* better match for the observed idle duration.
*/
- if (diff - delay >= drv->states[i].target_residency_ns)
+ if (diff - delay >= drv->states[i].target_residency_ns) {
dev->states_usage[entered_state].below++;
+ trace_cpu_idle_miss(dev->cpu, entered_state, true);
+ }
break;
}
}
} else {
dev->last_residency_ns = 0;
+ dev->states_usage[index].rejected++;
}
return entered_state;
@@ -362,6 +373,19 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index)
cpuidle_curr_governor->reflect(dev, index);
}
+/*
+ * The minimum polling interval of 10 usec is a guess. It assumes that
+ * a single iteration of a ping-pong workload like perf bench pipe
+ * would generally complete within 10 usec, but this is hardware
+ * dependent. The actual time can be estimated with
+ *
+ * perf bench sched pipe -l 10000
+ *
+ * Run multiple times to avoid cpufreq effects.
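+ *
+ * The maximum polling interval is capped at a fraction of the tick
+ * period: with HZ=1000 (TICK_NSEC == 1000000), CPUIDLE_POLL_MAX comes
+ * out to 62500 ns, i.e. 62.5 usec.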
+ */
+#define CPUIDLE_POLL_MIN 10000
+#define CPUIDLE_POLL_MAX (TICK_NSEC / 16)
+
/**
* cpuidle_poll_time - return amount of time to poll for,
* governors can override dev->poll_limit_ns if necessary
@@ -376,15 +400,23 @@ u64 cpuidle_poll_time(struct cpuidle_driver *drv,
int i;
u64 limit_ns;
+ BUILD_BUG_ON(CPUIDLE_POLL_MIN > CPUIDLE_POLL_MAX);
+
if (dev->poll_limit_ns)
return dev->poll_limit_ns;
- limit_ns = TICK_NSEC;
+ limit_ns = CPUIDLE_POLL_MAX;
for (i = 1; i < drv->state_count; i++) {
+ u64 state_limit;
+
if (dev->states_usage[i].disable)
continue;
- limit_ns = drv->states[i].target_residency_ns;
+ state_limit = drv->states[i].target_residency_ns;
+ if (state_limit < CPUIDLE_POLL_MIN)
+ continue;
+
+ limit_ns = min_t(u64, state_limit, CPUIDLE_POLL_MAX);
break;
}
@@ -736,53 +768,15 @@ int cpuidle_register(struct cpuidle_driver *drv,
}
EXPORT_SYMBOL_GPL(cpuidle_register);
-#ifdef CONFIG_SMP
-
-/*
- * This function gets called when a part of the kernel has a new latency
- * requirement. This means we need to get all processors out of their C-state,
- * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
- * wakes them all right up.
- */
-static int cpuidle_latency_notify(struct notifier_block *b,
- unsigned long l, void *v)
-{
- wake_up_all_idle_cpus();
- return NOTIFY_OK;
-}
-
-static struct notifier_block cpuidle_latency_notifier = {
- .notifier_call = cpuidle_latency_notify,
-};
-
-static inline void latency_notifier_init(struct notifier_block *n)
-{
- pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
-}
-
-#else /* CONFIG_SMP */
-
-#define latency_notifier_init(x) do { } while (0)
-
-#endif /* CONFIG_SMP */
-
/**
* cpuidle_init - core initializer
*/
static int __init cpuidle_init(void)
{
- int ret;
-
if (cpuidle_disabled())
return -ENODEV;
- ret = cpuidle_add_interface(cpu_subsys.dev_root);
- if (ret)
- return ret;
-
- latency_notifier_init(&cpuidle_latency_notifier);
-
- return 0;
+ return cpuidle_add_interface(cpu_subsys.dev_root);
}
module_param(off, int, 0444);
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 4070e573bf43..f70aa17e2a8e 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -181,9 +181,13 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
*/
if (s->target_residency > 0)
s->target_residency_ns = s->target_residency * NSEC_PER_USEC;
+ else if (s->target_residency_ns < 0)
+ s->target_residency_ns = 0;
if (s->exit_latency > 0)
s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
+ else if (s->exit_latency_ns < 0)
+ s->exit_latency_ns = 0;
}
}
diff --git a/drivers/cpuidle/dt_idle_genpd.c b/drivers/cpuidle/dt_idle_genpd.c
new file mode 100644
index 000000000000..b37165514d4e
--- /dev/null
+++ b/drivers/cpuidle/dt_idle_genpd.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PM domains for CPUs via genpd.
+ *
+ * Copyright (C) 2019 Linaro Ltd.
+ * Author: Ulf Hansson <ulf.hansson@linaro.org>
+ *
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (c) 2022 Ventana Micro Systems Inc.
+ */
+
+#define pr_fmt(fmt) "dt-idle-genpd: " fmt
+
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "dt_idle_genpd.h"
+
+static int pd_parse_state_nodes(
+ int (*parse_state)(struct device_node *, u32 *),
+ struct genpd_power_state *states, int state_count)
+{
+ int i, ret;
+ u32 state, *state_buf;
+
+ for (i = 0; i < state_count; i++) {
+ ret = parse_state(to_of_node(states[i].fwnode), &state);
+ if (ret)
+ goto free_state;
+
+ state_buf = kmalloc(sizeof(u32), GFP_KERNEL);
+ if (!state_buf) {
+ ret = -ENOMEM;
+ goto free_state;
+ }
+ *state_buf = state;
+ states[i].data = state_buf;
+ }
+
+ return 0;
+
+free_state:
+ i--;
+ for (; i >= 0; i--)
+ kfree(states[i].data);
+ return ret;
+}
+
+static int pd_parse_states(struct device_node *np,
+ int (*parse_state)(struct device_node *, u32 *),
+ struct genpd_power_state **states,
+ int *state_count)
+{
+ int ret;
+
+ /* Parse the domain idle states. */
+ ret = of_genpd_parse_idle_states(np, states, state_count);
+ if (ret)
+ return ret;
+
+ /* Fill out the dt specifics for each found state. */
+ ret = pd_parse_state_nodes(parse_state, *states, *state_count);
+ if (ret)
+ kfree(*states);
+
+ return ret;
+}
+
+static void pd_free_states(struct genpd_power_state *states,
+ unsigned int state_count)
+{
+ int i;
+
+ for (i = 0; i < state_count; i++)
+ kfree(states[i].data);
+ kfree(states);
+}
+
+void dt_idle_pd_free(struct generic_pm_domain *pd)
+{
+ pd_free_states(pd->states, pd->state_count);
+ kfree(pd->name);
+ kfree(pd);
+}
+
+struct generic_pm_domain *dt_idle_pd_alloc(struct device_node *np,
+ int (*parse_state)(struct device_node *, u32 *))
+{
+ struct generic_pm_domain *pd;
+ struct genpd_power_state *states = NULL;
+ int ret, state_count = 0;
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ goto out;
+
+ pd->name = kasprintf(GFP_KERNEL, "%pOF", np);
+ if (!pd->name)
+ goto free_pd;
+
+ /*
+ * Parse the domain idle states and let genpd manage the state selection
+ * for those being compatible with "domain-idle-state".
+ */
+ ret = pd_parse_states(np, parse_state, &states, &state_count);
+ if (ret)
+ goto free_name;
+
+ pd->free_states = pd_free_states;
+ pd->name = kbasename(pd->name);
+ pd->states = states;
+ pd->state_count = state_count;
+
+ pr_debug("alloc PM domain %s\n", pd->name);
+ return pd;
+
+free_name:
+ kfree(pd->name);
+free_pd:
+ kfree(pd);
+out:
+ pr_err("failed to alloc PM domain %pOF\n", np);
+ return NULL;
+}
+
+int dt_idle_pd_init_topology(struct device_node *np)
+{
+ struct device_node *node;
+ struct of_phandle_args child, parent;
+ int ret;
+
+ for_each_child_of_node(np, node) {
+ if (of_parse_phandle_with_args(node, "power-domains",
+ "#power-domain-cells", 0, &parent))
+ continue;
+
+ child.np = node;
+ child.args_count = 0;
+ ret = of_genpd_add_subdomain(&parent, &child);
+ of_node_put(parent.np);
+ if (ret) {
+ of_node_put(node);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+struct device *dt_idle_attach_cpu(int cpu, const char *name)
+{
+ struct device *dev;
+
+ dev = dev_pm_domain_attach_by_name(get_cpu_device(cpu), name);
+ if (IS_ERR_OR_NULL(dev))
+ return dev;
+
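+	/*
+	 * Runtime PM of the CPU device is invoked from atomic (idle)
+	 * context, so the attached device must be marked IRQ safe. It is
+	 * also marked as a syscore device below, which keeps system-wide
+	 * PM transitions from touching it behind cpuidle's back.
+	 */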
+ pm_runtime_irq_safe(dev);
+ if (cpu_online(cpu))
+ pm_runtime_get_sync(dev);
+
+ dev_pm_syscore_device(dev, true);
+
+ return dev;
+}
+
+void dt_idle_detach_cpu(struct device *dev)
+{
+ if (IS_ERR_OR_NULL(dev))
+ return;
+
+ dev_pm_domain_detach(dev, false);
+}
diff --git a/drivers/cpuidle/dt_idle_genpd.h b/drivers/cpuidle/dt_idle_genpd.h
new file mode 100644
index 000000000000..a95483d08a02
--- /dev/null
+++ b/drivers/cpuidle/dt_idle_genpd.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DT_IDLE_GENPD
+#define __DT_IDLE_GENPD
+
+struct device_node;
+struct generic_pm_domain;
+
+#ifdef CONFIG_DT_IDLE_GENPD
+
+void dt_idle_pd_free(struct generic_pm_domain *pd);
+
+struct generic_pm_domain *dt_idle_pd_alloc(struct device_node *np,
+ int (*parse_state)(struct device_node *, u32 *));
+
+int dt_idle_pd_init_topology(struct device_node *np);
+
+struct device *dt_idle_attach_cpu(int cpu, const char *name);
+
+void dt_idle_detach_cpu(struct device *dev);
+
+#else
+
+static inline void dt_idle_pd_free(struct generic_pm_domain *pd)
+{
+}
+
+static inline struct generic_pm_domain *dt_idle_pd_alloc(
+ struct device_node *np,
+ int (*parse_state)(struct device_node *, u32 *))
+{
+ return NULL;
+}
+
+static inline int dt_idle_pd_init_topology(struct device_node *np)
+{
+ return 0;
+}
+
+static inline struct device *dt_idle_attach_cpu(int cpu, const char *name)
+{
+ return NULL;
+}
+
+static inline void dt_idle_detach_cpu(struct device *dev)
+{
+}
+
+#endif
+
+#endif
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index e48271e117a3..0d0f9751ff8f 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -63,12 +63,11 @@ int cpuidle_switch_governor(struct cpuidle_governor *gov)
cpuidle_curr_governor = gov;
- if (gov) {
- list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
- cpuidle_enable_device(dev);
- cpuidle_install_idle_handler();
- printk(KERN_INFO "cpuidle: using governor %s\n", gov->name);
- }
+ list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+ cpuidle_enable_device(dev);
+
+ cpuidle_install_idle_handler();
+ pr_info("cpuidle: using governor %s\n", gov->name);
return 0;
}
@@ -109,9 +108,9 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
*/
s64 cpuidle_governor_latency_req(unsigned int cpu)
{
- int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
struct device *device = get_cpu_device(cpu);
int device_req = dev_pm_qos_raw_resume_latency(device);
+ int global_req = cpu_latency_qos_limit();
if (device_req > global_req)
device_req = global_req;
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c
index cb2a96eafc02..1dff3a52917d 100644
--- a/drivers/cpuidle/governors/haltpoll.c
+++ b/drivers/cpuidle/governors/haltpoll.c
@@ -19,6 +19,7 @@
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/kvm_para.h>
+#include <trace/events/power.h>
static unsigned int guest_halt_poll_ns __read_mostly = 200000;
module_param(guest_halt_poll_ns, uint, 0644);
@@ -90,6 +91,7 @@ static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns)
if (val > guest_halt_poll_ns)
val = guest_halt_poll_ns;
+ trace_guest_halt_poll_ns_grow(val, dev->poll_limit_ns);
dev->poll_limit_ns = val;
} else if (block_ns > guest_halt_poll_ns &&
guest_halt_poll_allow_shrink) {
@@ -100,6 +102,7 @@ static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns)
val = 0;
else
val /= shrink;
+ trace_guest_halt_poll_ns_shrink(val, dev->poll_limit_ns);
dev->poll_limit_ns = val;
}
}
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index b0a7ad566081..c4922684f305 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -34,7 +34,7 @@
* 1) Energy break even point
* 2) Performance impact
* 3) Latency tolerance (from pmqos infrastructure)
- * These these three factors are treated independently.
+ * These three factors are treated independently.
*
* Energy break even point
* -----------------------
@@ -117,7 +117,7 @@ struct menu_device {
int interval_ptr;
};
-static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters)
+static inline int which_bucket(u64 duration_ns, unsigned int nr_iowaiters)
{
int bucket = 0;
@@ -150,7 +150,7 @@ static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters)
* to be, the higher this multiplier, and thus the higher
* the barrier to go to an expensive C state.
*/
-static inline int performance_multiplier(unsigned long nr_iowaiters)
+static inline int performance_multiplier(unsigned int nr_iowaiters)
{
/* for IO wait tasks (per cpu!) we add 10x each */
return 1 + 10 * nr_iowaiters;
@@ -270,8 +270,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
unsigned int predicted_us;
u64 predicted_ns;
u64 interactivity_req;
- unsigned long nr_iowaiters;
- ktime_t delta_next;
+ unsigned int nr_iowaiters;
+ ktime_t delta, delta_tick;
int i, idx;
if (data->needs_update) {
@@ -280,7 +280,12 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
}
/* determine the expected residency time, round up */
- data->next_timer_ns = tick_nohz_get_sleep_length(&delta_next);
+ delta = tick_nohz_get_sleep_length(&delta_tick);
+ if (unlikely(delta < 0)) {
+ delta = 0;
+ delta_tick = 0;
+ }
+ data->next_timer_ns = delta;
nr_iowaiters = nr_iowait_cpu(dev->cpu);
data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
@@ -318,7 +323,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* state selection.
*/
if (predicted_ns < TICK_NSEC)
- predicted_ns = delta_next;
+ predicted_ns = data->next_timer_ns;
} else {
/*
* Use the performance multiplier and the user-configurable
@@ -377,7 +382,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* stuck in the shallow one for too long.
*/
if (drv->states[idx].target_residency_ns < TICK_NSEC &&
- s->target_residency_ns <= delta_next)
+ s->target_residency_ns <= delta_tick)
idx = i;
return idx;
@@ -399,7 +404,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
predicted_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
*stop_tick = false;
- if (idx > 0 && drv->states[idx].target_residency_ns > delta_next) {
+ if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick) {
/*
* The tick is not going to be stopped and the target
* residency of the state to be returned is not within
@@ -411,7 +416,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
continue;
idx = i;
- if (drv->states[i].target_residency_ns <= delta_next)
+ if (drv->states[i].target_residency_ns <= delta_tick)
break;
}
}
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 6deaaf5f05b5..d9262db79cae 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -2,47 +2,103 @@
/*
* Timer events oriented CPU idle governor
*
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2021 Intel Corporation
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ */
+
+/**
+ * DOC: teo-description
*
* The idea of this governor is based on the observation that on many systems
* timer events are two or more orders of magnitude more frequent than any
- * other interrupts, so they are likely to be the most significant source of CPU
+ * other interrupts, so they are likely to be the most significant cause of CPU
* wakeups from idle states. Moreover, information about what happened in the
* (relatively recent) past can be used to estimate whether or not the deepest
- * idle state with target residency within the time to the closest timer is
- * likely to be suitable for the upcoming idle time of the CPU and, if not, then
- * which of the shallower idle states to choose.
+ * idle state with target residency within the (known) time till the closest
+ * timer event, referred to as the sleep length, is likely to be suitable for
+ * the upcoming CPU idle period and, if not, then which of the shallower idle
+ * states to choose instead of it.
+ *
+ * Of course, non-timer wakeup sources are more important in some use cases,
+ * which can be covered by taking a few of the most recent idle time intervals
+ * of the CPU into account. However, even in that context it is not necessary to
+ * consider idle duration values greater than the sleep length, because the
+ * closest timer will ultimately wake up the CPU anyway unless it is woken up
+ * earlier.
+ *
+ * Thus this governor estimates whether or not the prospective idle duration of
+ * a CPU is likely to be significantly shorter than the sleep length and selects
+ * an idle state for it accordingly.
+ *
+ * The computations carried out by this governor are based on using bins whose
+ * boundaries are aligned with the target residency parameter values of the CPU
+ * idle states provided by the %CPUIdle driver in the ascending order. That is,
+ * the first bin spans from 0 up to, but not including, the target residency of
+ * the second idle state (idle state 1), the second bin spans from the target
+ * residency of idle state 1 up to, but not including, the target residency of
+ * idle state 2, the third bin spans from the target residency of idle state 2
+ * up to, but not including, the target residency of idle state 3 and so on.
+ * The last bin spans from the target residency of the deepest idle state
+ * supplied by the driver to infinity.
+ *
+ * Two metrics called "hits" and "intercepts" are associated with each bin.
+ * They are updated every time before selecting an idle state for the given CPU
+ * in accordance with what happened last time.
+ *
+ * The "hits" metric reflects the relative frequency of situations in which the
+ * sleep length and the idle duration measured after CPU wakeup fall into the
+ * same bin (that is, the CPU appears to wake up "on time" relative to the sleep
+ * length). In turn, the "intercepts" metric reflects the relative frequency of
+ * situations in which the measured idle duration is so much shorter than the
+ * sleep length that the bin it falls into corresponds to an idle state
+ * shallower than the one whose bin is fallen into by the sleep length (these
+ * situations are referred to as "intercepts" below).
+ *
+ * In addition to the metrics described above, the governor counts recent
+ * intercepts (that is, intercepts that have occurred during the last
+ * %NR_RECENT invocations of it for the given CPU) for each bin.
*
- * Of course, non-timer wakeup sources are more important in some use cases and
- * they can be covered by taking a few most recent idle time intervals of the
- * CPU into account. However, even in that case it is not necessary to consider
- * idle duration values greater than the time till the closest timer, as the
- * patterns that they may belong to produce average values close enough to
- * the time till the closest timer (sleep length) anyway.
+ * In order to select an idle state for a CPU, the governor takes the following
+ * steps (modulo the possible latency constraint that must be taken into account
+ * too):
*
- * Thus this governor estimates whether or not the upcoming idle time of the CPU
- * is likely to be significantly shorter than the sleep length and selects an
- * idle state for it in accordance with that, as follows:
+ * 1. Find the deepest CPU idle state whose target residency does not exceed
+ * the current sleep length (the candidate idle state) and compute 3 sums as
+ * follows:
*
- * - Find an idle state on the basis of the sleep length and state statistics
- * collected over time:
+ * - The sum of the "hits" and "intercepts" metrics for the candidate state
+ * and all of the deeper idle states (it represents the cases in which the
+ * CPU was idle long enough to avoid being intercepted if the sleep length
+ * had been equal to the current one).
*
- * o Find the deepest idle state whose target residency is less than or equal
- * to the sleep length.
+ * - The sum of the "intercepts" metrics for all of the idle states shallower
+ * than the candidate one (it represents the cases in which the CPU was not
+ * idle long enough to avoid being intercepted if the sleep length had been
+ * equal to the current one).
*
- * o Select it if it matched both the sleep length and the observed idle
- * duration in the past more often than it matched the sleep length alone
- * (i.e. the observed idle duration was significantly shorter than the sleep
- * length matched by it).
+ * - The sum of the numbers of recent intercepts for all of the idle states
+ * shallower than the candidate one.
*
- * o Otherwise, select the shallower state with the greatest matched "early"
- * wakeups metric.
+ * 2. If the second sum is greater than the first one or the third sum is
+ * greater than %NR_RECENT / 2, the CPU is likely to wake up early, so look
+ * for an alternative idle state to select.
*
- * - If the majority of the most recent idle duration values are below the
- * target residency of the idle state selected so far, use those values to
- * compute the new expected idle duration and find an idle state matching it
- * (which has to be shallower than the one selected so far).
+ * - Traverse the idle states shallower than the candidate one in the
+ * descending order.
+ *
+ * - For each of them compute the sum of the "intercepts" metrics and the sum
+ * of the numbers of recent intercepts over all of the idle states between
+ * it and the candidate one (including the former and excluding the
+ * latter).
+ *
+ * - If each of these sums that needs to be taken into account (because the
+ * check related to it has indicated that the CPU is likely to wake up
+ * early) is greater than a half of the corresponding sum computed in step
+ * 1 (which means that the target residency of the state in question had
+ * not exceeded the idle duration in over a half of the relevant cases),
+ * select the given idle state instead of the candidate one.
+ *
+ * 3. By default, select the candidate state.
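+ *
+ * As an illustration (the numbers are assumed for the example, not
+ * taken from any particular driver), if the target residencies of
+ * states 0..2 are 0, 100 us and 1 ms, the bins cover [0, 100 us),
+ * [100 us, 1 ms) and [1 ms, infinity). A sleep length of 2 ms with a
+ * measured idle duration of 50 us then updates the "intercepts" metric
+ * of the first bin, whereas a measured duration of 3 ms would update
+ * the "hits" metric of the last bin instead.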
*/
#include <linux/cpuidle.h>
@@ -60,64 +116,51 @@
/*
* Number of the most recent idle duration values to take into consideration for
- * the detection of wakeup patterns.
+ * the detection of recent early wakeup patterns.
*/
-#define INTERVALS 8
+#define NR_RECENT 9
/**
- * struct teo_idle_state - Idle state data used by the TEO cpuidle governor.
- * @early_hits: "Early" CPU wakeups "matching" this state.
- * @hits: "On time" CPU wakeups "matching" this state.
- * @misses: CPU wakeups "missing" this state.
- *
- * A CPU wakeup is "matched" by a given idle state if the idle duration measured
- * after the wakeup is between the target residency of that state and the target
- * residency of the next one (or if this is the deepest available idle state, it
- * "matches" a CPU wakeup when the measured idle duration is at least equal to
- * its target residency).
- *
- * Also, from the TEO governor perspective, a CPU wakeup from idle is "early" if
- * it occurs significantly earlier than the closest expected timer event (that
- * is, early enough to match an idle state shallower than the one matching the
- * time till the closest timer event). Otherwise, the wakeup is "on time", or
- * it is a "hit".
- *
- * A "miss" occurs when the given state doesn't match the wakeup, but it matches
- * the time till the closest timer event used for idle state selection.
+ * struct teo_bin - Metrics used by the TEO cpuidle governor.
+ * @intercepts: The "intercepts" metric.
+ * @hits: The "hits" metric.
+ * @recent: The number of recent "intercepts".
*/
-struct teo_idle_state {
- unsigned int early_hits;
+struct teo_bin {
+ unsigned int intercepts;
unsigned int hits;
- unsigned int misses;
+ unsigned int recent;
};
/**
* struct teo_cpu - CPU data used by the TEO cpuidle governor.
* @time_span_ns: Time between idle state selection and post-wakeup update.
* @sleep_length_ns: Time till the closest timer event (at the selection time).
- * @states: Idle states data corresponding to this CPU.
- * @interval_idx: Index of the most recent saved idle interval.
- * @intervals: Saved idle duration values.
+ * @state_bins: Idle state data bins for this CPU.
+ * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
+ * @next_recent_idx: Index of the next @recent_idx entry to update.
+ * @recent_idx: Indices of bins corresponding to recent "intercepts".
*/
struct teo_cpu {
- u64 time_span_ns;
- u64 sleep_length_ns;
- struct teo_idle_state states[CPUIDLE_STATE_MAX];
- int interval_idx;
- u64 intervals[INTERVALS];
+ s64 time_span_ns;
+ s64 sleep_length_ns;
+ struct teo_bin state_bins[CPUIDLE_STATE_MAX];
+ unsigned int total;
+ int next_recent_idx;
+ int recent_idx[NR_RECENT];
};
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
/**
- * teo_update - Update CPU data after wakeup.
+ * teo_update - Update CPU metrics after wakeup.
* @drv: cpuidle driver containing state data.
* @dev: Target CPU.
*/
static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
- int i, idx_hit = -1, idx_timer = -1;
+ int i, idx_timer = 0, idx_duration = 0;
u64 measured_ns;
if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
@@ -150,56 +193,52 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
measured_ns /= 2;
}
+ cpu_data->total = 0;
+
/*
- * Decay the "early hits" metric for all of the states and find the
- * states matching the sleep length and the measured idle duration.
+ * Decay the "hits" and "intercepts" metrics for all of the bins and
+ * find the bins that the sleep length and the measured idle duration
+ * fall into.
*/
for (i = 0; i < drv->state_count; i++) {
- unsigned int early_hits = cpu_data->states[i].early_hits;
+ s64 target_residency_ns = drv->states[i].target_residency_ns;
+ struct teo_bin *bin = &cpu_data->state_bins[i];
+
+ bin->hits -= bin->hits >> DECAY_SHIFT;
+ bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
- cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT;
+ cpu_data->total += bin->hits + bin->intercepts;
- if (drv->states[i].target_residency_ns <= cpu_data->sleep_length_ns) {
+ if (target_residency_ns <= cpu_data->sleep_length_ns) {
idx_timer = i;
- if (drv->states[i].target_residency_ns <= measured_ns)
- idx_hit = i;
+ if (target_residency_ns <= measured_ns)
+ idx_duration = i;
}
}
- /*
- * Update the "hits" and "misses" data for the state matching the sleep
- * length. If it matches the measured idle duration too, this is a hit,
- * so increase the "hits" metric for it then. Otherwise, this is a
- * miss, so increase the "misses" metric for it. In the latter case
- * also increase the "early hits" metric for the state that actually
- * matches the measured idle duration.
- */
- if (idx_timer >= 0) {
- unsigned int hits = cpu_data->states[idx_timer].hits;
- unsigned int misses = cpu_data->states[idx_timer].misses;
-
- hits -= hits >> DECAY_SHIFT;
- misses -= misses >> DECAY_SHIFT;
-
- if (idx_timer > idx_hit) {
- misses += PULSE;
- if (idx_hit >= 0)
- cpu_data->states[idx_hit].early_hits += PULSE;
- } else {
- hits += PULSE;
- }
+ i = cpu_data->next_recent_idx++;
+ if (cpu_data->next_recent_idx >= NR_RECENT)
+ cpu_data->next_recent_idx = 0;
- cpu_data->states[idx_timer].misses = misses;
- cpu_data->states[idx_timer].hits = hits;
- }
+ if (cpu_data->recent_idx[i] >= 0)
+ cpu_data->state_bins[cpu_data->recent_idx[i]].recent--;
/*
- * Save idle duration values corresponding to non-timer wakeups for
- * pattern detection.
+ * If the measured idle duration falls into the same bin as the sleep
+ * length, this is a "hit", so update the "hits" metric for that bin.
+ * Otherwise, update the "intercepts" metric for the bin fallen into by
+ * the measured idle duration.
*/
- cpu_data->intervals[cpu_data->interval_idx++] = measured_ns;
- if (cpu_data->interval_idx >= INTERVALS)
- cpu_data->interval_idx = 0;
+ if (idx_timer == idx_duration) {
+ cpu_data->state_bins[idx_timer].hits += PULSE;
+ cpu_data->recent_idx[i] = -1;
+ } else {
+ cpu_data->state_bins[idx_duration].intercepts += PULSE;
+ cpu_data->state_bins[idx_duration].recent++;
+ cpu_data->recent_idx[i] = idx_duration;
+ }
+
+ cpu_data->total += PULSE;
}
static bool teo_time_ok(u64 interval_ns)
@@ -207,6 +246,12 @@ static bool teo_time_ok(u64 interval_ns)
return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
}
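+/*
+ * Return the midpoint of the time span covered by bin @idx, i.e. halfway
+ * between the target residencies of idle states @idx and @idx + 1.
+ */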
+static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
+{
+ return (drv->states[idx].target_residency_ns +
+ drv->states[idx+1].target_residency_ns) / 2;
+}
+
/**
* teo_find_shallower_state - Find shallower idle state matching given duration.
* @drv: cpuidle driver containing state data.
@@ -216,7 +261,7 @@ static bool teo_time_ok(u64 interval_ns)
*/
static int teo_find_shallower_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int state_idx,
- u64 duration_ns)
+ s64 duration_ns)
{
int i;
@@ -242,10 +287,18 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
- u64 duration_ns;
- unsigned int hits, misses, early_hits;
- int max_early_idx, prev_max_early_idx, constraint_idx, idx, i;
+ unsigned int idx_intercept_sum = 0;
+ unsigned int intercept_sum = 0;
+ unsigned int idx_recent_sum = 0;
+ unsigned int recent_sum = 0;
+ unsigned int idx_hit_sum = 0;
+ unsigned int hit_sum = 0;
+ int constraint_idx = 0;
+ int idx0 = 0, idx = -1;
+ bool alt_intercepts, alt_recent;
ktime_t delta_tick;
+ s64 duration_ns;
+ int i;
if (dev->last_state_idx >= 0) {
teo_update(drv, dev);
@@ -257,164 +310,150 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
duration_ns = tick_nohz_get_sleep_length(&delta_tick);
cpu_data->sleep_length_ns = duration_ns;
- hits = 0;
- misses = 0;
- early_hits = 0;
- max_early_idx = -1;
- prev_max_early_idx = -1;
- constraint_idx = drv->state_count;
- idx = -1;
+ /* Check if there is any choice in the first place. */
+ if (drv->state_count < 2) {
+ idx = 0;
+ goto end;
+ }
+ if (!dev->states_usage[0].disable) {
+ idx = 0;
+ if (drv->states[1].target_residency_ns > duration_ns)
+ goto end;
+ }
- for (i = 0; i < drv->state_count; i++) {
+ /*
+ * Find the deepest idle state whose target residency does not exceed
+ * the current sleep length and the deepest idle state not deeper than
+ * the former whose exit latency does not exceed the current latency
+ * constraint. Compute the sums of metrics for early wakeup pattern
+ * detection.
+ */
+ for (i = 1; i < drv->state_count; i++) {
+ struct teo_bin *prev_bin = &cpu_data->state_bins[i-1];
struct cpuidle_state *s = &drv->states[i];
- if (dev->states_usage[i].disable) {
- /*
- * Ignore disabled states with target residencies beyond
- * the anticipated idle duration.
- */
- if (s->target_residency_ns > duration_ns)
- continue;
-
- /*
- * This state is disabled, so the range of idle duration
- * values corresponding to it is covered by the current
- * candidate state, but still the "hits" and "misses"
- * metrics of the disabled state need to be used to
- * decide whether or not the state covering the range in
- * question is good enough.
- */
- hits = cpu_data->states[i].hits;
- misses = cpu_data->states[i].misses;
-
- if (early_hits >= cpu_data->states[i].early_hits ||
- idx < 0)
- continue;
-
- /*
- * If the current candidate state has been the one with
- * the maximum "early hits" metric so far, the "early
- * hits" metric of the disabled state replaces the
- * current "early hits" count to avoid selecting a
- * deeper state with lower "early hits" metric.
- */
- if (max_early_idx == idx) {
- early_hits = cpu_data->states[i].early_hits;
- continue;
- }
-
- /*
- * The current candidate state is closer to the disabled
- * one than the current maximum "early hits" state, so
- * replace the latter with it, but in case the maximum
- * "early hits" state index has not been set so far,
- * check if the current candidate state is not too
- * shallow for that role.
- */
- if (teo_time_ok(drv->states[idx].target_residency_ns)) {
- prev_max_early_idx = max_early_idx;
- early_hits = cpu_data->states[i].early_hits;
- max_early_idx = idx;
- }
+ /*
+		 * Update the sums of idle state metrics for all of the states
+ * shallower than the current one.
+ */
+ intercept_sum += prev_bin->intercepts;
+ hit_sum += prev_bin->hits;
+ recent_sum += prev_bin->recent;
+ if (dev->states_usage[i].disable)
continue;
- }
if (idx < 0) {
idx = i; /* first enabled state */
- hits = cpu_data->states[i].hits;
- misses = cpu_data->states[i].misses;
+ idx0 = i;
}
if (s->target_residency_ns > duration_ns)
break;
- if (s->exit_latency_ns > latency_req && constraint_idx > i)
+ idx = i;
+
+ if (s->exit_latency_ns <= latency_req)
constraint_idx = i;
- idx = i;
- hits = cpu_data->states[i].hits;
- misses = cpu_data->states[i].misses;
-
- if (early_hits < cpu_data->states[i].early_hits &&
- teo_time_ok(drv->states[i].target_residency_ns)) {
- prev_max_early_idx = max_early_idx;
- early_hits = cpu_data->states[i].early_hits;
- max_early_idx = i;
- }
+ idx_intercept_sum = intercept_sum;
+ idx_hit_sum = hit_sum;
+ idx_recent_sum = recent_sum;
}
- /*
- * If the "hits" metric of the idle state matching the sleep length is
- * greater than its "misses" metric, that is the one to use. Otherwise,
- * it is more likely that one of the shallower states will match the
- * idle duration observed after wakeup, so take the one with the maximum
- * "early hits" metric, but if that cannot be determined, just use the
- * state selected so far.
- */
- if (hits <= misses) {
- /*
- * The current candidate state is not suitable, so take the one
- * whose "early hits" metric is the maximum for the range of
- * shallower states.
- */
- if (idx == max_early_idx)
- max_early_idx = prev_max_early_idx;
-
- if (max_early_idx >= 0) {
- idx = max_early_idx;
- duration_ns = drv->states[idx].target_residency_ns;
- }
+ /* Avoid unnecessary overhead. */
+ if (idx < 0) {
+ idx = 0; /* No states enabled, must use 0. */
+ goto end;
+ } else if (idx == idx0) {
+ goto end;
}
/*
- * If there is a latency constraint, it may be necessary to use a
- * shallower idle state than the one selected so far.
+ * If the sum of the intercepts metric for all of the idle states
+ * shallower than the current candidate one (idx) is greater than the
+ * sum of the intercepts and hits metrics for the candidate state and
+ * all of the deeper states, or the sum of the numbers of recent
+ * intercepts over all of the states shallower than the candidate one
+ * is greater than a half of the number of recent events taken into
+ * account, the CPU is likely to wake up early, so find an alternative
+ * idle state to select.
*/
- if (constraint_idx < idx)
- idx = constraint_idx;
-
- if (idx < 0) {
- idx = 0; /* No states enabled. Must use 0. */
- } else if (idx > 0) {
- unsigned int count = 0;
- u64 sum = 0;
+ alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum;
+ alt_recent = idx_recent_sum > NR_RECENT / 2;
+ if (alt_recent || alt_intercepts) {
+ s64 first_suitable_span_ns = duration_ns;
+ int first_suitable_idx = idx;
/*
- * Count and sum the most recent idle duration values less than
- * the current expected idle duration value.
+ * Look for the deepest idle state whose target residency had
+ * not exceeded the idle duration in over a half of the relevant
+ * cases (both with respect to intercepts overall and with
+ * respect to the recent intercepts only) in the past.
+ *
+ * Take the possible latency constraint and duration limitation
+ * present if the tick has been stopped already into account.
*/
- for (i = 0; i < INTERVALS; i++) {
- u64 val = cpu_data->intervals[i];
+ intercept_sum = 0;
+ recent_sum = 0;
+
+ for (i = idx - 1; i >= 0; i--) {
+ struct teo_bin *bin = &cpu_data->state_bins[i];
+ s64 span_ns;
+
+ intercept_sum += bin->intercepts;
+ recent_sum += bin->recent;
+
+ span_ns = teo_middle_of_bin(i, drv);
+
+ if ((!alt_recent || 2 * recent_sum > idx_recent_sum) &&
+ (!alt_intercepts ||
+ 2 * intercept_sum > idx_intercept_sum)) {
+ if (teo_time_ok(span_ns) &&
+ !dev->states_usage[i].disable) {
+ idx = i;
+ duration_ns = span_ns;
+ } else {
+ /*
+ * The current state is too shallow or
+ * disabled, so take the first enabled
+ * deeper state with suitable time span.
+ */
+ idx = first_suitable_idx;
+ duration_ns = first_suitable_span_ns;
+ }
+ break;
+ }
- if (val >= duration_ns)
+ if (dev->states_usage[i].disable)
continue;
- count++;
- sum += val;
- }
+ if (!teo_time_ok(span_ns)) {
+ /*
+ * The current state is too shallow, but if an
+ * alternative candidate state has been found,
+ * it may still turn out to be a better choice.
+ */
+ if (first_suitable_idx != idx)
+ continue;
- /*
- * Give up unless the majority of the most recent idle duration
- * values are in the interesting range.
- */
- if (count > INTERVALS / 2) {
- u64 avg_ns = div64_u64(sum, count);
-
- /*
- * Avoid spending too much time in an idle state that
- * would be too shallow.
- */
- if (teo_time_ok(avg_ns)) {
- duration_ns = avg_ns;
- if (drv->states[idx].target_residency_ns > avg_ns)
- idx = teo_find_shallower_state(drv, dev,
- idx, avg_ns);
+ break;
}
+
+ first_suitable_span_ns = span_ns;
+ first_suitable_idx = i;
}
}
/*
+ * If there is a latency constraint, it may be necessary to select an
+ * idle state shallower than the current candidate one.
+ */
+ if (idx > constraint_idx)
+ idx = constraint_idx;
+
+end:
+ /*
* Don't stop the tick if the selected state is a polling one or if the
* expected idle duration is shorter than the tick period length.
*/
@@ -428,7 +467,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* till the closest timer including the tick, try to correct
* that.
*/
- if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick)
+ if (idx > idx0 &&
+ drv->states[idx].target_residency_ns > delta_tick)
idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
}
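
The selection rework above hinges on one inequality. Below is a minimal userspace model of the trigger; the struct layout, the NR_RECENT value, and the helper name are stand-ins for illustration, not the kernel's actual definitions:

#include <stdbool.h>

#define NR_RECENT 9	/* assumed ring-buffer depth */

struct bin {
	unsigned int intercepts;	/* wakeups earlier than this bin's span */
	unsigned int hits;		/* wakeups that matched this bin's span */
	unsigned int recent;		/* recent intercepts counted in this bin */
};

/*
 * True when the candidate state idx should be reconsidered: either the
 * intercepts in the shallower bins outweigh the intercepts-plus-hits of
 * the candidate and deeper bins, or more than half of the recent events
 * were intercepts in the shallower bins.
 */
static bool want_shallower(const struct bin *bins, int idx,
			   unsigned int total)
{
	unsigned int intercept_sum = 0, hit_sum = 0, recent_sum = 0;
	int i;

	for (i = 0; i < idx; i++) {
		intercept_sum += bins[i].intercepts;
		hit_sum += bins[i].hits;
		recent_sum += bins[i].recent;
	}
	/* total is the sum of intercepts and hits over all bins, so
	 * total - hit_sum covers the candidate and the deeper states. */
	return 2 * intercept_sum > total - hit_sum ||
	       recent_sum > NR_RECENT / 2;
}
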
@@ -472,8 +512,8 @@ static int teo_enable_device(struct cpuidle_driver *drv,
memset(cpu_data, 0, sizeof(*cpu_data));
- for (i = 0; i < INTERVALS; i++)
- cpu_data->intervals[i] = U64_MAX;
+ for (i = 0; i < NR_RECENT; i++)
+ cpu_data->recent_idx[i] = -1;
return 0;
}
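
The new sentinel makes sense in terms of the bookkeeping it supports: recent_idx[] logs which bin each of the last NR_RECENT wakeups landed in, and -1 marks a slot that has not recorded an event yet (the old U64_MAX interval sentinel cannot serve, since the log now stores signed bin indexes). A sketch under assumed names and sizes:

#include <string.h>

#define NR_RECENT 9	/* assumed depth */
#define NR_BINS   8	/* assumed bin count */

struct recent_log {
	int idx[NR_RECENT];		/* bin index per event, -1 = empty */
	int next;			/* slot the next event overwrites */
	unsigned int recent[NR_BINS];	/* per-bin recent-intercept counts */
};

static void recent_log_init(struct recent_log *log)
{
	int i;

	memset(log, 0, sizeof(*log));
	for (i = 0; i < NR_RECENT; i++)
		log->idx[i] = -1;
}

static void recent_log_add(struct recent_log *log, int bin_idx)
{
	int old = log->idx[log->next];

	if (old >= 0)
		log->recent[old]--;	/* retire the oldest recorded event */
	log->recent[bin_idx]++;
	log->idx[log->next] = bin_idx;
	log->next = (log->next + 1) % NR_RECENT;
}
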
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index cdeedbf02646..2b496a53cbca 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -18,14 +18,6 @@
#include "cpuidle.h"
-static unsigned int sysfs_switch;
-static int __init cpuidle_sysfs_setup(char *unused)
-{
- sysfs_switch = 1;
- return 1;
-}
-__setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup);
-
static ssize_t show_available_governors(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -35,10 +27,10 @@ static ssize_t show_available_governors(struct device *dev,
mutex_lock(&cpuidle_lock);
list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
- if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) -
- CPUIDLE_NAME_LEN - 2))
+ if (i >= (ssize_t) (PAGE_SIZE - (CPUIDLE_NAME_LEN + 2)))
goto out;
- i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
+
+ i += scnprintf(&buf[i], CPUIDLE_NAME_LEN + 1, "%s ", tmp->name);
}
out:
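
The new bound reflects how scnprintf() counts: its size argument includes the terminating NUL, so a size of CPUIDLE_NAME_LEN drops the separating space after a maximum-length (CPUIDLE_NAME_LEN - 1 character) name. A stand-alone demonstration with userspace snprintf(), which truncates the same way; the governor name is made up:

#include <stdio.h>

#define CPUIDLE_NAME_LEN 16	/* value in current kernels */

int main(void)
{
	const char *name = "governor_name15";	/* 15 chars, the maximum */
	char buf[64];

	snprintf(buf, CPUIDLE_NAME_LEN, "%s ", name);
	printf("size %d: \"%s\"\n", CPUIDLE_NAME_LEN, buf);	/* space lost */

	snprintf(buf, CPUIDLE_NAME_LEN + 1, "%s ", name);
	printf("size %d: \"%s\"\n", CPUIDLE_NAME_LEN + 1, buf);	/* intact */
	return 0;
}
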
@@ -85,58 +77,43 @@ static ssize_t store_current_governor(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- char gov_name[CPUIDLE_NAME_LEN];
- int ret = -EINVAL;
- size_t len = count;
+ char gov_name[CPUIDLE_NAME_LEN + 1];
+ int ret;
struct cpuidle_governor *gov;
- if (!len || len >= sizeof(gov_name))
+ ret = sscanf(buf, "%" __stringify(CPUIDLE_NAME_LEN) "s", gov_name);
+ if (ret != 1)
return -EINVAL;
- memcpy(gov_name, buf, len);
- gov_name[len] = '\0';
- if (gov_name[len - 1] == '\n')
- gov_name[--len] = '\0';
-
mutex_lock(&cpuidle_lock);
-
+ ret = -EINVAL;
list_for_each_entry(gov, &cpuidle_governors, governor_list) {
- if (strlen(gov->name) == len && !strcmp(gov->name, gov_name)) {
+ if (!strncmp(gov->name, gov_name, CPUIDLE_NAME_LEN)) {
ret = cpuidle_switch_governor(gov);
break;
}
}
-
mutex_unlock(&cpuidle_lock);
- if (ret)
- return ret;
- else
- return count;
+ return ret ? ret : count;
}
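
The sscanf() form works by pasting the width into the format string with __stringify(), so the conversion can never overrun gov_name, and %s stops at whitespace, which drops the trailing newline the old code stripped by hand. A self-contained model, where the two-level stringify macros mirror include/linux/stringify.h:

#include <stdio.h>

#define __stringify_1(x)	#x
#define __stringify(x)		__stringify_1(x)

#define CPUIDLE_NAME_LEN 16

int main(void)
{
	/* One extra byte so a width-16 conversion plus NUL always fits. */
	char gov_name[CPUIDLE_NAME_LEN + 1];

	/* Expands to sscanf(buf, "%16s", gov_name); the "\n" from a
	 * sysfs write is whitespace and is never copied. */
	if (sscanf("menu\n", "%" __stringify(CPUIDLE_NAME_LEN) "s",
		   gov_name) == 1)
		printf("parsed: '%s'\n", gov_name);
	return 0;
}
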
-static DEVICE_ATTR(current_driver, 0444, show_current_driver, NULL);
-static DEVICE_ATTR(current_governor_ro, 0444, show_current_governor, NULL);
-
-static struct attribute *cpuidle_default_attrs[] = {
- &dev_attr_current_driver.attr,
- &dev_attr_current_governor_ro.attr,
- NULL
-};
-
static DEVICE_ATTR(available_governors, 0444, show_available_governors, NULL);
+static DEVICE_ATTR(current_driver, 0444, show_current_driver, NULL);
static DEVICE_ATTR(current_governor, 0644, show_current_governor,
- store_current_governor);
+ store_current_governor);
+static DEVICE_ATTR(current_governor_ro, 0444, show_current_governor, NULL);
-static struct attribute *cpuidle_switch_attrs[] = {
+static struct attribute *cpuidle_attrs[] = {
&dev_attr_available_governors.attr,
&dev_attr_current_driver.attr,
&dev_attr_current_governor.attr,
+ &dev_attr_current_governor_ro.attr,
NULL
};
static struct attribute_group cpuidle_attr_group = {
- .attrs = cpuidle_default_attrs,
+ .attrs = cpuidle_attrs,
.name = "cpuidle",
};
@@ -146,9 +123,6 @@ static struct attribute_group cpuidle_attr_group = {
*/
int cpuidle_add_interface(struct device *dev)
{
- if (sysfs_switch)
- cpuidle_attr_group.attrs = cpuidle_switch_attrs;
-
return sysfs_create_group(&dev->kobj, &cpuidle_attr_group);
}
@@ -167,11 +141,6 @@ struct cpuidle_attr {
ssize_t (*store)(struct cpuidle_device *, const char *, size_t count);
};
-#define define_one_ro(_name, show) \
- static struct cpuidle_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
-#define define_one_rw(_name, show, store) \
- static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store)
-
#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr)
struct cpuidle_device_kobj {
@@ -287,6 +256,7 @@ define_show_state_time_function(exit_latency)
define_show_state_time_function(target_residency)
define_show_state_function(power_usage)
define_show_state_ull_function(usage)
+define_show_state_ull_function(rejected)
define_show_state_str_function(name)
define_show_state_str_function(desc)
define_show_state_ull_function(above)
@@ -343,6 +313,7 @@ define_one_state_ro(latency, show_state_exit_latency);
define_one_state_ro(residency, show_state_target_residency);
define_one_state_ro(power, show_state_power_usage);
define_one_state_ro(usage, show_state_usage);
+define_one_state_ro(rejected, show_state_rejected);
define_one_state_ro(time, show_state_time);
define_one_state_rw(disable, show_state_disable, store_state_disable);
define_one_state_ro(above, show_state_above);
@@ -356,6 +327,7 @@ static struct attribute *cpuidle_state_default_attrs[] = {
&attr_residency.attr,
&attr_power.attr,
&attr_usage.attr,
+ &attr_rejected.attr,
&attr_time.attr,
&attr_disable.attr,
&attr_above.attr,
@@ -363,6 +335,7 @@ static struct attribute *cpuidle_state_default_attrs[] = {
&attr_default_status.attr,
NULL
};
+ATTRIBUTE_GROUPS(cpuidle_state_default);
struct cpuidle_state_kobj {
struct cpuidle_state *state;
@@ -431,12 +404,12 @@ static inline void cpuidle_remove_s2idle_attr_group(struct cpuidle_state_kobj *k
#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr,
- char * buf)
+ char *buf)
{
int ret = -EIO;
struct cpuidle_state *state = kobj_to_state(kobj);
struct cpuidle_state_usage *state_usage = kobj_to_state_usage(kobj);
- struct cpuidle_state_attr * cattr = attr_to_stateattr(attr);
+ struct cpuidle_state_attr *cattr = attr_to_stateattr(attr);
if (cattr->show)
ret = cattr->show(state, state_usage, buf);
@@ -476,7 +449,7 @@ static void cpuidle_state_sysfs_release(struct kobject *kobj)
static struct kobj_type ktype_state_cpuidle = {
.sysfs_ops = &cpuidle_state_sysfs_ops,
- .default_attrs = cpuidle_state_default_attrs,
+ .default_groups = cpuidle_state_default_groups,
.release = cpuidle_state_sysfs_release,
};
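
The default_attrs to default_groups conversion works because ATTRIBUTE_GROUPS(cpuidle_state_default), added after the attribute array above, derives the groups array from the attrs array. From memory, it expands to approximately the following; consult include/linux/sysfs.h for the authoritative definition:

static const struct attribute_group cpuidle_state_default_group = {
	.attrs = cpuidle_state_default_attrs,
};

static const struct attribute_group *cpuidle_state_default_groups[] = {
	&cpuidle_state_default_group,
	NULL,
};
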
@@ -515,6 +488,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle,
&kdev->kobj, "state%d", i);
if (ret) {
+ kobject_put(&kobj->kobj);
kfree(kobj);
goto error_state;
}
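
This and the matching hunks below follow the documented kobject contract: once kobject_init_and_add() has been called, the object holds a reference even when the call fails, and kobject_put() must be used so the ktype's release callback runs. Schematic of the pattern, with placeholder names:

int ret = kobject_init_and_add(&kobj->kobj, &some_ktype, parent, "name");
if (ret) {
	/*
	 * The kobject is live despite the failure: drop the reference
	 * so the ktype's release callback runs.  cpuidle's release
	 * callbacks only signal a completion, so the containing
	 * structure still needs an explicit kfree() afterwards.
	 */
	kobject_put(&kobj->kobj);
	kfree(kobj);
	return ret;
}
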
@@ -532,7 +506,7 @@ error_state:
}
/**
- * cpuidle_remove_driver_sysfs - removes the cpuidle states sysfs attributes
+ * cpuidle_remove_state_sysfs - removes the cpuidle states sysfs attributes
* @device: the target device
*/
static void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
@@ -618,10 +592,11 @@ static struct attribute *cpuidle_driver_default_attrs[] = {
&attr_driver_name.attr,
NULL
};
+ATTRIBUTE_GROUPS(cpuidle_driver_default);
static struct kobj_type ktype_driver_cpuidle = {
.sysfs_ops = &cpuidle_driver_sysfs_ops,
- .default_attrs = cpuidle_driver_default_attrs,
+ .default_groups = cpuidle_driver_default_groups,
.release = cpuidle_driver_sysfs_release,
};
@@ -646,6 +621,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle,
&kdev->kobj, "driver");
if (ret) {
+ kobject_put(&kdrv->kobj);
kfree(kdrv);
return ret;
}
@@ -733,17 +709,18 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev)
if (!kdev)
return -ENOMEM;
kdev->dev = dev;
- dev->kobj_dev = kdev;
init_completion(&kdev->kobj_unregister);
error = kobject_init_and_add(&kdev->kobj, &ktype_cpuidle, &cpu_dev->kobj,
"cpuidle");
if (error) {
+ kobject_put(&kdev->kobj);
kfree(kdev);
return error;
}
+ dev->kobj_dev = kdev;
kobject_uevent(&kdev->kobj, KOBJ_ADD);
return 0;
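
Net effect of this last hunk, annotated: dev->kobj_dev is now published only after the kobject is fully initialized, so the error path never leaves it pointing at a half-constructed (and then freed) wrapper. The names below come from the hunk; the comments are editorial:

	error = kobject_init_and_add(&kdev->kobj, &ktype_cpuidle,
				     &cpu_dev->kobj, "cpuidle");
	if (error) {
		kobject_put(&kdev->kobj);	/* drop the ref taken by init */
		kfree(kdev);
		return error;			/* dev->kobj_dev was never set */
	}
	dev->kobj_dev = kdev;			/* publish only on success */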