aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/powercap
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/powercap')
-rw-r--r--drivers/powercap/Kconfig25
-rw-r--r--drivers/powercap/Makefile3
-rw-r--r--drivers/powercap/dtpm.c661
-rw-r--r--drivers/powercap/dtpm_cpu.c297
-rw-r--r--drivers/powercap/dtpm_devfreq.c203
-rw-r--r--drivers/powercap/dtpm_subsys.h22
-rw-r--r--drivers/powercap/idle_inject.c37
-rw-r--r--drivers/powercap/intel_rapl_common.c390
-rw-r--r--drivers/powercap/intel_rapl_msr.c73
-rw-r--r--drivers/powercap/powercap_sys.c9
10 files changed, 1535 insertions, 185 deletions
diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index dc1c1381d7fa..515e3ceb3393 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -23,14 +23,14 @@ config INTEL_RAPL
tristate "Intel RAPL Support via MSR Interface"
depends on X86 && IOSF_MBI
select INTEL_RAPL_CORE
- ---help---
+ help
This enables support for the Intel Running Average Power Limit (RAPL)
technology via MSR interface, which allows power limits to be enforced
and monitored on modern Intel processors (Sandy Bridge and later).
In RAPL, the platform level settings are divided into domains for
fine grained control. These domains include processor package, DRAM
- controller, CPU core (Power Plance 0), graphics uncore (Power Plane
+ controller, CPU core (Power Plane 0), graphics uncore (Power Plane
1), etc.
config IDLE_INJECT
@@ -43,4 +43,25 @@ config IDLE_INJECT
CPUs for power capping. Idle period can be injected
synchronously on a set of specified CPUs or alternatively
on a per CPU basis.
+
+config DTPM
+ bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)"
+ depends on OF
+ help
+ This enables support for the power capping for the dynamic
+ thermal power management userspace engine.
+
+config DTPM_CPU
+ bool "Add CPU power capping based on the energy model"
+ depends on DTPM && ENERGY_MODEL
+ help
+ This enables support for CPU power limitation based on
+ energy model.
+
+config DTPM_DEVFREQ
+ bool "Add device power capping based on the energy model"
+ depends on DTPM && ENERGY_MODEL
+ help
+ This enables support for device power limitation based on
+ energy model.
endif
diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile
index 7255c94ec61c..494617cdad88 100644
--- a/drivers/powercap/Makefile
+++ b/drivers/powercap/Makefile
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_DTPM) += dtpm.o
+obj-$(CONFIG_DTPM_CPU) += dtpm_cpu.o
+obj-$(CONFIG_DTPM_DEVFREQ) += dtpm_devfreq.o
obj-$(CONFIG_POWERCAP) += powercap_sys.o
obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o
obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o
diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
new file mode 100644
index 000000000000..ce920f17f45f
--- /dev/null
+++ b/drivers/powercap/dtpm.c
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2020 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * The powercap based Dynamic Thermal Power Management framework
+ * provides to the userspace a consistent API to set the power limit
+ * on some devices.
+ *
+ * DTPM defines the functions to create a tree of constraints. Each
+ * parent node is a virtual description of the aggregation of the
+ * children. It propagates the constraints set at its level to its
+ * children and collect the children power information. The leaves of
+ * the tree are the real devices which have the ability to get their
+ * current power consumption and set their power limit.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/dtpm.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/powercap.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+
+#include "dtpm_subsys.h"
+
+#define DTPM_POWER_LIMIT_FLAG 0
+
+static const char *constraint_name[] = {
+ "Instantaneous",
+};
+
+static DEFINE_MUTEX(dtpm_lock);
+static struct powercap_control_type *pct;
+static struct dtpm *root;
+
+static int get_time_window_us(struct powercap_zone *pcz, int cid, u64 *window)
+{
+ return -ENOSYS;
+}
+
+static int set_time_window_us(struct powercap_zone *pcz, int cid, u64 window)
+{
+ return -ENOSYS;
+}
+
+static int get_max_power_range_uw(struct powercap_zone *pcz, u64 *max_power_uw)
+{
+ struct dtpm *dtpm = to_dtpm(pcz);
+
+ *max_power_uw = dtpm->power_max - dtpm->power_min;
+
+ return 0;
+}
+
+static int __get_power_uw(struct dtpm *dtpm, u64 *power_uw)
+{
+ struct dtpm *child;
+ u64 power;
+ int ret = 0;
+
+ if (dtpm->ops) {
+ *power_uw = dtpm->ops->get_power_uw(dtpm);
+ return 0;
+ }
+
+ *power_uw = 0;
+
+ list_for_each_entry(child, &dtpm->children, sibling) {
+ ret = __get_power_uw(child, &power);
+ if (ret)
+ break;
+ *power_uw += power;
+ }
+
+ return ret;
+}
+
+static int get_power_uw(struct powercap_zone *pcz, u64 *power_uw)
+{
+ return __get_power_uw(to_dtpm(pcz), power_uw);
+}
+
+static void __dtpm_rebalance_weight(struct dtpm *dtpm)
+{
+ struct dtpm *child;
+
+ list_for_each_entry(child, &dtpm->children, sibling) {
+
+ pr_debug("Setting weight '%d' for '%s'\n",
+ child->weight, child->zone.name);
+
+ child->weight = DIV64_U64_ROUND_CLOSEST(
+ child->power_max * 1024, dtpm->power_max);
+
+ __dtpm_rebalance_weight(child);
+ }
+}
+
+static void __dtpm_sub_power(struct dtpm *dtpm)
+{
+ struct dtpm *parent = dtpm->parent;
+
+ while (parent) {
+ parent->power_min -= dtpm->power_min;
+ parent->power_max -= dtpm->power_max;
+ parent->power_limit -= dtpm->power_limit;
+ parent = parent->parent;
+ }
+}
+
+static void __dtpm_add_power(struct dtpm *dtpm)
+{
+ struct dtpm *parent = dtpm->parent;
+
+ while (parent) {
+ parent->power_min += dtpm->power_min;
+ parent->power_max += dtpm->power_max;
+ parent->power_limit += dtpm->power_limit;
+ parent = parent->parent;
+ }
+}
+
+/**
+ * dtpm_update_power - Update the power on the dtpm
+ * @dtpm: a pointer to a dtpm structure to update
+ *
+ * Function to update the power values of the dtpm node specified in
+ * parameter. These new values will be propagated to the tree.
+ *
+ * Return: zero on success, -EINVAL if the values are inconsistent
+ */
+int dtpm_update_power(struct dtpm *dtpm)
+{
+ int ret;
+
+ __dtpm_sub_power(dtpm);
+
+ ret = dtpm->ops->update_power_uw(dtpm);
+ if (ret)
+ pr_err("Failed to update power for '%s': %d\n",
+ dtpm->zone.name, ret);
+
+ if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
+ dtpm->power_limit = dtpm->power_max;
+
+ __dtpm_add_power(dtpm);
+
+ if (root)
+ __dtpm_rebalance_weight(root);
+
+ return ret;
+}
+
+/**
+ * dtpm_release_zone - Cleanup when the node is released
+ * @pcz: a pointer to a powercap_zone structure
+ *
+ * Do some housecleaning and update the weight on the tree. The
+ * release will be denied if the node has children. This function must
+ * be called by the specific release callback of the different
+ * backends.
+ *
+ * Return: 0 on success, -EBUSY if there are children
+ */
+int dtpm_release_zone(struct powercap_zone *pcz)
+{
+ struct dtpm *dtpm = to_dtpm(pcz);
+ struct dtpm *parent = dtpm->parent;
+
+ if (!list_empty(&dtpm->children))
+ return -EBUSY;
+
+ if (parent)
+ list_del(&dtpm->sibling);
+
+ __dtpm_sub_power(dtpm);
+
+ if (dtpm->ops)
+ dtpm->ops->release(dtpm);
+ else
+ kfree(dtpm);
+
+ return 0;
+}
+
+static int get_power_limit_uw(struct powercap_zone *pcz,
+ int cid, u64 *power_limit)
+{
+ *power_limit = to_dtpm(pcz)->power_limit;
+
+ return 0;
+}
+
+/*
+ * Set the power limit on the nodes, the power limit is distributed
+ * given the weight of the children.
+ *
+ * The dtpm node lock must be held when calling this function.
+ */
+static int __set_power_limit_uw(struct dtpm *dtpm, int cid, u64 power_limit)
+{
+ struct dtpm *child;
+ int ret = 0;
+ u64 power;
+
+ /*
+ * A max power limitation means we remove the power limit,
+ * otherwise we set a constraint and flag the dtpm node.
+ */
+ if (power_limit == dtpm->power_max) {
+ clear_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags);
+ } else {
+ set_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags);
+ }
+
+ pr_debug("Setting power limit for '%s': %llu uW\n",
+ dtpm->zone.name, power_limit);
+
+ /*
+ * Only leaves of the dtpm tree has ops to get/set the power
+ */
+ if (dtpm->ops) {
+ dtpm->power_limit = dtpm->ops->set_power_uw(dtpm, power_limit);
+ } else {
+ dtpm->power_limit = 0;
+
+ list_for_each_entry(child, &dtpm->children, sibling) {
+
+ /*
+ * Integer division rounding will inevitably
+ * lead to a different min or max value when
+ * set several times. In order to restore the
+ * initial value, we force the child's min or
+ * max power every time if the constraint is
+ * at the boundaries.
+ */
+ if (power_limit == dtpm->power_max) {
+ power = child->power_max;
+ } else if (power_limit == dtpm->power_min) {
+ power = child->power_min;
+ } else {
+ power = DIV_ROUND_CLOSEST_ULL(
+ power_limit * child->weight, 1024);
+ }
+
+ pr_debug("Setting power limit for '%s': %llu uW\n",
+ child->zone.name, power);
+
+ ret = __set_power_limit_uw(child, cid, power);
+ if (!ret)
+ ret = get_power_limit_uw(&child->zone, cid, &power);
+
+ if (ret)
+ break;
+
+ dtpm->power_limit += power;
+ }
+ }
+
+ return ret;
+}
+
+static int set_power_limit_uw(struct powercap_zone *pcz,
+ int cid, u64 power_limit)
+{
+ struct dtpm *dtpm = to_dtpm(pcz);
+ int ret;
+
+ /*
+ * Don't allow values outside of the power range previously
+ * set when initializing the power numbers.
+ */
+ power_limit = clamp_val(power_limit, dtpm->power_min, dtpm->power_max);
+
+ ret = __set_power_limit_uw(dtpm, cid, power_limit);
+
+ pr_debug("%s: power limit: %llu uW, power max: %llu uW\n",
+ dtpm->zone.name, dtpm->power_limit, dtpm->power_max);
+
+ return ret;
+}
+
+static const char *get_constraint_name(struct powercap_zone *pcz, int cid)
+{
+ return constraint_name[cid];
+}
+
+static int get_max_power_uw(struct powercap_zone *pcz, int id, u64 *max_power)
+{
+ *max_power = to_dtpm(pcz)->power_max;
+
+ return 0;
+}
+
+static struct powercap_zone_constraint_ops constraint_ops = {
+ .set_power_limit_uw = set_power_limit_uw,
+ .get_power_limit_uw = get_power_limit_uw,
+ .set_time_window_us = set_time_window_us,
+ .get_time_window_us = get_time_window_us,
+ .get_max_power_uw = get_max_power_uw,
+ .get_name = get_constraint_name,
+};
+
+static struct powercap_zone_ops zone_ops = {
+ .get_max_power_range_uw = get_max_power_range_uw,
+ .get_power_uw = get_power_uw,
+ .release = dtpm_release_zone,
+};
+
+/**
+ * dtpm_init - Allocate and initialize a dtpm struct
+ * @dtpm: The dtpm struct pointer to be initialized
+ * @ops: The dtpm device specific ops, NULL for a virtual node
+ */
+void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops)
+{
+ if (dtpm) {
+ INIT_LIST_HEAD(&dtpm->children);
+ INIT_LIST_HEAD(&dtpm->sibling);
+ dtpm->weight = 1024;
+ dtpm->ops = ops;
+ }
+}
+
+/**
+ * dtpm_unregister - Unregister a dtpm node from the hierarchy tree
+ * @dtpm: a pointer to a dtpm structure corresponding to the node to be removed
+ *
+ * Call the underlying powercap unregister function. That will call
+ * the release callback of the powercap zone.
+ */
+void dtpm_unregister(struct dtpm *dtpm)
+{
+ powercap_unregister_zone(pct, &dtpm->zone);
+
+ pr_debug("Unregistered dtpm node '%s'\n", dtpm->zone.name);
+}
+
+/**
+ * dtpm_register - Register a dtpm node in the hierarchy tree
+ * @name: a string specifying the name of the node
+ * @dtpm: a pointer to a dtpm structure corresponding to the new node
+ * @parent: a pointer to a dtpm structure corresponding to the parent node
+ *
+ * Create a dtpm node in the tree. If no parent is specified, the node
+ * is the root node of the hierarchy. If the root node already exists,
+ * then the registration will fail. The powercap controller must be
+ * initialized before calling this function.
+ *
+ * The dtpm structure must be initialized with the power numbers
+ * before calling this function.
+ *
+ * Return: zero on success, a negative value in case of error:
+ * -EAGAIN: the function is called before the framework is initialized.
+ * -EBUSY: the root node is already inserted
+ * -EINVAL: * there is no root node yet and @parent is specified
+ * * no all ops are defined
+ * * parent have ops which are reserved for leaves
+ * Other negative values are reported back from the powercap framework
+ */
+int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
+{
+ struct powercap_zone *pcz;
+
+ if (!pct)
+ return -EAGAIN;
+
+ if (root && !parent)
+ return -EBUSY;
+
+ if (!root && parent)
+ return -EINVAL;
+
+ if (parent && parent->ops)
+ return -EINVAL;
+
+ if (!dtpm)
+ return -EINVAL;
+
+ if (dtpm->ops && !(dtpm->ops->set_power_uw &&
+ dtpm->ops->get_power_uw &&
+ dtpm->ops->update_power_uw &&
+ dtpm->ops->release))
+ return -EINVAL;
+
+ pcz = powercap_register_zone(&dtpm->zone, pct, name,
+ parent ? &parent->zone : NULL,
+ &zone_ops, MAX_DTPM_CONSTRAINTS,
+ &constraint_ops);
+ if (IS_ERR(pcz))
+ return PTR_ERR(pcz);
+
+ if (parent) {
+ list_add_tail(&dtpm->sibling, &parent->children);
+ dtpm->parent = parent;
+ } else {
+ root = dtpm;
+ }
+
+ if (dtpm->ops && !dtpm->ops->update_power_uw(dtpm)) {
+ __dtpm_add_power(dtpm);
+ dtpm->power_limit = dtpm->power_max;
+ }
+
+ pr_debug("Registered dtpm node '%s' / %llu-%llu uW, \n",
+ dtpm->zone.name, dtpm->power_min, dtpm->power_max);
+
+ return 0;
+}
+
+static struct dtpm *dtpm_setup_virtual(const struct dtpm_node *hierarchy,
+ struct dtpm *parent)
+{
+ struct dtpm *dtpm;
+ int ret;
+
+ dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL);
+ if (!dtpm)
+ return ERR_PTR(-ENOMEM);
+ dtpm_init(dtpm, NULL);
+
+ ret = dtpm_register(hierarchy->name, dtpm, parent);
+ if (ret) {
+ pr_err("Failed to register dtpm node '%s': %d\n",
+ hierarchy->name, ret);
+ kfree(dtpm);
+ return ERR_PTR(ret);
+ }
+
+ return dtpm;
+}
+
+static struct dtpm *dtpm_setup_dt(const struct dtpm_node *hierarchy,
+ struct dtpm *parent)
+{
+ struct device_node *np;
+ int i, ret;
+
+ np = of_find_node_by_path(hierarchy->name);
+ if (!np) {
+ pr_err("Failed to find '%s'\n", hierarchy->name);
+ return ERR_PTR(-ENXIO);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) {
+
+ if (!dtpm_subsys[i]->setup)
+ continue;
+
+ ret = dtpm_subsys[i]->setup(parent, np);
+ if (ret) {
+ pr_err("Failed to setup '%s': %d\n", dtpm_subsys[i]->name, ret);
+ of_node_put(np);
+ return ERR_PTR(ret);
+ }
+ }
+
+ of_node_put(np);
+
+ /*
+ * By returning a NULL pointer, we let know the caller there
+ * is no child for us as we are a leaf of the tree
+ */
+ return NULL;
+}
+
+typedef struct dtpm * (*dtpm_node_callback_t)(const struct dtpm_node *, struct dtpm *);
+
+static dtpm_node_callback_t dtpm_node_callback[] = {
+ [DTPM_NODE_VIRTUAL] = dtpm_setup_virtual,
+ [DTPM_NODE_DT] = dtpm_setup_dt,
+};
+
+static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
+ const struct dtpm_node *it, struct dtpm *parent)
+{
+ struct dtpm *dtpm;
+ int i, ret;
+
+ for (i = 0; hierarchy[i].name; i++) {
+
+ if (hierarchy[i].parent != it)
+ continue;
+
+ dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
+
+ /*
+ * A NULL pointer means there is no children, hence we
+ * continue without going deeper in the recursivity.
+ */
+ if (!dtpm)
+ continue;
+
+ /*
+ * There are multiple reasons why the callback could
+ * fail. The generic glue is abstracting the backend
+ * and therefore it is not possible to report back or
+ * take a decision based on the error. In any case,
+ * if this call fails, it is not critical in the
+ * hierarchy creation, we can assume the underlying
+ * service is not found, so we continue without this
+ * branch in the tree but with a warning to log the
+ * information the node was not created.
+ */
+ if (IS_ERR(dtpm)) {
+ pr_warn("Failed to create '%s' in the hierarchy\n",
+ hierarchy[i].name);
+ continue;
+ }
+
+ ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * dtpm_create_hierarchy - Create the dtpm hierarchy
+ * @hierarchy: An array of struct dtpm_node describing the hierarchy
+ *
+ * The function is called by the platform specific code with the
+ * description of the different node in the hierarchy. It creates the
+ * tree in the sysfs filesystem under the powercap dtpm entry.
+ *
+ * The expected tree has the format:
+ *
+ * struct dtpm_node hierarchy[] = {
+ * [0] { .name = "topmost", type = DTPM_NODE_VIRTUAL },
+ * [1] { .name = "package", .type = DTPM_NODE_VIRTUAL, .parent = &hierarchy[0] },
+ * [2] { .name = "/cpus/cpu0", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ * [3] { .name = "/cpus/cpu1", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ * [4] { .name = "/cpus/cpu2", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ * [5] { .name = "/cpus/cpu3", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ * [6] { }
+ * };
+ *
+ * The last element is always an empty one and marks the end of the
+ * array.
+ *
+ * Return: zero on success, a negative value in case of error. Errors
+ * are reported back from the underlying functions.
+ */
+int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table)
+{
+ const struct of_device_id *match;
+ const struct dtpm_node *hierarchy;
+ struct device_node *np;
+ int i, ret;
+
+ mutex_lock(&dtpm_lock);
+
+ if (pct) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+ pct = powercap_register_control_type(NULL, "dtpm", NULL);
+ if (IS_ERR(pct)) {
+ pr_err("Failed to register control type\n");
+ ret = PTR_ERR(pct);
+ goto out_pct;
+ }
+
+ ret = -ENODEV;
+ np = of_find_node_by_path("/");
+ if (!np)
+ goto out_err;
+
+ match = of_match_node(dtpm_match_table, np);
+
+ of_node_put(np);
+
+ if (!match)
+ goto out_err;
+
+ hierarchy = match->data;
+ if (!hierarchy) {
+ ret = -EFAULT;
+ goto out_err;
+ }
+
+ ret = dtpm_for_each_child(hierarchy, NULL, NULL);
+ if (ret)
+ goto out_err;
+
+ for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) {
+
+ if (!dtpm_subsys[i]->init)
+ continue;
+
+ ret = dtpm_subsys[i]->init();
+ if (ret)
+ pr_info("Failed to initialize '%s': %d",
+ dtpm_subsys[i]->name, ret);
+ }
+
+ mutex_unlock(&dtpm_lock);
+
+ return 0;
+
+out_err:
+ powercap_unregister_control_type(pct);
+out_pct:
+ pct = NULL;
+out_unlock:
+ mutex_unlock(&dtpm_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dtpm_create_hierarchy);
+
+static void __dtpm_destroy_hierarchy(struct dtpm *dtpm)
+{
+ struct dtpm *child, *aux;
+
+ list_for_each_entry_safe(child, aux, &dtpm->children, sibling)
+ __dtpm_destroy_hierarchy(child);
+
+ /*
+ * At this point, we know all children were removed from the
+ * recursive call before
+ */
+ dtpm_unregister(dtpm);
+}
+
+void dtpm_destroy_hierarchy(void)
+{
+ int i;
+
+ mutex_lock(&dtpm_lock);
+
+ if (!pct)
+ goto out_unlock;
+
+ __dtpm_destroy_hierarchy(root);
+
+
+ for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) {
+
+ if (!dtpm_subsys[i]->exit)
+ continue;
+
+ dtpm_subsys[i]->exit();
+ }
+
+ powercap_unregister_control_type(pct);
+
+ pct = NULL;
+
+ root = NULL;
+
+out_unlock:
+ mutex_unlock(&dtpm_lock);
+}
+EXPORT_SYMBOL_GPL(dtpm_destroy_hierarchy);
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
new file mode 100644
index 000000000000..2ff7717530bf
--- /dev/null
+++ b/drivers/powercap/dtpm_cpu.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2020 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * The DTPM CPU is based on the energy model. It hooks the CPU in the
+ * DTPM tree which in turns update the power number by propagating the
+ * power number from the CPU energy model information to the parents.
+ *
+ * The association between the power and the performance state, allows
+ * to set the power of the CPU at the OPP granularity.
+ *
+ * The CPU hotplug is supported and the power numbers will be updated
+ * if a CPU is hot plugged / unplugged.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/cpuhotplug.h>
+#include <linux/dtpm.h>
+#include <linux/energy_model.h>
+#include <linux/of.h>
+#include <linux/pm_qos.h>
+#include <linux/slab.h>
+#include <linux/units.h>
+
+struct dtpm_cpu {
+ struct dtpm dtpm;
+ struct freq_qos_request qos_req;
+ int cpu;
+};
+
+static DEFINE_PER_CPU(struct dtpm_cpu *, dtpm_per_cpu);
+
+static struct dtpm_cpu *to_dtpm_cpu(struct dtpm *dtpm)
+{
+ return container_of(dtpm, struct dtpm_cpu, dtpm);
+}
+
+static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
+{
+ struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
+ struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu);
+ struct cpumask cpus;
+ unsigned long freq;
+ u64 power;
+ int i, nr_cpus;
+
+ cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
+ nr_cpus = cpumask_weight(&cpus);
+
+ for (i = 0; i < pd->nr_perf_states; i++) {
+
+ power = pd->table[i].power * nr_cpus;
+
+ if (power > power_limit)
+ break;
+ }
+
+ freq = pd->table[i - 1].frequency;
+
+ freq_qos_update_request(&dtpm_cpu->qos_req, freq);
+
+ power_limit = pd->table[i - 1].power * nr_cpus;
+
+ return power_limit;
+}
+
+static u64 scale_pd_power_uw(struct cpumask *pd_mask, u64 power)
+{
+ unsigned long max, sum_util = 0;
+ int cpu;
+
+ /*
+ * The capacity is the same for all CPUs belonging to
+ * the same perf domain.
+ */
+ max = arch_scale_cpu_capacity(cpumask_first(pd_mask));
+
+ for_each_cpu_and(cpu, pd_mask, cpu_online_mask)
+ sum_util += sched_cpu_util(cpu);
+
+ return (power * ((sum_util << 10) / max)) >> 10;
+}
+
+static u64 get_pd_power_uw(struct dtpm *dtpm)
+{
+ struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
+ struct em_perf_domain *pd;
+ struct cpumask *pd_mask;
+ unsigned long freq;
+ int i;
+
+ pd = em_cpu_get(dtpm_cpu->cpu);
+
+ pd_mask = em_span_cpus(pd);
+
+ freq = cpufreq_quick_get(dtpm_cpu->cpu);
+
+ for (i = 0; i < pd->nr_perf_states; i++) {
+
+ if (pd->table[i].frequency < freq)
+ continue;
+
+ return scale_pd_power_uw(pd_mask, pd->table[i].power *
+ MICROWATT_PER_MILLIWATT);
+ }
+
+ return 0;
+}
+
+static int update_pd_power_uw(struct dtpm *dtpm)
+{
+ struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
+ struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu);
+ struct cpumask cpus;
+ int nr_cpus;
+
+ cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus));
+ nr_cpus = cpumask_weight(&cpus);
+
+ dtpm->power_min = em->table[0].power;
+ dtpm->power_min *= MICROWATT_PER_MILLIWATT;
+ dtpm->power_min *= nr_cpus;
+
+ dtpm->power_max = em->table[em->nr_perf_states - 1].power;
+ dtpm->power_max *= MICROWATT_PER_MILLIWATT;
+ dtpm->power_max *= nr_cpus;
+
+ return 0;
+}
+
+static void pd_release(struct dtpm *dtpm)
+{
+ struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
+ struct cpufreq_policy *policy;
+
+ if (freq_qos_request_active(&dtpm_cpu->qos_req))
+ freq_qos_remove_request(&dtpm_cpu->qos_req);
+
+ policy = cpufreq_cpu_get(dtpm_cpu->cpu);
+ if (policy) {
+ for_each_cpu(dtpm_cpu->cpu, policy->related_cpus)
+ per_cpu(dtpm_per_cpu, dtpm_cpu->cpu) = NULL;
+ }
+
+ kfree(dtpm_cpu);
+}
+
+static struct dtpm_ops dtpm_ops = {
+ .set_power_uw = set_pd_power_limit,
+ .get_power_uw = get_pd_power_uw,
+ .update_power_uw = update_pd_power_uw,
+ .release = pd_release,
+};
+
+static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
+{
+ struct dtpm_cpu *dtpm_cpu;
+
+ dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+ if (dtpm_cpu)
+ dtpm_update_power(&dtpm_cpu->dtpm);
+
+ return 0;
+}
+
+static int cpuhp_dtpm_cpu_online(unsigned int cpu)
+{
+ struct dtpm_cpu *dtpm_cpu;
+
+ dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+ if (dtpm_cpu)
+ return dtpm_update_power(&dtpm_cpu->dtpm);
+
+ return 0;
+}
+
+static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
+{
+ struct dtpm_cpu *dtpm_cpu;
+ struct cpufreq_policy *policy;
+ struct em_perf_domain *pd;
+ char name[CPUFREQ_NAME_LEN];
+ int ret = -ENOMEM;
+
+ dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+ if (dtpm_cpu)
+ return 0;
+
+ policy = cpufreq_cpu_get(cpu);
+ if (!policy)
+ return 0;
+
+ pd = em_cpu_get(cpu);
+ if (!pd || em_is_artificial(pd))
+ return -EINVAL;
+
+ dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
+ if (!dtpm_cpu)
+ return -ENOMEM;
+
+ dtpm_init(&dtpm_cpu->dtpm, &dtpm_ops);
+ dtpm_cpu->cpu = cpu;
+
+ for_each_cpu(cpu, policy->related_cpus)
+ per_cpu(dtpm_per_cpu, cpu) = dtpm_cpu;
+
+ snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu);
+
+ ret = dtpm_register(name, &dtpm_cpu->dtpm, parent);
+ if (ret)
+ goto out_kfree_dtpm_cpu;
+
+ ret = freq_qos_add_request(&policy->constraints,
+ &dtpm_cpu->qos_req, FREQ_QOS_MAX,
+ pd->table[pd->nr_perf_states - 1].frequency);
+ if (ret)
+ goto out_dtpm_unregister;
+
+ return 0;
+
+out_dtpm_unregister:
+ dtpm_unregister(&dtpm_cpu->dtpm);
+ dtpm_cpu = NULL;
+
+out_kfree_dtpm_cpu:
+ for_each_cpu(cpu, policy->related_cpus)
+ per_cpu(dtpm_per_cpu, cpu) = NULL;
+ kfree(dtpm_cpu);
+
+ return ret;
+}
+
+static int dtpm_cpu_setup(struct dtpm *dtpm, struct device_node *np)
+{
+ int cpu;
+
+ cpu = of_cpu_node_to_id(np);
+ if (cpu < 0)
+ return 0;
+
+ return __dtpm_cpu_setup(cpu, dtpm);
+}
+
+static int dtpm_cpu_init(void)
+{
+ int ret;
+
+ /*
+ * The callbacks at CPU hotplug time are calling
+ * dtpm_update_power() which in turns calls update_pd_power().
+ *
+ * The function update_pd_power() uses the online mask to
+ * figure out the power consumption limits.
+ *
+ * At CPUHP_AP_ONLINE_DYN, the CPU is present in the CPU
+ * online mask when the cpuhp_dtpm_cpu_online function is
+ * called, but the CPU is still in the online mask for the
+ * tear down callback. So the power can not be updated when
+ * the CPU is unplugged.
+ *
+ * At CPUHP_AP_DTPM_CPU_DEAD, the situation is the opposite as
+ * above. The CPU online mask is not up to date when the CPU
+ * is plugged in.
+ *
+ * For this reason, we need to call the online and offline
+ * callbacks at different moments when the CPU online mask is
+ * consistent with the power numbers we want to update.
+ */
+ ret = cpuhp_setup_state(CPUHP_AP_DTPM_CPU_DEAD, "dtpm_cpu:offline",
+ NULL, cpuhp_dtpm_cpu_offline);
+ if (ret < 0)
+ return ret;
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dtpm_cpu:online",
+ cpuhp_dtpm_cpu_online, NULL);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static void dtpm_cpu_exit(void)
+{
+ cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN);
+ cpuhp_remove_state_nocalls(CPUHP_AP_DTPM_CPU_DEAD);
+}
+
+struct dtpm_subsys_ops dtpm_cpu_ops = {
+ .name = KBUILD_MODNAME,
+ .init = dtpm_cpu_init,
+ .exit = dtpm_cpu_exit,
+ .setup = dtpm_cpu_setup,
+};
diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfreq.c
new file mode 100644
index 000000000000..91276761a31d
--- /dev/null
+++ b/drivers/powercap/dtpm_devfreq.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * The devfreq device combined with the energy model and the load can
+ * give an estimation of the power consumption as well as limiting the
+ * power.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpumask.h>
+#include <linux/devfreq.h>
+#include <linux/dtpm.h>
+#include <linux/energy_model.h>
+#include <linux/of.h>
+#include <linux/pm_qos.h>
+#include <linux/slab.h>
+#include <linux/units.h>
+
+struct dtpm_devfreq {
+ struct dtpm dtpm;
+ struct dev_pm_qos_request qos_req;
+ struct devfreq *devfreq;
+};
+
+static struct dtpm_devfreq *to_dtpm_devfreq(struct dtpm *dtpm)
+{
+ return container_of(dtpm, struct dtpm_devfreq, dtpm);
+}
+
+static int update_pd_power_uw(struct dtpm *dtpm)
+{
+ struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+ struct devfreq *devfreq = dtpm_devfreq->devfreq;
+ struct device *dev = devfreq->dev.parent;
+ struct em_perf_domain *pd = em_pd_get(dev);
+
+ dtpm->power_min = pd->table[0].power;
+ dtpm->power_min *= MICROWATT_PER_MILLIWATT;
+
+ dtpm->power_max = pd->table[pd->nr_perf_states - 1].power;
+ dtpm->power_max *= MICROWATT_PER_MILLIWATT;
+
+ return 0;
+}
+
+static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
+{
+ struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+ struct devfreq *devfreq = dtpm_devfreq->devfreq;
+ struct device *dev = devfreq->dev.parent;
+ struct em_perf_domain *pd = em_pd_get(dev);
+ unsigned long freq;
+ u64 power;
+ int i;
+
+ for (i = 0; i < pd->nr_perf_states; i++) {
+
+ power = pd->table[i].power * MICROWATT_PER_MILLIWATT;
+ if (power > power_limit)
+ break;
+ }
+
+ freq = pd->table[i - 1].frequency;
+
+ dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq);
+
+ power_limit = pd->table[i - 1].power * MICROWATT_PER_MILLIWATT;
+
+ return power_limit;
+}
+
+static void _normalize_load(struct devfreq_dev_status *status)
+{
+ if (status->total_time > 0xfffff) {
+ status->total_time >>= 10;
+ status->busy_time >>= 10;
+ }
+
+ status->busy_time <<= 10;
+ status->busy_time /= status->total_time ? : 1;
+
+ status->busy_time = status->busy_time ? : 1;
+ status->total_time = 1024;
+}
+
+static u64 get_pd_power_uw(struct dtpm *dtpm)
+{
+ struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+ struct devfreq *devfreq = dtpm_devfreq->devfreq;
+ struct device *dev = devfreq->dev.parent;
+ struct em_perf_domain *pd = em_pd_get(dev);
+ struct devfreq_dev_status status;
+ unsigned long freq;
+ u64 power;
+ int i;
+
+ mutex_lock(&devfreq->lock);
+ status = devfreq->last_status;
+ mutex_unlock(&devfreq->lock);
+
+ freq = DIV_ROUND_UP(status.current_frequency, HZ_PER_KHZ);
+ _normalize_load(&status);
+
+ for (i = 0; i < pd->nr_perf_states; i++) {
+
+ if (pd->table[i].frequency < freq)
+ continue;
+
+ power = pd->table[i].power * MICROWATT_PER_MILLIWATT;
+ power *= status.busy_time;
+ power >>= 10;
+
+ return power;
+ }
+
+ return 0;
+}
+
+static void pd_release(struct dtpm *dtpm)
+{
+ struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+
+ if (dev_pm_qos_request_active(&dtpm_devfreq->qos_req))
+ dev_pm_qos_remove_request(&dtpm_devfreq->qos_req);
+
+ kfree(dtpm_devfreq);
+}
+
+static struct dtpm_ops dtpm_ops = {
+ .set_power_uw = set_pd_power_limit,
+ .get_power_uw = get_pd_power_uw,
+ .update_power_uw = update_pd_power_uw,
+ .release = pd_release,
+};
+
+static int __dtpm_devfreq_setup(struct devfreq *devfreq, struct dtpm *parent)
+{
+ struct device *dev = devfreq->dev.parent;
+ struct dtpm_devfreq *dtpm_devfreq;
+ struct em_perf_domain *pd;
+ int ret = -ENOMEM;
+
+ pd = em_pd_get(dev);
+ if (!pd) {
+ ret = dev_pm_opp_of_register_em(dev, NULL);
+ if (ret) {
+ pr_err("No energy model available for '%s'\n", dev_name(dev));
+ return -EINVAL;
+ }
+ }
+
+ dtpm_devfreq = kzalloc(sizeof(*dtpm_devfreq), GFP_KERNEL);
+ if (!dtpm_devfreq)
+ return -ENOMEM;
+
+ dtpm_init(&dtpm_devfreq->dtpm, &dtpm_ops);
+
+ dtpm_devfreq->devfreq = devfreq;
+
+ ret = dtpm_register(dev_name(dev), &dtpm_devfreq->dtpm, parent);
+ if (ret) {
+ pr_err("Failed to register '%s': %d\n", dev_name(dev), ret);
+ kfree(dtpm_devfreq);
+ return ret;
+ }
+
+ ret = dev_pm_qos_add_request(dev, &dtpm_devfreq->qos_req,
+ DEV_PM_QOS_MAX_FREQUENCY,
+ PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+ if (ret) {
+ pr_err("Failed to add QoS request: %d\n", ret);
+ goto out_dtpm_unregister;
+ }
+
+ dtpm_update_power(&dtpm_devfreq->dtpm);
+
+ return 0;
+
+out_dtpm_unregister:
+ dtpm_unregister(&dtpm_devfreq->dtpm);
+
+ return ret;
+}
+
+static int dtpm_devfreq_setup(struct dtpm *dtpm, struct device_node *np)
+{
+ struct devfreq *devfreq;
+
+ devfreq = devfreq_get_devfreq_by_node(np);
+ if (IS_ERR(devfreq))
+ return 0;
+
+ return __dtpm_devfreq_setup(devfreq, dtpm);
+}
+
+struct dtpm_subsys_ops dtpm_devfreq_ops = {
+ .name = KBUILD_MODNAME,
+ .setup = dtpm_devfreq_setup,
+};
diff --git a/drivers/powercap/dtpm_subsys.h b/drivers/powercap/dtpm_subsys.h
new file mode 100644
index 000000000000..db1712938a96
--- /dev/null
+++ b/drivers/powercap/dtpm_subsys.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Linaro Ltd
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ */
+#ifndef ___DTPM_SUBSYS_H__
+#define ___DTPM_SUBSYS_H__
+
+extern struct dtpm_subsys_ops dtpm_cpu_ops;
+extern struct dtpm_subsys_ops dtpm_devfreq_ops;
+
+struct dtpm_subsys_ops *dtpm_subsys[] = {
+#ifdef CONFIG_DTPM_CPU
+ &dtpm_cpu_ops,
+#endif
+#ifdef CONFIG_DTPM_DEVFREQ
+ &dtpm_devfreq_ops,
+#endif
+};
+
+#endif
diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
index cd1270614cc6..999e218d7793 100644
--- a/drivers/powercap/idle_inject.c
+++ b/drivers/powercap/idle_inject.c
@@ -12,15 +12,15 @@
*
* All of the kthreads used for idle injection are created at init time.
*
- * Next, the users of the the idle injection framework provide a cpumask via
+ * Next, the users of the idle injection framework provide a cpumask via
* its register function. The kthreads will be synchronized with respect to
* this cpumask.
*
* The idle + run duration is specified via separate helpers and that allows
* idle injection to be started.
*
- * The idle injection kthreads will call play_idle() with the idle duration
- * specified as per the above.
+ * The idle injection kthreads will call play_idle_precise() with the idle
+ * duration and max allowed latency specified as per the above.
*
* After all of them have been woken up, a timer is set to start the next idle
* injection cycle.
@@ -43,6 +43,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/smpboot.h>
+#include <linux/idle_inject.h>
#include <uapi/linux/sched/types.h>
@@ -61,13 +62,15 @@ struct idle_inject_thread {
* @timer: idle injection period timer
* @idle_duration_us: duration of CPU idle time to inject
* @run_duration_us: duration of CPU run time to allow
+ * @latency_us: max allowed latency
* @cpumask: mask of CPUs affected by idle injection
*/
struct idle_inject_device {
struct hrtimer timer;
unsigned int idle_duration_us;
unsigned int run_duration_us;
- unsigned long int cpumask[0];
+ unsigned int latency_us;
+ unsigned long cpumask[];
};
static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread);
@@ -98,7 +101,7 @@ static void idle_inject_wakeup(struct idle_inject_device *ii_dev)
*
* This function is called when the idle injection timer expires. It wakes up
* idle injection tasks associated with the timer and they, in turn, invoke
- * play_idle() to inject a specified amount of CPU idle time.
+ * play_idle_precise() to inject a specified amount of CPU idle time.
*
* Return: HRTIMER_RESTART.
*/
@@ -122,8 +125,8 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
* idle_inject_fn - idle injection work function
* @cpu: the CPU owning the task
*
- * This function calls play_idle() to inject a specified amount of CPU idle
- * time.
+ * This function calls play_idle_precise() to inject a specified amount of CPU
+ * idle time.
*/
static void idle_inject_fn(unsigned int cpu)
{
@@ -138,7 +141,8 @@ static void idle_inject_fn(unsigned int cpu)
*/
iit->should_run = 0;
- play_idle(READ_ONCE(ii_dev->idle_duration_us));
+ play_idle_precise(READ_ONCE(ii_dev->idle_duration_us) * NSEC_PER_USEC,
+ READ_ONCE(ii_dev->latency_us) * NSEC_PER_USEC);
}
/**
@@ -170,6 +174,16 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev,
}
/**
+ * idle_inject_set_latency - set the maximum latency allowed
+ * @latency_us: set the latency requirement for the idle state
+ */
+void idle_inject_set_latency(struct idle_inject_device *ii_dev,
+ unsigned int latency_us)
+{
+ WRITE_ONCE(ii_dev->latency_us, latency_us);
+}
+
+/**
* idle_inject_start - start idle injections
* @ii_dev: idle injection control device structure
*
@@ -240,7 +254,7 @@ void idle_inject_stop(struct idle_inject_device *ii_dev)
iit = per_cpu_ptr(&idle_inject_thread, cpu);
iit->should_run = 0;
- wait_task_inactive(iit->tsk, 0);
+ wait_task_inactive(iit->tsk, TASK_ANY);
}
cpu_hotplug_enable();
@@ -255,9 +269,7 @@ void idle_inject_stop(struct idle_inject_device *ii_dev)
*/
static void idle_inject_setup(unsigned int cpu)
{
- struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
-
- sched_setscheduler(current, SCHED_FIFO, &param);
+ sched_set_fifo(current);
}
/**
@@ -297,6 +309,7 @@ struct idle_inject_device *idle_inject_register(struct cpumask *cpumask)
cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask);
hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
ii_dev->timer.function = idle_inject_timer_fn;
+ ii_dev->latency_us = UINT_MAX;
for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 73257cf107d9..26d00b1853b4 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -26,9 +26,6 @@
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
-/* Local defines */
-#define MSR_PLATFORM_POWER_LIMIT 0x0000065C
-
/* bitmasks for RAPL MSRs, used by primitive access functions */
#define ENERGY_STATUS_MASK 0xffffffff
@@ -42,6 +39,8 @@
#define POWER_HIGH_LOCK BIT_ULL(63)
#define POWER_LOW_LOCK BIT(31)
+#define POWER_LIMIT4_MASK 0x1FFF
+
#define TIME_WINDOW1_MASK (0x7FULL<<17)
#define TIME_WINDOW2_MASK (0x7FULL<<49)
@@ -62,6 +61,20 @@
#define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
#define PP_POLICY_MASK 0x1F
+/*
+ * SPR has different layout for Psys Domain PowerLimit registers.
+ * There are 17 bits of PL1 and PL2 instead of 15 bits.
+ * The Enable bits and TimeWindow bits are also shifted as a result.
+ */
+#define PSYS_POWER_LIMIT1_MASK 0x1FFFF
+#define PSYS_POWER_LIMIT1_ENABLE BIT(17)
+
+#define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32)
+#define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49)
+
+#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19)
+#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51)
+
/* Non HW constants */
#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
#define RAPL_PRIMITIVE_DUMMY BIT(2)
@@ -85,6 +98,7 @@ enum unit_type {
static const char pl1_name[] = "long_term";
static const char pl2_name[] = "short_term";
+static const char pl4_name[] = "peak_power";
#define power_zone_to_rapl_domain(_zone) \
container_of(_zone, struct rapl_domain, power_zone)
@@ -96,6 +110,8 @@ struct rapl_defaults {
u64 (*compute_time_window)(struct rapl_package *rp, u64 val,
bool to_raw);
unsigned int dram_domain_energy_unit;
+ unsigned int psys_domain_energy_unit;
+ bool spr_psys_bits;
};
static struct rapl_defaults *rapl_defaults;
@@ -157,16 +173,16 @@ static int get_energy_counter(struct powercap_zone *power_zone,
/* prevent CPU hotplug, make sure the RAPL domain does not go
* away while reading the counter.
*/
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) {
*energy_raw = energy_now;
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
- put_online_cpus();
+ cpus_read_unlock();
return -EIO;
}
@@ -215,11 +231,11 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
return -EACCES;
- get_online_cpus();
+ cpus_read_lock();
rapl_write_data_raw(rd, PL1_ENABLE, mode);
if (rapl_defaults->set_floor_freq)
rapl_defaults->set_floor_freq(rd, mode);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
@@ -233,13 +249,13 @@ static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
*mode = false;
return 0;
}
- get_online_cpus();
+ cpus_read_lock();
if (rapl_read_data_raw(rd, PL1_ENABLE, true, &val)) {
- put_online_cpus();
+ cpus_read_unlock();
return -EIO;
}
*mode = val;
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
@@ -316,7 +332,7 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid,
int ret = 0;
int id;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
@@ -340,13 +356,16 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid,
case PL2_ENABLE:
rapl_write_data_raw(rd, POWER_LIMIT2, power_limit);
break;
+ case PL4_ENABLE:
+ rapl_write_data_raw(rd, POWER_LIMIT4, power_limit);
+ break;
default:
ret = -EINVAL;
}
if (!ret)
package_power_limit_irq_save(rp);
set_exit:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -359,7 +378,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
int ret = 0;
int id;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
@@ -374,8 +393,11 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
case PL2_ENABLE:
prim = POWER_LIMIT2;
break;
+ case PL4_ENABLE:
+ prim = POWER_LIMIT4;
+ break;
default:
- put_online_cpus();
+ cpus_read_unlock();
return -EINVAL;
}
if (rapl_read_data_raw(rd, prim, true, &val))
@@ -384,7 +406,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
*data = val;
get_exit:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -396,7 +418,7 @@ static int set_time_window(struct powercap_zone *power_zone, int cid,
int ret = 0;
int id;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
@@ -416,7 +438,7 @@ static int set_time_window(struct powercap_zone *power_zone, int cid,
}
set_time_exit:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -428,7 +450,7 @@ static int get_time_window(struct powercap_zone *power_zone, int cid,
int ret = 0;
int id;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
@@ -443,15 +465,22 @@ static int get_time_window(struct powercap_zone *power_zone, int cid,
case PL2_ENABLE:
ret = rapl_read_data_raw(rd, TIME_WINDOW2, true, &val);
break;
+ case PL4_ENABLE:
+ /*
+ * Time window parameter is not applicable for PL4 entry
+ * so assigining '0' as default value.
+ */
+ val = 0;
+ break;
default:
- put_online_cpus();
+ cpus_read_unlock();
return -EINVAL;
}
if (!ret)
*data = val;
get_time_exit:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -477,7 +506,7 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
int prim;
int ret = 0;
- get_online_cpus();
+ cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
switch (rd->rpl[id].prim_id) {
case PL1_ENABLE:
@@ -486,8 +515,11 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
case PL2_ENABLE:
prim = MAX_POWER;
break;
+ case PL4_ENABLE:
+ prim = MAX_POWER;
+ break;
default:
- put_online_cpus();
+ cpus_read_unlock();
return -EINVAL;
}
if (rapl_read_data_raw(rd, prim, true, &val))
@@ -495,7 +527,11 @@ static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
else
*data = val;
- put_online_cpus();
+ /* As a generalization rule, PL4 would be around two times PL2. */
+ if (rd->rpl[id].prim_id == PL4_ENABLE)
+ *data = *data * 2;
+
+ cpus_read_unlock();
return ret;
}
@@ -523,25 +559,53 @@ static void rapl_init_domains(struct rapl_package *rp)
continue;
rd->rp = rp;
- rd->name = rapl_domain_names[i];
+
+ if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) {
+ snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d",
+ topology_physical_package_id(rp->lead_cpu));
+ } else
+ snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s",
+ rapl_domain_names[i]);
+
rd->id = i;
rd->rpl[0].prim_id = PL1_ENABLE;
rd->rpl[0].name = pl1_name;
- /* some domain may support two power limits */
- if (rp->priv->limits[i] == 2) {
+
+ /*
+ * The PL2 power domain is applicable for limits two
+ * and limits three
+ */
+ if (rp->priv->limits[i] >= 2) {
rd->rpl[1].prim_id = PL2_ENABLE;
rd->rpl[1].name = pl2_name;
}
+ /* Enable PL4 domain if the total power limits are three */
+ if (rp->priv->limits[i] == 3) {
+ rd->rpl[2].prim_id = PL4_ENABLE;
+ rd->rpl[2].name = pl4_name;
+ }
+
for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++)
rd->regs[j] = rp->priv->regs[i][j];
- if (i == RAPL_DOMAIN_DRAM) {
+ switch (i) {
+ case RAPL_DOMAIN_DRAM:
rd->domain_energy_unit =
rapl_defaults->dram_domain_energy_unit;
if (rd->domain_energy_unit)
pr_info("DRAM domain energy unit %dpj\n",
rd->domain_energy_unit);
+ break;
+ case RAPL_DOMAIN_PLATFORM:
+ rd->domain_energy_unit =
+ rapl_defaults->psys_domain_energy_unit;
+ if (rd->domain_energy_unit)
+ pr_info("Platform domain energy unit %dpj\n",
+ rd->domain_energy_unit);
+ break;
+ default:
+ break;
}
rd++;
}
@@ -571,7 +635,7 @@ static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
case ARBITRARY_UNIT:
default:
return value;
- };
+ }
if (to_raw)
return div64_u64(value, units) * scale;
@@ -590,6 +654,8 @@ static struct rapl_primitive_info rpi[] = {
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+ PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
+ RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
@@ -600,6 +666,8 @@ static struct rapl_primitive_info rpi[] = {
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0,
+ RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
@@ -616,12 +684,51 @@ static struct rapl_primitive_info rpi[] = {
RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
+ PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
+ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+ PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
+ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+ PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
+ RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
+ PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
+ RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
/* non-hardware */
PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
RAPL_PRIMITIVE_DERIVED),
{NULL, 0, 0, 0},
};
+static enum rapl_primitives
+prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
+{
+ if (!rapl_defaults->spr_psys_bits)
+ return prim;
+
+ if (rd->id != RAPL_DOMAIN_PLATFORM)
+ return prim;
+
+ switch (prim) {
+ case POWER_LIMIT1:
+ return PSYS_POWER_LIMIT1;
+ case POWER_LIMIT2:
+ return PSYS_POWER_LIMIT2;
+ case PL1_ENABLE:
+ return PSYS_PL1_ENABLE;
+ case PL2_ENABLE:
+ return PSYS_PL2_ENABLE;
+ case TIME_WINDOW1:
+ return PSYS_TIME_WINDOW1;
+ case TIME_WINDOW2:
+ return PSYS_TIME_WINDOW2;
+ default:
+ return prim;
+ }
+}
+
/* Read primitive data based on its related struct rapl_primitive_info.
* if xlate flag is set, return translated data based on data units, i.e.
* time, energy, and power.
@@ -639,7 +746,8 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim, bool xlate, u64 *data)
{
u64 value;
- struct rapl_primitive_info *rp = &rpi[prim];
+ enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
+ struct rapl_primitive_info *rp = &rpi[prim_fixed];
struct reg_action ra;
int cpu;
@@ -685,7 +793,8 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
unsigned long long value)
{
- struct rapl_primitive_info *rp = &rpi[prim];
+ enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
+ struct rapl_primitive_info *rp = &rpi[prim_fixed];
int cpu;
u64 bits;
struct reg_action ra;
@@ -885,6 +994,9 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
y = value & 0x1f;
value = (1 << y) * (4 + f) * rp->time_unit / 4;
} else {
+ if (value < rp->time_unit)
+ return 0;
+
do_div(value, rp->time_unit);
y = ilog2(value);
f = div64_u64(4 * (value - (1 << y)), 1 << y);
@@ -901,7 +1013,7 @@ static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
* where time_unit is default to 1 sec. Never 0.
*/
if (!to_raw)
- return (value) ? value *= rp->time_unit : rp->time_unit;
+ return (value) ? value * rp->time_unit : rp->time_unit;
value = div64_u64(value, rp->time_unit);
@@ -922,6 +1034,14 @@ static const struct rapl_defaults rapl_defaults_hsw_server = {
.dram_domain_energy_unit = 15300,
};
+static const struct rapl_defaults rapl_defaults_spr_server = {
+ .check_unit = rapl_check_unit_core,
+ .set_floor_freq = set_floor_freq_default,
+ .compute_time_window = rapl_compute_time_window_core,
+ .psys_domain_energy_unit = 1000000000,
+ .spr_psys_bits = true,
+};
+
static const struct rapl_defaults rapl_defaults_byt = {
.floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT,
.check_unit = rapl_check_unit_atom,
@@ -950,53 +1070,71 @@ static const struct rapl_defaults rapl_defaults_cht = {
.compute_time_window = rapl_compute_time_window_atom,
};
+static const struct rapl_defaults rapl_defaults_amd = {
+ .check_unit = rapl_check_unit_core,
+};
+
static const struct x86_cpu_id rapl_ids[] __initconst = {
- INTEL_CPU_FAM6(SANDYBRIDGE, rapl_defaults_core),
- INTEL_CPU_FAM6(SANDYBRIDGE_X, rapl_defaults_core),
-
- INTEL_CPU_FAM6(IVYBRIDGE, rapl_defaults_core),
- INTEL_CPU_FAM6(IVYBRIDGE_X, rapl_defaults_core),
-
- INTEL_CPU_FAM6(HASWELL, rapl_defaults_core),
- INTEL_CPU_FAM6(HASWELL_L, rapl_defaults_core),
- INTEL_CPU_FAM6(HASWELL_G, rapl_defaults_core),
- INTEL_CPU_FAM6(HASWELL_X, rapl_defaults_hsw_server),
-
- INTEL_CPU_FAM6(BROADWELL, rapl_defaults_core),
- INTEL_CPU_FAM6(BROADWELL_G, rapl_defaults_core),
- INTEL_CPU_FAM6(BROADWELL_D, rapl_defaults_core),
- INTEL_CPU_FAM6(BROADWELL_X, rapl_defaults_hsw_server),
-
- INTEL_CPU_FAM6(SKYLAKE, rapl_defaults_core),
- INTEL_CPU_FAM6(SKYLAKE_L, rapl_defaults_core),
- INTEL_CPU_FAM6(SKYLAKE_X, rapl_defaults_hsw_server),
- INTEL_CPU_FAM6(KABYLAKE_L, rapl_defaults_core),
- INTEL_CPU_FAM6(KABYLAKE, rapl_defaults_core),
- INTEL_CPU_FAM6(CANNONLAKE_L, rapl_defaults_core),
- INTEL_CPU_FAM6(ICELAKE_L, rapl_defaults_core),
- INTEL_CPU_FAM6(ICELAKE, rapl_defaults_core),
- INTEL_CPU_FAM6(ICELAKE_NNPI, rapl_defaults_core),
- INTEL_CPU_FAM6(ICELAKE_X, rapl_defaults_hsw_server),
- INTEL_CPU_FAM6(ICELAKE_D, rapl_defaults_hsw_server),
- INTEL_CPU_FAM6(COMETLAKE_L, rapl_defaults_core),
- INTEL_CPU_FAM6(COMETLAKE, rapl_defaults_core),
- INTEL_CPU_FAM6(TIGERLAKE_L, rapl_defaults_core),
-
- INTEL_CPU_FAM6(ATOM_SILVERMONT, rapl_defaults_byt),
- INTEL_CPU_FAM6(ATOM_AIRMONT, rapl_defaults_cht),
- INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, rapl_defaults_tng),
- INTEL_CPU_FAM6(ATOM_AIRMONT_MID, rapl_defaults_ann),
- INTEL_CPU_FAM6(ATOM_GOLDMONT, rapl_defaults_core),
- INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, rapl_defaults_core),
- INTEL_CPU_FAM6(ATOM_GOLDMONT_D, rapl_defaults_core),
- INTEL_CPU_FAM6(ATOM_TREMONT_D, rapl_defaults_core),
- INTEL_CPU_FAM6(ATOM_TREMONT_L, rapl_defaults_core),
-
- INTEL_CPU_FAM6(XEON_PHI_KNL, rapl_defaults_hsw_server),
- INTEL_CPU_FAM6(XEON_PHI_KNM, rapl_defaults_hsw_server),
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &rapl_defaults_core),
+
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &rapl_defaults_core),
+
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &rapl_defaults_hsw_server),
+
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &rapl_defaults_hsw_server),
+
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &rapl_defaults_hsw_server),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &rapl_defaults_hsw_server),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &rapl_defaults_hsw_server),
+ X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
+ X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core),
+
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &rapl_defaults_cht),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &rapl_defaults_tng),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID, &rapl_defaults_ann),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &rapl_defaults_core),
+
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &rapl_defaults_hsw_server),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &rapl_defaults_hsw_server),
+
+ X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd),
+ X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd),
+ X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd),
{}
};
-
MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
/* Read once for all raw primitive data for domains */
@@ -1053,13 +1191,17 @@ static int rapl_package_register_powercap(struct rapl_package *rp)
}
/* now register domains as children of the socket/package */
for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
+ struct powercap_zone *parent = rp->power_zone;
+
if (rd->id == RAPL_DOMAIN_PACKAGE)
continue;
+ if (rd->id == RAPL_DOMAIN_PLATFORM)
+ parent = NULL;
/* number of power limits per domain varies */
nr_pl = find_nr_power_limit(rd);
power_zone = powercap_register_zone(&rd->power_zone,
rp->priv->control_type,
- rd->name, rp->power_zone,
+ rd->name, parent,
&zone_ops[rd->id], nr_pl,
&constraint_ops);
@@ -1086,67 +1228,6 @@ err_cleanup:
return ret;
}
-int rapl_add_platform_domain(struct rapl_if_priv *priv)
-{
- struct rapl_domain *rd;
- struct powercap_zone *power_zone;
- struct reg_action ra;
- int ret;
-
- ra.reg = priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS];
- ra.mask = ~0;
- ret = priv->read_raw(0, &ra);
- if (ret || !ra.value)
- return -ENODEV;
-
- ra.reg = priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT];
- ra.mask = ~0;
- ret = priv->read_raw(0, &ra);
- if (ret || !ra.value)
- return -ENODEV;
-
- rd = kzalloc(sizeof(*rd), GFP_KERNEL);
- if (!rd)
- return -ENOMEM;
-
- rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM];
- rd->id = RAPL_DOMAIN_PLATFORM;
- rd->regs[RAPL_DOMAIN_REG_LIMIT] =
- priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT];
- rd->regs[RAPL_DOMAIN_REG_STATUS] =
- priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS];
- rd->rpl[0].prim_id = PL1_ENABLE;
- rd->rpl[0].name = pl1_name;
- rd->rpl[1].prim_id = PL2_ENABLE;
- rd->rpl[1].name = pl2_name;
- rd->rp = rapl_find_package_domain(0, priv);
-
- power_zone = powercap_register_zone(&rd->power_zone, priv->control_type,
- "psys", NULL,
- &zone_ops[RAPL_DOMAIN_PLATFORM],
- 2, &constraint_ops);
-
- if (IS_ERR(power_zone)) {
- kfree(rd);
- return PTR_ERR(power_zone);
- }
-
- priv->platform_rapl_domain = rd;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(rapl_add_platform_domain);
-
-void rapl_remove_platform_domain(struct rapl_if_priv *priv)
-{
- if (priv->platform_rapl_domain) {
- powercap_unregister_zone(priv->control_type,
- &priv->platform_rapl_domain->power_zone);
- kfree(priv->platform_rapl_domain);
- }
-}
-EXPORT_SYMBOL_GPL(rapl_remove_platform_domain);
-
static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp)
{
struct reg_action ra;
@@ -1156,11 +1237,9 @@ static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp)
case RAPL_DOMAIN_PP0:
case RAPL_DOMAIN_PP1:
case RAPL_DOMAIN_DRAM:
+ case RAPL_DOMAIN_PLATFORM:
ra.reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS];
break;
- case RAPL_DOMAIN_PLATFORM:
- /* PSYS(PLATFORM) is not a CPU domain, so avoid printng error */
- return -EINVAL;
default:
pr_err("invalid domain id %d\n", domain);
return -EINVAL;
@@ -1169,7 +1248,7 @@ static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp)
* values, otherwise skip it.
*/
- ra.mask = ~0;
+ ra.mask = ENERGY_STATUS_MASK;
if (rp->priv->read_raw(cpu, &ra) || !ra.value)
return -ENODEV;
@@ -1255,6 +1334,7 @@ void rapl_remove_package(struct rapl_package *rp)
if (find_nr_power_limit(rd) > 1) {
rapl_write_data_raw(rd, PL2_ENABLE, 0);
rapl_write_data_raw(rd, PL2_CLAMP, 0);
+ rapl_write_data_raw(rd, PL4_ENABLE, 0);
}
if (rd->id == RAPL_DOMAIN_PACKAGE) {
rd_package = rd;
@@ -1294,7 +1374,6 @@ struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv)
{
int id = topology_logical_die_id(cpu);
struct rapl_package *rp;
- struct cpuinfo_x86 *c = &cpu_data(cpu);
int ret;
if (!rapl_defaults)
@@ -1311,10 +1390,11 @@ struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv)
if (topology_max_die_per_package() > 1)
snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH,
- "package-%d-die-%d", c->phys_proc_id, c->cpu_die_id);
+ "package-%d-die-%d",
+ topology_physical_package_id(cpu), topology_die_id(cpu));
else
snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
- c->phys_proc_id);
+ topology_physical_package_id(cpu));
/* check if the package contains valid domains */
if (rapl_detect_domains(rp, cpu) || rapl_defaults->check_unit(rp, cpu)) {
@@ -1341,7 +1421,7 @@ static void power_limit_state_save(void)
struct rapl_domain *rd;
int nr_pl, ret, i;
- get_online_cpus();
+ cpus_read_lock();
list_for_each_entry(rp, &rapl_packages, plist) {
if (!rp->power_zone)
continue;
@@ -1363,10 +1443,17 @@ static void power_limit_state_save(void)
if (ret)
rd->rpl[i].last_power_limit = 0;
break;
+ case PL4_ENABLE:
+ ret = rapl_read_data_raw(rd,
+ POWER_LIMIT4, true,
+ &rd->rpl[i].last_power_limit);
+ if (ret)
+ rd->rpl[i].last_power_limit = 0;
+ break;
}
}
}
- put_online_cpus();
+ cpus_read_unlock();
}
static void power_limit_state_restore(void)
@@ -1375,7 +1462,7 @@ static void power_limit_state_restore(void)
struct rapl_domain *rd;
int nr_pl, i;
- get_online_cpus();
+ cpus_read_lock();
list_for_each_entry(rp, &rapl_packages, plist) {
if (!rp->power_zone)
continue;
@@ -1393,10 +1480,15 @@ static void power_limit_state_restore(void)
rapl_write_data_raw(rd, POWER_LIMIT2,
rd->rpl[i].last_power_limit);
break;
+ case PL4_ENABLE:
+ if (rd->rpl[i].last_power_limit)
+ rapl_write_data_raw(rd, POWER_LIMIT4,
+ rd->rpl[i].last_power_limit);
+ break;
}
}
}
- put_online_cpus();
+ cpus_read_unlock();
}
static int rapl_pm_callback(struct notifier_block *nb,
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index d5487965bdfe..bc6adda58883 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -28,9 +28,12 @@
/* Local defines */
#define MSR_PLATFORM_POWER_LIMIT 0x0000065C
+#define MSR_VR_CURRENT_CONFIG 0x00000601
/* private data for RAPL MSR Interface */
-static struct rapl_if_priv rapl_msr_priv = {
+static struct rapl_if_priv *rapl_msr_priv;
+
+static struct rapl_if_priv rapl_msr_priv_intel = {
.reg_unit = MSR_RAPL_POWER_UNIT,
.regs[RAPL_DOMAIN_PACKAGE] = {
MSR_PKG_POWER_LIMIT, MSR_PKG_ENERGY_STATUS, MSR_PKG_PERF_STATUS, 0, MSR_PKG_POWER_INFO },
@@ -43,6 +46,15 @@ static struct rapl_if_priv rapl_msr_priv = {
.regs[RAPL_DOMAIN_PLATFORM] = {
MSR_PLATFORM_POWER_LIMIT, MSR_PLATFORM_ENERGY_STATUS, 0, 0, 0},
.limits[RAPL_DOMAIN_PACKAGE] = 2,
+ .limits[RAPL_DOMAIN_PLATFORM] = 2,
+};
+
+static struct rapl_if_priv rapl_msr_priv_amd = {
+ .reg_unit = MSR_AMD_RAPL_POWER_UNIT,
+ .regs[RAPL_DOMAIN_PACKAGE] = {
+ 0, MSR_AMD_PKG_ENERGY_STATUS, 0, 0, 0 },
+ .regs[RAPL_DOMAIN_PP0] = {
+ 0, MSR_AMD_CORE_ENERGY_STATUS, 0, 0, 0 },
};
/* Handles CPU hotplug on multi-socket systems.
@@ -56,9 +68,9 @@ static int rapl_cpu_online(unsigned int cpu)
{
struct rapl_package *rp;
- rp = rapl_find_package_domain(cpu, &rapl_msr_priv);
+ rp = rapl_find_package_domain(cpu, rapl_msr_priv);
if (!rp) {
- rp = rapl_add_package(cpu, &rapl_msr_priv);
+ rp = rapl_add_package(cpu, rapl_msr_priv);
if (IS_ERR(rp))
return PTR_ERR(rp);
}
@@ -71,7 +83,7 @@ static int rapl_cpu_down_prep(unsigned int cpu)
struct rapl_package *rp;
int lead_cpu;
- rp = rapl_find_package_domain(cpu, &rapl_msr_priv);
+ rp = rapl_find_package_domain(cpu, rapl_msr_priv);
if (!rp)
return 0;
@@ -123,41 +135,68 @@ static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
return ra->err;
}
+/* List of verified CPUs. */
+static const struct x86_cpu_id pl4_support_ids[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_N, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE_P, X86_FEATURE_ANY },
+ {}
+};
+
static int rapl_msr_probe(struct platform_device *pdev)
{
+ const struct x86_cpu_id *id = x86_match_cpu(pl4_support_ids);
int ret;
- rapl_msr_priv.read_raw = rapl_msr_read_raw;
- rapl_msr_priv.write_raw = rapl_msr_write_raw;
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ rapl_msr_priv = &rapl_msr_priv_intel;
+ break;
+ case X86_VENDOR_HYGON:
+ case X86_VENDOR_AMD:
+ rapl_msr_priv = &rapl_msr_priv_amd;
+ break;
+ default:
+ pr_err("intel-rapl does not support CPU vendor %d\n", boot_cpu_data.x86_vendor);
+ return -ENODEV;
+ }
+ rapl_msr_priv->read_raw = rapl_msr_read_raw;
+ rapl_msr_priv->write_raw = rapl_msr_write_raw;
+
+ if (id) {
+ rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] = 3;
+ rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4] =
+ MSR_VR_CURRENT_CONFIG;
+ pr_info("PL4 support detected.\n");
+ }
- rapl_msr_priv.control_type = powercap_register_control_type(NULL, "intel-rapl", NULL);
- if (IS_ERR(rapl_msr_priv.control_type)) {
+ rapl_msr_priv->control_type = powercap_register_control_type(NULL, "intel-rapl", NULL);
+ if (IS_ERR(rapl_msr_priv->control_type)) {
pr_debug("failed to register powercap control_type.\n");
- return PTR_ERR(rapl_msr_priv.control_type);
+ return PTR_ERR(rapl_msr_priv->control_type);
}
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powercap/rapl:online",
rapl_cpu_online, rapl_cpu_down_prep);
if (ret < 0)
goto out;
- rapl_msr_priv.pcap_rapl_online = ret;
-
- /* Don't bail out if PSys is not supported */
- rapl_add_platform_domain(&rapl_msr_priv);
+ rapl_msr_priv->pcap_rapl_online = ret;
return 0;
out:
if (ret)
- powercap_unregister_control_type(rapl_msr_priv.control_type);
+ powercap_unregister_control_type(rapl_msr_priv->control_type);
return ret;
}
static int rapl_msr_remove(struct platform_device *pdev)
{
- cpuhp_remove_state(rapl_msr_priv.pcap_rapl_online);
- rapl_remove_platform_domain(&rapl_msr_priv);
- powercap_unregister_control_type(rapl_msr_priv.control_type);
+ cpuhp_remove_state(rapl_msr_priv->pcap_rapl_online);
+ powercap_unregister_control_type(rapl_msr_priv->control_type);
return 0;
}
diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c
index f808c5fa9838..f0654a932b37 100644
--- a/drivers/powercap/powercap_sys.c
+++ b/drivers/powercap/powercap_sys.c
@@ -170,9 +170,8 @@ static ssize_t show_constraint_name(struct device *dev,
if (pconst && pconst->ops && pconst->ops->get_name) {
name = pconst->ops->get_name(power_zone, id);
if (name) {
- snprintf(buf, POWERCAP_CONSTRAINT_NAME_LEN,
- "%s\n", name);
- buf[POWERCAP_CONSTRAINT_NAME_LEN] = '\0';
+ sprintf(buf, "%.*s\n", POWERCAP_CONSTRAINT_NAME_LEN - 1,
+ name);
len = strlen(buf);
}
}
@@ -367,9 +366,9 @@ static void create_power_zone_common_attributes(
&dev_attr_max_energy_range_uj.attr;
if (power_zone->ops->get_energy_uj) {
if (power_zone->ops->reset_energy_uj)
- dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUGO;
+ dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUSR;
else
- dev_attr_energy_uj.attr.mode = S_IRUGO;
+ dev_attr_energy_uj.attr.mode = S_IRUSR;
power_zone->zone_dev_attrs[count++] =
&dev_attr_energy_uj.attr;
}