diff options
Diffstat (limited to 'drivers/cpufreq')
45 files changed, 2651 insertions, 827 deletions
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index c3038cdc6865..2a84fc63371e 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -268,7 +268,7 @@ config LOONGSON2_CPUFREQ This option adds a CPUFreq driver for loongson processors which support software configurable cpu frequency. - Loongson2F and it's successors support this feature. + Loongson2F and its successors support this feature. If in doubt, say N. diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 954749afb5fe..82e5de1f6f8c 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -153,7 +153,7 @@ config ARM_OMAP2PLUS_CPUFREQ config ARM_QCOM_CPUFREQ_NVMEM tristate "Qualcomm nvmem based CPUFreq" depends on ARCH_QCOM - depends on QCOM_QFPROM + depends on NVMEM_QCOM_QFPROM depends on QCOM_SMEM select PM_OPP help diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 92701a18bdd9..310779b07daf 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -34,6 +34,38 @@ config X86_PCC_CPUFREQ If in doubt, say N. +config X86_AMD_PSTATE + tristate "AMD Processor P-State driver" + depends on X86 && ACPI + select ACPI_PROCESSOR + select ACPI_CPPC_LIB if X86_64 + select CPU_FREQ_GOV_SCHEDUTIL if SMP + help + This driver adds a CPUFreq driver which utilizes a fine grain + processor performance frequency control range instead of legacy + performance levels. _CPC needs to be present in the ACPI tables + of the system. + + For details, take a look at: + <file:Documentation/admin-guide/pm/amd-pstate.rst>. + + If in doubt, say N. + +config X86_AMD_PSTATE_UT + tristate "selftest for AMD Processor P-State driver" + depends on X86 && ACPI_PROCESSOR + default n + help + This kernel module is used for testing. It's safe to say M here. + + It can also be built-in without X86_AMD_PSTATE enabled. + Currently, only tests for amd-pstate are supported. If X86_AMD_PSTATE + is set disabled, it can tell the users test can only run on amd-pstate + driver, please set X86_AMD_PSTATE enabled. + In the future, comparison tests will be added. It can set amd-pstate + disabled and set acpi-cpufreq enabled to run test cases, then compare + the test results. + config X86_ACPI_CPUFREQ tristate "ACPI Processor P-States driver" depends on ACPI_PROCESSOR diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 48ee5859030c..49b98c62c5af 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -17,6 +17,10 @@ obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o +# Traces +CFLAGS_amd-pstate-trace.o := -I$(src) +amd_pstate-y := amd-pstate.o amd-pstate-trace.o + ################################################################################## # x86 drivers. # Link order matters. K8 is preferred to ACPI because of firmware bugs in early @@ -25,6 +29,8 @@ obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o # speedstep-* is preferred over p4-clockmod. obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o +obj-$(CONFIG_X86_AMD_PSTATE) += amd_pstate.o +obj-$(CONFIG_X86_AMD_PSTATE_UT) += amd-pstate-ut.o obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 3d514b82d055..1bb2b90ebb21 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -78,6 +78,8 @@ static bool boost_state(unsigned int cpu) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: + case X86_VENDOR_CENTAUR: + case X86_VENDOR_ZHAOXIN: rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi); msr = lo | ((u64)hi << 32); return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); @@ -97,6 +99,8 @@ static int boost_set_msr(bool enable) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: + case X86_VENDOR_CENTAUR: + case X86_VENDOR_ZHAOXIN: msr_addr = MSR_IA32_MISC_ENABLE; msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE; break; diff --git a/drivers/cpufreq/amd-pstate-trace.c b/drivers/cpufreq/amd-pstate-trace.c new file mode 100644 index 000000000000..891b696dcd69 --- /dev/null +++ b/drivers/cpufreq/amd-pstate-trace.c @@ -0,0 +1,2 @@ +#define CREATE_TRACE_POINTS +#include "amd-pstate-trace.h" diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h new file mode 100644 index 000000000000..35f38ae67fb1 --- /dev/null +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * amd-pstate-trace.h - AMD Processor P-state Frequency Driver Tracer + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Author: Huang Rui <ray.huang@amd.com> + */ + +#if !defined(_AMD_PSTATE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _AMD_PSTATE_TRACE_H + +#include <linux/cpufreq.h> +#include <linux/tracepoint.h> +#include <linux/trace_events.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM amd_cpu + +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE amd-pstate-trace + +#define TPS(x) tracepoint_string(x) + +TRACE_EVENT(amd_pstate_perf, + + TP_PROTO(unsigned long min_perf, + unsigned long target_perf, + unsigned long capacity, + u64 freq, + u64 mperf, + u64 aperf, + u64 tsc, + unsigned int cpu_id, + bool changed, + bool fast_switch + ), + + TP_ARGS(min_perf, + target_perf, + capacity, + freq, + mperf, + aperf, + tsc, + cpu_id, + changed, + fast_switch + ), + + TP_STRUCT__entry( + __field(unsigned long, min_perf) + __field(unsigned long, target_perf) + __field(unsigned long, capacity) + __field(unsigned long long, freq) + __field(unsigned long long, mperf) + __field(unsigned long long, aperf) + __field(unsigned long long, tsc) + __field(unsigned int, cpu_id) + __field(bool, changed) + __field(bool, fast_switch) + ), + + TP_fast_assign( + __entry->min_perf = min_perf; + __entry->target_perf = target_perf; + __entry->capacity = capacity; + __entry->freq = freq; + __entry->mperf = mperf; + __entry->aperf = aperf; + __entry->tsc = tsc; + __entry->cpu_id = cpu_id; + __entry->changed = changed; + __entry->fast_switch = fast_switch; + ), + + TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s", + (unsigned long)__entry->min_perf, + (unsigned long)__entry->target_perf, + (unsigned long)__entry->capacity, + (unsigned long long)__entry->freq, + (unsigned long long)__entry->mperf, + (unsigned long long)__entry->aperf, + (unsigned long long)__entry->tsc, + (unsigned int)__entry->cpu_id, + (__entry->changed) ? "true" : "false", + (__entry->fast_switch) ? "true" : "false" + ) +); + +#endif /* _AMD_PSTATE_TRACE_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . + +#include <trace/define_trace.h> diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c new file mode 100644 index 000000000000..e4a5b4d90f83 --- /dev/null +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -0,0 +1,293 @@ +// SPDX-License-Identifier: GPL-1.0-or-later +/* + * AMD Processor P-state Frequency Driver Unit Test + * + * Copyright (C) 2022 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Author: Meng Li <li.meng@amd.com> + * + * The AMD P-State Unit Test is a test module for testing the amd-pstate + * driver. 1) It can help all users to verify their processor support + * (SBIOS/Firmware or Hardware). 2) Kernel can have a basic function + * test to avoid the kernel regression during the update. 3) We can + * introduce more functional or performance tests to align the result + * together, it will benefit power and performance scale optimization. + * + * This driver implements basic framework with plans to enhance it with + * additional test cases to improve the depth and coverage of the test. + * + * See Documentation/admin-guide/pm/amd-pstate.rst Unit Tests for + * amd-pstate to get more detail. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/fs.h> +#include <linux/amd-pstate.h> + +#include <acpi/cppc_acpi.h> + +/* + * Abbreviations: + * amd_pstate_ut: used as a shortform for AMD P-State unit test. + * It helps to keep variable names smaller, simpler + */ +enum amd_pstate_ut_result { + AMD_PSTATE_UT_RESULT_PASS, + AMD_PSTATE_UT_RESULT_FAIL, +}; + +struct amd_pstate_ut_struct { + const char *name; + void (*func)(u32 index); + enum amd_pstate_ut_result result; +}; + +/* + * Kernel module for testing the AMD P-State unit test + */ +static void amd_pstate_ut_acpi_cpc_valid(u32 index); +static void amd_pstate_ut_check_enabled(u32 index); +static void amd_pstate_ut_check_perf(u32 index); +static void amd_pstate_ut_check_freq(u32 index); + +static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = { + {"amd_pstate_ut_acpi_cpc_valid", amd_pstate_ut_acpi_cpc_valid }, + {"amd_pstate_ut_check_enabled", amd_pstate_ut_check_enabled }, + {"amd_pstate_ut_check_perf", amd_pstate_ut_check_perf }, + {"amd_pstate_ut_check_freq", amd_pstate_ut_check_freq } +}; + +static bool get_shared_mem(void) +{ + bool result = false; + char path[] = "/sys/module/amd_pstate/parameters/shared_mem"; + char buf[5] = {0}; + struct file *filp = NULL; + loff_t pos = 0; + ssize_t ret; + + if (!boot_cpu_has(X86_FEATURE_CPPC)) { + filp = filp_open(path, O_RDONLY, 0); + if (IS_ERR(filp)) + pr_err("%s unable to open %s file!\n", __func__, path); + else { + ret = kernel_read(filp, &buf, sizeof(buf), &pos); + if (ret < 0) + pr_err("%s read %s file fail ret=%ld!\n", + __func__, path, (long)ret); + filp_close(filp, NULL); + } + + if ('Y' == *buf) + result = true; + } + + return result; +} + +/* + * check the _CPC object is present in SBIOS. + */ +static void amd_pstate_ut_acpi_cpc_valid(u32 index) +{ + if (acpi_cpc_valid()) + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; + else { + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + pr_err("%s the _CPC object is not present in SBIOS!\n", __func__); + } +} + +static void amd_pstate_ut_pstate_enable(u32 index) +{ + int ret = 0; + u64 cppc_enable = 0; + + ret = rdmsrl_safe(MSR_AMD_CPPC_ENABLE, &cppc_enable); + if (ret) { + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + pr_err("%s rdmsrl_safe MSR_AMD_CPPC_ENABLE ret=%d error!\n", __func__, ret); + return; + } + if (cppc_enable) + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; + else { + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + pr_err("%s amd pstate must be enabled!\n", __func__); + } +} + +/* + * check if amd pstate is enabled + */ +static void amd_pstate_ut_check_enabled(u32 index) +{ + if (get_shared_mem()) + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; + else + amd_pstate_ut_pstate_enable(index); +} + +/* + * check if performance values are reasonable. + * highest_perf >= nominal_perf > lowest_nonlinear_perf > lowest_perf > 0 + */ +static void amd_pstate_ut_check_perf(u32 index) +{ + int cpu = 0, ret = 0; + u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0; + u64 cap1 = 0; + struct cppc_perf_caps cppc_perf; + struct cpufreq_policy *policy = NULL; + struct amd_cpudata *cpudata = NULL; + + highest_perf = amd_get_highest_perf(); + + for_each_possible_cpu(cpu) { + policy = cpufreq_cpu_get(cpu); + if (!policy) + break; + cpudata = policy->driver_data; + + if (get_shared_mem()) { + ret = cppc_get_perf_caps(cpu, &cppc_perf); + if (ret) { + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret); + return; + } + + nominal_perf = cppc_perf.nominal_perf; + lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; + lowest_perf = cppc_perf.lowest_perf; + } else { + ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); + if (ret) { + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret); + return; + } + + nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1); + lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1); + lowest_perf = AMD_CPPC_LOWEST_PERF(cap1); + } + + if ((highest_perf != READ_ONCE(cpudata->highest_perf)) || + (nominal_perf != READ_ONCE(cpudata->nominal_perf)) || + (lowest_nonlinear_perf != READ_ONCE(cpudata->lowest_nonlinear_perf)) || + (lowest_perf != READ_ONCE(cpudata->lowest_perf))) { + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + pr_err("%s cpu%d highest=%d %d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n", + __func__, cpu, highest_perf, cpudata->highest_perf, + nominal_perf, cpudata->nominal_perf, + lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf, + lowest_perf, cpudata->lowest_perf); + return; + } + + if (!((highest_perf >= nominal_perf) && + (nominal_perf > lowest_nonlinear_perf) && + (lowest_nonlinear_perf > lowest_perf) && + (lowest_perf > 0))) { + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n", + __func__, cpu, highest_perf, nominal_perf, + lowest_nonlinear_perf, lowest_perf); + return; + } + } + + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; +} + +/* + * Check if frequency values are reasonable. + * max_freq >= nominal_freq > lowest_nonlinear_freq > min_freq > 0 + * check max freq when set support boost mode. + */ +static void amd_pstate_ut_check_freq(u32 index) +{ + int cpu = 0; + struct cpufreq_policy *policy = NULL; + struct amd_cpudata *cpudata = NULL; + + for_each_possible_cpu(cpu) { + policy = cpufreq_cpu_get(cpu); + if (!policy) + break; + cpudata = policy->driver_data; + + if (!((cpudata->max_freq >= cpudata->nominal_freq) && + (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && + (cpudata->lowest_nonlinear_freq > cpudata->min_freq) && + (cpudata->min_freq > 0))) { + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", + __func__, cpu, cpudata->max_freq, cpudata->nominal_freq, + cpudata->lowest_nonlinear_freq, cpudata->min_freq); + return; + } + + if (cpudata->min_freq != policy->min) { + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + pr_err("%s cpu%d cpudata_min_freq=%d policy_min=%d, they should be equal!\n", + __func__, cpu, cpudata->min_freq, policy->min); + return; + } + + if (cpudata->boost_supported) { + if ((policy->max == cpudata->max_freq) || + (policy->max == cpudata->nominal_freq)) + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; + else { + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", + __func__, cpu, policy->max, cpudata->max_freq, + cpudata->nominal_freq); + return; + } + } else { + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + pr_err("%s cpu%d must support boost!\n", __func__, cpu); + return; + } + } + + amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; +} + +static int __init amd_pstate_ut_init(void) +{ + u32 i = 0, arr_size = ARRAY_SIZE(amd_pstate_ut_cases); + + for (i = 0; i < arr_size; i++) { + amd_pstate_ut_cases[i].func(i); + switch (amd_pstate_ut_cases[i].result) { + case AMD_PSTATE_UT_RESULT_PASS: + pr_info("%-4d %-20s\t success!\n", i+1, amd_pstate_ut_cases[i].name); + break; + case AMD_PSTATE_UT_RESULT_FAIL: + default: + pr_info("%-4d %-20s\t fail!\n", i+1, amd_pstate_ut_cases[i].name); + break; + } + } + + return 0; +} + +static void __exit amd_pstate_ut_exit(void) +{ +} + +module_init(amd_pstate_ut_init); +module_exit(amd_pstate_ut_exit); + +MODULE_AUTHOR("Meng Li <li.meng@amd.com>"); +MODULE_DESCRIPTION("AMD P-state driver Test module"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c new file mode 100644 index 000000000000..ace7d50cf2ac --- /dev/null +++ b/drivers/cpufreq/amd-pstate.c @@ -0,0 +1,669 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * amd-pstate.c - AMD Processor P-state Frequency Driver + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Author: Huang Rui <ray.huang@amd.com> + * + * AMD P-State introduces a new CPU performance scaling design for AMD + * processors using the ACPI Collaborative Performance and Power Control (CPPC) + * feature which works with the AMD SMU firmware providing a finer grained + * frequency control range. It is to replace the legacy ACPI P-States control, + * allows a flexible, low-latency interface for the Linux kernel to directly + * communicate the performance hints to hardware. + * + * AMD P-State is supported on recent AMD Zen base CPU series include some of + * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of AMD + * P-State supported system. And there are two types of hardware implementations + * for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution. + * X86_FEATURE_CPPC CPU feature flag is used to distinguish the different types. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/sched.h> +#include <linux/cpufreq.h> +#include <linux/compiler.h> +#include <linux/dmi.h> +#include <linux/slab.h> +#include <linux/acpi.h> +#include <linux/io.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <linux/static_call.h> +#include <linux/amd-pstate.h> + +#include <acpi/processor.h> +#include <acpi/cppc_acpi.h> + +#include <asm/msr.h> +#include <asm/processor.h> +#include <asm/cpufeature.h> +#include <asm/cpu_device_id.h> +#include "amd-pstate-trace.h" + +#define AMD_PSTATE_TRANSITION_LATENCY 20000 +#define AMD_PSTATE_TRANSITION_DELAY 1000 + +/* + * TODO: We need more time to fine tune processors with shared memory solution + * with community together. + * + * There are some performance drops on the CPU benchmarks which reports from + * Suse. We are co-working with them to fine tune the shared memory solution. So + * we disable it by default to go acpi-cpufreq on these processors and add a + * module parameter to be able to enable it manually for debugging. + */ +static bool shared_mem = false; +module_param(shared_mem, bool, 0444); +MODULE_PARM_DESC(shared_mem, + "enable amd-pstate on processors with shared memory solution (false = disabled (default), true = enabled)"); + +static struct cpufreq_driver amd_pstate_driver; + +static inline int pstate_enable(bool enable) +{ + return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); +} + +static int cppc_enable(bool enable) +{ + int cpu, ret = 0; + + for_each_present_cpu(cpu) { + ret = cppc_set_enable(cpu, enable); + if (ret) + return ret; + } + + return ret; +} + +DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable); + +static inline int amd_pstate_enable(bool enable) +{ + return static_call(amd_pstate_enable)(enable); +} + +static int pstate_init_perf(struct amd_cpudata *cpudata) +{ + u64 cap1; + u32 highest_perf; + + int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, + &cap1); + if (ret) + return ret; + + /* + * TODO: Introduce AMD specific power feature. + * + * CPPC entry doesn't indicate the highest performance in some ASICs. + */ + highest_perf = amd_get_highest_perf(); + if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) + highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); + + WRITE_ONCE(cpudata->highest_perf, highest_perf); + + WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); + WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); + WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); + + return 0; +} + +static int cppc_init_perf(struct amd_cpudata *cpudata) +{ + struct cppc_perf_caps cppc_perf; + u32 highest_perf; + + int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + highest_perf = amd_get_highest_perf(); + if (highest_perf > cppc_perf.highest_perf) + highest_perf = cppc_perf.highest_perf; + + WRITE_ONCE(cpudata->highest_perf, highest_perf); + + WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); + WRITE_ONCE(cpudata->lowest_nonlinear_perf, + cppc_perf.lowest_nonlinear_perf); + WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + + return 0; +} + +DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); + +static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) +{ + return static_call(amd_pstate_init_perf)(cpudata); +} + +static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, + u32 des_perf, u32 max_perf, bool fast_switch) +{ + if (fast_switch) + wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); + else + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, + READ_ONCE(cpudata->cppc_req_cached)); +} + +static void cppc_update_perf(struct amd_cpudata *cpudata, + u32 min_perf, u32 des_perf, + u32 max_perf, bool fast_switch) +{ + struct cppc_perf_ctrls perf_ctrls; + + perf_ctrls.max_perf = max_perf; + perf_ctrls.min_perf = min_perf; + perf_ctrls.desired_perf = des_perf; + + cppc_set_perf(cpudata->cpu, &perf_ctrls); +} + +DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf); + +static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, + u32 min_perf, u32 des_perf, + u32 max_perf, bool fast_switch) +{ + static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, + max_perf, fast_switch); +} + +static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) +{ + u64 aperf, mperf, tsc; + unsigned long flags; + + local_irq_save(flags); + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + tsc = rdtsc(); + + if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) { + local_irq_restore(flags); + return false; + } + + local_irq_restore(flags); + + cpudata->cur.aperf = aperf; + cpudata->cur.mperf = mperf; + cpudata->cur.tsc = tsc; + cpudata->cur.aperf -= cpudata->prev.aperf; + cpudata->cur.mperf -= cpudata->prev.mperf; + cpudata->cur.tsc -= cpudata->prev.tsc; + + cpudata->prev.aperf = aperf; + cpudata->prev.mperf = mperf; + cpudata->prev.tsc = tsc; + + cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf); + + return true; +} + +static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, + u32 des_perf, u32 max_perf, bool fast_switch) +{ + u64 prev = READ_ONCE(cpudata->cppc_req_cached); + u64 value = prev; + + des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); + value &= ~AMD_CPPC_MIN_PERF(~0L); + value |= AMD_CPPC_MIN_PERF(min_perf); + + value &= ~AMD_CPPC_DES_PERF(~0L); + value |= AMD_CPPC_DES_PERF(des_perf); + + value &= ~AMD_CPPC_MAX_PERF(~0L); + value |= AMD_CPPC_MAX_PERF(max_perf); + + if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { + trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, + cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc, + cpudata->cpu, (value != prev), fast_switch); + } + + if (value == prev) + return; + + WRITE_ONCE(cpudata->cppc_req_cached, value); + + amd_pstate_update_perf(cpudata, min_perf, des_perf, + max_perf, fast_switch); +} + +static int amd_pstate_verify(struct cpufreq_policy_data *policy) +{ + cpufreq_verify_within_cpu_limits(policy); + + return 0; +} + +static int amd_pstate_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + struct cpufreq_freqs freqs; + struct amd_cpudata *cpudata = policy->driver_data; + unsigned long max_perf, min_perf, des_perf, cap_perf; + + if (!cpudata->max_freq) + return -ENODEV; + + cap_perf = READ_ONCE(cpudata->highest_perf); + min_perf = READ_ONCE(cpudata->lowest_perf); + max_perf = cap_perf; + + freqs.old = policy->cur; + freqs.new = target_freq; + + des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf, + cpudata->max_freq); + + cpufreq_freq_transition_begin(policy, &freqs); + amd_pstate_update(cpudata, min_perf, des_perf, + max_perf, false); + cpufreq_freq_transition_end(policy, &freqs, false); + + return 0; +} + +static void amd_pstate_adjust_perf(unsigned int cpu, + unsigned long _min_perf, + unsigned long target_perf, + unsigned long capacity) +{ + unsigned long max_perf, min_perf, des_perf, + cap_perf, lowest_nonlinear_perf; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct amd_cpudata *cpudata = policy->driver_data; + + cap_perf = READ_ONCE(cpudata->highest_perf); + lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); + + des_perf = cap_perf; + if (target_perf < capacity) + des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); + + min_perf = READ_ONCE(cpudata->highest_perf); + if (_min_perf < capacity) + min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); + + if (min_perf < lowest_nonlinear_perf) + min_perf = lowest_nonlinear_perf; + + max_perf = cap_perf; + if (max_perf < min_perf) + max_perf = min_perf; + + amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true); +} + +static int amd_get_min_freq(struct amd_cpudata *cpudata) +{ + struct cppc_perf_caps cppc_perf; + + int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + /* Switch to khz */ + return cppc_perf.lowest_freq * 1000; +} + +static int amd_get_max_freq(struct amd_cpudata *cpudata) +{ + struct cppc_perf_caps cppc_perf; + u32 max_perf, max_freq, nominal_freq, nominal_perf; + u64 boost_ratio; + + int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + nominal_freq = cppc_perf.nominal_freq; + nominal_perf = READ_ONCE(cpudata->nominal_perf); + max_perf = READ_ONCE(cpudata->highest_perf); + + boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT, + nominal_perf); + + max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT; + + /* Switch to khz */ + return max_freq * 1000; +} + +static int amd_get_nominal_freq(struct amd_cpudata *cpudata) +{ + struct cppc_perf_caps cppc_perf; + + int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + /* Switch to khz */ + return cppc_perf.nominal_freq * 1000; +} + +static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) +{ + struct cppc_perf_caps cppc_perf; + u32 lowest_nonlinear_freq, lowest_nonlinear_perf, + nominal_freq, nominal_perf; + u64 lowest_nonlinear_ratio; + + int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + nominal_freq = cppc_perf.nominal_freq; + nominal_perf = READ_ONCE(cpudata->nominal_perf); + + lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; + + lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, + nominal_perf); + + lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT; + + /* Switch to khz */ + return lowest_nonlinear_freq * 1000; +} + +static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) +{ + struct amd_cpudata *cpudata = policy->driver_data; + int ret; + + if (!cpudata->boost_supported) { + pr_err("Boost mode is not supported by this processor or SBIOS\n"); + return -EINVAL; + } + + if (state) + policy->cpuinfo.max_freq = cpudata->max_freq; + else + policy->cpuinfo.max_freq = cpudata->nominal_freq; + + policy->max = policy->cpuinfo.max_freq; + + ret = freq_qos_update_request(&cpudata->req[1], + policy->cpuinfo.max_freq); + if (ret < 0) + return ret; + + return 0; +} + +static void amd_pstate_boost_init(struct amd_cpudata *cpudata) +{ + u32 highest_perf, nominal_perf; + + highest_perf = READ_ONCE(cpudata->highest_perf); + nominal_perf = READ_ONCE(cpudata->nominal_perf); + + if (highest_perf <= nominal_perf) + return; + + cpudata->boost_supported = true; + amd_pstate_driver.boost_enabled = true; +} + +static int amd_pstate_cpu_init(struct cpufreq_policy *policy) +{ + int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; + struct device *dev; + struct amd_cpudata *cpudata; + + dev = get_cpu_device(policy->cpu); + if (!dev) + return -ENODEV; + + cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); + if (!cpudata) + return -ENOMEM; + + cpudata->cpu = policy->cpu; + + ret = amd_pstate_init_perf(cpudata); + if (ret) + goto free_cpudata1; + + min_freq = amd_get_min_freq(cpudata); + max_freq = amd_get_max_freq(cpudata); + nominal_freq = amd_get_nominal_freq(cpudata); + lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); + + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { + dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", + min_freq, max_freq); + ret = -EINVAL; + goto free_cpudata1; + } + + policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY; + policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY; + + policy->min = min_freq; + policy->max = max_freq; + + policy->cpuinfo.min_freq = min_freq; + policy->cpuinfo.max_freq = max_freq; + + /* It will be updated by governor */ + policy->cur = policy->cpuinfo.min_freq; + + if (boot_cpu_has(X86_FEATURE_CPPC)) + policy->fast_switch_possible = true; + + ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0], + FREQ_QOS_MIN, policy->cpuinfo.min_freq); + if (ret < 0) { + dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); + goto free_cpudata1; + } + + ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1], + FREQ_QOS_MAX, policy->cpuinfo.max_freq); + if (ret < 0) { + dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret); + goto free_cpudata2; + } + + /* Initial processor data capability frequencies */ + cpudata->max_freq = max_freq; + cpudata->min_freq = min_freq; + cpudata->nominal_freq = nominal_freq; + cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; + + policy->driver_data = cpudata; + + amd_pstate_boost_init(cpudata); + + return 0; + +free_cpudata2: + freq_qos_remove_request(&cpudata->req[0]); +free_cpudata1: + kfree(cpudata); + return ret; +} + +static int amd_pstate_cpu_exit(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + + freq_qos_remove_request(&cpudata->req[1]); + freq_qos_remove_request(&cpudata->req[0]); + kfree(cpudata); + + return 0; +} + +static int amd_pstate_cpu_resume(struct cpufreq_policy *policy) +{ + int ret; + + ret = amd_pstate_enable(true); + if (ret) + pr_err("failed to enable amd-pstate during resume, return %d\n", ret); + + return ret; +} + +static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) +{ + int ret; + + ret = amd_pstate_enable(false); + if (ret) + pr_err("failed to disable amd-pstate during suspend, return %d\n", ret); + + return ret; +} + +/* Sysfs attributes */ + +/* + * This frequency is to indicate the maximum hardware frequency. + * If boost is not active but supported, the frequency will be larger than the + * one in cpuinfo. + */ +static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, + char *buf) +{ + int max_freq; + struct amd_cpudata *cpudata = policy->driver_data; + + max_freq = amd_get_max_freq(cpudata); + if (max_freq < 0) + return max_freq; + + return sprintf(&buf[0], "%u\n", max_freq); +} + +static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, + char *buf) +{ + int freq; + struct amd_cpudata *cpudata = policy->driver_data; + + freq = amd_get_lowest_nonlinear_freq(cpudata); + if (freq < 0) + return freq; + + return sprintf(&buf[0], "%u\n", freq); +} + +/* + * In some of ASICs, the highest_perf is not the one in the _CPC table, so we + * need to expose it to sysfs. + */ +static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, + char *buf) +{ + u32 perf; + struct amd_cpudata *cpudata = policy->driver_data; + + perf = READ_ONCE(cpudata->highest_perf); + + return sprintf(&buf[0], "%u\n", perf); +} + +cpufreq_freq_attr_ro(amd_pstate_max_freq); +cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); + +cpufreq_freq_attr_ro(amd_pstate_highest_perf); + +static struct freq_attr *amd_pstate_attr[] = { + &amd_pstate_max_freq, + &amd_pstate_lowest_nonlinear_freq, + &amd_pstate_highest_perf, + NULL, +}; + +static struct cpufreq_driver amd_pstate_driver = { + .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, + .verify = amd_pstate_verify, + .target = amd_pstate_target, + .init = amd_pstate_cpu_init, + .exit = amd_pstate_cpu_exit, + .suspend = amd_pstate_cpu_suspend, + .resume = amd_pstate_cpu_resume, + .set_boost = amd_pstate_set_boost, + .name = "amd-pstate", + .attr = amd_pstate_attr, +}; + +static int __init amd_pstate_init(void) +{ + int ret; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return -ENODEV; + + if (!acpi_cpc_valid()) { + pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); + return -ENODEV; + } + + /* don't keep reloading if cpufreq_driver exists */ + if (cpufreq_get_current_driver()) + return -EEXIST; + + /* capability check */ + if (boot_cpu_has(X86_FEATURE_CPPC)) { + pr_debug("AMD CPPC MSR based functionality is supported\n"); + amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; + } else if (shared_mem) { + static_call_update(amd_pstate_enable, cppc_enable); + static_call_update(amd_pstate_init_perf, cppc_init_perf); + static_call_update(amd_pstate_update_perf, cppc_update_perf); + } else { + pr_info("This processor supports shared memory solution, you can enable it with amd_pstate.shared_mem=1\n"); + return -ENODEV; + } + + /* enable amd pstate feature */ + ret = amd_pstate_enable(true); + if (ret) { + pr_err("failed to enable amd-pstate with return %d\n", ret); + return ret; + } + + ret = cpufreq_register_driver(&amd_pstate_driver); + if (ret) + pr_err("failed to register amd_pstate_driver with return %d\n", + ret); + + return ret; +} + +static void __exit amd_pstate_exit(void) +{ + cpufreq_unregister_driver(&amd_pstate_driver); + + amd_pstate_enable(false); +} + +module_init(amd_pstate_init); +module_exit(amd_pstate_exit); + +MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>"); +MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/bmips-cpufreq.c b/drivers/cpufreq/bmips-cpufreq.c index f7c23fa468f0..39221a9a187a 100644 --- a/drivers/cpufreq/bmips-cpufreq.c +++ b/drivers/cpufreq/bmips-cpufreq.c @@ -156,7 +156,7 @@ static struct cpufreq_driver bmips_cpufreq_driver = { .name = BMIPS_CPUFREQ_PREFIX, }; -static int __init bmips_cpufreq_probe(void) +static int __init bmips_cpufreq_driver_init(void) { struct cpufreq_compat *cc; struct device_node *np; @@ -176,7 +176,13 @@ static int __init bmips_cpufreq_probe(void) return cpufreq_register_driver(&bmips_cpufreq_driver); } -device_initcall(bmips_cpufreq_probe); +module_init(bmips_cpufreq_driver_init); + +static void __exit bmips_cpufreq_driver_exit(void) +{ + cpufreq_unregister_driver(&bmips_cpufreq_driver); +} +module_exit(bmips_cpufreq_driver_exit); MODULE_AUTHOR("Markus Mayer <mmayer@broadcom.com>"); MODULE_DESCRIPTION("CPUfreq driver for Broadcom BMIPS SoCs"); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index db17196266e4..432dfb4e8027 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -61,7 +61,17 @@ static struct cppc_workaround_oem_info wa_info[] = { } }; +static struct cpufreq_driver cppc_cpufreq_driver; + +static enum { + FIE_UNSET = -1, + FIE_ENABLED, + FIE_DISABLED +} fie_disabled = FIE_UNSET; + #ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE +module_param(fie_disabled, int, 0444); +MODULE_PARM_DESC(fie_disabled, "Disable Frequency Invariance Engine (FIE)"); /* Frequency invariance support */ struct cppc_freq_invariance { @@ -75,7 +85,6 @@ struct cppc_freq_invariance { static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv); static struct kthread_worker *kworker_fie; -static struct cpufreq_driver cppc_cpufreq_driver; static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu); static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, struct cppc_perf_fb_ctrs *fb_ctrs_t0, @@ -157,7 +166,7 @@ static void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy) struct cppc_freq_invariance *cppc_fi; int cpu, ret; - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + if (fie_disabled) return; for_each_cpu(cpu, policy->cpus) { @@ -198,7 +207,7 @@ static void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy) struct cppc_freq_invariance *cppc_fi; int cpu; - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + if (fie_disabled) return; /* policy->cpus will be empty here, use related_cpus instead */ @@ -228,7 +237,15 @@ static void __init cppc_freq_invariance_init(void) }; int ret; - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + if (fie_disabled != FIE_ENABLED && fie_disabled != FIE_DISABLED) { + fie_disabled = FIE_ENABLED; + if (cppc_perf_ctrs_in_pcc()) { + pr_info("FIE not enabled on systems with registers in PCC\n"); + fie_disabled = FIE_DISABLED; + } + } + + if (fie_disabled) return; kworker_fie = kthread_create_worker(0, "cppc_fie"); @@ -246,7 +263,7 @@ static void __init cppc_freq_invariance_init(void) static void cppc_freq_invariance_exit(void) { - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + if (fie_disabled) return; kthread_destroy_worker(kworker_fie); @@ -303,52 +320,48 @@ static u64 cppc_get_dmi_max_khz(void) /* * If CPPC lowest_freq and nominal_freq registers are exposed then we can - * use them to convert perf to freq and vice versa - * - * If the perf/freq point lies between Nominal and Lowest, we can treat - * (Low perf, Low freq) and (Nom Perf, Nom freq) as 2D co-ordinates of a line - * and extrapolate the rest - * For perf/freq > Nominal, we use the ratio perf:freq at Nominal for conversion + * use them to convert perf to freq and vice versa. The conversion is + * extrapolated as an affine function passing by the 2 points: + * - (Low perf, Low freq) + * - (Nominal perf, Nominal perf) */ static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu_data, unsigned int perf) { struct cppc_perf_caps *caps = &cpu_data->perf_caps; + s64 retval, offset = 0; static u64 max_khz; u64 mul, div; if (caps->lowest_freq && caps->nominal_freq) { - if (perf >= caps->nominal_perf) { - mul = caps->nominal_freq; - div = caps->nominal_perf; - } else { - mul = caps->nominal_freq - caps->lowest_freq; - div = caps->nominal_perf - caps->lowest_perf; - } + mul = caps->nominal_freq - caps->lowest_freq; + div = caps->nominal_perf - caps->lowest_perf; + offset = caps->nominal_freq - div64_u64(caps->nominal_perf * mul, div); } else { if (!max_khz) max_khz = cppc_get_dmi_max_khz(); mul = max_khz; div = caps->highest_perf; } - return (u64)perf * mul / div; + + retval = offset + div64_u64(perf * mul, div); + if (retval >= 0) + return retval; + return 0; } static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data, unsigned int freq) { struct cppc_perf_caps *caps = &cpu_data->perf_caps; + s64 retval, offset = 0; static u64 max_khz; u64 mul, div; if (caps->lowest_freq && caps->nominal_freq) { - if (freq >= caps->nominal_freq) { - mul = caps->nominal_perf; - div = caps->nominal_freq; - } else { - mul = caps->lowest_perf; - div = caps->lowest_freq; - } + mul = caps->nominal_perf - caps->lowest_perf; + div = caps->nominal_freq - caps->lowest_freq; + offset = caps->nominal_perf - div64_u64(caps->nominal_freq * mul, div); } else { if (!max_khz) max_khz = cppc_get_dmi_max_khz(); @@ -356,7 +369,10 @@ static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data, div = max_khz; } - return (u64)freq * mul / div; + retval = offset + div64_u64(freq * mul, div); + if (retval >= 0) + return retval; + return 0; } static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, @@ -390,6 +406,27 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, return ret; } +static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cppc_cpudata *cpu_data = policy->driver_data; + unsigned int cpu = policy->cpu; + u32 desired_perf; + int ret; + + desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq); + cpu_data->perf_ctrls.desired_perf = desired_perf; + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); + + if (ret) { + pr_debug("Failed to set target on CPU:%d. ret:%d\n", + cpu, ret); + return 0; + } + + return target_freq; +} + static int cppc_verify_policy(struct cpufreq_policy_data *policy) { cpufreq_verify_within_cpu_limits(policy); @@ -420,15 +457,199 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) } return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; } - #else - static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) { return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; } #endif +#if defined(CONFIG_ARM64) && defined(CONFIG_ENERGY_MODEL) + +static DEFINE_PER_CPU(unsigned int, efficiency_class); +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy); + +/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */ +#define CPPC_EM_CAP_STEP (20) +/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */ +#define CPPC_EM_COST_STEP (1) +/* Add a cost gap correspnding to the energy of 4 CPUs. */ +#define CPPC_EM_COST_GAP (4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \ + / CPPC_EM_CAP_STEP) + +static unsigned int get_perf_level_count(struct cpufreq_policy *policy) +{ + struct cppc_perf_caps *perf_caps; + unsigned int min_cap, max_cap; + struct cppc_cpudata *cpu_data; + int cpu = policy->cpu; + + cpu_data = policy->driver_data; + perf_caps = &cpu_data->perf_caps; + max_cap = arch_scale_cpu_capacity(cpu); + min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf); + if ((min_cap == 0) || (max_cap < min_cap)) + return 0; + return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP; +} + +/* + * The cost is defined as: + * cost = power * max_frequency / frequency + */ +static inline unsigned long compute_cost(int cpu, int step) +{ + return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) + + step * CPPC_EM_COST_STEP; +} + +static int cppc_get_cpu_power(struct device *cpu_dev, + unsigned long *power, unsigned long *KHz) +{ + unsigned long perf_step, perf_prev, perf, perf_check; + unsigned int min_step, max_step, step, step_check; + unsigned long prev_freq = *KHz; + unsigned int min_cap, max_cap; + struct cpufreq_policy *policy; + + struct cppc_perf_caps *perf_caps; + struct cppc_cpudata *cpu_data; + + policy = cpufreq_cpu_get_raw(cpu_dev->id); + cpu_data = policy->driver_data; + perf_caps = &cpu_data->perf_caps; + max_cap = arch_scale_cpu_capacity(cpu_dev->id); + min_cap = div_u64(max_cap * perf_caps->lowest_perf, + perf_caps->highest_perf); + + perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; + min_step = min_cap / CPPC_EM_CAP_STEP; + max_step = max_cap / CPPC_EM_CAP_STEP; + + perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + step = perf_prev / perf_step; + + if (step > max_step) + return -EINVAL; + + if (min_step == max_step) { + step = max_step; + perf = perf_caps->highest_perf; + } else if (step < min_step) { + step = min_step; + perf = perf_caps->lowest_perf; + } else { + step++; + if (step == max_step) + perf = perf_caps->highest_perf; + else + perf = step * perf_step; + } + + *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf); + perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + step_check = perf_check / perf_step; + + /* + * To avoid bad integer approximation, check that new frequency value + * increased and that the new frequency will be converted to the + * desired step value. + */ + while ((*KHz == prev_freq) || (step_check != step)) { + perf++; + *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf); + perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + step_check = perf_check / perf_step; + } + + /* + * With an artificial EM, only the cost value is used. Still the power + * is populated such as 0 < power < EM_MAX_POWER. This allows to add + * more sense to the artificial performance states. + */ + *power = compute_cost(cpu_dev->id, step); + + return 0; +} + +static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz, + unsigned long *cost) +{ + unsigned long perf_step, perf_prev; + struct cppc_perf_caps *perf_caps; + struct cpufreq_policy *policy; + struct cppc_cpudata *cpu_data; + unsigned int max_cap; + int step; + + policy = cpufreq_cpu_get_raw(cpu_dev->id); + cpu_data = policy->driver_data; + perf_caps = &cpu_data->perf_caps; + max_cap = arch_scale_cpu_capacity(cpu_dev->id); + + perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz); + perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; + step = perf_prev / perf_step; + + *cost = compute_cost(cpu_dev->id, step); + + return 0; +} + +static int populate_efficiency_class(void) +{ + struct acpi_madt_generic_interrupt *gicc; + DECLARE_BITMAP(used_classes, 256) = {}; + int class, cpu, index; + + for_each_possible_cpu(cpu) { + gicc = acpi_cpu_get_madt_gicc(cpu); + class = gicc->efficiency_class; + bitmap_set(used_classes, class, 1); + } + + if (bitmap_weight(used_classes, 256) <= 1) { + pr_debug("Efficiency classes are all equal (=%d). " + "No EM registered", class); + return -EINVAL; + } + + /* + * Squeeze efficiency class values on [0:#efficiency_class-1]. + * Values are per spec in [0:255]. + */ + index = 0; + for_each_set_bit(class, used_classes, 256) { + for_each_possible_cpu(cpu) { + gicc = acpi_cpu_get_madt_gicc(cpu); + if (gicc->efficiency_class == class) + per_cpu(efficiency_class, cpu) = index; + } + index++; + } + cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em; + + return 0; +} + +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy) +{ + struct cppc_cpudata *cpu_data; + struct em_data_callback em_cb = + EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost); + + cpu_data = policy->driver_data; + em_dev_register_perf_domain(get_cpu_device(policy->cpu), + get_perf_level_count(policy), &em_cb, + cpu_data->shared_cpu_map, 0); +} + +#else +static int populate_efficiency_class(void) +{ + return 0; +} +#endif static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu) { @@ -537,6 +758,9 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) goto out; } + policy->fast_switch_possible = cppc_allow_fast_switch(); + policy->dvfs_possible_from_any_cpu = true; + /* * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost * is supported. @@ -682,6 +906,7 @@ static struct cpufreq_driver cppc_cpufreq_driver = { .verify = cppc_verify_policy, .target = cppc_cpufreq_set_target, .get = cppc_cpufreq_get_rate, + .fast_switch = cppc_cpufreq_fast_switch, .init = cppc_cpufreq_cpu_init, .exit = cppc_cpufreq_cpu_exit, .set_boost = cppc_cpufreq_set_boost, @@ -727,6 +952,7 @@ static void cppc_check_hisi_workaround(void) wa_info[i].oem_revision == tbl->oem_revision) { /* Overwrite the get() callback */ cppc_cpufreq_driver.get = hisi_cppc_cpufreq_get_rate; + fie_disabled = FIE_DISABLED; break; } } @@ -738,11 +964,12 @@ static int __init cppc_cpufreq_init(void) { int ret; - if ((acpi_disabled) || !acpi_cpc_valid()) + if (!acpi_cpc_valid()) return -ENODEV; cppc_check_hisi_workaround(); cppc_freq_invariance_init(); + populate_efficiency_class(); ret = cpufreq_register_driver(&cppc_cpufreq_driver); if (ret) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index ca1d103ec449..6ac3800db450 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -110,6 +110,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "fsl,imx7ulp", }, { .compatible = "fsl,imx7d", }, + { .compatible = "fsl,imx7s", }, { .compatible = "fsl,imx8mq", }, { .compatible = "fsl,imx8mm", }, { .compatible = "fsl,imx8mn", }, @@ -126,6 +127,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "mediatek,mt8173", }, { .compatible = "mediatek,mt8176", }, { .compatible = "mediatek,mt8183", }, + { .compatible = "mediatek,mt8186", }, { .compatible = "mediatek,mt8365", }, { .compatible = "mediatek,mt8516", }, @@ -138,10 +140,13 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,msm8996", }, { .compatible = "qcom,qcs404", }, { .compatible = "qcom,sa8155p" }, + { .compatible = "qcom,sa8540p" }, { .compatible = "qcom,sc7180", }, { .compatible = "qcom,sc7280", }, { .compatible = "qcom,sc8180x", }, + { .compatible = "qcom,sc8280xp", }, { .compatible = "qcom,sdm845", }, + { .compatible = "qcom,sm6115", }, { .compatible = "qcom,sm6350", }, { .compatible = "qcom,sm8150", }, { .compatible = "qcom,sm8250", }, diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 8fcaba541539..4aec4b2a5225 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -29,9 +29,9 @@ struct private_data { cpumask_var_t cpus; struct device *cpu_dev; - struct opp_table *opp_table; struct cpufreq_frequency_table *freq_table; bool have_static_opps; + int opp_token; }; static LIST_HEAD(priv_list); @@ -193,7 +193,7 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu) struct private_data *priv; struct device *cpu_dev; bool fallback = false; - const char *reg_name; + const char *reg_name[] = { NULL, NULL }; int ret; /* Check if this CPU is already covered by some other policy */ @@ -218,15 +218,12 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu) * OPP layer will be taking care of regulators now, but it needs to know * the name of the regulator first. */ - reg_name = find_supply_name(cpu_dev); - if (reg_name) { - priv->opp_table = dev_pm_opp_set_regulators(cpu_dev, ®_name, - 1); - if (IS_ERR(priv->opp_table)) { - ret = PTR_ERR(priv->opp_table); - if (ret != -EPROBE_DEFER) - dev_err(cpu_dev, "failed to set regulators: %d\n", - ret); + reg_name[0] = find_supply_name(cpu_dev); + if (reg_name[0]) { + priv->opp_token = dev_pm_opp_set_regulators(cpu_dev, reg_name); + if (priv->opp_token < 0) { + ret = dev_err_probe(cpu_dev, priv->opp_token, + "failed to set regulators\n"); goto free_cpumask; } } @@ -295,7 +292,7 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu) out: if (priv->have_static_opps) dev_pm_opp_of_cpumask_remove_table(priv->cpus); - dev_pm_opp_put_regulators(priv->opp_table); + dev_pm_opp_put_regulators(priv->opp_token); free_cpumask: free_cpumask_var(priv->cpus); return ret; @@ -309,7 +306,7 @@ static void dt_cpufreq_release(void) dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &priv->freq_table); if (priv->have_static_opps) dev_pm_opp_of_cpumask_remove_table(priv->cpus); - dev_pm_opp_put_regulators(priv->opp_table); + dev_pm_opp_put_regulators(priv->opp_token); free_cpumask_var(priv->cpus); list_del(&priv->node); } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 096c3848fa41..69b3d61852ac 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -28,6 +28,7 @@ #include <linux/suspend.h> #include <linux/syscore_ops.h> #include <linux/tick.h> +#include <linux/units.h> #include <trace/events/power.h> static LIST_HEAD(cpufreq_policy_list); @@ -531,7 +532,7 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, target_freq = clamp_val(target_freq, policy->min, policy->max); - if (!cpufreq_driver->target_index) + if (!policy->freq_table) return target_freq; idx = cpufreq_frequency_table_target(policy, target_freq, relation); @@ -842,12 +843,14 @@ ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf) unsigned int cpu; for_each_cpu(cpu, mask) { - if (i) - i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " "); - i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu); + i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u ", cpu); if (i >= (PAGE_SIZE - 5)) break; } + + /* Remove the extra space at the end */ + i--; + i += sprintf(&buf[i], "\n"); return i; } @@ -924,7 +927,7 @@ cpufreq_freq_attr_rw(scaling_max_freq); cpufreq_freq_attr_rw(scaling_governor); cpufreq_freq_attr_rw(scaling_setspeed); -static struct attribute *default_attrs[] = { +static struct attribute *cpufreq_attrs[] = { &cpuinfo_min_freq.attr, &cpuinfo_max_freq.attr, &cpuinfo_transition_latency.attr, @@ -938,6 +941,7 @@ static struct attribute *default_attrs[] = { &scaling_setspeed.attr, NULL }; +ATTRIBUTE_GROUPS(cpufreq); #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) #define to_attr(a) container_of(a, struct freq_attr, attr) @@ -946,13 +950,14 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) { struct cpufreq_policy *policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); - ssize_t ret; + ssize_t ret = -EBUSY; if (!fattr->show) return -EIO; down_read(&policy->rwsem); - ret = fattr->show(policy, buf); + if (likely(!policy_is_inactive(policy))) + ret = fattr->show(policy, buf); up_read(&policy->rwsem); return ret; @@ -963,25 +968,15 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, { struct cpufreq_policy *policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); - ssize_t ret = -EINVAL; + ssize_t ret = -EBUSY; if (!fattr->store) return -EIO; - /* - * cpus_read_trylock() is used here to work around a circular lock - * dependency problem with respect to the cpufreq_register_driver(). - */ - if (!cpus_read_trylock()) - return -EBUSY; - - if (cpu_online(policy->cpu)) { - down_write(&policy->rwsem); + down_write(&policy->rwsem); + if (likely(!policy_is_inactive(policy))) ret = fattr->store(policy, buf, count); - up_write(&policy->rwsem); - } - - cpus_read_unlock(); + up_write(&policy->rwsem); return ret; } @@ -1000,7 +995,7 @@ static const struct sysfs_ops sysfs_ops = { static struct kobj_type ktype_cpufreq = { .sysfs_ops = &sysfs_ops, - .default_attrs = default_attrs, + .default_groups = cpufreq_groups, .release = cpufreq_sysfs_release, }; @@ -1018,11 +1013,12 @@ static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu, dev_err(dev, "cpufreq symlink creation failed\n"); } -static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, +static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu, struct device *dev) { dev_dbg(dev, "%s: Removing symlink\n", __func__); sysfs_remove_link(&dev->kobj, "cpufreq"); + cpumask_clear_cpu(cpu, policy->real_cpus); } static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) @@ -1277,6 +1273,13 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) unsigned long flags; int cpu; + /* + * The callers must ensure the policy is inactive by now, to avoid any + * races with show()/store() callbacks. + */ + if (unlikely(!policy_is_inactive(policy))) + pr_warn("%s: Freeing active policy\n", __func__); + /* Remove policy from list */ write_lock_irqsave(&cpufreq_driver_lock, flags); list_del(&policy->policy_list); @@ -1336,24 +1339,24 @@ static int cpufreq_online(unsigned int cpu) down_write(&policy->rwsem); policy->cpu = cpu; policy->governor = NULL; - up_write(&policy->rwsem); } else { new_policy = true; policy = cpufreq_policy_alloc(cpu); if (!policy) return -ENOMEM; + down_write(&policy->rwsem); } if (!new_policy && cpufreq_driver->online) { + /* Recover policy->cpus using related_cpus */ + cpumask_copy(policy->cpus, policy->related_cpus); + ret = cpufreq_driver->online(policy); if (ret) { pr_debug("%s: %d: initialization failed\n", __func__, __LINE__); goto out_exit_policy; } - - /* Recover policy->cpus using related_cpus */ - cpumask_copy(policy->cpus, policy->related_cpus); } else { cpumask_copy(policy->cpus, cpumask_of(cpu)); @@ -1381,7 +1384,6 @@ static int cpufreq_online(unsigned int cpu) cpumask_copy(policy->related_cpus, policy->cpus); } - down_write(&policy->rwsem); /* * affected cpus must always be the one, which are online. We aren't * managing offline cpus here. @@ -1403,7 +1405,7 @@ static int cpufreq_online(unsigned int cpu) ret = freq_qos_add_request(&policy->constraints, policy->min_freq_req, FREQ_QOS_MIN, - policy->min); + FREQ_QOS_MIN_DEFAULT_VALUE); if (ret < 0) { /* * So we don't call freq_qos_remove_request() for an @@ -1423,7 +1425,7 @@ static int cpufreq_online(unsigned int cpu) ret = freq_qos_add_request(&policy->constraints, policy->max_freq_req, FREQ_QOS_MAX, - policy->max); + FREQ_QOS_MAX_DEFAULT_VALUE); if (ret < 0) { policy->max_freq_req = NULL; goto out_destroy_policy; @@ -1517,6 +1519,10 @@ static int cpufreq_online(unsigned int cpu) kobject_uevent(&policy->kobj, KOBJ_ADD); + /* Callback for handling stuff after policy is ready */ + if (cpufreq_driver->ready) + cpufreq_driver->ready(policy); + if (cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); @@ -1526,9 +1532,7 @@ static int cpufreq_online(unsigned int cpu) out_destroy_policy: for_each_cpu(j, policy->real_cpus) - remove_cpu_dev_symlink(policy, get_cpu_device(j)); - - up_write(&policy->rwsem); + remove_cpu_dev_symlink(policy, j, get_cpu_device(j)); out_offline_policy: if (cpufreq_driver->offline) @@ -1539,6 +1543,9 @@ out_exit_policy: cpufreq_driver->exit(policy); out_free_policy: + cpumask_clear(policy->cpus); + up_write(&policy->rwsem); + cpufreq_policy_free(policy); return ret; } @@ -1570,47 +1577,36 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return 0; } -static int cpufreq_offline(unsigned int cpu) +static void __cpufreq_offline(unsigned int cpu, struct cpufreq_policy *policy) { - struct cpufreq_policy *policy; int ret; - pr_debug("%s: unregistering CPU %u\n", __func__, cpu); - - policy = cpufreq_cpu_get_raw(cpu); - if (!policy) { - pr_debug("%s: No cpu_data found\n", __func__); - return 0; - } - - down_write(&policy->rwsem); if (has_target()) cpufreq_stop_governor(policy); cpumask_clear_cpu(cpu, policy->cpus); - if (policy_is_inactive(policy)) { - if (has_target()) - strncpy(policy->last_governor, policy->governor->name, - CPUFREQ_NAME_LEN); - else - policy->last_policy = policy->policy; - } else if (cpu == policy->cpu) { - /* Nominate new CPU */ - policy->cpu = cpumask_any(policy->cpus); - } - - /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { + /* Nominate a new CPU if necessary. */ + if (cpu == policy->cpu) + policy->cpu = cpumask_any(policy->cpus); + + /* Start the governor again for the active policy. */ if (has_target()) { ret = cpufreq_start_governor(policy); if (ret) pr_err("%s: Failed to start governor\n", __func__); } - goto unlock; + return; } + if (has_target()) + strncpy(policy->last_governor, policy->governor->name, + CPUFREQ_NAME_LEN); + else + policy->last_policy = policy->policy; + if (cpufreq_thermal_control_enabled(cpufreq_driver)) { cpufreq_cooling_unregister(policy->cdev); policy->cdev = NULL; @@ -1629,8 +1625,24 @@ static int cpufreq_offline(unsigned int cpu) cpufreq_driver->exit(policy); policy->freq_table = NULL; } +} + +static int cpufreq_offline(unsigned int cpu) +{ + struct cpufreq_policy *policy; + + pr_debug("%s: unregistering CPU %u\n", __func__, cpu); + + policy = cpufreq_cpu_get_raw(cpu); + if (!policy) { + pr_debug("%s: No cpu_data found\n", __func__); + return 0; + } + + down_write(&policy->rwsem); + + __cpufreq_offline(cpu, policy); -unlock: up_write(&policy->rwsem); return 0; } @@ -1648,19 +1660,25 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (!policy) return; - if (cpu_online(cpu)) - cpufreq_offline(cpu); + down_write(&policy->rwsem); - cpumask_clear_cpu(cpu, policy->real_cpus); - remove_cpu_dev_symlink(policy, dev); + if (cpu_online(cpu)) + __cpufreq_offline(cpu, policy); - if (cpumask_empty(policy->real_cpus)) { - /* We did light-weight exit earlier, do full tear down now */ - if (cpufreq_driver->offline) - cpufreq_driver->exit(policy); + remove_cpu_dev_symlink(policy, cpu, dev); - cpufreq_policy_free(policy); + if (!cpumask_empty(policy->real_cpus)) { + up_write(&policy->rwsem); + return; } + + /* We did light-weight exit earlier, do full tear down now */ + if (cpufreq_driver->offline) + cpufreq_driver->exit(policy); + + up_write(&policy->rwsem); + + cpufreq_policy_free(policy); } /** @@ -1702,6 +1720,16 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b return new_freq; if (policy->cur != new_freq) { + /* + * For some platforms, the frequency returned by hardware may be + * slightly different from what is provided in the frequency + * table, for example hardware may return 499 MHz instead of 500 + * MHz. In such cases it is better to avoid getting into + * unnecessary frequency updates. + */ + if (abs(policy->cur - new_freq) < HZ_PER_MHZ) + return policy->cur; + cpufreq_out_of_sync(policy, new_freq); if (update) schedule_work(&policy->update); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 0879ec3c170c..b6bd0ff35323 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -146,7 +146,7 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy) /************************** sysfs interface ************************/ -static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, +static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -161,7 +161,7 @@ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, return count; } -static ssize_t store_up_threshold(struct gov_attr_set *attr_set, +static ssize_t up_threshold_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -177,7 +177,7 @@ static ssize_t store_up_threshold(struct gov_attr_set *attr_set, return count; } -static ssize_t store_down_threshold(struct gov_attr_set *attr_set, +static ssize_t down_threshold_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -195,7 +195,7 @@ static ssize_t store_down_threshold(struct gov_attr_set *attr_set, return count; } -static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set, +static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -220,7 +220,7 @@ static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set, return count; } -static ssize_t store_freq_step(struct gov_attr_set *attr_set, const char *buf, +static ssize_t freq_step_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -257,7 +257,7 @@ gov_attr_rw(ignore_nice_load); gov_attr_rw(down_threshold); gov_attr_rw(freq_step); -static struct attribute *cs_attributes[] = { +static struct attribute *cs_attrs[] = { &sampling_rate.attr, &sampling_down_factor.attr, &up_threshold.attr, @@ -266,6 +266,7 @@ static struct attribute *cs_attributes[] = { &freq_step.attr, NULL }; +ATTRIBUTE_GROUPS(cs); /************************** sysfs end ************************/ @@ -315,7 +316,7 @@ static void cs_start(struct cpufreq_policy *policy) static struct dbs_governor cs_governor = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), - .kobj_type = { .default_attrs = cs_attributes }, + .kobj_type = { .default_groups = cs_groups }, .gov_dbs_update = cs_dbs_update, .alloc = cs_alloc, .free = cs_free, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 63f7c219062b..85da677c43d6 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -27,7 +27,7 @@ static DEFINE_MUTEX(gov_dbs_data_mutex); /* Common sysfs tunables */ /* - * store_sampling_rate - update sampling rate effective immediately if needed. + * sampling_rate_store - update sampling rate effective immediately if needed. * * If new rate is smaller than the old, simply updating * dbs.sampling_rate might not be appropriate. For example, if the @@ -41,7 +41,7 @@ static DEFINE_MUTEX(gov_dbs_data_mutex); * This must be called with dbs_data->mutex held, otherwise traversing * policy_dbs_list isn't safe. */ -ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, +ssize_t sampling_rate_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -80,7 +80,7 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, return count; } -EXPORT_SYMBOL_GPL(store_sampling_rate); +EXPORT_SYMBOL_GPL(sampling_rate_store); /** * gov_update_cpu_data - Update CPU load data. @@ -388,6 +388,15 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, gov->free(policy_dbs); } +static void cpufreq_dbs_data_release(struct kobject *kobj) +{ + struct dbs_data *dbs_data = to_dbs_data(to_gov_attr_set(kobj)); + struct dbs_governor *gov = dbs_data->gov; + + gov->exit(dbs_data); + kfree(dbs_data); +} + int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); @@ -425,6 +434,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) goto free_policy_dbs_info; } + dbs_data->gov = gov; gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list); ret = gov->init(dbs_data); @@ -447,6 +457,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) policy->governor_data = policy_dbs; gov->kobj_type.sysfs_ops = &governor_sysfs_ops; + gov->kobj_type.release = cpufreq_dbs_data_release; ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type, get_governor_parent_kobj(policy), "%s", gov->gov.name); @@ -488,13 +499,8 @@ void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy) policy->governor_data = NULL; - if (!count) { - if (!have_governor_per_policy()) - gov->gdbs_data = NULL; - - gov->exit(dbs_data); - kfree(dbs_data); - } + if (!count && !have_governor_per_policy()) + gov->gdbs_data = NULL; free_policy_dbs_info(policy_dbs, gov); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index bab8e6140377..168c23fd7fca 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -37,6 +37,7 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; /* Governor demand based switching data (per-policy or global). */ struct dbs_data { struct gov_attr_set attr_set; + struct dbs_governor *gov; void *tuners; unsigned int ignore_nice_load; unsigned int sampling_rate; @@ -51,7 +52,7 @@ static inline struct dbs_data *to_dbs_data(struct gov_attr_set *attr_set) } #define gov_show_one(_gov, file_name) \ -static ssize_t show_##file_name \ +static ssize_t file_name##_show \ (struct gov_attr_set *attr_set, char *buf) \ { \ struct dbs_data *dbs_data = to_dbs_data(attr_set); \ @@ -60,7 +61,7 @@ static ssize_t show_##file_name \ } #define gov_show_one_common(file_name) \ -static ssize_t show_##file_name \ +static ssize_t file_name##_show \ (struct gov_attr_set *attr_set, char *buf) \ { \ struct dbs_data *dbs_data = to_dbs_data(attr_set); \ @@ -68,12 +69,10 @@ static ssize_t show_##file_name \ } #define gov_attr_ro(_name) \ -static struct governor_attr _name = \ -__ATTR(_name, 0444, show_##_name, NULL) +static struct governor_attr _name = __ATTR_RO(_name) #define gov_attr_rw(_name) \ -static struct governor_attr _name = \ -__ATTR(_name, 0644, show_##_name, store_##_name) +static struct governor_attr _name = __ATTR_RW(_name) /* Common to all CPUs of a policy */ struct policy_dbs_info { @@ -176,7 +175,7 @@ void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); void od_unregister_powersave_bias_handler(void); -ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, +ssize_t sampling_rate_store(struct gov_attr_set *attr_set, const char *buf, size_t count); void gov_update_cpu_data(struct dbs_data *dbs_data); #endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c index a6f365b9cc1a..771770ea0ed0 100644 --- a/drivers/cpufreq/cpufreq_governor_attr_set.c +++ b/drivers/cpufreq/cpufreq_governor_attr_set.c @@ -8,11 +8,6 @@ #include "cpufreq_governor.h" -static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj) -{ - return container_of(kobj, struct gov_attr_set, kobj); -} - static inline struct governor_attr *to_gov_attr(struct attribute *attr) { return container_of(attr, struct governor_attr, attr); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 3b8f924771b4..c52d19d67557 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -202,7 +202,7 @@ static unsigned int od_dbs_update(struct cpufreq_policy *policy) /************************** sysfs interface ************************/ static struct dbs_governor od_dbs_gov; -static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf, +static ssize_t io_is_busy_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -220,7 +220,7 @@ static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf, return count; } -static ssize_t store_up_threshold(struct gov_attr_set *attr_set, +static ssize_t up_threshold_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -237,7 +237,7 @@ static ssize_t store_up_threshold(struct gov_attr_set *attr_set, return count; } -static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, +static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -265,7 +265,7 @@ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, return count; } -static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set, +static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -290,7 +290,7 @@ static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set, return count; } -static ssize_t store_powersave_bias(struct gov_attr_set *attr_set, +static ssize_t powersave_bias_store(struct gov_attr_set *attr_set, const char *buf, size_t count) { struct dbs_data *dbs_data = to_dbs_data(attr_set); @@ -328,7 +328,7 @@ gov_attr_rw(sampling_down_factor); gov_attr_rw(ignore_nice_load); gov_attr_rw(powersave_bias); -static struct attribute *od_attributes[] = { +static struct attribute *od_attrs[] = { &sampling_rate.attr, &up_threshold.attr, &sampling_down_factor.attr, @@ -337,6 +337,7 @@ static struct attribute *od_attributes[] = { &io_is_busy.attr, NULL }; +ATTRIBUTE_GROUPS(od); /************************** sysfs end ************************/ @@ -401,7 +402,7 @@ static struct od_ops od_ops = { static struct dbs_governor od_dbs_gov = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"), - .kobj_type = { .default_attrs = od_attributes }, + .kobj_type = { .default_groups = od_groups }, .gov_dbs_update = od_dbs_update, .alloc = od_alloc, .free = od_free, @@ -415,10 +416,13 @@ static struct dbs_governor od_dbs_gov = { static void od_set_powersave_bias(unsigned int powersave_bias) { unsigned int cpu; - cpumask_t done; + cpumask_var_t done; + + if (!alloc_cpumask_var(&done, GFP_KERNEL)) + return; default_powersave_bias = powersave_bias; - cpumask_clear(&done); + cpumask_clear(done); cpus_read_lock(); for_each_online_cpu(cpu) { @@ -427,7 +431,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias) struct dbs_data *dbs_data; struct od_dbs_tuners *od_tuners; - if (cpumask_test_cpu(cpu, &done)) + if (cpumask_test_cpu(cpu, done)) continue; policy = cpufreq_cpu_get_raw(cpu); @@ -438,13 +442,15 @@ static void od_set_powersave_bias(unsigned int powersave_bias) if (!policy_dbs) continue; - cpumask_or(&done, &done, policy->cpus); + cpumask_or(done, done, policy->cpus); dbs_data = policy_dbs->dbs_data; od_tuners = dbs_data->tuners; od_tuners->powersave_bias = default_powersave_bias; } cpus_read_unlock(); + + free_cpumask_var(done); } void od_register_powersave_bias_handler(unsigned int (*f) diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c index ac57cddc5f2f..a45864701143 100644 --- a/drivers/cpufreq/highbank-cpufreq.c +++ b/drivers/cpufreq/highbank-cpufreq.c @@ -55,7 +55,7 @@ static struct notifier_block hb_cpufreq_clk_nb = { .notifier_call = hb_cpufreq_clk_notify, }; -static int hb_cpufreq_driver_init(void) +static int __init hb_cpufreq_driver_init(void) { struct platform_device_info devinfo = { .name = "cpufreq-dt", }; struct device *cpu_dev; diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c index 3fe9125156b4..76e553af2071 100644 --- a/drivers/cpufreq/imx-cpufreq-dt.c +++ b/drivers/cpufreq/imx-cpufreq-dt.c @@ -31,8 +31,8 @@ /* cpufreq-dt device registered by imx-cpufreq-dt */ static struct platform_device *cpufreq_dt_pdev; -static struct opp_table *cpufreq_opp_table; static struct device *cpu_dev; +static int cpufreq_opp_token; enum IMX7ULP_CPUFREQ_CLKS { ARM, @@ -153,9 +153,9 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev) dev_info(&pdev->dev, "cpu speed grade %d mkt segment %d supported-hw %#x %#x\n", speed_grade, mkt_segment, supported_hw[0], supported_hw[1]); - cpufreq_opp_table = dev_pm_opp_set_supported_hw(cpu_dev, supported_hw, 2); - if (IS_ERR(cpufreq_opp_table)) { - ret = PTR_ERR(cpufreq_opp_table); + cpufreq_opp_token = dev_pm_opp_set_supported_hw(cpu_dev, supported_hw, 2); + if (cpufreq_opp_token < 0) { + ret = cpufreq_opp_token; dev_err(&pdev->dev, "Failed to set supported opp: %d\n", ret); return ret; } @@ -163,7 +163,7 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev) cpufreq_dt_pdev = platform_device_register_data( &pdev->dev, "cpufreq-dt", -1, NULL, 0); if (IS_ERR(cpufreq_dt_pdev)) { - dev_pm_opp_put_supported_hw(cpufreq_opp_table); + dev_pm_opp_put_supported_hw(cpufreq_opp_token); ret = PTR_ERR(cpufreq_dt_pdev); dev_err(&pdev->dev, "Failed to register cpufreq-dt: %d\n", ret); return ret; @@ -176,7 +176,7 @@ static int imx_cpufreq_dt_remove(struct platform_device *pdev) { platform_device_unregister(cpufreq_dt_pdev); if (!of_machine_is_compatible("fsl,imx7ulp")) - dev_pm_opp_put_supported_hw(cpufreq_opp_table); + dev_pm_opp_put_supported_hw(cpufreq_opp_token); else clk_bulk_put(ARRAY_SIZE(imx7ulp_clks), imx7ulp_clks); diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 90beb26ed34e..ad4ce8493144 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -396,9 +396,7 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) ret = imx6q_opp_check_speed_grading(cpu_dev); } if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(cpu_dev, "failed to read ocotp: %d\n", - ret); + dev_err_probe(cpu_dev, ret, "failed to read ocotp\n"); goto out_free_opp; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index dec2a5649ac1..6ff73c30769f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -27,6 +27,7 @@ #include <linux/pm_qos.h> #include <trace/events/power.h> +#include <asm/cpu.h> #include <asm/div64.h> #include <asm/msr.h> #include <asm/cpu_device_id.h> @@ -280,10 +281,10 @@ static struct cpudata **all_cpu_data; * structure is used to store those callbacks. */ struct pstate_funcs { - int (*get_max)(void); - int (*get_max_physical)(void); - int (*get_min)(void); - int (*get_turbo)(void); + int (*get_max)(int cpu); + int (*get_max_physical)(int cpu); + int (*get_min)(int cpu); + int (*get_turbo)(int cpu); int (*get_scaling)(void); int (*get_cpu_scaling)(int cpu); int (*get_aperf_mperf_shift)(void); @@ -398,16 +399,6 @@ static int intel_pstate_get_cppc_guaranteed(int cpu) return cppc_perf.nominal_perf; } - -static u32 intel_pstate_cppc_nominal(int cpu) -{ - u64 nominal_perf; - - if (cppc_get_nominal_perf(cpu, &nominal_perf)) - return 0; - - return nominal_perf; -} #else /* CONFIG_ACPI_CPPC_LIB */ static inline void intel_pstate_set_itmt_prio(int cpu) { @@ -531,35 +522,18 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) { int perf_ctl_max_phys = cpu->pstate.max_pstate_physical; int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling; - int perf_ctl_turbo = pstate_funcs.get_turbo(); - int turbo_freq = perf_ctl_turbo * perf_ctl_scaling; + int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu); int scaling = cpu->pstate.scaling; pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys); - pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max()); pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo); pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling); pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate); pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate); pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling); - /* - * If the product of the HWP performance scaling factor and the HWP_CAP - * highest performance is greater than the maximum turbo frequency - * corresponding to the pstate_funcs.get_turbo() return value, the - * scaling factor is too high, so recompute it to make the HWP_CAP - * highest performance correspond to the maximum turbo frequency. - */ - cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * scaling; - if (turbo_freq < cpu->pstate.turbo_freq) { - cpu->pstate.turbo_freq = turbo_freq; - scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate); - cpu->pstate.scaling = scaling; - - pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n", - cpu->cpu, scaling); - } - + cpu->pstate.turbo_freq = rounddown(cpu->pstate.turbo_pstate * scaling, + perf_ctl_scaling); cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling, perf_ctl_scaling); @@ -664,19 +638,29 @@ static int intel_pstate_set_epb(int cpu, s16 pref) * 3 balance_power * 4 power */ + +enum energy_perf_value_index { + EPP_INDEX_DEFAULT = 0, + EPP_INDEX_PERFORMANCE, + EPP_INDEX_BALANCE_PERFORMANCE, + EPP_INDEX_BALANCE_POWERSAVE, + EPP_INDEX_POWERSAVE, +}; + static const char * const energy_perf_strings[] = { - "default", - "performance", - "balance_performance", - "balance_power", - "power", + [EPP_INDEX_DEFAULT] = "default", + [EPP_INDEX_PERFORMANCE] = "performance", + [EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance", + [EPP_INDEX_BALANCE_POWERSAVE] = "balance_power", + [EPP_INDEX_POWERSAVE] = "power", NULL }; -static const unsigned int epp_values[] = { - HWP_EPP_PERFORMANCE, - HWP_EPP_BALANCE_PERFORMANCE, - HWP_EPP_BALANCE_POWERSAVE, - HWP_EPP_POWERSAVE +static unsigned int epp_values[] = { + [EPP_INDEX_DEFAULT] = 0, /* Unused index */ + [EPP_INDEX_PERFORMANCE] = HWP_EPP_PERFORMANCE, + [EPP_INDEX_BALANCE_PERFORMANCE] = HWP_EPP_BALANCE_PERFORMANCE, + [EPP_INDEX_BALANCE_POWERSAVE] = HWP_EPP_BALANCE_POWERSAVE, + [EPP_INDEX_POWERSAVE] = HWP_EPP_POWERSAVE, }; static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw_epp) @@ -690,14 +674,14 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw return epp; if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { - if (epp == HWP_EPP_PERFORMANCE) - return 1; - if (epp == HWP_EPP_BALANCE_PERFORMANCE) - return 2; - if (epp == HWP_EPP_BALANCE_POWERSAVE) - return 3; - if (epp == HWP_EPP_POWERSAVE) - return 4; + if (epp == epp_values[EPP_INDEX_PERFORMANCE]) + return EPP_INDEX_PERFORMANCE; + if (epp == epp_values[EPP_INDEX_BALANCE_PERFORMANCE]) + return EPP_INDEX_BALANCE_PERFORMANCE; + if (epp == epp_values[EPP_INDEX_BALANCE_POWERSAVE]) + return EPP_INDEX_BALANCE_POWERSAVE; + if (epp == epp_values[EPP_INDEX_POWERSAVE]) + return EPP_INDEX_POWERSAVE; *raw_epp = epp; return 0; } else if (boot_cpu_has(X86_FEATURE_EPB)) { @@ -757,7 +741,7 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, if (use_raw) epp = raw_epp; else if (epp == -EINVAL) - epp = epp_values[pref_index - 1]; + epp = epp_values[pref_index]; /* * To avoid confusion, refuse to set EPP to any values different @@ -843,7 +827,7 @@ static ssize_t store_energy_performance_preference( * upfront. */ if (!raw) - epp = ret ? epp_values[ret - 1] : cpu->epp_default; + epp = ret ? epp_values[ret] : cpu->epp_default; if (cpu->epp_cached != epp) { int err; @@ -1124,19 +1108,22 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } +static void __intel_pstate_update_max_freq(struct cpudata *cpudata, + struct cpufreq_policy *policy) +{ + policy->cpuinfo.max_freq = global.turbo_disabled_mf ? + cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; + refresh_frequency_limits(policy); +} + static void intel_pstate_update_max_freq(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - struct cpudata *cpudata; if (!policy) return; - cpudata = all_cpu_data[cpu]; - policy->cpuinfo.max_freq = global.turbo_disabled_mf ? - cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; - - refresh_frequency_limits(policy); + __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); cpufreq_cpu_release(policy); } @@ -1309,6 +1296,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, mutex_unlock(&intel_pstate_limits_lock); intel_pstate_update_policies(); + arch_set_max_freq_ratio(global.no_turbo); mutex_unlock(&intel_pstate_driver_lock); @@ -1584,8 +1572,15 @@ static void intel_pstate_notify_work(struct work_struct *work) { struct cpudata *cpudata = container_of(to_delayed_work(work), struct cpudata, hwp_notify_work); + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu); + + if (policy) { + intel_pstate_get_hwp_cap(cpudata); + __intel_pstate_update_max_freq(cpudata, policy); + + cpufreq_cpu_release(policy); + } - cpufreq_update_policy(cpudata->cpu); wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); } @@ -1672,6 +1667,37 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata) } } +static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) +{ + cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); + + /* + * If this CPU gen doesn't call for change in balance_perf + * EPP return. + */ + if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) + return; + + /* + * If powerup EPP is something other than chipset default 0x80 and + * - is more performance oriented than 0x80 (default balance_perf EPP) + * - But less performance oriented than performance EPP + * then use this as new balance_perf EPP. + */ + if (cpudata->epp_default < HWP_EPP_BALANCE_PERFORMANCE && + cpudata->epp_default > HWP_EPP_PERFORMANCE) { + epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = cpudata->epp_default; + return; + } + + /* + * Use hard coded value per gen to update the balance_perf + * and default EPP. + */ + cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE]; + intel_pstate_set_epp(cpudata, cpudata->epp_default); +} + static void intel_pstate_hwp_enable(struct cpudata *cpudata) { /* First disable HWP notification interrupt till we activate again */ @@ -1679,13 +1705,16 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata) wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); - if (cpudata->epp_default == -EINVAL) - cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); intel_pstate_enable_hwp_interrupt(cpudata); + + if (cpudata->epp_default >= 0) + return; + + intel_pstate_update_epp_defaults(cpudata); } -static int atom_get_min_pstate(void) +static int atom_get_min_pstate(int not_used) { u64 value; @@ -1693,7 +1722,7 @@ static int atom_get_min_pstate(void) return (value >> 8) & 0x7F; } -static int atom_get_max_pstate(void) +static int atom_get_max_pstate(int not_used) { u64 value; @@ -1701,7 +1730,7 @@ static int atom_get_max_pstate(void) return (value >> 16) & 0x7F; } -static int atom_get_turbo_pstate(void) +static int atom_get_turbo_pstate(int not_used) { u64 value; @@ -1779,23 +1808,23 @@ static void atom_get_vid(struct cpudata *cpudata) cpudata->vid.turbo = value & 0x7f; } -static int core_get_min_pstate(void) +static int core_get_min_pstate(int cpu) { u64 value; - rdmsrl(MSR_PLATFORM_INFO, value); + rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value); return (value >> 40) & 0xFF; } -static int core_get_max_pstate_physical(void) +static int core_get_max_pstate_physical(int cpu) { u64 value; - rdmsrl(MSR_PLATFORM_INFO, value); + rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value); return (value >> 8) & 0xFF; } -static int core_get_tdp_ratio(u64 plat_info) +static int core_get_tdp_ratio(int cpu, u64 plat_info) { /* Check how many TDP levels present */ if (plat_info & 0x600000000) { @@ -1805,13 +1834,13 @@ static int core_get_tdp_ratio(u64 plat_info) int err; /* Get the TDP level (0, 1, 2) to get ratios */ - err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl); + err = rdmsrl_safe_on_cpu(cpu, MSR_CONFIG_TDP_CONTROL, &tdp_ctrl); if (err) return err; /* TDP MSR are continuous starting at 0x648 */ tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03); - err = rdmsrl_safe(tdp_msr, &tdp_ratio); + err = rdmsrl_safe_on_cpu(cpu, tdp_msr, &tdp_ratio); if (err) return err; @@ -1828,7 +1857,7 @@ static int core_get_tdp_ratio(u64 plat_info) return -ENXIO; } -static int core_get_max_pstate(void) +static int core_get_max_pstate(int cpu) { u64 tar; u64 plat_info; @@ -1836,10 +1865,10 @@ static int core_get_max_pstate(void) int tdp_ratio; int err; - rdmsrl(MSR_PLATFORM_INFO, plat_info); + rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info); max_pstate = (plat_info >> 8) & 0xFF; - tdp_ratio = core_get_tdp_ratio(plat_info); + tdp_ratio = core_get_tdp_ratio(cpu, plat_info); if (tdp_ratio <= 0) return max_pstate; @@ -1848,7 +1877,7 @@ static int core_get_max_pstate(void) return tdp_ratio; } - err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar); + err = rdmsrl_safe_on_cpu(cpu, MSR_TURBO_ACTIVATION_RATIO, &tar); if (!err) { int tar_levels; @@ -1863,13 +1892,13 @@ static int core_get_max_pstate(void) return max_pstate; } -static int core_get_turbo_pstate(void) +static int core_get_turbo_pstate(int cpu) { u64 value; int nont, ret; - rdmsrl(MSR_TURBO_RATIO_LIMIT, value); - nont = core_get_max_pstate(); + rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value); + nont = core_get_max_pstate(cpu); ret = (value) & 255; if (ret <= nont) ret = nont; @@ -1897,50 +1926,37 @@ static int knl_get_aperf_mperf_shift(void) return 10; } -static int knl_get_turbo_pstate(void) +static int knl_get_turbo_pstate(int cpu) { u64 value; int nont, ret; - rdmsrl(MSR_TURBO_RATIO_LIMIT, value); - nont = core_get_max_pstate(); + rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value); + nont = core_get_max_pstate(cpu); ret = (((value) >> 8) & 0xFF); if (ret <= nont) ret = nont; return ret; } -#ifdef CONFIG_ACPI_CPPC_LIB -static u32 hybrid_ref_perf; - -static int hybrid_get_cpu_scaling(int cpu) +static void hybrid_get_type(void *data) { - return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf, - intel_pstate_cppc_nominal(cpu)); + u8 *cpu_type = data; + + *cpu_type = get_this_hybrid_cpu_type(); } -static void intel_pstate_cppc_set_cpu_scaling(void) +static int hybrid_get_cpu_scaling(int cpu) { - u32 min_nominal_perf = U32_MAX; - int cpu; - - for_each_present_cpu(cpu) { - u32 nominal_perf = intel_pstate_cppc_nominal(cpu); + u8 cpu_type = 0; - if (nominal_perf && nominal_perf < min_nominal_perf) - min_nominal_perf = nominal_perf; - } + smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1); + /* P-cores have a smaller perf level-to-freqency scaling factor. */ + if (cpu_type == 0x40) + return 78741; - if (min_nominal_perf < U32_MAX) { - hybrid_ref_perf = min_nominal_perf; - pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling; - } + return core_get_scaling(); } -#else -static inline void intel_pstate_cppc_set_cpu_scaling(void) -{ -} -#endif /* CONFIG_ACPI_CPPC_LIB */ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) { @@ -1970,10 +1986,10 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu) static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { - int perf_ctl_max_phys = pstate_funcs.get_max_physical(); + int perf_ctl_max_phys = pstate_funcs.get_max_physical(cpu->cpu); int perf_ctl_scaling = pstate_funcs.get_scaling(); - cpu->pstate.min_pstate = pstate_funcs.get_min(); + cpu->pstate.min_pstate = pstate_funcs.get_min(cpu->cpu); cpu->pstate.max_pstate_physical = perf_ctl_max_phys; cpu->pstate.perf_ctl_scaling = perf_ctl_scaling; @@ -1989,8 +2005,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) } } else { cpu->pstate.scaling = perf_ctl_scaling; - cpu->pstate.max_pstate = pstate_funcs.get_max(); - cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); + cpu->pstate.max_pstate = pstate_funcs.get_max(cpu->cpu); + cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(cpu->cpu); } if (cpu->pstate.scaling == perf_ctl_scaling) { @@ -2361,6 +2377,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { X86_MATCH(SKYLAKE_X, core_funcs), X86_MATCH(COMETLAKE, core_funcs), X86_MATCH(ICELAKE_X, core_funcs), + X86_MATCH(TIGERLAKE, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); @@ -2370,6 +2387,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { X86_MATCH(BROADWELL_X, core_funcs), X86_MATCH(SKYLAKE_X, core_funcs), X86_MATCH(ICELAKE_X, core_funcs), + X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), {} }; @@ -2486,18 +2504,14 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, * HWP needs some special consideration, because HWP_REQUEST uses * abstract values to represent performance rather than pure ratios. */ - if (hwp_active) { - intel_pstate_get_hwp_cap(cpu); + if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) { + int scaling = cpu->pstate.scaling; + int freq; - if (cpu->pstate.scaling != perf_ctl_scaling) { - int scaling = cpu->pstate.scaling; - int freq; - - freq = max_policy_perf * perf_ctl_scaling; - max_policy_perf = DIV_ROUND_UP(freq, scaling); - freq = min_policy_perf * perf_ctl_scaling; - min_policy_perf = DIV_ROUND_UP(freq, scaling); - } + freq = max_policy_perf * perf_ctl_scaling; + max_policy_perf = DIV_ROUND_UP(freq, scaling); + freq = min_policy_perf * perf_ctl_scaling; + min_policy_perf = DIV_ROUND_UP(freq, scaling); } pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n", @@ -3168,9 +3182,9 @@ static unsigned int force_load __initdata; static int __init intel_pstate_msrs_not_valid(void) { - if (!pstate_funcs.get_max() || - !pstate_funcs.get_min() || - !pstate_funcs.get_turbo()) + if (!pstate_funcs.get_max(0) || + !pstate_funcs.get_min(0) || + !pstate_funcs.get_turbo(0)) return -ENODEV; return 0; @@ -3349,6 +3363,16 @@ static bool intel_pstate_hwp_is_enabled(void) return !!(value & 0x1); } +static const struct x86_cpu_id intel_epp_balance_perf[] = { + /* + * Set EPP value as 102, this is the max suggested EPP + * which can result in one core turbo frequency for + * AlderLake Mobile CPUs. + */ + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102), + {} +}; + static int __init intel_pstate_init(void) { static struct cpudata **_all_cpu_data; @@ -3387,7 +3411,7 @@ static int __init intel_pstate_init(void) default_driver = &intel_pstate; if (boot_cpu_has(X86_FEATURE_HYBRID_CPU)) - intel_pstate_cppc_set_cpu_scaling(); + pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling; goto hwp_cpu_matched; } @@ -3438,6 +3462,13 @@ hwp_cpu_matched: intel_pstate_sysfs_expose_params(); + if (hwp_active) { + const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf); + + if (id) + epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data; + } + mutex_lock(&intel_pstate_driver_lock); rc = intel_pstate_register_driver(default_driver); mutex_unlock(&intel_pstate_driver_lock); diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index c538a153ee82..3e000e1a75c6 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -668,9 +668,9 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle, u32 nesting_level, void *context, void **return_value) { - struct acpi_device *d; + struct acpi_device *d = acpi_fetch_acpi_dev(obj_handle); - if (acpi_bus_get_device(obj_handle, &d)) + if (!d) return 0; *return_value = acpi_driver_data(d); diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index 8ddbd0c5ce37..f0e0a35c7f21 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -36,6 +36,8 @@ enum { struct mtk_cpufreq_data { struct cpufreq_frequency_table *table; void __iomem *reg_bases[REG_ARRAY_SIZE]; + struct resource *res; + void __iomem *base; int nr_opp; }; @@ -49,8 +51,8 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = { }; static int __maybe_unused -mtk_cpufreq_get_cpu_power(unsigned long *mW, - unsigned long *KHz, struct device *cpu_dev) +mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *uW, + unsigned long *KHz) { struct mtk_cpufreq_data *data; struct cpufreq_policy *policy; @@ -69,8 +71,9 @@ mtk_cpufreq_get_cpu_power(unsigned long *mW, i--; *KHz = data->table[i].frequency; - *mW = readl_relaxed(data->reg_bases[REG_EM_POWER_TBL] + - i * LUT_ROW_SIZE) / 1000; + /* Provide micro-Watts value to the Energy Model */ + *uW = readl_relaxed(data->reg_bases[REG_EM_POWER_TBL] + + i * LUT_ROW_SIZE); return 0; } @@ -156,6 +159,7 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, { struct mtk_cpufreq_data *data; struct device *dev = &pdev->dev; + struct resource *res; void __iomem *base; int ret, i; int index; @@ -170,9 +174,26 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, if (index < 0) return index; - base = devm_platform_ioremap_resource(pdev, index); - if (IS_ERR(base)) - return PTR_ERR(base); + res = platform_get_resource(pdev, IORESOURCE_MEM, index); + if (!res) { + dev_err(dev, "failed to get mem resource %d\n", index); + return -ENODEV; + } + + if (!request_mem_region(res->start, resource_size(res), res->name)) { + dev_err(dev, "failed to request resource %pR\n", res); + return -EBUSY; + } + + base = ioremap(res->start, resource_size(res)); + if (!base) { + dev_err(dev, "failed to map resource %pR\n", res); + ret = -ENOMEM; + goto release_region; + } + + data->base = base; + data->res = res; for (i = REG_FREQ_LUT_TABLE; i < REG_ARRAY_SIZE; i++) data->reg_bases[i] = base + offsets[i]; @@ -187,6 +208,9 @@ static int mtk_cpu_resources_init(struct platform_device *pdev, policy->driver_data = data; return 0; +release_region: + release_mem_region(res->start, resource_size(res)); + return ret; } static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) @@ -233,9 +257,13 @@ static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) static int mtk_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) { struct mtk_cpufreq_data *data = policy->driver_data; + struct resource *res = data->res; + void __iomem *base = data->base; /* HW should be in paused state now */ writel_relaxed(0x0, data->reg_bases[REG_FREQ_ENABLE]); + iounmap(base); + release_mem_region(res->start, resource_size(res)); return 0; } diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 866163883b48..7f2680bc9a0f 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -8,18 +8,22 @@ #include <linux/cpu.h> #include <linux/cpufreq.h> #include <linux/cpumask.h> +#include <linux/minmax.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm_opp.h> #include <linux/regulator/consumer.h> -#include <linux/slab.h> -#include <linux/thermal.h> -#define MIN_VOLT_SHIFT (100000) -#define MAX_VOLT_SHIFT (200000) -#define MAX_VOLT_LIMIT (1150000) -#define VOLT_TOL (10000) +struct mtk_cpufreq_platform_data { + int min_volt_shift; + int max_volt_shift; + int proc_max_volt; + int sram_min_volt; + int sram_max_volt; + bool ccifreq_supported; +}; /* * The struct mtk_cpu_dvfs_info holds necessary information for doing CPU DVFS @@ -35,6 +39,7 @@ struct mtk_cpu_dvfs_info { struct cpumask cpus; struct device *cpu_dev; + struct device *cci_dev; struct regulator *proc_reg; struct regulator *sram_reg; struct clk *cpu_clk; @@ -42,8 +47,20 @@ struct mtk_cpu_dvfs_info { struct list_head list_head; int intermediate_voltage; bool need_voltage_tracking; + int vproc_on_boot; + int pre_vproc; + /* Avoid race condition for regulators between notify and policy */ + struct mutex reg_lock; + struct notifier_block opp_nb; + unsigned int opp_cpu; + unsigned long current_freq; + const struct mtk_cpufreq_platform_data *soc_data; + int vtrack_max; + bool ccifreq_bound; }; +static struct platform_device *cpufreq_pdev; + static LIST_HEAD(dvfs_info_list); static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu) @@ -61,142 +78,123 @@ static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu) static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info, int new_vproc) { + const struct mtk_cpufreq_platform_data *soc_data = info->soc_data; struct regulator *proc_reg = info->proc_reg; struct regulator *sram_reg = info->sram_reg; - int old_vproc, old_vsram, new_vsram, vsram, vproc, ret; - - old_vproc = regulator_get_voltage(proc_reg); - if (old_vproc < 0) { - pr_err("%s: invalid Vproc value: %d\n", __func__, old_vproc); - return old_vproc; - } - /* Vsram should not exceed the maximum allowed voltage of SoC. */ - new_vsram = min(new_vproc + MIN_VOLT_SHIFT, MAX_VOLT_LIMIT); - - if (old_vproc < new_vproc) { - /* - * When scaling up voltages, Vsram and Vproc scale up step - * by step. At each step, set Vsram to (Vproc + 200mV) first, - * then set Vproc to (Vsram - 100mV). - * Keep doing it until Vsram and Vproc hit target voltages. - */ - do { - old_vsram = regulator_get_voltage(sram_reg); - if (old_vsram < 0) { - pr_err("%s: invalid Vsram value: %d\n", - __func__, old_vsram); - return old_vsram; - } - old_vproc = regulator_get_voltage(proc_reg); - if (old_vproc < 0) { - pr_err("%s: invalid Vproc value: %d\n", - __func__, old_vproc); - return old_vproc; - } - - vsram = min(new_vsram, old_vproc + MAX_VOLT_SHIFT); + int pre_vproc, pre_vsram, new_vsram, vsram, vproc, ret; + int retry = info->vtrack_max; + + pre_vproc = regulator_get_voltage(proc_reg); + if (pre_vproc < 0) { + dev_err(info->cpu_dev, + "invalid Vproc value: %d\n", pre_vproc); + return pre_vproc; + } - if (vsram + VOLT_TOL >= MAX_VOLT_LIMIT) { - vsram = MAX_VOLT_LIMIT; + pre_vsram = regulator_get_voltage(sram_reg); + if (pre_vsram < 0) { + dev_err(info->cpu_dev, "invalid Vsram value: %d\n", pre_vsram); + return pre_vsram; + } - /* - * If the target Vsram hits the maximum voltage, - * try to set the exact voltage value first. - */ - ret = regulator_set_voltage(sram_reg, vsram, - vsram); - if (ret) - ret = regulator_set_voltage(sram_reg, - vsram - VOLT_TOL, - vsram); + new_vsram = clamp(new_vproc + soc_data->min_volt_shift, + soc_data->sram_min_volt, soc_data->sram_max_volt); - vproc = new_vproc; - } else { - ret = regulator_set_voltage(sram_reg, vsram, - vsram + VOLT_TOL); + do { + if (pre_vproc <= new_vproc) { + vsram = clamp(pre_vproc + soc_data->max_volt_shift, + soc_data->sram_min_volt, new_vsram); + ret = regulator_set_voltage(sram_reg, vsram, + soc_data->sram_max_volt); - vproc = vsram - MIN_VOLT_SHIFT; - } if (ret) return ret; + if (vsram == soc_data->sram_max_volt || + new_vsram == soc_data->sram_min_volt) + vproc = new_vproc; + else + vproc = vsram - soc_data->min_volt_shift; + ret = regulator_set_voltage(proc_reg, vproc, - vproc + VOLT_TOL); + soc_data->proc_max_volt); if (ret) { - regulator_set_voltage(sram_reg, old_vsram, - old_vsram); + regulator_set_voltage(sram_reg, pre_vsram, + soc_data->sram_max_volt); return ret; } - } while (vproc < new_vproc || vsram < new_vsram); - } else if (old_vproc > new_vproc) { - /* - * When scaling down voltages, Vsram and Vproc scale down step - * by step. At each step, set Vproc to (Vsram - 200mV) first, - * then set Vproc to (Vproc + 100mV). - * Keep doing it until Vsram and Vproc hit target voltages. - */ - do { - old_vproc = regulator_get_voltage(proc_reg); - if (old_vproc < 0) { - pr_err("%s: invalid Vproc value: %d\n", - __func__, old_vproc); - return old_vproc; - } - old_vsram = regulator_get_voltage(sram_reg); - if (old_vsram < 0) { - pr_err("%s: invalid Vsram value: %d\n", - __func__, old_vsram); - return old_vsram; - } - - vproc = max(new_vproc, old_vsram - MAX_VOLT_SHIFT); + } else if (pre_vproc > new_vproc) { + vproc = max(new_vproc, + pre_vsram - soc_data->max_volt_shift); ret = regulator_set_voltage(proc_reg, vproc, - vproc + VOLT_TOL); + soc_data->proc_max_volt); if (ret) return ret; if (vproc == new_vproc) vsram = new_vsram; else - vsram = max(new_vsram, vproc + MIN_VOLT_SHIFT); - - if (vsram + VOLT_TOL >= MAX_VOLT_LIMIT) { - vsram = MAX_VOLT_LIMIT; - - /* - * If the target Vsram hits the maximum voltage, - * try to set the exact voltage value first. - */ - ret = regulator_set_voltage(sram_reg, vsram, - vsram); - if (ret) - ret = regulator_set_voltage(sram_reg, - vsram - VOLT_TOL, - vsram); - } else { - ret = regulator_set_voltage(sram_reg, vsram, - vsram + VOLT_TOL); - } + vsram = max(new_vsram, + vproc + soc_data->min_volt_shift); + ret = regulator_set_voltage(sram_reg, vsram, + soc_data->sram_max_volt); if (ret) { - regulator_set_voltage(proc_reg, old_vproc, - old_vproc); + regulator_set_voltage(proc_reg, pre_vproc, + soc_data->proc_max_volt); return ret; } - } while (vproc > new_vproc + VOLT_TOL || - vsram > new_vsram + VOLT_TOL); - } + } + + pre_vproc = vproc; + pre_vsram = vsram; + + if (--retry < 0) { + dev_err(info->cpu_dev, + "over loop count, failed to set voltage\n"); + return -EINVAL; + } + } while (vproc != new_vproc || vsram != new_vsram); return 0; } static int mtk_cpufreq_set_voltage(struct mtk_cpu_dvfs_info *info, int vproc) { + const struct mtk_cpufreq_platform_data *soc_data = info->soc_data; + int ret; + if (info->need_voltage_tracking) - return mtk_cpufreq_voltage_tracking(info, vproc); + ret = mtk_cpufreq_voltage_tracking(info, vproc); else - return regulator_set_voltage(info->proc_reg, vproc, - vproc + VOLT_TOL); + ret = regulator_set_voltage(info->proc_reg, vproc, + soc_data->proc_max_volt); + if (!ret) + info->pre_vproc = vproc; + + return ret; +} + +static bool is_ccifreq_ready(struct mtk_cpu_dvfs_info *info) +{ + struct device_link *sup_link; + + if (info->ccifreq_bound) + return true; + + sup_link = device_link_add(info->cpu_dev, info->cci_dev, + DL_FLAG_AUTOREMOVE_CONSUMER); + if (!sup_link) { + dev_err(info->cpu_dev, "cpu%d: sup_link is NULL\n", info->opp_cpu); + return false; + } + + if (sup_link->supplier->links.status != DL_DEV_DRIVER_BOUND) + return false; + + info->ccifreq_bound = true; + + return true; } static int mtk_cpufreq_set_target(struct cpufreq_policy *policy, @@ -208,219 +206,372 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy, struct mtk_cpu_dvfs_info *info = policy->driver_data; struct device *cpu_dev = info->cpu_dev; struct dev_pm_opp *opp; - long freq_hz, old_freq_hz; - int vproc, old_vproc, inter_vproc, target_vproc, ret; + long freq_hz, pre_freq_hz; + int vproc, pre_vproc, inter_vproc, target_vproc, ret; inter_vproc = info->intermediate_voltage; - old_freq_hz = clk_get_rate(cpu_clk); - old_vproc = regulator_get_voltage(info->proc_reg); - if (old_vproc < 0) { - pr_err("%s: invalid Vproc value: %d\n", __func__, old_vproc); - return old_vproc; + pre_freq_hz = clk_get_rate(cpu_clk); + + mutex_lock(&info->reg_lock); + + if (unlikely(info->pre_vproc <= 0)) + pre_vproc = regulator_get_voltage(info->proc_reg); + else + pre_vproc = info->pre_vproc; + + if (pre_vproc < 0) { + dev_err(cpu_dev, "invalid Vproc value: %d\n", pre_vproc); + ret = pre_vproc; + goto out; } freq_hz = freq_table[index].frequency * 1000; opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_hz); if (IS_ERR(opp)) { - pr_err("cpu%d: failed to find OPP for %ld\n", - policy->cpu, freq_hz); - return PTR_ERR(opp); + dev_err(cpu_dev, "cpu%d: failed to find OPP for %ld\n", + policy->cpu, freq_hz); + ret = PTR_ERR(opp); + goto out; } vproc = dev_pm_opp_get_voltage(opp); dev_pm_opp_put(opp); /* + * If MediaTek cci is supported but is not ready, we will use the value + * of max(target cpu voltage, booting voltage) to prevent high freqeuncy + * low voltage crash. + */ + if (info->soc_data->ccifreq_supported && !is_ccifreq_ready(info)) + vproc = max(vproc, info->vproc_on_boot); + + /* * If the new voltage or the intermediate voltage is higher than the * current voltage, scale up voltage first. */ - target_vproc = (inter_vproc > vproc) ? inter_vproc : vproc; - if (old_vproc < target_vproc) { + target_vproc = max(inter_vproc, vproc); + if (pre_vproc <= target_vproc) { ret = mtk_cpufreq_set_voltage(info, target_vproc); if (ret) { - pr_err("cpu%d: failed to scale up voltage!\n", - policy->cpu); - mtk_cpufreq_set_voltage(info, old_vproc); - return ret; + dev_err(cpu_dev, + "cpu%d: failed to scale up voltage!\n", policy->cpu); + mtk_cpufreq_set_voltage(info, pre_vproc); + goto out; } } /* Reparent the CPU clock to intermediate clock. */ ret = clk_set_parent(cpu_clk, info->inter_clk); if (ret) { - pr_err("cpu%d: failed to re-parent cpu clock!\n", - policy->cpu); - mtk_cpufreq_set_voltage(info, old_vproc); - WARN_ON(1); - return ret; + dev_err(cpu_dev, + "cpu%d: failed to re-parent cpu clock!\n", policy->cpu); + mtk_cpufreq_set_voltage(info, pre_vproc); + goto out; } /* Set the original PLL to target rate. */ ret = clk_set_rate(armpll, freq_hz); if (ret) { - pr_err("cpu%d: failed to scale cpu clock rate!\n", - policy->cpu); + dev_err(cpu_dev, + "cpu%d: failed to scale cpu clock rate!\n", policy->cpu); clk_set_parent(cpu_clk, armpll); - mtk_cpufreq_set_voltage(info, old_vproc); - return ret; + mtk_cpufreq_set_voltage(info, pre_vproc); + goto out; } /* Set parent of CPU clock back to the original PLL. */ ret = clk_set_parent(cpu_clk, armpll); if (ret) { - pr_err("cpu%d: failed to re-parent cpu clock!\n", - policy->cpu); + dev_err(cpu_dev, + "cpu%d: failed to re-parent cpu clock!\n", policy->cpu); mtk_cpufreq_set_voltage(info, inter_vproc); - WARN_ON(1); - return ret; + goto out; } /* * If the new voltage is lower than the intermediate voltage or the * original voltage, scale down to the new voltage. */ - if (vproc < inter_vproc || vproc < old_vproc) { + if (vproc < inter_vproc || vproc < pre_vproc) { ret = mtk_cpufreq_set_voltage(info, vproc); if (ret) { - pr_err("cpu%d: failed to scale down voltage!\n", - policy->cpu); + dev_err(cpu_dev, + "cpu%d: failed to scale down voltage!\n", policy->cpu); clk_set_parent(cpu_clk, info->inter_clk); - clk_set_rate(armpll, old_freq_hz); + clk_set_rate(armpll, pre_freq_hz); clk_set_parent(cpu_clk, armpll); - return ret; + goto out; } } - return 0; + info->current_freq = freq_hz; + +out: + mutex_unlock(&info->reg_lock); + + return ret; } #define DYNAMIC_POWER "dynamic-power-coefficient" +static int mtk_cpufreq_opp_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct dev_pm_opp *opp = data; + struct dev_pm_opp *new_opp; + struct mtk_cpu_dvfs_info *info; + unsigned long freq, volt; + struct cpufreq_policy *policy; + int ret = 0; + + info = container_of(nb, struct mtk_cpu_dvfs_info, opp_nb); + + if (event == OPP_EVENT_ADJUST_VOLTAGE) { + freq = dev_pm_opp_get_freq(opp); + + mutex_lock(&info->reg_lock); + if (info->current_freq == freq) { + volt = dev_pm_opp_get_voltage(opp); + ret = mtk_cpufreq_set_voltage(info, volt); + if (ret) + dev_err(info->cpu_dev, + "failed to scale voltage: %d\n", ret); + } + mutex_unlock(&info->reg_lock); + } else if (event == OPP_EVENT_DISABLE) { + freq = dev_pm_opp_get_freq(opp); + + /* case of current opp item is disabled */ + if (info->current_freq == freq) { + freq = 1; + new_opp = dev_pm_opp_find_freq_ceil(info->cpu_dev, + &freq); + if (IS_ERR(new_opp)) { + dev_err(info->cpu_dev, + "all opp items are disabled\n"); + ret = PTR_ERR(new_opp); + return notifier_from_errno(ret); + } + + dev_pm_opp_put(new_opp); + policy = cpufreq_cpu_get(info->opp_cpu); + if (policy) { + cpufreq_driver_target(policy, freq / 1000, + CPUFREQ_RELATION_L); + cpufreq_cpu_put(policy); + } + } + } + + return notifier_from_errno(ret); +} + +static struct device *of_get_cci(struct device *cpu_dev) +{ + struct device_node *np; + struct platform_device *pdev; + + np = of_parse_phandle(cpu_dev->of_node, "mediatek,cci", 0); + if (IS_ERR_OR_NULL(np)) + return NULL; + + pdev = of_find_device_by_node(np); + of_node_put(np); + if (IS_ERR_OR_NULL(pdev)) + return NULL; + + return &pdev->dev; +} + static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) { struct device *cpu_dev; - struct regulator *proc_reg = ERR_PTR(-ENODEV); - struct regulator *sram_reg = ERR_PTR(-ENODEV); - struct clk *cpu_clk = ERR_PTR(-ENODEV); - struct clk *inter_clk = ERR_PTR(-ENODEV); struct dev_pm_opp *opp; unsigned long rate; int ret; cpu_dev = get_cpu_device(cpu); if (!cpu_dev) { - pr_err("failed to get cpu%d device\n", cpu); + dev_err(cpu_dev, "failed to get cpu%d device\n", cpu); return -ENODEV; } + info->cpu_dev = cpu_dev; - cpu_clk = clk_get(cpu_dev, "cpu"); - if (IS_ERR(cpu_clk)) { - if (PTR_ERR(cpu_clk) == -EPROBE_DEFER) - pr_warn("cpu clk for cpu%d not ready, retry.\n", cpu); - else - pr_err("failed to get cpu clk for cpu%d\n", cpu); - - ret = PTR_ERR(cpu_clk); - return ret; + info->ccifreq_bound = false; + if (info->soc_data->ccifreq_supported) { + info->cci_dev = of_get_cci(info->cpu_dev); + if (IS_ERR_OR_NULL(info->cci_dev)) { + ret = PTR_ERR(info->cci_dev); + dev_err(cpu_dev, "cpu%d: failed to get cci device\n", cpu); + return -ENODEV; + } } - inter_clk = clk_get(cpu_dev, "intermediate"); - if (IS_ERR(inter_clk)) { - if (PTR_ERR(inter_clk) == -EPROBE_DEFER) - pr_warn("intermediate clk for cpu%d not ready, retry.\n", - cpu); - else - pr_err("failed to get intermediate clk for cpu%d\n", - cpu); + info->cpu_clk = clk_get(cpu_dev, "cpu"); + if (IS_ERR(info->cpu_clk)) { + ret = PTR_ERR(info->cpu_clk); + return dev_err_probe(cpu_dev, ret, + "cpu%d: failed to get cpu clk\n", cpu); + } - ret = PTR_ERR(inter_clk); + info->inter_clk = clk_get(cpu_dev, "intermediate"); + if (IS_ERR(info->inter_clk)) { + ret = PTR_ERR(info->inter_clk); + dev_err_probe(cpu_dev, ret, + "cpu%d: failed to get intermediate clk\n", cpu); goto out_free_resources; } - proc_reg = regulator_get_optional(cpu_dev, "proc"); - if (IS_ERR(proc_reg)) { - if (PTR_ERR(proc_reg) == -EPROBE_DEFER) - pr_warn("proc regulator for cpu%d not ready, retry.\n", - cpu); - else - pr_err("failed to get proc regulator for cpu%d\n", - cpu); + info->proc_reg = regulator_get_optional(cpu_dev, "proc"); + if (IS_ERR(info->proc_reg)) { + ret = PTR_ERR(info->proc_reg); + dev_err_probe(cpu_dev, ret, + "cpu%d: failed to get proc regulator\n", cpu); + goto out_free_resources; + } - ret = PTR_ERR(proc_reg); + ret = regulator_enable(info->proc_reg); + if (ret) { + dev_warn(cpu_dev, "cpu%d: failed to enable vproc\n", cpu); goto out_free_resources; } /* Both presence and absence of sram regulator are valid cases. */ - sram_reg = regulator_get_exclusive(cpu_dev, "sram"); + info->sram_reg = regulator_get_optional(cpu_dev, "sram"); + if (IS_ERR(info->sram_reg)) { + ret = PTR_ERR(info->sram_reg); + if (ret == -EPROBE_DEFER) + goto out_free_resources; + + info->sram_reg = NULL; + } else { + ret = regulator_enable(info->sram_reg); + if (ret) { + dev_warn(cpu_dev, "cpu%d: failed to enable vsram\n", cpu); + goto out_free_resources; + } + } /* Get OPP-sharing information from "operating-points-v2" bindings */ ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, &info->cpus); if (ret) { - pr_err("failed to get OPP-sharing information for cpu%d\n", - cpu); + dev_err(cpu_dev, + "cpu%d: failed to get OPP-sharing information\n", cpu); goto out_free_resources; } ret = dev_pm_opp_of_cpumask_add_table(&info->cpus); if (ret) { - pr_warn("no OPP table for cpu%d\n", cpu); + dev_warn(cpu_dev, "cpu%d: no OPP table\n", cpu); goto out_free_resources; } + ret = clk_prepare_enable(info->cpu_clk); + if (ret) + goto out_free_opp_table; + + ret = clk_prepare_enable(info->inter_clk); + if (ret) + goto out_disable_mux_clock; + + if (info->soc_data->ccifreq_supported) { + info->vproc_on_boot = regulator_get_voltage(info->proc_reg); + if (info->vproc_on_boot < 0) { + ret = info->vproc_on_boot; + dev_err(info->cpu_dev, + "invalid Vproc value: %d\n", info->vproc_on_boot); + goto out_disable_inter_clock; + } + } + /* Search a safe voltage for intermediate frequency. */ - rate = clk_get_rate(inter_clk); + rate = clk_get_rate(info->inter_clk); opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate); if (IS_ERR(opp)) { - pr_err("failed to get intermediate opp for cpu%d\n", cpu); + dev_err(cpu_dev, "cpu%d: failed to get intermediate opp\n", cpu); ret = PTR_ERR(opp); - goto out_free_opp_table; + goto out_disable_inter_clock; } info->intermediate_voltage = dev_pm_opp_get_voltage(opp); dev_pm_opp_put(opp); - info->cpu_dev = cpu_dev; - info->proc_reg = proc_reg; - info->sram_reg = IS_ERR(sram_reg) ? NULL : sram_reg; - info->cpu_clk = cpu_clk; - info->inter_clk = inter_clk; + mutex_init(&info->reg_lock); + info->current_freq = clk_get_rate(info->cpu_clk); + + info->opp_cpu = cpu; + info->opp_nb.notifier_call = mtk_cpufreq_opp_notifier; + ret = dev_pm_opp_register_notifier(cpu_dev, &info->opp_nb); + if (ret) { + dev_err(cpu_dev, "cpu%d: failed to register opp notifier\n", cpu); + goto out_disable_inter_clock; + } /* * If SRAM regulator is present, software "voltage tracking" is needed * for this CPU power domain. */ - info->need_voltage_tracking = !IS_ERR(sram_reg); + info->need_voltage_tracking = (info->sram_reg != NULL); + + /* + * We assume min voltage is 0 and tracking target voltage using + * min_volt_shift for each iteration. + * The vtrack_max is 3 times of expeted iteration count. + */ + info->vtrack_max = 3 * DIV_ROUND_UP(max(info->soc_data->sram_max_volt, + info->soc_data->proc_max_volt), + info->soc_data->min_volt_shift); return 0; +out_disable_inter_clock: + clk_disable_unprepare(info->inter_clk); + +out_disable_mux_clock: + clk_disable_unprepare(info->cpu_clk); + out_free_opp_table: dev_pm_opp_of_cpumask_remove_table(&info->cpus); out_free_resources: - if (!IS_ERR(proc_reg)) - regulator_put(proc_reg); - if (!IS_ERR(sram_reg)) - regulator_put(sram_reg); - if (!IS_ERR(cpu_clk)) - clk_put(cpu_clk); - if (!IS_ERR(inter_clk)) - clk_put(inter_clk); + if (regulator_is_enabled(info->proc_reg)) + regulator_disable(info->proc_reg); + if (info->sram_reg && regulator_is_enabled(info->sram_reg)) + regulator_disable(info->sram_reg); + + if (!IS_ERR(info->proc_reg)) + regulator_put(info->proc_reg); + if (!IS_ERR(info->sram_reg)) + regulator_put(info->sram_reg); + if (!IS_ERR(info->cpu_clk)) + clk_put(info->cpu_clk); + if (!IS_ERR(info->inter_clk)) + clk_put(info->inter_clk); return ret; } static void mtk_cpu_dvfs_info_release(struct mtk_cpu_dvfs_info *info) { - if (!IS_ERR(info->proc_reg)) + if (!IS_ERR(info->proc_reg)) { + regulator_disable(info->proc_reg); regulator_put(info->proc_reg); - if (!IS_ERR(info->sram_reg)) + } + if (!IS_ERR(info->sram_reg)) { + regulator_disable(info->sram_reg); regulator_put(info->sram_reg); - if (!IS_ERR(info->cpu_clk)) + } + if (!IS_ERR(info->cpu_clk)) { + clk_disable_unprepare(info->cpu_clk); clk_put(info->cpu_clk); - if (!IS_ERR(info->inter_clk)) + } + if (!IS_ERR(info->inter_clk)) { + clk_disable_unprepare(info->inter_clk); clk_put(info->inter_clk); + } dev_pm_opp_of_cpumask_remove_table(&info->cpus); + dev_pm_opp_unregister_notifier(info->cpu_dev, &info->opp_nb); } static int mtk_cpufreq_init(struct cpufreq_policy *policy) @@ -432,14 +583,15 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy) info = mtk_cpu_dvfs_info_lookup(policy->cpu); if (!info) { pr_err("dvfs info for cpu%d is not initialized.\n", - policy->cpu); + policy->cpu); return -EINVAL; } ret = dev_pm_opp_init_cpufreq_table(info->cpu_dev, &freq_table); if (ret) { - pr_err("failed to init cpufreq table for cpu%d: %d\n", - policy->cpu, ret); + dev_err(info->cpu_dev, + "failed to init cpufreq table for cpu%d: %d\n", + policy->cpu, ret); return ret; } @@ -476,9 +628,17 @@ static struct cpufreq_driver mtk_cpufreq_driver = { static int mtk_cpufreq_probe(struct platform_device *pdev) { + const struct mtk_cpufreq_platform_data *data; struct mtk_cpu_dvfs_info *info, *tmp; int cpu, ret; + data = dev_get_platdata(&pdev->dev); + if (!data) { + dev_err(&pdev->dev, + "failed to get mtk cpufreq platform data\n"); + return -ENODEV; + } + for_each_possible_cpu(cpu) { info = mtk_cpu_dvfs_info_lookup(cpu); if (info) @@ -490,6 +650,7 @@ static int mtk_cpufreq_probe(struct platform_device *pdev) goto release_dvfs_info_list; } + info->soc_data = data; ret = mtk_cpu_dvfs_info_init(info, cpu); if (ret) { dev_err(&pdev->dev, @@ -525,20 +686,47 @@ static struct platform_driver mtk_cpufreq_platdrv = { .probe = mtk_cpufreq_probe, }; +static const struct mtk_cpufreq_platform_data mt2701_platform_data = { + .min_volt_shift = 100000, + .max_volt_shift = 200000, + .proc_max_volt = 1150000, + .sram_min_volt = 0, + .sram_max_volt = 1150000, + .ccifreq_supported = false, +}; + +static const struct mtk_cpufreq_platform_data mt8183_platform_data = { + .min_volt_shift = 100000, + .max_volt_shift = 200000, + .proc_max_volt = 1150000, + .sram_min_volt = 0, + .sram_max_volt = 1150000, + .ccifreq_supported = true, +}; + +static const struct mtk_cpufreq_platform_data mt8186_platform_data = { + .min_volt_shift = 100000, + .max_volt_shift = 250000, + .proc_max_volt = 1118750, + .sram_min_volt = 850000, + .sram_max_volt = 1118750, + .ccifreq_supported = true, +}; + /* List of machines supported by this driver */ static const struct of_device_id mtk_cpufreq_machines[] __initconst = { - { .compatible = "mediatek,mt2701", }, - { .compatible = "mediatek,mt2712", }, - { .compatible = "mediatek,mt7622", }, - { .compatible = "mediatek,mt7623", }, - { .compatible = "mediatek,mt8167", }, - { .compatible = "mediatek,mt817x", }, - { .compatible = "mediatek,mt8173", }, - { .compatible = "mediatek,mt8176", }, - { .compatible = "mediatek,mt8183", }, - { .compatible = "mediatek,mt8365", }, - { .compatible = "mediatek,mt8516", }, - + { .compatible = "mediatek,mt2701", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt2712", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt7622", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt7623", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8167", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt817x", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8173", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8176", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8183", .data = &mt8183_platform_data }, + { .compatible = "mediatek,mt8186", .data = &mt8186_platform_data }, + { .compatible = "mediatek,mt8365", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8516", .data = &mt2701_platform_data }, { } }; MODULE_DEVICE_TABLE(of, mtk_cpufreq_machines); @@ -547,7 +735,7 @@ static int __init mtk_cpufreq_driver_init(void) { struct device_node *np; const struct of_device_id *match; - struct platform_device *pdev; + const struct mtk_cpufreq_platform_data *data; int err; np = of_find_node_by_path("/"); @@ -560,6 +748,7 @@ static int __init mtk_cpufreq_driver_init(void) pr_debug("Machine is not compatible with mtk-cpufreq\n"); return -ENODEV; } + data = match->data; err = platform_driver_register(&mtk_cpufreq_platdrv); if (err) @@ -571,16 +760,24 @@ static int __init mtk_cpufreq_driver_init(void) * and the device registration codes are put here to handle defer * probing. */ - pdev = platform_device_register_simple("mtk-cpufreq", -1, NULL, 0); - if (IS_ERR(pdev)) { + cpufreq_pdev = platform_device_register_data(NULL, "mtk-cpufreq", -1, + data, sizeof(*data)); + if (IS_ERR(cpufreq_pdev)) { pr_err("failed to register mtk-cpufreq platform device\n"); platform_driver_unregister(&mtk_cpufreq_platdrv); - return PTR_ERR(pdev); + return PTR_ERR(cpufreq_pdev); } return 0; } -device_initcall(mtk_cpufreq_driver_init); +module_init(mtk_cpufreq_driver_init) + +static void __exit mtk_cpufreq_driver_exit(void) +{ + platform_device_unregister(cpufreq_pdev); + platform_driver_unregister(&mtk_cpufreq_platdrv); +} +module_exit(mtk_cpufreq_driver_exit) MODULE_DESCRIPTION("MediaTek CPUFreq driver"); MODULE_AUTHOR("Pi-Cheng Chen <pi-cheng.chen@linaro.org>"); diff --git a/drivers/cpufreq/mvebu-cpufreq.c b/drivers/cpufreq/mvebu-cpufreq.c index 6d33a639f902..7f3cfe668f30 100644 --- a/drivers/cpufreq/mvebu-cpufreq.c +++ b/drivers/cpufreq/mvebu-cpufreq.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * CPUFreq support for Armada 370/XP platforms. * @@ -6,10 +7,6 @@ * Yehuda Yitschak <yehuday@marvell.com> * Gregory Clement <gregory.clement@free-electrons.com> * Thomas Petazzoni <thomas.petazzoni@free-electrons.com> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. */ #define pr_fmt(fmt) "mvebu-pmsu: " fmt diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index 815645170c4d..039a66bbe1be 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -18,7 +18,6 @@ #include <asm/hw_irq.h> #include <asm/io.h> -#include <asm/prom.h> #include <asm/time.h> #include <asm/smp.h> diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 4f20c6a9108d..4b8ee2014da6 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -24,7 +24,7 @@ #include <linux/device.h> #include <linux/hardirq.h> #include <linux/of_device.h> -#include <asm/prom.h> + #include <asm/machdep.h> #include <asm/irq.h> #include <asm/pmac_feature.h> @@ -470,6 +470,10 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) if (slew_done_gpio_np) slew_done_gpio = read_gpio(slew_done_gpio_np); + of_node_put(volt_gpio_np); + of_node_put(freq_gpio_np); + of_node_put(slew_done_gpio_np); + /* If we use the frequency GPIOs, calculate the min/max speeds based * on the bus frequencies */ diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c index d7542a106e6b..ba9c31d98bd6 100644 --- a/drivers/cpufreq/pmac64-cpufreq.c +++ b/drivers/cpufreq/pmac64-cpufreq.c @@ -22,7 +22,7 @@ #include <linux/completion.h> #include <linux/mutex.h> #include <linux/of_device.h> -#include <asm/prom.h> + #include <asm/machdep.h> #include <asm/irq.h> #include <asm/sections.h> diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 12ab4014af71..d289036beff2 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1172,14 +1172,14 @@ static int powernowk8_init(void) unsigned int i, supported_cpus = 0; int ret; + if (!x86_match_cpu(powernow_k8_ids)) + return -ENODEV; + if (boot_cpu_has(X86_FEATURE_HW_PSTATE)) { __request_acpi_cpufreq(); return -ENODEV; } - if (!x86_match_cpu(powernow_k8_ids)) - return -ENODEV; - cpus_read_lock(); for_each_online_cpu(i) { smp_call_function_single(i, check_supported_cpu, &ret, 1); diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index c58abb4cca3a..e3313ce63b38 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -12,7 +12,6 @@ #include <linux/of_platform.h> #include <asm/machdep.h> -#include <asm/prom.h> #include <asm/cell-regs.h> #include "ppc_cbe_cpufreq.h" diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index 037fe23bc6ed..4fba3637b115 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -13,9 +13,9 @@ #include <linux/init.h> #include <linux/of_platform.h> #include <linux/pm_qos.h> +#include <linux/slab.h> #include <asm/processor.h> -#include <asm/prom.h> #include <asm/pmi.h> #include <asm/cell-regs.h> diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c index f0b6f52eb2c3..ed1ae061a687 100644 --- a/drivers/cpufreq/pxa2xx-cpufreq.c +++ b/drivers/cpufreq/pxa2xx-cpufreq.c @@ -24,11 +24,9 @@ #include <linux/cpufreq.h> #include <linux/err.h> #include <linux/regulator/consumer.h> +#include <linux/soc/pxa/cpu.h> #include <linux/io.h> -#include <mach/pxa2xx-regs.h> -#include <mach/smemc.h> - #ifdef DEBUG static unsigned int freq_debug; module_param(freq_debug, uint, 0); @@ -106,8 +104,6 @@ static struct pxa_freqs pxa27x_freqs[] = { static struct cpufreq_frequency_table pxa27x_freq_table[NUM_PXA27x_FREQS+1]; -extern unsigned get_clk_frequency_khz(int info); - #ifdef CONFIG_REGULATOR static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq) diff --git a/drivers/cpufreq/pxa3xx-cpufreq.c b/drivers/cpufreq/pxa3xx-cpufreq.c index 32f993c94675..4afa48d172db 100644 --- a/drivers/cpufreq/pxa3xx-cpufreq.c +++ b/drivers/cpufreq/pxa3xx-cpufreq.c @@ -8,12 +8,11 @@ #include <linux/sched.h> #include <linux/init.h> #include <linux/cpufreq.h> +#include <linux/soc/pxa/cpu.h> +#include <linux/clk/pxa.h> #include <linux/slab.h> #include <linux/io.h> -#include <mach/generic.h> -#include <mach/pxa3xx-regs.h> - #define HSS_104M (0) #define HSS_156M (1) #define HSS_208M (2) @@ -34,6 +33,28 @@ #define DMCFS_26M (0) #define DMCFS_260M (3) +#define ACCR_XPDIS (1 << 31) /* Core PLL Output Disable */ +#define ACCR_SPDIS (1 << 30) /* System PLL Output Disable */ +#define ACCR_D0CS (1 << 26) /* D0 Mode Clock Select */ +#define ACCR_PCCE (1 << 11) /* Power Mode Change Clock Enable */ +#define ACCR_DDR_D0CS (1 << 7) /* DDR SDRAM clock frequency in D0CS (PXA31x only) */ + +#define ACCR_SMCFS_MASK (0x7 << 23) /* Static Memory Controller Frequency Select */ +#define ACCR_SFLFS_MASK (0x3 << 18) /* Frequency Select for Internal Memory Controller */ +#define ACCR_XSPCLK_MASK (0x3 << 16) /* Core Frequency during Frequency Change */ +#define ACCR_HSS_MASK (0x3 << 14) /* System Bus-Clock Frequency Select */ +#define ACCR_DMCFS_MASK (0x3 << 12) /* Dynamic Memory Controller Clock Frequency Select */ +#define ACCR_XN_MASK (0x7 << 8) /* Core PLL Turbo-Mode-to-Run-Mode Ratio */ +#define ACCR_XL_MASK (0x1f) /* Core PLL Run-Mode-to-Oscillator Ratio */ + +#define ACCR_SMCFS(x) (((x) & 0x7) << 23) +#define ACCR_SFLFS(x) (((x) & 0x3) << 18) +#define ACCR_XSPCLK(x) (((x) & 0x3) << 16) +#define ACCR_HSS(x) (((x) & 0x3) << 14) +#define ACCR_DMCFS(x) (((x) & 0x3) << 12) +#define ACCR_XN(x) (((x) & 0x7) << 8) +#define ACCR_XL(x) ((x) & 0x1f) + struct pxa3xx_freq_info { unsigned int cpufreq_mhz; unsigned int core_xl : 5; @@ -111,41 +132,29 @@ static int setup_freqs_table(struct cpufreq_policy *policy, static void __update_core_freq(struct pxa3xx_freq_info *info) { - uint32_t mask = ACCR_XN_MASK | ACCR_XL_MASK; - uint32_t accr = ACCR; - uint32_t xclkcfg; - - accr &= ~(ACCR_XN_MASK | ACCR_XL_MASK | ACCR_XSPCLK_MASK); - accr |= ACCR_XN(info->core_xn) | ACCR_XL(info->core_xl); + u32 mask, disable, enable, xclkcfg; + mask = ACCR_XN_MASK | ACCR_XL_MASK; + disable = mask | ACCR_XSPCLK_MASK; + enable = ACCR_XN(info->core_xn) | ACCR_XL(info->core_xl); /* No clock until core PLL is re-locked */ - accr |= ACCR_XSPCLK(XSPCLK_NONE); - + enable |= ACCR_XSPCLK(XSPCLK_NONE); xclkcfg = (info->core_xn == 2) ? 0x3 : 0x2; /* turbo bit */ - ACCR = accr; - __asm__("mcr p14, 0, %0, c6, c0, 0\n" : : "r"(xclkcfg)); - - while ((ACSR & mask) != (accr & mask)) - cpu_relax(); + pxa3xx_clk_update_accr(disable, enable, xclkcfg, mask); } static void __update_bus_freq(struct pxa3xx_freq_info *info) { - uint32_t mask; - uint32_t accr = ACCR; - - mask = ACCR_SMCFS_MASK | ACCR_SFLFS_MASK | ACCR_HSS_MASK | - ACCR_DMCFS_MASK; - - accr &= ~mask; - accr |= ACCR_SMCFS(info->smcfs) | ACCR_SFLFS(info->sflfs) | - ACCR_HSS(info->hss) | ACCR_DMCFS(info->dmcfs); + u32 mask, disable, enable; - ACCR = accr; + mask = ACCR_SMCFS_MASK | ACCR_SFLFS_MASK | ACCR_HSS_MASK | + ACCR_DMCFS_MASK; + disable = mask; + enable = ACCR_SMCFS(info->smcfs) | ACCR_SFLFS(info->sflfs) | + ACCR_HSS(info->hss) | ACCR_DMCFS(info->dmcfs); - while ((ACSR & mask) != (accr & mask)) - cpu_relax(); + pxa3xx_clk_update_accr(disable, enable, 0, mask); } static unsigned int pxa3xx_cpufreq_get(unsigned int cpu) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index a2be0df7e174..833589bc95e4 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -13,8 +13,10 @@ #include <linux/of_address.h> #include <linux/of_platform.h> #include <linux/pm_opp.h> +#include <linux/pm_qos.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/units.h> #define LUT_MAX_ENTRIES 40U #define LUT_SRC GENMASK(31, 30) @@ -24,12 +26,15 @@ #define CLK_HW_DIV 2 #define LUT_TURBO_IND 1 -#define HZ_PER_KHZ 1000 +#define GT_IRQ_STATUS BIT(2) struct qcom_cpufreq_soc_data { u32 reg_enable; + u32 reg_domain_state; + u32 reg_dcvs_ctrl; u32 reg_freq_lut; u32 reg_volt_lut; + u32 reg_intr_clr; u32 reg_current_vote; u32 reg_perf_state; u8 lut_row_size; @@ -46,9 +51,14 @@ struct qcom_cpufreq_data { */ struct mutex throttle_lock; int throttle_irq; + char irq_name[15]; bool cancel_throttle; struct delayed_work throttle_work; struct cpufreq_policy *policy; + + bool per_core_dcvs; + + struct freq_qos_request throttle_freq_req; }; static unsigned long cpu_hw_rate, xo_rate; @@ -101,9 +111,14 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy, struct qcom_cpufreq_data *data = policy->driver_data; const struct qcom_cpufreq_soc_data *soc_data = data->soc_data; unsigned long freq = policy->freq_table[index].frequency; + unsigned int i; writel_relaxed(index, data->base + soc_data->reg_perf_state); + if (data->per_core_dcvs) + for (i = 1; i < cpumask_weight(policy->related_cpus); i++) + writel_relaxed(index, data->base + soc_data->reg_perf_state + i * 4); + if (icc_scaling_enabled) qcom_cpufreq_set_bw(policy, freq); @@ -136,10 +151,15 @@ static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, struct qcom_cpufreq_data *data = policy->driver_data; const struct qcom_cpufreq_soc_data *soc_data = data->soc_data; unsigned int index; + unsigned int i; index = policy->cached_resolved_idx; writel_relaxed(index, data->base + soc_data->reg_perf_state); + if (data->per_core_dcvs) + for (i = 1; i < cpumask_weight(policy->related_cpus); i++) + writel_relaxed(index, data->base + soc_data->reg_perf_state + i * 4); + return policy->freq_table[index].frequency; } @@ -266,45 +286,48 @@ static void qcom_get_related_cpus(int index, struct cpumask *m) } } -static unsigned int qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) +static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) { - unsigned int val = readl_relaxed(data->base + data->soc_data->reg_current_vote); + unsigned int lval; + + if (data->soc_data->reg_current_vote) + lval = readl_relaxed(data->base + data->soc_data->reg_current_vote) & 0x3ff; + else + lval = readl_relaxed(data->base + data->soc_data->reg_domain_state) & 0xff; - return (val & 0x3FF) * 19200; + return lval * xo_rate; } static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) { - unsigned long max_capacity, capacity, freq_hz, throttled_freq; struct cpufreq_policy *policy = data->policy; - int cpu = cpumask_first(policy->cpus); + int cpu = cpumask_first(policy->related_cpus); struct device *dev = get_cpu_device(cpu); + unsigned long freq_hz, throttled_freq; struct dev_pm_opp *opp; - unsigned int freq; /* * Get the h/w throttled frequency, normalize it using the * registered opp table and use it to calculate thermal pressure. */ - freq = qcom_lmh_get_throttle_freq(data); - freq_hz = freq * HZ_PER_KHZ; + freq_hz = qcom_lmh_get_throttle_freq(data); opp = dev_pm_opp_find_freq_floor(dev, &freq_hz); if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE) - dev_pm_opp_find_freq_ceil(dev, &freq_hz); + opp = dev_pm_opp_find_freq_ceil(dev, &freq_hz); - throttled_freq = freq_hz / HZ_PER_KHZ; - - /* Update thermal pressure */ + if (IS_ERR(opp)) { + dev_warn(dev, "Can't find the OPP for throttling: %pe!\n", opp); + } else { + dev_pm_opp_put(opp); + } - max_capacity = arch_scale_cpu_capacity(cpu); - capacity = mult_frac(max_capacity, throttled_freq, policy->cpuinfo.max_freq); + throttled_freq = freq_hz / HZ_PER_KHZ; - /* Don't pass boost capacity to scheduler */ - if (capacity > max_capacity) - capacity = max_capacity; + freq_qos_update_request(&data->throttle_freq_req, throttled_freq); - arch_set_thermal_pressure(policy->cpus, max_capacity - capacity); + /* Update thermal pressure (the boost frequencies are accepted) */ + arch_update_thermal_pressure(policy->related_cpus, throttled_freq); /* * In the unlikely case policy is unregistered do not enable @@ -342,13 +365,18 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data) /* Disable interrupt and enable polling */ disable_irq_nosync(c_data->throttle_irq); - qcom_lmh_dcvs_notify(c_data); + schedule_delayed_work(&c_data->throttle_work, 0); - return 0; + if (c_data->soc_data->reg_intr_clr) + writel_relaxed(GT_IRQ_STATUS, + c_data->base + c_data->soc_data->reg_intr_clr); + + return IRQ_HANDLED; } static const struct qcom_cpufreq_soc_data qcom_soc_data = { .reg_enable = 0x0, + .reg_dcvs_ctrl = 0xbc, .reg_freq_lut = 0x110, .reg_volt_lut = 0x114, .reg_current_vote = 0x704, @@ -358,8 +386,11 @@ static const struct qcom_cpufreq_soc_data qcom_soc_data = { static const struct qcom_cpufreq_soc_data epss_soc_data = { .reg_enable = 0x0, + .reg_domain_state = 0x20, + .reg_dcvs_ctrl = 0xb0, .reg_freq_lut = 0x100, .reg_volt_lut = 0x200, + .reg_intr_clr = 0x308, .reg_perf_state = 0x320, .lut_row_size = 4, }; @@ -375,16 +406,25 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index) { struct qcom_cpufreq_data *data = policy->driver_data; struct platform_device *pdev = cpufreq_get_driver_data(); - char irq_name[15]; int ret; /* * Look for LMh interrupt. If no interrupt line is specified / * if there is an error, allow cpufreq to be enabled as usual. */ - data->throttle_irq = platform_get_irq(pdev, index); - if (data->throttle_irq <= 0) - return data->throttle_irq == -EPROBE_DEFER ? -EPROBE_DEFER : 0; + data->throttle_irq = platform_get_irq_optional(pdev, index); + if (data->throttle_irq == -ENXIO) + return 0; + if (data->throttle_irq < 0) + return data->throttle_irq; + + ret = freq_qos_add_request(&policy->constraints, + &data->throttle_freq_req, FREQ_QOS_MAX, + FREQ_QOS_MAX_DEFAULT_VALUE); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to add freq constraint (%d)\n", ret); + return ret; + } data->cancel_throttle = false; data->policy = policy; @@ -392,27 +432,67 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index) mutex_init(&data->throttle_lock); INIT_DEFERRABLE_WORK(&data->throttle_work, qcom_lmh_dcvs_poll); - snprintf(irq_name, sizeof(irq_name), "dcvsh-irq-%u", policy->cpu); + snprintf(data->irq_name, sizeof(data->irq_name), "dcvsh-irq-%u", policy->cpu); ret = request_threaded_irq(data->throttle_irq, NULL, qcom_lmh_dcvs_handle_irq, - IRQF_ONESHOT, irq_name, data); + IRQF_ONESHOT | IRQF_NO_AUTOEN, data->irq_name, data); if (ret) { - dev_err(&pdev->dev, "Error registering %s: %d\n", irq_name, ret); + dev_err(&pdev->dev, "Error registering %s: %d\n", data->irq_name, ret); return 0; } + ret = irq_set_affinity_and_hint(data->throttle_irq, policy->cpus); + if (ret) + dev_err(&pdev->dev, "Failed to set CPU affinity of %s[%d]\n", + data->irq_name, data->throttle_irq); + return 0; } -static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) +static int qcom_cpufreq_hw_cpu_online(struct cpufreq_policy *policy) { + struct qcom_cpufreq_data *data = policy->driver_data; + struct platform_device *pdev = cpufreq_get_driver_data(); + int ret; + if (data->throttle_irq <= 0) - return; + return 0; + + mutex_lock(&data->throttle_lock); + data->cancel_throttle = false; + mutex_unlock(&data->throttle_lock); + + ret = irq_set_affinity_and_hint(data->throttle_irq, policy->cpus); + if (ret) + dev_err(&pdev->dev, "Failed to set CPU affinity of %s[%d]\n", + data->irq_name, data->throttle_irq); + + return ret; +} + +static int qcom_cpufreq_hw_cpu_offline(struct cpufreq_policy *policy) +{ + struct qcom_cpufreq_data *data = policy->driver_data; + + if (data->throttle_irq <= 0) + return 0; mutex_lock(&data->throttle_lock); data->cancel_throttle = true; mutex_unlock(&data->throttle_lock); cancel_delayed_work_sync(&data->throttle_work); + irq_set_affinity_and_hint(data->throttle_irq, NULL); + disable_irq_nosync(data->throttle_irq); + + return 0; +} + +static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) +{ + if (data->throttle_irq <= 0) + return; + + freq_qos_remove_request(&data->throttle_freq_req); free_irq(data->throttle_irq, data); } @@ -482,8 +562,11 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) goto error; } + if (readl_relaxed(base + data->soc_data->reg_dcvs_ctrl) & 0x1) + data->per_core_dcvs = true; + qcom_get_related_cpus(index, policy->cpus); - if (!cpumask_weight(policy->cpus)) { + if (cpumask_empty(policy->cpus)) { dev_err(dev, "Domain-%d failed to get related CPUs\n", index); ret = -ENOENT; goto error; @@ -543,6 +626,14 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) return 0; } +static void qcom_cpufreq_ready(struct cpufreq_policy *policy) +{ + struct qcom_cpufreq_data *data = policy->driver_data; + + if (data->throttle_irq >= 0) + enable_irq(data->throttle_irq); +} + static struct freq_attr *qcom_cpufreq_hw_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, &cpufreq_freq_attr_scaling_boost_freqs, @@ -558,10 +649,13 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = { .get = qcom_cpufreq_hw_get, .init = qcom_cpufreq_hw_cpu_init, .exit = qcom_cpufreq_hw_cpu_exit, + .online = qcom_cpufreq_hw_cpu_online, + .offline = qcom_cpufreq_hw_cpu_offline, .register_em = cpufreq_register_em_with_opp, .fast_switch = qcom_cpufreq_hw_fast_switch, .name = "qcom-cpufreq-hw", .attr = qcom_cpufreq_hw_attr, + .ready = qcom_cpufreq_ready, }; static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index d1744b5d9619..a577586b23be 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -55,9 +55,7 @@ struct qcom_cpufreq_match_data { }; struct qcom_cpufreq_drv { - struct opp_table **names_opp_tables; - struct opp_table **hw_opp_tables; - struct opp_table **genpd_opp_tables; + int *opp_tokens; u32 versions; const struct qcom_cpufreq_match_data *data; }; @@ -66,7 +64,7 @@ static struct platform_device *cpufreq_dt_pdev, *cpufreq_pdev; static void get_krait_bin_format_a(struct device *cpu_dev, int *speed, int *pvs, int *pvs_ver, - struct nvmem_cell *pvs_nvmem, u8 *buf) + u8 *buf) { u32 pte_efuse; @@ -97,7 +95,7 @@ static void get_krait_bin_format_a(struct device *cpu_dev, static void get_krait_bin_format_b(struct device *cpu_dev, int *speed, int *pvs, int *pvs_ver, - struct nvmem_cell *pvs_nvmem, u8 *buf) + u8 *buf) { u32 pte_efuse, redundant_sel; @@ -130,7 +128,7 @@ static void get_krait_bin_format_b(struct device *cpu_dev, } /* Check PVS_BLOW_STATUS */ - pte_efuse = *(((u32 *)buf) + 4); + pte_efuse = *(((u32 *)buf) + 1); pte_efuse &= BIT(21); if (pte_efuse) { dev_dbg(cpu_dev, "PVS bin: %d\n", *pvs); @@ -215,6 +213,7 @@ static int qcom_cpufreq_krait_name_version(struct device *cpu_dev, int speed = 0, pvs = 0, pvs_ver = 0; u8 *speedbin; size_t len; + int ret = 0; speedbin = nvmem_cell_read(speedbin_nvmem, &len); @@ -224,15 +223,16 @@ static int qcom_cpufreq_krait_name_version(struct device *cpu_dev, switch (len) { case 4: get_krait_bin_format_a(cpu_dev, &speed, &pvs, &pvs_ver, - speedbin_nvmem, speedbin); + speedbin); break; case 8: get_krait_bin_format_b(cpu_dev, &speed, &pvs, &pvs_ver, - speedbin_nvmem, speedbin); + speedbin); break; default: dev_err(cpu_dev, "Unable to read nvmem data. Defaulting to 0!\n"); - return -ENODEV; + ret = -ENODEV; + goto len_error; } snprintf(*pvs_name, sizeof("speedXX-pvsXX-vXX"), "speed%d-pvs%d-v%d", @@ -240,8 +240,9 @@ static int qcom_cpufreq_krait_name_version(struct device *cpu_dev, drv->versions = (1 << speed); +len_error: kfree(speedbin); - return 0; + return ret; } static const struct qcom_cpufreq_match_data match_data_kryo = { @@ -264,7 +265,8 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) struct nvmem_cell *speedbin_nvmem; struct device_node *np; struct device *cpu_dev; - char *pvs_name = "speedXX-pvsXX-vXX"; + char pvs_name_buffer[] = "speedXX-pvsXX-vXX"; + char *pvs_name = pvs_name_buffer; unsigned cpu; const struct of_device_id *match; int ret; @@ -297,11 +299,8 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) if (drv->data->get_version) { speedbin_nvmem = of_nvmem_cell_get(np, NULL); if (IS_ERR(speedbin_nvmem)) { - if (PTR_ERR(speedbin_nvmem) != -EPROBE_DEFER) - dev_err(cpu_dev, - "Could not get nvmem cell: %ld\n", - PTR_ERR(speedbin_nvmem)); - ret = PTR_ERR(speedbin_nvmem); + ret = dev_err_probe(cpu_dev, PTR_ERR(speedbin_nvmem), + "Could not get nvmem cell\n"); goto free_drv; } @@ -315,72 +314,43 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) } of_node_put(np); - drv->names_opp_tables = kcalloc(num_possible_cpus(), - sizeof(*drv->names_opp_tables), + drv->opp_tokens = kcalloc(num_possible_cpus(), sizeof(*drv->opp_tokens), GFP_KERNEL); - if (!drv->names_opp_tables) { + if (!drv->opp_tokens) { ret = -ENOMEM; goto free_drv; } - drv->hw_opp_tables = kcalloc(num_possible_cpus(), - sizeof(*drv->hw_opp_tables), - GFP_KERNEL); - if (!drv->hw_opp_tables) { - ret = -ENOMEM; - goto free_opp_names; - } - - drv->genpd_opp_tables = kcalloc(num_possible_cpus(), - sizeof(*drv->genpd_opp_tables), - GFP_KERNEL); - if (!drv->genpd_opp_tables) { - ret = -ENOMEM; - goto free_opp; - } for_each_possible_cpu(cpu) { + struct dev_pm_opp_config config = { + .supported_hw = NULL, + }; + cpu_dev = get_cpu_device(cpu); if (NULL == cpu_dev) { ret = -ENODEV; - goto free_genpd_opp; + goto free_opp; } if (drv->data->get_version) { + config.supported_hw = &drv->versions; + config.supported_hw_count = 1; - if (pvs_name) { - drv->names_opp_tables[cpu] = dev_pm_opp_set_prop_name( - cpu_dev, - pvs_name); - if (IS_ERR(drv->names_opp_tables[cpu])) { - ret = PTR_ERR(drv->names_opp_tables[cpu]); - dev_err(cpu_dev, "Failed to add OPP name %s\n", - pvs_name); - goto free_opp; - } - } - - drv->hw_opp_tables[cpu] = dev_pm_opp_set_supported_hw( - cpu_dev, &drv->versions, 1); - if (IS_ERR(drv->hw_opp_tables[cpu])) { - ret = PTR_ERR(drv->hw_opp_tables[cpu]); - dev_err(cpu_dev, - "Failed to set supported hardware\n"); - goto free_genpd_opp; - } + if (pvs_name) + config.prop_name = pvs_name; } if (drv->data->genpd_names) { - drv->genpd_opp_tables[cpu] = - dev_pm_opp_attach_genpd(cpu_dev, - drv->data->genpd_names, - NULL); - if (IS_ERR(drv->genpd_opp_tables[cpu])) { - ret = PTR_ERR(drv->genpd_opp_tables[cpu]); - if (ret != -EPROBE_DEFER) - dev_err(cpu_dev, - "Could not attach to pm_domain: %d\n", - ret); - goto free_genpd_opp; + config.genpd_names = drv->data->genpd_names; + config.virt_devs = NULL; + } + + if (config.supported_hw || config.genpd_names) { + drv->opp_tokens[cpu] = dev_pm_opp_set_config(cpu_dev, &config); + if (drv->opp_tokens[cpu] < 0) { + ret = drv->opp_tokens[cpu]; + dev_err(cpu_dev, "Failed to set OPP config\n"); + goto free_opp; } } } @@ -395,27 +365,10 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) ret = PTR_ERR(cpufreq_dt_pdev); dev_err(cpu_dev, "Failed to register platform device\n"); -free_genpd_opp: - for_each_possible_cpu(cpu) { - if (IS_ERR(drv->genpd_opp_tables[cpu])) - break; - dev_pm_opp_detach_genpd(drv->genpd_opp_tables[cpu]); - } - kfree(drv->genpd_opp_tables); free_opp: - for_each_possible_cpu(cpu) { - if (IS_ERR(drv->names_opp_tables[cpu])) - break; - dev_pm_opp_put_prop_name(drv->names_opp_tables[cpu]); - } - for_each_possible_cpu(cpu) { - if (IS_ERR(drv->hw_opp_tables[cpu])) - break; - dev_pm_opp_put_supported_hw(drv->hw_opp_tables[cpu]); - } - kfree(drv->hw_opp_tables); -free_opp_names: - kfree(drv->names_opp_tables); + for_each_possible_cpu(cpu) + dev_pm_opp_clear_config(drv->opp_tokens[cpu]); + kfree(drv->opp_tokens); free_drv: kfree(drv); @@ -429,15 +382,10 @@ static int qcom_cpufreq_remove(struct platform_device *pdev) platform_device_unregister(cpufreq_dt_pdev); - for_each_possible_cpu(cpu) { - dev_pm_opp_put_supported_hw(drv->names_opp_tables[cpu]); - dev_pm_opp_put_supported_hw(drv->hw_opp_tables[cpu]); - dev_pm_opp_detach_genpd(drv->genpd_opp_tables[cpu]); - } + for_each_possible_cpu(cpu) + dev_pm_opp_clear_config(drv->opp_tokens[cpu]); - kfree(drv->names_opp_tables); - kfree(drv->hw_opp_tables); - kfree(drv->genpd_opp_tables); + kfree(drv->opp_tokens); kfree(drv); return 0; diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index 6b6b20da2bcf..573b417e1483 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -275,6 +275,7 @@ static int qoriq_cpufreq_probe(struct platform_device *pdev) np = of_find_matching_node(NULL, qoriq_cpufreq_blacklist); if (np) { + of_node_put(np); dev_info(&pdev->dev, "Disabling due to erratum A-008083"); return -ENODEV; } diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 1e0cd4d165f0..513a071845c2 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/scmi_protocol.h> #include <linux/types.h> +#include <linux/units.h> struct scmi_data { int domain_id; @@ -96,9 +97,10 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) } static int __maybe_unused -scmi_get_cpu_power(unsigned long *power, unsigned long *KHz, - struct device *cpu_dev) +scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power, + unsigned long *KHz) { + enum scmi_power_scale power_scale = perf_ops->power_scale_get(ph); unsigned long Hz; int ret, domain; @@ -112,6 +114,10 @@ scmi_get_cpu_power(unsigned long *power, unsigned long *KHz, if (ret) return ret; + /* Convert the power to uW if it is mW (ignore bogoW) */ + if (power_scale == SCMI_POWER_MILLIWATTS) + *power *= MICROWATT_PER_MILLIWATT; + /* The EM framework specifies the frequency in KHz. */ *KHz = Hz / 1000; @@ -154,7 +160,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) * table and opp-shared. */ ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, priv->opp_shared_cpus); - if (ret || !cpumask_weight(priv->opp_shared_cpus)) { + if (ret || cpumask_empty(priv->opp_shared_cpus)) { /* * Either opp-table is not set or no opp-shared was found. * Use the CPU mask from SCMI to designate CPUs sharing an OPP @@ -249,8 +255,9 @@ static int scmi_cpufreq_exit(struct cpufreq_policy *policy) static void scmi_cpufreq_register_em(struct cpufreq_policy *policy) { struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power); - bool power_scale_mw = perf_ops->power_scale_mw_get(ph); + enum scmi_power_scale power_scale = perf_ops->power_scale_get(ph); struct scmi_data *priv = policy->driver_data; + bool em_power_scale = false; /* * This callback will be called for each policy, but we don't need to @@ -262,9 +269,13 @@ static void scmi_cpufreq_register_em(struct cpufreq_policy *policy) if (!priv->nr_opp) return; + if (power_scale == SCMI_POWER_MILLIWATTS + || power_scale == SCMI_POWER_MICROWATTS) + em_power_scale = true; + em_dev_register_perf_domain(get_cpu_device(policy->cpu), priv->nr_opp, &em_cb, priv->opp_shared_cpus, - power_scale_mw); + em_power_scale); } static struct cpufreq_driver scmi_cpufreq_driver = { diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index bda3e7d42964..fd2c16821d54 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -1,17 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * System Control and Power Interface (SCPI) based CPUFreq Interface driver * * Copyright (C) 2015 ARM Ltd. * Sudeep Holla <sudeep.holla@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index fdb0a722d881..1a63aeea8711 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -156,9 +156,13 @@ static int sti_cpufreq_set_opp_info(void) unsigned int hw_info_offset; unsigned int version[VERSION_ELEMENTS]; int pcode, substrate, major, minor; - int ret; + int opp_token, ret; char name[MAX_PCODE_NAME_LEN]; - struct opp_table *opp_table; + struct dev_pm_opp_config config = { + .supported_hw = version, + .supported_hw_count = ARRAY_SIZE(version), + .prop_name = name, + }; reg_fields = sti_cpufreq_match(); if (!reg_fields) { @@ -210,21 +214,14 @@ use_defaults: snprintf(name, MAX_PCODE_NAME_LEN, "pcode%d", pcode); - opp_table = dev_pm_opp_set_prop_name(dev, name); - if (IS_ERR(opp_table)) { - dev_err(dev, "Failed to set prop name\n"); - return PTR_ERR(opp_table); - } - version[0] = BIT(major); version[1] = BIT(minor); version[2] = BIT(substrate); - opp_table = dev_pm_opp_set_supported_hw(dev, version, VERSION_ELEMENTS); - if (IS_ERR(opp_table)) { - dev_err(dev, "Failed to set supported hardware\n"); - ret = PTR_ERR(opp_table); - goto err_put_prop_name; + opp_token = dev_pm_opp_set_config(dev, &config); + if (opp_token < 0) { + dev_err(dev, "Failed to set OPP config\n"); + return opp_token; } dev_dbg(dev, "pcode: %d major: %d minor: %d substrate: %d\n", @@ -233,10 +230,6 @@ use_defaults: version[0], version[1], version[2]); return 0; - -err_put_prop_name: - dev_pm_opp_put_prop_name(opp_table); - return ret; } static int sti_cpufreq_fetch_syscon_registers(void) @@ -259,7 +252,7 @@ static int sti_cpufreq_fetch_syscon_registers(void) return 0; } -static int sti_cpufreq_init(void) +static int __init sti_cpufreq_init(void) { int ret; diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 2deed8d8773f..1583a370da39 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -56,12 +56,9 @@ static int sun50i_cpufreq_get_efuse(u32 *versions) speedbin_nvmem = of_nvmem_cell_get(np, NULL); of_node_put(np); - if (IS_ERR(speedbin_nvmem)) { - if (PTR_ERR(speedbin_nvmem) != -EPROBE_DEFER) - pr_err("Could not get nvmem cell: %ld\n", - PTR_ERR(speedbin_nvmem)); - return PTR_ERR(speedbin_nvmem); - } + if (IS_ERR(speedbin_nvmem)) + return dev_err_probe(cpu_dev, PTR_ERR(speedbin_nvmem), + "Could not get nvmem cell\n"); speedbin = nvmem_cell_read(speedbin_nvmem, &len); nvmem_cell_put(speedbin_nvmem); @@ -86,20 +83,22 @@ static int sun50i_cpufreq_get_efuse(u32 *versions) static int sun50i_cpufreq_nvmem_probe(struct platform_device *pdev) { - struct opp_table **opp_tables; + int *opp_tokens; char name[MAX_NAME_LEN]; unsigned int cpu; u32 speed = 0; int ret; - opp_tables = kcalloc(num_possible_cpus(), sizeof(*opp_tables), + opp_tokens = kcalloc(num_possible_cpus(), sizeof(*opp_tokens), GFP_KERNEL); - if (!opp_tables) + if (!opp_tokens) return -ENOMEM; ret = sun50i_cpufreq_get_efuse(&speed); - if (ret) + if (ret) { + kfree(opp_tokens); return ret; + } snprintf(name, MAX_NAME_LEN, "speed%d", speed); @@ -111,9 +110,9 @@ static int sun50i_cpufreq_nvmem_probe(struct platform_device *pdev) goto free_opp; } - opp_tables[cpu] = dev_pm_opp_set_prop_name(cpu_dev, name); - if (IS_ERR(opp_tables[cpu])) { - ret = PTR_ERR(opp_tables[cpu]); + opp_tokens[cpu] = dev_pm_opp_set_prop_name(cpu_dev, name); + if (opp_tokens[cpu] < 0) { + ret = opp_tokens[cpu]; pr_err("Failed to set prop name\n"); goto free_opp; } @@ -122,7 +121,7 @@ static int sun50i_cpufreq_nvmem_probe(struct platform_device *pdev) cpufreq_dt_pdev = platform_device_register_simple("cpufreq-dt", -1, NULL, 0); if (!IS_ERR(cpufreq_dt_pdev)) { - platform_set_drvdata(pdev, opp_tables); + platform_set_drvdata(pdev, opp_tokens); return 0; } @@ -130,27 +129,24 @@ static int sun50i_cpufreq_nvmem_probe(struct platform_device *pdev) pr_err("Failed to register platform device\n"); free_opp: - for_each_possible_cpu(cpu) { - if (IS_ERR_OR_NULL(opp_tables[cpu])) - break; - dev_pm_opp_put_prop_name(opp_tables[cpu]); - } - kfree(opp_tables); + for_each_possible_cpu(cpu) + dev_pm_opp_put_prop_name(opp_tokens[cpu]); + kfree(opp_tokens); return ret; } static int sun50i_cpufreq_nvmem_remove(struct platform_device *pdev) { - struct opp_table **opp_tables = platform_get_drvdata(pdev); + int *opp_tokens = platform_get_drvdata(pdev); unsigned int cpu; platform_device_unregister(cpufreq_dt_pdev); for_each_possible_cpu(cpu) - dev_pm_opp_put_prop_name(opp_tables[cpu]); + dev_pm_opp_put_prop_name(opp_tokens[cpu]); - kfree(opp_tables); + kfree(opp_tokens); return 0; } diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index ac381db25dbe..4596c3e323aa 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved + * Copyright (c) 2020 - 2022, NVIDIA CORPORATION. All rights reserved */ #include <linux/cpu.h> @@ -24,22 +24,19 @@ #define CPUFREQ_TBL_STEP_HZ (50 * KHZ * KHZ) #define MAX_CNT ~0U -/* cpufreq transisition latency */ -#define TEGRA_CPUFREQ_TRANSITION_LATENCY (300 * 1000) /* unit in nanoseconds */ +#define NDIV_MASK 0x1FF -enum cluster { - CLUSTER0, - CLUSTER1, - CLUSTER2, - CLUSTER3, - MAX_CLUSTERS, -}; +#define CORE_OFFSET(cpu) (cpu * 8) +#define CMU_CLKS_BASE 0x2000 +#define SCRATCH_FREQ_CORE_REG(data, cpu) (data->regs + CMU_CLKS_BASE + CORE_OFFSET(cpu)) -struct tegra194_cpufreq_data { - void __iomem *regs; - size_t num_clusters; - struct cpufreq_frequency_table **tables; -}; +#define MMCRAB_CLUSTER_BASE(cl) (0x30000 + (cl * 0x10000)) +#define CLUSTER_ACTMON_BASE(data, cl) \ + (data->regs + (MMCRAB_CLUSTER_BASE(cl) + data->soc->actmon_cntr_base)) +#define CORE_ACTMON_CNTR_REG(data, cl, cpu) (CLUSTER_ACTMON_BASE(data, cl) + CORE_OFFSET(cpu)) + +/* cpufreq transisition latency */ +#define TEGRA_CPUFREQ_TRANSITION_LATENCY (300 * 1000) /* unit in nanoseconds */ struct tegra_cpu_ctr { u32 cpu; @@ -52,13 +49,135 @@ struct read_counters_work { struct tegra_cpu_ctr c; }; +struct tegra_cpufreq_ops { + void (*read_counters)(struct tegra_cpu_ctr *c); + void (*set_cpu_ndiv)(struct cpufreq_policy *policy, u64 ndiv); + void (*get_cpu_cluster_id)(u32 cpu, u32 *cpuid, u32 *clusterid); + int (*get_cpu_ndiv)(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv); +}; + +struct tegra_cpufreq_soc { + struct tegra_cpufreq_ops *ops; + int maxcpus_per_cluster; + unsigned int num_clusters; + phys_addr_t actmon_cntr_base; +}; + +struct tegra194_cpufreq_data { + void __iomem *regs; + struct cpufreq_frequency_table **tables; + const struct tegra_cpufreq_soc *soc; +}; + static struct workqueue_struct *read_counters_wq; -static void get_cpu_cluster(void *cluster) +static void tegra_get_cpu_mpidr(void *mpidr) +{ + *((u64 *)mpidr) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; +} + +static void tegra234_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid) +{ + u64 mpidr; + + smp_call_function_single(cpu, tegra_get_cpu_mpidr, &mpidr, true); + + if (cpuid) + *cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + if (clusterid) + *clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 2); +} + +static int tegra234_get_cpu_ndiv(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv) +{ + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); + void __iomem *freq_core_reg; + u64 mpidr_id; + + /* use physical id to get address of per core frequency register */ + mpidr_id = (clusterid * data->soc->maxcpus_per_cluster) + cpuid; + freq_core_reg = SCRATCH_FREQ_CORE_REG(data, mpidr_id); + + *ndiv = readl(freq_core_reg) & NDIV_MASK; + + return 0; +} + +static void tegra234_set_cpu_ndiv(struct cpufreq_policy *policy, u64 ndiv) +{ + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); + void __iomem *freq_core_reg; + u32 cpu, cpuid, clusterid; + u64 mpidr_id; + + for_each_cpu_and(cpu, policy->cpus, cpu_online_mask) { + data->soc->ops->get_cpu_cluster_id(cpu, &cpuid, &clusterid); + + /* use physical id to get address of per core frequency register */ + mpidr_id = (clusterid * data->soc->maxcpus_per_cluster) + cpuid; + freq_core_reg = SCRATCH_FREQ_CORE_REG(data, mpidr_id); + + writel(ndiv, freq_core_reg); + } +} + +/* + * This register provides access to two counter values with a single + * 64-bit read. The counter values are used to determine the average + * actual frequency a core has run at over a period of time. + * [63:32] PLLP counter: Counts at fixed frequency (408 MHz) + * [31:0] Core clock counter: Counts on every core clock cycle + */ +static void tegra234_read_counters(struct tegra_cpu_ctr *c) +{ + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); + void __iomem *actmon_reg; + u32 cpuid, clusterid; + u64 val; + + data->soc->ops->get_cpu_cluster_id(c->cpu, &cpuid, &clusterid); + actmon_reg = CORE_ACTMON_CNTR_REG(data, clusterid, cpuid); + + val = readq(actmon_reg); + c->last_refclk_cnt = upper_32_bits(val); + c->last_coreclk_cnt = lower_32_bits(val); + udelay(US_DELAY); + val = readq(actmon_reg); + c->refclk_cnt = upper_32_bits(val); + c->coreclk_cnt = lower_32_bits(val); +} + +static struct tegra_cpufreq_ops tegra234_cpufreq_ops = { + .read_counters = tegra234_read_counters, + .get_cpu_cluster_id = tegra234_get_cpu_cluster_id, + .get_cpu_ndiv = tegra234_get_cpu_ndiv, + .set_cpu_ndiv = tegra234_set_cpu_ndiv, +}; + +static const struct tegra_cpufreq_soc tegra234_cpufreq_soc = { + .ops = &tegra234_cpufreq_ops, + .actmon_cntr_base = 0x9000, + .maxcpus_per_cluster = 4, + .num_clusters = 3, +}; + +static const struct tegra_cpufreq_soc tegra239_cpufreq_soc = { + .ops = &tegra234_cpufreq_ops, + .actmon_cntr_base = 0x4000, + .maxcpus_per_cluster = 8, + .num_clusters = 1, +}; + +static void tegra194_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid) { - u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; + u64 mpidr; + + smp_call_function_single(cpu, tegra_get_cpu_mpidr, &mpidr, true); - *((uint32_t *)cluster) = MPIDR_AFFINITY_LEVEL(mpidr, 1); + if (cpuid) + *cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0); + if (clusterid) + *clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); } /* @@ -85,11 +204,24 @@ static inline u32 map_ndiv_to_freq(struct mrq_cpu_ndiv_limits_response return nltbl->ref_clk_hz / KHZ * ndiv / (nltbl->pdiv * nltbl->mdiv); } +static void tegra194_read_counters(struct tegra_cpu_ctr *c) +{ + u64 val; + + val = read_freq_feedback(); + c->last_refclk_cnt = lower_32_bits(val); + c->last_coreclk_cnt = upper_32_bits(val); + udelay(US_DELAY); + val = read_freq_feedback(); + c->refclk_cnt = lower_32_bits(val); + c->coreclk_cnt = upper_32_bits(val); +} + static void tegra_read_counters(struct work_struct *work) { + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); struct read_counters_work *read_counters_work; struct tegra_cpu_ctr *c; - u64 val; /* * ref_clk_counter(32 bit counter) runs on constant clk, @@ -107,13 +239,7 @@ static void tegra_read_counters(struct work_struct *work) work); c = &read_counters_work->c; - val = read_freq_feedback(); - c->last_refclk_cnt = lower_32_bits(val); - c->last_coreclk_cnt = upper_32_bits(val); - udelay(US_DELAY); - val = read_freq_feedback(); - c->refclk_cnt = lower_32_bits(val); - c->coreclk_cnt = upper_32_bits(val); + data->soc->ops->read_counters(c); } /* @@ -177,7 +303,7 @@ static unsigned int tegra194_calculate_speed(u32 cpu) return (rate_mhz * KHZ); /* in KHz */ } -static void get_cpu_ndiv(void *ndiv) +static void tegra194_get_cpu_ndiv_sysreg(void *ndiv) { u64 ndiv_val; @@ -186,30 +312,39 @@ static void get_cpu_ndiv(void *ndiv) *(u64 *)ndiv = ndiv_val; } -static void set_cpu_ndiv(void *data) +static int tegra194_get_cpu_ndiv(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv) +{ + return smp_call_function_single(cpu, tegra194_get_cpu_ndiv_sysreg, &ndiv, true); +} + +static void tegra194_set_cpu_ndiv_sysreg(void *data) { - struct cpufreq_frequency_table *tbl = data; - u64 ndiv_val = (u64)tbl->driver_data; + u64 ndiv_val = *(u64 *)data; asm volatile("msr s3_0_c15_c0_4, %0" : : "r" (ndiv_val)); } +static void tegra194_set_cpu_ndiv(struct cpufreq_policy *policy, u64 ndiv) +{ + on_each_cpu_mask(policy->cpus, tegra194_set_cpu_ndiv_sysreg, &ndiv, true); +} + static unsigned int tegra194_get_speed(u32 cpu) { struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); struct cpufreq_frequency_table *pos; + u32 cpuid, clusterid; unsigned int rate; u64 ndiv; int ret; - u32 cl; - smp_call_function_single(cpu, get_cpu_cluster, &cl, true); + data->soc->ops->get_cpu_cluster_id(cpu, &cpuid, &clusterid); /* reconstruct actual cpu freq using counters */ rate = tegra194_calculate_speed(cpu); /* get last written ndiv value */ - ret = smp_call_function_single(cpu, get_cpu_ndiv, &ndiv, true); + ret = data->soc->ops->get_cpu_ndiv(cpu, cpuid, clusterid, &ndiv); if (WARN_ON_ONCE(ret)) return rate; @@ -219,7 +354,7 @@ static unsigned int tegra194_get_speed(u32 cpu) * to the last written ndiv value from freq_table. This is * done to return consistent value. */ - cpufreq_for_each_valid_entry(pos, data->tables[cl]) { + cpufreq_for_each_valid_entry(pos, data->tables[clusterid]) { if (pos->driver_data != ndiv) continue; @@ -237,19 +372,22 @@ static unsigned int tegra194_get_speed(u32 cpu) static int tegra194_cpufreq_init(struct cpufreq_policy *policy) { struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); - u32 cpu; - u32 cl; + int maxcpus_per_cluster = data->soc->maxcpus_per_cluster; + u32 start_cpu, cpu; + u32 clusterid; - smp_call_function_single(policy->cpu, get_cpu_cluster, &cl, true); + data->soc->ops->get_cpu_cluster_id(policy->cpu, NULL, &clusterid); - if (cl >= data->num_clusters || !data->tables[cl]) + if (clusterid >= data->soc->num_clusters || !data->tables[clusterid]) return -EINVAL; + start_cpu = rounddown(policy->cpu, maxcpus_per_cluster); /* set same policy for all cpus in a cluster */ - for (cpu = (cl * 2); cpu < ((cl + 1) * 2); cpu++) - cpumask_set_cpu(cpu, policy->cpus); - - policy->freq_table = data->tables[cl]; + for (cpu = start_cpu; cpu < (start_cpu + maxcpus_per_cluster); cpu++) { + if (cpu_possible(cpu)) + cpumask_set_cpu(cpu, policy->cpus); + } + policy->freq_table = data->tables[clusterid]; policy->cpuinfo.transition_latency = TEGRA_CPUFREQ_TRANSITION_LATENCY; return 0; @@ -259,13 +397,14 @@ static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) { struct cpufreq_frequency_table *tbl = policy->freq_table + index; + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); /* * Each core writes frequency in per core register. Then both cores * in a cluster run at same frequency which is the maximum frequency * request out of the values requested by both cores in that cluster. */ - on_each_cpu_mask(policy->cpus, set_cpu_ndiv, tbl, true); + data->soc->ops->set_cpu_ndiv(policy, (u64)tbl->driver_data); return 0; } @@ -280,6 +419,19 @@ static struct cpufreq_driver tegra194_cpufreq_driver = { .attr = cpufreq_generic_attr, }; +static struct tegra_cpufreq_ops tegra194_cpufreq_ops = { + .read_counters = tegra194_read_counters, + .get_cpu_cluster_id = tegra194_get_cpu_cluster_id, + .get_cpu_ndiv = tegra194_get_cpu_ndiv, + .set_cpu_ndiv = tegra194_set_cpu_ndiv, +}; + +static const struct tegra_cpufreq_soc tegra194_cpufreq_soc = { + .ops = &tegra194_cpufreq_ops, + .maxcpus_per_cluster = 2, + .num_clusters = 4, +}; + static void tegra194_cpufreq_free_resources(void) { destroy_workqueue(read_counters_wq); @@ -359,6 +511,7 @@ init_freq_table(struct platform_device *pdev, struct tegra_bpmp *bpmp, static int tegra194_cpufreq_probe(struct platform_device *pdev) { + const struct tegra_cpufreq_soc *soc; struct tegra194_cpufreq_data *data; struct tegra_bpmp *bpmp; int err, i; @@ -367,12 +520,27 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev) if (!data) return -ENOMEM; - data->num_clusters = MAX_CLUSTERS; - data->tables = devm_kcalloc(&pdev->dev, data->num_clusters, + soc = of_device_get_match_data(&pdev->dev); + + if (soc->ops && soc->maxcpus_per_cluster && soc->num_clusters) { + data->soc = soc; + } else { + dev_err(&pdev->dev, "soc data missing\n"); + return -EINVAL; + } + + data->tables = devm_kcalloc(&pdev->dev, data->soc->num_clusters, sizeof(*data->tables), GFP_KERNEL); if (!data->tables) return -ENOMEM; + if (soc->actmon_cntr_base) { + /* mmio registers are used for frequency request and re-construction */ + data->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(data->regs)) + return PTR_ERR(data->regs); + } + platform_set_drvdata(pdev, data); bpmp = tegra_bpmp_get(&pdev->dev); @@ -386,7 +554,7 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev) goto put_bpmp; } - for (i = 0; i < data->num_clusters; i++) { + for (i = 0; i < data->soc->num_clusters; i++) { data->tables[i] = init_freq_table(pdev, bpmp, i); if (IS_ERR(data->tables[i])) { err = PTR_ERR(data->tables[i]); @@ -416,7 +584,9 @@ static int tegra194_cpufreq_remove(struct platform_device *pdev) } static const struct of_device_id tegra194_cpufreq_of_match[] = { - { .compatible = "nvidia,tegra194-ccplex", }, + { .compatible = "nvidia,tegra194-ccplex", .data = &tegra194_cpufreq_soc }, + { .compatible = "nvidia,tegra234-ccplex-cluster", .data = &tegra234_cpufreq_soc }, + { .compatible = "nvidia,tegra239-ccplex-cluster", .data = &tegra239_cpufreq_soc }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, tegra194_cpufreq_of_match); diff --git a/drivers/cpufreq/tegra20-cpufreq.c b/drivers/cpufreq/tegra20-cpufreq.c index e8db3d75be25..ab7ac7df9e62 100644 --- a/drivers/cpufreq/tegra20-cpufreq.c +++ b/drivers/cpufreq/tegra20-cpufreq.c @@ -32,9 +32,9 @@ static bool cpu0_node_has_opp_v2_prop(void) return ret; } -static void tegra20_cpufreq_put_supported_hw(void *opp_table) +static void tegra20_cpufreq_put_supported_hw(void *opp_token) { - dev_pm_opp_put_supported_hw(opp_table); + dev_pm_opp_put_supported_hw((unsigned long) opp_token); } static void tegra20_cpufreq_dt_unregister(void *cpufreq_dt) @@ -45,7 +45,6 @@ static void tegra20_cpufreq_dt_unregister(void *cpufreq_dt) static int tegra20_cpufreq_probe(struct platform_device *pdev) { struct platform_device *cpufreq_dt; - struct opp_table *opp_table; struct device *cpu_dev; u32 versions[2]; int err; @@ -71,16 +70,15 @@ static int tegra20_cpufreq_probe(struct platform_device *pdev) if (WARN_ON(!cpu_dev)) return -ENODEV; - opp_table = dev_pm_opp_set_supported_hw(cpu_dev, versions, 2); - err = PTR_ERR_OR_ZERO(opp_table); - if (err) { + err = dev_pm_opp_set_supported_hw(cpu_dev, versions, 2); + if (err < 0) { dev_err(&pdev->dev, "failed to set supported hw: %d\n", err); return err; } err = devm_add_action_or_reset(&pdev->dev, tegra20_cpufreq_put_supported_hw, - opp_table); + (void *)((unsigned long) err)); if (err) return err; diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index 8f9fdd864391..f64180dd2005 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -60,7 +60,6 @@ struct ti_cpufreq_data { struct device_node *opp_node; struct regmap *syscon; const struct ti_cpufreq_soc_data *soc_data; - struct opp_table *opp_table; }; static unsigned long amx3_efuse_xlate(struct ti_cpufreq_data *opp_data, @@ -173,7 +172,7 @@ static struct ti_cpufreq_soc_data omap34xx_soc_data = { * seems to always read as 0). */ -static const char * const omap3_reg_names[] = {"cpu0", "vbb"}; +static const char * const omap3_reg_names[] = {"cpu0", "vbb", NULL}; static struct ti_cpufreq_soc_data omap36xx_soc_data = { .reg_names = omap3_reg_names, @@ -324,10 +323,13 @@ static int ti_cpufreq_probe(struct platform_device *pdev) { u32 version[VERSION_COUNT]; const struct of_device_id *match; - struct opp_table *ti_opp_table; struct ti_cpufreq_data *opp_data; - const char * const default_reg_names[] = {"vdd", "vbb"}; + const char * const default_reg_names[] = {"vdd", "vbb", NULL}; int ret; + struct dev_pm_opp_config config = { + .supported_hw = version, + .supported_hw_count = ARRAY_SIZE(version), + }; match = dev_get_platdata(&pdev->dev); if (!match) @@ -370,33 +372,21 @@ static int ti_cpufreq_probe(struct platform_device *pdev) if (ret) goto fail_put_node; - ti_opp_table = dev_pm_opp_set_supported_hw(opp_data->cpu_dev, - version, VERSION_COUNT); - if (IS_ERR(ti_opp_table)) { - dev_err(opp_data->cpu_dev, - "Failed to set supported hardware\n"); - ret = PTR_ERR(ti_opp_table); - goto fail_put_node; - } - - opp_data->opp_table = ti_opp_table; - if (opp_data->soc_data->multi_regulator) { - const char * const *reg_names = default_reg_names; - if (opp_data->soc_data->reg_names) - reg_names = opp_data->soc_data->reg_names; - ti_opp_table = dev_pm_opp_set_regulators(opp_data->cpu_dev, - reg_names, - ARRAY_SIZE(default_reg_names)); - if (IS_ERR(ti_opp_table)) { - dev_pm_opp_put_supported_hw(opp_data->opp_table); - ret = PTR_ERR(ti_opp_table); - goto fail_put_node; - } + config.regulator_names = opp_data->soc_data->reg_names; + else + config.regulator_names = default_reg_names; + } + + ret = dev_pm_opp_set_config(opp_data->cpu_dev, &config); + if (ret < 0) { + dev_err(opp_data->cpu_dev, "Failed to set OPP config\n"); + goto fail_put_node; } of_node_put(opp_data->opp_node); + register_cpufreq_dt: platform_device_register_simple("cpufreq-dt", -1, NULL, 0); @@ -408,7 +398,7 @@ fail_put_node: return ret; } -static int ti_cpufreq_init(void) +static int __init ti_cpufreq_init(void) { const struct of_device_id *match; |