From 9f04f8794b55ba376c429a0e24b07e2d5cdcc259 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 11:54:32 -0600 Subject: [PATCH 01/24] cpufreq/amd-pstate: Cache CPPC request in shared mem case too ANBZ: #26498 commit 9f5daa2f2f6dddd2a847d077bfddc7dc0d9dab10 upstream In order to avoid a potential unnecessary write, have shmem_update_perf() cache the request in the cppc_req_cached variable that is normally only used for the MSR case. This adds symmetry into the code and potentially avoids extra writes. Reviewed-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 30411c3a22bf..cb4e1838787e 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -496,6 +496,8 @@ static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf, u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { struct cppc_perf_ctrls perf_ctrls; + u64 value, prev; + int ret; if (cppc_state == AMD_PSTATE_ACTIVE) { int ret = shmem_set_epp(cpudata, epp); @@ -504,11 +506,29 @@ static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf, return ret; } + value = prev = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + + if (value == prev) + return 0; + perf_ctrls.max_perf = max_perf; perf_ctrls.min_perf = min_perf; perf_ctrls.desired_perf = des_perf; - return cppc_set_perf(cpudata->cpu, &perf_ctrls); + ret = cppc_set_perf(cpudata->cpu, &perf_ctrls); + if (ret) + return ret; + + WRITE_ONCE(cpudata->cppc_req_cached, value); + + return 0; } static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) -- Gitee
From d6d89fdbfec3dac2e4f911eaa6db35b95d7a884d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 11:57:38 -0600 Subject: [PATCH 02/24] cpufreq/amd-pstate: Move all EPP tracing into *_update_perf and *_set_epp functions ANBZ: #26498 commit 77fbea69b0ffad0e46c7018d07e04b6628482e14 upstream The EPP tracing is done by the caller today, but this precludes recording whether the CPPC request has actually changed. Move it into the update_perf and set_epp functions and include information about whether the request has changed from the last one. amd_pstate_update_perf() and amd_pstate_set_epp() now require the policy as an argument instead of the cpudata. Reviewed-by: Dhananjay Ugwekar Reviewed-by: Gautham R.
Shenoy Signed-off-by: Mario Limonciello Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate-trace.h | 13 +++- drivers/cpufreq/amd-pstate.c | 118 +++++++++++++++++------------ 2 files changed, 80 insertions(+), 51 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h index f457d4af2c62..32e1bdc588c5 100644 --- a/drivers/cpufreq/amd-pstate-trace.h +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -90,7 +90,8 @@ TRACE_EVENT(amd_pstate_epp_perf, u8 epp, u8 min_perf, u8 max_perf, - bool boost + bool boost, + bool changed ), TP_ARGS(cpu_id, @@ -98,7 +99,8 @@ TRACE_EVENT(amd_pstate_epp_perf, epp, min_perf, max_perf, - boost), + boost, + changed), TP_STRUCT__entry( __field(unsigned int, cpu_id) @@ -107,6 +109,7 @@ TRACE_EVENT(amd_pstate_epp_perf, __field(u8, min_perf) __field(u8, max_perf) __field(bool, boost) + __field(bool, changed) ), TP_fast_assign( @@ -116,15 +119,17 @@ TRACE_EVENT(amd_pstate_epp_perf, __entry->min_perf = min_perf; __entry->max_perf = max_perf; __entry->boost = boost; + __entry->changed = changed; ), - TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u", + TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u, changed=%u", (unsigned int)__entry->cpu_id, (u8)__entry->min_perf, (u8)__entry->max_perf, (u8)__entry->highest_perf, (u8)__entry->epp, - (bool)__entry->boost + (bool)__entry->boost, + (bool)__entry->changed ) ); diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index cb4e1838787e..c3be162232d9 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -228,9 +228,10 @@ static u8 shmem_get_epp(struct amd_cpudata *cpudata) return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, epp); } -static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf, +static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf, u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { + struct amd_cpudata *cpudata = policy->driver_data; u64 value, prev; value = prev = READ_ONCE(cpudata->cppc_req_cached); @@ -242,6 +243,18 @@ static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf, value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = READ_ONCE(cpudata->perf); + + trace_amd_pstate_epp_perf(cpudata->cpu, + perf.highest_perf, + epp, + min_perf, + max_perf, + policy->boost_enabled, + value != prev); + } + if (value == prev) return 0; @@ -256,24 +269,26 @@ static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf, } WRITE_ONCE(cpudata->cppc_req_cached, value); - WRITE_ONCE(cpudata->epp_cached, epp); + if (epp != cpudata->epp_cached) + WRITE_ONCE(cpudata->epp_cached, epp); return 0; } DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); -static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, +static inline int amd_pstate_update_perf(struct cpufreq_policy *policy, u8 min_perf, u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { - return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, + return static_call(amd_pstate_update_perf)(policy, min_perf, des_perf, max_perf, epp, fast_switch); } -static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp) +static int msr_set_epp(struct cpufreq_policy *policy, u8 epp) { + struct amd_cpudata *cpudata = policy->driver_data; u64 value, prev; int ret; @@ -281,6 +296,19 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp) value &= 
~AMD_CPPC_EPP_PERF_MASK; value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = cpudata->perf; + + trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, + epp, + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, + cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, + cpudata->cppc_req_cached), + policy->boost_enabled, + value != prev); + } + if (value == prev) return 0; @@ -299,15 +327,29 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp) DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); -static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u8 epp) +static inline int amd_pstate_set_epp(struct cpufreq_policy *policy, u8 epp) { - return static_call(amd_pstate_set_epp)(cpudata, epp); + return static_call(amd_pstate_set_epp)(policy, epp); } -static int shmem_set_epp(struct amd_cpudata *cpudata, u8 epp) +static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) { - int ret; + struct amd_cpudata *cpudata = policy->driver_data; struct cppc_perf_ctrls perf_ctrls; + int ret; + + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = cpudata->perf; + + trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, + epp, + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, + cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, + cpudata->cppc_req_cached), + policy->boost_enabled, + epp != cpudata->epp_cached); + } if (epp == cpudata->epp_cached) return 0; @@ -339,17 +381,7 @@ static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy, return -EBUSY; } - if (trace_amd_pstate_epp_perf_enabled()) { - union perf_cached perf = READ_ONCE(cpudata->perf); - - trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, - epp, - FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), - FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), - policy->boost_enabled); - } - - return amd_pstate_set_epp(cpudata, epp); + return amd_pstate_set_epp(policy, epp); } static inline int msr_cppc_enable(bool enable) @@ -492,15 +524,16 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) return static_call(amd_pstate_init_perf)(cpudata); } -static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf, +static int shmem_update_perf(struct cpufreq_policy *policy, u8 min_perf, u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { + struct amd_cpudata *cpudata = policy->driver_data; struct cppc_perf_ctrls perf_ctrls; u64 value, prev; int ret; if (cppc_state == AMD_PSTATE_ACTIVE) { - int ret = shmem_set_epp(cpudata, epp); + int ret = shmem_set_epp(policy, epp); if (ret) return ret; @@ -515,6 +548,18 @@ static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf, value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = READ_ONCE(cpudata->perf); + + trace_amd_pstate_epp_perf(cpudata->cpu, + perf.highest_perf, + epp, + min_perf, + max_perf, + policy->boost_enabled, + value != prev); + } + if (value == prev) return 0; @@ -592,7 +637,7 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, cpudata->cpu, fast_switch); } - amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch); + amd_pstate_update_perf(policy, min_perf, des_perf, max_perf, 0, fast_switch); } static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) @@ -1525,7 +1570,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy 
*policy) return ret; WRITE_ONCE(cpudata->cppc_req_cached, value); } - ret = amd_pstate_set_epp(cpudata, cpudata->epp_default); + ret = amd_pstate_set_epp(policy, cpudata->epp_default); if (ret) return ret; @@ -1566,14 +1611,8 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) epp = READ_ONCE(cpudata->epp_cached); perf = READ_ONCE(cpudata->perf); - if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, epp, - perf.min_limit_perf, - perf.max_limit_perf, - policy->boost_enabled); - } - return amd_pstate_update_perf(cpudata, perf.min_limit_perf, 0U, + return amd_pstate_update_perf(policy, perf.min_limit_perf, 0U, perf.max_limit_perf, epp, false); } @@ -1605,20 +1644,12 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) { - struct amd_cpudata *cpudata = policy->driver_data; - union perf_cached perf = READ_ONCE(cpudata->perf); int ret; ret = amd_pstate_cppc_enable(true); if (ret) pr_err("failed to enable amd pstate during resume, return %d\n", ret); - if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, - cpudata->epp_cached, - FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), - perf.highest_perf, policy->boost_enabled); - } return amd_pstate_epp_update_limit(policy); } @@ -1646,14 +1677,7 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) if (cpudata->suspended) return 0; - if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, - AMD_CPPC_EPP_BALANCE_POWERSAVE, - perf.lowest_perf, perf.lowest_perf, - policy->boost_enabled); - } - - return amd_pstate_update_perf(cpudata, perf.lowest_perf, 0, perf.lowest_perf, + return amd_pstate_update_perf(policy, perf.lowest_perf, 0, perf.lowest_perf, AMD_CPPC_EPP_BALANCE_POWERSAVE, false); } -- Gitee
From d6ab9b1e1ffa7482396d81a756c53d64a885f737 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:02:14 -0600 Subject: [PATCH 03/24] cpufreq/amd-pstate: Update cppc_req_cached for shared mem EPP writes ANBZ: #26498 commit 1905fac6f9e08e34135e089704a0733ce711eb83 upstream On EPP-only writes, update the cached variable so that the min/max performance controls don't need to be updated again. Reviewed-by: Dhananjay Ugwekar Reviewed-by: Gautham R.
Shenoy Signed-off-by: Mario Limonciello Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index c3be162232d9..2f3ecbd5a907 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -336,6 +336,7 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) { struct amd_cpudata *cpudata = policy->driver_data; struct cppc_perf_ctrls perf_ctrls; + u64 value; int ret; if (trace_amd_pstate_epp_perf_enabled()) { @@ -362,6 +363,11 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) } WRITE_ONCE(cpudata->epp_cached, epp); + value = READ_ONCE(cpudata->cppc_req_cached); + value &= ~AMD_CPPC_EPP_PERF_MASK; + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + WRITE_ONCE(cpudata->cppc_req_cached, value); + return ret; } -- Gitee
From e94cbd47c7a2e76ca293b5325322041705a3c832 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 14:44:15 -0600 Subject: [PATCH 04/24] cpufreq/amd-pstate: Drop debug statements for policy setting ANBZ: #26498 commit 93039a60fb28f72196769869aa4b502f1849a373 upstream Trace events now exist for all amd-pstate modes and output this information right before programming the hardware. This makes the existing debug statements nothing but unnecessary overhead. Drop them. Reviewed-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 2f3ecbd5a907..8b946f28af5a 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -667,7 +667,6 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) } cpufreq_verify_within_cpu_limits(policy_data); - pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min); return 0; } @@ -1630,9 +1629,6 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) if (!policy->cpuinfo.max_freq) return -ENODEV; - pr_debug("set_policy: cpuinfo.max %u policy->max %u\n", - policy->cpuinfo.max_freq, policy->max); - cpudata->policy = policy->policy; ret = amd_pstate_epp_update_limit(policy); -- Gitee
From c5453f59272d6ee928a79beef2ce46762aafe185 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 16 Dec 2024 10:41:23 -0600 Subject: [PATCH 05/24] cpufreq/amd-pstate: Rework CPPC enabling ANBZ: #26498 commit 2064543f5ba0d2929e3e9b3a616c3262a57c7925 upstream The CPPC enable register is configured as "write once". That is, any future writes don't actually do anything. Because of this, all the cleanup paths that currently exist for CPPC disable are ineffective. Rework CPPC enable to only enable after all the CAP registers have been read, to avoid enabling CPPC on CPUs with invalid _CPC or unpopulated MSRs. As the register is write once, remove all cleanup paths as well.
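[ note: a condensed sketch of the reworked enable path described above, using the function names from the hunks below; enabling reduces to a single write-once, per-policy operation:

	/* MSR case: MSR_AMD_CPPC_ENABLE is write-once */
	static inline int msr_cppc_enable(struct cpufreq_policy *policy)
	{
		return wrmsrl_safe_on_cpu(policy->cpu, MSR_AMD_CPPC_ENABLE, 1);
	}

	/* shared memory case: enable through the ACPI CPPC library */
	static int shmem_cppc_enable(struct cpufreq_policy *policy)
	{
		return cppc_set_enable(policy->cpu, 1);
	}

Both are called once per policy from the ->init() callbacks, after the CAP registers have been read, so no global enable/disable state is left to clean up. ]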
Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 179 +++++++---------------------------- 1 file changed, 35 insertions(+), 144 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 8b946f28af5a..bf4f9a8b4bf8 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -85,7 +85,6 @@ static struct cpufreq_driver *current_pstate_driver; static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; -static bool cppc_enabled; static bool amd_pstate_prefcore = true; static struct quirk_entry *quirks; @@ -371,89 +370,21 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) return ret; } -static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy, - int pref_index) +static inline int msr_cppc_enable(struct cpufreq_policy *policy) { - struct amd_cpudata *cpudata = policy->driver_data; - u8 epp; - - if (!pref_index) - epp = cpudata->epp_default; - else - epp = epp_values[pref_index]; - - if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { - pr_debug("EPP cannot be set under performance policy\n"); - return -EBUSY; - } - - return amd_pstate_set_epp(policy, epp); -} - -static inline int msr_cppc_enable(bool enable) -{ - int ret, cpu; - unsigned long logical_proc_id_mask = 0; - - /* - * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared. - */ - if (!enable) - return 0; - - if (enable == cppc_enabled) - return 0; - - for_each_present_cpu(cpu) { - unsigned long logical_id = topology_logical_package_id(cpu); - - if (test_bit(logical_id, &logical_proc_id_mask)) - continue; - - set_bit(logical_id, &logical_proc_id_mask); - - ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE, - enable); - if (ret) - return ret; - } - - cppc_enabled = enable; - return 0; + return wrmsrl_safe_on_cpu(policy->cpu, MSR_AMD_CPPC_ENABLE, 1); } -static int shmem_cppc_enable(bool enable) +static int shmem_cppc_enable(struct cpufreq_policy *policy) { - int cpu, ret = 0; - struct cppc_perf_ctrls perf_ctrls; - - if (enable == cppc_enabled) - return 0; - - for_each_present_cpu(cpu) { - ret = cppc_set_enable(cpu, enable); - if (ret) - return ret; - - /* Enable autonomous mode for EPP */ - if (cppc_state == AMD_PSTATE_ACTIVE) { - /* Set desired perf as zero to allow EPP firmware control */ - perf_ctrls.desired_perf = 0; - ret = cppc_set_perf(cpu, &perf_ctrls); - if (ret) - return ret; - } - } - - cppc_enabled = enable; - return ret; + return cppc_set_enable(policy->cpu, 1); } DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable); -static inline int amd_pstate_cppc_enable(bool enable) +static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy) { - return static_call(amd_pstate_cppc_enable)(enable); + return static_call(amd_pstate_cppc_enable)(policy); } static int msr_init_perf(struct amd_cpudata *cpudata) @@ -1064,6 +995,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) perf.highest_perf); policy->boost_supported = READ_ONCE(cpudata->boost_supported); + ret = amd_pstate_cppc_enable(policy); + if (ret) + goto free_cpudata1; + /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; @@ -1110,28 +1045,6 @@ static void amd_pstate_cpu_exit(struct cpufreq_policy *policy) kfree(cpudata); } -static int amd_pstate_cpu_resume(struct cpufreq_policy *policy) -{ - int ret; - - ret = 
amd_pstate_cppc_enable(true); - if (ret) - pr_err("failed to enable amd-pstate during resume, return %d\n", ret); - - return ret; -} - -static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) -{ - int ret; - - ret = amd_pstate_cppc_enable(false); - if (ret) - pr_err("failed to disable amd-pstate during suspend, return %d\n", ret); - - return ret; -} - /* Sysfs attributes */ /* @@ -1223,8 +1136,10 @@ static ssize_t show_energy_performance_available_preferences( static ssize_t store_energy_performance_preference( struct cpufreq_policy *policy, const char *buf, size_t count) { + struct amd_cpudata *cpudata = policy->driver_data; char str_preference[21]; ssize_t ret; + u8 epp; ret = sscanf(buf, "%20s", str_preference); if (ret != 1) @@ -1234,7 +1149,17 @@ static ssize_t store_energy_performance_preference( if (ret < 0) return -EINVAL; - ret = amd_pstate_set_energy_pref_index(policy, ret); + if (!ret) + epp = cpudata->epp_default; + else + epp = epp_values[ret]; + + if (epp > 0 && policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + pr_debug("EPP cannot be set under performance policy\n"); + return -EBUSY; + } + + ret = amd_pstate_set_epp(policy, epp); return ret ? ret : count; } @@ -1267,7 +1192,6 @@ static ssize_t show_energy_performance_preference( static void amd_pstate_driver_cleanup(void) { - amd_pstate_cppc_enable(false); cppc_state = AMD_PSTATE_DISABLE; current_pstate_driver = NULL; } @@ -1301,14 +1225,6 @@ static int amd_pstate_register_driver(int mode) cppc_state = mode; - ret = amd_pstate_cppc_enable(true); - if (ret) { - pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n", - ret); - amd_pstate_driver_cleanup(); - return ret; - } - /* at least one CPU supports CPB */ current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB); @@ -1548,11 +1464,15 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf); + policy->driver_data = cpudata; + + ret = amd_pstate_cppc_enable(policy); + if (ret) + goto free_cpudata1; /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; - policy->driver_data = cpudata; policy->boost_supported = READ_ONCE(cpudata->boost_supported); @@ -1644,31 +1564,11 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) return 0; } -static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) -{ - int ret; - - ret = amd_pstate_cppc_enable(true); - if (ret) - pr_err("failed to enable amd pstate during resume, return %d\n", ret); - - - return amd_pstate_epp_update_limit(policy); -} - static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) { - struct amd_cpudata *cpudata = policy->driver_data; - int ret; - - pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); + pr_debug("AMD CPU Core %d going online\n", policy->cpu); - ret = amd_pstate_epp_reenable(policy); - if (ret) - return ret; - cpudata->suspended = false; - - return 0; + return amd_pstate_cppc_enable(policy); } static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) @@ -1686,11 +1586,6 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - int ret; - - /* avoid suspending when EPP is not enabled */ - if (cppc_state != AMD_PSTATE_ACTIVE) - return 0; /* invalidate to ensure it's rewritten during resume */ cpudata->cppc_req_cached = 0; @@ -1698,11 
+1593,6 @@ static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) /* set this flag to avoid setting core offline*/ cpudata->suspended = true; - /* disable CPPC in lowlevel firmware */ - ret = amd_pstate_cppc_enable(false); - if (ret) - pr_err("failed to suspend, return %d\n", ret); - return 0; } @@ -1711,8 +1601,12 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) struct amd_cpudata *cpudata = policy->driver_data; if (cpudata->suspended) { + int ret; + /* enable amd pstate from suspend state*/ - amd_pstate_epp_reenable(policy); + ret = amd_pstate_epp_update_limit(policy); + if (ret) + return ret; cpudata->suspended = false; } @@ -1727,8 +1621,6 @@ static struct cpufreq_driver amd_pstate_driver = { .fast_switch = amd_pstate_fast_switch, .init = amd_pstate_cpu_init, .exit = amd_pstate_cpu_exit, - .suspend = amd_pstate_cpu_suspend, - .resume = amd_pstate_cpu_resume, .set_boost = amd_pstate_set_boost, .update_limits = amd_pstate_update_limits, .name = "amd-pstate", @@ -1895,7 +1787,6 @@ static int __init amd_pstate_init(void) global_attr_free: cpufreq_unregister_driver(current_pstate_driver); - amd_pstate_cppc_enable(false); return ret; } device_initcall(amd_pstate_init); -- Gitee From f6c2fdc085b23b1e7d0ef9c99538ab7730e3ff40 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 19 Jan 2025 07:05:43 -0600 Subject: [PATCH 06/24] cpufreq/amd-pstate: Stop caching EPP ANBZ: #26498 commit 4e16c1175238f2b9d86199b427c5db1a24c9a85f upstream EPP values are cached in the cpudata structure per CPU. This is needless though because they are also cached in the CPPC request variable. Drop the separate cache for EPP values and always reference the CPPC request variable when needed. Reviewed-by: Dhananjay Ugwekar Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Mario Limonciello Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 19 ++++++++++--------- drivers/cpufreq/amd-pstate.h | 1 - 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bf4f9a8b4bf8..e2497c00c9a3 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -268,8 +268,6 @@ static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf, } WRITE_ONCE(cpudata->cppc_req_cached, value); - if (epp != cpudata->epp_cached) - WRITE_ONCE(cpudata->epp_cached, epp); return 0; } @@ -318,7 +316,6 @@ static int msr_set_epp(struct cpufreq_policy *policy, u8 epp) } /* update both so that msr_update_perf() can effectively check */ - WRITE_ONCE(cpudata->epp_cached, epp); WRITE_ONCE(cpudata->cppc_req_cached, value); return ret; @@ -335,9 +332,12 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) { struct amd_cpudata *cpudata = policy->driver_data; struct cppc_perf_ctrls perf_ctrls; + u8 epp_cached; u64 value; int ret; + + epp_cached = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); if (trace_amd_pstate_epp_perf_enabled()) { union perf_cached perf = cpudata->perf; @@ -348,10 +348,10 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), policy->boost_enabled, - epp != cpudata->epp_cached); + epp != epp_cached); } - if (epp == cpudata->epp_cached) + if (epp == epp_cached) return 0; perf_ctrls.energy_perf = epp; @@ -360,7 +360,6 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) pr_debug("failed to set energy perf value (%d)\n", ret); return ret; } - WRITE_ONCE(cpudata->epp_cached, epp); value = READ_ONCE(cpudata->cppc_req_cached); value &= ~AMD_CPPC_EPP_PERF_MASK; @@ -1168,9 +1167,11 @@ static ssize_t show_energy_performance_preference( struct cpufreq_policy *policy, char *buf) { struct amd_cpudata *cpudata = policy->driver_data; - u8 preference; + u8 preference, epp; + + epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); - switch (cpudata->epp_cached) { + switch (epp) { case AMD_CPPC_EPP_PERFORMANCE: preference = EPP_INDEX_PERFORMANCE; break; @@ -1533,7 +1534,7 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) epp = 0; else - epp = READ_ONCE(cpudata->epp_cached); + epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); perf = READ_ONCE(cpudata->perf); diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index 1557e1afea79..fbe1c08d3f06 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -102,7 +102,6 @@ struct amd_cpudata { bool hw_prefcore; /* EPP feature related attributes*/ - u8 epp_cached; u32 policy; bool suspended; u8 epp_default; -- Gitee From 1b199ed39bed7f20c9a7f05ac335772a83664e5e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 19 Feb 2025 14:08:14 -0600 Subject: [PATCH 07/24] cpufreq/amd-pstate: Drop actions in amd_pstate_epp_cpu_offline() ANBZ: #26498 commit efb758c8c803217e58248f03db372c5e23827dae upstream When the CPU goes offline there is no need to change the CPPC request because the CPU will go into the deepest C-state it supports already. Actually changing the CPPC request when it goes offline messes up the cached values and can lead to the wrong values being restored when it comes back. 
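[ note: to make the failure mode concrete — after patch 06 in this series, the EPP value lives only in cppc_req_cached, and the resume path reads it back with:

	epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached);

so the AMD_CPPC_EPP_BALANCE_POWERSAVE value programmed by the offline-time request removed below is what resume would restore, instead of the EPP the user chose. ]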
Instead, drop the actions and, if the CPU comes back online, let amd_pstate_epp_set_policy() restore it to the expected values. Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index e2497c00c9a3..60f10d63a771 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1574,14 +1574,7 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) { - struct amd_cpudata *cpudata = policy->driver_data; - union perf_cached perf = READ_ONCE(cpudata->perf); - - if (cpudata->suspended) - return 0; - - return amd_pstate_update_perf(policy, perf.lowest_perf, 0, perf.lowest_perf, - AMD_CPPC_EPP_BALANCE_POWERSAVE, false); + return 0; } static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) -- Gitee
From 571f888b287ab416de069f7866954c83cd4282ee Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Mon, 7 Apr 2025 08:19:26 +0000 Subject: [PATCH 08/24] cpufreq/amd-pstate: Fix min_limit perf and freq updation for performance governor ANBZ: #26498 commit 56a49e19e1aea1374e9ba58cfd40260587bb7355 upstream The min_limit perf and freq values can get disconnected with the performance governor, as we only modify the perf value in the special case. Fix that by modifying the perf and freq values together. Fixes: 009d1c29a451 ("cpufreq/amd-pstate: Move perf values into a union") Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250407081925.850473-1-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 60f10d63a771..e01496464125 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -607,13 +607,16 @@ static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) union perf_cached perf = READ_ONCE(cpudata->perf); perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max); - perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); + WRITE_ONCE(cpudata->max_limit_freq, policy->max); - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf); + WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq)); + } else { + perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); + WRITE_ONCE(cpudata->min_limit_freq, policy->min); + } - WRITE_ONCE(cpudata->max_limit_freq, policy->max); - WRITE_ONCE(cpudata->min_limit_freq, policy->min); WRITE_ONCE(cpudata->perf, perf); } -- Gitee
From 33f554fdee9083a4794b6e2f3caf0de3ad5fea95 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Aug 2024 13:47:25 +0200 Subject: [PATCH 09/24] x86/sched: Add basic support for CPU capacity scaling ANBZ: #26498 commit 5a9d10145a54f7a3fb6297c0082bf030e04db3bc upstream In order to be able to compute the sizes of tasks consistently across all CPUs in a hybrid system, it is necessary to provide CPU capacity scaling information to the scheduler via arch_scale_cpu_capacity().
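[ note: both per-CPU values introduced below are fixed-point ratios scaled by SCHED_CAPACITY_SCALE (1 << SCHED_CAPACITY_SHIFT = 1024); a condensed view of the arithmetic from the hunks that follow, with illustrative numbers:

	capacity   = (cap      << SCHED_CAPACITY_SHIFT) / max_cap;
	freq_ratio = (cap_freq << SCHED_CAPACITY_SHIFT) / base_freq;

For example (made-up values), a small core with cap = 2 and max_cap = 3 gets capacity (2 << 10) / 3 = 682, while the biggest core gets 1024; scale_freq_tick() then divides by this per-CPU freq_ratio instead of the global arch_max_freq_ratio. ]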
Moreover, the value returned by arch_scale_freq_capacity() for the given CPU must correspond to the arch_scale_cpu_capacity() return value for it, or utilization computations will be inaccurate. Add support for it through per-CPU variables holding the capacity and maximum-to-base frequency ratio (times SCHED_CAPACITY_SCALE) that will be returned by arch_scale_cpu_capacity() and used by scale_freq_tick() to compute arch_freq_scale for the current CPU, respectively. In order to avoid adding measurable overhead for non-hybrid x86 systems, which are the vast majority in the field, whether or not the new hybrid CPU capacity scaling will be in effect is controlled by a static key. This static key is set by calling arch_enable_hybrid_capacity_scale() which also allocates memory for the per-CPU data and initializes it. Next, arch_set_cpu_capacity() is used to set the per-CPU variables mentioned above for each CPU and arch_rebuild_sched_domains() needs to be called for the scheduler to realize that capacity-aware scheduling can be used going forward. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Reviewed-by: Ricardo Neri Tested-by: Ricardo Neri # scale invariance Link: https://patch.msgid.link/10523497.nUPlyArG6x@rjwysocki.net [ rjw: Added parens to function kerneldoc comments ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- arch/x86/include/asm/topology.h | 13 +++++ arch/x86/kernel/cpu/aperfmperf.c | 89 +++++++++++++++++++++++++++++++- 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 3235ba1e5b06..f18e19630d95 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -220,9 +220,22 @@ static inline long arch_scale_freq_capacity(int cpu) } #define arch_scale_freq_capacity arch_scale_freq_capacity +bool arch_enable_hybrid_capacity_scale(void); +void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap, + unsigned long cap_freq, unsigned long base_freq); + +unsigned long arch_scale_cpu_capacity(int cpu); +#define arch_scale_cpu_capacity arch_scale_cpu_capacity + extern void arch_set_max_freq_ratio(bool turbo_disabled); extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled); #else +static inline bool arch_enable_hybrid_capacity_scale(void) { return false; } +static inline void arch_set_cpu_capacity(int cpu, unsigned long cap, + unsigned long max_cap, + unsigned long cap_freq, + unsigned long base_freq) { } + static inline void arch_set_max_freq_ratio(bool turbo_disabled) { } static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { } #endif diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index fdbb5f07448f..5dd4cc7836a7 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -347,9 +347,89 @@ static DECLARE_WORK(disable_freq_invariance_work, DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; +static DEFINE_STATIC_KEY_FALSE(arch_hybrid_cap_scale_key); + +struct arch_hybrid_cpu_scale { + unsigned long capacity; + unsigned long freq_ratio; +}; + +static struct arch_hybrid_cpu_scale __percpu *arch_cpu_scale; + +/** + * arch_enable_hybrid_capacity_scale() - Enable hybrid CPU capacity scaling + * + * Allocate memory for per-CPU data used by hybrid CPU capacity scaling, + * initialize it and set the static key controlling its code paths. 
+ * + * Must be called before arch_set_cpu_capacity(). + */ +bool arch_enable_hybrid_capacity_scale(void) +{ + int cpu; + + if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) { + WARN_ONCE(1, "Hybrid CPU capacity scaling already enabled"); + return true; + } + + arch_cpu_scale = alloc_percpu(struct arch_hybrid_cpu_scale); + if (!arch_cpu_scale) + return false; + + for_each_possible_cpu(cpu) { + per_cpu_ptr(arch_cpu_scale, cpu)->capacity = SCHED_CAPACITY_SCALE; + per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio = arch_max_freq_ratio; + } + + static_branch_enable(&arch_hybrid_cap_scale_key); + + pr_info("Hybrid CPU capacity scaling enabled\n"); + + return true; +} + +/** + * arch_set_cpu_capacity() - Set scale-invariance parameters for a CPU + * @cpu: Target CPU. + * @cap: Capacity of @cpu at its maximum frequency, relative to @max_cap. + * @max_cap: System-wide maximum CPU capacity. + * @cap_freq: Frequency of @cpu corresponding to @cap. + * @base_freq: Frequency of @cpu at which MPERF counts. + * + * The units in which @cap and @max_cap are expressed do not matter, so long + * as they are consistent, because the former is effectively divided by the + * latter. Analogously for @cap_freq and @base_freq. + * + * After calling this function for all CPUs, call arch_rebuild_sched_domains() + * to let the scheduler know that capacity-aware scheduling can be used going + * forward. + */ +void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap, + unsigned long cap_freq, unsigned long base_freq) +{ + if (static_branch_likely(&arch_hybrid_cap_scale_key)) { + WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity, + div_u64(cap << SCHED_CAPACITY_SHIFT, max_cap)); + WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio, + div_u64(cap_freq << SCHED_CAPACITY_SHIFT, base_freq)); + } else { + WARN_ONCE(1, "Hybrid CPU capacity scaling not enabled"); + } +} + +unsigned long arch_scale_cpu_capacity(int cpu) +{ + if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) + return READ_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity); + + return SCHED_CAPACITY_SCALE; +} +EXPORT_SYMBOL_GPL(arch_scale_cpu_capacity); + static void scale_freq_tick(u64 acnt, u64 mcnt) { - u64 freq_scale; + u64 freq_scale, freq_ratio; if (!arch_scale_freq_invariant()) return; @@ -357,7 +437,12 @@ static void scale_freq_tick(u64 acnt, u64 mcnt) if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) goto error; - if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt) + if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) + freq_ratio = READ_ONCE(this_cpu_ptr(arch_cpu_scale)->freq_ratio); + else + freq_ratio = arch_max_freq_ratio; + + if (check_mul_overflow(mcnt, freq_ratio, &mcnt) || !mcnt) goto error; freq_scale = div64_u64(acnt, mcnt); -- Gitee From 965337fb03774fac36c15d763e22c06678ef4455 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 4 Sep 2024 13:44:54 +0200 Subject: [PATCH 10/24] cpufreq: intel_pstate: Set asymmetric CPU capacity on hybrid systems ANBZ: #26498 commit 929ebc93ccaa8d183a2ba9f1cf769d1bfb847cca upstream Make intel_pstate use the HWP_HIGHEST_PERF values from MSR_HWP_CAPABILITIES to set asymmetric CPU capacity information via the previously introduced arch_set_cpu_capacity() on hybrid systems without SMT. Setting asymmetric CPU capacity is generally necessary to allow the scheduler to compute task sizes in a consistent way across all CPUs in a system where they differ by capacity. That, in turn, should help to improve scheduling decisions. 
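[ note: "capacity" here is derived from the HWP capabilities MSR; a condensed view of the derivation used in the hunks later in this patch:

	/* per-CPU raw capacity; max_pstate_physical is used instead
	 * when turbo is disabled */
	cpu->capacity_perf = HWP_HIGHEST_PERF(READ_ONCE(cpu->hwp_cap_cached));

	/* reported to the scheduler relative to the biggest core */
	arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf,
			      hybrid_max_perf_cpu->capacity_perf,
			      cpu->capacity_perf,
			      cpu->pstate.max_pstate_physical);
]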
It is also necessary for the schedutil cpufreq governor to operate as expected on hybrid systems where tasks migrate between CPUs of different capacities. The underlying observation is that intel_pstate already uses MSR_HWP_CAPABILITIES to get CPU performance information, which it exposes via sysfs and bases CPU performance scaling on. Thus using this information for setting asymmetric CPU capacity is consistent with what the driver has been doing already. Moreover, HWP_HIGHEST_PERF reflects the maximum capacity of a given CPU, including both the instructions-per-cycle (IPC) factor and the maximum turbo frequency, and the units in which that value is expressed are the same for all CPUs in the system, so the maximum capacity ratio between two CPUs can be obtained by computing the ratio of their HWP_HIGHEST_PERF values. Of course, in principle that capacity ratio need not be directly applicable at lower frequencies, so using it for providing the asymmetric CPU capacity information to the scheduler is a rough approximation, but it is as good as it gets. Also, measurements indicate that this approximation is not too bad in practice. If the given system is hybrid and non-SMT, the new code disables ITMT support in the scheduler (because it may get in the way of the asymmetric CPU capacity code in the scheduler that is enabled automatically by setting asymmetric CPU capacity) after initializing all online CPUs, and it finds the one with the maximum HWP_HIGHEST_PERF value. Next, it computes the capacity number for each (online) CPU by dividing the product of its HWP_HIGHEST_PERF and SCHED_CAPACITY_SCALE by the maximum HWP_HIGHEST_PERF. When a CPU goes offline, its capacity is reset to SCHED_CAPACITY_SCALE, and if it is the one with the maximum HWP_HIGHEST_PERF value, the capacity numbers for all of the other online CPUs are recomputed. This also takes care of a cleanup during driver operation mode changes. Analogously, when a new CPU goes online, its capacity number is updated, and if its HWP_HIGHEST_PERF value is greater than the current maximum one, the capacity numbers for all of the other online CPUs are recomputed. The case when the driver is notified of a CPU capacity change, either through the HWP interrupt or through an ACPI notification, is handled similarly to the CPU online case above, except that if the target CPU is the current highest-capacity one and its capacity is reduced, the capacity numbers for all of the other online CPUs need to be recomputed as well. If the driver's "no_turbo" sysfs attribute is updated, all of the CPU capacity information is computed from scratch to reflect the new turbo status. Signed-off-by: Rafael J.
Wysocki Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/intel_pstate.c | 236 ++++++++++++++++++++++++++++++++- 1 file changed, 232 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 909359b717f3..98cec67750d4 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -215,6 +216,7 @@ struct global_params { * @hwp_req_cached: Cached value of the last HWP Request MSR * @hwp_cap_cached: Cached value of the last HWP Capabilities MSR * @last_io_update: Last time when IO wake flag was set + * @capacity_perf: Highest perf used for scale invariance * @sched_flags: Store scheduler flags for possible cross CPU update * @hwp_boost_min: Last HWP boosted min performance * @suspended: Whether or not the driver has been suspended. @@ -253,6 +255,7 @@ struct cpudata { u64 hwp_req_cached; u64 hwp_cap_cached; u64 last_io_update; + unsigned int capacity_perf; unsigned int sched_flags; u32 hwp_boost_min; bool suspended; @@ -295,6 +298,7 @@ static int hwp_mode_bdw __ro_after_init; static bool per_cpu_limits __ro_after_init; static bool hwp_forced __ro_after_init; static bool hwp_boost __read_mostly; +static bool hwp_is_hybrid; static struct cpufreq_driver *intel_pstate_driver __read_mostly; @@ -930,6 +934,139 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; +static struct cpudata *hybrid_max_perf_cpu __read_mostly; +/* + * Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata, + * and the x86 arch scale-invariance information from concurrent updates. + */ +static DEFINE_MUTEX(hybrid_capacity_lock); + +static void hybrid_set_cpu_capacity(struct cpudata *cpu) +{ + arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf, + hybrid_max_perf_cpu->capacity_perf, + cpu->capacity_perf, + cpu->pstate.max_pstate_physical); + + pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu, + cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf, + cpu->pstate.max_pstate_physical); +} + +static void hybrid_clear_cpu_capacity(unsigned int cpunum) +{ + arch_set_cpu_capacity(cpunum, 1, 1, 1, 1); +} + +static void hybrid_get_capacity_perf(struct cpudata *cpu) +{ + if (READ_ONCE(global.no_turbo)) { + cpu->capacity_perf = cpu->pstate.max_pstate_physical; + return; + } + + cpu->capacity_perf = HWP_HIGHEST_PERF(READ_ONCE(cpu->hwp_cap_cached)); +} + +static void hybrid_set_capacity_of_cpus(void) +{ + int cpunum; + + for_each_online_cpu(cpunum) { + struct cpudata *cpu = all_cpu_data[cpunum]; + + if (cpu) + hybrid_set_cpu_capacity(cpu); + } +} + +static void hybrid_update_cpu_capacity_scaling(void) +{ + struct cpudata *max_perf_cpu = NULL; + unsigned int max_cap_perf = 0; + int cpunum; + + for_each_online_cpu(cpunum) { + struct cpudata *cpu = all_cpu_data[cpunum]; + + if (!cpu) + continue; + + /* + * During initialization, CPU performance at full capacity needs + * to be determined. + */ + if (!hybrid_max_perf_cpu) + hybrid_get_capacity_perf(cpu); + + /* + * If hybrid_max_perf_cpu is not NULL at this point, it is + * being replaced, so don't take it into account when looking + * for the new one. 
+ */ + if (cpu == hybrid_max_perf_cpu) + continue; + + if (cpu->capacity_perf > max_cap_perf) { + max_cap_perf = cpu->capacity_perf; + max_perf_cpu = cpu; + } + } + + if (max_perf_cpu) { + hybrid_max_perf_cpu = max_perf_cpu; + hybrid_set_capacity_of_cpus(); + } else { + pr_info("Found no CPUs with nonzero maximum performance\n"); + /* Revert to the flat CPU capacity structure. */ + for_each_online_cpu(cpunum) + hybrid_clear_cpu_capacity(cpunum); + } +} + +static void __hybrid_init_cpu_capacity_scaling(void) +{ + hybrid_max_perf_cpu = NULL; + hybrid_update_cpu_capacity_scaling(); +} + +static void hybrid_init_cpu_capacity_scaling(void) +{ + bool disable_itmt = false; + + mutex_lock(&hybrid_capacity_lock); + + /* + * If hybrid_max_perf_cpu is set at this point, the hybrid CPU capacity + * scaling has been enabled already and the driver is just changing the + * operation mode. + */ + if (hybrid_max_perf_cpu) { + __hybrid_init_cpu_capacity_scaling(); + goto unlock; + } + + /* + * On hybrid systems, use asym capacity instead of ITMT, but because + * the capacity of SMT threads is not deterministic even approximately, + * do not do that when SMT is in use. + */ + if (hwp_is_hybrid && !sched_smt_active() && arch_enable_hybrid_capacity_scale()) { + __hybrid_init_cpu_capacity_scaling(); + disable_itmt = true; + } + +unlock: + mutex_unlock(&hybrid_capacity_lock); + + /* + * Disabling ITMT causes sched domains to be rebuilt to disable asym + * packing and enable asym capacity. + */ + if (disable_itmt) + sched_clear_itmt_support(); +} + static void __intel_pstate_get_hwp_cap(struct cpudata *cpu) { u64 cap; @@ -958,6 +1095,43 @@ static void intel_pstate_get_hwp_cap(struct cpudata *cpu) } } +static void hybrid_update_capacity(struct cpudata *cpu) +{ + unsigned int max_cap_perf; + + mutex_lock(&hybrid_capacity_lock); + + if (!hybrid_max_perf_cpu) + goto unlock; + + /* + * The maximum performance of the CPU may have changed, but assume + * that the performance of the other CPUs has not changed. + */ + max_cap_perf = hybrid_max_perf_cpu->capacity_perf; + + intel_pstate_get_hwp_cap(cpu); + + hybrid_get_capacity_perf(cpu); + /* Should hybrid_max_perf_cpu be replaced by this CPU? */ + if (cpu->capacity_perf > max_cap_perf) { + hybrid_max_perf_cpu = cpu; + hybrid_set_capacity_of_cpus(); + goto unlock; + } + + /* If this CPU is hybrid_max_perf_cpu, should it be replaced? */ + if (cpu == hybrid_max_perf_cpu && cpu->capacity_perf < max_cap_perf) { + hybrid_update_cpu_capacity_scaling(); + goto unlock; + } + + hybrid_set_cpu_capacity(cpu); + +unlock: + mutex_unlock(&hybrid_capacity_lock); +} + static void intel_pstate_hwp_set(unsigned int cpu) { struct cpudata *cpu_data = all_cpu_data[cpu]; @@ -1066,6 +1240,22 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu) value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE); wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); + + mutex_lock(&hybrid_capacity_lock); + + if (!hybrid_max_perf_cpu) { + mutex_unlock(&hybrid_capacity_lock); + + return; + } + + if (hybrid_max_perf_cpu == cpu) + hybrid_update_cpu_capacity_scaling(); + + mutex_unlock(&hybrid_capacity_lock); + + /* Reset the capacity of the CPU going offline to the initial value. 
*/ + hybrid_clear_cpu_capacity(cpu->cpu); } #define POWER_CTL_EE_ENABLE 1 @@ -1161,21 +1351,46 @@ static void __intel_pstate_update_max_freq(struct cpudata *cpudata, static void intel_pstate_update_limits(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); + struct cpudata *cpudata; if (!policy) return; - __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); + cpudata = all_cpu_data[cpu]; + + __intel_pstate_update_max_freq(cpudata, policy); + + /* Prevent the driver from being unregistered now. */ + mutex_lock(&intel_pstate_driver_lock); cpufreq_cpu_release(policy); + + hybrid_update_capacity(cpudata); + + mutex_unlock(&intel_pstate_driver_lock); } static void intel_pstate_update_limits_for_all(void) { int cpu; - for_each_possible_cpu(cpu) - intel_pstate_update_limits(cpu); + for_each_possible_cpu(cpu) { + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); + + if (!policy) + continue; + + __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); + + cpufreq_cpu_release(policy); + } + + mutex_lock(&hybrid_capacity_lock); + + if (hybrid_max_perf_cpu) + __hybrid_init_cpu_capacity_scaling(); + + mutex_unlock(&hybrid_capacity_lock); } /************************** sysfs begin ************************/ @@ -1614,6 +1829,13 @@ static void intel_pstate_notify_work(struct work_struct *work) __intel_pstate_update_max_freq(cpudata, policy); cpufreq_cpu_release(policy); + + /* + * The driver will not be unregistered while this function is + * running, so update the capacity without acquiring the driver + * lock. + */ + hybrid_update_capacity(cpudata); } wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); @@ -2030,8 +2252,10 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) if (pstate_funcs.get_cpu_scaling) { cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu); - if (cpu->pstate.scaling != perf_ctl_scaling) + if (cpu->pstate.scaling != perf_ctl_scaling) { intel_pstate_hybrid_hwp_adjust(cpu); + hwp_is_hybrid = true; + } } else { cpu->pstate.scaling = perf_ctl_scaling; } @@ -2707,6 +2931,8 @@ static int intel_pstate_cpu_online(struct cpufreq_policy *policy) */ intel_pstate_hwp_reenable(cpu); cpu->suspended = false; + + hybrid_update_capacity(cpu); } return 0; @@ -3147,6 +3373,8 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver) global.min_perf_pct = min_perf_pct_min(); + hybrid_init_cpu_capacity_scaling(); + return 0; } -- Gitee From 29d32aff361f07c039f79252850e7056079b26c2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 28 Mar 2025 21:40:43 +0100 Subject: [PATCH 11/24] cpufreq: Consolidate some code in cpufreq_online() ANBZ: #26498 commit 387b51709db546033cf9a940860b6614a5cda5b6 upstream Notice that the policy->cpu update in cpufreq_policy_alloc() can be moved to cpufreq_online() and then it can be carried out under the policy rwsem, along with the clearing of policy->governor (unnecessary in the "new policy" code branch, but also not harmful). If this is done, the bottom parts of the "if (policy)" branches become identical and they can be collapsed and moved below the conditional. Modify the code accordingly which makes it somewhat easier to follow. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar Reviewed-by: Mario Limonciello Acked-by: Sudeep Holla Tested-by: Sudeep Holla Link: https://patch.msgid.link/13741234.uLZWGnKmhe@rjwysocki.net Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/cpufreq.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cc98d8cf5433..5a6668973788 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1315,7 +1315,6 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) init_waitqueue_head(&policy->transition_wait); INIT_WORK(&policy->update, handle_update); - policy->cpu = cpu; return policy; err_min_qos_notifier: @@ -1403,17 +1402,18 @@ static int cpufreq_online(unsigned int cpu) /* This is the only online CPU for the policy. Start over. */ new_policy = false; - down_write(&policy->rwsem); - policy->cpu = cpu; - policy->governor = NULL; } else { new_policy = true; policy = cpufreq_policy_alloc(cpu); if (!policy) return -ENOMEM; - down_write(&policy->rwsem); } + down_write(&policy->rwsem); + + policy->cpu = cpu; + policy->governor = NULL; + if (!new_policy && cpufreq_driver->online) { /* Recover policy->cpus using related_cpus */ cpumask_copy(policy->cpus, policy->related_cpus); -- Gitee From 9c7994d5fc42661e7d81397add29c87cfe11852c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 28 Mar 2025 21:41:49 +0100 Subject: [PATCH 12/24] cpufreq: Split cpufreq_online() ANBZ: #26498 commit 68974e3a15b92639396f39d1cd09d5288ba687bc upstream In preparation for the introduction of cpufreq policy locking guards, move the part of cpufreq_online() that is carried out under the policy rwsem into a separate function called cpufreq_policy_online(). No intentional functional impact. [Backport Changes] 1. The upstream patch was auto-merged successfully. However, while reviewing the diff, some changes appear due to line shifts in the file. These are not actual conflicts, so this is not a conflict-related deviation. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Reviewed-by: Mario Limonciello Acked-by: Sudeep Holla Tested-by: Sudeep Holla Link: https://patch.msgid.link/3354747.aeNJFYEL58@rjwysocki.net Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/cpufreq.c | 91 ++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 39 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5a6668973788..f6e95be8c91a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1383,32 +1383,13 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) kfree(policy); } -static int cpufreq_online(unsigned int cpu) +static int cpufreq_policy_online(struct cpufreq_policy *policy, + unsigned int cpu, bool new_policy) { - struct cpufreq_policy *policy; - bool new_policy; unsigned long flags; unsigned int j; int ret; - pr_debug("%s: bringing CPU%u online\n", __func__, cpu); - - /* Check if this CPU already has a policy to manage it */ - policy = per_cpu(cpufreq_cpu_data, cpu); - if (policy) { - WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus)); - if (!policy_is_inactive(policy)) - return cpufreq_add_policy_cpu(policy, cpu); - - /* This is the only online CPU for the policy. Start over. 
*/ - new_policy = false; - } else { - new_policy = true; - policy = cpufreq_policy_alloc(cpu); - if (!policy) - return -ENOMEM; - } - down_write(&policy->rwsem); policy->cpu = cpu; @@ -1435,7 +1416,7 @@ static int cpufreq_online(unsigned int cpu) if (ret) { pr_debug("%s: %d: initialization failed\n", __func__, __LINE__); - goto out_free_policy; + goto out_clear_policy; } /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ @@ -1586,21 +1567,10 @@ static int cpufreq_online(unsigned int cpu) goto out_destroy_policy; } +out_unlock: up_write(&policy->rwsem); - kobject_uevent(&policy->kobj, KOBJ_ADD); - - /* Callback for handling stuff after policy is ready */ - if (cpufreq_driver->ready) - cpufreq_driver->ready(policy); - - /* Register cpufreq cooling only for a new policy */ - if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver)) - policy->cdev = of_cpufreq_cooling_register(policy); - - pr_debug("initialization complete\n"); - - return 0; + return ret; out_destroy_policy: for_each_cpu(j, policy->real_cpus) @@ -1614,12 +1584,55 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->exit) cpufreq_driver->exit(policy); -out_free_policy: +out_clear_policy: cpumask_clear(policy->cpus); - up_write(&policy->rwsem); - cpufreq_policy_free(policy); - return ret; + goto out_unlock; +} + +static int cpufreq_online(unsigned int cpu) +{ + struct cpufreq_policy *policy; + bool new_policy; + int ret; + + pr_debug("%s: bringing CPU%u online\n", __func__, cpu); + + /* Check if this CPU already has a policy to manage it */ + policy = per_cpu(cpufreq_cpu_data, cpu); + if (policy) { + WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus)); + if (!policy_is_inactive(policy)) + return cpufreq_add_policy_cpu(policy, cpu); + + /* This is the only online CPU for the policy. Start over. */ + new_policy = false; + } else { + new_policy = true; + policy = cpufreq_policy_alloc(cpu); + if (!policy) + return -ENOMEM; + } + + ret = cpufreq_policy_online(policy, cpu, new_policy); + if (ret) { + cpufreq_policy_free(policy); + return ret; + } + + kobject_uevent(&policy->kobj, KOBJ_ADD); + + /* Callback for handling stuff after policy is ready */ + if (cpufreq_driver->ready) + cpufreq_driver->ready(policy); + + /* Register cpufreq cooling only for a new policy */ + if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver)) + policy->cdev = of_cpufreq_cooling_register(policy); + + pr_debug("initialization complete\n"); + + return 0; } /** -- Gitee From bf14c2a551c6d2f13676908feda85c9ccb13f92c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 28 Mar 2025 21:42:48 +0100 Subject: [PATCH 13/24] cpufreq: Add and use cpufreq policy locking guards ANBZ: #26498 commit 6fec833b9d70c54ceacbf7d07665215fbd0cddef upstream Introduce "read" and "write" locking guards for cpufreq policies and use them where applicable in the cpufreq core. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar Reviewed-by: Mario Limonciello Acked-by: Sudeep Holla Tested-by: Sudeep Holla Link: https://patch.msgid.link/8518682.T7Z3S40VBb@rjwysocki.net Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/cpufreq.c | 121 +++++++++++++++++--------------------- include/linux/cpufreq.h | 6 ++ 2 files changed, 60 insertions(+), 67 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index f6e95be8c91a..d2dfc72633d4 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1010,17 +1010,16 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) { struct cpufreq_policy *policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); - ssize_t ret = -EBUSY; if (!fattr->show) return -EIO; - down_read(&policy->rwsem); + guard(cpufreq_policy_read)(policy); + if (likely(!policy_is_inactive(policy))) - ret = fattr->show(policy, buf); - up_read(&policy->rwsem); + return fattr->show(policy, buf); - return ret; + return -EBUSY; } static ssize_t store(struct kobject *kobj, struct attribute *attr, @@ -1028,17 +1027,16 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, { struct cpufreq_policy *policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); - ssize_t ret = -EBUSY; if (!fattr->store) return -EIO; - down_write(&policy->rwsem); + guard(cpufreq_policy_write)(policy); + if (likely(!policy_is_inactive(policy))) - ret = fattr->store(policy, buf, count); - up_write(&policy->rwsem); + return fattr->store(policy, buf, count); - return ret; + return -EBUSY; } static void cpufreq_sysfs_release(struct kobject *kobj) @@ -1175,7 +1173,8 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp if (cpumask_test_cpu(cpu, policy->cpus)) return 0; - down_write(&policy->rwsem); + guard(cpufreq_policy_write)(policy); + if (has_target()) cpufreq_stop_governor(policy); @@ -1186,7 +1185,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp if (ret) pr_err("%s: Failed to start governor\n", __func__); } - up_write(&policy->rwsem); + return ret; } @@ -1206,9 +1205,10 @@ static void handle_update(struct work_struct *work) container_of(work, struct cpufreq_policy, update); pr_debug("handle_update for cpu %u called\n", policy->cpu); - down_write(&policy->rwsem); + + guard(cpufreq_policy_write)(policy); + refresh_frequency_limits(policy); - up_write(&policy->rwsem); } static int cpufreq_notifier_min(struct notifier_block *nb, unsigned long freq, @@ -1234,11 +1234,11 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy) struct kobject *kobj; struct completion *cmp; - down_write(&policy->rwsem); - cpufreq_stats_free_table(policy); - kobj = &policy->kobj; - cmp = &policy->kobj_unregister; - up_write(&policy->rwsem); + scoped_guard(cpufreq_policy_write, policy) { + cpufreq_stats_free_table(policy); + kobj = &policy->kobj; + cmp = &policy->kobj_unregister; + } kobject_put(kobj); /* @@ -1390,7 +1390,7 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy, unsigned int j; int ret; - down_write(&policy->rwsem); + guard(cpufreq_policy_write)(policy); policy->cpu = cpu; policy->governor = NULL; @@ -1567,10 +1567,7 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy, goto out_destroy_policy; } -out_unlock: - up_write(&policy->rwsem); - - return ret; + return 0; out_destroy_policy: for_each_cpu(j, policy->real_cpus) @@ -1587,7 +1584,7 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy, 
out_clear_policy: cpumask_clear(policy->cpus); - goto out_unlock; + return ret; } static int cpufreq_online(unsigned int cpu) @@ -1722,11 +1719,10 @@ static int cpufreq_offline(unsigned int cpu) return 0; } - down_write(&policy->rwsem); + guard(cpufreq_policy_write)(policy); __cpufreq_offline(cpu, policy); - up_write(&policy->rwsem); return 0; } @@ -1743,33 +1739,29 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) if (!policy) return; - down_write(&policy->rwsem); + scoped_guard(cpufreq_policy_write, policy) { + if (cpu_online(cpu)) + __cpufreq_offline(cpu, policy); - if (cpu_online(cpu)) - __cpufreq_offline(cpu, policy); + remove_cpu_dev_symlink(policy, cpu, dev); - remove_cpu_dev_symlink(policy, cpu, dev); + if (!cpumask_empty(policy->real_cpus)) + return; - if (!cpumask_empty(policy->real_cpus)) { - up_write(&policy->rwsem); - return; - } + /* + * Unregister cpufreq cooling once all the CPUs of the policy + * are removed. + */ + if (cpufreq_thermal_control_enabled(cpufreq_driver)) { + cpufreq_cooling_unregister(policy->cdev); + policy->cdev = NULL; + } - /* - * Unregister cpufreq cooling once all the CPUs of the policy are - * removed. - */ - if (cpufreq_thermal_control_enabled(cpufreq_driver)) { - cpufreq_cooling_unregister(policy->cdev); - policy->cdev = NULL; + /* We did light-weight exit earlier, do full tear down now */ + if (cpufreq_driver->offline && cpufreq_driver->exit) + cpufreq_driver->exit(policy); } - /* We did light-weight exit earlier, do full tear down now */ - if (cpufreq_driver->offline && cpufreq_driver->exit) - cpufreq_driver->exit(policy); - - up_write(&policy->rwsem); - cpufreq_policy_free(policy); } @@ -1922,15 +1914,16 @@ unsigned int cpufreq_get(unsigned int cpu) struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); unsigned int ret_freq = 0; - if (policy) { - down_read(&policy->rwsem); + if (!policy) + return 0; + + scoped_guard(cpufreq_policy_read, policy) { if (cpufreq_driver->get) ret_freq = __cpufreq_get(policy); - up_read(&policy->rwsem); - - cpufreq_cpu_put(policy); } + cpufreq_cpu_put(policy); + return ret_freq; } EXPORT_SYMBOL(cpufreq_get); @@ -1990,9 +1983,9 @@ void cpufreq_suspend(void) for_each_active_policy(policy) { if (has_target()) { - down_write(&policy->rwsem); - cpufreq_stop_governor(policy); - up_write(&policy->rwsem); + scoped_guard(cpufreq_policy_write, policy) { + cpufreq_stop_governor(policy); + } } if (cpufreq_driver->suspend && cpufreq_driver->suspend(policy)) @@ -2033,9 +2026,9 @@ void cpufreq_resume(void) pr_err("%s: Failed to resume driver: %s\n", __func__, cpufreq_driver->name); } else if (has_target()) { - down_write(&policy->rwsem); - ret = cpufreq_start_governor(policy); - up_write(&policy->rwsem); + scoped_guard(cpufreq_policy_write, policy) { + ret = cpufreq_start_governor(policy); + } if (ret) pr_err("%s: Failed to start governor for CPU%u's policy\n", @@ -2403,15 +2396,9 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - int ret; + guard(cpufreq_policy_write)(policy); - down_write(&policy->rwsem); - - ret = __cpufreq_driver_target(policy, target_freq, relation); - - up_write(&policy->rwsem); - - return ret; + return __cpufreq_driver_target(policy, target_freq, relation); } EXPORT_SYMBOL_GPL(cpufreq_driver_target); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 202b3185d923..c7e9c487a820 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -170,6 +170,12 @@ struct cpufreq_policy { struct 
notifier_block nb_max; }; +DEFINE_GUARD(cpufreq_policy_write, struct cpufreq_policy *, + down_write(&_T->rwsem), up_write(&_T->rwsem)) + +DEFINE_GUARD(cpufreq_policy_read, struct cpufreq_policy *, + down_read(&_T->rwsem), up_read(&_T->rwsem)) + /* * Used for passing new cpufreq policy data to the cpufreq driver's ->verify() * callback for sanitization. That callback is only expected to modify the min -- Gitee From aaba61f437a29848043f39e50592d5f03081461e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 28 Mar 2025 21:43:46 +0100 Subject: [PATCH 14/24] cpufreq: intel_pstate: Rearrange max frequency updates handling code ANBZ: #26498 commit 973207ae3d7c3c92df4a382df5d7bd695deaa904 upstream Rename __intel_pstate_update_max_freq() to intel_pstate_update_max_freq() and move the cpufreq policy reference counting and locking into it (and implement the locking with the recently introduced cpufreq policy "write" locking guard). No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello Acked-by: Srinivas Pandruvada Link: https://patch.msgid.link/2315023.iZASKD2KPV@rjwysocki.net Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/intel_pstate.c | 52 +++++++++++----------------------- 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 98cec67750d4..861c7c9c3684 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1336,9 +1336,16 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } -static void __intel_pstate_update_max_freq(struct cpudata *cpudata, - struct cpufreq_policy *policy) +static bool intel_pstate_update_max_freq(struct cpudata *cpudata) { + struct cpufreq_policy *policy __free(put_cpufreq_policy); + + policy = cpufreq_cpu_get(cpudata->cpu); + if (!policy) + return false; + + guard(cpufreq_policy_write)(policy); + if (hwp_active) intel_pstate_get_hwp_cap(cpudata); @@ -1346,44 +1353,24 @@ static void __intel_pstate_update_max_freq(struct cpudata *cpudata, cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; refresh_frequency_limits(policy); + + return true; } static void intel_pstate_update_limits(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - struct cpudata *cpudata; - - if (!policy) - return; - - cpudata = all_cpu_data[cpu]; - - __intel_pstate_update_max_freq(cpudata, policy); - - /* Prevent the driver from being unregistered now. 
*/ - mutex_lock(&intel_pstate_driver_lock); + struct cpudata *cpudata = all_cpu_data[cpu]; - cpufreq_cpu_release(policy); - - hybrid_update_capacity(cpudata); - - mutex_unlock(&intel_pstate_driver_lock); + if (intel_pstate_update_max_freq(cpudata)) + hybrid_update_capacity(cpudata); } static void intel_pstate_update_limits_for_all(void) { int cpu; - for_each_possible_cpu(cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - - if (!policy) - continue; - - __intel_pstate_update_max_freq(all_cpu_data[cpu], policy); - - cpufreq_cpu_release(policy); - } + for_each_possible_cpu(cpu) + intel_pstate_update_max_freq(all_cpu_data[cpu]); mutex_lock(&hybrid_capacity_lock); @@ -1823,13 +1810,8 @@ static void intel_pstate_notify_work(struct work_struct *work) { struct cpudata *cpudata = container_of(to_delayed_work(work), struct cpudata, hwp_notify_work); - struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu); - - if (policy) { - __intel_pstate_update_max_freq(cpudata, policy); - - cpufreq_cpu_release(policy); + if (intel_pstate_update_max_freq(cpudata)) { /* * The driver will not be unregistered while this function is * running, so update the capacity without acquiring the driver -- Gitee From 56f4222f29ce448e88b528189ca5749ce085872e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 28 Mar 2025 21:48:54 +0100 Subject: [PATCH 15/24] cpufreq: Pass policy pointer to ->update_limits() ANBZ: #26498 commit eaff6b62d3439ca6ee00dba4f77673a8c37dac20 upstream Since cpufreq_update_limits() obtains a cpufreq policy pointer for the given CPU and reference counts the corresponding policy object, it may as well pass the policy pointer to the cpufreq driver's ->update_limits() callback which allows that callback to avoid invoking cpufreq_cpu_get() for the same CPU. Accordingly, redefine ->update_limits() to take a policy pointer instead of a CPU number and update both drivers implementing it, intel_pstate and amd-pstate, as needed. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar Reviewed-by: Mario Limonciello Acked-by: Srinivas Pandruvada Acked-by: Sudeep Holla Tested-by: Sudeep Holla Link: https://patch.msgid.link/8560367.NyiUUSuA9g@rjwysocki.net Signed-off-by: Hemanth Selam Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 7 ++----- drivers/cpufreq/cpufreq.c | 2 +- drivers/cpufreq/intel_pstate.c | 29 ++++++++++++++++++----------- include/linux/cpufreq.h | 2 +- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index e01496464125..b94b8913f2ce 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -824,19 +824,16 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) schedule_work(&sched_prefcore_work); } -static void amd_pstate_update_limits(unsigned int cpu) +static void amd_pstate_update_limits(struct cpufreq_policy *policy) { - struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata; u32 prev_high = 0, cur_high = 0; bool highest_perf_changed = false; + unsigned int cpu = policy->cpu; if (!amd_pstate_prefcore) return; - if (!policy) - return; - if (amd_get_highest_perf(cpu, &cur_high)) return; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d2dfc72633d4..c6d681ae2f97 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2757,7 +2757,7 @@ void cpufreq_update_limits(unsigned int cpu) return; if (cpufreq_driver->update_limits) - cpufreq_driver->update_limits(cpu); + cpufreq_driver->update_limits(policy); else cpufreq_update_policy(cpu); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 861c7c9c3684..e6888fb57afe 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1336,14 +1336,9 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } -static bool intel_pstate_update_max_freq(struct cpudata *cpudata) +static void __intel_pstate_update_max_freq(struct cpufreq_policy *policy, + struct cpudata *cpudata) { - struct cpufreq_policy *policy __free(put_cpufreq_policy); - - policy = cpufreq_cpu_get(cpudata->cpu); - if (!policy) - return false; - guard(cpufreq_policy_write)(policy); if (hwp_active) @@ -1353,16 +1348,28 @@ static bool intel_pstate_update_max_freq(struct cpudata *cpudata) cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; refresh_frequency_limits(policy); +} + +static bool intel_pstate_update_max_freq(struct cpudata *cpudata) +{ + struct cpufreq_policy *policy __free(put_cpufreq_policy); + + policy = cpufreq_cpu_get(cpudata->cpu); + if (!policy) + return false; + + __intel_pstate_update_max_freq(policy, cpudata); return true; } -static void intel_pstate_update_limits(unsigned int cpu) +static void intel_pstate_update_limits(struct cpufreq_policy *policy) { - struct cpudata *cpudata = all_cpu_data[cpu]; + struct cpudata *cpudata = all_cpu_data[policy->cpu]; - if (intel_pstate_update_max_freq(cpudata)) - hybrid_update_capacity(cpudata); + __intel_pstate_update_max_freq(policy, cpudata); + + hybrid_update_capacity(cpudata); } static void intel_pstate_update_limits_for_all(void) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c7e9c487a820..080ddc3187e1 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -390,7 +390,7 @@ struct cpufreq_driver { unsigned int (*get)(unsigned int cpu); /* Called to update policy limits on firmware notifications. 
*/ - void (*update_limits)(unsigned int cpu); + void (*update_limits)(struct cpufreq_policy *policy); /* optional */ int (*bios_limit)(int cpu, unsigned int *limit); -- Gitee From 47306de427b8f9af1342df15ca93bb16e84f94e9 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Fri, 11 Apr 2025 08:14:39 +0000 Subject: [PATCH 16/24] cpufreq/amd-pstate: Enable ITMT support after initializing core rankings ANBZ: #26498 commit d87e4026d1b20e4f237b29e0c956ad415f533de2 upstream When working on dynamic ITMT priority support, it was observed that "asym_prefer_cpu" on AMD systems supporting Preferred Core ranking was always set to the first CPU in the sched group when the system boots up despite another CPU in the group having a higher ITMT ranking. "asym_prefer_cpu" is cached when the sched domain hierarchy is constructed. On AMD systems that support Preferred Core rankings, sched domains are rebuilt when ITMT support is enabled for the first time from amd_pstate*_cpu_init(). Since amd_pstate*_cpu_init() is called to initialize the cpudata for each CPU, the ITMT support is enabled after the first CPU initializes its asym priority but this is too early since other CPUs have not yet initialized their asym priorities and the sched domain is rebuilt only once when the support is toggled on for the first time. Initialize the asym priorities first in amd_pstate*_cpu_init() and then enable ITMT support later in amd_pstate_register_driver() to ensure all CPUs have correctly initialized their asym priorities before sched domain hierarchy is rebuilt. Clear the ITMT support when the amd-pstate driver unregisters since core rankings cannot be trusted unless the update_limits() callback is operational. Remove the delayed work mechanism now that ITMT support is only toggled from the driver init path which is outside the cpuhp critical section. Fixes: f3a052391822 ("cpufreq: amd-pstate: Enable amd-pstate preferred core support") Signed-off-by: K Prateek Nayak Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250411081439.27652-1-kprateek.nayak@amd.com Signed-off-by: Mario Limonciello Signed-off-by: priyankagunturi Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index b94b8913f2ce..42cbf4d64456 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -794,16 +794,6 @@ static void amd_perf_ctl_reset(unsigned int cpu) wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); } -/* - * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks - * due to locking, so queue the work for later. - */ -static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) -{ - sched_set_itmt_support(); -} -static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); - #define CPPC_MAX_PERF U8_MAX static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) @@ -814,14 +804,8 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) cpudata->hw_prefcore = true; - /* - * The priorities can be set regardless of whether or not - * sched_set_itmt_support(true) has been called and it is valid to - * update them at any time after it has been called. - */ + /* Priorities must be initialized before ITMT support can be toggled on. 
*/ sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu); - - schedule_work(&sched_prefcore_work); } static void amd_pstate_update_limits(struct cpufreq_policy *policy) @@ -1193,6 +1177,9 @@ static ssize_t show_energy_performance_preference( static void amd_pstate_driver_cleanup(void) { + if (amd_pstate_prefcore) + sched_clear_itmt_support(); + cppc_state = AMD_PSTATE_DISABLE; current_pstate_driver = NULL; } @@ -1235,6 +1222,10 @@ static int amd_pstate_register_driver(int mode) return ret; } + /* Enable ITMT support once all CPUs have initialized their asym priorities. */ + if (amd_pstate_prefcore) + sched_set_itmt_support(); + return 0; } -- Gitee From acd38ccb427421afeb869374a5398a3b50f09704 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Wed, 9 Apr 2025 05:34:44 +0000 Subject: [PATCH 17/24] sched/topology: Introduce sched_update_asym_prefer_cpu() ANBZ: #26498 commit 0e3f6c3696424fa90d6f512779d617a05a1cf031 upstream A subset of AMD Processors supporting Preferred Core Rankings also feature the ability to dynamically switch these rankings at runtime to bias load balancing towards or away from the LLC domain with larger cache. To support dynamically updating "sg->asym_prefer_cpu" without needing to rebuild the sched domain, introduce sched_update_asym_prefer_cpu() which recomputes the "asym_prefer_cpu" when the core-ranking of a CPU changes. sched_update_asym_prefer_cpu() swaps the "sg->asym_prefer_cpu" with the CPU whose ranking has changed if the new ranking is greater than that of the "asym_prefer_cpu". If the CPU whose ranking has changed is the current "asym_prefer_cpu", it scans the CPUs of the sched group to find the new "asym_prefer_cpu" and sets it accordingly. get_group() for non-overlapping sched domains returns the sched group for the first CPU in the sched_group_span() which ensures all CPUs in the group see the updated value of "asym_prefer_cpu". Overlapping groups are allocated differently and will require moving the "asym_prefer_cpu" to "sg->sgc" but since the current implementations do not set "SD_ASYM_PACKING" at NUMA domains, skip additional indirection and place a SCHED_WARN_ON() to alert any future users.
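[Illustration] The two cases described above are easy to model outside the kernel. The following is a sketch only, not the kernel code: a flat prio[] array stands in for arch_asym_cpu_priority(), and a single CPU span replaces the walk up the sched-domain hierarchy.

#include <stdio.h>

static int prio[4] = { 10, 40, 20, 30 };        /* per-CPU rankings (made up) */

static int sched_asym_prefer(int a, int b)
{
        return prio[a] > prio[b];
}

/* group[] is one sched group's CPU span; *asym_prefer_cpu plays the
 * role of sg->asym_prefer_cpu. */
static void update_asym_prefer_cpu(const int *group, int nr,
                                   int *asym_prefer_cpu, int cpu,
                                   int old_prio, int new_prio)
{
        int best = cpu, i;

        prio[cpu] = new_prio;

        if (cpu != *asym_prefer_cpu) {
                /* Not currently preferred: one comparison decides. */
                if (sched_asym_prefer(cpu, *asym_prefer_cpu))
                        *asym_prefer_cpu = cpu;
                return;
        }

        /* Ranking improved, so the preferred CPU stays preferred. */
        if (new_prio >= old_prio)
                return;

        /* The preferred CPU was demoted: rescan the whole span. */
        for (i = 0; i < nr; i++)
                if (sched_asym_prefer(group[i], best))
                        best = group[i];

        *asym_prefer_cpu = best;
}

int main(void)
{
        int group[4] = { 0, 1, 2, 3 }, pref = 1;

        update_asym_prefer_cpu(group, 4, &pref, 2, 20, 50);
        printf("after promoting CPU2: %d\n", pref);     /* 2 */

        update_asym_prefer_cpu(group, 4, &pref, 2, 50, 5);
        printf("after demoting CPU2: %d\n", pref);      /* 1 */

        return 0;
}

The key property is that only a demoted "asym_prefer_cpu" forces a rescan of the group; any other CPU's ranking change is resolved with a single comparison.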
Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250409053446.23367-3-kprateek.nayak@amd.com Signed-off-by: priyankagunturi Signed-off-by: mohanasv --- include/linux/sched/topology.h | 6 ++++ kernel/sched/topology.c | 58 ++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 336d8bbd5c17..7ce562c7fd64 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -216,6 +216,8 @@ struct sched_domain_topology_level { }; extern void __init set_sched_topology(struct sched_domain_topology_level *tl); +extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio); + #ifdef CONFIG_SCHED_DEBUG # define SD_INIT_NAME(type) .name = #type @@ -249,6 +251,10 @@ static inline bool cpus_share_resources(int this_cpu, int that_cpu) return true; } +static inline void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio) +{ +} + #endif /* !CONFIG_SMP */ #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index c2bd6e3e7807..3ee0ec5ce03d 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1327,6 +1327,64 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) update_group_capacity(sd, cpu); } +#ifdef CONFIG_SMP + +/* Update the "asym_prefer_cpu" when arch_asym_cpu_priority() changes. */ +void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio) +{ + int asym_prefer_cpu = cpu; + struct sched_domain *sd; + + guard(rcu)(); + + for_each_domain(cpu, sd) { + struct sched_group *sg; + int group_cpu; + + if (!(sd->flags & SD_ASYM_PACKING)) + continue; + + /* + * Groups of overlapping domain are replicated per NUMA + * node and will require updating "asym_prefer_cpu" on + * each local copy. + * + * If you are hitting this warning, consider moving + * "sg->asym_prefer_cpu" to "sg->sgc->asym_prefer_cpu" + * which is shared by all the overlapping groups. + */ + WARN_ON_ONCE(sd->flags & SD_OVERLAP); + + sg = sd->groups; + if (cpu != sg->asym_prefer_cpu) { + /* + * Since the parent is a superset of the current group, + * if the cpu is not the "asym_prefer_cpu" at the + * current level, it cannot be the preferred CPU at a + * higher levels either. + */ + if (!sched_asym_prefer(cpu, sg->asym_prefer_cpu)) + return; + + WRITE_ONCE(sg->asym_prefer_cpu, cpu); + continue; + } + + /* Ranking has improved; CPU is still the preferred one. */ + if (new_prio >= old_prio) + continue; + + for_each_cpu(group_cpu, sched_group_span(sg)) { + if (sched_asym_prefer(group_cpu, asym_prefer_cpu)) + asym_prefer_cpu = group_cpu; + } + + WRITE_ONCE(sg->asym_prefer_cpu, asym_prefer_cpu); + } +} + +#endif /* CONFIG_SMP */ + /* * Asymmetric CPU capacity bits */ -- Gitee From 3f8b0697073d1033b066a7bb44dfbb89618b3702 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Wed, 9 Apr 2025 05:34:45 +0000 Subject: [PATCH 18/24] cpufreq/amd-pstate: Update asym_prefer_cpu when core rankings change ANBZ: #26498 commit 8157fbc907452aa5674df2de23c1c7305c907006 upstream A subset of AMD systems supporting Preferred Core rankings can have their rankings changed dynamically at runtime. Update the "sg->asym_prefer_cpu" across the local hierarchy of CPU when the preferred core ranking changes. 
Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mario Limonciello Link: https://lore.kernel.org/r/20250409053446.23367-4-kprateek.nayak@amd.com Signed-off-by: priyankagunturi Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 42cbf4d64456..9bb790b993f4 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -828,8 +828,10 @@ static void amd_pstate_update_limits(struct cpufreq_policy *policy) if (highest_perf_changed) { WRITE_ONCE(cpudata->prefcore_ranking, cur_high); - if (cur_high < CPPC_MAX_PERF) + if (cur_high < CPPC_MAX_PERF) { sched_set_itmt_core_prio((int)cur_high, cpu); + sched_update_asym_prefer_cpu(cpu, prev_high, cur_high); + } } } -- Gitee From 65606187de297ab3edbf20a6e431bf1bfacac9c7 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Tue, 15 Apr 2025 08:23:09 +0000 Subject: [PATCH 19/24] cpufreq/amd-pstate: Move max_perf limiting in amd_pstate_update ANBZ: #26498 commit afc9506c2a8169e871cfafd9b94e0e631ae8f9cb upstream Move up the max_perf limiting, so that we clamp the des_perf with the updated max_perf. Signed-off-by: Dhananjay Ugwekar Link: https://lore.kernel.org/r/20250415082308.3341-1-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello Signed-off-by: priyankagunturi Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9bb790b993f4..aeeb1be787e4 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -554,6 +554,10 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, if (!policy) return; + /* limit the max perf when core performance boost feature is disabled */ + if (!cpudata->boost_supported) + max_perf = min_t(u8, perf.nominal_perf, max_perf); + des_perf = clamp_t(u8, des_perf, min_perf, max_perf); policy->cur = perf_to_freq(perf, cpudata->nominal_freq, des_perf); @@ -563,10 +567,6 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, des_perf = 0; } - /* limit the max perf when core performance boost feature is disabled */ - if (!cpudata->boost_supported) - max_perf = min_t(u8, perf.nominal_perf, max_perf); - if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc, -- Gitee From a2b8b9822e1ef5294d15f5e4a9553921016d9719 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Mon, 28 Apr 2025 06:25:21 +0000 Subject: [PATCH 20/24] cpufreq/amd-pstate: Add offline, online and suspend callbacks for amd_pstate_driver ANBZ: #26498 commit 98b52c6b0b528764b454350a11c53f757304afbc upstream Rename and use the existing amd_pstate_epp callbacks for amd_pstate driver as well. Remove the debug print in online callback while at it. These callbacks will be needed to support the "Requested CPU Min Frequency" BIOS option. 
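[Illustration] Revisiting the amd_pstate_update() reordering in the previous patch: clamp_t() evaluates des_perf against whatever max_perf holds at that moment, so the boost cap has to be applied before the clamp. A user-space sketch with made-up perf values (min_u8/clamp_u8 stand in for the kernel's min_t/clamp_t):

#include <stdio.h>

typedef unsigned char u8;

static u8 min_u8(u8 a, u8 b) { return a < b ? a : b; }

static u8 clamp_u8(u8 v, u8 lo, u8 hi)
{
        return v < lo ? lo : (v > hi ? hi : v);
}

/* Returns the des_perf that would be requested; cap_first selects the
 * new (1) versus old (0) placement of the boost cap. */
static u8 effective_des_perf(u8 min_perf, u8 des_perf, u8 max_perf,
                             u8 nominal_perf, int boost_off, int cap_first)
{
        if (cap_first && boost_off)
                max_perf = min_u8(nominal_perf, max_perf);

        des_perf = clamp_u8(des_perf, min_perf, max_perf);

        if (!cap_first && boost_off)
                max_perf = min_u8(nominal_perf, max_perf); /* too late for des_perf */

        return des_perf;
}

int main(void)
{
        /* made-up values: highest_perf 255, nominal_perf 166, request 230 */
        printf("cap after clamp:  des_perf = %u\n",
               effective_des_perf(50, 230, 255, 166, 1, 0));    /* 230 */
        printf("cap before clamp: des_perf = %u\n",
               effective_des_perf(50, 230, 255, 166, 1, 1));    /* 166 */
        return 0;
}

With the cap applied first, a request in the boost range collapses to nominal_perf while boost is off; applied last, the stale bound lets it through unchanged.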
Reviewed-by: Mario Limonciello Signed-off-by: Dhananjay Ugwekar Link: https://lore.kernel.org/r/20250428062520.4997-2-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello Signed-off-by: priyankagunturi Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index aeeb1be787e4..6ce096377bdc 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1558,19 +1558,17 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) return 0; } -static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) +static int amd_pstate_cpu_online(struct cpufreq_policy *policy) { - pr_debug("AMD CPU Core %d going online\n", policy->cpu); - return amd_pstate_cppc_enable(policy); } -static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) +static int amd_pstate_cpu_offline(struct cpufreq_policy *policy) { return 0; } -static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) +static int amd_pstate_suspend(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; @@ -1608,6 +1606,9 @@ static struct cpufreq_driver amd_pstate_driver = { .fast_switch = amd_pstate_fast_switch, .init = amd_pstate_cpu_init, .exit = amd_pstate_cpu_exit, + .online = amd_pstate_cpu_online, + .offline = amd_pstate_cpu_offline, + .suspend = amd_pstate_suspend, .set_boost = amd_pstate_set_boost, .update_limits = amd_pstate_update_limits, .name = "amd-pstate", @@ -1620,9 +1621,9 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .setpolicy = amd_pstate_epp_set_policy, .init = amd_pstate_epp_cpu_init, .exit = amd_pstate_epp_cpu_exit, - .offline = amd_pstate_epp_cpu_offline, - .online = amd_pstate_epp_cpu_online, - .suspend = amd_pstate_epp_suspend, + .offline = amd_pstate_cpu_offline, + .online = amd_pstate_cpu_online, + .suspend = amd_pstate_suspend, .resume = amd_pstate_epp_resume, .update_limits = amd_pstate_update_limits, .set_boost = amd_pstate_set_boost, -- Gitee From 2956cd68f589210be963f8c21618e3ef95e47cef Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Mon, 28 Apr 2025 07:16:24 +0000 Subject: [PATCH 21/24] cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option ANBZ: #26498 commit 608a76b65288698677843d2f22418e03391ef95f upstream Initialize lower frequency limit to the "Requested CPU Min frequency" BIOS option (if it is set) value as part of the driver->init() callback. The BIOS specified value is passed by the PMFW as min_perf in CPPC_REQ MSR. To ensure that we don't mistake a stale min_perf value in CPPC_REQ value as the "Requested CPU Min frequency" during a kexec wakeup, reset the CPPC_REQ.min_perf value back to the BIOS specified one in the offline, exit and suspend callbacks. amd_pstate_target() and amd_pstate_epp_update_limit() which are invoked as part of the resume() and online() callbacks will take care of restoring the CPPC_REQ back to the correct values. 
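[Illustration] The stale-versus-BIOS detection is mechanical: pull min_perf out of CPPC_REQ and trust it as the BIOS floor only when every other field of the MSR is still zero, since a freshly booted CPU has never had max/des/epp programmed. A user-space sketch of that check, assuming the field layout described by the driver's AMD_CPPC_*_MASK macros (max_perf in bits 7:0, min_perf in 15:8, desired in 23:16, EPP in 31:24):

#include <stdio.h>
#include <stdint.h>

/* Assumed CPPC_REQ layout, mirroring the driver's AMD_CPPC_*_MASK
 * definitions: bits 7:0 max_perf, 15:8 min_perf, 23:16 des_perf,
 * 31:24 epp. */
#define CPPC_MIN_PERF_MASK      0xff00ULL

static int min_perf_is_bios_floor(uint64_t cppc_req, uint8_t *min_perf)
{
        *min_perf = (uint8_t)((cppc_req & CPPC_MIN_PERF_MASK) >> 8);

        /* With min_perf masked out, an all-zero MSR means nothing else
         * was ever programmed, so min_perf can only be the BIOS value. */
        return (cppc_req & ~CPPC_MIN_PERF_MASK) == 0;
}

int main(void)
{
        uint8_t mp;
        int bios;

        /* fresh boot with a BIOS floor of 32: only min_perf is set */
        bios = min_perf_is_bios_floor(0x2000, &mp);
        printf("0x2000   -> bios floor? %d (min_perf %u)\n", bios, mp);

        /* stale request surviving a kexec: other fields are populated */
        bios = min_perf_is_bios_floor(0xff20a0, &mp);
        printf("0xff20a0 -> bios floor? %d (min_perf %u)\n", bios, mp);

        return 0;
}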
Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250428071623.4309-1-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello Signed-off-by: priyankagunturi Signed-off-by: mohanasv --- drivers/cpufreq/amd-pstate.c | 82 ++++++++++++++++++++++++++++++------ drivers/cpufreq/amd-pstate.h | 2 + 2 files changed, 71 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 6ce096377bdc..1693479019f3 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -389,7 +389,8 @@ static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy) static int msr_init_perf(struct amd_cpudata *cpudata) { union perf_cached perf = READ_ONCE(cpudata->perf); - u64 cap1, numerator; + u64 cap1, numerator, cppc_req; + u8 min_perf; int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &cap1); @@ -400,6 +401,22 @@ static int msr_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req); + if (ret) + return ret; + + WRITE_ONCE(cpudata->cppc_req_cached, cppc_req); + min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req); + + /* + * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an + * indication that the min_perf value is the one specified through the BIOS option + */ + cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK); + + if (!cppc_req) + perf.bios_min_perf = min_perf; + perf.highest_perf = numerator; perf.max_limit_perf = numerator; perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); @@ -580,20 +597,26 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) { /* * Initialize lower frequency limit (i.e.policy->min) with - * lowest_nonlinear_frequency which is the most energy efficient - * frequency. Override the initial value set by cpufreq core and - * amd-pstate qos_requests. + * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS, + * Override the initial value set by cpufreq core and amd-pstate qos_requests. 
*/ if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) { struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(policy_data->cpu); struct amd_cpudata *cpudata; + union perf_cached perf; if (!policy) return -EINVAL; cpudata = policy->driver_data; - policy_data->min = cpudata->lowest_nonlinear_freq; + perf = READ_ONCE(cpudata->perf); + + if (perf.bios_min_perf) + policy_data->min = perf_to_freq(perf, cpudata->nominal_freq, + perf.bios_min_perf); + else + policy_data->min = cpudata->lowest_nonlinear_freq; } cpufreq_verify_within_cpu_limits(policy_data); @@ -1023,6 +1046,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) static void amd_pstate_cpu_exit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); + + /* Reset CPPC_REQ MSR to the BIOS value */ + amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); freq_qos_remove_request(&cpudata->req[1]); freq_qos_remove_request(&cpudata->req[0]); @@ -1418,7 +1445,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) struct amd_cpudata *cpudata; union perf_cached perf; struct device *dev; - u64 value; int ret; /* @@ -1483,12 +1509,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE; } - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); - if (ret) - return ret; - WRITE_ONCE(cpudata->cppc_req_cached, value); - } ret = amd_pstate_set_epp(policy, cpudata->epp_default); if (ret) return ret; @@ -1508,6 +1528,11 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) struct amd_cpudata *cpudata = policy->driver_data; if (cpudata) { + union perf_cached perf = READ_ONCE(cpudata->perf); + + /* Reset CPPC_REQ MSR to the BIOS value */ + amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); + kfree(cpudata); policy->driver_data = NULL; } @@ -1565,12 +1590,31 @@ static int amd_pstate_cpu_online(struct cpufreq_policy *policy) static int amd_pstate_cpu_offline(struct cpufreq_policy *policy) { - return 0; + struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); + + /* + * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified + * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the + * limits, epp and desired perf will get reset to the cached values in cpudata struct + */ + return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); } static int amd_pstate_suspend(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); + int ret; + + /* + * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified + * min_perf value across kexec reboots. 
If this CPU is just resumed back without kexec, + * the limits, epp and desired perf will get reset to the cached values in cpudata struct + */ + ret = amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); + if (ret) + return ret; /* invalidate to ensure it's rewritten during resume */ cpudata->cppc_req_cached = 0; @@ -1581,6 +1625,17 @@ static int amd_pstate_suspend(struct cpufreq_policy *policy) return 0; } +static int amd_pstate_resume(struct cpufreq_policy *policy) +{ + struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); + int cur_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->cur); + + /* Set CPPC_REQ to last sane value until the governor updates it */ + return amd_pstate_update_perf(policy, perf.min_limit_perf, cur_perf, perf.max_limit_perf, + 0U, false); +} + static int amd_pstate_epp_resume(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; @@ -1609,6 +1664,7 @@ static struct cpufreq_driver amd_pstate_driver = { .online = amd_pstate_cpu_online, .offline = amd_pstate_cpu_offline, .suspend = amd_pstate_suspend, + .resume = amd_pstate_resume, .set_boost = amd_pstate_set_boost, .update_limits = amd_pstate_update_limits, .name = "amd-pstate", diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index fbe1c08d3f06..2f7ae364d331 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -30,6 +30,7 @@ * @lowest_perf: the absolute lowest performance level of the processor * @min_limit_perf: Cached value of the performance corresponding to policy->min * @max_limit_perf: Cached value of the performance corresponding to policy->max + * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option */ union perf_cached { struct { @@ -39,6 +40,7 @@ union perf_cached { u8 lowest_perf; u8 min_limit_perf; u8 max_limit_perf; + u8 bios_min_perf; }; u64 val; }; -- Gitee From b6cefb51cefe66efd14a84ce529abcb91f7e9861 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 11 Apr 2025 17:38:48 +0800 Subject: [PATCH 22/24] ACPI: CPPC: Add IS_OPTIONAL_CPC_REG macro to judge if a cpc_reg is optional ANBZ: #26498 commit e3d7935a6c6138da51626d2b956a079dd0e6671b upstream In ACPI 6.5, s8.4.6.1 _CPC (Continuous Performance Control), whether each of the per-cpu cpc_regs[] is mandatory or optional is defined. Since the CPC_SUPPORTED() check is only for optional _CPC fields, another macro to check if the field is optional is needed. Reviewed-by: Pierre Gondois Signed-off-by: Lifeng Zheng Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/20250411093855.982491-2-zhenglifeng1@huawei.com Signed-off-by: Rafael J. Wysocki Signed-off-by: priyanka-mani Signed-off-by: mohanasv --- drivers/acpi/cppc_acpi.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 15916b2abaab..c611125762f3 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -129,6 +129,20 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr); #define CPC_SUPPORTED(cpc) ((cpc)->type == ACPI_TYPE_INTEGER ? \ !!(cpc)->cpc_entry.int_value : \ !IS_NULL_REG(&(cpc)->cpc_entry.reg)) + +/* + * Each bit indicates the optionality of the register in per-cpu + * cpc_regs[] with the corresponding index. 0 means mandatory and 1 + * means optional. + */ +#define REG_OPTIONAL (0x1FC7D0) + +/* + * Use the index of the register in per-cpu cpc_regs[] to check if + * it's an optional one. 
+ */ +#define IS_OPTIONAL_CPC_REG(reg_idx) (REG_OPTIONAL & (1U << (reg_idx))) + /* * Arbitrary Retries in case the remote processor is slow to respond * to PCC commands. Keeping it high enough to cover emulators where -- Gitee From cb9958d403865868d9b8648452f0626b3a7ab5db Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 11 Apr 2025 17:38:49 +0800 Subject: [PATCH 23/24] ACPI: CPPC: Optimize cppc_get_perf() ANBZ: #26498 commit 45f3763a2122553e548fa0430b77605dc23f00cc upstream Optimize cppc_get_perf() with four changes: 1. Change the error kind to "no such device" when pcc_ss_id < 0, as other register value getting functions. 2. Add a check to ensure the pointer 'perf' is not null. 3. Add a check to verify if the register is supported to be read before using it. The logic is: (1) If the register is of the integer type, check whether the register is optional and its value is 0. If yes, the register is not supported. (2) If the register is of other types, a null one is not supported. 4. Return the result of cpc_read() instead of 0. Reviewed-by: Pierre Gondois Signed-off-by: Lifeng Zheng Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/20250411093855.982491-3-zhenglifeng1@huawei.com Signed-off-by: Rafael J. Wysocki Signed-off-by: priyanka-mani Signed-off-by: mohanasv --- drivers/acpi/cppc_acpi.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index c611125762f3..2f0913a988f9 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1197,6 +1197,9 @@ static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf) struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); struct cpc_register_resource *reg; + if (perf == NULL) + return -EINVAL; + if (!cpc_desc) { pr_debug("No CPC descriptor for CPU:%d\n", cpunum); return -ENODEV; @@ -1204,20 +1207,29 @@ static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf) reg = &cpc_desc->cpc_regs[reg_idx]; + if ((reg->type == ACPI_TYPE_INTEGER && IS_OPTIONAL_CPC_REG(reg_idx) && + !reg->cpc_entry.int_value) || (reg->type != ACPI_TYPE_INTEGER && + IS_NULL_REG(&reg->cpc_entry.reg))) { + pr_debug("CPC register is not supported\n"); + return -EOPNOTSUPP; + } + if (CPC_IN_PCC(reg)) { int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); struct cppc_pcc_data *pcc_ss_data = NULL; - int ret = 0; + int ret; - if (pcc_ss_id < 0) - return -EIO; + if (pcc_ss_id < 0) { + pr_debug("Invalid pcc_ss_id\n"); + return -ENODEV; + } pcc_ss_data = pcc_data[pcc_ss_id]; down_write(&pcc_ss_data->pcc_lock); if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0) - cpc_read(cpunum, reg, perf); + ret = cpc_read(cpunum, reg, perf); else ret = -EIO; @@ -1226,9 +1238,7 @@ static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf) return ret; } - cpc_read(cpunum, reg, perf); - - return 0; + return cpc_read(cpunum, reg, perf); } /** -- Gitee From 5349f4a2ab97fa3f7db6441f604c82cd7ec48990 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 11 Apr 2025 17:38:50 +0800 Subject: [PATCH 24/24] ACPI: CPPC: Rename cppc_get_perf() to cppc_get_reg_val() ANBZ: #26498 commit 714d103ce868cd0718d0e5262cbc76384a2eb03a upstream Rename cppc_get_perf() to cppc_get_reg_val() as a generic function to read CPPC registers. Reviewed-by: Pierre Gondois Signed-off-by: Lifeng Zheng Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/20250411093855.982491-4-zhenglifeng1@huawei.com Signed-off-by: Rafael J.
Wysocki Signed-off-by: priyanka-mani Signed-off-by: mohanasv --- drivers/acpi/cppc_acpi.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 2f0913a988f9..6dc835316085 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1192,16 +1192,16 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) return ret_val; } -static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf) +static int cppc_get_reg_val(int cpu, enum cppc_regs reg_idx, u64 *val) { - struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); + struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); struct cpc_register_resource *reg; - if (perf == NULL) + if (val == NULL) return -EINVAL; if (!cpc_desc) { - pr_debug("No CPC descriptor for CPU:%d\n", cpunum); + pr_debug("No CPC descriptor for CPU:%d\n", cpu); return -ENODEV; } @@ -1215,7 +1215,7 @@ static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf) } if (CPC_IN_PCC(reg)) { - int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cppc_pcc_data *pcc_ss_data = NULL; int ret; @@ -1229,7 +1229,7 @@ static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf) down_write(&pcc_ss_data->pcc_lock); if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0) - ret = cpc_read(cpunum, reg, perf); + ret = cpc_read(cpu, reg, val); else ret = -EIO; @@ -1238,7 +1238,7 @@ static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf) return ret; } - return cpc_read(cpunum, reg, perf); + return cpc_read(cpu, reg, val); } /** @@ -1250,7 +1250,7 @@ static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf) */ int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { - return cppc_get_perf(cpunum, DESIRED_PERF, desired_perf); + return cppc_get_reg_val(cpunum, DESIRED_PERF, desired_perf); } EXPORT_SYMBOL_GPL(cppc_get_desired_perf); @@ -1263,7 +1263,7 @@ EXPORT_SYMBOL_GPL(cppc_get_desired_perf); */ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) { - return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf); + return cppc_get_reg_val(cpunum, NOMINAL_PERF, nominal_perf); } /** @@ -1275,7 +1275,7 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) */ int cppc_get_highest_perf(int cpunum, u64 *highest_perf) { - return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf); + return cppc_get_reg_val(cpunum, HIGHEST_PERF, highest_perf); } EXPORT_SYMBOL_GPL(cppc_get_highest_perf); @@ -1288,7 +1288,7 @@ EXPORT_SYMBOL_GPL(cppc_get_highest_perf); */ int cppc_get_epp_perf(int cpunum, u64 *epp_perf) { - return cppc_get_perf(cpunum, ENERGY_PERF, epp_perf); + return cppc_get_reg_val(cpunum, ENERGY_PERF, epp_perf); } EXPORT_SYMBOL_GPL(cppc_get_epp_perf); -- Gitee
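[Illustration] Tying the last three patches together: REG_OPTIONAL is a bitmap over the per-CPU cpc_regs[] indices, and cppc_get_reg_val() refuses to read a register whose bit marks it optional but whose _CPC entry is empty. The decode below is a user-space sketch; the register names assume the enum cppc_regs ordering from the kernel's acpi/cppc_acpi.h.

#include <stdio.h>

#define REG_OPTIONAL (0x1FC7D0)
#define IS_OPTIONAL_CPC_REG(reg_idx) (REG_OPTIONAL & (1U << (reg_idx)))

/* Names assume the enum cppc_regs ordering in acpi/cppc_acpi.h. */
static const char * const reg_names[] = {
        "HIGHEST_PERF", "NOMINAL_PERF", "LOW_NON_LINEAR_PERF", "LOWEST_PERF",
        "GUARANTEED_PERF", "DESIRED_PERF", "MIN_PERF", "MAX_PERF",
        "PERF_REDUC_TOLERANCE", "TIME_WINDOW", "CTR_WRAP_TIME",
        "REFERENCE_CTR", "DELIVERED_CTR", "PERF_LIMITED", "ENABLE",
        "AUTO_SEL_ENABLE", "AUTO_ACT_WINDOW", "ENERGY_PERF",
        "REFERENCE_PERF", "LOWEST_FREQ", "NOMINAL_FREQ",
};

int main(void)
{
        unsigned int i;

        for (i = 0; i < sizeof(reg_names) / sizeof(reg_names[0]); i++)
                printf("%2u %-22s %s\n", i, reg_names[i],
                       IS_OPTIONAL_CPC_REG(i) ? "optional" : "mandatory");
        return 0;
}

Running it marks the four capability registers (HIGHEST_PERF through LOWEST_PERF), DESIRED_PERF, the two feedback counters and PERF_LIMITED as mandatory, with everything else optional, matching ACPI 6.5 s8.4.6.1.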