From 06577c6bdc2de0be2b7559fc2f5ff08c51732d8d Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sun, 9 Jun 2024 17:39:24 -0700 Subject: [PATCH 01/25] cpu: Move CPU hotplug function declarations into their own header ANBZ: #24559 commit 195fb517ee25bfefde9c74ecd86348eccbd6d2e4 upstream. Avoid upcoming #include hell when <linux/cacheinfo.h> wants to use lockdep_assert_cpus_held() and creates a #include loop that would break the build for arch/riscv. [ bp: s/cpu/CPU/g ] Intel-SIG: commit 195fb517ee25 cpu: Move CPU hotplug function declarations into their own header Backport SNC RDT support for Intel platforms. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240610003927.341707-2-tony.luck@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- include/linux/cpu.h | 33 +-------------------------- include/linux/cpuhplock.h | 47 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 32 deletions(-) create mode 100644 include/linux/cpuhplock.h diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6b4f9f169688..51f99eca8c6f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -18,6 +18,7 @@ #include #include #include +#include #include struct device; @@ -130,38 +131,6 @@ static inline int add_cpu(unsigned int cpu) { return 0;} #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; -extern int lockdep_is_cpus_held(void); - -#ifdef CONFIG_HOTPLUG_CPU -extern void cpus_write_lock(void); -extern void cpus_write_unlock(void); -extern void cpus_read_lock(void); -extern void cpus_read_unlock(void); -extern int cpus_read_trylock(void); -extern void lockdep_assert_cpus_held(void); -extern void cpu_hotplug_disable(void); -extern void cpu_hotplug_enable(void); -void clear_tasks_mm_cpumask(int cpu); -int remove_cpu(unsigned int cpu); -int cpu_device_down(struct device *dev); -extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); - -#else /* CONFIG_HOTPLUG_CPU */ - -static inline void 
cpus_write_lock(void) { } -static inline void cpus_write_unlock(void) { } -static inline void cpus_read_lock(void) { } -static inline void cpus_read_unlock(void) { } -static inline int cpus_read_trylock(void) { return true; } -static inline void lockdep_assert_cpus_held(void) { } -static inline void cpu_hotplug_disable(void) { } -static inline void cpu_hotplug_enable(void) { } -static inline int remove_cpu(unsigned int cpu) { return -EPERM; } -static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } -#endif /* !CONFIG_HOTPLUG_CPU */ - -DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) - #ifdef CONFIG_PM_SLEEP_SMP extern int freeze_secondary_cpus(int primary); extern void thaw_secondary_cpus(void); diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h new file mode 100644 index 000000000000..386abc482264 --- /dev/null +++ b/include/linux/cpuhplock.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/cpuhplock.h - CPU hotplug locking + * + * Locking functions for CPU hotplug. 
+ */ +#ifndef _LINUX_CPUHPLOCK_H_ +#define _LINUX_CPUHPLOCK_H_ + +#include +#include + +struct device; + +extern int lockdep_is_cpus_held(void); + +#ifdef CONFIG_HOTPLUG_CPU +extern void cpus_write_lock(void); +extern void cpus_write_unlock(void); +extern void cpus_read_lock(void); +extern void cpus_read_unlock(void); +extern int cpus_read_trylock(void); +extern void lockdep_assert_cpus_held(void); +extern void cpu_hotplug_disable(void); +extern void cpu_hotplug_enable(void); +void clear_tasks_mm_cpumask(int cpu); +int remove_cpu(unsigned int cpu); +int cpu_device_down(struct device *dev); +extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); + +#else /* CONFIG_HOTPLUG_CPU */ + +static inline void cpus_write_lock(void) { } +static inline void cpus_write_unlock(void) { } +static inline void cpus_read_lock(void) { } +static inline void cpus_read_unlock(void) { } +static inline int cpus_read_trylock(void) { return true; } +static inline void lockdep_assert_cpus_held(void) { } +static inline void cpu_hotplug_disable(void) { } +static inline void cpu_hotplug_enable(void) { } +static inline int remove_cpu(unsigned int cpu) { return -EPERM; } +static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } +#endif /* !CONFIG_HOTPLUG_CPU */ + +DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) + +#endif /* _LINUX_CPUHPLOCK_H_ */ -- Gitee From 28ac431ddf47aa146459da5c0b6b26aeb3820fbc Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sun, 9 Jun 2024 17:39:25 -0700 Subject: [PATCH 02/25] cpu: Drop "extern" from function declarations in cpuhplock.h ANBZ: #24559 commit ddefcfdeb5a2238cbcb07b80dda9ac3136735b1e upstream. This file was created with a direct cut and paste from cpu.h so kept the legacy declaration style. But the Linux coding standard for function declarations in header files is to avoid use of "extern". Drop "extern" from all function declarations. 
Intel-SIG: commit ddefcfdeb5a2 cpu: Drop "extern" from function declarations in cpuhplock.h Backport SNC RDT support for Intel platforms. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240610003927.341707-3-tony.luck@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- include/linux/cpuhplock.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h index 386abc482264..431560bbd045 100644 --- a/include/linux/cpuhplock.h +++ b/include/linux/cpuhplock.h @@ -15,18 +15,18 @@ struct device; extern int lockdep_is_cpus_held(void); #ifdef CONFIG_HOTPLUG_CPU -extern void cpus_write_lock(void); -extern void cpus_write_unlock(void); -extern void cpus_read_lock(void); -extern void cpus_read_unlock(void); -extern int cpus_read_trylock(void); -extern void lockdep_assert_cpus_held(void); -extern void cpu_hotplug_disable(void); -extern void cpu_hotplug_enable(void); +void cpus_write_lock(void); +void cpus_write_unlock(void); +void cpus_read_lock(void); +void cpus_read_unlock(void); +int cpus_read_trylock(void); +void lockdep_assert_cpus_held(void); +void cpu_hotplug_disable(void); +void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); int remove_cpu(unsigned int cpu); int cpu_device_down(struct device *dev); -extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); +void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); #else /* CONFIG_HOTPLUG_CPU */ -- Gitee From 5371863f16acdab7b3e756385016b1ce70446910 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 22 Aug 2025 12:30:26 +0800 Subject: [PATCH 03/25] cacheinfo: Add function to get cacheinfo for a given CPU and cache level ANBZ: #24559 commit 685cb1674060c2cb1b9da051a12933c082b8e874 upstream. Resctrl open codes a search for information about a given cache level in a couple of places (and more are on the way). 
Provide a new inline function get_cpu_cacheinfo_level() in <linux/cacheinfo.h> to do the search and return a pointer to the cacheinfo structure. Add lockdep_assert_cpus_held() to enforce the comment that cpuhp lock must be held. Simplify the existing get_cpu_cacheinfo_id() by using this new function to do the search. Intel-SIG: commit 685cb1674060 cacheinfo: Add function to get cacheinfo for a given CPU and cache level Backport SNC RDT support for Intel platforms. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/r/20240610003927.341707-4-tony.luck@intel.com [ Zhang Rui: resolve conflict (get_cpu_cacheinfo_id return value changed) and amend commit log ] Signed-off-by: Zhang Rui --- include/linux/cacheinfo.h | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 251e8b393ec7..b11a4f0eb06f 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,6 +3,7 @@ #define _LINUX_CACHEINFO_H #include +#include #include #include @@ -114,23 +115,37 @@ const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); unsigned long cache_of_get_id(struct device_node *np); /* - * Get the id of the cache associated with @cpu at level @level. + * Get the cacheinfo structure for the cache associated with @cpu at + * level @level. * cpuhp lock must be held. 
*/ -static inline unsigned long get_cpu_cacheinfo_id(int cpu, int level) +static inline struct cacheinfo *get_cpu_cacheinfo_level(int cpu, int level) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); int i; + lockdep_assert_cpus_held(); + for (i = 0; i < ci->num_leaves; i++) { if (ci->info_list[i].level == level) { if (ci->info_list[i].attributes & CACHE_ID) - return ci->info_list[i].id; - return ~0UL; + return &ci->info_list[i]; + return NULL; } } - return ~0UL; + return NULL; +} + +/* + * Get the id of the cache associated with @cpu at level @level. + * cpuhp lock must be held. + */ +static inline int get_cpu_cacheinfo_id(int cpu, int level) +{ + struct cacheinfo *ci = get_cpu_cacheinfo_level(cpu, level); + + return ci ? ci->id : -1; } /* -- Gitee From 549ec099ad62f7d2f02804692287129f38bf271b Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 27 Aug 2025 14:46:10 +0800 Subject: [PATCH 04/25] x86/resctrl: Replace open coded cacheinfo searches ANBZ: #24559 commit f385f024639431bec3e70c33cdbc9563894b3ee5 upstream. pseudo_lock_region_init() and rdtgroup_cbm_to_size() open code a search for details of a particular cache level. Replace with get_cpu_cacheinfo_level(). Intel-SIG: commit f385f0246394 x86/resctrl: Replace open coded cacheinfo searches Backport SNC RDT support for Intel platforms. 
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/r/20240610003927.341707-5-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- fs/resctrl/psuedo_lock.c | 17 ++++++----------- fs/resctrl/rdtgroup.c | 14 +++++--------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/fs/resctrl/psuedo_lock.c b/fs/resctrl/psuedo_lock.c index 077c2abb6edd..eaefea3db858 100644 --- a/fs/resctrl/psuedo_lock.c +++ b/fs/resctrl/psuedo_lock.c @@ -226,9 +226,8 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr) */ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) { - struct cpu_cacheinfo *ci; + struct cacheinfo *ci; int ret; - int i; /* Pick the first cpu we find that is associated with the cache. */ plr->cpu = cpumask_first(&plr->d->cpu_mask); @@ -240,15 +239,11 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) goto out_region; } - ci = get_cpu_cacheinfo(plr->cpu); - - plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm); - - for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == plr->s->res->cache_level) { - plr->line_size = ci->info_list[i].coherency_line_size; - return 0; - } + ci = get_cpu_cacheinfo_level(plr->cpu, plr->s->res->cache_level); + if (ci) { + plr->line_size = ci->coherency_line_size; + plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm); + return 0; } ret = -1; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index aba699474e86..b7ed97a89164 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1960,18 +1960,14 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, unsigned long cbm) { - struct cpu_cacheinfo *ci; unsigned int size = 0; - int num_b, i; + struct cacheinfo *ci; + int num_b; num_b = bitmap_weight(&cbm, 
r->cache.cbm_len); - ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); - for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == r->cache_level) { - size = ci->info_list[i].size / r->cache.cbm_len * num_b; - break; - } - } + ci = get_cpu_cacheinfo_level(cpumask_any(&d->cpu_mask), r->cache_level); + if (ci) + size = ci->size / r->cache.cbm_len * num_b; return size; } -- Gitee From eef6a8541e47abd1a04b85b3466f58d99922ec57 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 27 Aug 2025 15:16:40 +0800 Subject: [PATCH 05/25] x86/resctrl: Prepare for new domain scope ANBZ: #24559 commit f436cb6913a57bf3e1e66d18bc663e6c20751929 upstream. Resctrl resources operate on subsets of CPUs in the system with the defining attribute of each subset being an instance of a particular level of cache. E.g. all CPUs sharing an L3 cache would be part of the same domain. In preparation for features that are scoped at the NUMA node level, change the code from explicit references to "cache_level" to a more generic scope. At this point the only options for this scope are groups of CPUs that share an L2 cache or L3 cache. Clean up the error handling when looking up domains. Report invalid ids before calling rdt_find_domain() in preparation for better messages when scope can be other than cache scope. This means that rdt_find_domain() will never return an error. So remove checks for error from the call sites. Intel-SIG: commit f436cb6913a5 x86/resctrl: Prepare for new domain scope Backport SNC RDT support for Intel platforms. 
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-2-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/cpu/resctrl/core.c | 46 +++++++++++++++++++++--------- fs/resctrl/ctrlmondata.c | 2 +- fs/resctrl/psuedo_lock.c | 6 +++- fs/resctrl/rdtgroup.c | 5 +++- include/linux/resctrl.h | 9 ++++-- 5 files changed, 49 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 526f58b8f26c..f40a51e4c768 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -67,7 +67,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_L3, .name = "L3", - .cache_level = 3, + .scope = RESCTRL_L3_CACHE, .domains = domain_init(RDT_RESOURCE_L3), .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, @@ -80,7 +80,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_L2, .name = "L2", - .cache_level = 2, + .scope = RESCTRL_L2_CACHE, .domains = domain_init(RDT_RESOURCE_L2), .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, @@ -93,7 +93,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_MBA, .name = "MB", - .cache_level = 3, + .scope = RESCTRL_L3_CACHE, .domains = domain_init(RDT_RESOURCE_MBA), .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, @@ -104,7 +104,7 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_SMBA, .name = "SMBA", - .cache_level = 3, + .scope = RESCTRL_L3_CACHE, .domains = domain_init(RDT_RESOURCE_SMBA), .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, @@ -381,9 +381,6 @@ static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, struct rdt_domain *d; struct list_head *l; - if (id < 0) - return ERR_PTR(-ENODEV); - list_for_each(l, &r->domains) 
{ d = list_entry(l, struct rdt_domain, list); /* When id is found, return its domain. */ @@ -476,6 +473,19 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom) return 0; } +static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope) +{ + switch (scope) { + case RESCTRL_L2_CACHE: + case RESCTRL_L3_CACHE: + return get_cpu_cacheinfo_id(cpu, scope); + default: + break; + } + + return -EINVAL; +} + /* * domain_add_cpu - Add a cpu to a resource's domain list. * @@ -491,7 +501,7 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom) */ static void domain_add_cpu(int cpu, struct rdt_resource *r) { - int id = get_cpu_cacheinfo_id(cpu, r->cache_level); + int id = get_domain_id_from_scope(cpu, r->scope); struct list_head *add_pos = NULL; struct rdt_hw_domain *hw_dom; struct rdt_domain *d; @@ -500,12 +510,14 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) BUG_ON(id > NR_CPUS); lockdep_assert_held(&domain_list_lock); - d = rdt_find_domain(r, id, &add_pos); - if (IS_ERR(d)) { - pr_warn("Couldn't find cache id for CPU %d\n", cpu); + if (id < 0) { + pr_warn_once("Can't find domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->scope, r->name); return; } + d = rdt_find_domain(r, id, &add_pos); + if (d) { cpumask_set_cpu(cpu, &d->cpu_mask); if (r->cache.arch_has_per_cpu_cfg) @@ -551,16 +563,22 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) static void domain_remove_cpu(int cpu, struct rdt_resource *r) { - int id = get_cpu_cacheinfo_id(cpu, r->cache_level); + int id = get_domain_id_from_scope(cpu, r->scope); struct rdt_hw_domain *hw_dom; struct rdt_domain *d; BUG_ON(id > NR_CPUS); lockdep_assert_held(&domain_list_lock); + if (id < 0) { + pr_warn_once("Can't find domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->scope, r->name); + return; + } + d = rdt_find_domain(r, id, NULL); - if (IS_ERR_OR_NULL(d)) { - pr_warn("Couldn't find cache id for CPU %d\n", cpu); + if (!d) { + 
pr_warn("Couldn't find domain with id=%d for CPU %d\n", id, cpu); return; } hw_dom = resctrl_to_arch_dom(d); diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 253443f87646..5ec86070838c 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -512,7 +512,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) r = resctrl_arch_get_resource(resid); d = resctrl_arch_find_domain(r, domid); - if (IS_ERR_OR_NULL(d)) { + if (!d) { ret = -ENOENT; goto out; } diff --git a/fs/resctrl/psuedo_lock.c b/fs/resctrl/psuedo_lock.c index eaefea3db858..90952859f3e7 100644 --- a/fs/resctrl/psuedo_lock.c +++ b/fs/resctrl/psuedo_lock.c @@ -226,9 +226,13 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr) */ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) { + enum resctrl_scope scope = plr->s->res->scope; struct cacheinfo *ci; int ret; + if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE)) + return -ENODEV; + /* Pick the first cpu we find that is associated with the cache. 
*/ plr->cpu = cpumask_first(&plr->d->cpu_mask); @@ -239,7 +243,7 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) goto out_region; } - ci = get_cpu_cacheinfo_level(plr->cpu, plr->s->res->cache_level); + ci = get_cpu_cacheinfo_level(plr->cpu, scope); if (ci) { plr->line_size = ci->coherency_line_size; plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index b7ed97a89164..5360382192df 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1964,8 +1964,11 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct cacheinfo *ci; int num_b; + if (WARN_ON_ONCE(r->scope != RESCTRL_L2_CACHE && r->scope != RESCTRL_L3_CACHE)) + return size; + num_b = bitmap_weight(&cbm, r->cache.cbm_len); - ci = get_cpu_cacheinfo_level(cpumask_any(&d->cpu_mask), r->cache_level); + ci = get_cpu_cacheinfo_level(cpumask_any(&d->cpu_mask), r->scope); if (ci) size = ci->size / r->cache.cbm_len * num_b; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 75dacdc47c8a..ab2e4c3dc526 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -197,12 +197,17 @@ struct resctrl_mon { struct list_head evt_list; }; +enum resctrl_scope { + RESCTRL_L2_CACHE = 2, + RESCTRL_L3_CACHE = 3, +}; + /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine - * @cache_level: Which cache level defines scope of this resource + * @scope: Scope of this resource * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. 
* @domains: RCU list of all domains for this resource @@ -219,7 +224,7 @@ struct rdt_resource { int rid; bool alloc_capable; bool mon_capable; - int cache_level; + enum resctrl_scope scope; struct resctrl_cache cache; struct resctrl_membw membw; struct resctrl_mon mon; -- Gitee From d8aefea10bbd167fbc9b1efaf738c7de89623bdc Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 27 Aug 2025 19:35:57 +0800 Subject: [PATCH 06/25] x86/resctrl: Prepare to split rdt_domain structure ANBZ: #24559 commit c103d4d48e1599a88001fa6215be27d55f3c025b upstream. The rdt_domain structure is used for both control and monitor features. It is about to be split into separate structures for these two usages because the scope for control and monitoring features for a resource will be different for future resources. To allow for common code that scans a list of domains looking for a specific domain id, move all the common fields ("list", "id", "cpu_mask") into their own structure within the rdt_domain structure. Intel-SIG: commit c103d4d48e15 x86/resctrl: Prepare to split rdt_domain structure Backport SNC RDT support for Intel platforms. 
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-3-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/cpu/resctrl/core.c | 22 +++---- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 6 +- arch/x86/kernel/cpu/resctrl/monitor.c | 2 +- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 16 +++--- fs/resctrl/ctrlmondata.c | 18 +++--- fs/resctrl/monitor.c | 10 ++-- fs/resctrl/psuedo_lock.c | 14 ++--- fs/resctrl/rdtgroup.c | 70 +++++++++++------------ include/linux/resctrl.h | 20 +++++-- 9 files changed, 93 insertions(+), 85 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index f40a51e4c768..05aca7befe41 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -382,12 +382,12 @@ static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, struct list_head *l; list_for_each(l, &r->domains) { - d = list_entry(l, struct rdt_domain, list); + d = list_entry(l, struct rdt_domain, hdr.list); /* When id is found, return its domain. */ - if (id == d->id) + if (id == d->hdr.id) return d; /* Stop searching when finding id's position in sorted list. 
*/ - if (id < d->id) + if (id < d->hdr.id) break; } @@ -519,7 +519,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) d = rdt_find_domain(r, id, &add_pos); if (d) { - cpumask_set_cpu(cpu, &d->cpu_mask); + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); if (r->cache.arch_has_per_cpu_cfg) rdt_domain_reconfigure_cdp(r); resctrl_arch_mbm_cntr_assign_configure(); @@ -531,10 +531,10 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) return; d = &hw_dom->d_resctrl; - d->id = id; + d->hdr.id = id; r->rdt_domain_list[id] = d; - cpumask_set_cpu(cpu, &d->cpu_mask); + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); rdt_domain_reconfigure_cdp(r); @@ -551,11 +551,11 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) return; } - list_add_tail_rcu(&d->list, add_pos); + list_add_tail_rcu(&d->hdr.list, add_pos); err = resctrl_online_domain(r, d); if (err) { - list_del_rcu(&d->list); + list_del_rcu(&d->hdr.list); synchronize_rcu(); domain_free(hw_dom); } @@ -583,10 +583,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) } hw_dom = resctrl_to_arch_dom(d); - cpumask_clear_cpu(cpu, &d->cpu_mask); - if (cpumask_empty(&d->cpu_mask)) { + cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); + if (cpumask_empty(&d->hdr.cpu_mask)) { resctrl_offline_domain(r, d); - list_del_rcu(&d->list); + list_del_rcu(&d->hdr.list); synchronize_rcu(); /* diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index c5c3eaea27b6..ce249843c58f 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -30,7 +30,7 @@ static bool apply_config(struct rdt_hw_domain *hw_dom, struct rdt_domain *dom = &hw_dom->d_resctrl; if (cfg->new_ctrl != hw_dom->ctrl_val[idx]) { - cpumask_set_cpu(cpumask_any(&dom->cpu_mask), cpu_mask); + cpumask_set_cpu(cpumask_any(&dom->hdr.cpu_mask), cpu_mask); hw_dom->ctrl_val[idx] = cfg->new_ctrl; return true; @@ -47,7 +47,7 @@ int resctrl_arch_update_one(struct rdt_resource 
*r, struct rdt_domain *d, u32 idx = resctrl_get_config_index(closid, t); struct msr_param msr_param; - if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) + if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask)) return -EINVAL; hw_dom->ctrl_val[idx] = cfg_val; @@ -77,7 +77,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) return -ENOMEM; msr_param.res = NULL; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->domains, hdr.list) { hw_dom = resctrl_to_arch_dom(d); for (t = 0; t < CDP_NUM_TYPES; t++) { cfg = &hw_dom->d_resctrl.staged_config[t]; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 0ed28ef8c25a..232d0a61a3c2 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -197,7 +197,7 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, resctrl_arch_rmid_read_context_check(); - if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) + if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask)) return -EINVAL; ret = __rmid_read(rmid, eventid, &msr_val); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 50a49a1ea604..dea77a5553c3 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -114,14 +114,14 @@ static int set_cache_qos_cfg(int level, bool enable) return -ENOMEM; r_l = &rdt_resources_all[level].r_resctrl; - list_for_each_entry(d, &r_l->domains, list) { + list_for_each_entry(d, &r_l->domains, hdr.list) { if (r_l->cache.arch_has_per_cpu_cfg) /* Pick all the CPUs in the domain instance */ - for_each_cpu(cpu, &d->cpu_mask) + for_each_cpu(cpu, &d->hdr.cpu_mask) cpumask_set_cpu(cpu, cpu_mask); else /* Pick one CPU from each domain instance to update MSR */ - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask); } /* Update QOS_CFG MSR on all the 
CPUs in cpu_mask */ @@ -334,8 +334,8 @@ static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable) * Reset the architectural state so that reading of hardware * counter is not considered as an overflow in the next update. */ - list_for_each_entry(d, &r->domains, list) { - on_each_cpu_mask(&d->cpu_mask, + list_for_each_entry(d, &r->domains, hdr.list) { + on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_abmc_set_one_amd, &enable, 1); resctrl_arch_reset_rmid_all(r, d); } @@ -431,7 +431,7 @@ int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid, arch_mbm = &hw_dom->arch_mbm_local[rmid]; } - smp_call_function_any(&d->cpu_mask, rdtgroup_abmc_cfg, &abmc_cfg, 1); + smp_call_function_any(&d->hdr.cpu_mask, rdtgroup_abmc_cfg, &abmc_cfg, 1); /* * Reset the architectural state so that reading of hardware @@ -467,9 +467,9 @@ static int reset_all_ctrls(struct rdt_resource *r) * CBMs in all domains to the maximum mask value. Pick one CPU * from each domain to update the MSRs below. */ - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->domains, hdr.list) { hw_dom = resctrl_to_arch_dom(d); - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask); for (i = 0; i < hw_res->num_closid; i++) hw_dom->ctrl_val[i] = r->default_ctrl; diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 5ec86070838c..129bf9b21199 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -80,7 +80,7 @@ static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, cfg = &d->staged_config[s->conf_type]; if (cfg->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", d->id); + rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); return -EINVAL; } @@ -160,7 +160,7 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, cfg = &d->staged_config[s->conf_type]; if (cfg->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", 
d->id); + rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); return -EINVAL; } @@ -252,8 +252,8 @@ static int parse_line(char *line, struct resctrl_schema *s, return -EINVAL; } dom = strim(dom); - list_for_each_entry(d, &r->domains, list) { - if (d->id == dom_id) { + list_for_each_entry(d, &r->domains, hdr.list) { + if (d->hdr.id == dom_id) { data.buf = dom; data.rdtgrp = rdtgrp; if (parse_ctrlval(&data, s, d)) @@ -385,7 +385,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo lockdep_assert_cpus_held(); seq_printf(s, "%*s:", max_name_width, schema->name); - list_for_each_entry(dom, &r->domains, list) { + list_for_each_entry(dom, &r->domains, hdr.list) { if (sep) seq_puts(s, ";"); @@ -395,7 +395,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo ctrl_val = resctrl_arch_get_config(r, dom, closid, schema->conf_type); - seq_printf(s, r->format_str, dom->id, max_data_width, + seq_printf(s, r->format_str, dom->hdr.id, max_data_width, ctrl_val); sep = true; } @@ -424,7 +424,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of, } else { seq_printf(s, "%s:%d=%x\n", rdtgrp->plr->s->res->name, - rdtgrp->plr->d->id, + rdtgrp->plr->d->hdr.id, rdtgrp->plr->cbm); } } else { @@ -472,7 +472,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, return; } - cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU); + cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask, RESCTRL_PICK_ANY_CPU); /* * cpumask_any_housekeeping() prefers housekeeping CPUs, but @@ -481,7 +481,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, * counters on some platforms if its called in IRQ context. 
*/ if (tick_nohz_full_cpu(cpu)) - smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1); + smp_call_function_any(&d->hdr.cpu_mask, mon_event_count, rr, 1); else smp_call_on_cpu(cpu, smp_mon_event_count, rr, false); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index b6bb310bb10b..dce9e730e995 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -283,7 +283,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid); entry->busy = 0; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->domains, hdr.list) { /* * For the first limbo RMID in the domain, * setup up the limbo worker. @@ -595,7 +595,7 @@ void cqm_handle_limbo(struct work_struct *work) __check_limbo(d, false); if (has_busy_rmid(d)) { - d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask, + d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask, RESCTRL_PICK_ANY_CPU); schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo, delay); @@ -619,7 +619,7 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, unsigned long delay = msecs_to_jiffies(delay_ms); int cpu; - cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu); + cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu); dom->cqm_work_cpu = cpu; if (cpu < nr_cpu_ids) @@ -675,7 +675,7 @@ void mbm_handle_overflow(struct work_struct *work) * Re-check for housekeeping CPUs. This allows the overflow handler to * move off a nohz_full CPU quickly. 
*/ - d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask, + d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask, RESCTRL_PICK_ANY_CPU); schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay); @@ -704,7 +704,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms, */ if (!resctrl_mounted || !resctrl_arch_mon_capable()) return; - cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu); + cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu); dom->mbm_work_cpu = cpu; if (cpu < nr_cpu_ids) diff --git a/fs/resctrl/psuedo_lock.c b/fs/resctrl/psuedo_lock.c index 90952859f3e7..cb507b395d53 100644 --- a/fs/resctrl/psuedo_lock.c +++ b/fs/resctrl/psuedo_lock.c @@ -155,7 +155,7 @@ static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr) int cpu; int ret; - for_each_cpu(cpu, &plr->d->cpu_mask) { + for_each_cpu(cpu, &plr->d->hdr.cpu_mask) { pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL); if (!pm_req) { rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n"); @@ -234,7 +234,7 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) return -ENODEV; /* Pick the first cpu we find that is associated with the cache. */ - plr->cpu = cpumask_first(&plr->d->cpu_mask); + plr->cpu = cpumask_first(&plr->d->hdr.cpu_mask); if (!cpu_online(plr->cpu)) { rdt_last_cmd_printf("CPU %u associated with cache not online\n", @@ -663,10 +663,10 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) if (!r->alloc_capable) continue; - list_for_each_entry(d_i, &r->domains, list) { + list_for_each_entry(d_i, &r->domains, hdr.list) { if (d_i->plr) cpumask_or(cpu_with_psl, cpu_with_psl, - &d_i->cpu_mask); + &d_i->hdr.cpu_mask); } } @@ -674,7 +674,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) * Next test if new pseudo-locked region would intersect with * existing region. 
*/ - if (cpumask_intersects(&d->cpu_mask, cpu_with_psl)) + if (cpumask_intersects(&d->hdr.cpu_mask, cpu_with_psl)) ret = true; free_cpumask_var(cpu_with_psl); @@ -714,7 +714,7 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) } plr->thread_done = 0; - cpu = cpumask_first(&plr->d->cpu_mask); + cpu = cpumask_first(&plr->d->hdr.cpu_mask); if (!cpu_online(cpu)) { ret = -ENODEV; goto out; @@ -1045,7 +1045,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) * may be scheduled elsewhere and invalidate entries in the * pseudo-locked region. */ - if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) { + if (!cpumask_subset(current->cpus_ptr, &plr->d->hdr.cpu_mask)) { mutex_unlock(&rdtgroup_mutex); return -EINVAL; } diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 5360382192df..ce8098fcfbe3 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -105,7 +105,7 @@ void rdt_staged_configs_clear(void) if (!r->alloc_capable) continue; - list_for_each_entry(dom, &r->domains, list) + list_for_each_entry(dom, &r->domains, hdr.list) memset(dom->staged_config, 0, sizeof(dom->staged_config)); } } @@ -369,7 +369,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of, rdt_last_cmd_puts("Cache domain offline\n"); ret = -ENODEV; } else { - mask = &rdtgrp->plr->d->cpu_mask; + mask = &rdtgrp->plr->d->hdr.cpu_mask; seq_printf(s, is_cpu_list(of) ? 
"%*pbl\n" : "%*pb\n", cpumask_pr_args(mask)); @@ -913,7 +913,7 @@ static void rdtgroup_mbm_cntr_reset(struct rdt_resource *r) mbm_cntrs_init(r); - list_for_each_entry(dom, &r->domains, list) + list_for_each_entry(dom, &r->domains, hdr.list) bitmap_zero(dom->mbm_cntr_map, r->mon.num_mbm_cntrs); /* Reset the cntr_id's for all the monitor groups */ @@ -1023,8 +1023,8 @@ static int rdtgroup_mbm_control_show(struct kernfs_open_file *of, seq_printf(s, "%s//", rdtg->kn->name); - list_for_each_entry(dom, &r->domains, list) - seq_printf(s, "%d=%s;", dom->id, + list_for_each_entry(dom, &r->domains, hdr.list) + seq_printf(s, "%d=%s;", dom->hdr.id, rdtgroup_mon_state_to_str(rdtg, dom, str)); seq_putc(s, '\n'); @@ -1032,8 +1032,8 @@ static int rdtgroup_mbm_control_show(struct kernfs_open_file *of, mon.crdtgrp_list) { seq_printf(s, "%s/%s/", rdtg->kn->name, crg->kn->name); - list_for_each_entry(dom, &r->domains, list) - seq_printf(s, "%d=%s;", dom->id, + list_for_each_entry(dom, &r->domains, hdr.list) + seq_printf(s, "%d=%s;", dom->hdr.id, rdtgroup_mon_state_to_str(crg, dom, str)); seq_putc(s, '\n'); } @@ -1208,8 +1208,8 @@ static int rdtgroup_process_flags(struct rdt_resource *r, } /* Verify if the dom_id is valid */ - list_for_each_entry(d, &r->domains, list) { - if (d->id == dom_id) { + list_for_each_entry(d, &r->domains, hdr.list) { + if (d->hdr.id == dom_id) { found = 1; break; } @@ -1530,12 +1530,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, cpus_read_lock(); mutex_lock(&rdtgroup_mutex); hw_shareable = r->cache.shareable_bits; - list_for_each_entry(dom, &r->domains, list) { + list_for_each_entry(dom, &r->domains, hdr.list) { if (sep) seq_putc(seq, ';'); sw_shareable = 0; exclusive = 0; - seq_printf(seq, "%d=", dom->id); + seq_printf(seq, "%d=", dom->hdr.id); for (i = 0; i < closids_supported(); i++) { if (!closid_allocated(i)) continue; @@ -1852,7 +1852,7 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) if (r->rid == RDT_RESOURCE_MBA 
|| r->rid == RDT_RESOURCE_SMBA) continue; has_cache = true; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->domains, hdr.list) { ctrl = resctrl_arch_get_config(r, d, closid, s->conf_type); if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) { @@ -1968,7 +1968,7 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, return size; num_b = bitmap_weight(&cbm, r->cache.cbm_len); - ci = get_cpu_cacheinfo_level(cpumask_any(&d->cpu_mask), r->scope); + ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->scope); if (ci) size = ci->size / r->cache.cbm_len * num_b; @@ -2012,7 +2012,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res, rdtgrp->plr->d, rdtgrp->plr->cbm); - seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); + seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size); } goto out; } @@ -2024,7 +2024,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, type = schema->conf_type; sep = false; seq_printf(s, "%*s:", max_name_width, schema->name); - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->domains, hdr.list) { if (sep) seq_putc(s, ';'); if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { @@ -2042,7 +2042,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, else size = rdtgroup_cbm_to_size(r, d, ctrl); } - seq_printf(s, "%d=%u", d->id, size); + seq_printf(s, "%d=%u", d->hdr.id, size); sep = true; } seq_putc(s, '\n'); @@ -2063,7 +2063,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid cpus_read_lock(); mutex_lock(&rdtgroup_mutex); - list_for_each_entry(dom, &r->domains, list) { + list_for_each_entry(dom, &r->domains, hdr.list) { if (sep) seq_puts(s, ";"); @@ -2071,7 +2071,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid if (val == INVALID_CONFIG_VALUE) break; - seq_printf(s, "%d=0x%02x", dom->id, val); + seq_printf(s, "%d=0x%02x", dom->hdr.id, val); 
sep = true; } seq_puts(s, "\n"); @@ -2126,7 +2126,7 @@ static int mbm_config_write_domain(struct rdt_resource *r, * are scoped at the domain level. Writing any of these MSRs * on one CPU is observed by all the CPUs in the domain. */ - smp_call_function_any(&d->cpu_mask, resctrl_arch_event_config_set, + smp_call_function_any(&d->hdr.cpu_mask, resctrl_arch_event_config_set, &mon_info, 1); if (mon_info.err) { rdt_last_cmd_puts("Invalid event configuration\n"); @@ -2182,8 +2182,8 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) return -EINVAL; } - list_for_each_entry(d, &r->domains, list) { - if (d->id == dom_id) { + list_for_each_entry(d, &r->domains, hdr.list) { + if (d->hdr.id == dom_id) { err = mbm_config_write_domain(r, d, evtid, val); if (err) return err; @@ -2290,7 +2290,7 @@ int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) if (rdtgroup_alloc_cntr(rdtgrp, index)) return -EINVAL; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->domains, hdr.list) { resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid, rdtgrp->mon.cntr_id[index], rdtgrp->closid, true); @@ -2304,7 +2304,7 @@ static int rdtgroup_mbm_cntr_test(struct rdt_resource *r, u32 cntr_id) { struct rdt_domain *d; - list_for_each_entry(d, &r->domains, list) + list_for_each_entry(d, &r->domains, hdr.list) if (test_bit(cntr_id, d->mbm_cntr_map)) return 1; @@ -2337,7 +2337,7 @@ int rdtgroup_unassign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) return -EINVAL; if (rdtgrp->mon.cntr_id[index] != MON_CNTR_UNSET) { - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->domains, hdr.list) { resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid, rdtgrp->mon.cntr_id[index], rdtgrp->closid, false); @@ -2820,7 +2820,7 @@ static inline bool is_mba_linear(void) static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) { u32 num_closid = resctrl_arch_get_num_closid(r); - int cpu = 
cpumask_any(&d->cpu_mask); + int cpu = cpumask_any(&d->hdr.cpu_mask); int i; d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val), @@ -2869,7 +2869,7 @@ static int set_mba_sc(bool mba_sc) r->membw.mba_sc = mba_sc; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->domains, hdr.list) { for (i = 0; i < num_closid; i++) d->mbps_val[i] = MBA_MAX_MBPS; } @@ -3236,7 +3236,7 @@ static int rdt_get_tree(struct fs_context *fc) resctrl_mounted = true; if (resctrl_is_mbm_enabled()) { - list_for_each_entry(dom, &l3->domains, list) + list_for_each_entry(dom, &l3->domains, hdr.list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); } @@ -3477,7 +3477,7 @@ static void rdt_kill_sb(struct super_block *sb) * When resctrl is umounted, forcefully cancel delayed works since the * new mount option may be changed. */ - list_for_each_entry(d, &l3->domains, list) { + list_for_each_entry(d, &l3->domains, hdr.list) { if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) { @@ -3560,7 +3560,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, char name[32]; int ret; - sprintf(name, "mon_%s_%02d", r->name, d->id); + sprintf(name, "mon_%s_%02d", r->name, d->hdr.id); /* create the directory */ kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); if (IS_ERR(kn)) @@ -3576,7 +3576,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, } priv.u.rid = r->rid; - priv.u.domid = d->id; + priv.u.domid = d->hdr.id; list_for_each_entry(mevt, &r->mon.evt_list, list) { priv.u.evtid = mevt->evtid; ret = mon_addfile(kn, mevt->name, priv.priv); @@ -3627,7 +3627,7 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - list_for_each_entry(dom, &r->domains, list) { + list_for_each_entry(dom, &r->domains, hdr.list) { ret = 
mkdir_mondata_subdir(parent_kn, dom, r, prgrp); if (ret) return ret; @@ -3791,7 +3791,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, */ tmp_cbm = cfg->new_ctrl; if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { - rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id); + rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id); return -ENOSPC; } cfg->have_new_ctrl = true; @@ -3814,7 +3814,7 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) struct rdt_domain *d; int ret; - list_for_each_entry(d, &s->res->domains, list) { + list_for_each_entry(d, &s->res->domains, hdr.list) { ret = __init_one_rdt_domain(d, s, closid); if (ret < 0) return ret; @@ -3829,7 +3829,7 @@ static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) struct resctrl_staged_config *cfg; struct rdt_domain *d; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->domains, hdr.list) { if (is_mba_sc(r)) { d->mbps_val[closid] = MBA_MAX_MBPS; continue; @@ -4500,7 +4500,7 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) * per domain monitor data directories. 
*/ if (resctrl_mounted && resctrl_arch_mon_capable()) - rmdir_mondata_subdir_allrdtgrp(r, d->id); + rmdir_mondata_subdir_allrdtgrp(r, d->hdr.id); if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index ab2e4c3dc526..abdfa0878097 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -91,10 +91,20 @@ struct resctrl_staged_config { }; /** - * struct rdt_domain - group of CPUs sharing a resctrl resource + * struct rdt_domain_hdr - common header for different domain types * @list: all instances of this resource * @id: unique id for this instance * @cpu_mask: which CPUs share this resource + */ +struct rdt_domain_hdr { + struct list_head list; + int id; + struct cpumask cpu_mask; +}; + +/** + * struct rdt_domain - group of CPUs sharing a resctrl resource + * @hdr: common header for different domain types * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -110,9 +120,7 @@ struct resctrl_staged_config { * by closid */ struct rdt_domain { - struct list_head list; - int id; - struct cpumask cpu_mask; + struct rdt_domain_hdr hdr; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local; @@ -346,9 +354,9 @@ resctrl_get_domain_from_cpu(int cpu, struct rdt_resource *r) { struct rdt_domain *d; - list_for_each_entry_rcu(d, &r->domains, list) { + list_for_each_entry(d, &r->domains, hdr.list) { /* Find the domain that contains this CPU */ - if (cpumask_test_cpu(cpu, &d->cpu_mask)) + if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) return d; } -- Gitee From 9198f1a82f72c8e415eba0b4fe0cb7981a6268bf Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 26 Sep 2025 11:08:16 +0800 Subject: [PATCH 07/25] x86/resctrl: Prepare for different scope for control/monitor operations ANBZ: #24559 commit cd84f72b6a5c10f79f19fab67b0edfbc4fdbc5b1 upstream. 
Resctrl assumes that control and monitor operations on a resource are performed at the same scope. Prepare for systems that use different scope (specifically Intel needs to split the RDT_RESOURCE_L3 resource to use L3 scope for cache control and NODE scope for cache occupancy and memory bandwidth monitoring). Create separate domain lists for control and monitor operations. Note that errors during initialization of either control or monitor functions on a domain would previously result in that domain being excluded from both control and monitor operations. Now the domains are allocated independently it is no longer required to disable both control and monitor operations if either fail. Intel-SIG: commit cd84f72b6a5c x86/resctrl: Prepare for different scope for control/monitor operations Backport SNC RDT support for Intel platforms. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-4-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/cpu/resctrl/core.c | 206 +++++++++++++++++----- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 2 +- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 8 +- fs/resctrl/ctrlmondata.c | 11 +- fs/resctrl/monitor.c | 4 +- fs/resctrl/psuedo_lock.c | 4 +- fs/resctrl/rdtgroup.c | 73 ++++---- include/linux/resctrl.h | 45 ++++- 8 files changed, 259 insertions(+), 94 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 05aca7befe41..d4b2bdeaf1ad 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -59,7 +59,8 @@ static void mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); -#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains) +#define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains) 
+#define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains) struct rdt_hw_resource rdt_resources_all[] = { [RDT_RESOURCE_L3] = @@ -67,8 +68,10 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_L3, .name = "L3", - .scope = RESCTRL_L3_CACHE, - .domains = domain_init(RDT_RESOURCE_L3), + .ctrl_scope = RESCTRL_L3_CACHE, + .mon_scope = RESCTRL_L3_CACHE, + .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3), + .mon_domains = mon_domain_init(RDT_RESOURCE_L3), .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, }, @@ -80,8 +83,8 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_L2, .name = "L2", - .scope = RESCTRL_L2_CACHE, - .domains = domain_init(RDT_RESOURCE_L2), + .ctrl_scope = RESCTRL_L2_CACHE, + .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2), .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, }, @@ -93,8 +96,8 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_MBA, .name = "MB", - .scope = RESCTRL_L3_CACHE, - .domains = domain_init(RDT_RESOURCE_MBA), + .ctrl_scope = RESCTRL_L3_CACHE, + .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA), .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, }, @@ -104,8 +107,8 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_SMBA, .name = "SMBA", - .scope = RESCTRL_L3_CACHE, - .domains = domain_init(RDT_RESOURCE_SMBA), + .ctrl_scope = RESCTRL_L3_CACHE, + .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA), .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, }, @@ -358,7 +361,7 @@ void rdt_ctrl_update(void *arg) int cpu = smp_processor_id(); struct rdt_domain *d; - d = resctrl_get_domain_from_cpu(cpu, r); + d = get_ctrl_domain_from_cpu(cpu, r); if (d) { hw_res->msr_update(d, m, r); return; @@ -375,19 +378,19 @@ void rdt_ctrl_update(void *arg) * caller, return the first domain whose id is bigger than the input id. * The domain list is sorted by id in ascending order. 
*/ -static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, - struct list_head **pos) +struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, + struct list_head **pos) { - struct rdt_domain *d; + struct rdt_domain_hdr *d; struct list_head *l; - list_for_each(l, &r->domains) { - d = list_entry(l, struct rdt_domain, hdr.list); + list_for_each(l, h) { + d = list_entry(l, struct rdt_domain_hdr, list); /* When id is found, return its domain. */ - if (id == d->hdr.id) + if (id == d->id) return d; /* Stop searching when finding id's position in sorted list. */ - if (id < d->hdr.id) + if (id < d->id) break; } @@ -397,11 +400,6 @@ static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, return NULL; } -struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id) -{ - return rdt_find_domain(r, id, NULL); -} - static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); @@ -499,11 +497,12 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope) * in the schemata file and schemata input is validated to have the same order * as this list. 
*/ -static void domain_add_cpu(int cpu, struct rdt_resource *r) +static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) { - int id = get_domain_id_from_scope(cpu, r->scope); + int id = get_domain_id_from_scope(cpu, r->ctrl_scope); struct list_head *add_pos = NULL; struct rdt_hw_domain *hw_dom; + struct rdt_domain_hdr *hdr; struct rdt_domain *d; int err; @@ -511,18 +510,18 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) lockdep_assert_held(&domain_list_lock); if (id < 0) { - pr_warn_once("Can't find domain id for CPU:%d scope:%d for resource %s\n", - cpu, r->scope, r->name); + pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->ctrl_scope, r->name); return; } - d = rdt_find_domain(r, id, &add_pos); + hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos); + if (hdr) { + if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) + return; + d = container_of(hdr, struct rdt_domain, hdr); - if (d) { cpumask_set_cpu(cpu, &d->hdr.cpu_mask); - if (r->cache.arch_has_per_cpu_cfg) - rdt_domain_reconfigure_cdp(r); - resctrl_arch_mbm_cntr_assign_configure(); return; } @@ -532,17 +531,72 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) d = &hw_dom->d_resctrl; d->hdr.id = id; + d->hdr.type = RESCTRL_CTRL_DOMAIN; r->rdt_domain_list[id] = d; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); rdt_domain_reconfigure_cdp(r); - if (r->alloc_capable && domain_setup_ctrlval(r, d)) { + if (domain_setup_ctrlval(r, d)) { domain_free(hw_dom); return; } + list_add_tail_rcu(&d->hdr.list, add_pos); + + err = resctrl_online_ctrl_domain(r, d); + if (err) { + list_del_rcu(&d->hdr.list); + synchronize_rcu(); + domain_free(hw_dom); + } +} + +static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) +{ + int id = get_domain_id_from_scope(cpu, r->mon_scope); + struct list_head *add_pos = NULL; + struct rdt_hw_domain *hw_dom; + struct rdt_domain_hdr *hdr; + struct rdt_domain *d; + int err; + + BUG_ON(id > NR_CPUS); + 
lockdep_assert_held(&domain_list_lock); + + if (id < 0) { + pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->mon_scope, r->name); + return; + } + + hdr = rdt_find_domain(&r->mon_domains, id, &add_pos); + if (hdr) { + if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) + return; + d = container_of(hdr, struct rdt_domain, hdr); + + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); + if (r->cache.arch_has_per_cpu_cfg) + rdt_domain_reconfigure_cdp(r); + resctrl_arch_mbm_cntr_assign_configure(); + return; + } + + hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu)); + if (!hw_dom) + return; + + d = &hw_dom->d_resctrl; + d->hdr.id = id; + d->hdr.type = RESCTRL_MON_DOMAIN; + r->rdt_domain_list[id] = d; + + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); + + rdt_domain_reconfigure_cdp(r); + resctrl_mbm_evt_config_init(hw_dom); resctrl_arch_mbm_cntr_assign_configure(); @@ -553,7 +607,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) list_add_tail_rcu(&d->hdr.list, add_pos); - err = resctrl_online_domain(r, d); + err = resctrl_online_mon_domain(r, d); if (err) { list_del_rcu(&d->hdr.list); synchronize_rcu(); @@ -561,31 +615,46 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) } } -static void domain_remove_cpu(int cpu, struct rdt_resource *r) +static void domain_add_cpu(int cpu, struct rdt_resource *r) { - int id = get_domain_id_from_scope(cpu, r->scope); + if (r->alloc_capable) + domain_add_cpu_ctrl(cpu, r); + if (r->mon_capable) + domain_add_cpu_mon(cpu, r); +} + +static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r) +{ + int id = get_domain_id_from_scope(cpu, r->ctrl_scope); struct rdt_hw_domain *hw_dom; + struct rdt_domain_hdr *hdr; struct rdt_domain *d; BUG_ON(id > NR_CPUS); lockdep_assert_held(&domain_list_lock); if (id < 0) { - pr_warn_once("Can't find domain id for CPU:%d scope:%d for resource %s\n", - cpu, r->scope, r->name); + pr_warn_once("Can't find control domain id for CPU:%d 
scope:%d for resource %s\n", + cpu, r->ctrl_scope, r->name); return; } - d = rdt_find_domain(r, id, NULL); - if (!d) { - pr_warn("Couldn't find domain with id=%d for CPU %d\n", id, cpu); + hdr = rdt_find_domain(&r->ctrl_domains, id, NULL); + if (!hdr) { + pr_warn("Couldn't find control domain with id=%d for CPU %d\n", id, cpu); return; } + + if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) + return; + + d = container_of(hdr, struct rdt_domain, hdr); hw_dom = resctrl_to_arch_dom(d); cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); + if (cpumask_empty(&d->hdr.cpu_mask)) { - resctrl_offline_domain(r, d); + resctrl_offline_ctrl_domain(r, d); list_del_rcu(&d->hdr.list); synchronize_rcu(); @@ -602,6 +671,61 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) } } +static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) +{ + int id = get_domain_id_from_scope(cpu, r->mon_scope); + struct rdt_hw_domain *hw_dom; + struct rdt_domain_hdr *hdr; + struct rdt_domain *d; + + BUG_ON(id > NR_CPUS); + lockdep_assert_held(&domain_list_lock); + + if (id < 0) { + pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->mon_scope, r->name); + return; + } + + hdr = rdt_find_domain(&r->mon_domains, id, NULL); + if (!hdr) { + pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n", + id, cpu, r->name); + return; + } + + if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) + return; + + d = container_of(hdr, struct rdt_domain, hdr); + hw_dom = resctrl_to_arch_dom(d); + + cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); + if (cpumask_empty(&d->hdr.cpu_mask)) { + resctrl_offline_mon_domain(r, d); + list_del_rcu(&d->hdr.list); + synchronize_rcu(); + /* + * rdt_domain "d" is going to be freed below, so clear + * its pointer from pseudo_lock_region struct. 
+ */ + if (d->plr) + d->plr->d = NULL; + r->rdt_domain_list[id] = NULL; + domain_free(hw_dom); + + return; + } +} + +static void domain_remove_cpu(int cpu, struct rdt_resource *r) +{ + if (r->alloc_capable) + domain_remove_cpu_ctrl(cpu, r); + if (r->mon_capable) + domain_remove_cpu_mon(cpu, r); +} + static void clear_closid_rmid(int cpu) { struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index ce249843c58f..b9097e00ab5a 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -77,7 +77,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) return -ENOMEM; msr_param.res = NULL; - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { hw_dom = resctrl_to_arch_dom(d); for (t = 0; t < CDP_NUM_TYPES; t++) { cfg = &hw_dom->d_resctrl.staged_config[t]; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index dea77a5553c3..f8a00511baf3 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -114,7 +114,7 @@ static int set_cache_qos_cfg(int level, bool enable) return -ENOMEM; r_l = &rdt_resources_all[level].r_resctrl; - list_for_each_entry(d, &r_l->domains, hdr.list) { + list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) { if (r_l->cache.arch_has_per_cpu_cfg) /* Pick all the CPUs in the domain instance */ for_each_cpu(cpu, &d->hdr.cpu_mask) @@ -334,7 +334,7 @@ static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable) * Reset the architectural state so that reading of hardware * counter is not considered as an overflow in the next update. 
*/ - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_abmc_set_one_amd, &enable, 1); resctrl_arch_reset_rmid_all(r, d); @@ -464,10 +464,10 @@ static int reset_all_ctrls(struct rdt_resource *r) /* * Disable resource control for this resource by setting all - * CBMs in all domains to the maximum mask value. Pick one CPU + * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU * from each domain to update the MSRs below. */ - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { hw_dom = resctrl_to_arch_dom(d); cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask); diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 129bf9b21199..2cb017457bda 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -252,7 +252,7 @@ static int parse_line(char *line, struct resctrl_schema *s, return -EINVAL; } dom = strim(dom); - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { if (d->hdr.id == dom_id) { data.buf = dom; data.rdtgrp = rdtgrp; @@ -385,7 +385,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo lockdep_assert_cpus_held(); seq_printf(s, "%*s:", max_name_width, schema->name); - list_for_each_entry(dom, &r->domains, hdr.list) { + list_for_each_entry(dom, &r->ctrl_domains, hdr.list) { if (sep) seq_puts(s, ";"); @@ -491,6 +491,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, int rdtgroup_mondata_show(struct seq_file *m, void *arg) { struct kernfs_open_file *of = m->private; + struct rdt_domain_hdr *hdr; u32 resid, evtid, domid; struct rdtgroup *rdtgrp; struct rdt_resource *r; @@ -511,12 +512,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) evtid = md.u.evtid; r = resctrl_arch_get_resource(resid); - d = resctrl_arch_find_domain(r, domid); - if (!d) { + hdr = 
rdt_find_domain(&r->mon_domains, domid, NULL); + if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) { ret = -ENOENT; goto out; } + d = container_of(hdr, struct rdt_domain, hdr); + if (resctrl_arch_get_abmc_enabled() && evtid != QOS_L3_OCCUP_EVENT_ID) { index = mon_event_config_index_get(evtid); if (index != INVALID_CONFIG_INDEX && diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index dce9e730e995..2cc59a1e47be 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -283,7 +283,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid); entry->busy = 0; - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { /* * For the first limbo RMID in the domain, * setup up the limbo worker. @@ -481,7 +481,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) idx = resctrl_arch_rmid_idx_encode(closid, rmid); pmbm_data = &dom_mbm->mbm_local[idx]; - dom_mba = resctrl_get_domain_from_cpu(smp_processor_id(), r_mba); + dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba); if (!dom_mba) { pr_warn_once("Failure to get domain for MBA update\n"); return; diff --git a/fs/resctrl/psuedo_lock.c b/fs/resctrl/psuedo_lock.c index cb507b395d53..daee51d5da01 100644 --- a/fs/resctrl/psuedo_lock.c +++ b/fs/resctrl/psuedo_lock.c @@ -226,7 +226,7 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr) */ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) { - enum resctrl_scope scope = plr->s->res->scope; + enum resctrl_scope scope = plr->s->res->ctrl_scope; struct cacheinfo *ci; int ret; @@ -663,7 +663,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) if (!r->alloc_capable) continue; - list_for_each_entry(d_i, &r->domains, hdr.list) { + list_for_each_entry(d_i, &r->ctrl_domains, hdr.list) { if (d_i->plr) cpumask_or(cpu_with_psl, cpu_with_psl, &d_i->hdr.cpu_mask); diff --git 
a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index ce8098fcfbe3..f730f9b1bd70 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -105,7 +105,7 @@ void rdt_staged_configs_clear(void) if (!r->alloc_capable) continue; - list_for_each_entry(dom, &r->domains, hdr.list) + list_for_each_entry(dom, &r->ctrl_domains, hdr.list) memset(dom->staged_config, 0, sizeof(dom->staged_config)); } } @@ -913,7 +913,7 @@ static void rdtgroup_mbm_cntr_reset(struct rdt_resource *r) mbm_cntrs_init(r); - list_for_each_entry(dom, &r->domains, hdr.list) + list_for_each_entry(dom, &r->mon_domains, hdr.list) bitmap_zero(dom->mbm_cntr_map, r->mon.num_mbm_cntrs); /* Reset the cntr_id's for all the monitor groups */ @@ -1023,7 +1023,7 @@ static int rdtgroup_mbm_control_show(struct kernfs_open_file *of, seq_printf(s, "%s//", rdtg->kn->name); - list_for_each_entry(dom, &r->domains, hdr.list) + list_for_each_entry(dom, &r->mon_domains, hdr.list) seq_printf(s, "%d=%s;", dom->hdr.id, rdtgroup_mon_state_to_str(rdtg, dom, str)); seq_putc(s, '\n'); @@ -1032,7 +1032,7 @@ static int rdtgroup_mbm_control_show(struct kernfs_open_file *of, mon.crdtgrp_list) { seq_printf(s, "%s/%s/", rdtg->kn->name, crg->kn->name); - list_for_each_entry(dom, &r->domains, hdr.list) + list_for_each_entry(dom, &r->mon_domains, hdr.list) seq_printf(s, "%d=%s;", dom->hdr.id, rdtgroup_mon_state_to_str(crg, dom, str)); seq_putc(s, '\n'); @@ -1208,7 +1208,7 @@ static int rdtgroup_process_flags(struct rdt_resource *r, } /* Verify if the dom_id is valid */ - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { if (d->hdr.id == dom_id) { found = 1; break; @@ -1530,7 +1530,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, cpus_read_lock(); mutex_lock(&rdtgroup_mutex); hw_shareable = r->cache.shareable_bits; - list_for_each_entry(dom, &r->domains, hdr.list) { + list_for_each_entry(dom, &r->ctrl_domains, hdr.list) { if (sep) seq_putc(seq, ';'); sw_shareable = 0; 
@@ -1852,7 +1852,7 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA) continue; has_cache = true; - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { ctrl = resctrl_arch_get_config(r, d, closid, s->conf_type); if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) { @@ -1964,11 +1964,11 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct cacheinfo *ci; int num_b; - if (WARN_ON_ONCE(r->scope != RESCTRL_L2_CACHE && r->scope != RESCTRL_L3_CACHE)) + if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE)) return size; num_b = bitmap_weight(&cbm, r->cache.cbm_len); - ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->scope); + ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope); if (ci) size = ci->size / r->cache.cbm_len * num_b; @@ -2024,7 +2024,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, type = schema->conf_type; sep = false; seq_printf(s, "%*s:", max_name_width, schema->name); - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { if (sep) seq_putc(s, ';'); if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { @@ -2063,7 +2063,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid cpus_read_lock(); mutex_lock(&rdtgroup_mutex); - list_for_each_entry(dom, &r->domains, hdr.list) { + list_for_each_entry(dom, &r->mon_domains, hdr.list) { if (sep) seq_puts(s, ";"); @@ -2182,7 +2182,7 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) return -EINVAL; } - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { if (d->hdr.id == dom_id) { err = mbm_config_write_domain(r, d, evtid, val); if (err) @@ -2290,7 +2290,7 @@ int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) if 
(rdtgroup_alloc_cntr(rdtgrp, index)) return -EINVAL; - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid, rdtgrp->mon.cntr_id[index], rdtgrp->closid, true); @@ -2304,7 +2304,7 @@ static int rdtgroup_mbm_cntr_test(struct rdt_resource *r, u32 cntr_id) { struct rdt_domain *d; - list_for_each_entry(d, &r->domains, hdr.list) + list_for_each_entry(d, &r->mon_domains, hdr.list) if (test_bit(cntr_id, d->mbm_cntr_map)) return 1; @@ -2337,7 +2337,7 @@ int rdtgroup_unassign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) return -EINVAL; if (rdtgrp->mon.cntr_id[index] != MON_CNTR_UNSET) { - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid, rdtgrp->mon.cntr_id[index], rdtgrp->closid, false); @@ -2869,7 +2869,7 @@ static int set_mba_sc(bool mba_sc) r->membw.mba_sc = mba_sc; - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { for (i = 0; i < num_closid; i++) d->mbps_val[i] = MBA_MAX_MBPS; } @@ -3236,7 +3236,7 @@ static int rdt_get_tree(struct fs_context *fc) resctrl_mounted = true; if (resctrl_is_mbm_enabled()) { - list_for_each_entry(dom, &l3->domains, hdr.list) + list_for_each_entry(dom, &l3->mon_domains, hdr.list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); } @@ -3477,7 +3477,7 @@ static void rdt_kill_sb(struct super_block *sb) * When resctrl is umounted, forcefully cancel delayed works since the * new mount option may be changed. 
*/ - list_for_each_entry(d, &l3->domains, hdr.list) { + list_for_each_entry(d, &l3->mon_domains, hdr.list) { if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) { @@ -3627,7 +3627,7 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - list_for_each_entry(dom, &r->domains, hdr.list) { + list_for_each_entry(dom, &r->mon_domains, hdr.list) { ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); if (ret) return ret; @@ -3814,7 +3814,7 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) struct rdt_domain *d; int ret; - list_for_each_entry(d, &s->res->domains, hdr.list) { + list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) { ret = __init_one_rdt_domain(d, s, closid); if (ret < 0) return ret; @@ -3829,7 +3829,7 @@ static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) struct resctrl_staged_config *cfg; struct rdt_domain *d; - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { if (is_mba_sc(r)) { d->mbps_val[closid] = MBA_MAX_MBPS; continue; @@ -4485,15 +4485,19 @@ static void domain_destroy_mon_state(struct rdt_domain *d) kfree(d->mbm_local); } -void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) +void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d) { mutex_lock(&rdtgroup_mutex); if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) mba_sc_domain_destroy(r, d); - if (!r->mon_capable) - goto out_unlock; + mutex_unlock(&rdtgroup_mutex); +} + +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d) +{ + mutex_lock(&rdtgroup_mutex); /* * If resctrl is mounted, remove all the @@ -4519,7 +4523,6 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) domain_destroy_mon_state(d); -out_unlock: 
mutex_unlock(&rdtgroup_mutex); } @@ -4563,20 +4566,26 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) return 0; } -int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) +int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d) { int err = 0; mutex_lock(&rdtgroup_mutex); - if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) { + if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) /* RDT_RESOURCE_MBA is never mon_capable */ err = mba_sc_domain_allocate(r, d); - goto out_unlock; - } - if (!r->mon_capable) - goto out_unlock; + mutex_unlock(&rdtgroup_mutex); + + return err; +} + +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d) +{ + int err = 0; + + mutex_lock(&rdtgroup_mutex); err = domain_setup_mon_state(r, d); if (err) @@ -4641,7 +4650,7 @@ void resctrl_offline_cpu(unsigned int cpu) if (!l3->mon_capable) goto out_unlock; - d = resctrl_get_domain_from_cpu(cpu, l3); + d = get_mon_domain_from_cpu(cpu, l3); if (d) { if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) { cancel_delayed_work(&d->mbm_over); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index abdfa0878097..83f90bd170f1 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -90,15 +90,22 @@ struct resctrl_staged_config { bool have_new_ctrl; }; +enum resctrl_domain_type { + RESCTRL_CTRL_DOMAIN, + RESCTRL_MON_DOMAIN, +}; + /** * struct rdt_domain_hdr - common header for different domain types * @list: all instances of this resource * @id: unique id for this instance + * @type: type of this instance * @cpu_mask: which CPUs share this resource */ struct rdt_domain_hdr { struct list_head list; int id; + enum resctrl_domain_type type; struct cpumask cpu_mask; }; @@ -215,10 +222,12 @@ enum resctrl_scope { * @rid: The index of the resource * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine - * @scope: Scope of 
this resource + * @ctrl_scope: Scope of this resource for control functions + * @mon_scope: Scope of this resource for monitor functions * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. - * @domains: RCU list of all domains for this resource + * @ctrl_domains: RCU list of all control domains for this resource + * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. * @data_width: Character width of data when displaying * @default_ctrl: Specifies default cache cbm or memory B/W percent. @@ -232,11 +241,13 @@ struct rdt_resource { int rid; bool alloc_capable; bool mon_capable; - enum resctrl_scope scope; + enum resctrl_scope ctrl_scope; + enum resctrl_scope mon_scope; struct resctrl_cache cache; struct resctrl_membw membw; struct resctrl_mon mon; - struct list_head domains; + struct list_head ctrl_domains; + struct list_head mon_domains; char *name; int data_width; u32 default_ctrl; @@ -327,6 +338,8 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); +struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, + struct list_head **pos); bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); @@ -350,11 +363,25 @@ static inline u32 resctrl_get_config_index(u32 closid, * freed. 
*/ static inline struct rdt_domain * -resctrl_get_domain_from_cpu(int cpu, struct rdt_resource *r) +get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r) +{ + struct rdt_domain *d; + + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + /* Find the domain that contains this CPU */ + if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) + return d; + } + + return NULL; +} + +static inline struct rdt_domain * +get_mon_domain_from_cpu(int cpu, struct rdt_resource *r) { struct rdt_domain *d; - list_for_each_entry(d, &r->domains, hdr.list) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { /* Find the domain that contains this CPU */ if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) return d; @@ -372,8 +399,10 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); -int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); -void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); +int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d); +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d); void resctrl_online_cpu(unsigned int cpu); void resctrl_offline_cpu(unsigned int cpu); -- Gitee From 4a241124d02237c75302c0143ac9dd3c2d61e988 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 29 Aug 2025 22:23:13 +0800 Subject: [PATCH 08/25] x86/resctrl: Split the rdt_domain and rdt_hw_domain structures ANBZ: #24559 commit cae2bcb6a2c691ef7b537ad07e9819a5ed645bcc upstream. The same rdt_domain structure is used for both control and monitor functions. But this results in wasted memory as some of the fields are only used by control functions, while most are only used for monitor functions.
Split into separate rdt_ctrl_domain and rdt_mon_domain structures with just the fields required for control and monitoring respectively. Similar split of the rdt_hw_domain structure into rdt_hw_ctrl_domain and rdt_hw_mon_domain. Intel-SIG: commit cae2bcb6a2c6 x86/resctrl: Split the rdt_domain and rdt_hw_domain structures Backport SNC RDT support for Intel platforms. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-5-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/cpu/resctrl/core.c | 85 +++++++++++------------ arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 18 ++--- arch/x86/kernel/cpu/resctrl/internal.h | 34 ++++++--- arch/x86/kernel/cpu/resctrl/monitor.c | 16 ++--- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 22 +++--- fs/resctrl/ctrlmondata.c | 16 ++--- fs/resctrl/internal.h | 20 +++--- fs/resctrl/monitor.c | 28 ++++---- fs/resctrl/psuedo_lock.c | 6 +- fs/resctrl/rdtgroup.c | 74 ++++++++++---------- include/linux/resctrl.h | 60 +++++++++------- 11 files changed, 200 insertions(+), 179 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index d4b2bdeaf1ad..a48a6b14a4b7 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -51,12 +51,12 @@ DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state); bool rdt_alloc_capable; static void -mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, +mba_wrmsr_intel(struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r); static void -cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); +cat_wrmsr(struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r); static void -mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, +mba_wrmsr_amd(struct rdt_ctrl_domain *d, struct msr_param *m, struct 
rdt_resource *r); #define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains) @@ -300,10 +300,10 @@ bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l) } static void -mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) +mba_wrmsr_amd(struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r) { unsigned int i; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); for (i = m->low; i < m->high; i++) @@ -325,11 +325,11 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) } static void -mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, +mba_wrmsr_intel(struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r) { unsigned int i; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); /* Write the delay values for mba. 
*/ @@ -338,10 +338,10 @@ mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, } static void -cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) +cat_wrmsr(struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r) { unsigned int i; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); for (i = m->low; i < m->high; i++) @@ -359,7 +359,7 @@ void rdt_ctrl_update(void *arg) struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); struct rdt_resource *r = m->res; int cpu = smp_processor_id(); - struct rdt_domain *d; + struct rdt_ctrl_domain *d; d = get_ctrl_domain_from_cpu(cpu, r); if (d) { @@ -414,18 +414,23 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) *dc = r->default_ctrl; } -static void domain_free(struct rdt_hw_domain *hw_dom) +static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) +{ + kfree(hw_dom->ctrl_val); + kfree(hw_dom); +} + +static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom) { kfree(hw_dom->arch_mbm_total); kfree(hw_dom->arch_mbm_local); - kfree(hw_dom->ctrl_val); kfree(hw_dom); } -static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) +static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d) { + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct msr_param m; u32 *dc; @@ -448,7 +453,7 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) * @num_rmid: The size of the MBM counter array * @hw_dom: The domain that owns the allocated arrays */ -static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom) +static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) { size_t tsize; @@ -500,10 +505,10 
@@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope) static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) { int id = get_domain_id_from_scope(cpu, r->ctrl_scope); + struct rdt_hw_ctrl_domain *hw_dom; struct list_head *add_pos = NULL; - struct rdt_hw_domain *hw_dom; struct rdt_domain_hdr *hdr; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; int err; BUG_ON(id > NR_CPUS); @@ -519,7 +524,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) if (hdr) { if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) return; - d = container_of(hdr, struct rdt_domain, hdr); + d = container_of(hdr, struct rdt_ctrl_domain, hdr); cpumask_set_cpu(cpu, &d->hdr.cpu_mask); return; @@ -532,14 +537,13 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) d = &hw_dom->d_resctrl; d->hdr.id = id; d->hdr.type = RESCTRL_CTRL_DOMAIN; - r->rdt_domain_list[id] = d; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); rdt_domain_reconfigure_cdp(r); if (domain_setup_ctrlval(r, d)) { - domain_free(hw_dom); + ctrl_domain_free(hw_dom); return; } @@ -549,7 +553,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) if (err) { list_del_rcu(&d->hdr.list); synchronize_rcu(); - domain_free(hw_dom); + ctrl_domain_free(hw_dom); } } @@ -557,9 +561,9 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) { int id = get_domain_id_from_scope(cpu, r->mon_scope); struct list_head *add_pos = NULL; - struct rdt_hw_domain *hw_dom; + struct rdt_hw_mon_domain *hw_dom; struct rdt_domain_hdr *hdr; - struct rdt_domain *d; + struct rdt_mon_domain *d; int err; BUG_ON(id > NR_CPUS); @@ -575,7 +579,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) if (hdr) { if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) return; - d = container_of(hdr, struct rdt_domain, hdr); + d = container_of(hdr, struct rdt_mon_domain, hdr); cpumask_set_cpu(cpu, &d->hdr.cpu_mask); if (r->cache.arch_has_per_cpu_cfg) @@ -601,7 +605,7 @@ static void 
domain_add_cpu_mon(int cpu, struct rdt_resource *r) resctrl_arch_mbm_cntr_assign_configure(); if (r->mon_capable && arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) { - domain_free(hw_dom); + mon_domain_free(hw_dom); return; } @@ -611,7 +615,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) if (err) { list_del_rcu(&d->hdr.list); synchronize_rcu(); - domain_free(hw_dom); + mon_domain_free(hw_dom); } } @@ -626,9 +630,9 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r) { int id = get_domain_id_from_scope(cpu, r->ctrl_scope); - struct rdt_hw_domain *hw_dom; + struct rdt_hw_ctrl_domain *hw_dom; struct rdt_domain_hdr *hdr; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; BUG_ON(id > NR_CPUS); lockdep_assert_held(&domain_list_lock); @@ -648,8 +652,8 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r) if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) return; - d = container_of(hdr, struct rdt_domain, hdr); - hw_dom = resctrl_to_arch_dom(d); + d = container_of(hdr, struct rdt_ctrl_domain, hdr); + hw_dom = resctrl_to_arch_ctrl_dom(d); cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); @@ -659,13 +663,12 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r) synchronize_rcu(); /* - * rdt_domain "d" is going to be freed below, so clear + * rdt_ctrl_domain "d" is going to be freed below, so clear * its pointer from pseudo_lock_region struct. 
*/ if (d->plr) d->plr->d = NULL; - r->rdt_domain_list[id] = NULL; - domain_free(hw_dom); + ctrl_domain_free(hw_dom); return; } @@ -674,9 +677,9 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r) static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) { int id = get_domain_id_from_scope(cpu, r->mon_scope); - struct rdt_hw_domain *hw_dom; + struct rdt_hw_mon_domain *hw_dom; struct rdt_domain_hdr *hdr; - struct rdt_domain *d; + struct rdt_mon_domain *d; BUG_ON(id > NR_CPUS); lockdep_assert_held(&domain_list_lock); @@ -697,22 +700,16 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) return; - d = container_of(hdr, struct rdt_domain, hdr); - hw_dom = resctrl_to_arch_dom(d); + d = container_of(hdr, struct rdt_mon_domain, hdr); + hw_dom = resctrl_to_arch_mon_dom(d); cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); if (cpumask_empty(&d->hdr.cpu_mask)) { resctrl_offline_mon_domain(r, d); list_del_rcu(&d->hdr.list); synchronize_rcu(); - /* - * rdt_domain "d" is going to be freed below, so clear - * its pointer from pseudo_lock_region struct. 
- */ - if (d->plr) - d->plr->d = NULL; r->rdt_domain_list[id] = NULL; - domain_free(hw_dom); + mon_domain_free(hw_dom); return; } diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index b9097e00ab5a..57f608435b57 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -23,11 +23,11 @@ #include "internal.h" -static bool apply_config(struct rdt_hw_domain *hw_dom, +static bool apply_config(struct rdt_hw_ctrl_domain *hw_dom, struct resctrl_staged_config *cfg, u32 idx, cpumask_var_t cpu_mask) { - struct rdt_domain *dom = &hw_dom->d_resctrl; + struct rdt_ctrl_domain *dom = &hw_dom->d_resctrl; if (cfg->new_ctrl != hw_dom->ctrl_val[idx]) { cpumask_set_cpu(cpumask_any(&dom->hdr.cpu_mask), cpu_mask); @@ -39,11 +39,11 @@ static bool apply_config(struct rdt_hw_domain *hw_dom, return false; } -int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val) { + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); u32 idx = resctrl_get_config_index(closid, t); struct msr_param msr_param; @@ -63,11 +63,11 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) { struct resctrl_staged_config *cfg; - struct rdt_hw_domain *hw_dom; + struct rdt_hw_ctrl_domain *hw_dom; struct msr_param msr_param; enum resctrl_conf_type t; + struct rdt_ctrl_domain *d; cpumask_var_t cpu_mask; - struct rdt_domain *d; u32 idx; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -78,7 +78,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) msr_param.res = NULL; list_for_each_entry(d, &r->ctrl_domains, hdr.list) { - hw_dom = 
resctrl_to_arch_dom(d); + hw_dom = resctrl_to_arch_ctrl_dom(d); for (t = 0; t < CDP_NUM_TYPES; t++) { cfg = &hw_dom->d_resctrl.staged_config[t]; if (!cfg->have_new_ctrl) @@ -111,10 +111,10 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) return 0; } -u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, +u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type) { - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); u32 idx = resctrl_get_config_index(closid, type); return hw_dom->ctrl_val[idx]; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 9017fe146a86..ae1dc7b5fe29 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -56,10 +56,22 @@ struct arch_mbm_state { }; /** - * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share - * a resource + * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share + * a resource for a control function * @d_resctrl: Properties exposed to the resctrl file system * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID) + * + * Members of this structure are accessed via helpers that provide abstraction. + */ +struct rdt_hw_ctrl_domain { + struct rdt_ctrl_domain d_resctrl; + u32 *ctrl_val; +}; + +/** + * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share + * a resource for a monitor function + * @d_resctrl: Properties exposed to the resctrl file system * @arch_mbm_total: arch private state for MBM total bandwidth * @arch_mbm_local: arch private state for MBM local bandwidth * @mbm_total_cfg: MBM total bandwidth configuration @@ -67,18 +79,22 @@ struct arch_mbm_state { * * Members of this structure are accessed via helpers that provide abstraction. 
*/ -struct rdt_hw_domain { - struct rdt_domain d_resctrl; - u32 *ctrl_val; +struct rdt_hw_mon_domain { + struct rdt_mon_domain d_resctrl; struct arch_mbm_state *arch_mbm_total; struct arch_mbm_state *arch_mbm_local; u32 mbm_total_cfg; u32 mbm_local_cfg; }; -static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r) +static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r) +{ + return container_of(r, struct rdt_hw_ctrl_domain, d_resctrl); +} + +static inline struct rdt_hw_mon_domain *resctrl_to_arch_mon_dom(struct rdt_mon_domain *r) { - return container_of(r, struct rdt_hw_domain, d_resctrl); + return container_of(r, struct rdt_hw_mon_domain, d_resctrl); } /** @@ -116,7 +132,7 @@ struct rdt_hw_resource { struct rdt_resource r_resctrl; u32 num_closid; unsigned int msr_base; - void (*msr_update) (struct rdt_domain *d, struct msr_param *m, + void (*msr_update) (struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r); unsigned int mon_scale; unsigned int mbm_width; @@ -224,5 +240,5 @@ int rdt_get_mon_l3_config(struct rdt_resource *r); bool rdt_cpu_has(int flag); void __init intel_rdt_mbm_apply_quirk(void); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); -void resctrl_mbm_evt_config_init(struct rdt_hw_domain *hw_dom); +void resctrl_mbm_evt_config_init(struct rdt_hw_mon_domain *hw_dom); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 232d0a61a3c2..1876aa794346 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -123,7 +123,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) return 0; } -static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, +static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom, u32 rmid, enum resctrl_event_id eventid) { @@ -144,11 +144,11 @@ static struct 
arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, return NULL; } -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid) { - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); struct arch_mbm_state *am; am = get_arch_mbm_state(hw_dom, rmid, eventid); @@ -164,9 +164,9 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, * Assumes that hardware counters are also reset and thus that there is * no need to record initial non-zero counts. */ -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d) +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) { - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); if (resctrl_arch_is_mbm_total_enabled()) memset(hw_dom->arch_mbm_total, 0, @@ -185,12 +185,12 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) return chunks >> shift; } -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *ignored) { + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct arch_mbm_state *am; u64 msr_val, chunks; int ret; @@ -280,7 +280,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) return 0; } -void resctrl_mbm_evt_config_init(struct rdt_hw_domain *hw_dom) +void resctrl_mbm_evt_config_init(struct rdt_hw_mon_domain *hw_dom) { unsigned int index; u64 msrval; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c 
b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index f8a00511baf3..f58ca9979108 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -95,9 +95,9 @@ static void l2_qos_cfg_update(void *arg) static int set_cache_qos_cfg(int level, bool enable) { void (*update)(void *arg); + struct rdt_ctrl_domain *d; struct rdt_resource *r_l; cpumask_var_t cpu_mask; - struct rdt_domain *d; int cpu; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -327,7 +327,7 @@ static void resctrl_abmc_set_one_amd(void *arg) static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable) { - struct rdt_domain *d; + struct rdt_mon_domain *d; /* * Hardware counters will reset after switching the monitor mode. @@ -412,8 +412,8 @@ static void rdtgroup_abmc_cfg(void *info) int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid, u32 rmid, u32 cntr_id, u32 closid, bool assign) { - struct rdt_domain *d = dom; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_mon_domain *d = dom; + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); union l3_qos_abmc_cfg abmc_cfg = { 0 }; struct arch_mbm_state *arch_mbm; @@ -446,10 +446,10 @@ int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid, static int reset_all_ctrls(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom; + struct rdt_hw_ctrl_domain *hw_dom; struct msr_param msr_param; cpumask_var_t cpu_mask; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; int i; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -468,7 +468,7 @@ static int reset_all_ctrls(struct rdt_resource *r) * from each domain to update the MSRs below. 
*/ list_for_each_entry(d, &r->ctrl_domains, hdr.list) { - hw_dom = resctrl_to_arch_dom(d); + hw_dom = resctrl_to_arch_ctrl_dom(d); cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask); for (i = 0; i < hw_res->num_closid; i++) @@ -493,8 +493,8 @@ void resctrl_arch_reset_resources(void) u32 resctrl_arch_event_config_get(void *dom, enum resctrl_event_id eventid) { - struct rdt_domain *d = dom; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_mon_domain *d = dom; + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); switch (eventid) { case QOS_L3_OCCUP_EVENT_ID: @@ -515,7 +515,7 @@ u32 resctrl_arch_event_config_get(void *dom, enum resctrl_event_id eventid) void resctrl_arch_event_config_set(void *info) { struct resctrl_mon_config_info *mon_info = info; - struct rdt_hw_domain *hw_dom; + struct rdt_hw_mon_domain *hw_dom; unsigned int index; index = mon_event_config_index_get(mon_info->evtid); @@ -524,7 +524,7 @@ void resctrl_arch_event_config_set(void *info) wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); - hw_dom = resctrl_to_arch_dom(mon_info->d); + hw_dom = resctrl_to_arch_mon_dom(mon_info->d); switch (mon_info->evtid) { case QOS_L3_OCCUP_EVENT_ID: diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 2cb017457bda..8b4bbfd5b718 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -27,7 +27,7 @@ struct rdt_parse_data { }; typedef int(ctrlval_parser_t)(struct rdt_parse_data *data, - struct resctrl_schema *s, struct rdt_domain *d); + struct resctrl_schema *s, struct rdt_ctrl_domain *d); /* * Check whether MBA bandwidth percentage value is correct. 
The value is @@ -71,7 +71,7 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) } static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d) + struct rdt_ctrl_domain *d) { struct resctrl_staged_config *cfg; u32 closid = data->rdtgrp->closid; @@ -151,7 +151,7 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) * resource type. */ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d) + struct rdt_ctrl_domain *d) { struct rdtgroup *rdtgrp = data->rdtgrp; struct resctrl_staged_config *cfg; @@ -230,7 +230,7 @@ static int parse_line(char *line, struct resctrl_schema *s, struct rdt_resource *r = s->res; struct rdt_parse_data data; char *dom = NULL, *id; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; unsigned long dom_id; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -377,7 +377,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid) { struct rdt_resource *r = schema->res; - struct rdt_domain *dom; + struct rdt_ctrl_domain *dom; bool sep = false; u32 ctrl_val; @@ -449,7 +449,7 @@ static int smp_mon_event_count(void *arg) } void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, - struct rdt_domain *d, struct rdtgroup *rdtgrp, + struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, int evtid, int first) { int cpu; @@ -492,11 +492,11 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) { struct kernfs_open_file *of = m->private; struct rdt_domain_hdr *hdr; + struct rdt_mon_domain *d; u32 resid, evtid, domid; struct rdtgroup *rdtgrp; struct rdt_resource *r; union mon_data_bits md; - struct rdt_domain *d; struct rmid_read rr; int ret = 0, index; @@ -518,7 +518,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) goto out; } - d = container_of(hdr, struct rdt_domain, hdr); + d = container_of(hdr, struct 
rdt_mon_domain, hdr); if (resctrl_arch_get_abmc_enabled() && evtid != QOS_L3_OCCUP_EVENT_ID) { index = mon_event_config_index_get(evtid); diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 58d812d6c1d2..d94f6ae91368 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -108,7 +108,7 @@ union mon_data_bits { struct rmid_read { struct rdtgroup *rgrp; struct rdt_resource *r; - struct rdt_domain *d; + struct rdt_mon_domain *d; enum resctrl_event_id evtid; bool first; int err; @@ -282,9 +282,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); int rdtgroup_schemata_show(struct kernfs_open_file *of, struct seq_file *s, void *v); -bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d, unsigned long cbm, int closid, bool exclusive); -unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, +unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d, unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); @@ -296,19 +296,19 @@ void resctrl_mon_resource_exit(void); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, - struct rdt_domain *d, struct rdtgroup *rdtgrp, + struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, int evtid, int first); int resctrl_mon_resource_init(void); -void mbm_setup_overflow_handler(struct rdt_domain *dom, +void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu); void mbm_handle_overflow(struct work_struct *work); bool is_mba_sc(struct rdt_resource *r); -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, +void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int 
exclude_cpu); void cqm_handle_limbo(struct work_struct *work); -bool has_busy_rmid(struct rdt_domain *d); -void __check_limbo(struct rdt_domain *d, bool force_free); +bool has_busy_rmid(struct rdt_mon_domain *d); +void __check_limbo(struct rdt_mon_domain *d, bool force_free); int mbm_cntr_alloc(struct rdt_resource *r); void mbm_cntr_free(u32 cntr_id); void rdt_staged_configs_clear(void); @@ -323,8 +323,8 @@ void rdtgroup_free_cntr(struct rdt_resource *r, struct rdtgroup *rdtgrp, int ind #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm); +bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d); int rdt_pseudo_lock_init(void); void rdt_pseudo_lock_release(void); int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 2cc59a1e47be..24e5386c3f12 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -123,7 +123,7 @@ static void limbo_release_entry(struct rmid_entry *entry) * decrement the count. 
If the busy count gets to zero on an RMID, we * free the RMID */ -void __check_limbo(struct rdt_domain *d, bool force_free) +void __check_limbo(struct rdt_mon_domain *d, bool force_free) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); u32 idx_limit = resctrl_arch_system_num_rmid_idx(); @@ -171,7 +171,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free) resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx); } -bool has_busy_rmid(struct rdt_domain *d) +bool has_busy_rmid(struct rdt_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); @@ -272,7 +272,7 @@ int alloc_rmid(u32 closid) static void add_rmid_to_limbo(struct rmid_entry *entry) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_domain *d; + struct rdt_mon_domain *d; u32 idx; lockdep_assert_held(&rdtgroup_mutex); @@ -325,7 +325,7 @@ void free_rmid(u32 closid, u32 rmid) list_add_tail(&entry->list, &rmid_free_lru); } -static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid, +static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id evtid) { u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); @@ -461,12 +461,12 @@ void mon_event_count(void *info) * throttle MSRs already have low percentage values. To avoid * unnecessarily restricting such rdtgroups, we also increase the bandwidth. 
*/ -static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) +static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) { u32 closid, rmid, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; struct rdt_resource *r_mba; - struct rdt_domain *dom_mba; + struct rdt_ctrl_domain *dom_mba; u32 cur_bw, user_bw, idx; struct list_head *head; struct rdtgroup *entry; @@ -527,7 +527,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); } -static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, +static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, u32 closid, u32 rmid) { struct rmid_read rr; @@ -585,12 +585,12 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, void cqm_handle_limbo(struct work_struct *work) { unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL); - struct rdt_domain *d; + struct rdt_mon_domain *d; cpus_read_lock(); mutex_lock(&rdtgroup_mutex); - d = container_of(work, struct rdt_domain, cqm_limbo.work); + d = container_of(work, struct rdt_mon_domain, cqm_limbo.work); __check_limbo(d, false); @@ -613,7 +613,7 @@ void cqm_handle_limbo(struct work_struct *work) * @exclude_cpu: Which CPU the handler should not run on, * RESCTRL_PICK_ANY_CPU to pick any CPU. 
*/ -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, +void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu) { unsigned long delay = msecs_to_jiffies(delay_ms); @@ -626,7 +626,7 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay); } -bool is_rdt_domain_valid(struct rdt_resource *r, struct rdt_domain *d) +bool is_rdt_domain_valid(struct rdt_resource *r, struct rdt_mon_domain *d) { int i; @@ -640,9 +640,9 @@ void mbm_handle_overflow(struct work_struct *work) { unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL); struct rdtgroup *prgrp, *crgrp; + struct rdt_mon_domain *d; struct list_head *head; struct rdt_resource *r; - struct rdt_domain *d; cpus_read_lock(); mutex_lock(&rdtgroup_mutex); @@ -655,7 +655,7 @@ void mbm_handle_overflow(struct work_struct *work) goto out_unlock; r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - d = container_of(work, struct rdt_domain, mbm_over.work); + d = container_of(work, struct rdt_mon_domain, mbm_over.work); if (!is_rdt_domain_valid(r, d)) goto out_unlock; @@ -692,7 +692,7 @@ void mbm_handle_overflow(struct work_struct *work) * @exclude_cpu: Which CPU the handler should not run on, * RESCTRL_PICK_ANY_CPU to pick any CPU. */ -void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms, +void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu) { unsigned long delay = msecs_to_jiffies(delay_ms); diff --git a/fs/resctrl/psuedo_lock.c b/fs/resctrl/psuedo_lock.c index daee51d5da01..6d5291c1b121 100644 --- a/fs/resctrl/psuedo_lock.c +++ b/fs/resctrl/psuedo_lock.c @@ -613,7 +613,7 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) * Return: true if @cbm overlaps with pseudo-locked region on @d, false * otherwise. 
*/ -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm) { unsigned int cbm_len; unsigned long cbm_b; @@ -640,12 +640,12 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm * if it is not possible to test due to memory allocation issue, * false otherwise. */ -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) +bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d) { + struct rdt_ctrl_domain *d_i; cpumask_var_t cpu_with_psl; enum resctrl_res_level i; struct rdt_resource *r; - struct rdt_domain *d_i; bool ret = false; /* Walking r->domains, ensure it can't race with cpuhp */ diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index f730f9b1bd70..6115540fec81 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -94,9 +94,9 @@ void rdt_last_cmd_printf(const char *fmt, ...) void rdt_staged_configs_clear(void) { + struct rdt_ctrl_domain *dom; enum resctrl_res_level i; struct rdt_resource *r; - struct rdt_domain *dom; lockdep_assert_held(&rdtgroup_mutex); @@ -909,7 +909,7 @@ static int rdtgroup_mbm_mode_show(struct kernfs_open_file *of, static void rdtgroup_mbm_cntr_reset(struct rdt_resource *r) { struct rdtgroup *prgrp, *crgrp; - struct rdt_domain *dom; + struct rdt_mon_domain *dom; mbm_cntrs_init(r); @@ -976,7 +976,7 @@ static int rdtgroup_num_mbm_cntrs_show(struct kernfs_open_file *of, } static char *rdtgroup_mon_state_to_str(struct rdtgroup *rdtgrp, - struct rdt_domain *d, char *str) + struct rdt_mon_domain *d, char *str) { char *tmp = str; int index; @@ -1007,7 +1007,7 @@ static int rdtgroup_mbm_control_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { struct rdt_resource *r = of->kn->parent->priv; - struct rdt_domain *dom; + struct rdt_mon_domain *dom; struct rdtgroup *rdtg; char str[10]; @@ -1050,7 +1050,7 @@ static int rdtgroup_mbm_control_show(struct 
kernfs_open_file *of, * the assignment else just update the assign state */ static int rdtgroup_assign_update(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid, - struct rdt_domain *d) + struct rdt_mon_domain *d) { int ret, index; @@ -1086,7 +1086,7 @@ static int rdtgroup_assign_update(struct rdtgroup *rdtgrp, enum resctrl_event_id * update the unassign state */ static int rdtgroup_unassign_update(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid, - struct rdt_domain *d) + struct rdt_mon_domain *d) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); int ret = 0, index; @@ -1168,7 +1168,7 @@ static int rdtgroup_process_flags(struct rdt_resource *r, { int op, mon_state, assign_state, unassign_state; char *dom_str, *id_str, *op_str; - struct rdt_domain *d; + struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; unsigned long dom_id; int ret, found = 0; @@ -1521,7 +1521,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, unsigned long sw_shareable = 0, hw_shareable = 0; unsigned long exclusive = 0, pseudo_locked = 0; struct rdt_resource *r = s->res; - struct rdt_domain *dom; + struct rdt_ctrl_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; bool sep = false; @@ -1752,7 +1752,7 @@ static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, * * Return: false if CBM does not overlap, true if it does. 
*/ -static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, +static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d, unsigned long cbm, int closid, enum resctrl_conf_type type, bool exclusive) { @@ -1807,7 +1807,7 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d * * Return: true if CBM overlap detected, false if there is no overlap */ -bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d, unsigned long cbm, int closid, bool exclusive) { enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); @@ -1838,10 +1838,10 @@ bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) { int closid = rdtgrp->closid; + struct rdt_ctrl_domain *d; struct resctrl_schema *s; struct rdt_resource *r; bool has_cache = false; - struct rdt_domain *d; u32 ctrl; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -1958,7 +1958,7 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, * bitmap functions work correctly. 
*/ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, - struct rdt_domain *d, unsigned long cbm) + struct rdt_ctrl_domain *d, unsigned long cbm) { unsigned int size = 0; struct cacheinfo *ci; @@ -1986,9 +1986,9 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, { struct resctrl_schema *schema; enum resctrl_conf_type type; + struct rdt_ctrl_domain *d; struct rdtgroup *rdtgrp; struct rdt_resource *r; - struct rdt_domain *d; unsigned int size; int ret = 0; u32 closid; @@ -2056,7 +2056,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) { - struct rdt_domain *dom; + struct rdt_mon_domain *dom; bool sep = false; u32 val; @@ -2103,7 +2103,7 @@ static int mbm_local_bytes_config_show(struct kernfs_open_file *of, } static int mbm_config_write_domain(struct rdt_resource *r, - struct rdt_domain *d, u32 evtid, u32 val) + struct rdt_mon_domain *d, u32 evtid, u32 val) { struct resctrl_mon_config_info mon_info = {0}; u32 config_val; @@ -2151,7 +2151,7 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) { char *dom_str = NULL, *id_str; unsigned long dom_id, val; - struct rdt_domain *d; + struct rdt_mon_domain *d; int err; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -2280,7 +2280,7 @@ int rdtgroup_alloc_cntr(struct rdtgroup *rdtgrp, int index) int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_domain *d; + struct rdt_mon_domain *d; int index; index = mon_event_config_index_get(evtid); @@ -2302,7 +2302,7 @@ int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) static int rdtgroup_mbm_cntr_test(struct rdt_resource *r, u32 cntr_id) { - struct rdt_domain *d; + struct rdt_mon_domain *d; list_for_each_entry(d, &r->mon_domains, hdr.list) if (test_bit(cntr_id, d->mbm_cntr_map)) @@ -2329,7 +2329,7 @@
void rdtgroup_free_cntr(struct rdt_resource *r, struct rdtgroup *rdtgrp, int rdtgroup_unassign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_domain *d; + struct rdt_mon_domain *d; int index; index = mon_event_config_index_get(evtid); @@ -2817,7 +2817,7 @@ static inline bool is_mba_linear(void) return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear; } -static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) +static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d) { u32 num_closid = resctrl_arch_get_num_closid(r); int cpu = cpumask_any(&d->hdr.cpu_mask); @@ -2835,7 +2835,7 @@ static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) } static void mba_sc_domain_destroy(struct rdt_resource *r, - struct rdt_domain *d) + struct rdt_ctrl_domain *d) { kfree(d->mbps_val); d->mbps_val = NULL; @@ -2861,7 +2861,7 @@ static int set_mba_sc(bool mba_sc) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); u32 num_closid = resctrl_arch_get_num_closid(r); - struct rdt_domain *d; + struct rdt_ctrl_domain *d; int i; if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) @@ -3159,7 +3159,7 @@ static int rdt_get_tree(struct fs_context *fc) struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_fs_context *ctx = rdt_fc2context(fc); unsigned long flags = RFTYPE_CTRL_BASE; - struct rdt_domain *dom; + struct rdt_mon_domain *dom; int ret; cpus_read_lock(); @@ -3461,7 +3461,7 @@ static void rmdir_all_sub(void) static void rdt_kill_sb(struct super_block *sb) { struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_domain *d; + struct rdt_mon_domain *d; cpus_read_lock(); mutex_lock(&rdtgroup_mutex); @@ -3550,7 +3550,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, } static int mkdir_mondata_subdir(struct kernfs_node 
*parent_kn, - struct rdt_domain *d, + struct rdt_mon_domain *d, struct rdt_resource *r, struct rdtgroup *prgrp) { union mon_data_bits priv; @@ -3599,7 +3599,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, * and "monitor" groups with given domain id. */ static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_domain *d) + struct rdt_mon_domain *d) { struct kernfs_node *parent_kn; struct rdtgroup *prgrp, *crgrp; @@ -3621,7 +3621,7 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, struct rdt_resource *r, struct rdtgroup *prgrp) { - struct rdt_domain *dom; + struct rdt_mon_domain *dom; int ret; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -3731,7 +3731,7 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) * Set the RDT domain up to start off with all usable allocations. That is, * all shareable and unused bits. All-zero CBM is invalid. */ -static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, +static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s, u32 closid) { enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); @@ -3811,7 +3811,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, */ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) { - struct rdt_domain *d; + struct rdt_ctrl_domain *d; int ret; list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) { @@ -3827,7 +3827,7 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) { struct resctrl_staged_config *cfg; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; list_for_each_entry(d, &r->ctrl_domains, hdr.list) { if (is_mba_sc(r)) { @@ -4477,7 +4477,7 @@ static void rdtgroup_setup_default(void) mutex_unlock(&rdtgroup_mutex); } -static void domain_destroy_mon_state(struct rdt_domain *d) +static void 
domain_destroy_mon_state(struct rdt_mon_domain *d) { bitmap_free(d->mbm_cntr_map); bitmap_free(d->rmid_busy_llc); @@ -4485,7 +4485,7 @@ static void domain_destroy_mon_state(struct rdt_domain *d) kfree(d->mbm_local); } -void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d) +void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) { mutex_lock(&rdtgroup_mutex); @@ -4495,7 +4495,7 @@ void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d) mutex_unlock(&rdtgroup_mutex); } -void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d) +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) { mutex_lock(&rdtgroup_mutex); @@ -4526,7 +4526,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d) mutex_unlock(&rdtgroup_mutex); } -static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) +static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); size_t tsize; @@ -4566,7 +4566,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) return 0; } -int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d) +int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) { int err = 0; @@ -4581,7 +4581,7 @@ int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d) return err; } -int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d) +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) { int err = 0; @@ -4636,8 +4636,8 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) void resctrl_offline_cpu(unsigned int cpu) { struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; - struct rdt_domain *d; mutex_lock(&rdtgroup_mutex); 
list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 83f90bd170f1..daf3f57abf04 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -67,7 +67,7 @@ extern unsigned int resctrl_rmid_realloc_threshold; struct pseudo_lock_region { struct resctrl_schema *s; u32 closid; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; u32 cbm; wait_queue_head_t lock_thread_wq; int thread_done; @@ -110,7 +110,23 @@ struct rdt_domain_hdr { }; /** - * struct rdt_domain - group of CPUs sharing a resctrl resource + * struct rdt_ctrl_domain - group of CPUs sharing a resctrl control resource + * @hdr: common header for different domain types + * @plr: pseudo-locked region (if any) associated with domain + * @staged_config: parsed configuration to be applied + * @mbps_val: When mba_sc is enabled, this holds the array of user + * specified control values for mba_sc in MBps, indexed + * by closid + */ +struct rdt_ctrl_domain { + struct rdt_domain_hdr hdr; + struct pseudo_lock_region *plr; + struct resctrl_staged_config staged_config[CDP_NUM_TYPES]; + u32 *mbps_val; +}; + +/** + * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth @@ -120,13 +136,8 @@ struct rdt_domain_hdr { * @mbm_work_cpu: worker CPU for MBM h/w counters * @cqm_work_cpu: worker CPU for CQM h/w counters * @mbm_cntr_map: bitmap to track domain counter assignment - * @plr: pseudo-locked region (if any) associated with domain - * @staged_config: parsed configuration to be applied - * @mbps_val: When mba_sc is enabled, this holds the array of user - * specified control values for mba_sc in MBps, indexed - * by closid */ -struct rdt_domain { +struct rdt_mon_domain { struct rdt_domain_hdr hdr; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; @@ 
-136,9 +147,6 @@ struct rdt_domain { int mbm_work_cpu; int cqm_work_cpu; unsigned long *mbm_cntr_map; - struct pseudo_lock_region *plr; - struct resctrl_staged_config staged_config[CDP_NUM_TYPES]; - u32 *mbps_val; }; /** @@ -255,7 +263,7 @@ struct rdt_resource { unsigned long fflags; unsigned int mbm_cfg_mask; bool cdp_capable; - struct rdt_domain *rdt_domain_list[NR_CPUS]; + struct rdt_mon_domain *rdt_domain_list[NR_CPUS]; }; /* @@ -292,7 +300,7 @@ struct resctrl_cpu_sync { struct resctrl_mon_config_info { struct rdt_resource *r; - struct rdt_domain *d; + struct rdt_mon_domain *d; u32 evtid; u32 mon_config; int err; @@ -362,10 +370,10 @@ static inline u32 resctrl_get_config_index(u32 closid, * Caller must hold the cpuhp read lock to prevent the struct rdt_domain being * freed. */ -static inline struct rdt_domain * +static inline struct rdt_ctrl_domain * get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r) { - struct rdt_domain *d; + struct rdt_ctrl_domain *d; list_for_each_entry(d, &r->ctrl_domains, hdr.list) { /* Find the domain that contains this CPU */ @@ -376,10 +384,10 @@ get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r) return NULL; } -static inline struct rdt_domain * +static inline struct rdt_mon_domain * get_mon_domain_from_cpu(int cpu, struct rdt_resource *r) { - struct rdt_domain *d; + struct rdt_mon_domain *d; list_for_each_entry(d, &r->mon_domains, hdr.list) { /* Find the domain that contains this CPU */ @@ -394,15 +402,15 @@ get_mon_domain_from_cpu(int cpu, struct rdt_resource *r) * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. 
*/ -int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val); -u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, +u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type); -int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d); -int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d); -void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d); -void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d); +int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d); +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d); +void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d); +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d); void resctrl_online_cpu(unsigned int cpu); void resctrl_offline_cpu(unsigned int cpu); @@ -431,7 +439,7 @@ void resctrl_offline_cpu(unsigned int cpu); * Return: * 0 on success, or -EIO, -EINVAL etc on error. */ -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *arch_mon_ctx); @@ -464,7 +472,7 @@ static inline void resctrl_arch_rmid_read_context_check(void) * * This can be called from any CPU. */ -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id eventid); @@ -477,7 +485,7 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, * * This can be called from any CPU. 
*/ -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d); extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -- Gitee From 5b76e94a0dfa19d69aafbad394acbb280273acea Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 28 Jun 2024 14:56:05 -0700 Subject: [PATCH 09/25] x86/resctrl: Add node-scope to the options for feature scope ANBZ: #24559 commit 1a171608ee8d40d22d604303e42f033c69151123 upstream. Currently supported resctrl features are all domain scoped the same as the scope of the L2 or L3 caches. Add RESCTRL_L3_NODE as a new option for features that are scoped at the same granularity as NUMA nodes. This is needed for Intel's Sub-NUMA Cluster (SNC) feature where monitoring features are divided between nodes that share an L3 cache. Intel-SIG: 1a171608ee8d x86/resctrl: Add node-scope to the options for feature scope Backport SNC RDT support for Intel platforms. 
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-6-tony.luck@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/cpu/resctrl/core.c | 2 ++ include/linux/resctrl.h | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index a48a6b14a4b7..ccaa8f3e6d0e 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -482,6 +482,8 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope) case RESCTRL_L2_CACHE: case RESCTRL_L3_CACHE: return get_cpu_cacheinfo_id(cpu, scope); + case RESCTRL_L3_NODE: + return cpu_to_node(cpu); default: break; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index daf3f57abf04..6dbe96ae9672 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -223,6 +223,7 @@ struct resctrl_mon { enum resctrl_scope { RESCTRL_L2_CACHE = 2, RESCTRL_L3_CACHE = 3, + RESCTRL_L3_NODE, }; /** -- Gitee From d2790896b3c58558d4cbac655c48623587056b3f Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:18:55 +0800 Subject: [PATCH 10/25] x86/resctrl: Introduce snc_nodes_per_l3_cache ANBZ: #24559 commit e13db55b5a0d447dea63cde772c1078405bbbf96 upstream. Intel Sub-NUMA Cluster (SNC) is a feature that subdivides the CPU cores and memory controllers on a socket into two or more groups. These are presented to the operating system as NUMA nodes. This may enable some workloads to have slightly lower latency to memory as the memory controller(s) in an SNC node are electrically closer to the CPU cores on that SNC node. This cost may be offset by lower bandwidth since the memory accesses for each core can only be interleaved between the memory controllers on the same SNC node. 
Resctrl monitoring on an Intel system depends upon attaching RMIDs to tasks to track L3 cache occupancy and memory bandwidth. There is an MSR that controls how the RMIDs are shared between SNC nodes. The default mode divides them numerically. E.g. when there are two SNC nodes on a socket the lower number half of the RMIDs are given to the first node, the remainder to the second node. This would be difficult to use with the Linux resctrl interface as specific RMID values assigned to resctrl groups are not visible to users. RMID sharing mode divides the physical RMIDs evenly between SNC nodes but uses a logical RMID in the IA32_PQR_ASSOC MSR. For example a system with 200 physical RMIDs (as enumerated by CPUID leaf 0xF) that has two SNC nodes per L3 cache instance would have 100 logical RMIDs available for Linux to use. A task running on SNC node 0 with RMID 5 would accumulate LLC occupancy and MBM bandwidth data in physical RMID 5. Another task using RMID 5, but running on SNC node 1 would accumulate data in physical RMID 105. Even with this renumbering SNC mode requires several changes in resctrl behavior for correct operation. Add a static global to arch/x86/kernel/cpu/resctrl/monitor.c to indicate how many SNC domains share an L3 cache instance. Initialize this to "1". Runtime detection of SNC mode will adjust this value. Update all places to take appropriate action when SNC mode is enabled: 1) The number of logical RMIDs per L3 cache available for use is the number of physical RMIDs divided by the number of SNC nodes. 2) Likewise the "mon_scale" value must be divided by the number of SNC nodes. 3) Add a function to convert from logical RMID values (assigned to tasks and loaded into the IA32_PQR_ASSOC MSR on context switch) to physical RMID values to load into IA32_QM_EVTSEL MSR when reading counters on each SNC node. Intel-SIG: e13db55b5a0d x86/resctrl: Introduce snc_nodes_per_l3_cache Backport SNC RDT support for Intel platforms. 
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-7-tony.luck@intel.com [ Zhang Rui: resolve conflict and amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/cpu/resctrl/monitor.c | 56 ++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 1876aa794346..168aadf5fe85 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -38,6 +38,8 @@ unsigned int rdt_mon_features; #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) +static int snc_nodes_per_l3_cache = 1; + /* * The correction factor table is documented in Documentation/arch/x86/resctrl.rst. * If rmid > rmid threshold, MBM total and local values should be multiplied @@ -99,7 +101,43 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val) return val; } -static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) +/* + * When Sub-NUMA Cluster (SNC) mode is not enabled (as indicated by + * "snc_nodes_per_l3_cache == 1") no translation of the RMID value is + * needed. The physical RMID is the same as the logical RMID. + * + * On a platform with SNC mode enabled, Linux enables RMID sharing mode + * via MSR 0xCA0 (see the "RMID Sharing Mode" section in the "Intel + * Resource Director Technology Architecture Specification" for a full + * description of RMID sharing mode). + * + * In RMID sharing mode there are fewer "logical RMID" values available + * to accumulate data ("physical RMIDs" are divided evenly between SNC + * nodes that share an L3 cache). Linux creates an rdt_mon_domain for + * each SNC node. + * + * The value loaded into IA32_PQR_ASSOC is the "logical RMID". 
+ * + * Data is collected independently on each SNC node and can be retrieved + * using the "physical RMID" value computed by this function and loaded + * into IA32_QM_EVTSEL. @cpu can be any CPU in the SNC node. + * + * The scope of the IA32_QM_EVTSEL and IA32_QM_CTR MSRs is at the L3 + * cache. So a "physical RMID" may be read from any CPU that shares + * the L3 cache with the desired SNC node, not just from a CPU in + * the specific SNC node. + */ +static int logical_rmid_to_physical_rmid(int cpu, int lrmid) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + + if (snc_nodes_per_l3_cache == 1) + return lrmid; + + return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->mon.num_rmid; +} + +static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val) { u64 msr_val; @@ -111,7 +149,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) * are error bits. */ - wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); + wrmsr(MSR_IA32_QM_EVTSEL, eventid, prmid); rdmsrl(MSR_IA32_QM_CTR, msr_val); if (msr_val & RMID_VAL_ERROR) @@ -149,14 +187,17 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, enum resctrl_event_id eventid) { struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + int cpu = cpumask_any(&d->hdr.cpu_mask); struct arch_mbm_state *am; + u32 prmid; am = get_arch_mbm_state(hw_dom, rmid, eventid); if (am) { memset(am, 0, sizeof(*am)); + prmid = logical_rmid_to_physical_rmid(cpu, rmid); /* Record any initial, non-zero count value. 
*/ - __rmid_read(rmid, eventid, &am->prev_msr); + __rmid_read_phys(prmid, eventid, &am->prev_msr); } } @@ -191,8 +232,10 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, { struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + int cpu = cpumask_any(&d->hdr.cpu_mask); struct arch_mbm_state *am; u64 msr_val, chunks; + u32 prmid; int ret; resctrl_arch_rmid_read_context_check(); @@ -200,7 +243,8 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask)) return -EINVAL; - ret = __rmid_read(rmid, eventid, &msr_val); + prmid = logical_rmid_to_physical_rmid(cpu, rmid); + ret = __rmid_read_phys(prmid, eventid, &msr_val); if (ret) return ret; @@ -227,8 +271,8 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) u32 eax, ebx, ecx, edx; resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024; - hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale; - r->mon.num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; + hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache; + r->mon.num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache; hw_res->mbm_width = MBM_CNTR_WIDTH_BASE; if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) -- Gitee From c9ca32da31815a157aac74003be51801033dc0f4 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:29:00 +0800 Subject: [PATCH 11/25] x86/resctrl: Block use of mba_MBps mount option on Sub-NUMA Cluster (SNC) systems ANBZ: #24559 commit ac20aa423052553c005089b00f1e3caf79d3c1d3 upstream. When SNC is enabled there is a mismatch between the MBA control function which operates at L3 cache scope and the MBM monitor functions which measure memory bandwidth on each SNC node. Block use of the mba_MBps when scopes for MBA/MBM do not match. 
Improve user diagnostics by adding invalfc() message when mba_MBps is not supported. Intel-SIG: commit ac20aa423052 x86/resctrl: Block use of mba_MBps mount option on Sub-NUMA Cluster (SNC) systems Backport SNC RDT support for Intel platforms. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-8-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- fs/resctrl/rdtgroup.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 6115540fec81..b6969188db81 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -2843,14 +2843,18 @@ static void mba_sc_domain_destroy(struct rdt_resource *r, /* * MBA software controller is supported only if - * MBM is supported and MBA is in linear scale. + * MBM is supported and MBA is in linear scale, + * and the MBM monitor scope is the same as MBA + * control scope. 
*/ static bool supports_mba_mbps(void) { + struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); return (resctrl_arch_is_mbm_local_enabled() && - r->alloc_capable && is_mba_linear()); + r->alloc_capable && is_mba_linear() && + r->ctrl_scope == rmbm->mon_scope); } /* @@ -3289,6 +3293,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct rdt_fs_context *ctx = rdt_fc2context(fc); struct fs_parse_result result; + const char *msg; int opt; opt = fs_parse(fc, rdt_fs_parameters, param, &result); @@ -3303,8 +3308,9 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->enable_cdpl2 = true; return 0; case Opt_mba_mbps: + msg = "mba_MBps requires local MBM and linear scale MBA at L3 scope"; if (!supports_mba_mbps()) - return -EINVAL; + return invalfc(fc, msg); ctx->enable_mba_mbps = true; return 0; case Opt_hwdrc_mb: -- Gitee From 7182195f9f6da30562e77aff51bd082f25b3a4f3 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sun, 7 Sep 2025 00:17:47 +0800 Subject: [PATCH 12/25] x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #24559 commit 328ea688746420e12ced6cfbc5064413180244cc upstream. When SNC is enabled, monitoring data is collected at the SNC node granularity, but must be reported at L3-cache granularity for backwards compatibility in addition to reporting at the node level. Add a "ci" field to the rdt_mon_domain structure to save the cache information about the enclosing L3 cache for the domain. This provides: 1) The cache id which is needed to compose the name of the legacy monitoring directory, and to determine which domains should be summed to provide L3-scoped data. 2) The shared_cpu_map which is needed to determine which CPUs can be used to read the RMID counters with the MSR interface. 
This is the first step to an eventual goal of monitor reporting files like this (for a system with two SNC nodes per L3): $ cd /sys/fs/resctrl/mon_data $ tree mon_L3_00 mon_L3_00 <- 00 here is L3 cache id ├── llc_occupancy \ These files provide legacy support ├── mbm_local_bytes > for non-SNC aware monitor apps ├── mbm_total_bytes / that expect data at L3 cache level ├── mon_sub_L3_00 <- 00 here is SNC node id │   ├── llc_occupancy \ These files are finer grained │   ├── mbm_local_bytes > data from each SNC node │   └── mbm_total_bytes / └── mon_sub_L3_01 ├── llc_occupancy \ ├── mbm_local_bytes > As above, but for node 1. └── mbm_total_bytes / Intel-SIG: commit 328ea6887464 x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files Backport SNC RDT support for Intel platforms. [ bp: Massage commit message. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-9-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/cpu/resctrl/core.c | 7 ++++++- arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 1 - fs/resctrl/rdtgroup.c | 1 - include/linux/resctrl.h | 3 +++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index ccaa8f3e6d0e..c94c76508783 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -598,6 +597,12 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d->hdr.id = id; d->hdr.type = RESCTRL_MON_DOMAIN; r->rdt_domain_list[id] = d; + d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!d->ci) { + pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); + mon_domain_free(hw_dom); + return; + } cpumask_set_cpu(cpu, &d->hdr.cpu_mask); diff 
--git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index ba1596afee10..d15e1e841dbc 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -11,7 +11,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include #include diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index b6969188db81..a822c125192a 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -12,7 +12,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include #include diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 6dbe96ae9672..472fcee19df5 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -2,6 +2,7 @@ #ifndef _RESCTRL_H #define _RESCTRL_H +#include #include #include #include @@ -128,6 +129,7 @@ struct rdt_ctrl_domain { /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types + * @ci: cache info for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -139,6 +141,7 @@ struct rdt_ctrl_domain { */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; + struct cacheinfo *ci; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local; -- Gitee From db30ddb2f7b29684ce06483106e2c5b713e25751 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:40:15 +0800 Subject: [PATCH 13/25] x86/resctrl: Add a new field to struct rmid_read for summation of domains ANBZ: #24559 commit fb1f51f677585f1b1ba17d2390963bbebe7a8cfa upstream. 
When a user reads a monitor file rdtgroup_mondata_show() calls mon_event_read() to package up all the required details into an rmid_read structure which is passed across the smp_call*() infrastructure to code that will read data from hardware and return the value (or error status) in the rmid_read structure. Sub-NUMA Cluster (SNC) mode adds files with new semantics. These require the smp_call-ed code to sum event data from all domains that share an L3 cache. Add a pointer to the L3 "cacheinfo" structure to struct rmid_read for the data collection routines to use to pick the domains to be summed. Intel-SIG: commit fb1f51f67758 x86/resctrl: Add a new field to struct rmid_read for summation of domains Backport SNC RDT support for Intel platforms. [ Reinette: the rmid_read structure has become complex enough so document each of its fields and provide the kerneldoc documentation for struct rmid_read. ] Co-developed-by: Reinette Chatre Signed-off-by: Reinette Chatre Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-10-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- fs/resctrl/internal.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index d94f6ae91368..f72d1b035b6a 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -105,12 +105,31 @@ union mon_data_bits { } u; }; +/** + * struct rmid_read - Data passed across smp_call*() to read event count. + * @rgrp: Resource group for which the counter is being read. If it is a parent + * resource group then its event count is summed with the count from all + * its child resource groups. + * @r: Resource describing the properties of the event being read. + * @d: Domain that the counter should be read from. 
If NULL then sum all + * domains in @r sharing L3 @ci.id + * @evtid: Which monitor event to read. + * @first: Initialize MBM counter when true. + * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains. + * @err: Error encountered when reading counter. + * @val: Returned value of event counter. If @rgrp is a parent resource group, + * @val includes the sum of event counts from its child resource groups. + * If @d is NULL, @val includes the sum of all domains in @r sharing @ci.id, + * (summed across child resource groups if @rgrp is a parent resource group). + * @arch_mon_ctx: Hardware monitor allocated for this read request (MPAM only). + */ struct rmid_read { struct rdtgroup *rgrp; struct rdt_resource *r; struct rdt_mon_domain *d; enum resctrl_event_id evtid; bool first; + struct cacheinfo *ci; int err; u64 val; void *arch_mon_ctx; -- Gitee From 91f69dd42d263a47e7c27c055ee0047deaad3094 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 17:33:10 +0800 Subject: [PATCH 14/25] x86/resctrl: Initialize on-stack struct rmid_read instances ANBZ: #24559 commit 587edd7069b9e7dc7993d2df9371e7c37a4d2133 upstream. New semantics rely on some struct rmid_read members having NULL values to distinguish between the SNC and non-SNC scenarios. resctrl can thus no longer rely on this struct not being initialized properly. Initialize all on-stack declarations of struct rmid_read: rdtgroup_mondata_show() mbm_update() mkdir_mondata_subdir() to ensure that garbage values from the stack are not passed down to other functions. Intel-SIG: 587edd7069b9 x86/resctrl: Initialize on-stack struct rmid_read instances Backport SNC RDT support for Intel platforms. [ bp: Massage commit message. 
] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-11-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- fs/resctrl/ctrlmondata.c | 3 +-- fs/resctrl/monitor.c | 3 +-- fs/resctrl/rdtgroup.c | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 8b4bbfd5b718..7c0b4ad30f27 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -464,7 +464,6 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, rr->evtid = evtid; rr->r = r; rr->d = d; - rr->val = 0; rr->first = first; rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid); if (IS_ERR(rr->arch_mon_ctx)) { @@ -492,12 +491,12 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) { struct kernfs_open_file *of = m->private; struct rdt_domain_hdr *hdr; + struct rmid_read rr = {0}; struct rdt_mon_domain *d; u32 resid, evtid, domid; struct rdtgroup *rdtgrp; struct rdt_resource *r; union mon_data_bits md; - struct rmid_read rr; int ret = 0, index; rdtgrp = rdtgroup_kn_lock_live(of->kn); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 24e5386c3f12..4eb449bb74ab 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -530,9 +530,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, u32 closid, u32 rmid) { - struct rmid_read rr; + struct rmid_read rr = {0}; - rr.first = false; rr.r = r; rr.d = d; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index a822c125192a..40463cb0744c 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3558,10 +3558,10 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, struct rdt_mon_domain *d, struct rdt_resource *r, struct rdtgroup *prgrp) { + 
struct rmid_read rr = {0}; union mon_data_bits priv; struct kernfs_node *kn; struct mon_evt *mevt; - struct rmid_read rr; char name[32]; int ret; -- Gitee From 783589fbb68c0bd3d4e9b07f460ce99472b31312 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 17:45:37 +0800 Subject: [PATCH 15/25] x86/resctrl: Refactor mkdir_mondata_subdir() with a helper function ANBZ: #24559 commit 603cf1e28838a01e4f140c3054ce147f8b087d08 upstream. In Sub-NUMA Cluster (SNC) mode Linux must create the monitor files in the original "mon_L3_XX" directories and also in each of the "mon_sub_L3_YY" directories. Refactor mkdir_mondata_subdir() to move the creation of monitoring files into a helper function to avoid the need to duplicate code later. No functional change. Intel-SIG: 603cf1e28838 x86/resctrl: Refactor mkdir_mondata_subdir() with a helper function Backport SNC RDT support for Intel platforms. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-12-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- fs/resctrl/rdtgroup.c | 45 +++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 40463cb0744c..3bc62241ec6e 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3554,14 +3554,37 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, } } +static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, + struct rdt_resource *r, struct rdtgroup *prgrp) +{ + struct rmid_read rr = {0}; + union mon_data_bits priv; + struct mon_evt *mevt; + int ret; + + if (WARN_ON(list_empty(&r->mon.evt_list))) + return -EPERM; + + priv.u.rid = r->rid; + priv.u.domid = d->hdr.id; + list_for_each_entry(mevt, &r->mon.evt_list, list) { + priv.u.evtid = mevt->evtid; + ret = 
mon_addfile(kn, mevt->name, priv.priv); + if (ret) + return ret; + + if (resctrl_is_mbm_event(mevt->evtid)) + mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); + } + + return 0; +} + static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, struct rdt_mon_domain *d, struct rdt_resource *r, struct rdtgroup *prgrp) { - struct rmid_read rr = {0}; - union mon_data_bits priv; struct kernfs_node *kn; - struct mon_evt *mevt; char name[32]; int ret; @@ -3575,22 +3598,10 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (ret) goto out_destroy; - if (WARN_ON(list_empty(&r->mon.evt_list))) { - ret = -EPERM; + ret = mon_add_all_files(kn, d, r, prgrp); + if (ret) goto out_destroy; - } - priv.u.rid = r->rid; - priv.u.domid = d->hdr.id; - list_for_each_entry(mevt, &r->mon.evt_list, list) { - priv.u.evtid = mevt->evtid; - ret = mon_addfile(kn, mevt->name, priv.priv); - if (ret) - goto out_destroy; - - if (resctrl_is_mbm_event(mevt->evtid)) - mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); - } kernfs_activate(kn); return 0; -- Gitee From 40a2939d7500c16ea3fb80e6e13c7b72159eefc3 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 17:48:56 +0800 Subject: [PATCH 16/25] x86/resctrl: Allocate a new field in union mon_data_bits ANBZ: #24559 commit 92b5d0b1189ea9e9f00ae493fc99102fe7f2442f upstream. When Sub-NUMA Cluster (SNC) mode is enabled, the legacy monitor reporting files must report the sum of the data from all of the SNC nodes that share the L3 cache that is referenced by the monitor file. Resctrl squeezes all the attributes of these files into 32 bits so they can be stored in the "priv" field of struct kernfs_node. Currently, only three monitor events are defined by enum resctrl_event_id so reducing it from 8 bits to 7 bits still provides more than enough space to represent all the known event types. But note that this choice was arbitrary. The "rid" field is also far wider than needed for the current number of resource id types. 
This structure is purely internal to resctrl, no ABI issues with modifying it. Subsequent changes may rearrange the allocation of bits between each of the fields as needed. Give the bit to a new "sum" field that indicates that reading this file must sum across SNC nodes. This bit also indicates that the domid field is the id of an L3 cache (instead of a domain id) to find which domains must be summed. Fix up other issues in the kerneldoc description for mon_data_bits. Intel-SIG: 92b5d0b1189e x86/resctrl: Allocate a new field in union mon_data_bits Backport SNC RDT support for Intel platforms. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-13-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- fs/resctrl/internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index f72d1b035b6a..7d58d3ddf119 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -100,7 +100,8 @@ union mon_data_bits { void *priv; struct { unsigned int rid : 10; - enum resctrl_event_id evtid : 8; + enum resctrl_event_id evtid : 7; + unsigned int sum : 1; unsigned int domid : 14; } u; }; -- Gitee From 0d0725e501ebb64a77ee857eef5b7822a7a1593d Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 18:10:04 +0800 Subject: [PATCH 17/25] x86/resctrl: Create Sub-NUMA Cluster (SNC) monitor files ANBZ: #24559 commit 0158ed6a1335ff37f0336a986d7b99d6e97d46e9 upstream. When SNC mode is enabled, create subdirectories and files to monitor at the SNC node granularity. Legacy behavior is preserved by tagging the monitor files at the L3 granularity with the "sum" attribute. When the user reads these files the kernel will read monitor data from all SNC nodes that share the same L3 cache instance and return the aggregated value to the user. 
Note that the "domid" field for files that must sum across SNC domains has the L3 cache instance id, while non-summing files use the domain id. The "sum" files do not need to make a call to mon_event_read() to initialize the MBM counters. This will be handled by initializing the individual SNC nodes that share the L3. Intel-SIG: 0158ed6a1335 x86/resctrl: Create Sub-NUMA Cluster (SNC) monitor files Backport SNC RDT support for Intel platforms. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-14-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- fs/resctrl/rdtgroup.c | 62 ++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 3bc62241ec6e..e390a4cc2547 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3555,7 +3555,8 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, } static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, - struct rdt_resource *r, struct rdtgroup *prgrp) + struct rdt_resource *r, struct rdtgroup *prgrp, + bool do_sum) { struct rmid_read rr = {0}; union mon_data_bits priv; @@ -3566,14 +3567,15 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, return -EPERM; priv.u.rid = r->rid; - priv.u.domid = d->hdr.id; + priv.u.domid = do_sum ? 
d->ci->id : d->hdr.id; + priv.u.sum = do_sum; list_for_each_entry(mevt, &r->mon.evt_list, list) { priv.u.evtid = mevt->evtid; ret = mon_addfile(kn, mevt->name, priv.priv); if (ret) return ret; - if (resctrl_is_mbm_event(mevt->evtid)) + if (!do_sum && resctrl_is_mbm_event(mevt->evtid)) mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); } @@ -3584,23 +3586,51 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, struct rdt_mon_domain *d, struct rdt_resource *r, struct rdtgroup *prgrp) { - struct kernfs_node *kn; + struct kernfs_node *kn, *ckn; char name[32]; - int ret; + bool snc_mode; + int ret = 0; - sprintf(name, "mon_%s_%02d", r->name, d->hdr.id); - /* create the directory */ - kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); - if (IS_ERR(kn)) - return PTR_ERR(kn); + lockdep_assert_held(&rdtgroup_mutex); - ret = rdtgroup_kn_set_ugid(kn); - if (ret) - goto out_destroy; + snc_mode = r->mon_scope == RESCTRL_L3_NODE; + sprintf(name, "mon_%s_%02lu", r->name, snc_mode ? d->ci->id : d->hdr.id); + kn = kernfs_find_and_get(parent_kn, name); + if (kn) { + /* + * rdtgroup_mutex will prevent this directory from being + * removed. No need to keep this hold. 
+ */ + kernfs_put(kn); + } else { + kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); + if (IS_ERR(kn)) + return PTR_ERR(kn); - ret = mon_add_all_files(kn, d, r, prgrp); - if (ret) - goto out_destroy; + ret = rdtgroup_kn_set_ugid(kn); + if (ret) + goto out_destroy; + ret = mon_add_all_files(kn, d, r, prgrp, snc_mode); + if (ret) + goto out_destroy; + } + + if (snc_mode) { + sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id); + ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp); + if (IS_ERR(ckn)) { + ret = -EINVAL; + goto out_destroy; + } + + ret = rdtgroup_kn_set_ugid(ckn); + if (ret) + goto out_destroy; + + ret = mon_add_all_files(ckn, d, r, prgrp, false); + if (ret) + goto out_destroy; + } kernfs_activate(kn); return 0; -- Gitee From 2f89315fa991a9d60cee9f2b244b76a2a1bacc1a Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 18:16:12 +0800 Subject: [PATCH 18/25] x86/resctrl: Handle removing directories in Sub-NUMA Cluster (SNC) mode ANBZ: #24559 commit 6b48b80b08e6f08eea8eaf7e44555ada191b6bee upstream. In SNC mode, there are multiple subdirectories in each L3 level monitor directory (one for each SNC node). If all the CPUs in an SNC node are taken offline, just remove the SNC directory for that node. In non-SNC mode, or when the last SNC node directory is removed, remove the L3 monitor directory. Add a helper function to avoid duplicated code. Intel-SIG: 6b48b80b08e6 x86/resctrl: Handle removing directories in Sub-NUMA Cluster (SNC) mode Backport SNC RDT support for Intel platforms. 
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/r/20240702173820.90368-2-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- fs/resctrl/rdtgroup.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index e390a4cc2547..940a83e390ad 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3535,22 +3535,45 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name, return ret; } +static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname) +{ + struct kernfs_node *kn; + + kn = kernfs_find_and_get(pkn, name); + if (!kn) + return; + kernfs_put(kn); + + if (kn->dir.subdirs <= 1) + kernfs_remove(kn); + else + kernfs_remove_by_name(kn, subname); +} + /* * Remove all subdirectories of mon_data of ctrl_mon groups - * and monitor groups with given domain id. + * and monitor groups for the given domain. + * Remove files and directories containing "sum" of domain data + * when last domain being summed is removed. */ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - unsigned int dom_id) + struct rdt_mon_domain *d) { struct rdtgroup *prgrp, *crgrp; + char subname[32]; + bool snc_mode; char name[32]; + snc_mode = r->mon_scope == RESCTRL_L3_NODE; + sprintf(name, "mon_%s_%02lu", r->name, snc_mode ? 
d->ci->id : d->hdr.id); + if (snc_mode) + sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); + list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - sprintf(name, "mon_%s_%02d", r->name, dom_id); - kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); + mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname); list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) - kernfs_remove_by_name(crgrp->mon.mon_data_kn, name); + mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname); } } @@ -4550,7 +4573,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d * per domain monitor data directories. */ if (resctrl_mounted && resctrl_arch_mon_capable()) - rmdir_mondata_subdir_allrdtgrp(r, d->hdr.id); + rmdir_mondata_subdir_allrdtgrp(r, d); if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); -- Gitee From 09f16e69a1e811fcb832d8c0560da10201d5f9b2 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 18:25:18 +0800 Subject: [PATCH 19/25] x86/resctrl: Fill out rmid_read structure for smp_call*() to read a counter ANBZ: #24559 commit c8c7d3d904b76c45fe2b5dc982fb5090d12a63af upstream. mon_event_read() fills out most fields of the struct rmid_read that is passed via an smp_call*() function to a CPU that is part of the correct domain to read the monitor counters. With Sub-NUMA Cluster (SNC) mode there are now two cases to handle: 1) Reading a file that returns a value for a single domain. + Choose the CPU to execute from the domain cpu_mask 2) Reading a file that must sum across domains sharing an L3 cache instance. + Indicate to called code that a sum is needed by passing a NULL rdt_mon_domain pointer. + Choose the CPU from the L3 shared_cpu_map. Intel-SIG: c8c7d3d904b7 x86/resctrl: Fill out rmid_read structure for smp_call*() to read a counter Backport SNC RDT support for Intel platforms. 
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-16-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- fs/resctrl/ctrlmondata.c | 39 ++++++++++++++++++++++++++++++--------- fs/resctrl/internal.h | 2 +- fs/resctrl/rdtgroup.c | 2 +- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 7c0b4ad30f27..dd500e0470b9 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -450,7 +450,7 @@ static int smp_mon_event_count(void *arg) void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, - int evtid, int first) + cpumask_t *cpumask, int evtid, int first) { int cpu; @@ -471,7 +471,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, return; } - cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask, RESCTRL_PICK_ANY_CPU); + cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU); /* * cpumask_any_housekeeping() prefers housekeeping CPUs, but @@ -480,7 +480,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, * counters on some platforms if its called in IRQ context. */ if (tick_nohz_full_cpu(cpu)) - smp_call_function_any(&d->hdr.cpu_mask, mon_event_count, rr, 1); + smp_call_function_any(cpumask, mon_event_count, rr, 1); else smp_call_on_cpu(cpu, smp_mon_event_count, rr, false); @@ -511,14 +511,37 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) evtid = md.u.evtid; r = resctrl_arch_get_resource(resid); - hdr = rdt_find_domain(&r->mon_domains, domid, NULL); - if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) { + if (md.u.sum) { + /* + * This file requires summing across all domains that share + * the L3 cache id that was provided in the "domid" field of the + * mon_data_bits union. 
Search all domains in the resource for + * one that matches this cache id. + */ + list_for_each_entry(d, &r->mon_domains, hdr.list) { + if (d->ci->id == domid) { + rr.ci = d->ci; + mon_event_read(&rr, r, NULL, rdtgrp, + &d->ci->shared_cpu_map, evtid, false); + goto checkresult; + } + } ret = -ENOENT; goto out; + } else { + /* + * This file provides data from a single domain. Search + * the resource to find the domain with "domid". + */ + hdr = rdt_find_domain(&r->mon_domains, domid, NULL); + if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) { + ret = -ENOENT; + goto out; + } + d = container_of(hdr, struct rdt_mon_domain, hdr); + mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false); } - d = container_of(hdr, struct rdt_mon_domain, hdr); - if (resctrl_arch_get_abmc_enabled() && evtid != QOS_L3_OCCUP_EVENT_ID) { index = mon_event_config_index_get(evtid); if (index != INVALID_CONFIG_INDEX && @@ -528,8 +551,6 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) } } - mon_event_read(&rr, r, d, rdtgrp, evtid, false); - checkresult: if (rr.err == -EIO) seq_puts(m, "Error\n"); diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 7d58d3ddf119..0d672ffb1f2a 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -317,7 +317,7 @@ void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, - int evtid, int first); + cpumask_t *cpumask, int evtid, int first); int resctrl_mon_resource_init(void); void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 940a83e390ad..77fc43eb4935 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3599,7 +3599,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, return ret; if (!do_sum && 
resctrl_is_mbm_event(mevt->evtid)) - mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); + mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true); } return 0; -- Gitee From d098c6246387bad9d59565a55adc2f30755d520a Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sun, 7 Sep 2025 00:56:10 +0800 Subject: [PATCH 20/25] x86/resctrl: Make __mon_event_count() handle sum domains ANBZ: #24559 commit 9fbb303ec949a376f3cbdf6a2b66ad2212c24ebc upstream. Legacy resctrl monitor files must provide the sum of event values across all Sub-NUMA Cluster (SNC) domains that share an L3 cache instance. There are now two cases: 1) A specific domain is provided in struct rmid_read. This is either a non-SNC system, or the request is to read data from just one SNC node. 2) Domain pointer is NULL. In this case the cacheinfo field in struct rmid_read indicates that all SNC nodes that share that L3 cache instance should have the event read and return the sum of all values. Update the CPU sanity check. The existing check that an event is read from a CPU in the requested domain still applies when reading a single domain. But when summing across domains a more relaxed check that the current CPU is in the scope of the L3 cache instance is appropriate since the MSRs to read events are scoped at L3 cache level. Intel-SIG: 9fbb303ec949 x86/resctrl: Make __mon_event_count() handle sum domains Backport SNC RDT support for Intel platforms.
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-17-tony.luck@intel.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/cpu/resctrl/monitor.c | 3 -- fs/resctrl/monitor.c | 49 +++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 168aadf5fe85..67e8abb9cc5a 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -240,9 +240,6 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, resctrl_arch_rmid_read_context_check(); - if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask)) - return -EINVAL; - prmid = logical_rmid_to_physical_rmid(cpu, rmid); ret = __rmid_read_phys(prmid, eventid, &msr_val); if (ret) diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 4eb449bb74ab..da1ae8519bb4 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -342,7 +342,10 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid, static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) { + int cpu = smp_processor_id(); + struct rdt_mon_domain *d; struct mbm_state *m; + int err, ret; u64 tval = 0; if (rr->first) { @@ -353,14 +356,48 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) return 0; } - rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid, - &tval, rr->arch_mon_ctx); - if (rr->err) - return rr->err; + if (rr->d) { + /* Reading a single domain, must be on a CPU in that domain. 
*/ + if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask)) + return -EINVAL; + rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, + rr->evtid, &tval, rr->arch_mon_ctx); - rr->val += tval; + if (rr->err) + return rr->err; - return 0; + rr->val += tval; + + return 0; + } + + /* Summing domains that share a cache, must be on a CPU for that cache. */ + if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map)) + return -EINVAL; + + /* + * Legacy files must report the sum of an event across all + * domains that share the same L3 cache instance. + * Report success if a read from any domain succeeds, -EINVAL + * (translated to "Unavailable" for user space) if reading from + * all domains fail for any reason. + */ + ret = -EINVAL; + list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { + if (d->ci->id != rr->ci->id) + continue; + err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, + rr->evtid, &tval, rr->arch_mon_ctx); + if (!err) { + rr->val += tval; + ret = 0; + } + } + + if (ret) + rr->err = ret; + + return ret; } /* -- Gitee From eaac4459d256f9d76672fdd76229d42ea01b98eb Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sun, 7 Sep 2025 01:00:46 +0800 Subject: [PATCH 21/25] x86/resctrl: Enable shared RMID mode on Sub-NUMA Cluster (SNC) systems ANBZ: #24559 commit 21b362cc762aabb3e8496d33d7b4538154c95a0b upstream. Hardware has two RMID configuration options for SNC systems. The default mode divides RMID counters between SNC nodes. E.g. with 200 RMIDs and two SNC nodes per L3 cache RMIDs 0..99 are used on node 0, and 100..199 on node 1. This isn't compatible with Linux resctrl usage. On this example system a process using RMID 5 would only update monitor counters while running on SNC node 0. The other mode is "RMID Sharing Mode". This is enabled by clearing bit 0 of the RMID_SNC_CONFIG (0xCA0) model specific register. In this mode the number of logical RMIDs is the number of physical RMIDs (from CPUID leaf 0xF) divided by the number of SNC nodes per L3 cache instance. 
A process can use the same RMID across different SNC nodes. See the "Intel Resource Director Technology Architecture Specification" for additional details. When SNC is enabled, update the MSR when a monitor domain is marked online. Technically this is overkill. It only needs to be done once per L3 cache instance rather than per SNC domain. But there is no harm in doing it more than once, and this is not in a critical path. Intel-SIG: 21b362cc762a x86/resctrl: Enable shared RMID mode on Sub-NUMA Cluster (SNC) systems Backport SNC RDT support for Intel platforms. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/r/20240702173820.90368-3-tony.luck@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/resctrl/core.c | 2 ++ arch/x86/kernel/cpu/resctrl/internal.h | 2 ++ arch/x86/kernel/cpu/resctrl/monitor.c | 20 ++++++++++++++++++++ 4 files changed, 25 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a7d700fcf0c0..41d0de2b7bdc 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1171,6 +1171,7 @@ #define MSR_IA32_QM_CTR 0xc8e #define MSR_IA32_PQR_ASSOC 0xc8f #define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 #define MSR_IA32_L2_CBM_BASE 0xd10 #define MSR_IA32_MBA_THRTL_BASE 0xd50 diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index c94c76508783..e01909b73506 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -611,6 +611,8 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) resctrl_mbm_evt_config_init(hw_dom); resctrl_arch_mbm_cntr_assign_configure(); + arch_mon_domain_online(r, d); + if (r->mon_capable && arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) { mon_domain_free(hw_dom); return; diff --git 
a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index ae1dc7b5fe29..68b7a9f60b6e 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -209,6 +209,8 @@ union cpuid_0x10_x_edx { unsigned int full; }; +void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d); + /* * ABMC counters can be configured by writing to L3_QOS_ABMC_CFG. * @bw_type : Bandwidth configuration(supported by BMEC) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 67e8abb9cc5a..ce2059dfa2bb 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -260,6 +260,26 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, return 0; } +/* + * The power-on reset value of MSR_RMID_SNC_CONFIG is 0x1 + * which indicates that RMIDs are configured in legacy mode. + * This mode is incompatible with Linux resctrl semantics + * as RMIDs are partitioned between SNC nodes, which requires + * a user to know which RMID is allocated to a task. + * Clearing bit 0 reconfigures the RMID counters for use + * in RMID sharing mode. This mode is better for Linux. + * The RMID space is divided between all SNC nodes with the + * RMIDs renumbered to start from zero in each node when + * counting operations from tasks. Code to read the counters + * must adjust RMID counter numbers based on SNC node. See + * logical_rmid_to_physical_rmid() for code that does this. 
+ */ +void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d) +{ + if (snc_nodes_per_l3_cache > 1) + msr_clear_bit(MSR_RMID_SNC_CONFIG, 0); +} + int __init rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; -- Gitee From 01b999b9d294ea9ee607cbe48a2784589a1aa8ce Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sun, 7 Sep 2025 01:02:31 +0800 Subject: [PATCH 22/25] x86/resctrl: Detect Sub-NUMA Cluster (SNC) mode ANBZ: #24559 commit 13488150f5e2a9b84a335ae18bee33a918ead85d upstream. There isn't a simple hardware bit that indicates whether a CPU is running in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in the same NUMA node as CPU0. Add the missing definition of pr_fmt() to monitor.c. This wasn't noticed before as there are only "can't happen" console messages from this file. Intel-SIG: 13488150f5e2 x86/resctrl: Detect Sub-NUMA Cluster (SNC) mode Backport SNC RDT support for Intel platforms. [ bp: Massage commit message. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-19-tony.luck@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/cpu/resctrl/monitor.c | 66 +++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index ce2059dfa2bb..d4cb47b03959 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -15,6 +15,8 @@ * Software Developer Manual June 2016, volume 3, section 17.17. 
*/ +#define pr_fmt(fmt) "resctrl: " fmt + #include #include #include @@ -280,6 +282,68 @@ void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d) msr_clear_bit(MSR_RMID_SNC_CONFIG, 0); } +/* CPU models that support MSR_RMID_SNC_CONFIG */ +static const struct x86_cpu_id snc_cpu_ids[] __initconst = { + X86_MATCH_VFM(INTEL_ICELAKE_X, 0), + X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0), + X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0), + X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0), + {} +}; + +/* + * There isn't a simple hardware bit that indicates whether a CPU is running + * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the + * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in + * the same NUMA node as CPU0. + * It is not possible to accurately determine SNC state if the system is + * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes + * to L3 caches. It will be OK if system is booted with hyperthreading + * disabled (since this doesn't affect the ratio). + */ +static __init int snc_get_config(void) +{ + struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE); + const cpumask_t *node0_cpumask; + int cpus_per_node, cpus_per_l3; + int ret; + + if (!x86_match_cpu(snc_cpu_ids) || !ci) + return 1; + + cpus_read_lock(); + if (num_online_cpus() != num_present_cpus()) + pr_warn("Some CPUs offline, SNC detection may be incorrect\n"); + cpus_read_unlock(); + + node0_cpumask = cpumask_of_node(cpu_to_node(0)); + + cpus_per_node = cpumask_weight(node0_cpumask); + cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map); + + if (!cpus_per_node || !cpus_per_l3) + return 1; + + ret = cpus_per_l3 / cpus_per_node; + + /* sanity check: Only valid results are 1, 2, 3, 4 */ + switch (ret) { + case 1: + break; + case 2 ... 
4: + pr_info("Sub-NUMA Cluster mode detected with %d nodes per L3 cache\n", ret); + rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_L3_NODE; + break; + default: + pr_warn("Ignore improbable SNC node count %d\n", ret); + ret = 1; + break; + } + + return ret; +} + int __init rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; @@ -287,6 +351,8 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) unsigned int threshold; u32 eax, ebx, ecx, edx; + snc_nodes_per_l3_cache = snc_get_config(); + resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024; hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache; r->mon.num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache; -- Gitee From e180446fff00d85cdefecf0a4d3930ffaf1bfe84 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 28 Jun 2024 14:56:19 -0700 Subject: [PATCH 23/25] x86/resctrl: Update documentation with Sub-NUMA cluster changes ANBZ: #24559 commit ea34999f41873c96ac89e861e5fdfc7d0403f9e3 upstream. With Sub-NUMA Cluster (SNC) mode enabled, the scope of monitoring resources is per-NODE instead of per-L3 cache. Backwards compatibility is maintained by providing files in the mon_L3_XX directories that sum event counts for all SNC nodes sharing an L3 cache. New files provide per-SNC node event counts. Users should be aware that SNC mode also affects the amount of L3 cache available for allocation within each SNC node. Intel-SIG: ea34999f4187 x86/resctrl: Update documentation with Sub-NUMA cluster changes Backport SNC RDT support for Intel platforms. 
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-20-tony.luck@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- Documentation/arch/x86/resctrl.rst | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index 11c827931f1b..b481c5e2e90f 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -575,6 +575,10 @@ When monitoring is enabled all MON groups will also contain: all tasks in the group. In CTRL_MON groups these files provide the sum for all tasks in the CTRL_MON group and all tasks in MON groups. Please see example section for more details on usage. + On systems with Sub-NUMA Cluster (SNC) enabled there are extra + directories for each node (located within the "mon_L3_XX" directory + for the L3 cache they occupy). These are named "mon_sub_L3_YY" + where "YY" is the node number. "mon_hw_id": Available only with debug option. The identifier used by hardware @@ -678,6 +682,29 @@ if non-contiguous 1s value is supported. On a system with a 20-bit mask each bit represents 5% of the capacity of the cache. You could partition the cache into four equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000. +Notes on Sub-NUMA Cluster mode +============================== +When SNC mode is enabled, Linux may load balance tasks between Sub-NUMA +nodes much more readily than between regular NUMA nodes since the CPUs +on Sub-NUMA nodes share the same L3 cache and the system may report +the NUMA distance between Sub-NUMA nodes with a lower value than used +for regular NUMA nodes. + +The top-level monitoring files in each "mon_L3_XX" directory provide +the sum of data across all SNC nodes sharing an L3 cache instance. 
+Users who bind tasks to the CPUs of a specific Sub-NUMA node can read +the "llc_occupancy", "mbm_total_bytes", and "mbm_local_bytes" in the +"mon_sub_L3_YY" directories to get node local data. + +Memory bandwidth allocation is still performed at the L3 cache +level. I.e. throttling controls are applied to all SNC nodes. + +L3 cache allocation bitmaps also apply to all SNC nodes. But note that +the amount of L3 cache represented by each bit is divided by the number +of SNC nodes per L3 cache. E.g. with a 100MB cache on a system with 10-bit +allocation masks each bit normally represents 10MB. With SNC mode enabled +with two SNC nodes per L3 cache, each bit only represents 5MB. + Memory bandwidth Allocation and monitoring ========================================== -- Gitee From 244725468d2067e9c82fbe5c3943f8af45d56013 Mon Sep 17 00:00:00 2001 From: Qinyun Tan Date: Sat, 6 Sep 2025 19:57:56 -0700 Subject: [PATCH 24/25] x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem ANBZ: #24559 commit 594902c986e269660302f09df9ec4bf1cf017b77 upstream. In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the rdt_mon_domain structure representing a NUMA node relies on the cacheinfo interface (rdt_mon_domain::ci) to store L3 cache information (e.g., shared_cpu_map) for monitoring. The L3 cache information of a SNC NUMA node determines which domains are summed for the "top level" L3-scoped events. rdt_mon_domain::ci is initialized using the first online CPU of a NUMA node. When this CPU goes offline, its shared_cpu_map is cleared to contain only the offline CPU itself. Subsequently, attempting to read counters via smp_call_on_cpu(offline_cpu) fails (and the error is ignored), returning zero values for "top-level events" without any error indication. Replace the cacheinfo references in struct rdt_mon_domain and struct rmid_read with the cacheinfo ID (a unique identifier for the L3 cache).
rdt_domain_hdr::cpu_mask contains the online CPUs associated with that domain. When reading "top-level events", select a CPU from rdt_domain_hdr::cpu_mask and utilize its L3 shared_cpu_map to determine valid CPUs for reading RMID counter via the MSR interface. Considering all CPUs associated with the L3 cache improves the chances of picking a housekeeping CPU on which the counter reading work can be queued, avoiding an unnecessary IPI. Intel-SIG: 594902c986e2 x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem Backport SNC RDT support for Intel platforms. Fixes: 328ea68874642 ("x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files") Signed-off-by: Qinyun Tan Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Tony Luck Link: https://lore.kernel.org/20250530182053.37502-2-qinyuntan@linux.alibaba.com [ Zhang Rui: resolve conflict (file relocated) and amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/cpu/resctrl/core.c | 6 ++++-- fs/resctrl/ctrlmondata.c | 13 +++++++++---- fs/resctrl/internal.h | 4 ++-- fs/resctrl/monitor.c | 6 ++++-- fs/resctrl/rdtgroup.c | 6 +++--- include/linux/resctrl.h | 4 ++-- 6 files changed, 24 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index e01909b73506..17fe98195c56 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -565,6 +565,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) struct rdt_hw_mon_domain *hw_dom; struct rdt_domain_hdr *hdr; struct rdt_mon_domain *d; + struct cacheinfo *ci; int err; BUG_ON(id > NR_CPUS); @@ -597,13 +598,14 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d->hdr.id = id; d->hdr.type = RESCTRL_MON_DOMAIN; r->rdt_domain_list[id] = d; - d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); - if (!d->ci) { + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) { 
pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); mon_domain_free(hw_dom); return; } + d->ci_id = ci->id; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); rdt_domain_reconfigure_cdp(r); diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index dd500e0470b9..9eaa170f855b 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -497,7 +497,8 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) struct rdtgroup *rdtgrp; struct rdt_resource *r; union mon_data_bits md; - int ret = 0, index; + int ret = 0, index, cpu; + struct cacheinfo *ci; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { @@ -519,10 +520,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) * one that matches this cache id. */ list_for_each_entry(d, &r->mon_domains, hdr.list) { - if (d->ci->id == domid) { - rr.ci = d->ci; + if (d->ci_id == domid) { + rr.ci_id = d->ci_id; + cpu = cpumask_any(&d->hdr.cpu_mask); + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) + continue; mon_event_read(&rr, r, NULL, rdtgrp, - &d->ci->shared_cpu_map, evtid, false); + &ci->shared_cpu_map, evtid, false); goto checkresult; } } diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 0d672ffb1f2a..89cfe8c78666 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -116,7 +116,7 @@ union mon_data_bits { * domains in @r sharing L3 @ci.id * @evtid: Which monitor event to read. * @first: Initialize MBM counter when true. - * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains. + * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains. * @err: Error encountered when reading counter. * @val: Returned value of event counter. If @rgrp is a parent resource group, * @val includes the sum of event counts from its child resource groups. 
@@ -130,7 +130,7 @@ struct rmid_read { struct rdt_mon_domain *d; enum resctrl_event_id evtid; bool first; - struct cacheinfo *ci; + unsigned long ci_id; int err; u64 val; void *arch_mon_ctx; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index da1ae8519bb4..55d37e8f1807 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -344,6 +344,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) { int cpu = smp_processor_id(); struct rdt_mon_domain *d; + struct cacheinfo *ci; struct mbm_state *m; int err, ret; u64 tval = 0; @@ -372,7 +373,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) } /* Summing domains that share a cache, must be on a CPU for that cache. */ - if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map)) + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci || ci->id != rr->ci_id) return -EINVAL; /* @@ -384,7 +386,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) */ ret = -EINVAL; list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { - if (d->ci->id != rr->ci->id) + if (d->ci_id != rr->ci_id) continue; err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, rr->evtid, &tval, rr->arch_mon_ctx); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 77fc43eb4935..c81d1966eb06 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3565,7 +3565,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, char name[32]; snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02lu", r->name, snc_mode ? d->ci->id : d->hdr.id); + sprintf(name, "mon_%s_%02lu", r->name, snc_mode ? d->ci_id : d->hdr.id); if (snc_mode) sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); @@ -3590,7 +3590,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, return -EPERM; priv.u.rid = r->rid; - priv.u.domid = do_sum ? d->ci->id : d->hdr.id; + priv.u.domid = do_sum ? 
d->ci_id : d->hdr.id; priv.u.sum = do_sum; list_for_each_entry(mevt, &r->mon.evt_list, list) { priv.u.evtid = mevt->evtid; @@ -3617,7 +3617,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, lockdep_assert_held(&rdtgroup_mutex); snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02lu", r->name, snc_mode ? d->ci->id : d->hdr.id); + sprintf(name, "mon_%s_%02lu", r->name, snc_mode ? d->ci_id : d->hdr.id); kn = kernfs_find_and_get(parent_kn, name); if (kn) { /* diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 472fcee19df5..f730983a7a95 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -129,7 +129,7 @@ struct rdt_ctrl_domain { /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types - * @ci: cache info for this domain + * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -141,7 +141,7 @@ struct rdt_ctrl_domain { */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; - struct cacheinfo *ci; + unsigned long ci_id; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local; -- Gitee From 443d7ee63a13180e69479cd694f03a167a87da2e Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 26 Sep 2025 12:17:22 +0800 Subject: [PATCH 25/25] x86/resctrl: Support Sub-NUMA Cluster (SNC) mode on Clearwater Forest ANBZ: #24559 commit a0a0999507752574b80d7fbd179cce052c92791b upstream. Clearwater Forest supports SNC mode. Add it to the snc_cpu_ids[] table. Intel-SIG: commit a0a099950775 x86/resctrl: Support Sub-NUMA Cluster (SNC) mode on Clearwater Forest Backport SNC RDT support for Intel platforms. 
Signed-off-by: Chen Yu Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Acked-by: Tony Luck [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- arch/x86/kernel/cpu/resctrl/monitor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index d4cb47b03959..789c7dbfdd2d 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -289,6 +289,7 @@ static const struct x86_cpu_id snc_cpu_ids[] __initconst = { X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0), X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, 0), {} }; -- Gitee