diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index 11c827931f1b638194a827b88429c257b157851b..b481c5e2e90ff183dcf3e2ad14e81dfdc595c38f 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -575,6 +575,10 @@ When monitoring is enabled all MON groups will also contain: all tasks in the group. In CTRL_MON groups these files provide the sum for all tasks in the CTRL_MON group and all tasks in MON groups. Please see example section for more details on usage. + On systems with Sub-NUMA Cluster (SNC) enabled there are extra + directories for each node (located within the "mon_L3_XX" directory + for the L3 cache they occupy). These are named "mon_sub_L3_YY" + where "YY" is the node number. "mon_hw_id": Available only with debug option. The identifier used by hardware @@ -678,6 +682,29 @@ if non-contiguous 1s value is supported. On a system with a 20-bit mask each bit represents 5% of the capacity of the cache. You could partition the cache into four equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000. +Notes on Sub-NUMA Cluster mode +============================== +When SNC mode is enabled, Linux may load balance tasks between Sub-NUMA +nodes much more readily than between regular NUMA nodes since the CPUs +on Sub-NUMA nodes share the same L3 cache and the system may report +the NUMA distance between Sub-NUMA nodes with a lower value than used +for regular NUMA nodes. + +The top-level monitoring files in each "mon_L3_XX" directory provide +the sum of data across all SNC nodes sharing an L3 cache instance. +Users who bind tasks to the CPUs of a specific Sub-NUMA node can read +the "llc_occupancy", "mbm_total_bytes", and "mbm_local_bytes" in the +"mon_sub_L3_YY" directories to get node local data. + +Memory bandwidth allocation is still performed at the L3 cache +level. I.e. throttling controls are applied to all SNC nodes. + +L3 cache allocation bitmaps also apply to all SNC nodes. But note that +the amount of L3 cache represented by each bit is divided by the number +of SNC nodes per L3 cache. E.g. with a 100MB cache on a system with 10-bit +allocation masks each bit normally represents 10MB. With SNC mode enabled +with two SNC nodes per L3 cache, each bit only represents 5MB. 
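[Editorial sketch, not part of the patch] The per-node layout described above can be exercised from user space. The following is a minimal sketch assuming a resctrl filesystem mounted at /sys/fs/resctrl, one L3 instance "mon_L3_00" and two SNC nodes "mon_sub_L3_00"/"mon_sub_L3_01"; those directory names are hypothetical examples of the naming scheme documented above, not taken from a real system. It sums the per-node "mbm_local_bytes" files, which should match the L3-level total reported by the "mbm_local_bytes" file one directory up::

  #include <stdio.h>

  int main(void)
  {
          /* Example paths following the naming scheme above; adjust to the real topology. */
          const char *files[] = {
                  "/sys/fs/resctrl/mon_data/mon_L3_00/mon_sub_L3_00/mbm_local_bytes",
                  "/sys/fs/resctrl/mon_data/mon_L3_00/mon_sub_L3_01/mbm_local_bytes",
          };
          unsigned long long node_bytes, total = 0;
          size_t i;

          for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
                  FILE *f = fopen(files[i], "r");

                  /* Skip nodes whose file is missing or unreadable. */
                  if (!f)
                          continue;
                  if (fscanf(f, "%llu", &node_bytes) == 1) {
                          printf("SNC node %zu: %llu bytes\n", i, node_bytes);
                          total += node_bytes;
                  }
                  fclose(f);
          }
          /* Should match the sum reported by mon_L3_00/mbm_local_bytes. */
          printf("L3 total: %llu bytes\n", total);
          return 0;
  }

Tasks that are bound to the CPUs of one SNC node only need to read their own "mon_sub_L3_YY" directory; the summing loop is only needed when reconstructing the L3-wide view by hand.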
+ Memory bandwidth Allocation and monitoring ========================================== diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a7d700fcf0c0d2ea11457a7dbebe6f32f0739a53..41d0de2b7bdc8c9646b96a14ab3fbe1bfdf00ebe 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1171,6 +1171,7 @@ #define MSR_IA32_QM_CTR 0xc8e #define MSR_IA32_PQR_ASSOC 0xc8f #define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 #define MSR_IA32_L2_CBM_BASE 0xd10 #define MSR_IA32_MBA_THRTL_BASE 0xd50 diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 526f58b8f26c92778ce7e4bed7e94b5028bb3f95..17fe98195c56664fedcb1c34150828842d7e5ee8 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -51,15 +50,16 @@ DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state); bool rdt_alloc_capable; static void -mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, +mba_wrmsr_intel(struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r); static void -cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); +cat_wrmsr(struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r); static void -mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, +mba_wrmsr_amd(struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r); -#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains) +#define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains) +#define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains) struct rdt_hw_resource rdt_resources_all[] = { [RDT_RESOURCE_L3] = @@ -67,8 +67,10 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_L3, .name = "L3", - .cache_level = 3, - .domains = domain_init(RDT_RESOURCE_L3), + .ctrl_scope = RESCTRL_L3_CACHE, + .mon_scope = RESCTRL_L3_CACHE, + .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3), + .mon_domains = mon_domain_init(RDT_RESOURCE_L3), .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, }, @@ -80,8 +82,8 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_L2, .name = "L2", - .cache_level = 2, - .domains = domain_init(RDT_RESOURCE_L2), + .ctrl_scope = RESCTRL_L2_CACHE, + .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2), .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, }, @@ -93,8 +95,8 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_MBA, .name = "MB", - .cache_level = 3, - .domains = domain_init(RDT_RESOURCE_MBA), + .ctrl_scope = RESCTRL_L3_CACHE, + .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA), .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, }, @@ -104,8 +106,8 @@ struct rdt_hw_resource rdt_resources_all[] = { .r_resctrl = { .rid = RDT_RESOURCE_SMBA, .name = "SMBA", - .cache_level = 3, - .domains = domain_init(RDT_RESOURCE_SMBA), + .ctrl_scope = RESCTRL_L3_CACHE, + .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA), .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, }, @@ -297,10 +299,10 @@ bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l) } static void -mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) +mba_wrmsr_amd(struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r) { unsigned int i; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + 
struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); for (i = m->low; i < m->high; i++) @@ -322,11 +324,11 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) } static void -mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, +mba_wrmsr_intel(struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r) { unsigned int i; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); /* Write the delay values for mba. */ @@ -335,10 +337,10 @@ mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, } static void -cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) +cat_wrmsr(struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r) { unsigned int i; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); for (i = m->low; i < m->high; i++) @@ -356,9 +358,9 @@ void rdt_ctrl_update(void *arg) struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); struct rdt_resource *r = m->res; int cpu = smp_processor_id(); - struct rdt_domain *d; + struct rdt_ctrl_domain *d; - d = resctrl_get_domain_from_cpu(cpu, r); + d = get_ctrl_domain_from_cpu(cpu, r); if (d) { hw_res->msr_update(d, m, r); return; @@ -375,17 +377,14 @@ void rdt_ctrl_update(void *arg) * caller, return the first domain whose id is bigger than the input id. * The domain list is sorted by id in ascending order. */ -static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, - struct list_head **pos) +struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, + struct list_head **pos) { - struct rdt_domain *d; + struct rdt_domain_hdr *d; struct list_head *l; - if (id < 0) - return ERR_PTR(-ENODEV); - - list_for_each(l, &r->domains) { - d = list_entry(l, struct rdt_domain, list); + list_for_each(l, h) { + d = list_entry(l, struct rdt_domain_hdr, list); /* When id is found, return its domain. 
*/ if (id == d->id) return d; @@ -400,11 +399,6 @@ static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, return NULL; } -struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id) -{ - return rdt_find_domain(r, id, NULL); -} - static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); @@ -419,18 +413,23 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) *dc = r->default_ctrl; } -static void domain_free(struct rdt_hw_domain *hw_dom) +static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) +{ + kfree(hw_dom->ctrl_val); + kfree(hw_dom); +} + +static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom) { kfree(hw_dom->arch_mbm_total); kfree(hw_dom->arch_mbm_local); - kfree(hw_dom->ctrl_val); kfree(hw_dom); } -static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) +static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d) { + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct msr_param m; u32 *dc; @@ -453,7 +452,7 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) * @num_rmid: The size of the MBM counter array * @hw_dom: The domain that owns the allocated arrays */ -static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom) +static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) { size_t tsize; @@ -476,6 +475,21 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom) return 0; } +static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope) +{ + switch (scope) { + case RESCTRL_L2_CACHE: + case RESCTRL_L3_CACHE: + return get_cpu_cacheinfo_id(cpu, scope); + case RESCTRL_L3_NODE: + return cpu_to_node(cpu); + default: + break; + } + + return -EINVAL; +} + /* * domain_add_cpu - Add a cpu to a resource's domain list. * @@ -489,25 +503,87 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom) * in the schemata file and schemata input is validated to have the same order * as this list. 
*/ -static void domain_add_cpu(int cpu, struct rdt_resource *r) +static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) { - int id = get_cpu_cacheinfo_id(cpu, r->cache_level); + int id = get_domain_id_from_scope(cpu, r->ctrl_scope); + struct rdt_hw_ctrl_domain *hw_dom; struct list_head *add_pos = NULL; - struct rdt_hw_domain *hw_dom; - struct rdt_domain *d; + struct rdt_domain_hdr *hdr; + struct rdt_ctrl_domain *d; int err; BUG_ON(id > NR_CPUS); lockdep_assert_held(&domain_list_lock); - d = rdt_find_domain(r, id, &add_pos); - if (IS_ERR(d)) { - pr_warn("Couldn't find cache id for CPU %d\n", cpu); + if (id < 0) { + pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->ctrl_scope, r->name); return; } - if (d) { - cpumask_set_cpu(cpu, &d->cpu_mask); + hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos); + if (hdr) { + if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) + return; + d = container_of(hdr, struct rdt_ctrl_domain, hdr); + + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); + return; + } + + hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu)); + if (!hw_dom) + return; + + d = &hw_dom->d_resctrl; + d->hdr.id = id; + d->hdr.type = RESCTRL_CTRL_DOMAIN; + + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); + + rdt_domain_reconfigure_cdp(r); + + if (domain_setup_ctrlval(r, d)) { + ctrl_domain_free(hw_dom); + return; + } + + list_add_tail_rcu(&d->hdr.list, add_pos); + + err = resctrl_online_ctrl_domain(r, d); + if (err) { + list_del_rcu(&d->hdr.list); + synchronize_rcu(); + ctrl_domain_free(hw_dom); + } +} + +static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) +{ + int id = get_domain_id_from_scope(cpu, r->mon_scope); + struct list_head *add_pos = NULL; + struct rdt_hw_mon_domain *hw_dom; + struct rdt_domain_hdr *hdr; + struct rdt_mon_domain *d; + struct cacheinfo *ci; + int err; + + BUG_ON(id > NR_CPUS); + lockdep_assert_held(&domain_list_lock); + + if (id < 0) { + pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->mon_scope, r->name); + return; + } + + hdr = rdt_find_domain(&r->mon_domains, id, &add_pos); + if (hdr) { + if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) + return; + d = container_of(hdr, struct rdt_mon_domain, hdr); + + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); if (r->cache.arch_has_per_cpu_cfg) rdt_domain_reconfigure_cdp(r); resctrl_arch_mbm_cntr_assign_configure(); @@ -519,71 +595,145 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) return; d = &hw_dom->d_resctrl; - d->id = id; + d->hdr.id = id; + d->hdr.type = RESCTRL_MON_DOMAIN; r->rdt_domain_list[id] = d; + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) { + pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); + mon_domain_free(hw_dom); + return; + } - cpumask_set_cpu(cpu, &d->cpu_mask); + d->ci_id = ci->id; + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); rdt_domain_reconfigure_cdp(r); - if (r->alloc_capable && domain_setup_ctrlval(r, d)) { - domain_free(hw_dom); - return; - } - resctrl_mbm_evt_config_init(hw_dom); resctrl_arch_mbm_cntr_assign_configure(); + arch_mon_domain_online(r, d); + if (r->mon_capable && arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) { - domain_free(hw_dom); + mon_domain_free(hw_dom); return; } - list_add_tail_rcu(&d->list, add_pos); + list_add_tail_rcu(&d->hdr.list, add_pos); - err = resctrl_online_domain(r, d); + err = resctrl_online_mon_domain(r, d); if (err) { - list_del_rcu(&d->list); + list_del_rcu(&d->hdr.list); synchronize_rcu(); - 
domain_free(hw_dom); + mon_domain_free(hw_dom); } } -static void domain_remove_cpu(int cpu, struct rdt_resource *r) +static void domain_add_cpu(int cpu, struct rdt_resource *r) +{ + if (r->alloc_capable) + domain_add_cpu_ctrl(cpu, r); + if (r->mon_capable) + domain_add_cpu_mon(cpu, r); +} + +static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r) { - int id = get_cpu_cacheinfo_id(cpu, r->cache_level); - struct rdt_hw_domain *hw_dom; - struct rdt_domain *d; + int id = get_domain_id_from_scope(cpu, r->ctrl_scope); + struct rdt_hw_ctrl_domain *hw_dom; + struct rdt_domain_hdr *hdr; + struct rdt_ctrl_domain *d; BUG_ON(id > NR_CPUS); lockdep_assert_held(&domain_list_lock); - d = rdt_find_domain(r, id, NULL); - if (IS_ERR_OR_NULL(d)) { - pr_warn("Couldn't find cache id for CPU %d\n", cpu); + if (id < 0) { + pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->ctrl_scope, r->name); + return; + } + + hdr = rdt_find_domain(&r->ctrl_domains, id, NULL); + if (!hdr) { + pr_warn("Couldn't find control domain with id=%d for CPU %d\n", id, cpu); return; } - hw_dom = resctrl_to_arch_dom(d); - cpumask_clear_cpu(cpu, &d->cpu_mask); - if (cpumask_empty(&d->cpu_mask)) { - resctrl_offline_domain(r, d); - list_del_rcu(&d->list); + if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) + return; + + d = container_of(hdr, struct rdt_ctrl_domain, hdr); + hw_dom = resctrl_to_arch_ctrl_dom(d); + + cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); + + if (cpumask_empty(&d->hdr.cpu_mask)) { + resctrl_offline_ctrl_domain(r, d); + list_del_rcu(&d->hdr.list); synchronize_rcu(); /* - * rdt_domain "d" is going to be freed below, so clear + * rdt_ctrl_domain "d" is going to be freed below, so clear * its pointer from pseudo_lock_region struct. 
*/ if (d->plr) d->plr->d = NULL; + ctrl_domain_free(hw_dom); + + return; + } +} + +static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) +{ + int id = get_domain_id_from_scope(cpu, r->mon_scope); + struct rdt_hw_mon_domain *hw_dom; + struct rdt_domain_hdr *hdr; + struct rdt_mon_domain *d; + + BUG_ON(id > NR_CPUS); + lockdep_assert_held(&domain_list_lock); + + if (id < 0) { + pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->mon_scope, r->name); + return; + } + + hdr = rdt_find_domain(&r->mon_domains, id, NULL); + if (!hdr) { + pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n", + id, cpu, r->name); + return; + } + + if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) + return; + + d = container_of(hdr, struct rdt_mon_domain, hdr); + hw_dom = resctrl_to_arch_mon_dom(d); + + cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); + if (cpumask_empty(&d->hdr.cpu_mask)) { + resctrl_offline_mon_domain(r, d); + list_del_rcu(&d->hdr.list); + synchronize_rcu(); r->rdt_domain_list[id] = NULL; - domain_free(hw_dom); + mon_domain_free(hw_dom); return; } } +static void domain_remove_cpu(int cpu, struct rdt_resource *r) +{ + if (r->alloc_capable) + domain_remove_cpu_ctrl(cpu, r); + if (r->mon_capable) + domain_remove_cpu_mon(cpu, r); +} + static void clear_closid_rmid(int cpu) { struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index c5c3eaea27b65be9f729863b9db4f5ecf933c940..57f608435b57a3f2af6813011ef92b73041a385f 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -23,14 +23,14 @@ #include "internal.h" -static bool apply_config(struct rdt_hw_domain *hw_dom, +static bool apply_config(struct rdt_hw_ctrl_domain *hw_dom, struct resctrl_staged_config *cfg, u32 idx, cpumask_var_t cpu_mask) { - struct rdt_domain *dom = &hw_dom->d_resctrl; + struct rdt_ctrl_domain *dom = &hw_dom->d_resctrl; if (cfg->new_ctrl != hw_dom->ctrl_val[idx]) { - cpumask_set_cpu(cpumask_any(&dom->cpu_mask), cpu_mask); + cpumask_set_cpu(cpumask_any(&dom->hdr.cpu_mask), cpu_mask); hw_dom->ctrl_val[idx] = cfg->new_ctrl; return true; @@ -39,15 +39,15 @@ static bool apply_config(struct rdt_hw_domain *hw_dom, return false; } -int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val) { + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); u32 idx = resctrl_get_config_index(closid, t); struct msr_param msr_param; - if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) + if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask)) return -EINVAL; hw_dom->ctrl_val[idx] = cfg_val; @@ -63,11 +63,11 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) { struct resctrl_staged_config *cfg; - struct rdt_hw_domain *hw_dom; + struct rdt_hw_ctrl_domain *hw_dom; struct msr_param msr_param; enum resctrl_conf_type t; + struct rdt_ctrl_domain *d; cpumask_var_t cpu_mask; - struct rdt_domain *d; u32 idx; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -77,8 +77,8 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) return -ENOMEM; 
msr_param.res = NULL; - list_for_each_entry(d, &r->domains, list) { - hw_dom = resctrl_to_arch_dom(d); + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + hw_dom = resctrl_to_arch_ctrl_dom(d); for (t = 0; t < CDP_NUM_TYPES; t++) { cfg = &hw_dom->d_resctrl.staged_config[t]; if (!cfg->have_new_ctrl) @@ -111,10 +111,10 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) return 0; } -u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, +u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type) { - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d); u32 idx = resctrl_get_config_index(closid, type); return hw_dom->ctrl_val[idx]; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 9017fe146a86f4ebb58a90a75cb8f3bc11462cc7..68b7a9f60b6e0aaea53c335f9e7cc2e1f8910e06 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -56,10 +56,22 @@ struct arch_mbm_state { }; /** - * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share - * a resource + * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share + * a resource for a control function * @d_resctrl: Properties exposed to the resctrl file system * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID) + * + * Members of this structure are accessed via helpers that provide abstraction. + */ +struct rdt_hw_ctrl_domain { + struct rdt_ctrl_domain d_resctrl; + u32 *ctrl_val; +}; + +/** + * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share + * a resource for a monitor function + * @d_resctrl: Properties exposed to the resctrl file system * @arch_mbm_total: arch private state for MBM total bandwidth * @arch_mbm_local: arch private state for MBM local bandwidth * @mbm_total_cfg: MBM total bandwidth configuration @@ -67,18 +79,22 @@ struct arch_mbm_state { * * Members of this structure are accessed via helpers that provide abstraction. */ -struct rdt_hw_domain { - struct rdt_domain d_resctrl; - u32 *ctrl_val; +struct rdt_hw_mon_domain { + struct rdt_mon_domain d_resctrl; struct arch_mbm_state *arch_mbm_total; struct arch_mbm_state *arch_mbm_local; u32 mbm_total_cfg; u32 mbm_local_cfg; }; -static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r) +static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r) +{ + return container_of(r, struct rdt_hw_ctrl_domain, d_resctrl); +} + +static inline struct rdt_hw_mon_domain *resctrl_to_arch_mon_dom(struct rdt_mon_domain *r) { - return container_of(r, struct rdt_hw_domain, d_resctrl); + return container_of(r, struct rdt_hw_mon_domain, d_resctrl); } /** @@ -116,7 +132,7 @@ struct rdt_hw_resource { struct rdt_resource r_resctrl; u32 num_closid; unsigned int msr_base; - void (*msr_update) (struct rdt_domain *d, struct msr_param *m, + void (*msr_update) (struct rdt_ctrl_domain *d, struct msr_param *m, struct rdt_resource *r); unsigned int mon_scale; unsigned int mbm_width; @@ -193,6 +209,8 @@ union cpuid_0x10_x_edx { unsigned int full; }; +void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d); + /* * ABMC counters can be configured by writing to L3_QOS_ABMC_CFG. 
* @bw_type : Bandwidth configuration(supported by BMEC) @@ -224,5 +242,5 @@ int rdt_get_mon_l3_config(struct rdt_resource *r); bool rdt_cpu_has(int flag); void __init intel_rdt_mbm_apply_quirk(void); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); -void resctrl_mbm_evt_config_init(struct rdt_hw_domain *hw_dom); +void resctrl_mbm_evt_config_init(struct rdt_hw_mon_domain *hw_dom); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 0ed28ef8c25a791733325b56cb6fd90100b33d7e..789c7dbfdd2db1eea432e26549322b8fad78c67e 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -15,6 +15,8 @@ * Software Developer Manual June 2016, volume 3, section 17.17. */ +#define pr_fmt(fmt) "resctrl: " fmt + #include #include #include @@ -38,6 +40,8 @@ unsigned int rdt_mon_features; #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) +static int snc_nodes_per_l3_cache = 1; + /* * The correction factor table is documented in Documentation/arch/x86/resctrl.rst. * If rmid > rmid threshold, MBM total and local values should be multiplied @@ -99,7 +103,43 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val) return val; } -static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) +/* + * When Sub-NUMA Cluster (SNC) mode is not enabled (as indicated by + * "snc_nodes_per_l3_cache == 1") no translation of the RMID value is + * needed. The physical RMID is the same as the logical RMID. + * + * On a platform with SNC mode enabled, Linux enables RMID sharing mode + * via MSR 0xCA0 (see the "RMID Sharing Mode" section in the "Intel + * Resource Director Technology Architecture Specification" for a full + * description of RMID sharing mode). + * + * In RMID sharing mode there are fewer "logical RMID" values available + * to accumulate data ("physical RMIDs" are divided evenly between SNC + * nodes that share an L3 cache). Linux creates an rdt_mon_domain for + * each SNC node. + * + * The value loaded into IA32_PQR_ASSOC is the "logical RMID". + * + * Data is collected independently on each SNC node and can be retrieved + * using the "physical RMID" value computed by this function and loaded + * into IA32_QM_EVTSEL. @cpu can be any CPU in the SNC node. + * + * The scope of the IA32_QM_EVTSEL and IA32_QM_CTR MSRs is at the L3 + * cache. So a "physical RMID" may be read from any CPU that shares + * the L3 cache with the desired SNC node, not just from a CPU in + * the specific SNC node. + */ +static int logical_rmid_to_physical_rmid(int cpu, int lrmid) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + + if (snc_nodes_per_l3_cache == 1) + return lrmid; + + return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->mon.num_rmid; +} + +static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val) { u64 msr_val; @@ -111,7 +151,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) * are error bits. 
*/ - wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); + wrmsr(MSR_IA32_QM_EVTSEL, eventid, prmid); rdmsrl(MSR_IA32_QM_CTR, msr_val); if (msr_val & RMID_VAL_ERROR) @@ -123,7 +163,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) return 0; } -static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, +static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom, u32 rmid, enum resctrl_event_id eventid) { @@ -144,19 +184,22 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, return NULL; } -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid) { - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + int cpu = cpumask_any(&d->hdr.cpu_mask); struct arch_mbm_state *am; + u32 prmid; am = get_arch_mbm_state(hw_dom, rmid, eventid); if (am) { memset(am, 0, sizeof(*am)); + prmid = logical_rmid_to_physical_rmid(cpu, rmid); /* Record any initial, non-zero count value. */ - __rmid_read(rmid, eventid, &am->prev_msr); + __rmid_read_phys(prmid, eventid, &am->prev_msr); } } @@ -164,9 +207,9 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, * Assumes that hardware counters are also reset and thus that there is * no need to record initial non-zero counts. */ -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d) +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) { - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); if (resctrl_arch_is_mbm_total_enabled()) memset(hw_dom->arch_mbm_total, 0, @@ -185,22 +228,22 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) return chunks >> shift; } -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *ignored) { + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + int cpu = cpumask_any(&d->hdr.cpu_mask); struct arch_mbm_state *am; u64 msr_val, chunks; + u32 prmid; int ret; resctrl_arch_rmid_read_context_check(); - if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) - return -EINVAL; - - ret = __rmid_read(rmid, eventid, &msr_val); + prmid = logical_rmid_to_physical_rmid(cpu, rmid); + ret = __rmid_read_phys(prmid, eventid, &msr_val); if (ret) return ret; @@ -219,6 +262,89 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, return 0; } +/* + * The power-on reset value of MSR_RMID_SNC_CONFIG is 0x1 + * which indicates that RMIDs are configured in legacy mode. + * This mode is incompatible with Linux resctrl semantics + * as RMIDs are partitioned between SNC nodes, which requires + * a user to know which RMID is allocated to a task. + * Clearing bit 0 reconfigures the RMID counters for use + * in RMID sharing mode. This mode is better for Linux. + * The RMID space is divided between all SNC nodes with the + * RMIDs renumbered to start from zero in each node when + * counting operations from tasks. 
Code to read the counters + * must adjust RMID counter numbers based on SNC node. See + * logical_rmid_to_physical_rmid() for code that does this. + */ +void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d) +{ + if (snc_nodes_per_l3_cache > 1) + msr_clear_bit(MSR_RMID_SNC_CONFIG, 0); +} + +/* CPU models that support MSR_RMID_SNC_CONFIG */ +static const struct x86_cpu_id snc_cpu_ids[] __initconst = { + X86_MATCH_VFM(INTEL_ICELAKE_X, 0), + X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0), + X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0), + X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, 0), + {} +}; + +/* + * There isn't a simple hardware bit that indicates whether a CPU is running + * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the + * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in + * the same NUMA node as CPU0. + * It is not possible to accurately determine SNC state if the system is + * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes + * to L3 caches. It will be OK if system is booted with hyperthreading + * disabled (since this doesn't affect the ratio). + */ +static __init int snc_get_config(void) +{ + struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE); + const cpumask_t *node0_cpumask; + int cpus_per_node, cpus_per_l3; + int ret; + + if (!x86_match_cpu(snc_cpu_ids) || !ci) + return 1; + + cpus_read_lock(); + if (num_online_cpus() != num_present_cpus()) + pr_warn("Some CPUs offline, SNC detection may be incorrect\n"); + cpus_read_unlock(); + + node0_cpumask = cpumask_of_node(cpu_to_node(0)); + + cpus_per_node = cpumask_weight(node0_cpumask); + cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map); + + if (!cpus_per_node || !cpus_per_l3) + return 1; + + ret = cpus_per_l3 / cpus_per_node; + + /* sanity check: Only valid results are 1, 2, 3, 4 */ + switch (ret) { + case 1: + break; + case 2 ... 
4: + pr_info("Sub-NUMA Cluster mode detected with %d nodes per L3 cache\n", ret); + rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_L3_NODE; + break; + default: + pr_warn("Ignore improbable SNC node count %d\n", ret); + ret = 1; + break; + } + + return ret; +} + int __init rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; @@ -226,9 +352,11 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) unsigned int threshold; u32 eax, ebx, ecx, edx; + snc_nodes_per_l3_cache = snc_get_config(); + resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024; - hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale; - r->mon.num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; + hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache; + r->mon.num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache; hw_res->mbm_width = MBM_CNTR_WIDTH_BASE; if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) @@ -280,7 +408,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) return 0; } -void resctrl_mbm_evt_config_init(struct rdt_hw_domain *hw_dom) +void resctrl_mbm_evt_config_init(struct rdt_hw_mon_domain *hw_dom) { unsigned int index; u64 msrval; diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index ba1596afee107f65f784ee004a86d0faabf68eb4..d15e1e841dbc93741d22f31211425d943dc0266e 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -11,7 +11,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include #include diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 50a49a1ea604d842cedb551a310f2e3c893e0c5c..f58ca997910867d5584caeea3f7848f116edadc3 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -95,9 +95,9 @@ static void l2_qos_cfg_update(void *arg) static int set_cache_qos_cfg(int level, bool enable) { void (*update)(void *arg); + struct rdt_ctrl_domain *d; struct rdt_resource *r_l; cpumask_var_t cpu_mask; - struct rdt_domain *d; int cpu; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -114,14 +114,14 @@ static int set_cache_qos_cfg(int level, bool enable) return -ENOMEM; r_l = &rdt_resources_all[level].r_resctrl; - list_for_each_entry(d, &r_l->domains, list) { + list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) { if (r_l->cache.arch_has_per_cpu_cfg) /* Pick all the CPUs in the domain instance */ - for_each_cpu(cpu, &d->cpu_mask) + for_each_cpu(cpu, &d->hdr.cpu_mask) cpumask_set_cpu(cpu, cpu_mask); else /* Pick one CPU from each domain instance to update MSR */ - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask); } /* Update QOS_CFG MSR on all the CPUs in cpu_mask */ @@ -327,15 +327,15 @@ static void resctrl_abmc_set_one_amd(void *arg) static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable) { - struct rdt_domain *d; + struct rdt_mon_domain *d; /* * Hardware counters will reset after switching the monitor mode. * Reset the architectural state so that reading of hardware * counter is not considered as an overflow in the next update. 
*/ - list_for_each_entry(d, &r->domains, list) { - on_each_cpu_mask(&d->cpu_mask, + list_for_each_entry(d, &r->mon_domains, hdr.list) { + on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_abmc_set_one_amd, &enable, 1); resctrl_arch_reset_rmid_all(r, d); } @@ -412,8 +412,8 @@ static void rdtgroup_abmc_cfg(void *info) int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid, u32 rmid, u32 cntr_id, u32 closid, bool assign) { - struct rdt_domain *d = dom; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_mon_domain *d = dom; + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); union l3_qos_abmc_cfg abmc_cfg = { 0 }; struct arch_mbm_state *arch_mbm; @@ -431,7 +431,7 @@ int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid, arch_mbm = &hw_dom->arch_mbm_local[rmid]; } - smp_call_function_any(&d->cpu_mask, rdtgroup_abmc_cfg, &abmc_cfg, 1); + smp_call_function_any(&d->hdr.cpu_mask, rdtgroup_abmc_cfg, &abmc_cfg, 1); /* * Reset the architectural state so that reading of hardware @@ -446,10 +446,10 @@ int resctrl_arch_assign_cntr(void *dom, enum resctrl_event_id evtid, static int reset_all_ctrls(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - struct rdt_hw_domain *hw_dom; + struct rdt_hw_ctrl_domain *hw_dom; struct msr_param msr_param; cpumask_var_t cpu_mask; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; int i; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -464,12 +464,12 @@ static int reset_all_ctrls(struct rdt_resource *r) /* * Disable resource control for this resource by setting all - * CBMs in all domains to the maximum mask value. Pick one CPU + * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU * from each domain to update the MSRs below. 
*/ - list_for_each_entry(d, &r->domains, list) { - hw_dom = resctrl_to_arch_dom(d); - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + hw_dom = resctrl_to_arch_ctrl_dom(d); + cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask); for (i = 0; i < hw_res->num_closid; i++) hw_dom->ctrl_val[i] = r->default_ctrl; @@ -493,8 +493,8 @@ void resctrl_arch_reset_resources(void) u32 resctrl_arch_event_config_get(void *dom, enum resctrl_event_id eventid) { - struct rdt_domain *d = dom; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct rdt_mon_domain *d = dom; + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); switch (eventid) { case QOS_L3_OCCUP_EVENT_ID: @@ -515,7 +515,7 @@ u32 resctrl_arch_event_config_get(void *dom, enum resctrl_event_id eventid) void resctrl_arch_event_config_set(void *info) { struct resctrl_mon_config_info *mon_info = info; - struct rdt_hw_domain *hw_dom; + struct rdt_hw_mon_domain *hw_dom; unsigned int index; index = mon_event_config_index_get(mon_info->evtid); @@ -524,7 +524,7 @@ void resctrl_arch_event_config_set(void *info) wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0); - hw_dom = resctrl_to_arch_dom(mon_info->d); + hw_dom = resctrl_to_arch_mon_dom(mon_info->d); switch (mon_info->evtid) { case QOS_L3_OCCUP_EVENT_ID: diff --git a/drivers/platform/mpam/mpam_resctrl.c b/drivers/platform/mpam/mpam_resctrl.c index ee1a66a6c09924426f87e0350c3c555ff6682c39..31567c009a9928677a82008b5e4affd6cc9e2306 100644 --- a/drivers/platform/mpam/mpam_resctrl.c +++ b/drivers/platform/mpam/mpam_resctrl.c @@ -786,7 +786,7 @@ static int mpam_resctrl_resource_init(struct mpam_resctrl_res *res) res->resctrl_res.rid == RDT_RESOURCE_L3) { bool has_csu = cache_has_usable_csu(class); - r->cache_level = class->level; + r->scope = class->level; /* TODO: Scaling is not yet supported */ r->cache.cbm_len = class->props.cpbm_wd; diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 253443f87646885ecb743b8b790e865c0cc50d03..9eaa170f855baf9bec8a2416573fd95235a87490 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -27,7 +27,7 @@ struct rdt_parse_data { }; typedef int(ctrlval_parser_t)(struct rdt_parse_data *data, - struct resctrl_schema *s, struct rdt_domain *d); + struct resctrl_schema *s, struct rdt_ctrl_domain *d); /* * Check whether MBA bandwidth percentage value is correct. The value is @@ -71,7 +71,7 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) } static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d) + struct rdt_ctrl_domain *d) { struct resctrl_staged_config *cfg; u32 closid = data->rdtgrp->closid; @@ -80,7 +80,7 @@ static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, cfg = &d->staged_config[s->conf_type]; if (cfg->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", d->id); + rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); return -EINVAL; } @@ -151,7 +151,7 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) * resource type. 
*/ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d) + struct rdt_ctrl_domain *d) { struct rdtgroup *rdtgrp = data->rdtgrp; struct resctrl_staged_config *cfg; @@ -160,7 +160,7 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, cfg = &d->staged_config[s->conf_type]; if (cfg->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", d->id); + rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); return -EINVAL; } @@ -230,7 +230,7 @@ static int parse_line(char *line, struct resctrl_schema *s, struct rdt_resource *r = s->res; struct rdt_parse_data data; char *dom = NULL, *id; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; unsigned long dom_id; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -252,8 +252,8 @@ static int parse_line(char *line, struct resctrl_schema *s, return -EINVAL; } dom = strim(dom); - list_for_each_entry(d, &r->domains, list) { - if (d->id == dom_id) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + if (d->hdr.id == dom_id) { data.buf = dom; data.rdtgrp = rdtgrp; if (parse_ctrlval(&data, s, d)) @@ -377,7 +377,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid) { struct rdt_resource *r = schema->res; - struct rdt_domain *dom; + struct rdt_ctrl_domain *dom; bool sep = false; u32 ctrl_val; @@ -385,7 +385,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo lockdep_assert_cpus_held(); seq_printf(s, "%*s:", max_name_width, schema->name); - list_for_each_entry(dom, &r->domains, list) { + list_for_each_entry(dom, &r->ctrl_domains, hdr.list) { if (sep) seq_puts(s, ";"); @@ -395,7 +395,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo ctrl_val = resctrl_arch_get_config(r, dom, closid, schema->conf_type); - seq_printf(s, r->format_str, dom->id, max_data_width, + seq_printf(s, r->format_str, dom->hdr.id, max_data_width, ctrl_val); sep = true; } @@ -424,7 +424,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of, } else { seq_printf(s, "%s:%d=%x\n", rdtgrp->plr->s->res->name, - rdtgrp->plr->d->id, + rdtgrp->plr->d->hdr.id, rdtgrp->plr->cbm); } } else { @@ -449,8 +449,8 @@ static int smp_mon_event_count(void *arg) } void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, - struct rdt_domain *d, struct rdtgroup *rdtgrp, - int evtid, int first) + struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, + cpumask_t *cpumask, int evtid, int first) { int cpu; @@ -464,7 +464,6 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, rr->evtid = evtid; rr->r = r; rr->d = d; - rr->val = 0; rr->first = first; rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid); if (IS_ERR(rr->arch_mon_ctx)) { @@ -472,7 +471,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, return; } - cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU); + cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU); /* * cpumask_any_housekeeping() prefers housekeeping CPUs, but @@ -481,7 +480,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, * counters on some platforms if its called in IRQ context. 
*/ if (tick_nohz_full_cpu(cpu)) - smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1); + smp_call_function_any(cpumask, mon_event_count, rr, 1); else smp_call_on_cpu(cpu, smp_mon_event_count, rr, false); @@ -491,13 +490,15 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, int rdtgroup_mondata_show(struct seq_file *m, void *arg) { struct kernfs_open_file *of = m->private; + struct rdt_domain_hdr *hdr; + struct rmid_read rr = {0}; + struct rdt_mon_domain *d; u32 resid, evtid, domid; struct rdtgroup *rdtgrp; struct rdt_resource *r; union mon_data_bits md; - struct rdt_domain *d; - struct rmid_read rr; - int ret = 0, index; + int ret = 0, index, cpu; + struct cacheinfo *ci; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { @@ -511,10 +512,39 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) evtid = md.u.evtid; r = resctrl_arch_get_resource(resid); - d = resctrl_arch_find_domain(r, domid); - if (IS_ERR_OR_NULL(d)) { + if (md.u.sum) { + /* + * This file requires summing across all domains that share + * the L3 cache id that was provided in the "domid" field of the + * mon_data_bits union. Search all domains in the resource for + * one that matches this cache id. + */ + list_for_each_entry(d, &r->mon_domains, hdr.list) { + if (d->ci_id == domid) { + rr.ci_id = d->ci_id; + cpu = cpumask_any(&d->hdr.cpu_mask); + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) + continue; + mon_event_read(&rr, r, NULL, rdtgrp, + &ci->shared_cpu_map, evtid, false); + goto checkresult; + } + } ret = -ENOENT; goto out; + } else { + /* + * This file provides data from a single domain. Search + * the resource to find the domain with "domid". + */ + hdr = rdt_find_domain(&r->mon_domains, domid, NULL); + if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) { + ret = -ENOENT; + goto out; + } + d = container_of(hdr, struct rdt_mon_domain, hdr); + mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false); } if (resctrl_arch_get_abmc_enabled() && evtid != QOS_L3_OCCUP_EVENT_ID) { @@ -526,8 +556,6 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) } } - mon_event_read(&rr, r, d, rdtgrp, evtid, false); - checkresult: if (rr.err == -EIO) seq_puts(m, "Error\n"); diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 58d812d6c1d2ea3e8df4e9341e5919a26482b737..89cfe8c78666b523c4510a6572e73642305883ae 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -100,17 +100,37 @@ union mon_data_bits { void *priv; struct { unsigned int rid : 10; - enum resctrl_event_id evtid : 8; + enum resctrl_event_id evtid : 7; + unsigned int sum : 1; unsigned int domid : 14; } u; }; +/** + * struct rmid_read - Data passed across smp_call*() to read event count. + * @rgrp: Resource group for which the counter is being read. If it is a parent + * resource group then its event count is summed with the count from all + * its child resource groups. + * @r: Resource describing the properties of the event being read. + * @d: Domain that the counter should be read from. If NULL then sum all + * domains in @r sharing L3 @ci.id + * @evtid: Which monitor event to read. + * @first: Initialize MBM counter when true. + * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains. + * @err: Error encountered when reading counter. + * @val: Returned value of event counter. If @rgrp is a parent resource group, + * @val includes the sum of event counts from its child resource groups. 
+ * If @d is NULL, @val includes the sum of all domains in @r sharing @ci.id, + * (summed across child resource groups if @rgrp is a parent resource group). + * @arch_mon_ctx: Hardware monitor allocated for this read request (MPAM only). + */ struct rmid_read { struct rdtgroup *rgrp; struct rdt_resource *r; - struct rdt_domain *d; + struct rdt_mon_domain *d; enum resctrl_event_id evtid; bool first; + unsigned long ci_id; int err; u64 val; void *arch_mon_ctx; @@ -282,9 +302,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); int rdtgroup_schemata_show(struct kernfs_open_file *of, struct seq_file *s, void *v); -bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d, unsigned long cbm, int closid, bool exclusive); -unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, +unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d, unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); @@ -296,19 +316,19 @@ void resctrl_mon_resource_exit(void); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, - struct rdt_domain *d, struct rdtgroup *rdtgrp, - int evtid, int first); + struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, + cpumask_t *cpumask, int evtid, int first); int resctrl_mon_resource_init(void); -void mbm_setup_overflow_handler(struct rdt_domain *dom, +void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu); void mbm_handle_overflow(struct work_struct *work); bool is_mba_sc(struct rdt_resource *r); -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, +void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu); void cqm_handle_limbo(struct work_struct *work); -bool has_busy_rmid(struct rdt_domain *d); -void __check_limbo(struct rdt_domain *d, bool force_free); +bool has_busy_rmid(struct rdt_mon_domain *d); +void __check_limbo(struct rdt_mon_domain *d, bool force_free); int mbm_cntr_alloc(struct rdt_resource *r); void mbm_cntr_free(u32 cntr_id); void rdt_staged_configs_clear(void); @@ -323,8 +343,8 @@ void rdtgroup_free_cntr(struct rdt_resource *r, struct rdtgroup *rdtgrp, int ind #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm); +bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d); int rdt_pseudo_lock_init(void); void rdt_pseudo_lock_release(void); int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index b6bb310bb10bdc6b7eeb3cb652d4847b72065e68..55d37e8f180759fd4cb52d4342bf3d75568fd205 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -123,7 +123,7 @@ static void limbo_release_entry(struct rmid_entry *entry) * decrement the count. 
If the busy count gets to zero on an RMID, we * free the RMID */ -void __check_limbo(struct rdt_domain *d, bool force_free) +void __check_limbo(struct rdt_mon_domain *d, bool force_free) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); u32 idx_limit = resctrl_arch_system_num_rmid_idx(); @@ -171,7 +171,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free) resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx); } -bool has_busy_rmid(struct rdt_domain *d) +bool has_busy_rmid(struct rdt_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); @@ -272,7 +272,7 @@ int alloc_rmid(u32 closid) static void add_rmid_to_limbo(struct rmid_entry *entry) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_domain *d; + struct rdt_mon_domain *d; u32 idx; lockdep_assert_held(&rdtgroup_mutex); @@ -283,7 +283,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid); entry->busy = 0; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { /* * For the first limbo RMID in the domain, * setup up the limbo worker. @@ -325,7 +325,7 @@ void free_rmid(u32 closid, u32 rmid) list_add_tail(&entry->list, &rmid_free_lru); } -static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid, +static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id evtid) { u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); @@ -342,7 +342,11 @@ static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid, static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) { + int cpu = smp_processor_id(); + struct rdt_mon_domain *d; + struct cacheinfo *ci; struct mbm_state *m; + int err, ret; u64 tval = 0; if (rr->first) { @@ -353,14 +357,49 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) return 0; } - rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid, - &tval, rr->arch_mon_ctx); - if (rr->err) - return rr->err; + if (rr->d) { + /* Reading a single domain, must be on a CPU in that domain. */ + if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask)) + return -EINVAL; + rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, + rr->evtid, &tval, rr->arch_mon_ctx); - rr->val += tval; + if (rr->err) + return rr->err; - return 0; + rr->val += tval; + + return 0; + } + + /* Summing domains that share a cache, must be on a CPU for that cache. */ + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci || ci->id != rr->ci_id) + return -EINVAL; + + /* + * Legacy files must report the sum of an event across all + * domains that share the same L3 cache instance. + * Report success if a read from any domain succeeds, -EINVAL + * (translated to "Unavailable" for user space) if reading from + * all domains fail for any reason. + */ + ret = -EINVAL; + list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { + if (d->ci_id != rr->ci_id) + continue; + err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, + rr->evtid, &tval, rr->arch_mon_ctx); + if (!err) { + rr->val += tval; + ret = 0; + } + } + + if (ret) + rr->err = ret; + + return ret; } /* @@ -461,12 +500,12 @@ void mon_event_count(void *info) * throttle MSRs already have low percentage values. To avoid * unnecessarily restricting such rdtgroups, we also increase the bandwidth. 
*/ -static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) +static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) { u32 closid, rmid, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; struct rdt_resource *r_mba; - struct rdt_domain *dom_mba; + struct rdt_ctrl_domain *dom_mba; u32 cur_bw, user_bw, idx; struct list_head *head; struct rdtgroup *entry; @@ -481,7 +520,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) idx = resctrl_arch_rmid_idx_encode(closid, rmid); pmbm_data = &dom_mbm->mbm_local[idx]; - dom_mba = resctrl_get_domain_from_cpu(smp_processor_id(), r_mba); + dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba); if (!dom_mba) { pr_warn_once("Failure to get domain for MBA update\n"); return; @@ -527,12 +566,11 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); } -static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, +static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, u32 closid, u32 rmid) { - struct rmid_read rr; + struct rmid_read rr = {0}; - rr.first = false; rr.r = r; rr.d = d; @@ -585,17 +623,17 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, void cqm_handle_limbo(struct work_struct *work) { unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL); - struct rdt_domain *d; + struct rdt_mon_domain *d; cpus_read_lock(); mutex_lock(&rdtgroup_mutex); - d = container_of(work, struct rdt_domain, cqm_limbo.work); + d = container_of(work, struct rdt_mon_domain, cqm_limbo.work); __check_limbo(d, false); if (has_busy_rmid(d)) { - d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask, + d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask, RESCTRL_PICK_ANY_CPU); schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo, delay); @@ -613,20 +651,20 @@ void cqm_handle_limbo(struct work_struct *work) * @exclude_cpu: Which CPU the handler should not run on, * RESCTRL_PICK_ANY_CPU to pick any CPU. */ -void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms, +void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu) { unsigned long delay = msecs_to_jiffies(delay_ms); int cpu; - cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu); + cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu); dom->cqm_work_cpu = cpu; if (cpu < nr_cpu_ids) schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay); } -bool is_rdt_domain_valid(struct rdt_resource *r, struct rdt_domain *d) +bool is_rdt_domain_valid(struct rdt_resource *r, struct rdt_mon_domain *d) { int i; @@ -640,9 +678,9 @@ void mbm_handle_overflow(struct work_struct *work) { unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL); struct rdtgroup *prgrp, *crgrp; + struct rdt_mon_domain *d; struct list_head *head; struct rdt_resource *r; - struct rdt_domain *d; cpus_read_lock(); mutex_lock(&rdtgroup_mutex); @@ -655,7 +693,7 @@ void mbm_handle_overflow(struct work_struct *work) goto out_unlock; r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - d = container_of(work, struct rdt_domain, mbm_over.work); + d = container_of(work, struct rdt_mon_domain, mbm_over.work); if (!is_rdt_domain_valid(r, d)) goto out_unlock; @@ -675,7 +713,7 @@ void mbm_handle_overflow(struct work_struct *work) * Re-check for housekeeping CPUs. This allows the overflow handler to * move off a nohz_full CPU quickly. 
*/ - d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask, + d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask, RESCTRL_PICK_ANY_CPU); schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay); @@ -692,7 +730,7 @@ void mbm_handle_overflow(struct work_struct *work) * @exclude_cpu: Which CPU the handler should not run on, * RESCTRL_PICK_ANY_CPU to pick any CPU. */ -void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms, +void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, int exclude_cpu) { unsigned long delay = msecs_to_jiffies(delay_ms); @@ -704,7 +742,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms, */ if (!resctrl_mounted || !resctrl_arch_mon_capable()) return; - cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu); + cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu); dom->mbm_work_cpu = cpu; if (cpu < nr_cpu_ids) diff --git a/fs/resctrl/psuedo_lock.c b/fs/resctrl/psuedo_lock.c index 077c2abb6edd90cad1e1e237dd4c1861cf1dc025..6d5291c1b1212317dcee53a793c4c8ed6e6428f8 100644 --- a/fs/resctrl/psuedo_lock.c +++ b/fs/resctrl/psuedo_lock.c @@ -155,7 +155,7 @@ static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr) int cpu; int ret; - for_each_cpu(cpu, &plr->d->cpu_mask) { + for_each_cpu(cpu, &plr->d->hdr.cpu_mask) { pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL); if (!pm_req) { rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n"); @@ -226,12 +226,15 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr) */ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) { - struct cpu_cacheinfo *ci; + enum resctrl_scope scope = plr->s->res->ctrl_scope; + struct cacheinfo *ci; int ret; - int i; + + if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE)) + return -ENODEV; /* Pick the first cpu we find that is associated with the cache. */ - plr->cpu = cpumask_first(&plr->d->cpu_mask); + plr->cpu = cpumask_first(&plr->d->hdr.cpu_mask); if (!cpu_online(plr->cpu)) { rdt_last_cmd_printf("CPU %u associated with cache not online\n", @@ -240,15 +243,11 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) goto out_region; } - ci = get_cpu_cacheinfo(plr->cpu); - - plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm); - - for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == plr->s->res->cache_level) { - plr->line_size = ci->info_list[i].coherency_line_size; - return 0; - } + ci = get_cpu_cacheinfo_level(plr->cpu, scope); + if (ci) { + plr->line_size = ci->coherency_line_size; + plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm); + return 0; } ret = -1; @@ -614,7 +613,7 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) * Return: true if @cbm overlaps with pseudo-locked region on @d, false * otherwise. */ -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm) { unsigned int cbm_len; unsigned long cbm_b; @@ -641,12 +640,12 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm * if it is not possible to test due to memory allocation issue, * false otherwise. 
*/ -bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) +bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d) { + struct rdt_ctrl_domain *d_i; cpumask_var_t cpu_with_psl; enum resctrl_res_level i; struct rdt_resource *r; - struct rdt_domain *d_i; bool ret = false; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -664,10 +663,10 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) if (!r->alloc_capable) continue; - list_for_each_entry(d_i, &r->domains, list) { + list_for_each_entry(d_i, &r->ctrl_domains, hdr.list) { if (d_i->plr) cpumask_or(cpu_with_psl, cpu_with_psl, - &d_i->cpu_mask); + &d_i->hdr.cpu_mask); } } @@ -675,7 +674,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) * Next test if new pseudo-locked region would intersect with * existing region. */ - if (cpumask_intersects(&d->cpu_mask, cpu_with_psl)) + if (cpumask_intersects(&d->hdr.cpu_mask, cpu_with_psl)) ret = true; free_cpumask_var(cpu_with_psl); @@ -715,7 +714,7 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) } plr->thread_done = 0; - cpu = cpumask_first(&plr->d->cpu_mask); + cpu = cpumask_first(&plr->d->hdr.cpu_mask); if (!cpu_online(cpu)) { ret = -ENODEV; goto out; @@ -1046,7 +1045,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) * may be scheduled elsewhere and invalidate entries in the * pseudo-locked region. */ - if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) { + if (!cpumask_subset(current->cpus_ptr, &plr->d->hdr.cpu_mask)) { mutex_unlock(&rdtgroup_mutex); return -EINVAL; } diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index aba699474e8655139df557484b4e47f20e98484b..c81d1966eb0674643a9cb8273974506a21ae5f48 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -12,7 +12,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include #include @@ -94,9 +93,9 @@ void rdt_last_cmd_printf(const char *fmt, ...) void rdt_staged_configs_clear(void) { + struct rdt_ctrl_domain *dom; enum resctrl_res_level i; struct rdt_resource *r; - struct rdt_domain *dom; lockdep_assert_held(&rdtgroup_mutex); @@ -105,7 +104,7 @@ void rdt_staged_configs_clear(void) if (!r->alloc_capable) continue; - list_for_each_entry(dom, &r->domains, list) + list_for_each_entry(dom, &r->ctrl_domains, hdr.list) memset(dom->staged_config, 0, sizeof(dom->staged_config)); } } @@ -369,7 +368,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of, rdt_last_cmd_puts("Cache domain offline\n"); ret = -ENODEV; } else { - mask = &rdtgrp->plr->d->cpu_mask; + mask = &rdtgrp->plr->d->hdr.cpu_mask; seq_printf(s, is_cpu_list(of) ? 
"%*pbl\n" : "%*pb\n", cpumask_pr_args(mask)); @@ -909,11 +908,11 @@ static int rdtgroup_mbm_mode_show(struct kernfs_open_file *of, static void rdtgroup_mbm_cntr_reset(struct rdt_resource *r) { struct rdtgroup *prgrp, *crgrp; - struct rdt_domain *dom; + struct rdt_mon_domain *dom; mbm_cntrs_init(r); - list_for_each_entry(dom, &r->domains, list) + list_for_each_entry(dom, &r->mon_domains, hdr.list) bitmap_zero(dom->mbm_cntr_map, r->mon.num_mbm_cntrs); /* Reset the cntr_id's for all the monitor groups */ @@ -976,7 +975,7 @@ static int rdtgroup_num_mbm_cntrs_show(struct kernfs_open_file *of, } static char *rdtgroup_mon_state_to_str(struct rdtgroup *rdtgrp, - struct rdt_domain *d, char *str) + struct rdt_mon_domain *d, char *str) { char *tmp = str; int index; @@ -1007,7 +1006,7 @@ static int rdtgroup_mbm_control_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { struct rdt_resource *r = of->kn->parent->priv; - struct rdt_domain *dom; + struct rdt_mon_domain *dom; struct rdtgroup *rdtg; char str[10]; @@ -1023,8 +1022,8 @@ static int rdtgroup_mbm_control_show(struct kernfs_open_file *of, seq_printf(s, "%s//", rdtg->kn->name); - list_for_each_entry(dom, &r->domains, list) - seq_printf(s, "%d=%s;", dom->id, + list_for_each_entry(dom, &r->mon_domains, hdr.list) + seq_printf(s, "%d=%s;", dom->hdr.id, rdtgroup_mon_state_to_str(rdtg, dom, str)); seq_putc(s, '\n'); @@ -1032,8 +1031,8 @@ static int rdtgroup_mbm_control_show(struct kernfs_open_file *of, mon.crdtgrp_list) { seq_printf(s, "%s/%s/", rdtg->kn->name, crg->kn->name); - list_for_each_entry(dom, &r->domains, list) - seq_printf(s, "%d=%s;", dom->id, + list_for_each_entry(dom, &r->mon_domains, hdr.list) + seq_printf(s, "%d=%s;", dom->hdr.id, rdtgroup_mon_state_to_str(crg, dom, str)); seq_putc(s, '\n'); } @@ -1050,7 +1049,7 @@ static int rdtgroup_mbm_control_show(struct kernfs_open_file *of, * the assignment else just update the assign state */ static int rdtgroup_assign_update(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid, - struct rdt_domain *d) + struct rdt_mon_domain *d) { int ret, index; @@ -1086,7 +1085,7 @@ static int rdtgroup_assign_update(struct rdtgroup *rdtgrp, enum resctrl_event_id * update the unassign state */ static int rdtgroup_unassign_update(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid, - struct rdt_domain *d) + struct rdt_mon_domain *d) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); int ret = 0, index; @@ -1168,7 +1167,7 @@ static int rdtgroup_process_flags(struct rdt_resource *r, { int op, mon_state, assign_state, unassign_state; char *dom_str, *id_str, *op_str; - struct rdt_domain *d; + struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; unsigned long dom_id; int ret, found = 0; @@ -1208,8 +1207,8 @@ static int rdtgroup_process_flags(struct rdt_resource *r, } /* Verify if the dom_id is valid */ - list_for_each_entry(d, &r->domains, list) { - if (d->id == dom_id) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { + if (d->hdr.id == dom_id) { found = 1; break; } @@ -1521,7 +1520,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, unsigned long sw_shareable = 0, hw_shareable = 0; unsigned long exclusive = 0, pseudo_locked = 0; struct rdt_resource *r = s->res; - struct rdt_domain *dom; + struct rdt_ctrl_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; bool sep = false; @@ -1530,12 +1529,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, cpus_read_lock(); mutex_lock(&rdtgroup_mutex); hw_shareable = r->cache.shareable_bits; - 
list_for_each_entry(dom, &r->domains, list) { + list_for_each_entry(dom, &r->ctrl_domains, hdr.list) { if (sep) seq_putc(seq, ';'); sw_shareable = 0; exclusive = 0; - seq_printf(seq, "%d=", dom->id); + seq_printf(seq, "%d=", dom->hdr.id); for (i = 0; i < closids_supported(); i++) { if (!closid_allocated(i)) continue; @@ -1752,7 +1751,7 @@ static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, * * Return: false if CBM does not overlap, true if it does. */ -static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, +static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d, unsigned long cbm, int closid, enum resctrl_conf_type type, bool exclusive) { @@ -1807,7 +1806,7 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d * * Return: true if CBM overlap detected, false if there is no overlap */ -bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d, unsigned long cbm, int closid, bool exclusive) { enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); @@ -1838,10 +1837,10 @@ bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) { int closid = rdtgrp->closid; + struct rdt_ctrl_domain *d; struct resctrl_schema *s; struct rdt_resource *r; bool has_cache = false; - struct rdt_domain *d; u32 ctrl; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -1852,7 +1851,7 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA) continue; has_cache = true; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { ctrl = resctrl_arch_get_config(r, d, closid, s->conf_type); if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) { @@ -1958,20 +1957,19 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, * bitmap functions work correctly. 
*/ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, - struct rdt_domain *d, unsigned long cbm) + struct rdt_ctrl_domain *d, unsigned long cbm) { - struct cpu_cacheinfo *ci; unsigned int size = 0; - int num_b, i; + struct cacheinfo *ci; + int num_b; + + if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE)) + return size; num_b = bitmap_weight(&cbm, r->cache.cbm_len); - ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); - for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == r->cache_level) { - size = ci->info_list[i].size / r->cache.cbm_len * num_b; - break; - } - } + ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope); + if (ci) + size = ci->size / r->cache.cbm_len * num_b; return size; } @@ -1987,9 +1985,9 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, { struct resctrl_schema *schema; enum resctrl_conf_type type; + struct rdt_ctrl_domain *d; struct rdtgroup *rdtgrp; struct rdt_resource *r; - struct rdt_domain *d; unsigned int size; int ret = 0; u32 closid; @@ -2013,7 +2011,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res, rdtgrp->plr->d, rdtgrp->plr->cbm); - seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); + seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size); } goto out; } @@ -2025,7 +2023,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, type = schema->conf_type; sep = false; seq_printf(s, "%*s:", max_name_width, schema->name); - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { if (sep) seq_putc(s, ';'); if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { @@ -2043,7 +2041,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, else size = rdtgroup_cbm_to_size(r, d, ctrl); } - seq_printf(s, "%d=%u", d->id, size); + seq_printf(s, "%d=%u", d->hdr.id, size); sep = true; } seq_putc(s, '\n'); @@ -2057,14 +2055,14 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) { - struct rdt_domain *dom; + struct rdt_mon_domain *dom; bool sep = false; u32 val; cpus_read_lock(); mutex_lock(&rdtgroup_mutex); - list_for_each_entry(dom, &r->domains, list) { + list_for_each_entry(dom, &r->mon_domains, hdr.list) { if (sep) seq_puts(s, ";"); @@ -2072,7 +2070,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid if (val == INVALID_CONFIG_VALUE) break; - seq_printf(s, "%d=0x%02x", dom->id, val); + seq_printf(s, "%d=0x%02x", dom->hdr.id, val); sep = true; } seq_puts(s, "\n"); @@ -2104,7 +2102,7 @@ static int mbm_local_bytes_config_show(struct kernfs_open_file *of, } static int mbm_config_write_domain(struct rdt_resource *r, - struct rdt_domain *d, u32 evtid, u32 val) + struct rdt_mon_domain *d, u32 evtid, u32 val) { struct resctrl_mon_config_info mon_info = {0}; u32 config_val; @@ -2127,7 +2125,7 @@ static int mbm_config_write_domain(struct rdt_resource *r, * are scoped at the domain level. Writing any of these MSRs * on one CPU is observed by all the CPUs in the domain. 
*/ - smp_call_function_any(&d->cpu_mask, resctrl_arch_event_config_set, + smp_call_function_any(&d->hdr.cpu_mask, resctrl_arch_event_config_set, &mon_info, 1); if (mon_info.err) { rdt_last_cmd_puts("Invalid event configuration\n"); @@ -2152,7 +2150,7 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) { char *dom_str = NULL, *id_str; unsigned long dom_id, val; - struct rdt_domain *d; + struct rdt_mon_domain *d; int err; /* Walking r->domains, ensure it can't race with cpuhp */ @@ -2183,8 +2181,8 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) return -EINVAL; } - list_for_each_entry(d, &r->domains, list) { - if (d->id == dom_id) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { + if (d->hdr.id == dom_id) { err = mbm_config_write_domain(r, d, evtid, val); if (err) return err; @@ -2281,7 +2279,7 @@ int rdtgroup_alloc_cntr(struct rdtgroup *rdtgrp, int index) int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_domain *d; + struct rdt_mon_domain *d; int index; index = mon_event_config_index_get(evtid); @@ -2291,7 +2289,7 @@ int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) if (rdtgroup_alloc_cntr(rdtgrp, index)) return -EINVAL; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid, rdtgrp->mon.cntr_id[index], rdtgrp->closid, true); @@ -2303,9 +2301,9 @@ int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) static int rdtgroup_mbm_cntr_test(struct rdt_resource *r, u32 cntr_id) { - struct rdt_domain *d; + struct rdt_mon_domain *d; - list_for_each_entry(d, &r->domains, list) + list_for_each_entry(d, &r->mon_domains, hdr.list) if (test_bit(cntr_id, d->mbm_cntr_map)) return 1; @@ -2330,7 +2328,7 @@ void rdtgroup_free_cntr(struct rdt_resource *r, struct rdtgroup *rdtgrp, int rdtgroup_unassign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_domain *d; + struct rdt_mon_domain *d; int index; index = mon_event_config_index_get(evtid); @@ -2338,7 +2336,7 @@ int rdtgroup_unassign_cntr(struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) return -EINVAL; if (rdtgrp->mon.cntr_id[index] != MON_CNTR_UNSET) { - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid, rdtgrp->mon.cntr_id[index], rdtgrp->closid, false); @@ -2818,10 +2816,10 @@ static inline bool is_mba_linear(void) return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear; } -static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) +static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d) { u32 num_closid = resctrl_arch_get_num_closid(r); - int cpu = cpumask_any(&d->cpu_mask); + int cpu = cpumask_any(&d->hdr.cpu_mask); int i; d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val), @@ -2836,7 +2834,7 @@ static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) } static void mba_sc_domain_destroy(struct rdt_resource *r, - struct rdt_domain *d) + struct rdt_ctrl_domain *d) { kfree(d->mbps_val); d->mbps_val = NULL; @@ -2844,14 +2842,18 @@ static void mba_sc_domain_destroy(struct rdt_resource *r, /* * MBA software controller is supported only if - * MBM is 
supported and MBA is in linear scale. + * MBM is supported and MBA is in linear scale, + * and the MBM monitor scope is the same as MBA + * control scope. */ static bool supports_mba_mbps(void) { + struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); return (resctrl_arch_is_mbm_local_enabled() && - r->alloc_capable && is_mba_linear()); + r->alloc_capable && is_mba_linear() && + r->ctrl_scope == rmbm->mon_scope); } /* @@ -2862,7 +2864,7 @@ static int set_mba_sc(bool mba_sc) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); u32 num_closid = resctrl_arch_get_num_closid(r); - struct rdt_domain *d; + struct rdt_ctrl_domain *d; int i; if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) @@ -2870,7 +2872,7 @@ static int set_mba_sc(bool mba_sc) r->membw.mba_sc = mba_sc; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { for (i = 0; i < num_closid; i++) d->mbps_val[i] = MBA_MAX_MBPS; } @@ -3160,7 +3162,7 @@ static int rdt_get_tree(struct fs_context *fc) struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdt_fs_context *ctx = rdt_fc2context(fc); unsigned long flags = RFTYPE_CTRL_BASE; - struct rdt_domain *dom; + struct rdt_mon_domain *dom; int ret; cpus_read_lock(); @@ -3237,7 +3239,7 @@ static int rdt_get_tree(struct fs_context *fc) resctrl_mounted = true; if (resctrl_is_mbm_enabled()) { - list_for_each_entry(dom, &l3->domains, list) + list_for_each_entry(dom, &l3->mon_domains, hdr.list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL, RESCTRL_PICK_ANY_CPU); } @@ -3290,6 +3292,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct rdt_fs_context *ctx = rdt_fc2context(fc); struct fs_parse_result result; + const char *msg; int opt; opt = fs_parse(fc, rdt_fs_parameters, param, &result); @@ -3304,8 +3307,9 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->enable_cdpl2 = true; return 0; case Opt_mba_mbps: + msg = "mba_MBps requires local MBM and linear scale MBA at L3 scope"; if (!supports_mba_mbps()) - return -EINVAL; + return invalfc(fc, msg); ctx->enable_mba_mbps = true; return 0; case Opt_hwdrc_mb: @@ -3462,7 +3466,7 @@ static void rmdir_all_sub(void) static void rdt_kill_sb(struct super_block *sb) { struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_domain *d; + struct rdt_mon_domain *d; cpus_read_lock(); mutex_lock(&rdtgroup_mutex); @@ -3478,7 +3482,7 @@ static void rdt_kill_sb(struct super_block *sb) * When resctrl is umounted, forcefully cancel delayed works since the * new mount option may be changed. */ - list_for_each_entry(d, &l3->domains, list) { + list_for_each_entry(d, &l3->mon_domains, hdr.list) { if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) { @@ -3531,62 +3535,126 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name, return ret; } +static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname) +{ + struct kernfs_node *kn; + + kn = kernfs_find_and_get(pkn, name); + if (!kn) + return; + kernfs_put(kn); + + if (kn->dir.subdirs <= 1) + kernfs_remove(kn); + else + kernfs_remove_by_name(kn, subname); +} + /* * Remove all subdirectories of mon_data of ctrl_mon groups - * and monitor groups with given domain id. + * and monitor groups for the given domain. 
+ * Remove files and directories containing "sum" of domain data + * when last domain being summed is removed. */ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - unsigned int dom_id) + struct rdt_mon_domain *d) { struct rdtgroup *prgrp, *crgrp; + char subname[32]; + bool snc_mode; char name[32]; + snc_mode = r->mon_scope == RESCTRL_L3_NODE; + sprintf(name, "mon_%s_%02lu", r->name, snc_mode ? d->ci_id : d->hdr.id); + if (snc_mode) + sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); + list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - sprintf(name, "mon_%s_%02d", r->name, dom_id); - kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); + mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname); list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) - kernfs_remove_by_name(crgrp->mon.mon_data_kn, name); + mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname); } } -static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, - struct rdt_domain *d, - struct rdt_resource *r, struct rdtgroup *prgrp) +static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, + struct rdt_resource *r, struct rdtgroup *prgrp, + bool do_sum) { + struct rmid_read rr = {0}; union mon_data_bits priv; - struct kernfs_node *kn; struct mon_evt *mevt; - struct rmid_read rr; - char name[32]; int ret; - sprintf(name, "mon_%s_%02d", r->name, d->id); - /* create the directory */ - kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); - if (IS_ERR(kn)) - return PTR_ERR(kn); - - ret = rdtgroup_kn_set_ugid(kn); - if (ret) - goto out_destroy; - - if (WARN_ON(list_empty(&r->mon.evt_list))) { - ret = -EPERM; - goto out_destroy; - } + if (WARN_ON(list_empty(&r->mon.evt_list))) + return -EPERM; priv.u.rid = r->rid; - priv.u.domid = d->id; + priv.u.domid = do_sum ? d->ci_id : d->hdr.id; + priv.u.sum = do_sum; list_for_each_entry(mevt, &r->mon.evt_list, list) { priv.u.evtid = mevt->evtid; ret = mon_addfile(kn, mevt->name, priv.priv); if (ret) + return ret; + + if (!do_sum && resctrl_is_mbm_event(mevt->evtid)) + mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true); + } + + return 0; +} + +static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, + struct rdt_mon_domain *d, + struct rdt_resource *r, struct rdtgroup *prgrp) +{ + struct kernfs_node *kn, *ckn; + char name[32]; + bool snc_mode; + int ret = 0; + + lockdep_assert_held(&rdtgroup_mutex); + + snc_mode = r->mon_scope == RESCTRL_L3_NODE; + sprintf(name, "mon_%s_%02lu", r->name, snc_mode ? d->ci_id : d->hdr.id); + kn = kernfs_find_and_get(parent_kn, name); + if (kn) { + /* + * rdtgroup_mutex will prevent this directory from being + * removed. No need to keep this hold. 
+ */ + kernfs_put(kn); + } else { + kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); + if (IS_ERR(kn)) + return PTR_ERR(kn); + + ret = rdtgroup_kn_set_ugid(kn); + if (ret) + goto out_destroy; + ret = mon_add_all_files(kn, d, r, prgrp, snc_mode); + if (ret) + goto out_destroy; + } + + if (snc_mode) { + sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id); + ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp); + if (IS_ERR(ckn)) { + ret = -EINVAL; goto out_destroy; + } - if (resctrl_is_mbm_event(mevt->evtid)) - mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); + ret = rdtgroup_kn_set_ugid(ckn); + if (ret) + goto out_destroy; + + ret = mon_add_all_files(ckn, d, r, prgrp, false); + if (ret) + goto out_destroy; } + kernfs_activate(kn); return 0; @@ -3600,7 +3668,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, * and "monitor" groups with given domain id. */ static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_domain *d) + struct rdt_mon_domain *d) { struct kernfs_node *parent_kn; struct rdtgroup *prgrp, *crgrp; @@ -3622,13 +3690,13 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, struct rdt_resource *r, struct rdtgroup *prgrp) { - struct rdt_domain *dom; + struct rdt_mon_domain *dom; int ret; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - list_for_each_entry(dom, &r->domains, list) { + list_for_each_entry(dom, &r->mon_domains, hdr.list) { ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); if (ret) return ret; @@ -3732,7 +3800,7 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) * Set the RDT domain up to start off with all usable allocations. That is, * all shareable and unused bits. All-zero CBM is invalid. 
*/ -static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, +static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s, u32 closid) { enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); @@ -3792,7 +3860,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, */ tmp_cbm = cfg->new_ctrl; if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { - rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id); + rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id); return -ENOSPC; } cfg->have_new_ctrl = true; @@ -3812,10 +3880,10 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, */ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) { - struct rdt_domain *d; + struct rdt_ctrl_domain *d; int ret; - list_for_each_entry(d, &s->res->domains, list) { + list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) { ret = __init_one_rdt_domain(d, s, closid); if (ret < 0) return ret; @@ -3828,9 +3896,9 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) { struct resctrl_staged_config *cfg; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; - list_for_each_entry(d, &r->domains, list) { + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { if (is_mba_sc(r)) { d->mbps_val[closid] = MBA_MAX_MBPS; continue; @@ -4478,7 +4546,7 @@ static void rdtgroup_setup_default(void) mutex_unlock(&rdtgroup_mutex); } -static void domain_destroy_mon_state(struct rdt_domain *d) +static void domain_destroy_mon_state(struct rdt_mon_domain *d) { bitmap_free(d->mbm_cntr_map); bitmap_free(d->rmid_busy_llc); @@ -4486,22 +4554,26 @@ static void domain_destroy_mon_state(struct rdt_domain *d) kfree(d->mbm_local); } -void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) +void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) { mutex_lock(&rdtgroup_mutex); if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) mba_sc_domain_destroy(r, d); - if (!r->mon_capable) - goto out_unlock; + mutex_unlock(&rdtgroup_mutex); +} + +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) +{ + mutex_lock(&rdtgroup_mutex); /* * If resctrl is mounted, remove all the * per domain monitor data directories. 
*/ if (resctrl_mounted && resctrl_arch_mon_capable()) - rmdir_mondata_subdir_allrdtgrp(r, d->id); + rmdir_mondata_subdir_allrdtgrp(r, d); if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); @@ -4520,11 +4592,10 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) domain_destroy_mon_state(d); -out_unlock: mutex_unlock(&rdtgroup_mutex); } -static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) +static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); size_t tsize; @@ -4564,20 +4635,26 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) return 0; } -int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) +int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) { int err = 0; mutex_lock(&rdtgroup_mutex); - if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) { + if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) /* RDT_RESOURCE_MBA is never mon_capable */ err = mba_sc_domain_allocate(r, d); - goto out_unlock; - } - if (!r->mon_capable) - goto out_unlock; + mutex_unlock(&rdtgroup_mutex); + + return err; +} + +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) +{ + int err = 0; + + mutex_lock(&rdtgroup_mutex); err = domain_setup_mon_state(r, d); if (err) @@ -4628,8 +4705,8 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) void resctrl_offline_cpu(unsigned int cpu) { struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; - struct rdt_domain *d; mutex_lock(&rdtgroup_mutex); list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { @@ -4642,7 +4719,7 @@ void resctrl_offline_cpu(unsigned int cpu) if (!l3->mon_capable) goto out_unlock; - d = resctrl_get_domain_from_cpu(cpu, l3); + d = get_mon_domain_from_cpu(cpu, l3); if (d) { if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) { cancel_delayed_work(&d->mbm_over); diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 251e8b393ec7282caf889438b6999fc74a297735..b11a4f0eb06f3cf670c1be6982f40372118cda5f 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,6 +3,7 @@ #define _LINUX_CACHEINFO_H #include +#include #include #include @@ -114,23 +115,37 @@ const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); unsigned long cache_of_get_id(struct device_node *np); /* - * Get the id of the cache associated with @cpu at level @level. + * Get the cacheinfo structure for the cache associated with @cpu at + * level @level. * cpuhp lock must be held. */ -static inline unsigned long get_cpu_cacheinfo_id(int cpu, int level) +static inline struct cacheinfo *get_cpu_cacheinfo_level(int cpu, int level) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); int i; + lockdep_assert_cpus_held(); + for (i = 0; i < ci->num_leaves; i++) { if (ci->info_list[i].level == level) { if (ci->info_list[i].attributes & CACHE_ID) - return ci->info_list[i].id; - return ~0UL; + return &ci->info_list[i]; + return NULL; } } - return ~0UL; + return NULL; +} + +/* + * Get the id of the cache associated with @cpu at level @level. + * cpuhp lock must be held. + */ +static inline int get_cpu_cacheinfo_id(int cpu, int level) +{ + struct cacheinfo *ci = get_cpu_cacheinfo_level(cpu, level); + + return ci ? 
ci->id : -1; } /* diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6b4f9f16968821702555ad6ab0d7f5e13ea5fd35..51f99eca8c6f5db9efa14b00d8608bd0157b682f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -18,6 +18,7 @@ #include #include #include +#include #include struct device; @@ -130,38 +131,6 @@ static inline int add_cpu(unsigned int cpu) { return 0;} #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; -extern int lockdep_is_cpus_held(void); - -#ifdef CONFIG_HOTPLUG_CPU -extern void cpus_write_lock(void); -extern void cpus_write_unlock(void); -extern void cpus_read_lock(void); -extern void cpus_read_unlock(void); -extern int cpus_read_trylock(void); -extern void lockdep_assert_cpus_held(void); -extern void cpu_hotplug_disable(void); -extern void cpu_hotplug_enable(void); -void clear_tasks_mm_cpumask(int cpu); -int remove_cpu(unsigned int cpu); -int cpu_device_down(struct device *dev); -extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); - -#else /* CONFIG_HOTPLUG_CPU */ - -static inline void cpus_write_lock(void) { } -static inline void cpus_write_unlock(void) { } -static inline void cpus_read_lock(void) { } -static inline void cpus_read_unlock(void) { } -static inline int cpus_read_trylock(void) { return true; } -static inline void lockdep_assert_cpus_held(void) { } -static inline void cpu_hotplug_disable(void) { } -static inline void cpu_hotplug_enable(void) { } -static inline int remove_cpu(unsigned int cpu) { return -EPERM; } -static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } -#endif /* !CONFIG_HOTPLUG_CPU */ - -DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) - #ifdef CONFIG_PM_SLEEP_SMP extern int freeze_secondary_cpus(int primary); extern void thaw_secondary_cpus(void); diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h new file mode 100644 index 0000000000000000000000000000000000000000..431560bbd0453d7b3fce9726e70dcb6057ddcf6b --- /dev/null +++ b/include/linux/cpuhplock.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/cpuhplock.h - CPU hotplug locking + * + * Locking functions for CPU hotplug. 
+ */ +#ifndef _LINUX_CPUHPLOCK_H_ +#define _LINUX_CPUHPLOCK_H_ + +#include +#include + +struct device; + +extern int lockdep_is_cpus_held(void); + +#ifdef CONFIG_HOTPLUG_CPU +void cpus_write_lock(void); +void cpus_write_unlock(void); +void cpus_read_lock(void); +void cpus_read_unlock(void); +int cpus_read_trylock(void); +void lockdep_assert_cpus_held(void); +void cpu_hotplug_disable(void); +void cpu_hotplug_enable(void); +void clear_tasks_mm_cpumask(int cpu); +int remove_cpu(unsigned int cpu); +int cpu_device_down(struct device *dev); +void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); + +#else /* CONFIG_HOTPLUG_CPU */ + +static inline void cpus_write_lock(void) { } +static inline void cpus_write_unlock(void) { } +static inline void cpus_read_lock(void) { } +static inline void cpus_read_unlock(void) { } +static inline int cpus_read_trylock(void) { return true; } +static inline void lockdep_assert_cpus_held(void) { } +static inline void cpu_hotplug_disable(void) { } +static inline void cpu_hotplug_enable(void) { } +static inline int remove_cpu(unsigned int cpu) { return -EPERM; } +static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } +#endif /* !CONFIG_HOTPLUG_CPU */ + +DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) + +#endif /* _LINUX_CPUHPLOCK_H_ */ diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 75dacdc47c8a2dcc6d5811e16e23a59fe5756b8c..f730983a7a95a577c606abc8d0770c20fec7e836 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -2,6 +2,7 @@ #ifndef _RESCTRL_H #define _RESCTRL_H +#include #include #include #include @@ -67,7 +68,7 @@ extern unsigned int resctrl_rmid_realloc_threshold; struct pseudo_lock_region { struct resctrl_schema *s; u32 closid; - struct rdt_domain *d; + struct rdt_ctrl_domain *d; u32 cbm; wait_queue_head_t lock_thread_wq; int thread_done; @@ -90,11 +91,45 @@ struct resctrl_staged_config { bool have_new_ctrl; }; +enum resctrl_domain_type { + RESCTRL_CTRL_DOMAIN, + RESCTRL_MON_DOMAIN, +}; + /** - * struct rdt_domain - group of CPUs sharing a resctrl resource + * struct rdt_domain_hdr - common header for different domain types * @list: all instances of this resource * @id: unique id for this instance + * @type: type of this instance * @cpu_mask: which CPUs share this resource + */ +struct rdt_domain_hdr { + struct list_head list; + int id; + enum resctrl_domain_type type; + struct cpumask cpu_mask; +}; + +/** + * struct rdt_ctrl_domain - group of CPUs sharing a resctrl control resource + * @hdr: common header for different domain types + * @plr: pseudo-locked region (if any) associated with domain + * @staged_config: parsed configuration to be applied + * @mbps_val: When mba_sc is enabled, this holds the array of user + * specified control values for mba_sc in MBps, indexed + * by closid + */ +struct rdt_ctrl_domain { + struct rdt_domain_hdr hdr; + struct pseudo_lock_region *plr; + struct resctrl_staged_config staged_config[CDP_NUM_TYPES]; + u32 *mbps_val; +}; + +/** + * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource + * @hdr: common header for different domain types + * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -103,16 +138,10 @@ struct resctrl_staged_config { * @mbm_work_cpu: worker CPU for MBM h/w counters * @cqm_work_cpu: worker CPU for CQM h/w counters * @mbm_cntr_map: bitmap to track 
domain counter assignment - * @plr: pseudo-locked region (if any) associated with domain - * @staged_config: parsed configuration to be applied - * @mbps_val: When mba_sc is enabled, this holds the array of user - * specified control values for mba_sc in MBps, indexed - * by closid */ -struct rdt_domain { - struct list_head list; - int id; - struct cpumask cpu_mask; +struct rdt_mon_domain { + struct rdt_domain_hdr hdr; + unsigned long ci_id; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local; @@ -121,9 +150,6 @@ struct rdt_domain { int mbm_work_cpu; int cqm_work_cpu; unsigned long *mbm_cntr_map; - struct pseudo_lock_region *plr; - struct resctrl_staged_config staged_config[CDP_NUM_TYPES]; - u32 *mbps_val; }; /** @@ -197,15 +223,23 @@ struct resctrl_mon { struct list_head evt_list; }; +enum resctrl_scope { + RESCTRL_L2_CACHE = 2, + RESCTRL_L3_CACHE = 3, + RESCTRL_L3_NODE, +}; + /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine - * @cache_level: Which cache level defines scope of this resource + * @ctrl_scope: Scope of this resource for control functions + * @mon_scope: Scope of this resource for monitor functions * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. - * @domains: RCU list of all domains for this resource + * @ctrl_domains: RCU list of all control domains for this resource + * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. * @data_width: Character width of data when displaying * @default_ctrl: Specifies default cache cbm or memory B/W percent. @@ -219,11 +253,13 @@ struct rdt_resource { int rid; bool alloc_capable; bool mon_capable; - int cache_level; + enum resctrl_scope ctrl_scope; + enum resctrl_scope mon_scope; struct resctrl_cache cache; struct resctrl_membw membw; struct resctrl_mon mon; - struct list_head domains; + struct list_head ctrl_domains; + struct list_head mon_domains; char *name; int data_width; u32 default_ctrl; @@ -231,7 +267,7 @@ struct rdt_resource { unsigned long fflags; unsigned int mbm_cfg_mask; bool cdp_capable; - struct rdt_domain *rdt_domain_list[NR_CPUS]; + struct rdt_mon_domain *rdt_domain_list[NR_CPUS]; }; /* @@ -268,7 +304,7 @@ struct resctrl_cpu_sync { struct resctrl_mon_config_info { struct rdt_resource *r; - struct rdt_domain *d; + struct rdt_mon_domain *d; u32 evtid; u32 mon_config; int err; @@ -314,6 +350,8 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); +struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id, + struct list_head **pos); bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); @@ -336,14 +374,28 @@ static inline u32 resctrl_get_config_index(u32 closid, * Caller must hold the cpuhp read lock to prevent the struct rdt_domain being * freed. 
*/ -static inline struct rdt_domain * -resctrl_get_domain_from_cpu(int cpu, struct rdt_resource *r) +static inline struct rdt_ctrl_domain * +get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r) +{ + struct rdt_ctrl_domain *d; + + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + /* Find the domain that contains this CPU */ + if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) + return d; + } + + return NULL; +} + +static inline struct rdt_mon_domain * +get_mon_domain_from_cpu(int cpu, struct rdt_resource *r) { - struct rdt_domain *d; + struct rdt_mon_domain *d; - list_for_each_entry_rcu(d, &r->domains, list) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { /* Find the domain that contains this CPU */ - if (cpumask_test_cpu(cpu, &d->cpu_mask)) + if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) return d; } @@ -354,13 +406,15 @@ resctrl_get_domain_from_cpu(int cpu, struct rdt_resource *r) * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. */ -int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val); -u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, +u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type); -int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); -void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); +int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d); +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d); +void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d); +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d); void resctrl_online_cpu(unsigned int cpu); void resctrl_offline_cpu(unsigned int cpu); @@ -389,7 +443,7 @@ void resctrl_offline_cpu(unsigned int cpu); * Return: * 0 on success, or -EIO, -EINVAL etc on error. */ -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *arch_mon_ctx); @@ -422,7 +476,7 @@ static inline void resctrl_arch_rmid_read_context_check(void) * * This can be called from any CPU. */ -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id eventid); @@ -435,7 +489,7 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, * * This can be called from any CPU. */ -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d); extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit;
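A minimal usage sketch, assuming only the declarations above: with the rdt_domain split, per-domain state is reached through the embedded rdt_domain_hdr on the separate ctrl_domains and mon_domains lists. The helper name resctrl_dump_domains() below is hypothetical and not part of this patch; the hdr.list, hdr.id, hdr.cpu_mask, and ci_id fields, and the cpus_read_lock() requirement, are the ones introduced in the hunks above.

/*
 * Illustrative only (hypothetical helper, not added by this patch):
 * walk the separate control and monitor domain lists of a resource.
 * Caller is assumed to hold cpus_read_lock(), matching the list
 * walkers shown earlier in this patch.
 */
static void resctrl_dump_domains(struct rdt_resource *r)
{
	struct rdt_ctrl_domain *cd;
	struct rdt_mon_domain *md;

	list_for_each_entry(cd, &r->ctrl_domains, hdr.list)
		pr_info("%s: ctrl domain %d spans CPUs %*pbl\n",
			r->name, cd->hdr.id,
			cpumask_pr_args(&cd->hdr.cpu_mask));

	list_for_each_entry(md, &r->mon_domains, hdr.list)
		pr_info("%s: mon domain %d (cache info id %lu)\n",
			r->name, md->hdr.id, md->ci_id);
}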