diff options
Diffstat (limited to 'arch/x86/kernel/cpu/resctrl/core.c')
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/core.c | 297 |
1 files changed, 147 insertions, 150 deletions
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index bb1c3f5f60c8..83e40341583e 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -16,6 +16,7 @@ #define pr_fmt(fmt) "resctrl: " fmt +#include <linux/cpu.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/cacheinfo.h> @@ -25,8 +26,15 @@ #include <asm/resctrl.h> #include "internal.h" -/* Mutex to protect rdtgroup access. */ -DEFINE_MUTEX(rdtgroup_mutex); +/* + * rdt_domain structures are kfree()d when their last CPU goes offline, + * and allocated when the first CPU in a new domain comes online. + * The rdt_resource's domain list is updated when this happens. Readers of + * the domain list must either take cpus_read_lock(), or rely on an RCU + * read-side critical section, to avoid observing concurrent modification. + * All writers take this mutex: + */ +static DEFINE_MUTEX(domain_list_lock); /* * The cached resctrl_pqr_state is strictly per CPU and can never be @@ -66,9 +74,6 @@ struct rdt_hw_resource rdt_resources_all[] = { .rid = RDT_RESOURCE_L3, .name = "L3", .cache_level = 3, - .cache = { - .min_cbm_bits = 1, - }, .domains = domain_init(RDT_RESOURCE_L3), .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", @@ -83,9 +88,6 @@ struct rdt_hw_resource rdt_resources_all[] = { .rid = RDT_RESOURCE_L2, .name = "L2", .cache_level = 2, - .cache = { - .min_cbm_bits = 1, - }, .domains = domain_init(RDT_RESOURCE_L2), .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", @@ -106,6 +108,18 @@ struct rdt_hw_resource rdt_resources_all[] = { .fflags = RFTYPE_RES_MB, }, }, + [RDT_RESOURCE_SMBA] = + { + .r_resctrl = { + .rid = RDT_RESOURCE_SMBA, + .name = "SMBA", + .cache_level = 3, + .domains = domain_init(RDT_RESOURCE_SMBA), + .parse_ctrlval = parse_bw, + .format_str = "%d=%*u", + .fflags = RFTYPE_RES_MB, + }, + }, }; /* @@ -130,15 +144,15 @@ static inline void cache_alloc_hsw_probe(void) { struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3]; struct rdt_resource *r = &hw_res->r_resctrl; - u32 l, h, max_cbm = BIT_MASK(20) - 1; + u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0; - if (wrmsr_safe(MSR_IA32_L3_CBM_BASE, max_cbm, 0)) + if (wrmsrl_safe(MSR_IA32_L3_CBM_BASE, max_cbm)) return; - rdmsr(MSR_IA32_L3_CBM_BASE, l, h); + rdmsrl(MSR_IA32_L3_CBM_BASE, l3_cbm_0); /* If all the bits were set in MSR, return success */ - if (l != max_cbm) + if (l3_cbm_0 != max_cbm) return; hw_res->num_closid = 4; @@ -146,8 +160,8 @@ static inline void cache_alloc_hsw_probe(void) r->cache.cbm_len = 20; r->cache.shareable_bits = 0xc0000; r->cache.min_cbm_bits = 2; + r->cache.arch_has_sparse_bitmasks = false; r->alloc_capable = true; - r->alloc_enabled = true; rdt_alloc_capable = true; } @@ -157,6 +171,13 @@ bool is_mba_sc(struct rdt_resource *r) if (!r) return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc; + /* + * The software controller support is only applicable to MBA resource. + * Make sure to check for resource type. + */ + if (r->rid != RDT_RESOURCE_MBA) + return false; + return r->membw.mba_sc; } @@ -211,7 +232,6 @@ static bool __get_mem_config_intel(struct rdt_resource *r) thread_throttle_mode_init(); r->alloc_capable = true; - r->alloc_enabled = true; return true; } @@ -219,13 +239,17 @@ static bool __get_mem_config_intel(struct rdt_resource *r) static bool __rdt_get_mem_config_amd(struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - union cpuid_0x10_3_eax eax; - union cpuid_0x10_x_edx edx; - u32 ebx, ecx; + u32 eax, ebx, ecx, edx, subleaf; - cpuid_count(0x80000020, 1, &eax.full, &ebx, &ecx, &edx.full); - hw_res->num_closid = edx.split.cos_max + 1; - r->default_ctrl = MAX_MBA_BW_AMD; + /* + * Query CPUID_Fn80000020_EDX_x01 for MBA and + * CPUID_Fn80000020_EDX_x02 for SMBA + */ + subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1; + + cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx); + hw_res->num_closid = edx + 1; + r->default_ctrl = 1 << eax; /* AMD does not use delay */ r->membw.delay_linear = false; @@ -242,7 +266,6 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r) r->data_width = 4; r->alloc_capable = true; - r->alloc_enabled = true; return true; } @@ -251,17 +274,19 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); union cpuid_0x10_1_eax eax; + union cpuid_0x10_x_ecx ecx; union cpuid_0x10_x_edx edx; - u32 ebx, ecx; + u32 ebx; - cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full); + cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full); hw_res->num_closid = edx.split.cos_max + 1; r->cache.cbm_len = eax.split.cbm_len + 1; r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; r->cache.shareable_bits = ebx & r->default_ctrl; r->data_width = (r->cache.cbm_len + 3) / 4; + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + r->cache.arch_has_sparse_bitmasks = ecx.split.noncont; r->alloc_capable = true; - r->alloc_enabled = true; } static void rdt_get_cdp_config(int level) @@ -300,7 +325,7 @@ mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) * that can be written to QOS_MSRs. * There are currently no SKUs which support non linear delay values. */ -u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) +static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) { if (r->membw.delay_linear) return MAX_MBA_BW - bw; @@ -401,7 +426,7 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, return NULL; } -void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm) +static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); int i; @@ -410,12 +435,17 @@ void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm) * Initialize the Control MSRs to having no control. * For Cache Allocation: Set all bits in cbm * For Memory Allocation: Set b/w requested to 100% - * and the bandwidth in MBps to U32_MAX */ - for (i = 0; i < hw_res->num_closid; i++, dc++, dm++) { + for (i = 0; i < hw_res->num_closid; i++, dc++) *dc = r->default_ctrl; - *dm = MBA_MAX_MBPS; - } +} + +static void domain_free(struct rdt_hw_domain *hw_dom) +{ + kfree(hw_dom->arch_mbm_total); + kfree(hw_dom->arch_mbm_local); + kfree(hw_dom->ctrl_val); + kfree(hw_dom); } static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) @@ -423,23 +453,15 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct msr_param m; - u32 *dc, *dm; + u32 *dc; dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val), GFP_KERNEL); if (!dc) return -ENOMEM; - dm = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->mbps_val), - GFP_KERNEL); - if (!dm) { - kfree(dc); - return -ENOMEM; - } - hw_dom->ctrl_val = dc; - hw_dom->mbps_val = dm; - setup_default_ctrlval(r, dc, dm); + setup_default_ctrlval(r, dc); m.low = 0; m.high = hw_res->num_closid; @@ -447,39 +469,31 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) return 0; } -static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) +/** + * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters + * @num_rmid: The size of the MBM counter array + * @hw_dom: The domain that owns the allocated arrays + */ +static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom) { size_t tsize; - if (is_llc_occupancy_enabled()) { - d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL); - if (!d->rmid_busy_llc) - return -ENOMEM; - INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); - } if (is_mbm_total_enabled()) { - tsize = sizeof(*d->mbm_total); - d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL); - if (!d->mbm_total) { - bitmap_free(d->rmid_busy_llc); + tsize = sizeof(*hw_dom->arch_mbm_total); + hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL); + if (!hw_dom->arch_mbm_total) return -ENOMEM; - } } if (is_mbm_local_enabled()) { - tsize = sizeof(*d->mbm_local); - d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL); - if (!d->mbm_local) { - bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); + tsize = sizeof(*hw_dom->arch_mbm_local); + hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL); + if (!hw_dom->arch_mbm_local) { + kfree(hw_dom->arch_mbm_total); + hw_dom->arch_mbm_total = NULL; return -ENOMEM; } } - if (is_mbm_enabled()) { - INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); - mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL); - } - return 0; } @@ -502,6 +516,9 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) struct list_head *add_pos = NULL; struct rdt_hw_domain *hw_dom; struct rdt_domain *d; + int err; + + lockdep_assert_held(&domain_list_lock); d = rdt_find_domain(r, id, &add_pos); if (IS_ERR(d)) { @@ -527,25 +544,23 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) rdt_domain_reconfigure_cdp(r); if (r->alloc_capable && domain_setup_ctrlval(r, d)) { - kfree(hw_dom); + domain_free(hw_dom); return; } - if (r->mon_capable && domain_setup_mon_state(r, d)) { - kfree(hw_dom->ctrl_val); - kfree(hw_dom->mbps_val); - kfree(hw_dom); + if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) { + domain_free(hw_dom); return; } - list_add_tail(&d->list, add_pos); + list_add_tail_rcu(&d->list, add_pos); - /* - * If resctrl is mounted, add - * per domain monitor data directories. - */ - if (static_branch_unlikely(&rdt_mon_enable_key)) - mkdir_mondata_subdir_allrdtgrp(r, d); + err = resctrl_online_domain(r, d); + if (err) { + list_del_rcu(&d->list); + synchronize_rcu(); + domain_free(hw_dom); + } } static void domain_remove_cpu(int cpu, struct rdt_resource *r) @@ -554,6 +569,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) struct rdt_hw_domain *hw_dom; struct rdt_domain *d; + lockdep_assert_held(&domain_list_lock); + d = rdt_find_domain(r, id, NULL); if (IS_ERR_OR_NULL(d)) { pr_warn("Couldn't find cache id for CPU %d\n", cpu); @@ -563,27 +580,9 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) cpumask_clear_cpu(cpu, &d->cpu_mask); if (cpumask_empty(&d->cpu_mask)) { - /* - * If resctrl is mounted, remove all the - * per domain monitor data directories. - */ - if (static_branch_unlikely(&rdt_mon_enable_key)) - rmdir_mondata_subdir_allrdtgrp(r, d->id); - list_del(&d->list); - if (r->mon_capable && is_mbm_enabled()) - cancel_delayed_work(&d->mbm_over); - if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { - /* - * When a package is going down, forcefully - * decrement rmid->ebusy. There is no way to know - * that the L3 was flushed and hence may lead to - * incorrect counts in rare scenarios, but leaving - * the RMID as busy creates RMID leaks if the - * package never comes back. - */ - __check_limbo(d, true); - cancel_delayed_work(&d->cqm_limbo); - } + resctrl_offline_domain(r, d); + list_del_rcu(&d->list); + synchronize_rcu(); /* * rdt_domain "d" is going to be freed below, so clear @@ -591,82 +590,51 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) */ if (d->plr) d->plr->d = NULL; + domain_free(hw_dom); - kfree(hw_dom->ctrl_val); - kfree(hw_dom->mbps_val); - bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); - kfree(d->mbm_local); - kfree(hw_dom); return; } - - if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) { - if (is_mbm_enabled() && cpu == d->mbm_work_cpu) { - cancel_delayed_work(&d->mbm_over); - mbm_setup_overflow_handler(d, 0); - } - if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu && - has_busy_rmid(r, d)) { - cancel_delayed_work(&d->cqm_limbo); - cqm_setup_limbo_handler(d, 0); - } - } } static void clear_closid_rmid(int cpu) { struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); - state->default_closid = 0; - state->default_rmid = 0; - state->cur_closid = 0; - state->cur_rmid = 0; - wrmsr(IA32_PQR_ASSOC, 0, 0); + state->default_closid = RESCTRL_RESERVED_CLOSID; + state->default_rmid = RESCTRL_RESERVED_RMID; + state->cur_closid = RESCTRL_RESERVED_CLOSID; + state->cur_rmid = RESCTRL_RESERVED_RMID; + wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID, + RESCTRL_RESERVED_CLOSID); } -static int resctrl_online_cpu(unsigned int cpu) +static int resctrl_arch_online_cpu(unsigned int cpu) { struct rdt_resource *r; - mutex_lock(&rdtgroup_mutex); + mutex_lock(&domain_list_lock); for_each_capable_rdt_resource(r) domain_add_cpu(cpu, r); - /* The cpu is set in default rdtgroup after online. */ - cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask); + mutex_unlock(&domain_list_lock); + clear_closid_rmid(cpu); - mutex_unlock(&rdtgroup_mutex); + resctrl_online_cpu(cpu); return 0; } -static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) +static int resctrl_arch_offline_cpu(unsigned int cpu) { - struct rdtgroup *cr; - - list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) { - if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) { - break; - } - } -} - -static int resctrl_offline_cpu(unsigned int cpu) -{ - struct rdtgroup *rdtgrp; struct rdt_resource *r; - mutex_lock(&rdtgroup_mutex); + resctrl_offline_cpu(cpu); + + mutex_lock(&domain_list_lock); for_each_capable_rdt_resource(r) domain_remove_cpu(cpu, r); - list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { - if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) { - clear_childcpus(rdtgrp, cpu); - break; - } - } + mutex_unlock(&domain_list_lock); + clear_closid_rmid(cpu); - mutex_unlock(&rdtgroup_mutex); return 0; } @@ -694,6 +662,8 @@ enum { RDT_FLAG_L2_CAT, RDT_FLAG_L2_CDP, RDT_FLAG_MBA, + RDT_FLAG_SMBA, + RDT_FLAG_BMEC, }; #define RDT_OPT(idx, n, f) \ @@ -717,6 +687,8 @@ static struct rdt_options rdt_options[] __initdata = { RDT_OPT(RDT_FLAG_L2_CAT, "l2cat", X86_FEATURE_CAT_L2), RDT_OPT(RDT_FLAG_L2_CDP, "l2cdp", X86_FEATURE_CDP_L2), RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA), + RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA), + RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC), }; #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) @@ -746,7 +718,7 @@ static int __init set_rdt_options(char *str) } __setup("rdt", set_rdt_options); -static bool __init rdt_cpu_has(int flag) +bool __init rdt_cpu_has(int flag) { bool ret = boot_cpu_has(flag); struct rdt_options *o; @@ -781,6 +753,19 @@ static __init bool get_mem_config(void) return false; } +static __init bool get_slow_mem_config(void) +{ + struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA]; + + if (!rdt_cpu_has(X86_FEATURE_SMBA)) + return false; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return __rdt_get_mem_config_amd(&hw_res->r_resctrl); + + return false; +} + static __init bool get_rdt_alloc_resources(void) { struct rdt_resource *r; @@ -811,6 +796,9 @@ static __init bool get_rdt_alloc_resources(void) if (get_mem_config()) ret = true; + if (get_slow_mem_config()) + ret = true; + return ret; } @@ -874,9 +862,8 @@ static __init void rdt_init_res_defs_intel(void) if (r->rid == RDT_RESOURCE_L3 || r->rid == RDT_RESOURCE_L2) { - r->cache.arch_has_sparse_bitmaps = false; - r->cache.arch_has_empty_bitmaps = false; r->cache.arch_has_per_cpu_cfg = false; + r->cache.min_cbm_bits = 1; } else if (r->rid == RDT_RESOURCE_MBA) { hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE; hw_res->msr_update = mba_wrmsr_intel; @@ -894,12 +881,15 @@ static __init void rdt_init_res_defs_amd(void) if (r->rid == RDT_RESOURCE_L3 || r->rid == RDT_RESOURCE_L2) { - r->cache.arch_has_sparse_bitmaps = true; - r->cache.arch_has_empty_bitmaps = true; + r->cache.arch_has_sparse_bitmasks = true; r->cache.arch_has_per_cpu_cfg = true; + r->cache.min_cbm_bits = 0; } else if (r->rid == RDT_RESOURCE_MBA) { hw_res->msr_base = MSR_IA32_MBA_BW_BASE; hw_res->msr_update = mba_wrmsr_amd; + } else if (r->rid == RDT_RESOURCE_SMBA) { + hw_res->msr_base = MSR_IA32_SMBA_BW_BASE; + hw_res->msr_update = mba_wrmsr_amd; } } } @@ -964,7 +954,8 @@ static int __init resctrl_late_init(void) state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/resctrl/cat:online:", - resctrl_online_cpu, resctrl_offline_cpu); + resctrl_arch_online_cpu, + resctrl_arch_offline_cpu); if (state < 0) return state; @@ -988,8 +979,14 @@ late_initcall(resctrl_late_init); static void __exit resctrl_exit(void) { + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + cpuhp_remove_state(rdt_online); + rdtgroup_exit(); + + if (r->mon_capable) + rdt_put_mon_l3_config(); } __exitcall(resctrl_exit); |