diff options
Diffstat (limited to 'arch/x86/kernel/cpu/resctrl/rdtgroup.c')
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/rdtgroup.c | 997 |
1 files changed, 615 insertions, 382 deletions
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 064e9ef44cd6..e5a48f05e787 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * User interface for Resource Alloction in Resource Director Technology(RDT) + * User interface for Resource Allocation in Resource Director Technology(RDT) * * Copyright (C) 2016 Intel Corporation * @@ -29,7 +29,7 @@ #include <uapi/linux/magic.h> -#include <asm/resctrl_sched.h> +#include <asm/resctrl.h> #include "internal.h" DEFINE_STATIC_KEY_FALSE(rdt_enable_key); @@ -39,6 +39,9 @@ static struct kernfs_root *rdt_root; struct rdtgroup rdtgroup_default; LIST_HEAD(rdt_all_groups); +/* list of entries for the schemata file */ +LIST_HEAD(resctrl_schema_all); + /* Kernel fs node for "info" directory under root */ static struct kernfs_node *kn_info; @@ -100,12 +103,12 @@ int closids_supported(void) static void closid_init(void) { - struct rdt_resource *r; - int rdt_min_closid = 32; + struct resctrl_schema *s; + u32 rdt_min_closid = 32; /* Compute rdt_min_closid across all resources */ - for_each_alloc_enabled_rdt_resource(r) - rdt_min_closid = min(rdt_min_closid, r->num_closid); + list_for_each_entry(s, &resctrl_schema_all, list) + rdt_min_closid = min(rdt_min_closid, s->num_closid); closid_free_map = BIT_MASK(rdt_min_closid) - 1; @@ -240,13 +243,13 @@ static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf, return -EINVAL; } -static struct kernfs_ops rdtgroup_kf_single_ops = { +static const struct kernfs_ops rdtgroup_kf_single_ops = { .atomic_write_len = PAGE_SIZE, .write = rdtgroup_file_write, .seq_show = rdtgroup_seqfile_show, }; -static struct kernfs_ops kf_mondata_ops = { +static const struct kernfs_ops kf_mondata_ops = { .atomic_write_len = PAGE_SIZE, .seq_show = rdtgroup_mondata_show, }; @@ -294,7 +297,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of, /* * This is safe against resctrl_sched_in() called from __switch_to() * because __switch_to() is executed with interrupts disabled. A local call - * from update_closid_rmid() is proteced against __switch_to() because + * from update_closid_rmid() is protected against __switch_to() because * preemption is disabled. */ static void update_cpu_closid_rmid(void *info) @@ -338,14 +341,14 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, /* Check whether cpus belong to parent ctrl group */ cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask); - if (cpumask_weight(tmpmask)) { + if (!cpumask_empty(tmpmask)) { rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n"); return -EINVAL; } /* Check whether cpus are dropped from this group */ cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); - if (cpumask_weight(tmpmask)) { + if (!cpumask_empty(tmpmask)) { /* Give any dropped cpus to parent rdtgroup */ cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask); update_closid_rmid(tmpmask, prgrp); @@ -356,7 +359,7 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, * and update per-cpu rmid */ cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); - if (cpumask_weight(tmpmask)) { + if (!cpumask_empty(tmpmask)) { head = &prgrp->mon.crdtgrp_list; list_for_each_entry(crgrp, head, mon.crdtgrp_list) { if (crgrp == rdtgrp) @@ -391,7 +394,7 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, /* Check whether cpus are dropped from this group */ cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); - if (cpumask_weight(tmpmask)) { + if (!cpumask_empty(tmpmask)) { /* Can't drop from default group */ if (rdtgrp == &rdtgroup_default) { rdt_last_cmd_puts("Can't drop CPUs from default group\n"); @@ -410,12 +413,12 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, * and update per-cpu closid/rmid. */ cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); - if (cpumask_weight(tmpmask)) { + if (!cpumask_empty(tmpmask)) { list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) { if (r == rdtgrp) continue; cpumask_and(tmpmask1, &r->cpu_mask, tmpmask); - if (cpumask_weight(tmpmask1)) + if (!cpumask_empty(tmpmask1)) cpumask_rdtgrp_clear(r, tmpmask1); } update_closid_rmid(tmpmask, rdtgrp); @@ -485,7 +488,7 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, /* check that user didn't specify any offline cpus */ cpumask_andnot(tmpmask, newmask, cpu_online_mask); - if (cpumask_weight(tmpmask)) { + if (!cpumask_empty(tmpmask)) { ret = -EINVAL; rdt_last_cmd_puts("Can only assign online CPUs\n"); goto unlock; @@ -507,89 +510,100 @@ unlock: return ret ?: nbytes; } -struct task_move_callback { - struct callback_head work; - struct rdtgroup *rdtgrp; -}; - -static void move_myself(struct callback_head *head) +/** + * rdtgroup_remove - the helper to remove resource group safely + * @rdtgrp: resource group to remove + * + * On resource group creation via a mkdir, an extra kernfs_node reference is + * taken to ensure that the rdtgroup structure remains accessible for the + * rdtgroup_kn_unlock() calls where it is removed. + * + * Drop the extra reference here, then free the rdtgroup structure. + * + * Return: void + */ +static void rdtgroup_remove(struct rdtgroup *rdtgrp) { - struct task_move_callback *callback; - struct rdtgroup *rdtgrp; - - callback = container_of(head, struct task_move_callback, work); - rdtgrp = callback->rdtgrp; + kernfs_put(rdtgrp->kn); + kfree(rdtgrp); +} +static void _update_task_closid_rmid(void *task) +{ /* - * If resource group was deleted before this task work callback - * was invoked, then assign the task to root group and free the - * resource group. + * If the task is still current on this CPU, update PQR_ASSOC MSR. + * Otherwise, the MSR is updated when the task is scheduled in. */ - if (atomic_dec_and_test(&rdtgrp->waitcount) && - (rdtgrp->flags & RDT_DELETED)) { - current->closid = 0; - current->rmid = 0; - kfree(rdtgrp); - } - - if (unlikely(current->flags & PF_EXITING)) - goto out; - - preempt_disable(); - /* update PQR_ASSOC MSR to make resource group go into effect */ - resctrl_sched_in(); - preempt_enable(); + if (task == current) + resctrl_sched_in(); +} -out: - kfree(callback); +static void update_task_closid_rmid(struct task_struct *t) +{ + if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) + smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); + else + _update_task_closid_rmid(t); } static int __rdtgroup_move_task(struct task_struct *tsk, struct rdtgroup *rdtgrp) { - struct task_move_callback *callback; - int ret; - - callback = kzalloc(sizeof(*callback), GFP_KERNEL); - if (!callback) - return -ENOMEM; - callback->work.func = move_myself; - callback->rdtgrp = rdtgrp; + /* If the task is already in rdtgrp, no need to move the task. */ + if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid && + tsk->rmid == rdtgrp->mon.rmid) || + (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid && + tsk->closid == rdtgrp->mon.parent->closid)) + return 0; /* - * Take a refcount, so rdtgrp cannot be freed before the - * callback has been invoked. + * Set the task's closid/rmid before the PQR_ASSOC MSR can be + * updated by them. + * + * For ctrl_mon groups, move both closid and rmid. + * For monitor groups, can move the tasks only from + * their parent CTRL group. */ - atomic_inc(&rdtgrp->waitcount); - ret = task_work_add(tsk, &callback->work, true); - if (ret) { - /* - * Task is exiting. Drop the refcount and free the callback. - * No need to check the refcount as the group cannot be - * deleted before the write function unlocks rdtgroup_mutex. - */ - atomic_dec(&rdtgrp->waitcount); - kfree(callback); - rdt_last_cmd_puts("Task exited\n"); - } else { - /* - * For ctrl_mon groups move both closid and rmid. - * For monitor groups, can move the tasks only from - * their parent CTRL group. - */ - if (rdtgrp->type == RDTCTRL_GROUP) { - tsk->closid = rdtgrp->closid; - tsk->rmid = rdtgrp->mon.rmid; - } else if (rdtgrp->type == RDTMON_GROUP) { - if (rdtgrp->mon.parent->closid == tsk->closid) { - tsk->rmid = rdtgrp->mon.rmid; - } else { - rdt_last_cmd_puts("Can't move task to different control group\n"); - ret = -EINVAL; - } + + if (rdtgrp->type == RDTCTRL_GROUP) { + WRITE_ONCE(tsk->closid, rdtgrp->closid); + WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid); + } else if (rdtgrp->type == RDTMON_GROUP) { + if (rdtgrp->mon.parent->closid == tsk->closid) { + WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid); + } else { + rdt_last_cmd_puts("Can't move task to different control group\n"); + return -EINVAL; } } - return ret; + + /* + * Ensure the task's closid and rmid are written before determining if + * the task is current that will decide if it will be interrupted. + */ + barrier(); + + /* + * By now, the task's closid and rmid are set. If the task is current + * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource + * group go into effect. If the task is not current, the MSR will be + * updated when the task is scheduled in. + */ + update_task_closid_rmid(tsk); + + return 0; +} + +static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) +{ + return (rdt_alloc_capable && + (r->type == RDTCTRL_GROUP) && (t->closid == r->closid)); +} + +static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r) +{ + return (rdt_mon_capable && + (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid)); } /** @@ -607,8 +621,7 @@ int rdtgroup_tasks_assigned(struct rdtgroup *r) rcu_read_lock(); for_each_process_thread(p, t) { - if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) || - (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) { + if (is_closid_match(t, r) || is_rmid_match(t, r)) { ret = 1; break; } @@ -706,8 +719,7 @@ static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) rcu_read_lock(); for_each_process_thread(p, t) { - if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) || - (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) + if (is_closid_match(t, r) || is_rmid_match(t, r)) seq_printf(s, "%d\n", t->pid); } rcu_read_unlock(); @@ -833,16 +845,17 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of, static int rdt_num_closids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; - seq_printf(seq, "%d\n", r->num_closid); + seq_printf(seq, "%u\n", s->num_closid); return 0; } static int rdt_default_ctrl_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", r->default_ctrl); return 0; @@ -851,7 +864,8 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of, static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->cache.min_cbm_bits); return 0; @@ -860,7 +874,8 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, static int rdt_shareable_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", r->cache.shareable_bits); return 0; @@ -883,38 +898,40 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of, static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; /* * Use unsigned long even though only 32 bits are used to ensure * test_bit() is used safely. */ unsigned long sw_shareable = 0, hw_shareable = 0; unsigned long exclusive = 0, pseudo_locked = 0; + struct rdt_resource *r = s->res; struct rdt_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; bool sep = false; - u32 *ctrl; + u32 ctrl_val; mutex_lock(&rdtgroup_mutex); hw_shareable = r->cache.shareable_bits; list_for_each_entry(dom, &r->domains, list) { if (sep) seq_putc(seq, ';'); - ctrl = dom->ctrl_val; sw_shareable = 0; exclusive = 0; seq_printf(seq, "%d=", dom->id); - for (i = 0; i < closids_supported(); i++, ctrl++) { + for (i = 0; i < closids_supported(); i++) { if (!closid_allocated(i)) continue; + ctrl_val = resctrl_arch_get_config(r, dom, i, + s->conf_type); mode = rdtgroup_mode_by_closid(i); switch (mode) { case RDT_MODE_SHAREABLE: - sw_shareable |= *ctrl; + sw_shareable |= ctrl_val; break; case RDT_MODE_EXCLUSIVE: - exclusive |= *ctrl; + exclusive |= ctrl_val; break; case RDT_MODE_PSEUDO_LOCKSETUP: /* @@ -961,7 +978,8 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, static int rdt_min_bw_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.min_bw); return 0; @@ -992,7 +1010,8 @@ static int rdt_mon_features_show(struct kernfs_open_file *of, static int rdt_bw_gran_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.bw_gran); return 0; @@ -1001,7 +1020,8 @@ static int rdt_bw_gran_show(struct kernfs_open_file *of, static int rdt_delay_linear_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.delay_linear); return 0; @@ -1010,9 +1030,21 @@ static int rdt_delay_linear_show(struct kernfs_open_file *of, static int max_threshold_occ_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold); - seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale); + return 0; +} + +static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = of->kn->parent->priv; + struct rdt_resource *r = s->res; + + if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) + seq_puts(seq, "per-thread\n"); + else + seq_puts(seq, "max\n"); return 0; } @@ -1020,7 +1052,6 @@ static int max_threshold_occ_show(struct kernfs_open_file *of, static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_resource *r = of->kn->parent->priv; unsigned int bytes; int ret; @@ -1028,10 +1059,10 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, if (ret) return ret; - if (bytes > (boot_cpu_data.x86_cache_size * 1024)) + if (bytes > resctrl_rmid_realloc_limit) return -EINVAL; - resctrl_cqm_threshold = bytes / r->mon_scale; + resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes); return nbytes; } @@ -1056,75 +1087,17 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of, return 0; } -/** - * rdt_cdp_peer_get - Retrieve CDP peer if it exists - * @r: RDT resource to which RDT domain @d belongs - * @d: Cache instance for which a CDP peer is requested - * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer) - * Used to return the result. - * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer) - * Used to return the result. - * - * RDT resources are managed independently and by extension the RDT domains - * (RDT resource instances) are managed independently also. The Code and - * Data Prioritization (CDP) RDT resources, while managed independently, - * could refer to the same underlying hardware. For example, - * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache. - * - * When provided with an RDT resource @r and an instance of that RDT - * resource @d rdt_cdp_peer_get() will return if there is a peer RDT - * resource and the exact instance that shares the same hardware. - * - * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists. - * If a CDP peer was found, @r_cdp will point to the peer RDT resource - * and @d_cdp will point to the peer RDT domain. - */ -static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, - struct rdt_resource **r_cdp, - struct rdt_domain **d_cdp) +static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) { - struct rdt_resource *_r_cdp = NULL; - struct rdt_domain *_d_cdp = NULL; - int ret = 0; - - switch (r->rid) { - case RDT_RESOURCE_L3DATA: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE]; - break; - case RDT_RESOURCE_L3CODE: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA]; - break; - case RDT_RESOURCE_L2DATA: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE]; - break; - case RDT_RESOURCE_L2CODE: - _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA]; - break; + switch (my_type) { + case CDP_CODE: + return CDP_DATA; + case CDP_DATA: + return CDP_CODE; default: - ret = -ENOENT; - goto out; - } - - /* - * When a new CPU comes online and CDP is enabled then the new - * RDT domains (if any) associated with both CDP RDT resources - * are added in the same CPU online routine while the - * rdtgroup_mutex is held. It should thus not happen for one - * RDT domain to exist and be associated with its RDT CDP - * resource but there is no RDT domain associated with the - * peer RDT CDP resource. Hence the WARN. - */ - _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); - if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { - _r_cdp = NULL; - ret = -EINVAL; + case CDP_NONE: + return CDP_NONE; } - -out: - *r_cdp = _r_cdp; - *d_cdp = _d_cdp; - - return ret; } /** @@ -1148,11 +1121,11 @@ out: * Return: false if CBM does not overlap, true if it does. */ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - unsigned long cbm, int closid, bool exclusive) + unsigned long cbm, int closid, + enum resctrl_conf_type type, bool exclusive) { enum rdtgrp_mode mode; unsigned long ctrl_b; - u32 *ctrl; int i; /* Check for any overlap with regions used by hardware directly */ @@ -1163,9 +1136,8 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d } /* Check for overlap with other resource groups */ - ctrl = d->ctrl_val; - for (i = 0; i < closids_supported(); i++, ctrl++) { - ctrl_b = *ctrl; + for (i = 0; i < closids_supported(); i++) { + ctrl_b = resctrl_arch_get_config(r, d, i, type); mode = rdtgroup_mode_by_closid(i); if (closid_allocated(i) && i != closid && mode != RDT_MODE_PSEUDO_LOCKSETUP) { @@ -1185,7 +1157,7 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d /** * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware - * @r: Resource to which domain instance @d belongs. + * @s: Schema for the resource to which domain instance @d belongs. * @d: The domain instance for which @closid is being tested. * @cbm: Capacity bitmask being tested. * @closid: Intended closid for @cbm. @@ -1203,19 +1175,19 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d * * Return: true if CBM overlap detected, false if there is no overlap */ -bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, +bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d, unsigned long cbm, int closid, bool exclusive) { - struct rdt_resource *r_cdp; - struct rdt_domain *d_cdp; + enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); + struct rdt_resource *r = s->res; - if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive)) + if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type, + exclusive)) return true; - if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0) + if (!resctrl_arch_get_cdp_enabled(r->rid)) return false; - - return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive); + return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive); } /** @@ -1233,17 +1205,21 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) { int closid = rdtgrp->closid; + struct resctrl_schema *s; struct rdt_resource *r; bool has_cache = false; struct rdt_domain *d; + u32 ctrl; - for_each_alloc_enabled_rdt_resource(r) { + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; if (r->rid == RDT_RESOURCE_MBA) continue; has_cache = true; list_for_each_entry(d, &r->domains, list) { - if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], - rdtgrp->closid, false)) { + ctrl = resctrl_arch_get_config(r, d, closid, + s->conf_type); + if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) { rdt_last_cmd_puts("Schemata overlaps\n"); return false; } @@ -1374,11 +1350,14 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, static int rdtgroup_size_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { + struct resctrl_schema *schema; + enum resctrl_conf_type type; struct rdtgroup *rdtgrp; struct rdt_resource *r; struct rdt_domain *d; unsigned int size; int ret = 0; + u32 closid; bool sep; u32 ctrl; @@ -1395,8 +1374,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, ret = -ENODEV; } else { seq_printf(s, "%*s:", max_name_width, - rdtgrp->plr->r->name); - size = rdtgroup_cbm_to_size(rdtgrp->plr->r, + rdtgrp->plr->s->name); + size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res, rdtgrp->plr->d, rdtgrp->plr->cbm); seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size); @@ -1404,18 +1383,25 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, goto out; } - for_each_alloc_enabled_rdt_resource(r) { + closid = rdtgrp->closid; + + list_for_each_entry(schema, &resctrl_schema_all, list) { + r = schema->res; + type = schema->conf_type; sep = false; - seq_printf(s, "%*s:", max_name_width, r->name); + seq_printf(s, "%*s:", max_name_width, schema->name); list_for_each_entry(d, &r->domains, list) { if (sep) seq_putc(s, ';'); if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { size = 0; } else { - ctrl = (!is_mba_sc(r) ? - d->ctrl_val[rdtgrp->closid] : - d->mbps_val[rdtgrp->closid]); + if (is_mba_sc(r)) + ctrl = d->mbps_val[closid]; + else + ctrl = resctrl_arch_get_config(r, d, + closid, + type); if (r->rid == RDT_RESOURCE_MBA) size = ctrl; else @@ -1512,6 +1498,17 @@ static struct rftype res_common_files[] = { .seq_show = rdt_delay_linear_show, .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, }, + /* + * Platform specific which (if any) capabilities are provided by + * thread_throttle_mode. Defer "fflags" initialization to platform + * discovery. + */ + { + .name = "thread_throttle_mode", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_thread_throttle_mode_show, + }, { .name = "max_threshold_occupancy", .mode = 0644, @@ -1582,7 +1579,7 @@ static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) lockdep_assert_held(&rdtgroup_mutex); for (rft = rfts; rft < rfts + len; rft++) { - if ((fflags & rft->fflags) == rft->fflags) { + if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) { ret = rdtgroup_add_file(kn, rft); if (ret) goto error; @@ -1599,6 +1596,33 @@ error: return ret; } +static struct rftype *rdtgroup_get_rftype_by_name(const char *name) +{ + struct rftype *rfts, *rft; + int len; + + rfts = res_common_files; + len = ARRAY_SIZE(res_common_files); + + for (rft = rfts; rft < rfts + len; rft++) { + if (!strcmp(rft->name, name)) + return rft; + } + + return NULL; +} + +void __init thread_throttle_mode_init(void) +{ + struct rftype *rft; + + rft = rdtgroup_get_rftype_by_name("thread_throttle_mode"); + if (!rft) + return; + + rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB; +} + /** * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file * @r: The resource group with which the file is associated. @@ -1696,18 +1720,17 @@ int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, return ret; } -static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, +static int rdtgroup_mkdir_info_resdir(void *priv, char *name, unsigned long fflags) { struct kernfs_node *kn_subdir; int ret; kn_subdir = kernfs_create_dir(kn_info, name, - kn_info->mode, r); + kn_info->mode, priv); if (IS_ERR(kn_subdir)) return PTR_ERR(kn_subdir); - kernfs_get(kn_subdir); ret = rdtgroup_kn_set_ugid(kn_subdir); if (ret) return ret; @@ -1721,6 +1744,7 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) { + struct resctrl_schema *s; struct rdt_resource *r; unsigned long fflags; char name[32]; @@ -1730,20 +1754,21 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); if (IS_ERR(kn_info)) return PTR_ERR(kn_info); - kernfs_get(kn_info); ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); if (ret) goto out_destroy; - for_each_alloc_enabled_rdt_resource(r) { + /* loop over enabled controls, these are all alloc_capable */ + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; fflags = r->fflags | RF_CTRL_INFO; - ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags); + ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); if (ret) goto out_destroy; } - for_each_mon_enabled_rdt_resource(r) { + for_each_mon_capable_rdt_resource(r) { fflags = r->fflags | RF_MON_INFO; sprintf(name, "%s_MON", r->name); ret = rdtgroup_mkdir_info_resdir(r, name, fflags); @@ -1751,12 +1776,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) goto out_destroy; } - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn_info); - ret = rdtgroup_kn_set_ugid(kn_info); if (ret) goto out_destroy; @@ -1785,12 +1804,6 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, if (dest_kn) *dest_kn = kn; - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn); - ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -1820,7 +1833,7 @@ static void l2_qos_cfg_update(void *arg) static inline bool is_mba_linear(void) { - return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear; + return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear; } static int set_cache_qos_cfg(int level, bool enable) @@ -1841,10 +1854,15 @@ static int set_cache_qos_cfg(int level, bool enable) if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) return -ENOMEM; - r_l = &rdt_resources_all[level]; + r_l = &rdt_resources_all[level].r_resctrl; list_for_each_entry(d, &r_l->domains, list) { - /* Pick one CPU from each domain instance to update MSR */ - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + if (r_l->cache.arch_has_per_cpu_cfg) + /* Pick all the CPUs in the domain instance */ + for_each_cpu(cpu, &d->cpu_mask) + cpumask_set_cpu(cpu, cpu_mask); + else + /* Pick one CPU from each domain instance to update MSR */ + cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); } cpu = get_cpu(); /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ @@ -1859,89 +1877,127 @@ static int set_cache_qos_cfg(int level, bool enable) return 0; } +/* Restore the qos cfg state when a domain comes online */ +void rdt_domain_reconfigure_cdp(struct rdt_resource *r) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + + if (!r->cdp_capable) + return; + + if (r->rid == RDT_RESOURCE_L2) + l2_qos_cfg_update(&hw_res->cdp_enabled); + + if (r->rid == RDT_RESOURCE_L3) + l3_qos_cfg_update(&hw_res->cdp_enabled); +} + +static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) +{ + u32 num_closid = resctrl_arch_get_num_closid(r); + int cpu = cpumask_any(&d->cpu_mask); + int i; + + d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val), + GFP_KERNEL, cpu_to_node(cpu)); + if (!d->mbps_val) + return -ENOMEM; + + for (i = 0; i < num_closid; i++) + d->mbps_val[i] = MBA_MAX_MBPS; + + return 0; +} + +static void mba_sc_domain_destroy(struct rdt_resource *r, + struct rdt_domain *d) +{ + kfree(d->mbps_val); + d->mbps_val = NULL; +} + /* - * Enable or disable the MBA software controller - * which helps user specify bandwidth in MBps. * MBA software controller is supported only if * MBM is supported and MBA is in linear scale. */ +static bool supports_mba_mbps(void) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + + return (is_mbm_local_enabled() && + r->alloc_capable && is_mba_linear()); +} + +/* + * Enable or disable the MBA software controller + * which helps user specify bandwidth in MBps. + */ static int set_mba_sc(bool mba_sc) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA]; + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + u32 num_closid = resctrl_arch_get_num_closid(r); struct rdt_domain *d; + int i; - if (!is_mbm_enabled() || !is_mba_linear() || - mba_sc == is_mba_sc(r)) + if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) return -EINVAL; r->membw.mba_sc = mba_sc; - list_for_each_entry(d, &r->domains, list) - setup_default_ctrlval(r, d->ctrl_val, d->mbps_val); + + list_for_each_entry(d, &r->domains, list) { + for (i = 0; i < num_closid; i++) + d->mbps_val[i] = MBA_MAX_MBPS; + } return 0; } -static int cdp_enable(int level, int data_type, int code_type) +static int cdp_enable(int level) { - struct rdt_resource *r_ldata = &rdt_resources_all[data_type]; - struct rdt_resource *r_lcode = &rdt_resources_all[code_type]; - struct rdt_resource *r_l = &rdt_resources_all[level]; + struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl; int ret; - if (!r_l->alloc_capable || !r_ldata->alloc_capable || - !r_lcode->alloc_capable) + if (!r_l->alloc_capable) return -EINVAL; ret = set_cache_qos_cfg(level, true); - if (!ret) { - r_l->alloc_enabled = false; - r_ldata->alloc_enabled = true; - r_lcode->alloc_enabled = true; - } + if (!ret) + rdt_resources_all[level].cdp_enabled = true; + return ret; } -static int cdpl3_enable(void) +static void cdp_disable(int level) { - return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, - RDT_RESOURCE_L3CODE); -} + struct rdt_hw_resource *r_hw = &rdt_resources_all[level]; -static int cdpl2_enable(void) -{ - return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, - RDT_RESOURCE_L2CODE); + if (r_hw->cdp_enabled) { + set_cache_qos_cfg(level, false); + r_hw->cdp_enabled = false; + } } -static void cdp_disable(int level, int data_type, int code_type) +int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable) { - struct rdt_resource *r = &rdt_resources_all[level]; + struct rdt_hw_resource *hw_res = &rdt_resources_all[l]; - r->alloc_enabled = r->alloc_capable; + if (!hw_res->r_resctrl.cdp_capable) + return -EINVAL; - if (rdt_resources_all[data_type].alloc_enabled) { - rdt_resources_all[data_type].alloc_enabled = false; - rdt_resources_all[code_type].alloc_enabled = false; - set_cache_qos_cfg(level, false); - } -} + if (enable) + return cdp_enable(l); -static void cdpl3_disable(void) -{ - cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE); -} + cdp_disable(l); -static void cdpl2_disable(void) -{ - cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE); + return 0; } static void cdp_disable_all(void) { - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) - cdpl3_disable(); - if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) - cdpl2_disable(); + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) + resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); } /* @@ -2004,8 +2060,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) rdtgroup_pseudo_lock_remove(rdtgrp); kernfs_unbreak_active_protection(kn); - kernfs_put(rdtgrp->kn); - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } else { kernfs_unbreak_active_protection(kn); } @@ -2020,10 +2075,10 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx) int ret = 0; if (ctx->enable_cdpl2) - ret = cdpl2_enable(); + ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true); if (!ret && ctx->enable_cdpl3) - ret = cdpl3_enable(); + ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true); if (!ret && ctx->enable_mba_mbps) ret = set_mba_sc(true); @@ -2031,6 +2086,92 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx) return ret; } +static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type) +{ + struct resctrl_schema *s; + const char *suffix = ""; + int ret, cl; + + s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + s->res = r; + s->num_closid = resctrl_arch_get_num_closid(r); + if (resctrl_arch_get_cdp_enabled(r->rid)) + s->num_closid /= 2; + + s->conf_type = type; + switch (type) { + case CDP_CODE: + suffix = "CODE"; + break; + case CDP_DATA: + suffix = "DATA"; + break; + case CDP_NONE: + suffix = ""; + break; + } + + ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix); + if (ret >= sizeof(s->name)) { + kfree(s); + return -EINVAL; + } + + cl = strlen(s->name); + + /* + * If CDP is supported by this resource, but not enabled, + * include the suffix. This ensures the tabular format of the + * schemata file does not change between mounts of the filesystem. + */ + if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid)) + cl += 4; + + if (cl > max_name_width) + max_name_width = cl; + + INIT_LIST_HEAD(&s->list); + list_add(&s->list, &resctrl_schema_all); + + return 0; +} + +static int schemata_list_create(void) +{ + struct rdt_resource *r; + int ret = 0; + + for_each_alloc_capable_rdt_resource(r) { + if (resctrl_arch_get_cdp_enabled(r->rid)) { + ret = schemata_list_add(r, CDP_CODE); + if (ret) + break; + + ret = schemata_list_add(r, CDP_DATA); + } else { + ret = schemata_list_add(r, CDP_NONE); + } + + if (ret) + break; + } + + return ret; +} + +static void schemata_list_destroy(void) +{ + struct resctrl_schema *s, *tmp; + + list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) { + list_del(&s->list); + kfree(s); + } +} + static int rdt_get_tree(struct fs_context *fc) { struct rdt_fs_context *ctx = rdt_fc2context(fc); @@ -2052,11 +2193,17 @@ static int rdt_get_tree(struct fs_context *fc) if (ret < 0) goto out_cdp; + ret = schemata_list_create(); + if (ret) { + schemata_list_destroy(); + goto out_mba; + } + closid_init(); ret = rdtgroup_create_info_dir(rdtgroup_default.kn); if (ret < 0) - goto out_mba; + goto out_schemata_free; if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, @@ -2064,13 +2211,11 @@ static int rdt_get_tree(struct fs_context *fc) &kn_mongrp); if (ret < 0) goto out_info; - kernfs_get(kn_mongrp); ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); if (ret < 0) goto out_mongrp; - kernfs_get(kn_mondata); rdtgroup_default.mon.mon_data_kn = kn_mondata; } @@ -2091,7 +2236,7 @@ static int rdt_get_tree(struct fs_context *fc) static_branch_enable_cpuslocked(&rdt_enable_key); if (is_mbm_enabled()) { - r = &rdt_resources_all[RDT_RESOURCE_L3]; + r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; list_for_each_entry(dom, &r->domains, list) mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL); } @@ -2108,6 +2253,8 @@ out_mongrp: kernfs_remove(kn_mongrp); out_info: kernfs_remove(kn_info); +out_schemata_free: + schemata_list_destroy(); out_mba: if (ctx->enable_mba_mbps) set_mba_sc(false); @@ -2152,7 +2299,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->enable_cdpl2 = true; return 0; case Opt_mba_mbps: - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + if (!supports_mba_mbps()) return -EINVAL; ctx->enable_mba_mbps = true; return 0; @@ -2195,6 +2342,8 @@ static int rdt_init_fs_context(struct fs_context *fc) static int reset_all_ctrls(struct rdt_resource *r) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + struct rdt_hw_domain *hw_dom; struct msr_param msr_param; cpumask_var_t cpu_mask; struct rdt_domain *d; @@ -2205,7 +2354,7 @@ static int reset_all_ctrls(struct rdt_resource *r) msr_param.res = r; msr_param.low = 0; - msr_param.high = r->num_closid; + msr_param.high = hw_res->num_closid; /* * Disable resource control for this resource by setting all @@ -2213,10 +2362,11 @@ static int reset_all_ctrls(struct rdt_resource *r) * from each domain to update the MSRs below. */ list_for_each_entry(d, &r->domains, list) { + hw_dom = resctrl_to_arch_dom(d); cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); - for (i = 0; i < r->num_closid; i++) - d->ctrl_val[i] = r->default_ctrl; + for (i = 0; i < hw_res->num_closid; i++) + hw_dom->ctrl_val[i] = r->default_ctrl; } cpu = get_cpu(); /* Update CBM on this cpu if it's in cpu_mask. */ @@ -2231,18 +2381,6 @@ static int reset_all_ctrls(struct rdt_resource *r) return 0; } -static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) -{ - return (rdt_alloc_capable && - (r->type == RDTCTRL_GROUP) && (t->closid == r->closid)); -} - -static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r) -{ - return (rdt_mon_capable && - (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid)); -} - /* * Move tasks from one to the other group. If @from is NULL, then all tasks * in the systems are moved unconditionally (used for teardown). @@ -2260,22 +2398,18 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, for_each_process_thread(p, t) { if (!from || is_closid_match(t, from) || is_rmid_match(t, from)) { - t->closid = to->closid; - t->rmid = to->mon.rmid; + WRITE_ONCE(t->closid, to->closid); + WRITE_ONCE(t->rmid, to->mon.rmid); -#ifdef CONFIG_SMP /* - * This is safe on x86 w/o barriers as the ordering - * of writing to task_cpu() and t->on_cpu is - * reverse to the reading here. The detection is - * inaccurate as tasks might move or schedule - * before the smp function call takes place. In - * such a case the function call is pointless, but + * If the task is on a CPU, set the CPU in the mask. + * The detection is inaccurate as tasks might move or + * schedule before the smp function call takes place. + * In such a case the function call is pointless, but * there is no other side effect. */ - if (mask && t->on_cpu) + if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t)) cpumask_set_cpu(task_cpu(t), mask); -#endif } } read_unlock(&tasklist_lock); @@ -2294,7 +2428,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) if (atomic_read(&sentry->waitcount) != 0) sentry->flags = RDT_DELETED; else - kfree(sentry); + rdtgroup_remove(sentry); } } @@ -2336,7 +2470,7 @@ static void rmdir_all_sub(void) if (atomic_read(&rdtgrp->waitcount) != 0) rdtgrp->flags = RDT_DELETED; else - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -2356,12 +2490,13 @@ static void rdt_kill_sb(struct super_block *sb) set_mba_sc(false); /*Put everything back to default values. */ - for_each_alloc_enabled_rdt_resource(r) + for_each_alloc_capable_rdt_resource(r) reset_all_ctrls(r); cdp_disable_all(); rmdir_all_sub(); rdt_pseudo_lock_release(); rdtgroup_default.mode = RDT_MODE_SHAREABLE; + schemata_list_destroy(); static_branch_disable_cpuslocked(&rdt_alloc_enable_key); static_branch_disable_cpuslocked(&rdt_mon_enable_key); static_branch_disable_cpuslocked(&rdt_enable_key); @@ -2402,14 +2537,12 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name, * Remove all subdirectories of mon_data of ctrl_mon groups * and monitor groups with given domain id. */ -void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id) +static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, + unsigned int dom_id) { struct rdtgroup *prgrp, *crgrp; char name[32]; - if (!r->mon_enabled) - return; - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { sprintf(name, "mon_%s_%02d", r->name, dom_id); kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); @@ -2436,11 +2569,6 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (IS_ERR(kn)) return PTR_ERR(kn); - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that kn is always accessible. - */ - kernfs_get(kn); ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -2459,7 +2587,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, goto out_destroy; if (is_mbm_event(mevt->evtid)) - mon_event_read(&rr, d, prgrp, mevt->evtid, true); + mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); } kernfs_activate(kn); return 0; @@ -2473,16 +2601,13 @@ out_destroy: * Add all subdirectories of mon_data for "ctrl_mon" groups * and "monitor" groups with given domain id. */ -void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_domain *d) +static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, + struct rdt_domain *d) { struct kernfs_node *parent_kn; struct rdtgroup *prgrp, *crgrp; struct list_head *head; - if (!r->mon_enabled) - return; - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { parent_kn = prgrp->mon.mon_data_kn; mkdir_mondata_subdir(parent_kn, d, r, prgrp); @@ -2514,7 +2639,7 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, /* * This creates a directory mon_data which contains the monitored data. * - * mon_data has one directory for each domain whic are named + * mon_data has one directory for each domain which are named * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data * with L3 domain looks as below: * ./mon_data: @@ -2550,7 +2675,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, * Create the subdirectories for each domain. Note that all events * in a domain like L3 are grouped into a resource whose domain is L3 */ - for_each_mon_enabled_rdt_resource(r) { + for_each_mon_capable_rdt_resource(r) { ret = mkdir_mondata_subdir_alldom(kn, r, prgrp); if (ret) goto out_destroy; @@ -2601,23 +2726,24 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) * Set the RDT domain up to start off with all usable allocations. That is, * all shareable and unused bits. All-zero CBM is invalid. */ -static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, +static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s, u32 closid) { - struct rdt_resource *r_cdp = NULL; - struct rdt_domain *d_cdp = NULL; + enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type); + enum resctrl_conf_type t = s->conf_type; + struct resctrl_staged_config *cfg; + struct rdt_resource *r = s->res; u32 used_b = 0, unused_b = 0; unsigned long tmp_cbm; enum rdtgrp_mode mode; - u32 peer_ctl, *ctrl; + u32 peer_ctl, ctrl_val; int i; - rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp); - d->have_new_ctrl = false; - d->new_ctrl = r->cache.shareable_bits; + cfg = &d->staged_config[t]; + cfg->have_new_ctrl = false; + cfg->new_ctrl = r->cache.shareable_bits; used_b = r->cache.shareable_bits; - ctrl = d->ctrl_val; - for (i = 0; i < closids_supported(); i++, ctrl++) { + for (i = 0; i < closids_supported(); i++) { if (closid_allocated(i) && i != closid) { mode = rdtgroup_mode_by_closid(i); if (mode == RDT_MODE_PSEUDO_LOCKSETUP) @@ -2632,35 +2758,38 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, * usage to ensure there is no overlap * with an exclusive group. */ - if (d_cdp) - peer_ctl = d_cdp->ctrl_val[i]; + if (resctrl_arch_get_cdp_enabled(r->rid)) + peer_ctl = resctrl_arch_get_config(r, d, i, + peer_type); else peer_ctl = 0; - used_b |= *ctrl | peer_ctl; + ctrl_val = resctrl_arch_get_config(r, d, i, + s->conf_type); + used_b |= ctrl_val | peer_ctl; if (mode == RDT_MODE_SHAREABLE) - d->new_ctrl |= *ctrl | peer_ctl; + cfg->new_ctrl |= ctrl_val | peer_ctl; } } if (d->plr && d->plr->cbm > 0) used_b |= d->plr->cbm; unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); unused_b &= BIT_MASK(r->cache.cbm_len) - 1; - d->new_ctrl |= unused_b; + cfg->new_ctrl |= unused_b; /* * Force the initial CBM to be valid, user can * modify the CBM based on system availability. */ - d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r); + cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r); /* * Assign the u32 CBM to an unsigned long to ensure that * bitmap_weight() does not access out-of-bound memory. */ - tmp_cbm = d->new_ctrl; + tmp_cbm = cfg->new_ctrl; if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { - rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id); + rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id); return -ENOSPC; } - d->have_new_ctrl = true; + cfg->have_new_ctrl = true; return 0; } @@ -2675,13 +2804,13 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, * If there are no more shareable bits available on any domain then * the entire allocation will fail. */ -static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid) +static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) { struct rdt_domain *d; int ret; - list_for_each_entry(d, &r->domains, list) { - ret = __init_one_rdt_domain(d, r, closid); + list_for_each_entry(d, &s->res->domains, list) { + ret = __init_one_rdt_domain(d, s, closid); if (ret < 0) return ret; } @@ -2690,32 +2819,43 @@ static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid) } /* Initialize MBA resource with default values. */ -static void rdtgroup_init_mba(struct rdt_resource *r) +static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) { + struct resctrl_staged_config *cfg; struct rdt_domain *d; list_for_each_entry(d, &r->domains, list) { - d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl; - d->have_new_ctrl = true; + if (is_mba_sc(r)) { + d->mbps_val[closid] = MBA_MAX_MBPS; + continue; + } + + cfg = &d->staged_config[CDP_NONE]; + cfg->new_ctrl = r->default_ctrl; + cfg->have_new_ctrl = true; } } /* Initialize the RDT group's allocations. */ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) { + struct resctrl_schema *s; struct rdt_resource *r; int ret; - for_each_alloc_enabled_rdt_resource(r) { + list_for_each_entry(s, &resctrl_schema_all, list) { + r = s->res; if (r->rid == RDT_RESOURCE_MBA) { - rdtgroup_init_mba(r); + rdtgroup_init_mba(r, rdtgrp->closid); + if (is_mba_sc(r)) + continue; } else { - ret = rdtgroup_init_cat(r, rdtgrp->closid); + ret = rdtgroup_init_cat(s, rdtgrp->closid); if (ret < 0) return ret; } - ret = update_domains(r, rdtgrp->closid); + ret = resctrl_arch_update_domains(r, rdtgrp->closid); if (ret < 0) { rdt_last_cmd_puts("Failed to initialize allocations\n"); return ret; @@ -2775,8 +2915,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, /* * kernfs_remove() will drop the reference count on "kn" which * will free it. But we still need it to stick around for the - * rdtgroup_kn_unlock(kn} call below. Take one extra reference - * here, which will be dropped inside rdtgroup_kn_unlock(). + * rdtgroup_kn_unlock(kn) call. Take one extra reference here, + * which will be dropped by kernfs_put() in rdtgroup_remove(). */ kernfs_get(kn); @@ -2817,6 +2957,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, out_idfree: free_rmid(rdtgrp->mon.rmid); out_destroy: + kernfs_put(rdtgrp->kn); kernfs_remove(rdtgrp->kn); out_free_rgrp: kfree(rdtgrp); @@ -2829,7 +2970,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) { kernfs_remove(rgrp->kn); free_rmid(rgrp->mon.rmid); - kfree(rgrp); + rdtgroup_remove(rgrp); } /* @@ -2958,8 +3099,7 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, return -EPERM; } -static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp, - cpumask_var_t tmpmask) +static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) { struct rdtgroup *prdtgrp = rdtgrp->mon.parent; int cpu; @@ -2986,33 +3126,21 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp, WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); list_del(&rdtgrp->mon.crdtgrp_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; } -static int rdtgroup_ctrl_remove(struct kernfs_node *kn, - struct rdtgroup *rdtgrp) +static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp) { rdtgrp->flags = RDT_DELETED; list_del(&rdtgrp->rdtgroup_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; } -static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, - cpumask_var_t tmpmask) +static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) { int cpu; @@ -3039,7 +3167,7 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, closid_free(rdtgrp->closid); free_rmid(rdtgrp->mon.rmid); - rdtgroup_ctrl_remove(kn, rdtgrp); + rdtgroup_ctrl_remove(rdtgrp); /* * Free all the child monitor group rmids. @@ -3072,16 +3200,17 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) * If the rdtgroup is a mon group and parent directory * is a valid "mon_groups" directory, remove the mon group. */ - if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) { + if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && + rdtgrp != &rdtgroup_default) { if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { - ret = rdtgroup_ctrl_remove(kn, rdtgrp); + ret = rdtgroup_ctrl_remove(rdtgrp); } else { - ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask); + ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask); } } else if (rdtgrp->type == RDTMON_GROUP && is_mon_groups(parent_kn, kn->name)) { - ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask); + ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask); } else { ret = -EPERM; } @@ -3094,13 +3223,13 @@ out: static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) { - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) seq_puts(seq, ",cdp"); - if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) + if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) seq_puts(seq, ",cdpl2"); - if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA])) + if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl)) seq_puts(seq, ",mba_MBps"); return 0; @@ -3132,13 +3261,13 @@ static int __init rdtgroup_setup_root(void) list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); - ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE); + ret = rdtgroup_add_files(kernfs_root_to_node(rdt_root), RF_CTRL_BASE); if (ret) { kernfs_destroy_root(rdt_root); goto out; } - rdtgroup_default.kn = rdt_root->kn; + rdtgroup_default.kn = kernfs_root_to_node(rdt_root); kernfs_activate(rdtgroup_default.kn); out: @@ -3147,6 +3276,110 @@ out: return ret; } +static void domain_destroy_mon_state(struct rdt_domain *d) +{ + bitmap_free(d->rmid_busy_llc); + kfree(d->mbm_total); + kfree(d->mbm_local); +} + +void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) +{ + lockdep_assert_held(&rdtgroup_mutex); + + if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) + mba_sc_domain_destroy(r, d); + + if (!r->mon_capable) + return; + + /* + * If resctrl is mounted, remove all the + * per domain monitor data directories. + */ + if (static_branch_unlikely(&rdt_mon_enable_key)) + rmdir_mondata_subdir_allrdtgrp(r, d->id); + + if (is_mbm_enabled()) + cancel_delayed_work(&d->mbm_over); + if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { + /* + * When a package is going down, forcefully + * decrement rmid->ebusy. There is no way to know + * that the L3 was flushed and hence may lead to + * incorrect counts in rare scenarios, but leaving + * the RMID as busy creates RMID leaks if the + * package never comes back. + */ + __check_limbo(d, true); + cancel_delayed_work(&d->cqm_limbo); + } + + domain_destroy_mon_state(d); +} + +static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) +{ + size_t tsize; + + if (is_llc_occupancy_enabled()) { + d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL); + if (!d->rmid_busy_llc) + return -ENOMEM; + } + if (is_mbm_total_enabled()) { + tsize = sizeof(*d->mbm_total); + d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL); + if (!d->mbm_total) { + bitmap_free(d->rmid_busy_llc); + return -ENOMEM; + } + } + if (is_mbm_local_enabled()) { + tsize = sizeof(*d->mbm_local); + d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL); + if (!d->mbm_local) { + bitmap_free(d->rmid_busy_llc); + kfree(d->mbm_total); + return -ENOMEM; + } + } + + return 0; +} + +int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) +{ + int err; + + lockdep_assert_held(&rdtgroup_mutex); + + if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) + /* RDT_RESOURCE_MBA is never mon_capable */ + return mba_sc_domain_allocate(r, d); + + if (!r->mon_capable) + return 0; + + err = domain_setup_mon_state(r, d); + if (err) + return err; + + if (is_mbm_enabled()) { + INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); + mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL); + } + + if (is_llc_occupancy_enabled()) + INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); + + /* If resctrl is mounted, add per domain monitor data directories. */ + if (static_branch_unlikely(&rdt_mon_enable_key)) + mkdir_mondata_subdir_allrdtgrp(r, d); + + return 0; +} + /* * rdtgroup_init - rdtgroup initialization * @@ -3181,14 +3414,14 @@ int __init rdtgroup_init(void) * It may also be ok since that would enable debugging of RDT before * resctrl is mounted. * The reason why the debugfs directory is created here and not in - * rdt_mount() is because rdt_mount() takes rdtgroup_mutex and + * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and * during the debugfs directory creation also &sb->s_type->i_mutex_key * (the lockdep class of inode->i_rwsem). Other filesystem * interactions (eg. SyS_getdents) have the lock ordering: - * &sb->s_type->i_mutex_key --> &mm->mmap_sem - * During mmap(), called with &mm->mmap_sem, the rdtgroup_mutex + * &sb->s_type->i_mutex_key --> &mm->mmap_lock + * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex * is taken, thus creating dependency: - * &mm->mmap_sem --> rdtgroup_mutex for the latter that can cause + * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause * issues considering the other two lock dependencies. * By creating the debugfs directory here we avoid a dependency * that may cause deadlock (even though file operations cannot |