mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Now that MPAM links against resctrl, call resctrl_init() to register the filesystem and setup resctrl's structures. Tested-by: Gavin Shan <gshan@redhat.com> Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Zeng Heng <zengheng4@huawei.com> Tested-by: Punit Agrawal <punit.agrawal@oss.qualcomm.com> Tested-by: Jesse Chick <jessechick@os.amperecomputing.com> Reviewed-by: Zeng Heng <zengheng4@huawei.com> Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Gavin Shan <gshan@redhat.com> Co-developed-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: Ben Horgan <ben.horgan@arm.com> Signed-off-by: James Morse <james.morse@arm.com>
1705 lines
43 KiB
C
1705 lines
43 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
// Copyright (C) 2025 Arm Ltd.
|
|
|
|
#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
|
|
|
|
#include <linux/arm_mpam.h>
|
|
#include <linux/cacheinfo.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/limits.h>
|
|
#include <linux/list.h>
|
|
#include <linux/math.h>
|
|
#include <linux/printk.h>
|
|
#include <linux/rculist.h>
|
|
#include <linux/resctrl.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/types.h>
|
|
#include <linux/wait.h>
|
|
|
|
#include <asm/mpam.h>
|
|
|
|
#include "mpam_internal.h"
|
|
|
|
DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters);
|
|
|
|
/*
|
|
* The classes we've picked to map to resctrl resources, wrapped
|
|
* in with their resctrl structure.
|
|
* Class pointer may be NULL.
|
|
*/
|
|
static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES];
|
|
|
|
#define for_each_mpam_resctrl_control(res, rid) \
|
|
for (rid = 0, res = &mpam_resctrl_controls[rid]; \
|
|
rid < RDT_NUM_RESOURCES; \
|
|
rid++, res = &mpam_resctrl_controls[rid])
|
|
|
|
/*
|
|
* The classes we've picked to map to resctrl events.
|
|
* Resctrl believes all the worlds a Xeon, and these are all on the L3. This
|
|
* array lets us find the actual class backing the event counters. e.g.
|
|
* the only memory bandwidth counters may be on the memory controller, but to
|
|
* make use of them, we pretend they are on L3. Restrict the events considered
|
|
* to those supported by MPAM.
|
|
* Class pointer may be NULL.
|
|
*/
|
|
#define MPAM_MAX_EVENT QOS_L3_MBM_TOTAL_EVENT_ID
|
|
static struct mpam_resctrl_mon mpam_resctrl_counters[MPAM_MAX_EVENT + 1];
|
|
|
|
#define for_each_mpam_resctrl_mon(mon, eventid) \
|
|
for (eventid = QOS_FIRST_EVENT, mon = &mpam_resctrl_counters[eventid]; \
|
|
eventid <= MPAM_MAX_EVENT; \
|
|
eventid++, mon = &mpam_resctrl_counters[eventid])
|
|
|
|
/* The lock for modifying resctrl's domain lists from cpuhp callbacks. */
|
|
static DEFINE_MUTEX(domain_list_lock);
|
|
|
|
/*
|
|
* MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM0_EL1.
|
|
* This applies globally to all traffic the CPU generates.
|
|
*/
|
|
static bool cdp_enabled;
|
|
|
|
/*
|
|
* We use cacheinfo to discover the size of the caches and their id. cacheinfo
|
|
* populates this from a device_initcall(). mpam_resctrl_setup() must wait.
|
|
*/
|
|
static bool cacheinfo_ready;
|
|
static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready);
|
|
|
|
/*
|
|
* If resctrl_init() succeeded, resctrl_exit() can be used to remove support
|
|
* for the filesystem in the event of an error.
|
|
*/
|
|
static bool resctrl_enabled;
|
|
|
|
bool resctrl_arch_alloc_capable(void)
|
|
{
|
|
struct mpam_resctrl_res *res;
|
|
enum resctrl_res_level rid;
|
|
|
|
for_each_mpam_resctrl_control(res, rid) {
|
|
if (res->resctrl_res.alloc_capable)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool resctrl_arch_mon_capable(void)
|
|
{
|
|
struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
|
|
struct rdt_resource *l3 = &res->resctrl_res;
|
|
|
|
/* All monitors are presented as being on the L3 cache */
|
|
return l3->mon_capable;
|
|
}
|
|
|
|
bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/*
 * Deliberately empty: resctrl_arch_is_evt_configurable() reports that no
 * event is configurable, so there is nothing to read back into @info.
 */
void resctrl_arch_mon_event_config_read(void *info)
{
}
|
|
|
|
/*
 * Deliberately empty: resctrl_arch_is_evt_configurable() reports that no
 * event is configurable, so the request described by @info is ignored.
 */
void resctrl_arch_mon_event_config_write(void *info)
{
}
|
|
|
|
/* Empty hook: nothing is reset for @r or @d here. */
void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
{
}
|
|
|
|
/* Empty hook: no state is reset for the given closid/rmid/event. */
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
			     u32 closid, u32 rmid, enum resctrl_event_id eventid)
{
}
|
|
|
|
/*
 * Empty hook: counter assignment is never reported as enabled (see
 * resctrl_arch_mbm_cntr_assign_enabled()), so there is no counter to reset.
 */
void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
			     u32 closid, u32 rmid, int cntr_id,
			     enum resctrl_event_id eventid)
{
}
|
|
|
|
/*
 * Empty hook: counter assignment is never reported as enabled (see
 * resctrl_arch_mbm_cntr_assign_enabled()), so the request is ignored.
 */
void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
			      enum resctrl_event_id evtid, u32 rmid, u32 closid,
			      u32 cntr_id, bool assign)
{
}
|
|
|
|
/*
 * Reading an assigned counter is not supported: always fails with
 * -EOPNOTSUPP and never writes to @val.
 */
int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
			   u32 unused, u32 rmid, int cntr_id,
			   enum resctrl_event_id eventid, u64 *val)
{
	return -EOPNOTSUPP;
}
|
|
|
|
bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/*
 * Counter assignment mode cannot be changed: every request, whether to
 * enable or disable, fails with -EINVAL.
 */
int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable)
{
	return -EINVAL;
}
|
|
|
|
/* io_alloc is not supported: every request fails with -EOPNOTSUPP. */
int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable)
{
	return -EOPNOTSUPP;
}
|
|
|
|
bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/* Empty hook: no work is done here before the filesystem is mounted. */
void resctrl_arch_pre_mount(void)
{
}
|
|
|
|
bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)
|
|
{
|
|
return mpam_resctrl_controls[rid].cdp_enabled;
|
|
}
|
|
|
|
/**
|
|
* resctrl_reset_task_closids() - Reset the PARTID/PMG values for all tasks.
|
|
*
|
|
* At boot, all existing tasks use partid zero for D and I.
|
|
* To enable/disable CDP emulation, all these tasks need relabelling.
|
|
*/
|
|
static void resctrl_reset_task_closids(void)
|
|
{
|
|
struct task_struct *p, *t;
|
|
|
|
read_lock(&tasklist_lock);
|
|
for_each_process_thread(p, t) {
|
|
resctrl_arch_set_closid_rmid(t, RESCTRL_RESERVED_CLOSID,
|
|
RESCTRL_RESERVED_RMID);
|
|
}
|
|
read_unlock(&tasklist_lock);
|
|
}
|
|
|
|
/**
 * resctrl_arch_set_cdp_enabled() - Enable or disable CDP emulation.
 * @rid:	The resctrl resource whose CDP flag is being changed.
 * @enable:	true to enable CDP emulation, false to disable it.
 *
 * Updates the global and per-resource CDP flags, the number of rmid
 * advertised for the L3, and whether MBA is alloc_capable. All tasks and the
 * per-CPU defaults are then relabelled with the reserved closid/rmid, and
 * every CPU is made to reload its MPAM configuration.
 *
 * All the checks that can fail are made before any state is modified, so a
 * failed request leaves the CDP configuration exactly as it was.
 *
 * Return: 0 on success, -EOPNOTSUPP if CDP is requested on a kernel built
 * without CONFIG_EXPERT, -EINVAL if there are not enough PARTID for CDP.
 */
int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable)
{
	u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID;
	struct mpam_resctrl_res *mba = &mpam_resctrl_controls[RDT_RESOURCE_MBA];
	struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
	struct rdt_resource *l3 = &res->resctrl_res;
	int cpu;

	if (enable) {
		if (!IS_ENABLED(CONFIG_EXPERT)) {
			/*
			 * If the resctrl fs is mounted more than once,
			 * sequentially, then CDP can lead to the use of out
			 * of range PARTIDs.
			 */
			pr_warn("CDP not supported\n");
			return -EOPNOTSUPP;
		}

		/*
		 * CDP needs a pair of PARTID for each closid. Check this
		 * before changing anything so that failure has no side
		 * effects.
		 */
		if (mpam_partid_max < 1)
			return -EINVAL;

		pr_warn("CDP is an expert feature and may cause MPAM to malfunction.\n");
	}

	/*
	 * resctrl_arch_set_cdp_enabled() is only called with enable set to
	 * false on error and unmount.
	 */
	cdp_enabled = enable;
	mpam_resctrl_controls[rid].cdp_enabled = enable;

	/* With CDP, only half the rmid_idx space is advertised */
	if (enable)
		l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx() / 2;
	else
		l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx();

	/* The mbw_max feature can't hide cdp as it's a per-partid maximum. */
	if (cdp_enabled && !mba->cdp_enabled)
		mba->resctrl_res.alloc_capable = false;

	if (mba->cdp_enabled && mba->class)
		mba->resctrl_res.alloc_capable = true;

	if (enable) {
		partid_d = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_DATA);
		partid_i = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_CODE);
	}

	/* Relabel this task first so its register value can seed the default */
	mpam_set_task_partid_pmg(current, partid_d, partid_i, 0, 0);
	WRITE_ONCE(arm64_mpam_global_default, mpam_get_regval(current));

	resctrl_reset_task_closids();

	for_each_possible_cpu(cpu)
		mpam_set_cpu_defaults(cpu, partid_d, partid_i, 0, 0);
	on_each_cpu(resctrl_arch_sync_cpu_closid_rmid, NULL, 1);

	return 0;
}
|
|
|
|
static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid)
|
|
{
|
|
return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid);
|
|
}
|
|
|
|
/*
|
|
* MSC may raise an error interrupt if it sees an out or range partid/pmg,
|
|
* and go on to truncate the value. Regardless of what the hardware supports,
|
|
* only the system wide safe value is safe to use.
|
|
*/
|
|
u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored)
|
|
{
|
|
return mpam_partid_max + 1;
|
|
}
|
|
|
|
u32 resctrl_arch_system_num_rmid_idx(void)
|
|
{
|
|
return (mpam_pmg_max + 1) * (mpam_partid_max + 1);
|
|
}
|
|
|
|
/*
 * Pack @closid and @rmid into a single index: each closid owns a run of
 * (mpam_pmg_max + 1) consecutive indexes, @rmid selects within the run.
 * resctrl_arch_rmid_idx_decode() is the inverse.
 */
u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
{
	u32 pmg_per_partid = mpam_pmg_max + 1;

	return closid * pmg_per_partid + rmid;
}
|
|
|
|
/*
 * Split an index produced by resctrl_arch_rmid_idx_encode() back into its
 * @closid (quotient) and @rmid (remainder) parts.
 */
void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
{
	u32 pmg_per_partid = mpam_pmg_max + 1;

	*closid = idx / pmg_per_partid;
	*rmid = idx % pmg_per_partid;
}
|
|
|
|
/*
 * Apply @tsk's MPAM configuration via mpam_thread_switch().
 * Must be called with preemption disabled.
 */
void resctrl_arch_sched_in(struct task_struct *tsk)
{
	lockdep_assert_preemption_disabled();
	mpam_thread_switch(tsk);
}
|
|
|
|
/*
 * Set the default partid/pmg for @cpu from resctrl's @closid and @rmid.
 * Warns once if either value is too big for the partid (16 bit) or pmg
 * (8 bit) range.
 */
void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid)
{
	u32 partid_d = closid, partid_i = closid;

	WARN_ON_ONCE(closid > U16_MAX);
	WARN_ON_ONCE(rmid > U8_MAX);

	/*
	 * When CDP is enabled, resctrl halves the closid range and we
	 * use odd/even partid for one closid.
	 */
	if (cdp_enabled) {
		partid_d = resctrl_get_config_index(closid, CDP_DATA);
		partid_i = resctrl_get_config_index(closid, CDP_CODE);
	}

	mpam_set_cpu_defaults(cpu, partid_d, partid_i, rmid, rmid);
}
|
|
|
|
void resctrl_arch_sync_cpu_closid_rmid(void *info)
|
|
{
|
|
struct resctrl_cpu_defaults *r = info;
|
|
|
|
lockdep_assert_preemption_disabled();
|
|
|
|
if (r) {
|
|
resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(),
|
|
r->closid, r->rmid);
|
|
}
|
|
|
|
resctrl_arch_sched_in(current);
|
|
}
|
|
|
|
/*
 * Set @tsk's partid/pmg from resctrl's @closid and @rmid. With CDP enabled
 * the data and instruction partid differ, otherwise both are @closid.
 * Warns once if either value is too big for the partid (16 bit) or pmg
 * (8 bit) range.
 */
void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
{
	u32 partid_d = closid, partid_i = closid;

	WARN_ON_ONCE(closid > U16_MAX);
	WARN_ON_ONCE(rmid > U8_MAX);

	if (cdp_enabled) {
		partid_d = resctrl_get_config_index(closid, CDP_DATA);
		partid_i = resctrl_get_config_index(closid, CDP_CODE);
	}

	mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid);
}
|
|
|
|
/*
 * Does @tsk belong to @closid? The comparison uses the data partid from the
 * task's register value; with CDP, one closid is backed by a pair of partid
 * so the low bit is dropped first.
 */
bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid)
{
	u64 regval = mpam_get_regval(tsk);
	u32 partid = FIELD_GET(MPAM0_EL1_PARTID_D, regval);

	return (cdp_enabled ? partid >> 1 : partid) == closid;
}
|
|
|
|
/* The task's pmg is not unique, the partid must be considered too */
|
|
bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
|
|
{
|
|
u64 regval = mpam_get_regval(tsk);
|
|
u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval);
|
|
u32 tsk_rmid = FIELD_GET(MPAM0_EL1_PMG_D, regval);
|
|
|
|
if (cdp_enabled)
|
|
tsk_closid >>= 1;
|
|
|
|
return (tsk_closid == closid) && (tsk_rmid == rmid);
|
|
}
|
|
|
|
struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
|
|
{
|
|
if (l >= RDT_NUM_RESOURCES)
|
|
return NULL;
|
|
|
|
return &mpam_resctrl_controls[l].resctrl_res;
|
|
}
|
|
|
|
static int resctrl_arch_mon_ctx_alloc_no_wait(enum resctrl_event_id evtid)
|
|
{
|
|
struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid];
|
|
|
|
if (!mpam_is_enabled())
|
|
return -EINVAL;
|
|
|
|
if (!mon->class)
|
|
return -EINVAL;
|
|
|
|
switch (evtid) {
|
|
case QOS_L3_OCCUP_EVENT_ID:
|
|
/* With CDP, one monitor gets used for both code/data reads */
|
|
return mpam_alloc_csu_mon(mon->class);
|
|
case QOS_L3_MBM_LOCAL_EVENT_ID:
|
|
case QOS_L3_MBM_TOTAL_EVENT_ID:
|
|
return USE_PRE_ALLOCATED;
|
|
default:
|
|
return -EOPNOTSUPP;
|
|
}
|
|
}
|
|
|
|
void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r,
|
|
enum resctrl_event_id evtid)
|
|
{
|
|
DEFINE_WAIT(wait);
|
|
int *ret;
|
|
|
|
ret = kmalloc_obj(*ret);
|
|
if (!ret)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
do {
|
|
prepare_to_wait(&resctrl_mon_ctx_waiters, &wait,
|
|
TASK_INTERRUPTIBLE);
|
|
*ret = resctrl_arch_mon_ctx_alloc_no_wait(evtid);
|
|
if (*ret == -ENOSPC)
|
|
schedule();
|
|
} while (*ret == -ENOSPC && !signal_pending(current));
|
|
finish_wait(&resctrl_mon_ctx_waiters, &wait);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void resctrl_arch_mon_ctx_free_no_wait(enum resctrl_event_id evtid,
|
|
u32 mon_idx)
|
|
{
|
|
struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid];
|
|
|
|
if (!mpam_is_enabled())
|
|
return;
|
|
|
|
if (!mon->class)
|
|
return;
|
|
|
|
if (evtid == QOS_L3_OCCUP_EVENT_ID)
|
|
mpam_free_csu_mon(mon->class, mon_idx);
|
|
|
|
wake_up(&resctrl_mon_ctx_waiters);
|
|
}
|
|
|
|
void resctrl_arch_mon_ctx_free(struct rdt_resource *r,
|
|
enum resctrl_event_id evtid, void *arch_mon_ctx)
|
|
{
|
|
u32 mon_idx = *(u32 *)arch_mon_ctx;
|
|
|
|
kfree(arch_mon_ctx);
|
|
|
|
resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx);
|
|
}
|
|
|
|
/*
 * Read monitor @mon_idx of type @mon_type on @mon_comp for one partid/pmg.
 * @closid is converted to a partid using @cdp_type. @mon is unused.
 *
 * Return: -EINVAL if MPAM is disabled, -EIO if called with interrupts
 * disabled, otherwise the result of mpam_msmon_read().
 */
static int __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
		      enum mpam_device_features mon_type,
		      int mon_idx,
		      enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val)
{
	struct mon_cfg cfg;
	u32 partid;

	if (!mpam_is_enabled())
		return -EINVAL;

	/* Shift closid to account for CDP */
	partid = resctrl_get_config_index(closid, cdp_type);

	/* Check if we can access this domain without an IPI */
	if (irqs_disabled())
		return -EIO;

	cfg = (struct mon_cfg) {
		.mon = mon_idx,
		.match_pmg = true,
		.partid = partid,
		.pmg = rmid,
	};

	return mpam_msmon_read(mon_comp, &cfg, mon_type, val);
}
|
|
|
|
/*
 * Read a monitor for @closid/@rmid whether or not CDP is enabled. Without
 * CDP this is a single read. With CDP the code and data partid are read
 * separately and their sum is added to *@val; *@val is only touched if both
 * reads succeed.
 */
static int read_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
			     enum mpam_device_features mon_type,
			     int mon_idx, u32 closid, u32 rmid, u64 *val)
{
	u64 code = 0, data = 0;
	int ret;

	if (!cdp_enabled)
		return __read_mon(mon, mon_comp, mon_type, mon_idx,
				  CDP_NONE, closid, rmid, val);

	ret = __read_mon(mon, mon_comp, mon_type, mon_idx,
			 CDP_CODE, closid, rmid, &code);
	if (ret)
		return ret;

	ret = __read_mon(mon, mon_comp, mon_type, mon_idx,
			 CDP_DATA, closid, rmid, &data);
	if (ret)
		return ret;

	*val += code + data;

	return 0;
}
|
|
|
|
/* MBWU when not in ABMC mode (not supported), and CSU counters. */
|
|
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr,
|
|
u32 closid, u32 rmid, enum resctrl_event_id eventid,
|
|
void *arch_priv, u64 *val, void *arch_mon_ctx)
|
|
{
|
|
struct mpam_resctrl_dom *l3_dom;
|
|
struct mpam_component *mon_comp;
|
|
u32 mon_idx = *(u32 *)arch_mon_ctx;
|
|
enum mpam_device_features mon_type;
|
|
struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid];
|
|
|
|
resctrl_arch_rmid_read_context_check();
|
|
|
|
if (!mpam_is_enabled())
|
|
return -EINVAL;
|
|
|
|
if (eventid >= QOS_NUM_EVENTS || !mon->class)
|
|
return -EINVAL;
|
|
|
|
l3_dom = container_of(hdr, struct mpam_resctrl_dom, resctrl_mon_dom.hdr);
|
|
mon_comp = l3_dom->mon_comp[eventid];
|
|
|
|
if (eventid != QOS_L3_OCCUP_EVENT_ID)
|
|
return -EINVAL;
|
|
|
|
mon_type = mpam_feat_msmon_csu;
|
|
|
|
return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_idx,
|
|
closid, rmid, val);
|
|
}
|
|
|
|
/*
|
|
* The rmid realloc threshold should be for the smallest cache exposed to
|
|
* resctrl.
|
|
*/
|
|
static int update_rmid_limits(struct mpam_class *class)
|
|
{
|
|
u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx();
|
|
struct mpam_props *cprops = &class->props;
|
|
struct cacheinfo *ci;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
|
|
return 0;
|
|
|
|
/*
|
|
* Assume cache levels are the same size for all CPUs...
|
|
* The check just requires any online CPU and it can't go offline as we
|
|
* hold the cpu lock.
|
|
*/
|
|
ci = get_cpu_cacheinfo_level(raw_smp_processor_id(), class->level);
|
|
if (!ci || ci->size == 0) {
|
|
pr_debug("Could not read cache size for class %u\n",
|
|
class->level);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!resctrl_rmid_realloc_limit ||
|
|
ci->size < resctrl_rmid_realloc_limit) {
|
|
resctrl_rmid_realloc_limit = ci->size;
|
|
resctrl_rmid_realloc_threshold = ci->size / num_unique_pmg;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static bool cache_has_usable_cpor(struct mpam_class *class)
|
|
{
|
|
struct mpam_props *cprops = &class->props;
|
|
|
|
if (!mpam_has_feature(mpam_feat_cpor_part, cprops))
|
|
return false;
|
|
|
|
/* resctrl uses u32 for all bitmap configurations */
|
|
return class->props.cpbm_wd <= 32;
|
|
}
|
|
|
|
static bool mba_class_use_mbw_max(struct mpam_props *cprops)
|
|
{
|
|
return (mpam_has_feature(mpam_feat_mbw_max, cprops) &&
|
|
cprops->bwa_wd);
|
|
}
|
|
|
|
/* mbw_max is the only bandwidth control that makes a class usable as MBA. */
static bool class_has_usable_mba(struct mpam_props *cprops)
{
	bool usable = mba_class_use_mbw_max(cprops);

	return usable;
}
|
|
|
|
static bool cache_has_usable_csu(struct mpam_class *class)
|
|
{
|
|
struct mpam_props *cprops;
|
|
|
|
if (!class)
|
|
return false;
|
|
|
|
cprops = &class->props;
|
|
|
|
if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
|
|
return false;
|
|
|
|
/*
|
|
* CSU counters settle on the value, so we can get away with
|
|
* having only one.
|
|
*/
|
|
if (!cprops->num_csu_mon)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Calculate the worst-case percentage change from each implemented step
|
|
* in the control.
|
|
*/
|
|
static u32 get_mba_granularity(struct mpam_props *cprops)
|
|
{
|
|
if (!mba_class_use_mbw_max(cprops))
|
|
return 0;
|
|
|
|
/*
|
|
* bwa_wd is the number of bits implemented in the 0.xxx
|
|
* fixed point fraction. 1 bit is 50%, 2 is 25% etc.
|
|
*/
|
|
return DIV_ROUND_UP(MAX_MBA_BW, 1 << cprops->bwa_wd);
|
|
}
|
|
|
|
/*
|
|
* Each fixed-point hardware value architecturally represents a range
|
|
* of values: the full range 0% - 100% is split contiguously into
|
|
* (1 << cprops->bwa_wd) equal bands.
|
|
*
|
|
* Although the bwa_bwd fields have 6 bits the maximum valid value is 16
|
|
* as it reports the width of fields that are at most 16 bits. When
|
|
* fewer than 16 bits are valid the least significant bits are
|
|
* ignored. The implied binary point is kept between bits 15 and 16 and
|
|
* so the valid bits are leftmost.
|
|
*
|
|
* See ARM IHI0099B.a "MPAM system component specification", Section 9.3,
|
|
* "The fixed-point fractional format" for more information.
|
|
*
|
|
* Find the nearest percentage value to the upper bound of the selected band:
|
|
*/
|
|
static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops)
|
|
{
|
|
u32 val = mbw_max;
|
|
|
|
val >>= 16 - cprops->bwa_wd;
|
|
val += 1;
|
|
val *= MAX_MBA_BW;
|
|
val = DIV_ROUND_CLOSEST(val, 1 << cprops->bwa_wd);
|
|
|
|
return val;
|
|
}
|
|
|
|
/*
|
|
* Find the band whose upper bound is closest to the specified percentage.
|
|
*
|
|
* A round-to-nearest policy is followed here as a balanced compromise
|
|
* between unexpected under-commit of the resource (where the total of
|
|
* a set of resource allocations after conversion is less than the
|
|
* expected total, due to rounding of the individual converted
|
|
* percentages) and over-commit (where the total of the converted
|
|
* allocations is greater than expected).
|
|
*/
|
|
static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops)
|
|
{
|
|
u32 val = pc;
|
|
|
|
val <<= cprops->bwa_wd;
|
|
val = DIV_ROUND_CLOSEST(val, MAX_MBA_BW);
|
|
val = max(val, 1) - 1;
|
|
val <<= 16 - cprops->bwa_wd;
|
|
|
|
return val;
|
|
}
|
|
|
|
/*
 * The smallest percentage that can be configured: the upper bound of the
 * lowest band. Warns once and returns 0 if mbw_max can't be used.
 */
static u32 get_mba_min(struct mpam_props *cprops)
{
	if (WARN_ON_ONCE(!mba_class_use_mbw_max(cprops)))
		return 0;

	return mbw_max_to_percent(0, cprops);
}
|
|
|
|
/* Find the L3 cache that has affinity with this CPU */
|
|
static int find_l3_equivalent_bitmask(int cpu, cpumask_var_t tmp_cpumask)
|
|
{
|
|
u32 cache_id = get_cpu_cacheinfo_id(cpu, 3);
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
return mpam_get_cpumask_from_cache_id(cache_id, 3, tmp_cpumask);
|
|
}
|
|
|
|
/*
|
|
* topology_matches_l3() - Is the provided class the same shape as L3
|
|
* @victim: The class we'd like to pretend is L3.
|
|
*
|
|
* resctrl expects all the world's a Xeon, and all counters are on the
|
|
* L3. We allow some mapping counters on other classes. This requires
|
|
* that the CPU->domain mapping is the same kind of shape.
|
|
*
|
|
* Using cacheinfo directly would make this work even if resctrl can't
|
|
* use the L3 - but cacheinfo can't tell us anything about offline CPUs.
|
|
* Using the L3 resctrl domain list also depends on CPUs being online.
|
|
* Using the mpam_class we picked for L3 so we can use its domain list
|
|
* assumes that there are MPAM controls on the L3.
|
|
* Instead, this path eventually uses the mpam_get_cpumask_from_cache_id()
|
|
* helper which can tell us about offline CPUs ... but getting the cache_id
|
|
* to start with relies on at least one CPU per L3 cache being online at
|
|
* boot.
|
|
*
|
|
* Walk the victim component list and compare the affinity mask with the
|
|
* corresponding L3. The topology matches if each victim:component's affinity
|
|
* mask is the same as the CPU's corresponding L3's. These lists/masks are
|
|
* computed from firmware tables so don't change at runtime.
|
|
*/
|
|
static bool topology_matches_l3(struct mpam_class *victim)
|
|
{
|
|
int cpu, err;
|
|
struct mpam_component *victim_iter;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL;
|
|
if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL))
|
|
return false;
|
|
|
|
guard(srcu)(&mpam_srcu);
|
|
list_for_each_entry_srcu(victim_iter, &victim->components, class_list,
|
|
srcu_read_lock_held(&mpam_srcu)) {
|
|
if (cpumask_empty(&victim_iter->affinity)) {
|
|
pr_debug("class %u has CPU-less component %u - can't match L3!\n",
|
|
victim->level, victim_iter->comp_id);
|
|
return false;
|
|
}
|
|
|
|
cpu = cpumask_any_and(&victim_iter->affinity, cpu_online_mask);
|
|
if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
|
|
return false;
|
|
|
|
cpumask_clear(tmp_cpumask);
|
|
err = find_l3_equivalent_bitmask(cpu, tmp_cpumask);
|
|
if (err) {
|
|
pr_debug("Failed to find L3's equivalent component to class %u component %u\n",
|
|
victim->level, victim_iter->comp_id);
|
|
return false;
|
|
}
|
|
|
|
/* Any differing bits in the affinity mask? */
|
|
if (!cpumask_equal(tmp_cpumask, &victim_iter->affinity)) {
|
|
pr_debug("class %u component %u has Mismatched CPU mask with L3 equivalent\n"
|
|
"L3:%*pbl != victim:%*pbl\n",
|
|
victim->level, victim_iter->comp_id,
|
|
cpumask_pr_args(tmp_cpumask),
|
|
cpumask_pr_args(&victim_iter->affinity));
|
|
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Test if the traffic for a class matches that at egress from the L3. For
|
|
* MSC at memory controllers this is only possible if there is a single L3
|
|
* as otherwise the counters at the memory can include bandwidth from the
|
|
* non-local L3.
|
|
*/
|
|
static bool traffic_matches_l3(struct mpam_class *class)
|
|
{
|
|
int err, cpu;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
if (class->type == MPAM_CLASS_CACHE && class->level == 3)
|
|
return true;
|
|
|
|
if (class->type == MPAM_CLASS_CACHE && class->level != 3) {
|
|
pr_debug("class %u is a different cache from L3\n", class->level);
|
|
return false;
|
|
}
|
|
|
|
if (class->type != MPAM_CLASS_MEMORY) {
|
|
pr_debug("class %u is neither of type cache or memory\n", class->level);
|
|
return false;
|
|
}
|
|
|
|
cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL;
|
|
if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) {
|
|
pr_debug("cpumask allocation failed\n");
|
|
return false;
|
|
}
|
|
|
|
cpu = cpumask_any_and(&class->affinity, cpu_online_mask);
|
|
err = find_l3_equivalent_bitmask(cpu, tmp_cpumask);
|
|
if (err) {
|
|
pr_debug("Failed to find L3 downstream to cpu %d\n", cpu);
|
|
return false;
|
|
}
|
|
|
|
if (!cpumask_equal(tmp_cpumask, cpu_possible_mask)) {
|
|
pr_debug("There is more than one L3\n");
|
|
return false;
|
|
}
|
|
|
|
/* Be strict; the traffic might stop in the intermediate cache. */
|
|
if (get_cpu_cacheinfo_id(cpu, 4) != -1) {
|
|
pr_debug("L3 isn't the last level of cache\n");
|
|
return false;
|
|
}
|
|
|
|
if (num_possible_nodes() > 1) {
|
|
pr_debug("There is more than one numa node\n");
|
|
return false;
|
|
}
|
|
|
|
#ifdef CONFIG_HMEM_REPORTING
|
|
if (node_devices[cpu_to_node(cpu)]->cache_dev) {
|
|
pr_debug("There is a memory side cache\n");
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */
|
|
static void mpam_resctrl_pick_caches(void)
|
|
{
|
|
struct mpam_class *class;
|
|
struct mpam_resctrl_res *res;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
guard(srcu)(&mpam_srcu);
|
|
list_for_each_entry_srcu(class, &mpam_classes, classes_list,
|
|
srcu_read_lock_held(&mpam_srcu)) {
|
|
if (class->type != MPAM_CLASS_CACHE) {
|
|
pr_debug("class %u is not a cache\n", class->level);
|
|
continue;
|
|
}
|
|
|
|
if (class->level != 2 && class->level != 3) {
|
|
pr_debug("class %u is not L2 or L3\n", class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!cache_has_usable_cpor(class)) {
|
|
pr_debug("class %u cache misses CPOR\n", class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
|
|
pr_debug("class %u has missing CPUs, mask %*pb != %*pb\n", class->level,
|
|
cpumask_pr_args(&class->affinity),
|
|
cpumask_pr_args(cpu_possible_mask));
|
|
continue;
|
|
}
|
|
|
|
if (class->level == 2)
|
|
res = &mpam_resctrl_controls[RDT_RESOURCE_L2];
|
|
else
|
|
res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
|
|
res->class = class;
|
|
}
|
|
}
|
|
|
|
static void mpam_resctrl_pick_mba(void)
|
|
{
|
|
struct mpam_class *class, *candidate_class = NULL;
|
|
struct mpam_resctrl_res *res;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
guard(srcu)(&mpam_srcu);
|
|
list_for_each_entry_srcu(class, &mpam_classes, classes_list,
|
|
srcu_read_lock_held(&mpam_srcu)) {
|
|
struct mpam_props *cprops = &class->props;
|
|
|
|
if (class->level != 3 && class->type == MPAM_CLASS_CACHE) {
|
|
pr_debug("class %u is a cache but not the L3\n", class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!class_has_usable_mba(cprops)) {
|
|
pr_debug("class %u has no bandwidth control\n",
|
|
class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
|
|
pr_debug("class %u has missing CPUs\n", class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!topology_matches_l3(class)) {
|
|
pr_debug("class %u topology doesn't match L3\n",
|
|
class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!traffic_matches_l3(class)) {
|
|
pr_debug("class %u traffic doesn't match L3 egress\n",
|
|
class->level);
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Pick a resource to be MBA that as close as possible to
|
|
* the L3. mbm_total counts the bandwidth leaving the L3
|
|
* cache and MBA should correspond as closely as possible
|
|
* for proper operation of mba_sc.
|
|
*/
|
|
if (!candidate_class || class->level < candidate_class->level)
|
|
candidate_class = class;
|
|
}
|
|
|
|
if (candidate_class) {
|
|
pr_debug("selected class %u to back MBA\n",
|
|
candidate_class->level);
|
|
res = &mpam_resctrl_controls[RDT_RESOURCE_MBA];
|
|
res->class = candidate_class;
|
|
}
|
|
}
|
|
|
|
static void counter_update_class(enum resctrl_event_id evt_id,
|
|
struct mpam_class *class)
|
|
{
|
|
struct mpam_class *existing_class = mpam_resctrl_counters[evt_id].class;
|
|
|
|
if (existing_class) {
|
|
if (class->level == 3) {
|
|
pr_debug("Existing class is L3 - L3 wins\n");
|
|
return;
|
|
}
|
|
|
|
if (existing_class->level < class->level) {
|
|
pr_debug("Existing class is closer to L3, %u versus %u - closer is better\n",
|
|
existing_class->level, class->level);
|
|
return;
|
|
}
|
|
}
|
|
|
|
mpam_resctrl_counters[evt_id].class = class;
|
|
}
|
|
|
|
static void mpam_resctrl_pick_counters(void)
|
|
{
|
|
struct mpam_class *class;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
guard(srcu)(&mpam_srcu);
|
|
list_for_each_entry_srcu(class, &mpam_classes, classes_list,
|
|
srcu_read_lock_held(&mpam_srcu)) {
|
|
/* The name of the resource is L3... */
|
|
if (class->type == MPAM_CLASS_CACHE && class->level != 3) {
|
|
pr_debug("class %u is a cache but not the L3", class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
|
|
pr_debug("class %u does not cover all CPUs",
|
|
class->level);
|
|
continue;
|
|
}
|
|
|
|
if (cache_has_usable_csu(class)) {
|
|
pr_debug("class %u has usable CSU",
|
|
class->level);
|
|
|
|
/* CSU counters only make sense on a cache. */
|
|
switch (class->type) {
|
|
case MPAM_CLASS_CACHE:
|
|
if (update_rmid_limits(class))
|
|
break;
|
|
|
|
counter_update_class(QOS_L3_OCCUP_EVENT_ID, class);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static int mpam_resctrl_control_init(struct mpam_resctrl_res *res)
|
|
{
|
|
struct mpam_class *class = res->class;
|
|
struct mpam_props *cprops = &class->props;
|
|
struct rdt_resource *r = &res->resctrl_res;
|
|
|
|
switch (r->rid) {
|
|
case RDT_RESOURCE_L2:
|
|
case RDT_RESOURCE_L3:
|
|
r->schema_fmt = RESCTRL_SCHEMA_BITMAP;
|
|
r->cache.arch_has_sparse_bitmasks = true;
|
|
|
|
r->cache.cbm_len = class->props.cpbm_wd;
|
|
/* mpam_devices will reject empty bitmaps */
|
|
r->cache.min_cbm_bits = 1;
|
|
|
|
if (r->rid == RDT_RESOURCE_L2) {
|
|
r->name = "L2";
|
|
r->ctrl_scope = RESCTRL_L2_CACHE;
|
|
r->cdp_capable = true;
|
|
} else {
|
|
r->name = "L3";
|
|
r->ctrl_scope = RESCTRL_L3_CACHE;
|
|
r->cdp_capable = true;
|
|
}
|
|
|
|
/*
|
|
* Which bits are shared with other ...things... Unknown
|
|
* devices use partid-0 which uses all the bitmap fields. Until
|
|
* we have configured the SMMU and GIC not to do this 'all the
|
|
* bits' is the correct answer here.
|
|
*/
|
|
r->cache.shareable_bits = resctrl_get_default_ctrl(r);
|
|
r->alloc_capable = true;
|
|
break;
|
|
case RDT_RESOURCE_MBA:
|
|
r->schema_fmt = RESCTRL_SCHEMA_RANGE;
|
|
r->ctrl_scope = RESCTRL_L3_CACHE;
|
|
|
|
r->membw.delay_linear = true;
|
|
r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
|
|
r->membw.min_bw = get_mba_min(cprops);
|
|
r->membw.max_bw = MAX_MBA_BW;
|
|
r->membw.bw_gran = get_mba_granularity(cprops);
|
|
|
|
r->name = "MB";
|
|
r->alloc_capable = true;
|
|
break;
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp)
|
|
{
|
|
struct mpam_class *class = comp->class;
|
|
|
|
if (class->type == MPAM_CLASS_CACHE)
|
|
return comp->comp_id;
|
|
|
|
if (topology_matches_l3(class)) {
|
|
/* Use the corresponding L3 component ID as the domain ID */
|
|
int id = get_cpu_cacheinfo_id(cpu, 3);
|
|
|
|
/* Implies topology_matches_l3() made a mistake */
|
|
if (WARN_ON_ONCE(id == -1))
|
|
return comp->comp_id;
|
|
|
|
return id;
|
|
}
|
|
|
|
/* Otherwise, expose the ID used by the firmware table code. */
|
|
return comp->comp_id;
|
|
}
|
|
|
|
static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon,
|
|
enum resctrl_event_id type)
|
|
{
|
|
struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
|
|
struct rdt_resource *l3 = &res->resctrl_res;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
/*
|
|
* There also needs to be an L3 cache present.
|
|
* The check just requires any online CPU and it can't go offline as we
|
|
* hold the cpu lock.
|
|
*/
|
|
if (get_cpu_cacheinfo_id(raw_smp_processor_id(), 3) == -1)
|
|
return 0;
|
|
|
|
/*
|
|
* If there are no MPAM resources on L3, force it into existence.
|
|
* topology_matches_l3() already ensures this looks like the L3.
|
|
* The domain-ids will be fixed up by mpam_resctrl_domain_hdr_init().
|
|
*/
|
|
if (!res->class) {
|
|
pr_warn_once("Faking L3 MSC to enable counters.\n");
|
|
res->class = mpam_resctrl_counters[type].class;
|
|
}
|
|
|
|
/*
|
|
* Called multiple times!, once per event type that has a
|
|
* monitoring class.
|
|
* Setting name is necessary on monitor only platforms.
|
|
*/
|
|
l3->name = "L3";
|
|
l3->mon_scope = RESCTRL_L3_CACHE;
|
|
|
|
/*
|
|
* num-rmid is the upper bound for the number of monitoring groups that
|
|
* can exist simultaneously, including the default monitoring group for
|
|
* each control group. Hence, advertise the whole rmid_idx space even
|
|
* though each control group has its own pmg/rmid space. Unfortunately,
|
|
* this does mean userspace needs to know the architecture to correctly
|
|
* interpret this value.
|
|
*/
|
|
l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx();
|
|
|
|
if (resctrl_enable_mon_event(type, false, 0, NULL))
|
|
l3->mon_capable = true;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Read back the control value currently configured for @closid in domain @d
 * of resource @r.
 *
 * Returns resctrl_get_default_ctrl(@r) whenever there is nothing configured
 * to report: MPAM disabled, resource not alloc capable, PARTID out of range,
 * resource type unsupported, or the relevant feature never set for the PARTID.
 */
u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
			    u32 closid, enum resctrl_conf_type type)
{
	u32 partid;
	struct mpam_config *cfg;
	struct mpam_props *cprops;
	struct mpam_resctrl_res *res;
	struct mpam_resctrl_dom *dom;
	enum mpam_device_features configured_by;

	lockdep_assert_cpus_held();

	if (!mpam_is_enabled())
		return resctrl_get_default_ctrl(r);

	res = container_of(r, struct mpam_resctrl_res, resctrl_res);
	dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom);
	cprops = &res->class->props;

	/*
	 * When CDP is enabled, but the resource doesn't support it,
	 * the control is cloned across both partids.
	 * Pick one at random to read:
	 */
	if (mpam_resctrl_hide_cdp(r->rid))
		type = CDP_DATA;

	partid = resctrl_get_config_index(closid, type);

	/*
	 * Validate before indexing the component's config array, as
	 * resctrl_arch_update_one() does, so an out-of-range PARTID or a
	 * resource without controls never forms a pointer into cfg[].
	 */
	if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r))
		return resctrl_get_default_ctrl(r);

	cfg = &dom->ctrl_comp->cfg[partid];

	/* Work out which MPAM feature backs this resctrl resource. */
	switch (r->rid) {
	case RDT_RESOURCE_L2:
	case RDT_RESOURCE_L3:
		configured_by = mpam_feat_cpor_part;
		break;
	case RDT_RESOURCE_MBA:
		if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
			configured_by = mpam_feat_mbw_max;
			break;
		}
		fallthrough;
	default:
		return resctrl_get_default_ctrl(r);
	}

	/* Nothing was ever written for this PARTID: report the default. */
	if (!mpam_has_feature(configured_by, cfg))
		return resctrl_get_default_ctrl(r);

	switch (configured_by) {
	case mpam_feat_cpor_part:
		return cfg->cpbm;
	case mpam_feat_mbw_max:
		return mbw_max_to_percent(cfg->mbw_max, cprops);
	default:
		return resctrl_get_default_ctrl(r);
	}
}
|
|
|
|
/*
 * Apply the control value @cfg_val for @closid / @t to the component behind
 * domain @d of resource @r.
 *
 * Returns 0 on success, -EINVAL if MPAM is disabled, the resource has no
 * controls, the PARTID is out of range or the resource type is unsupported,
 * otherwise whatever mpam_apply_config() returns.
 */
int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
			    u32 closid, enum resctrl_conf_type t, u32 cfg_val)
{
	struct mpam_resctrl_res *res;
	struct mpam_resctrl_dom *dom;
	struct mpam_props *cprops;
	struct mpam_config new_cfg;
	u32 partid;
	int ret;

	lockdep_assert_cpus_held();
	lockdep_assert_irqs_enabled();

	if (!mpam_is_enabled())
		return -EINVAL;

	/*
	 * No need to check the CPU as mpam_apply_config() doesn't care, and
	 * resctrl_arch_update_domains() relies on this.
	 */
	res = container_of(r, struct mpam_resctrl_res, resctrl_res);
	dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom);
	cprops = &res->class->props;

	if (mpam_resctrl_hide_cdp(r->rid))
		t = CDP_DATA;

	partid = resctrl_get_config_index(closid, t);
	if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r)) {
		pr_debug("Not alloc capable or computed PARTID out of range\n");
		return -EINVAL;
	}

	/*
	 * Start from the current config to avoid clearing other resources
	 * when the same component is exposed multiple times through resctrl.
	 */
	new_cfg = dom->ctrl_comp->cfg[partid];

	if (r->rid == RDT_RESOURCE_L2 || r->rid == RDT_RESOURCE_L3) {
		new_cfg.cpbm = cfg_val;
		mpam_set_feature(mpam_feat_cpor_part, &new_cfg);
	} else if (r->rid == RDT_RESOURCE_MBA &&
		   mpam_has_feature(mpam_feat_mbw_max, cprops)) {
		new_cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops);
		mpam_set_feature(mpam_feat_mbw_max, &new_cfg);
	} else {
		/* Unknown resource, or MBA without a usable bandwidth control */
		return -EINVAL;
	}

	/* The common case: one PARTID, one write. */
	if (!mpam_resctrl_hide_cdp(r->rid))
		return mpam_apply_config(dom->ctrl_comp, partid, &new_cfg);

	/*
	 * When CDP is enabled, but the resource doesn't support it, we need to
	 * apply the same configuration to both partids: code first, then data.
	 */
	partid = resctrl_get_config_index(closid, CDP_CODE);
	ret = mpam_apply_config(dom->ctrl_comp, partid, &new_cfg);
	if (ret)
		return ret;

	partid = resctrl_get_config_index(closid, CDP_DATA);

	return mpam_apply_config(dom->ctrl_comp, partid, &new_cfg);
}
|
|
|
|
/*
 * Push every staged control value for @closid to all control domains of @r.
 *
 * Returns 0 on success, -EINVAL if MPAM is disabled, or the first error
 * reported by resctrl_arch_update_one(). Domains already updated before an
 * error are not rolled back.
 */
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
{
	struct rdt_ctrl_domain *d;

	lockdep_assert_cpus_held();
	lockdep_assert_irqs_enabled();

	if (!mpam_is_enabled())
		return -EINVAL;

	list_for_each_entry_rcu(d, &r->ctrl_domains, hdr.list) {
		enum resctrl_conf_type t;

		for (t = 0; t < CDP_NUM_TYPES; t++) {
			struct resctrl_staged_config *staged = &d->staged_config[t];
			int ret;

			/* Only entries with a pending value are written. */
			if (staged->have_new_ctrl) {
				ret = resctrl_arch_update_one(r, d, closid, t,
							      staged->new_ctrl);
				if (ret)
					return ret;
			}
		}
	}

	return 0;
}
|
|
|
|
void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
|
|
{
|
|
struct mpam_resctrl_res *res;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
if (!mpam_is_enabled())
|
|
return;
|
|
|
|
res = container_of(r, struct mpam_resctrl_res, resctrl_res);
|
|
mpam_reset_class_locked(res->class);
|
|
}
|
|
|
|
static void mpam_resctrl_domain_hdr_init(int cpu, struct mpam_component *comp,
|
|
enum resctrl_res_level rid,
|
|
struct rdt_domain_hdr *hdr)
|
|
{
|
|
lockdep_assert_cpus_held();
|
|
|
|
INIT_LIST_HEAD(&hdr->list);
|
|
hdr->id = mpam_resctrl_pick_domain_id(cpu, comp);
|
|
hdr->rid = rid;
|
|
cpumask_set_cpu(cpu, &hdr->cpu_mask);
|
|
}
|
|
|
|
static void mpam_resctrl_online_domain_hdr(unsigned int cpu,
|
|
struct rdt_domain_hdr *hdr)
|
|
{
|
|
lockdep_assert_cpus_held();
|
|
|
|
cpumask_set_cpu(cpu, &hdr->cpu_mask);
|
|
}
|
|
|
|
/**
|
|
* mpam_resctrl_offline_domain_hdr() - Update the domain header to remove a CPU.
|
|
* @cpu: The CPU to remove from the domain.
|
|
* @hdr: The domain's header.
|
|
*
|
|
* Removes @cpu from the header mask. If this was the last CPU in the domain,
|
|
* the domain header is removed from its parent list and true is returned,
|
|
* indicating the parent structure can be freed.
|
|
* If there are other CPUs in the domain, returns false.
|
|
*/
|
|
static bool mpam_resctrl_offline_domain_hdr(unsigned int cpu,
|
|
struct rdt_domain_hdr *hdr)
|
|
{
|
|
lockdep_assert_held(&domain_list_lock);
|
|
|
|
cpumask_clear_cpu(cpu, &hdr->cpu_mask);
|
|
if (cpumask_empty(&hdr->cpu_mask)) {
|
|
list_del_rcu(&hdr->list);
|
|
synchronize_rcu();
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static void mpam_resctrl_domain_insert(struct list_head *list,
|
|
struct rdt_domain_hdr *new)
|
|
{
|
|
struct rdt_domain_hdr *err;
|
|
struct list_head *pos = NULL;
|
|
|
|
lockdep_assert_held(&domain_list_lock);
|
|
|
|
err = resctrl_find_domain(list, new->id, &pos);
|
|
if (WARN_ON_ONCE(err))
|
|
return;
|
|
|
|
list_add_tail_rcu(&new->list, pos);
|
|
}
|
|
|
|
static struct mpam_component *find_component(struct mpam_class *class, int cpu)
|
|
{
|
|
struct mpam_component *comp;
|
|
|
|
guard(srcu)(&mpam_srcu);
|
|
list_for_each_entry_srcu(comp, &class->components, class_list,
|
|
srcu_read_lock_held(&mpam_srcu)) {
|
|
if (cpumask_test_cpu(cpu, &comp->affinity))
|
|
return comp;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static struct mpam_resctrl_dom *
|
|
mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res)
|
|
{
|
|
int err;
|
|
struct mpam_resctrl_dom *dom;
|
|
struct rdt_l3_mon_domain *mon_d;
|
|
struct rdt_ctrl_domain *ctrl_d;
|
|
struct mpam_class *class = res->class;
|
|
struct mpam_component *comp_iter, *ctrl_comp;
|
|
struct rdt_resource *r = &res->resctrl_res;
|
|
|
|
lockdep_assert_held(&domain_list_lock);
|
|
|
|
ctrl_comp = NULL;
|
|
guard(srcu)(&mpam_srcu);
|
|
list_for_each_entry_srcu(comp_iter, &class->components, class_list,
|
|
srcu_read_lock_held(&mpam_srcu)) {
|
|
if (cpumask_test_cpu(cpu, &comp_iter->affinity)) {
|
|
ctrl_comp = comp_iter;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* class has no component for this CPU */
|
|
if (WARN_ON_ONCE(!ctrl_comp))
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu));
|
|
if (!dom)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
if (r->alloc_capable) {
|
|
dom->ctrl_comp = ctrl_comp;
|
|
|
|
ctrl_d = &dom->resctrl_ctrl_dom;
|
|
mpam_resctrl_domain_hdr_init(cpu, ctrl_comp, r->rid, &ctrl_d->hdr);
|
|
ctrl_d->hdr.type = RESCTRL_CTRL_DOMAIN;
|
|
err = resctrl_online_ctrl_domain(r, ctrl_d);
|
|
if (err)
|
|
goto free_domain;
|
|
|
|
mpam_resctrl_domain_insert(&r->ctrl_domains, &ctrl_d->hdr);
|
|
} else {
|
|
pr_debug("Skipped control domain online - no controls\n");
|
|
}
|
|
|
|
if (r->mon_capable) {
|
|
struct mpam_component *any_mon_comp;
|
|
struct mpam_resctrl_mon *mon;
|
|
enum resctrl_event_id eventid;
|
|
|
|
/*
|
|
* Even if the monitor domain is backed by a different
|
|
* component, the L3 component IDs need to be used... only
|
|
* there may be no ctrl_comp for the L3.
|
|
* Search each event's class list for a component with
|
|
* overlapping CPUs and set up the dom->mon_comp array.
|
|
*/
|
|
|
|
for_each_mpam_resctrl_mon(mon, eventid) {
|
|
struct mpam_component *mon_comp;
|
|
|
|
if (!mon->class)
|
|
continue; // dummy resource
|
|
|
|
mon_comp = find_component(mon->class, cpu);
|
|
dom->mon_comp[eventid] = mon_comp;
|
|
if (mon_comp)
|
|
any_mon_comp = mon_comp;
|
|
}
|
|
if (!any_mon_comp) {
|
|
WARN_ON_ONCE(0);
|
|
err = -EFAULT;
|
|
goto offline_ctrl_domain;
|
|
}
|
|
|
|
mon_d = &dom->resctrl_mon_dom;
|
|
mpam_resctrl_domain_hdr_init(cpu, any_mon_comp, r->rid, &mon_d->hdr);
|
|
mon_d->hdr.type = RESCTRL_MON_DOMAIN;
|
|
err = resctrl_online_mon_domain(r, &mon_d->hdr);
|
|
if (err)
|
|
goto offline_ctrl_domain;
|
|
|
|
mpam_resctrl_domain_insert(&r->mon_domains, &mon_d->hdr);
|
|
} else {
|
|
pr_debug("Skipped monitor domain online - no monitors\n");
|
|
}
|
|
|
|
return dom;
|
|
|
|
offline_ctrl_domain:
|
|
if (r->alloc_capable) {
|
|
mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr);
|
|
resctrl_offline_ctrl_domain(r, ctrl_d);
|
|
}
|
|
free_domain:
|
|
kfree(dom);
|
|
dom = ERR_PTR(err);
|
|
|
|
return dom;
|
|
}
|
|
|
|
/*
|
|
* We know all the monitors are associated with the L3, even if there are no
|
|
* controls and therefore no control component. Find the cache-id for the CPU
|
|
* and use that to search for existing resctrl domains.
|
|
* This relies on mpam_resctrl_pick_domain_id() using the L3 cache-id
|
|
* for anything that is not a cache.
|
|
*/
|
|
static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu)
|
|
{
|
|
int cache_id;
|
|
struct mpam_resctrl_dom *dom;
|
|
struct mpam_resctrl_res *l3 = &mpam_resctrl_controls[RDT_RESOURCE_L3];
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
if (!l3->class)
|
|
return NULL;
|
|
cache_id = get_cpu_cacheinfo_id(cpu, 3);
|
|
if (cache_id < 0)
|
|
return NULL;
|
|
|
|
list_for_each_entry_rcu(dom, &l3->resctrl_res.mon_domains, resctrl_mon_dom.hdr.list) {
|
|
if (dom->resctrl_mon_dom.hdr.id == cache_id)
|
|
return dom;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static struct mpam_resctrl_dom *
|
|
mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res)
|
|
{
|
|
struct mpam_resctrl_dom *dom;
|
|
struct rdt_resource *r = &res->resctrl_res;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
list_for_each_entry_rcu(dom, &r->ctrl_domains, resctrl_ctrl_dom.hdr.list) {
|
|
if (cpumask_test_cpu(cpu, &dom->ctrl_comp->affinity))
|
|
return dom;
|
|
}
|
|
|
|
if (r->rid != RDT_RESOURCE_L3)
|
|
return NULL;
|
|
|
|
/* Search the mon domain list too - needed on monitor only platforms. */
|
|
return mpam_resctrl_get_mon_domain_from_cpu(cpu);
|
|
}
|
|
|
|
int mpam_resctrl_online_cpu(unsigned int cpu)
|
|
{
|
|
struct mpam_resctrl_res *res;
|
|
enum resctrl_res_level rid;
|
|
|
|
guard(mutex)(&domain_list_lock);
|
|
for_each_mpam_resctrl_control(res, rid) {
|
|
struct mpam_resctrl_dom *dom;
|
|
struct rdt_resource *r = &res->resctrl_res;
|
|
|
|
if (!res->class)
|
|
continue; // dummy_resource;
|
|
|
|
dom = mpam_resctrl_get_domain_from_cpu(cpu, res);
|
|
if (!dom) {
|
|
dom = mpam_resctrl_alloc_domain(cpu, res);
|
|
if (IS_ERR(dom))
|
|
return PTR_ERR(dom);
|
|
} else {
|
|
if (r->alloc_capable) {
|
|
struct rdt_ctrl_domain *ctrl_d = &dom->resctrl_ctrl_dom;
|
|
|
|
mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr);
|
|
}
|
|
if (r->mon_capable) {
|
|
struct rdt_l3_mon_domain *mon_d = &dom->resctrl_mon_dom;
|
|
|
|
mpam_resctrl_online_domain_hdr(cpu, &mon_d->hdr);
|
|
}
|
|
}
|
|
}
|
|
|
|
resctrl_online_cpu(cpu);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void mpam_resctrl_offline_cpu(unsigned int cpu)
|
|
{
|
|
struct mpam_resctrl_res *res;
|
|
enum resctrl_res_level rid;
|
|
|
|
resctrl_offline_cpu(cpu);
|
|
|
|
guard(mutex)(&domain_list_lock);
|
|
for_each_mpam_resctrl_control(res, rid) {
|
|
struct mpam_resctrl_dom *dom;
|
|
struct rdt_l3_mon_domain *mon_d;
|
|
struct rdt_ctrl_domain *ctrl_d;
|
|
bool ctrl_dom_empty, mon_dom_empty;
|
|
struct rdt_resource *r = &res->resctrl_res;
|
|
|
|
if (!res->class)
|
|
continue; // dummy resource
|
|
|
|
dom = mpam_resctrl_get_domain_from_cpu(cpu, res);
|
|
if (WARN_ON_ONCE(!dom))
|
|
continue;
|
|
|
|
if (r->alloc_capable) {
|
|
ctrl_d = &dom->resctrl_ctrl_dom;
|
|
ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr);
|
|
if (ctrl_dom_empty)
|
|
resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d);
|
|
} else {
|
|
ctrl_dom_empty = true;
|
|
}
|
|
|
|
if (r->mon_capable) {
|
|
mon_d = &dom->resctrl_mon_dom;
|
|
mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &mon_d->hdr);
|
|
if (mon_dom_empty)
|
|
resctrl_offline_mon_domain(&res->resctrl_res, &mon_d->hdr);
|
|
} else {
|
|
mon_dom_empty = true;
|
|
}
|
|
|
|
if (ctrl_dom_empty && mon_dom_empty)
|
|
kfree(dom);
|
|
}
|
|
}
|
|
|
|
int mpam_resctrl_setup(void)
|
|
{
|
|
int err = 0;
|
|
struct mpam_resctrl_res *res;
|
|
enum resctrl_res_level rid;
|
|
struct mpam_resctrl_mon *mon;
|
|
enum resctrl_event_id eventid;
|
|
|
|
wait_event(wait_cacheinfo_ready, cacheinfo_ready);
|
|
|
|
cpus_read_lock();
|
|
for_each_mpam_resctrl_control(res, rid) {
|
|
INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains);
|
|
INIT_LIST_HEAD_RCU(&res->resctrl_res.mon_domains);
|
|
res->resctrl_res.rid = rid;
|
|
}
|
|
|
|
/* Find some classes to use for controls */
|
|
mpam_resctrl_pick_caches();
|
|
mpam_resctrl_pick_mba();
|
|
|
|
/* Initialise the resctrl structures from the classes */
|
|
for_each_mpam_resctrl_control(res, rid) {
|
|
if (!res->class)
|
|
continue; // dummy resource
|
|
|
|
err = mpam_resctrl_control_init(res);
|
|
if (err) {
|
|
pr_debug("Failed to initialise rid %u\n", rid);
|
|
goto internal_error;
|
|
}
|
|
}
|
|
|
|
/* Find some classes to use for monitors */
|
|
mpam_resctrl_pick_counters();
|
|
|
|
for_each_mpam_resctrl_mon(mon, eventid) {
|
|
if (!mon->class)
|
|
continue; // dummy resource
|
|
|
|
err = mpam_resctrl_monitor_init(mon, eventid);
|
|
if (err) {
|
|
pr_debug("Failed to initialise event %u\n", eventid);
|
|
goto internal_error;
|
|
}
|
|
}
|
|
|
|
cpus_read_unlock();
|
|
|
|
if (!resctrl_arch_alloc_capable() && !resctrl_arch_mon_capable()) {
|
|
pr_debug("No alloc(%u) or monitor(%u) found - resctrl not supported\n",
|
|
resctrl_arch_alloc_capable(), resctrl_arch_mon_capable());
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
err = resctrl_init();
|
|
if (err)
|
|
return err;
|
|
|
|
WRITE_ONCE(resctrl_enabled, true);
|
|
|
|
return 0;
|
|
|
|
internal_error:
|
|
cpus_read_unlock();
|
|
pr_debug("Internal error %d - resctrl not supported\n", err);
|
|
return err;
|
|
}
|
|
|
|
void mpam_resctrl_exit(void)
|
|
{
|
|
if (!READ_ONCE(resctrl_enabled))
|
|
return;
|
|
|
|
WRITE_ONCE(resctrl_enabled, false);
|
|
resctrl_exit();
|
|
}
|
|
|
|
/*
|
|
* The driver is detaching an MSC from this class, if resctrl was using it,
|
|
* pull on resctrl_exit().
|
|
*/
|
|
void mpam_resctrl_teardown_class(struct mpam_class *class)
|
|
{
|
|
struct mpam_resctrl_res *res;
|
|
enum resctrl_res_level rid;
|
|
struct mpam_resctrl_mon *mon;
|
|
enum resctrl_event_id eventid;
|
|
|
|
might_sleep();
|
|
|
|
for_each_mpam_resctrl_control(res, rid) {
|
|
if (res->class == class) {
|
|
res->class = NULL;
|
|
break;
|
|
}
|
|
}
|
|
for_each_mpam_resctrl_mon(mon, eventid) {
|
|
if (mon->class == class) {
|
|
mon->class = NULL;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Registered with device_initcall_sync(): marks cacheinfo as ready and wakes
 * anything sleeping on wait_cacheinfo_ready, such as mpam_resctrl_setup().
 */
static int __init __cacheinfo_ready(void)
{
	/* Set the flag first so woken waiters see the condition as true. */
	cacheinfo_ready = true;
	wake_up(&wait_cacheinfo_ready);

	return 0;
}
device_initcall_sync(__cacheinfo_ready);
|
|
|
|
#ifdef CONFIG_MPAM_KUNIT_TEST
|
|
#include "test_mpam_resctrl.c"
|
|
#endif
|