mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:
"The biggest changes are MPAM enablement in drivers/resctrl and new PMU
support under drivers/perf.
On the core side, FEAT_LSUI lets futex atomic operations run with EL0
permissions, avoiding PAN toggling.
The rest is mostly TLB invalidation refactoring, further generic entry
work, sysreg updates and a few fixes.
Core features:
- Add support for FEAT_LSUI, allowing futex atomic operations without
toggling Privileged Access Never (PAN)
- Further refactor the arm64 exception handling code towards the
generic entry infrastructure
- Optimise __READ_ONCE() with CONFIG_LTO=y and allow alias analysis
through it
Memory management:
- Refactor the arm64 TLB invalidation API and implementation for
better control over barrier placement and level-hinted invalidation
- Enable batched TLB flushes during memory hot-unplug
- Fix rodata=full block mapping support for realm guests (when
BBML2_NOABORT is available)
Perf and PMU:
- Add support for a whole bunch of system PMUs featured in NVIDIA's
Tegra410 SoC (cspmu extensions for the fabric and PCIe, new drivers
for CPU/C2C memory latency PMUs)
- Clean up iomem resource handling in the Arm CMN driver
- Fix signedness handling of AA64DFR0.{PMUVer,PerfMon}
MPAM (Memory Partitioning And Monitoring):
- Add architecture context-switch and hiding of the feature from KVM
- Add interface to allow MPAM to be exposed to user-space using
resctrl
- Add errata workaround for some existing platforms
- Add documentation for using MPAM and what shape of platforms can
use resctrl
Miscellaneous:
- Check DAIF (and PMR, where relevant) at task-switch time
- Skip TFSR_EL1 checks and barriers in synchronous MTE tag check mode
(only relevant to asynchronous or asymmetric tag check modes)
- Remove a duplicate allocation in the kexec code
- Remove redundant save/restore of SCS SP on entry to/from EL0
- Generate the KERNEL_HWCAP_ definitions from the arm64 hwcap
descriptions
- Add kselftest coverage for cmpbr_sigill()
- Update sysreg definitions"
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (109 commits)
arm64: rsi: use linear-map alias for realm config buffer
arm64: Kconfig: fix duplicate word in CMDLINE help text
arm64: mte: Skip TFSR_EL1 checks and barriers in synchronous tag check mode
arm64/sysreg: Update ID_AA64SMFR0_EL1 description to DDI0601 2025-12
arm64/sysreg: Update ID_AA64ZFR0_EL1 description to DDI0601 2025-12
arm64/sysreg: Update ID_AA64FPFR0_EL1 description to DDI0601 2025-12
arm64/sysreg: Update ID_AA64ISAR2_EL1 description to DDI0601 2025-12
arm64/sysreg: Update ID_AA64ISAR0_EL1 description to DDI0601 2025-12
arm64/hwcap: Generate the KERNEL_HWCAP_ definitions for the hwcaps
arm64: kexec: Remove duplicate allocation for trans_pgd
ACPI: AGDI: fix missing newline in error message
arm64: Check DAIF (and PMR) at task-switch time
arm64: entry: Use split preemption logic
arm64: entry: Use irqentry_{enter_from,exit_to}_kernel_mode()
arm64: entry: Consistently prefix arm64-specific wrappers
arm64: entry: Don't preempt with SError or Debug masked
entry: Split preemption from irqentry_exit_to_kernel_mode()
entry: Split kernel mode logic from irqentry_{enter,exit}()
entry: Move irqentry_enter() prototype later
entry: Remove local_irq_{enable,disable}_exit_to_user()
...
This commit is contained in:
@@ -311,4 +311,18 @@ config MARVELL_PEM_PMU
|
||||
Enable support for PCIe Interface performance monitoring
|
||||
on Marvell platform.
|
||||
|
||||
config NVIDIA_TEGRA410_CMEM_LATENCY_PMU
|
||||
tristate "NVIDIA Tegra410 CPU Memory Latency PMU"
|
||||
depends on ARM64 && ACPI
|
||||
help
|
||||
Enable perf support for CPU memory latency counters monitoring on
|
||||
NVIDIA Tegra410 SoC.
|
||||
|
||||
config NVIDIA_TEGRA410_C2C_PMU
|
||||
tristate "NVIDIA Tegra410 C2C PMU"
|
||||
depends on ARM64 && ACPI
|
||||
help
|
||||
Enable perf support for counters in NVIDIA C2C interface of NVIDIA
|
||||
Tegra410 SoC.
|
||||
|
||||
endmenu
|
||||
|
||||
@@ -35,3 +35,5 @@ obj-$(CONFIG_DWC_PCIE_PMU) += dwc_pcie_pmu.o
|
||||
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
|
||||
obj-$(CONFIG_MESON_DDR_PMU) += amlogic/
|
||||
obj-$(CONFIG_CXL_PMU) += cxl_pmu.o
|
||||
obj-$(CONFIG_NVIDIA_TEGRA410_CMEM_LATENCY_PMU) += nvidia_t410_cmem_latency_pmu.o
|
||||
obj-$(CONFIG_NVIDIA_TEGRA410_C2C_PMU) += nvidia_t410_c2c_pmu.o
|
||||
|
||||
@@ -2132,6 +2132,8 @@ static void arm_cmn_init_dtm(struct arm_cmn_dtm *dtm, struct arm_cmn_node *xp, i
|
||||
static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int idx)
|
||||
{
|
||||
struct arm_cmn_dtc *dtc = cmn->dtc + idx;
|
||||
const struct resource *cfg;
|
||||
resource_size_t base, size;
|
||||
|
||||
dtc->pmu_base = dn->pmu_base;
|
||||
dtc->base = dtc->pmu_base - arm_cmn_pmu_offset(cmn, dn);
|
||||
@@ -2139,6 +2141,13 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id
|
||||
if (dtc->irq < 0)
|
||||
return dtc->irq;
|
||||
|
||||
cfg = platform_get_resource(to_platform_device(cmn->dev), IORESOURCE_MEM, 0);
|
||||
base = dtc->base - cmn->base + cfg->start;
|
||||
size = cmn->part == PART_CMN600 ? SZ_16K : SZ_64K;
|
||||
if (!devm_request_mem_region(cmn->dev, base, size, dev_name(cmn->dev)))
|
||||
return dev_err_probe(cmn->dev, -EBUSY,
|
||||
"Failed to request DTC region 0x%pa\n", &base);
|
||||
|
||||
writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
|
||||
writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, CMN_DT_PMCR(dtc));
|
||||
writeq_relaxed(0, CMN_DT_PMCCNTR(dtc));
|
||||
@@ -2525,43 +2534,26 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int arm_cmn600_acpi_probe(struct platform_device *pdev, struct arm_cmn *cmn)
|
||||
{
|
||||
struct resource *cfg, *root;
|
||||
|
||||
cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0);
|
||||
if (!cfg)
|
||||
return -EINVAL;
|
||||
|
||||
root = platform_get_resource(pdev, IORESOURCE_MEM, 1);
|
||||
if (!root)
|
||||
return -EINVAL;
|
||||
|
||||
if (!resource_contains(cfg, root))
|
||||
swap(cfg, root);
|
||||
/*
|
||||
* Note that devm_ioremap_resource() is dumb and won't let the platform
|
||||
* device claim cfg when the ACPI companion device has already claimed
|
||||
* root within it. But since they *are* already both claimed in the
|
||||
* appropriate name, we don't really need to do it again here anyway.
|
||||
*/
|
||||
cmn->base = devm_ioremap(cmn->dev, cfg->start, resource_size(cfg));
|
||||
if (!cmn->base)
|
||||
return -ENOMEM;
|
||||
|
||||
return root->start - cfg->start;
|
||||
}
|
||||
|
||||
static int arm_cmn600_of_probe(struct device_node *np)
|
||||
static int arm_cmn_get_root(struct arm_cmn *cmn, const struct resource *cfg)
|
||||
{
|
||||
const struct device_node *np = cmn->dev->of_node;
|
||||
const struct resource *root;
|
||||
u32 rootnode;
|
||||
|
||||
return of_property_read_u32(np, "arm,root-node", &rootnode) ?: rootnode;
|
||||
if (cmn->part != PART_CMN600)
|
||||
return 0;
|
||||
|
||||
if (np)
|
||||
return of_property_read_u32(np, "arm,root-node", &rootnode) ?: rootnode;
|
||||
|
||||
root = platform_get_resource(to_platform_device(cmn->dev), IORESOURCE_MEM, 1);
|
||||
return root ? root->start - cfg->start : -EINVAL;
|
||||
}
|
||||
|
||||
static int arm_cmn_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct arm_cmn *cmn;
|
||||
const struct resource *cfg;
|
||||
const char *name;
|
||||
static atomic_t id;
|
||||
int err, rootnode, this_id;
|
||||
@@ -2575,16 +2567,16 @@ static int arm_cmn_probe(struct platform_device *pdev)
|
||||
cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev));
|
||||
platform_set_drvdata(pdev, cmn);
|
||||
|
||||
if (cmn->part == PART_CMN600 && has_acpi_companion(cmn->dev)) {
|
||||
rootnode = arm_cmn600_acpi_probe(pdev, cmn);
|
||||
} else {
|
||||
rootnode = 0;
|
||||
cmn->base = devm_platform_ioremap_resource(pdev, 0);
|
||||
if (IS_ERR(cmn->base))
|
||||
return PTR_ERR(cmn->base);
|
||||
if (cmn->part == PART_CMN600)
|
||||
rootnode = arm_cmn600_of_probe(pdev->dev.of_node);
|
||||
}
|
||||
cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0);
|
||||
if (!cfg)
|
||||
return -EINVAL;
|
||||
|
||||
/* Map the whole region now, claim the DTCs once we've found them */
|
||||
cmn->base = devm_ioremap(cmn->dev, cfg->start, resource_size(cfg));
|
||||
if (!cmn->base)
|
||||
return -ENOMEM;
|
||||
|
||||
rootnode = arm_cmn_get_root(cmn, cfg);
|
||||
if (rootnode < 0)
|
||||
return rootnode;
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
* The user should refer to the vendor technical documentation to get details
|
||||
* about the supported events.
|
||||
*
|
||||
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -1134,6 +1134,23 @@ static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct acpi_device *arm_cspmu_acpi_dev_get(const struct arm_cspmu *cspmu)
|
||||
{
|
||||
char hid[16] = {};
|
||||
char uid[16] = {};
|
||||
const struct acpi_apmt_node *apmt_node;
|
||||
|
||||
apmt_node = arm_cspmu_apmt_node(cspmu->dev);
|
||||
if (!apmt_node || apmt_node->type != ACPI_APMT_NODE_TYPE_ACPI)
|
||||
return NULL;
|
||||
|
||||
memcpy(hid, &apmt_node->inst_primary, sizeof(apmt_node->inst_primary));
|
||||
snprintf(uid, sizeof(uid), "%u", apmt_node->inst_secondary);
|
||||
|
||||
return acpi_dev_get_first_match_dev(hid, uid, -1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(arm_cspmu_acpi_dev_get);
|
||||
#else
|
||||
static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
|
||||
{
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* ARM CoreSight Architecture PMU driver.
|
||||
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __ARM_CSPMU_H__
|
||||
#define __ARM_CSPMU_H__
|
||||
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/device.h>
|
||||
@@ -255,4 +256,18 @@ int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match);
|
||||
/* Unregister vendor backend. */
|
||||
void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match);
|
||||
|
||||
#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64)
|
||||
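/**
|
||||
* arm_cspmu_acpi_dev_get() - Get the ACPI device associated with the PMU.
|
||||
* @cspmu: PMU instance whose associated ACPI device is looked up.
|
||||
*
|
||||
* The caller is responsible for calling acpi_dev_put() on the returned device.
|
||||
*
|
||||
* Return: the associated ACPI device, or NULL if there is none.
|
||||
*/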
|
||||
struct acpi_device *arm_cspmu_acpi_dev_get(const struct arm_cspmu *cspmu);
|
||||
#else
|
||||
static inline struct acpi_device *
|
||||
arm_cspmu_acpi_dev_get(const struct arm_cspmu *cspmu)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __ARM_CSPMU_H__ */
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
#include <linux/io.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/property.h>
|
||||
#include <linux/topology.h>
|
||||
|
||||
#include "arm_cspmu.h"
|
||||
@@ -21,6 +22,44 @@
|
||||
#define NV_CNVL_PORT_COUNT 4ULL
|
||||
#define NV_CNVL_FILTER_ID_MASK GENMASK_ULL(NV_CNVL_PORT_COUNT - 1, 0)
|
||||
|
||||
#define NV_UCF_SRC_COUNT 3ULL
|
||||
#define NV_UCF_DST_COUNT 4ULL
|
||||
#define NV_UCF_FILTER_ID_MASK GENMASK_ULL(11, 0)
|
||||
#define NV_UCF_FILTER_SRC GENMASK_ULL(2, 0)
|
||||
#define NV_UCF_FILTER_DST GENMASK_ULL(11, 8)
|
||||
#define NV_UCF_FILTER_DEFAULT (NV_UCF_FILTER_SRC | NV_UCF_FILTER_DST)
|
||||
|
||||
#define NV_PCIE_V2_PORT_COUNT 8ULL
|
||||
#define NV_PCIE_V2_FILTER_ID_MASK GENMASK_ULL(24, 0)
|
||||
#define NV_PCIE_V2_FILTER_PORT GENMASK_ULL(NV_PCIE_V2_PORT_COUNT - 1, 0)
|
||||
#define NV_PCIE_V2_FILTER_BDF_VAL GENMASK_ULL(23, NV_PCIE_V2_PORT_COUNT)
|
||||
#define NV_PCIE_V2_FILTER_BDF_EN BIT(24)
|
||||
#define NV_PCIE_V2_FILTER_BDF_VAL_EN GENMASK_ULL(24, NV_PCIE_V2_PORT_COUNT)
|
||||
#define NV_PCIE_V2_FILTER_DEFAULT NV_PCIE_V2_FILTER_PORT
|
||||
|
||||
#define NV_PCIE_V2_DST_COUNT 5ULL
|
||||
#define NV_PCIE_V2_FILTER2_ID_MASK GENMASK_ULL(4, 0)
|
||||
#define NV_PCIE_V2_FILTER2_DST GENMASK_ULL(NV_PCIE_V2_DST_COUNT - 1, 0)
|
||||
#define NV_PCIE_V2_FILTER2_DEFAULT NV_PCIE_V2_FILTER2_DST
|
||||
|
||||
#define NV_PCIE_TGT_PORT_COUNT 8ULL
|
||||
#define NV_PCIE_TGT_EV_TYPE_CC 0x4
|
||||
#define NV_PCIE_TGT_EV_TYPE_COUNT 3ULL
|
||||
#define NV_PCIE_TGT_EV_TYPE_MASK GENMASK_ULL(NV_PCIE_TGT_EV_TYPE_COUNT - 1, 0)
|
||||
#define NV_PCIE_TGT_FILTER2_MASK GENMASK_ULL(NV_PCIE_TGT_PORT_COUNT, 0)
|
||||
#define NV_PCIE_TGT_FILTER2_PORT GENMASK_ULL(NV_PCIE_TGT_PORT_COUNT - 1, 0)
|
||||
#define NV_PCIE_TGT_FILTER2_ADDR_EN BIT(NV_PCIE_TGT_PORT_COUNT)
|
||||
#define NV_PCIE_TGT_FILTER2_ADDR GENMASK_ULL(15, NV_PCIE_TGT_PORT_COUNT)
|
||||
#define NV_PCIE_TGT_FILTER2_DEFAULT NV_PCIE_TGT_FILTER2_PORT
|
||||
|
||||
#define NV_PCIE_TGT_ADDR_COUNT 8ULL
|
||||
#define NV_PCIE_TGT_ADDR_STRIDE 20
|
||||
#define NV_PCIE_TGT_ADDR_CTRL 0xD38
|
||||
#define NV_PCIE_TGT_ADDR_BASE_LO 0xD3C
|
||||
#define NV_PCIE_TGT_ADDR_BASE_HI 0xD40
|
||||
#define NV_PCIE_TGT_ADDR_MASK_LO 0xD44
|
||||
#define NV_PCIE_TGT_ADDR_MASK_HI 0xD48
|
||||
|
||||
#define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0)
|
||||
|
||||
#define NV_PRODID_MASK (PMIIDR_PRODUCTID | PMIIDR_VARIANT | PMIIDR_REVISION)
|
||||
@@ -124,6 +163,55 @@ static struct attribute *mcf_pmu_event_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *ucf_pmu_event_attrs[] = {
|
||||
ARM_CSPMU_EVENT_ATTR(bus_cycles, 0x1D),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(slc_allocate, 0xF0),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_wb, 0xF3),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_refill_rd, 0x109),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_refill_wr, 0x10A),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_hit_rd, 0x119),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(slc_access_dataless, 0x183),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_access_atomic, 0x184),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(slc_access_rd, 0x111),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_access_wr, 0x112),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_bytes_rd, 0x113),
|
||||
ARM_CSPMU_EVENT_ATTR(slc_bytes_wr, 0x114),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(mem_access_rd, 0x121),
|
||||
ARM_CSPMU_EVENT_ATTR(mem_access_wr, 0x122),
|
||||
ARM_CSPMU_EVENT_ATTR(mem_bytes_rd, 0x123),
|
||||
ARM_CSPMU_EVENT_ATTR(mem_bytes_wr, 0x124),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(local_snoop, 0x180),
|
||||
ARM_CSPMU_EVENT_ATTR(ext_snp_access, 0x181),
|
||||
ARM_CSPMU_EVENT_ATTR(ext_snp_evict, 0x182),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *pcie_v2_pmu_event_attrs[] = {
|
||||
ARM_CSPMU_EVENT_ATTR(rd_bytes, 0x0),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_bytes, 0x1),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_req, 0x2),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_req, 0x3),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_cum_outs, 0x4),
|
||||
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *pcie_tgt_pmu_event_attrs[] = {
|
||||
ARM_CSPMU_EVENT_ATTR(rd_bytes, 0x0),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_bytes, 0x1),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_req, 0x2),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_req, 0x3),
|
||||
ARM_CSPMU_EVENT_ATTR(cycles, NV_PCIE_TGT_EV_TYPE_CC),
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *generic_pmu_event_attrs[] = {
|
||||
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
|
||||
NULL,
|
||||
@@ -152,6 +240,40 @@ static struct attribute *cnvlink_pmu_format_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *ucf_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
ARM_CSPMU_FORMAT_ATTR(src_loc_noncpu, "config1:0"),
|
||||
ARM_CSPMU_FORMAT_ATTR(src_loc_cpu, "config1:1"),
|
||||
ARM_CSPMU_FORMAT_ATTR(src_rem, "config1:2"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_loc_cmem, "config1:8"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_loc_gmem, "config1:9"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_loc_other, "config1:10"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_rem, "config1:11"),
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *pcie_v2_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
ARM_CSPMU_FORMAT_ATTR(src_rp_mask, "config1:0-7"),
|
||||
ARM_CSPMU_FORMAT_ATTR(src_bdf, "config1:8-23"),
|
||||
ARM_CSPMU_FORMAT_ATTR(src_bdf_en, "config1:24"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_loc_cmem, "config2:0"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_loc_gmem, "config2:1"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_loc_pcie_p2p, "config2:2"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_loc_pcie_cxl, "config2:3"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_rem, "config2:4"),
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *pcie_tgt_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_ATTR(event, "config:0-2"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_rp_mask, "config:3-10"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_addr_en, "config:11"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_addr_base, "config1:0-63"),
|
||||
ARM_CSPMU_FORMAT_ATTR(dst_addr_mask, "config2:0-63"),
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *generic_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
ARM_CSPMU_FORMAT_FILTER_ATTR,
|
||||
@@ -183,6 +305,32 @@ nv_cspmu_get_name(const struct arm_cspmu *cspmu)
|
||||
return ctx->name;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64)
|
||||
static int nv_cspmu_get_inst_id(const struct arm_cspmu *cspmu, u32 *id)
|
||||
{
|
||||
struct fwnode_handle *fwnode;
|
||||
struct acpi_device *adev;
|
||||
int ret;
|
||||
|
||||
adev = arm_cspmu_acpi_dev_get(cspmu);
|
||||
if (!adev)
|
||||
return -ENODEV;
|
||||
|
||||
fwnode = acpi_fwnode_handle(adev);
|
||||
ret = fwnode_property_read_u32(fwnode, "instance_id", id);
|
||||
if (ret)
|
||||
dev_err(cspmu->dev, "Failed to get instance ID\n");
|
||||
|
||||
acpi_dev_put(adev);
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
static int nv_cspmu_get_inst_id(const struct arm_cspmu *cspmu, u32 *id)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif
|
||||
|
||||
static u32 nv_cspmu_event_filter(const struct perf_event *event)
|
||||
{
|
||||
const struct nv_cspmu_ctx *ctx =
|
||||
@@ -228,6 +376,20 @@ static void nv_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
|
||||
}
|
||||
}
|
||||
|
||||
static void nv_cspmu_reset_ev_filter(struct arm_cspmu *cspmu,
|
||||
const struct perf_event *event)
|
||||
{
|
||||
const struct nv_cspmu_ctx *ctx =
|
||||
to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
|
||||
const u32 offset = 4 * event->hw.idx;
|
||||
|
||||
if (ctx->get_filter)
|
||||
writel(0, cspmu->base0 + PMEVFILTR + offset);
|
||||
|
||||
if (ctx->get_filter2)
|
||||
writel(0, cspmu->base0 + PMEVFILT2R + offset);
|
||||
}
|
||||
|
||||
static void nv_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
|
||||
const struct perf_event *event)
|
||||
{
|
||||
@@ -236,10 +398,386 @@ static void nv_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
|
||||
writel(filter, cspmu->base0 + PMCCFILTR);
|
||||
}
|
||||
|
||||
static u32 ucf_pmu_event_filter(const struct perf_event *event)
|
||||
{
|
||||
u32 ret, filter, src, dst;
|
||||
|
||||
filter = nv_cspmu_event_filter(event);
|
||||
|
||||
/* Monitor all sources if none is selected. */
|
||||
src = FIELD_GET(NV_UCF_FILTER_SRC, filter);
|
||||
if (src == 0)
|
||||
src = GENMASK_ULL(NV_UCF_SRC_COUNT - 1, 0);
|
||||
|
||||
/* Monitor all destinations if none is selected. */
|
||||
dst = FIELD_GET(NV_UCF_FILTER_DST, filter);
|
||||
if (dst == 0)
|
||||
dst = GENMASK_ULL(NV_UCF_DST_COUNT - 1, 0);
|
||||
|
||||
ret = FIELD_PREP(NV_UCF_FILTER_SRC, src);
|
||||
ret |= FIELD_PREP(NV_UCF_FILTER_DST, dst);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Extract the BDF value together with its enable bit from @filter.
 * Returns 0 when BDF filtering is not enabled, so a stale BDF value is
 * never reported.
 */
static u32 pcie_v2_pmu_bdf_val_en(u32 filter)
{
	/* Without the enable bit the BDF value is ignored. */
	if (!FIELD_GET(NV_PCIE_V2_FILTER_BDF_EN, filter))
		return 0;

	return FIELD_GET(NV_PCIE_V2_FILTER_BDF_VAL_EN, filter);
}
|
||||
|
||||
static u32 pcie_v2_pmu_event_filter(const struct perf_event *event)
|
||||
{
|
||||
u32 filter, lead_filter, lead_bdf;
|
||||
struct perf_event *leader;
|
||||
const struct nv_cspmu_ctx *ctx =
|
||||
to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
|
||||
|
||||
filter = event->attr.config1 & ctx->filter_mask;
|
||||
if (filter != 0)
|
||||
return filter;
|
||||
|
||||
leader = event->group_leader;
|
||||
|
||||
/* Use leader's filter value if its BDF filtering is enabled. */
|
||||
if (event != leader) {
|
||||
lead_filter = pcie_v2_pmu_event_filter(leader);
|
||||
lead_bdf = pcie_v2_pmu_bdf_val_en(lead_filter);
|
||||
if (lead_bdf != 0)
|
||||
return lead_filter;
|
||||
}
|
||||
|
||||
/* Otherwise, return default filter value. */
|
||||
return ctx->filter_default_val;
|
||||
}
|
||||
|
||||
static int pcie_v2_pmu_validate_event(struct arm_cspmu *cspmu,
|
||||
struct perf_event *new_ev)
|
||||
{
|
||||
/*
|
||||
* Make sure the events are using same BDF filter since the PCIE-SRC PMU
|
||||
* only supports one common BDF filter setting for all of the counters.
|
||||
*/
|
||||
|
||||
int idx;
|
||||
u32 new_filter, new_rp, new_bdf, new_lead_filter, new_lead_bdf;
|
||||
struct perf_event *new_leader;
|
||||
|
||||
if (cspmu->impl.ops.is_cycle_counter_event(new_ev))
|
||||
return 0;
|
||||
|
||||
new_leader = new_ev->group_leader;
|
||||
|
||||
new_filter = pcie_v2_pmu_event_filter(new_ev);
|
||||
new_lead_filter = pcie_v2_pmu_event_filter(new_leader);
|
||||
|
||||
new_bdf = pcie_v2_pmu_bdf_val_en(new_filter);
|
||||
new_lead_bdf = pcie_v2_pmu_bdf_val_en(new_lead_filter);
|
||||
|
||||
new_rp = FIELD_GET(NV_PCIE_V2_FILTER_PORT, new_filter);
|
||||
|
||||
if (new_rp != 0 && new_bdf != 0) {
|
||||
dev_err(cspmu->dev,
|
||||
"RP and BDF filtering are mutually exclusive\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (new_bdf != new_lead_bdf) {
|
||||
dev_err(cspmu->dev,
|
||||
"sibling and leader BDF value should be equal\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Compare BDF filter on existing events. */
|
||||
idx = find_first_bit(cspmu->hw_events.used_ctrs,
|
||||
cspmu->cycle_counter_logical_idx);
|
||||
|
||||
if (idx != cspmu->cycle_counter_logical_idx) {
|
||||
struct perf_event *leader = cspmu->hw_events.events[idx]->group_leader;
|
||||
|
||||
const u32 lead_filter = pcie_v2_pmu_event_filter(leader);
|
||||
const u32 lead_bdf = pcie_v2_pmu_bdf_val_en(lead_filter);
|
||||
|
||||
if (new_lead_bdf != lead_bdf) {
|
||||
dev_err(cspmu->dev, "only one BDF value is supported\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct pcie_tgt_addr_filter {
|
||||
u32 refcount;
|
||||
u64 base;
|
||||
u64 mask;
|
||||
};
|
||||
|
||||
struct pcie_tgt_data {
|
||||
struct pcie_tgt_addr_filter addr_filter[NV_PCIE_TGT_ADDR_COUNT];
|
||||
void __iomem *addr_filter_reg;
|
||||
};
|
||||
|
||||
#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64)
|
||||
static int pcie_tgt_init_data(struct arm_cspmu *cspmu)
|
||||
{
|
||||
int ret;
|
||||
struct acpi_device *adev;
|
||||
struct pcie_tgt_data *data;
|
||||
struct list_head resource_list;
|
||||
struct resource_entry *rentry;
|
||||
struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
|
||||
struct device *dev = cspmu->dev;
|
||||
|
||||
data = devm_kzalloc(dev, sizeof(struct pcie_tgt_data), GFP_KERNEL);
|
||||
if (!data)
|
||||
return -ENOMEM;
|
||||
|
||||
adev = arm_cspmu_acpi_dev_get(cspmu);
|
||||
if (!adev) {
|
||||
dev_err(dev, "failed to get associated PCIE-TGT device\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&resource_list);
|
||||
ret = acpi_dev_get_memory_resources(adev, &resource_list);
|
||||
if (ret < 0) {
|
||||
dev_err(dev, "failed to get PCIE-TGT device memory resources\n");
|
||||
acpi_dev_put(adev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
rentry = list_first_entry_or_null(
|
||||
&resource_list, struct resource_entry, node);
|
||||
if (rentry) {
|
||||
data->addr_filter_reg = devm_ioremap_resource(dev, rentry->res);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (IS_ERR(data->addr_filter_reg)) {
|
||||
dev_err(dev, "failed to get address filter resource\n");
|
||||
ret = PTR_ERR(data->addr_filter_reg);
|
||||
}
|
||||
|
||||
acpi_dev_free_resource_list(&resource_list);
|
||||
acpi_dev_put(adev);
|
||||
|
||||
ctx->data = data;
|
||||
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
static int pcie_tgt_init_data(struct arm_cspmu *cspmu)
|
||||
{
|
||||
return -ENODEV;
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct pcie_tgt_data *pcie_tgt_get_data(struct arm_cspmu *cspmu)
|
||||
{
|
||||
struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
|
||||
|
||||
return ctx->data;
|
||||
}
|
||||
|
||||
/* Find the first available address filter slot. */
|
||||
static int pcie_tgt_find_addr_idx(struct arm_cspmu *cspmu, u64 base, u64 mask,
|
||||
bool is_reset)
|
||||
{
|
||||
int i;
|
||||
struct pcie_tgt_data *data = pcie_tgt_get_data(cspmu);
|
||||
|
||||
for (i = 0; i < NV_PCIE_TGT_ADDR_COUNT; i++) {
|
||||
if (!is_reset && data->addr_filter[i].refcount == 0)
|
||||
return i;
|
||||
|
||||
if (data->addr_filter[i].base == base &&
|
||||
data->addr_filter[i].mask == mask)
|
||||
return i;
|
||||
}
|
||||
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
static u32 pcie_tgt_pmu_event_filter(const struct perf_event *event)
|
||||
{
|
||||
u32 filter;
|
||||
|
||||
filter = (event->attr.config >> NV_PCIE_TGT_EV_TYPE_COUNT) &
|
||||
NV_PCIE_TGT_FILTER2_MASK;
|
||||
|
||||
return filter;
|
||||
}
|
||||
|
||||
static bool pcie_tgt_pmu_addr_en(const struct perf_event *event)
|
||||
{
|
||||
u32 filter = pcie_tgt_pmu_event_filter(event);
|
||||
|
||||
return FIELD_GET(NV_PCIE_TGT_FILTER2_ADDR_EN, filter) != 0;
|
||||
}
|
||||
|
||||
static u32 pcie_tgt_pmu_port_filter(const struct perf_event *event)
|
||||
{
|
||||
u32 filter = pcie_tgt_pmu_event_filter(event);
|
||||
|
||||
return FIELD_GET(NV_PCIE_TGT_FILTER2_PORT, filter);
|
||||
}
|
||||
|
||||
static u64 pcie_tgt_pmu_dst_addr_base(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.config1;
|
||||
}
|
||||
|
||||
static u64 pcie_tgt_pmu_dst_addr_mask(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.config2;
|
||||
}
|
||||
|
||||
static int pcie_tgt_pmu_validate_event(struct arm_cspmu *cspmu,
|
||||
struct perf_event *new_ev)
|
||||
{
|
||||
u64 base, mask;
|
||||
int idx;
|
||||
|
||||
if (!pcie_tgt_pmu_addr_en(new_ev))
|
||||
return 0;
|
||||
|
||||
/* Make sure there is a slot available for the address filter. */
|
||||
base = pcie_tgt_pmu_dst_addr_base(new_ev);
|
||||
mask = pcie_tgt_pmu_dst_addr_mask(new_ev);
|
||||
idx = pcie_tgt_find_addr_idx(cspmu, base, mask, false);
|
||||
if (idx < 0)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pcie_tgt_pmu_config_addr_filter(struct arm_cspmu *cspmu,
|
||||
bool en, u64 base, u64 mask, int idx)
|
||||
{
|
||||
struct pcie_tgt_data *data;
|
||||
struct pcie_tgt_addr_filter *filter;
|
||||
void __iomem *filter_reg;
|
||||
|
||||
data = pcie_tgt_get_data(cspmu);
|
||||
filter = &data->addr_filter[idx];
|
||||
filter_reg = data->addr_filter_reg + (idx * NV_PCIE_TGT_ADDR_STRIDE);
|
||||
|
||||
if (en) {
|
||||
filter->refcount++;
|
||||
if (filter->refcount == 1) {
|
||||
filter->base = base;
|
||||
filter->mask = mask;
|
||||
|
||||
writel(lower_32_bits(base), filter_reg + NV_PCIE_TGT_ADDR_BASE_LO);
|
||||
writel(upper_32_bits(base), filter_reg + NV_PCIE_TGT_ADDR_BASE_HI);
|
||||
writel(lower_32_bits(mask), filter_reg + NV_PCIE_TGT_ADDR_MASK_LO);
|
||||
writel(upper_32_bits(mask), filter_reg + NV_PCIE_TGT_ADDR_MASK_HI);
|
||||
writel(1, filter_reg + NV_PCIE_TGT_ADDR_CTRL);
|
||||
}
|
||||
} else {
|
||||
filter->refcount--;
|
||||
if (filter->refcount == 0) {
|
||||
writel(0, filter_reg + NV_PCIE_TGT_ADDR_CTRL);
|
||||
writel(0, filter_reg + NV_PCIE_TGT_ADDR_BASE_LO);
|
||||
writel(0, filter_reg + NV_PCIE_TGT_ADDR_BASE_HI);
|
||||
writel(0, filter_reg + NV_PCIE_TGT_ADDR_MASK_LO);
|
||||
writel(0, filter_reg + NV_PCIE_TGT_ADDR_MASK_HI);
|
||||
|
||||
filter->base = 0;
|
||||
filter->mask = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void pcie_tgt_pmu_set_ev_filter(struct arm_cspmu *cspmu,
|
||||
const struct perf_event *event)
|
||||
{
|
||||
bool addr_filter_en;
|
||||
int idx;
|
||||
u32 filter2_val, filter2_offset, port_filter;
|
||||
u64 base, mask;
|
||||
|
||||
filter2_val = 0;
|
||||
filter2_offset = PMEVFILT2R + (4 * event->hw.idx);
|
||||
|
||||
addr_filter_en = pcie_tgt_pmu_addr_en(event);
|
||||
if (addr_filter_en) {
|
||||
base = pcie_tgt_pmu_dst_addr_base(event);
|
||||
mask = pcie_tgt_pmu_dst_addr_mask(event);
|
||||
idx = pcie_tgt_find_addr_idx(cspmu, base, mask, false);
|
||||
|
||||
if (idx < 0) {
|
||||
dev_err(cspmu->dev,
|
||||
"Unable to find a slot for address filtering\n");
|
||||
writel(0, cspmu->base0 + filter2_offset);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Configure address range filter registers.*/
|
||||
pcie_tgt_pmu_config_addr_filter(cspmu, true, base, mask, idx);
|
||||
|
||||
/* Config the counter to use the selected address filter slot. */
|
||||
filter2_val |= FIELD_PREP(NV_PCIE_TGT_FILTER2_ADDR, 1U << idx);
|
||||
}
|
||||
|
||||
port_filter = pcie_tgt_pmu_port_filter(event);
|
||||
|
||||
/* Monitor all ports if no filter is selected. */
|
||||
if (!addr_filter_en && port_filter == 0)
|
||||
port_filter = NV_PCIE_TGT_FILTER2_PORT;
|
||||
|
||||
filter2_val |= FIELD_PREP(NV_PCIE_TGT_FILTER2_PORT, port_filter);
|
||||
|
||||
writel(filter2_val, cspmu->base0 + filter2_offset);
|
||||
}
|
||||
|
||||
static void pcie_tgt_pmu_reset_ev_filter(struct arm_cspmu *cspmu,
|
||||
const struct perf_event *event)
|
||||
{
|
||||
bool addr_filter_en;
|
||||
u64 base, mask;
|
||||
int idx;
|
||||
|
||||
addr_filter_en = pcie_tgt_pmu_addr_en(event);
|
||||
if (!addr_filter_en)
|
||||
return;
|
||||
|
||||
base = pcie_tgt_pmu_dst_addr_base(event);
|
||||
mask = pcie_tgt_pmu_dst_addr_mask(event);
|
||||
idx = pcie_tgt_find_addr_idx(cspmu, base, mask, true);
|
||||
|
||||
if (idx < 0) {
|
||||
dev_err(cspmu->dev,
|
||||
"Unable to find the address filter slot to reset\n");
|
||||
return;
|
||||
}
|
||||
|
||||
pcie_tgt_pmu_config_addr_filter(cspmu, false, base, mask, idx);
|
||||
}
|
||||
|
||||
static u32 pcie_tgt_pmu_event_type(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.config & NV_PCIE_TGT_EV_TYPE_MASK;
|
||||
}
|
||||
|
||||
static bool pcie_tgt_pmu_is_cycle_counter_event(const struct perf_event *event)
|
||||
{
|
||||
u32 event_type = pcie_tgt_pmu_event_type(event);
|
||||
|
||||
return event_type == NV_PCIE_TGT_EV_TYPE_CC;
|
||||
}
|
||||
|
||||
enum nv_cspmu_name_fmt {
|
||||
NAME_FMT_GENERIC,
|
||||
NAME_FMT_SOCKET
|
||||
NAME_FMT_SOCKET,
|
||||
NAME_FMT_SOCKET_INST,
|
||||
};
|
||||
|
||||
struct nv_cspmu_match {
|
||||
@@ -342,6 +880,63 @@ static const struct nv_cspmu_match nv_cspmu_match[] = {
|
||||
.init_data = NULL
|
||||
},
|
||||
},
|
||||
{
|
||||
.prodid = 0x2CF20000,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.name_pattern = "nvidia_ucf_pmu_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET,
|
||||
.template_ctx = {
|
||||
.event_attr = ucf_pmu_event_attrs,
|
||||
.format_attr = ucf_pmu_format_attrs,
|
||||
.filter_mask = NV_UCF_FILTER_ID_MASK,
|
||||
.filter_default_val = NV_UCF_FILTER_DEFAULT,
|
||||
.filter2_mask = 0x0,
|
||||
.filter2_default_val = 0x0,
|
||||
.get_filter = ucf_pmu_event_filter,
|
||||
},
|
||||
},
|
||||
{
|
||||
.prodid = 0x10301000,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.name_pattern = "nvidia_pcie_pmu_%u_rc_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET_INST,
|
||||
.template_ctx = {
|
||||
.event_attr = pcie_v2_pmu_event_attrs,
|
||||
.format_attr = pcie_v2_pmu_format_attrs,
|
||||
.filter_mask = NV_PCIE_V2_FILTER_ID_MASK,
|
||||
.filter_default_val = NV_PCIE_V2_FILTER_DEFAULT,
|
||||
.filter2_mask = NV_PCIE_V2_FILTER2_ID_MASK,
|
||||
.filter2_default_val = NV_PCIE_V2_FILTER2_DEFAULT,
|
||||
.get_filter = pcie_v2_pmu_event_filter,
|
||||
.get_filter2 = nv_cspmu_event_filter2,
|
||||
},
|
||||
.ops = {
|
||||
.validate_event = pcie_v2_pmu_validate_event,
|
||||
.reset_ev_filter = nv_cspmu_reset_ev_filter,
|
||||
}
|
||||
},
|
||||
{
|
||||
.prodid = 0x10700000,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.name_pattern = "nvidia_pcie_tgt_pmu_%u_rc_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET_INST,
|
||||
.template_ctx = {
|
||||
.event_attr = pcie_tgt_pmu_event_attrs,
|
||||
.format_attr = pcie_tgt_pmu_format_attrs,
|
||||
.filter_mask = 0x0,
|
||||
.filter_default_val = 0x0,
|
||||
.filter2_mask = NV_PCIE_TGT_FILTER2_MASK,
|
||||
.filter2_default_val = NV_PCIE_TGT_FILTER2_DEFAULT,
|
||||
.init_data = pcie_tgt_init_data
|
||||
},
|
||||
.ops = {
|
||||
.is_cycle_counter_event = pcie_tgt_pmu_is_cycle_counter_event,
|
||||
.event_type = pcie_tgt_pmu_event_type,
|
||||
.validate_event = pcie_tgt_pmu_validate_event,
|
||||
.set_ev_filter = pcie_tgt_pmu_set_ev_filter,
|
||||
.reset_ev_filter = pcie_tgt_pmu_reset_ev_filter,
|
||||
}
|
||||
},
|
||||
{
|
||||
.prodid = 0,
|
||||
.prodid_mask = 0,
|
||||
@@ -365,7 +960,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = {
|
||||
static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
|
||||
const struct nv_cspmu_match *match)
|
||||
{
|
||||
char *name;
|
||||
char *name = NULL;
|
||||
struct device *dev = cspmu->dev;
|
||||
|
||||
static atomic_t pmu_generic_idx = {0};
|
||||
@@ -379,13 +974,20 @@ static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
|
||||
socket);
|
||||
break;
|
||||
}
|
||||
case NAME_FMT_SOCKET_INST: {
|
||||
const int cpu = cpumask_first(&cspmu->associated_cpus);
|
||||
const int socket = cpu_to_node(cpu);
|
||||
u32 inst_id;
|
||||
|
||||
if (!nv_cspmu_get_inst_id(cspmu, &inst_id))
|
||||
name = devm_kasprintf(dev, GFP_KERNEL,
|
||||
match->name_pattern, socket, inst_id);
|
||||
break;
|
||||
}
|
||||
case NAME_FMT_GENERIC:
|
||||
name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
|
||||
atomic_fetch_inc(&pmu_generic_idx));
|
||||
break;
|
||||
default:
|
||||
name = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
return name;
|
||||
@@ -426,8 +1028,12 @@ static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
|
||||
cspmu->impl.ctx = ctx;
|
||||
|
||||
/* NVIDIA specific callbacks. */
|
||||
SET_OP(validate_event, impl_ops, match, NULL);
|
||||
SET_OP(event_type, impl_ops, match, NULL);
|
||||
SET_OP(is_cycle_counter_event, impl_ops, match, NULL);
|
||||
SET_OP(set_cc_filter, impl_ops, match, nv_cspmu_set_cc_filter);
|
||||
SET_OP(set_ev_filter, impl_ops, match, nv_cspmu_set_ev_filter);
|
||||
SET_OP(reset_ev_filter, impl_ops, match, NULL);
|
||||
SET_OP(get_event_attrs, impl_ops, match, nv_cspmu_get_event_attrs);
|
||||
SET_OP(get_format_attrs, impl_ops, match, nv_cspmu_get_format_attrs);
|
||||
SET_OP(get_name, impl_ops, match, nv_cspmu_get_name);
|
||||
|
||||
1051
drivers/perf/nvidia_t410_c2c_pmu.c
Normal file
1051
drivers/perf/nvidia_t410_c2c_pmu.c
Normal file
File diff suppressed because it is too large
Load Diff
736
drivers/perf/nvidia_t410_cmem_latency_pmu.c
Normal file
736
drivers/perf/nvidia_t410_cmem_latency_pmu.c
Normal file
@@ -0,0 +1,736 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* NVIDIA Tegra410 CPU Memory (CMEM) Latency PMU driver.
|
||||
*
|
||||
* Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/platform_device.h>
|
||||
|
||||
#define NUM_INSTANCES 14
|
||||
|
||||
/* Register offsets. */
|
||||
#define CMEM_LAT_CG_CTRL 0x800
|
||||
#define CMEM_LAT_CTRL 0x808
|
||||
#define CMEM_LAT_STATUS 0x810
|
||||
#define CMEM_LAT_CYCLE_CNTR 0x818
|
||||
#define CMEM_LAT_MC0_REQ_CNTR 0x820
|
||||
#define CMEM_LAT_MC0_AOR_CNTR 0x830
|
||||
#define CMEM_LAT_MC1_REQ_CNTR 0x838
|
||||
#define CMEM_LAT_MC1_AOR_CNTR 0x848
|
||||
#define CMEM_LAT_MC2_REQ_CNTR 0x850
|
||||
#define CMEM_LAT_MC2_AOR_CNTR 0x860
|
||||
|
||||
/* CMEM_LAT_CTRL values. */
|
||||
#define CMEM_LAT_CTRL_DISABLE 0x0ULL
|
||||
#define CMEM_LAT_CTRL_ENABLE 0x1ULL
|
||||
#define CMEM_LAT_CTRL_CLR 0x2ULL
|
||||
|
||||
/* CMEM_LAT_CG_CTRL values. */
|
||||
#define CMEM_LAT_CG_CTRL_DISABLE 0x0ULL
|
||||
#define CMEM_LAT_CG_CTRL_ENABLE 0x1ULL
|
||||
|
||||
/* CMEM_LAT_STATUS register field. */
|
||||
#define CMEM_LAT_STATUS_CYCLE_OVF BIT(0)
|
||||
#define CMEM_LAT_STATUS_MC0_AOR_OVF BIT(1)
|
||||
#define CMEM_LAT_STATUS_MC0_REQ_OVF BIT(3)
|
||||
#define CMEM_LAT_STATUS_MC1_AOR_OVF BIT(4)
|
||||
#define CMEM_LAT_STATUS_MC1_REQ_OVF BIT(6)
|
||||
#define CMEM_LAT_STATUS_MC2_AOR_OVF BIT(7)
|
||||
#define CMEM_LAT_STATUS_MC2_REQ_OVF BIT(9)
|
||||
|
||||
/* Events. */
|
||||
#define CMEM_LAT_EVENT_CYCLES 0x0
|
||||
#define CMEM_LAT_EVENT_REQ 0x1
|
||||
#define CMEM_LAT_EVENT_AOR 0x2
|
||||
|
||||
#define CMEM_LAT_NUM_EVENTS 0x3
|
||||
#define CMEM_LAT_MASK_EVENT 0x3
|
||||
#define CMEM_LAT_MAX_ACTIVE_EVENTS 32
|
||||
|
||||
#define CMEM_LAT_ACTIVE_CPU_MASK 0x0
|
||||
#define CMEM_LAT_ASSOCIATED_CPU_MASK 0x1
|
||||
|
||||
static unsigned long cmem_lat_pmu_cpuhp_state;
|
||||
|
||||
struct cmem_lat_pmu_hw_events {
|
||||
struct perf_event *events[CMEM_LAT_MAX_ACTIVE_EVENTS];
|
||||
DECLARE_BITMAP(used_ctrs, CMEM_LAT_MAX_ACTIVE_EVENTS);
|
||||
};
|
||||
|
||||
struct cmem_lat_pmu {
|
||||
struct pmu pmu;
|
||||
struct device *dev;
|
||||
const char *name;
|
||||
const char *identifier;
|
||||
void __iomem *base_broadcast;
|
||||
void __iomem *base[NUM_INSTANCES];
|
||||
cpumask_t associated_cpus;
|
||||
cpumask_t active_cpu;
|
||||
struct hlist_node node;
|
||||
struct cmem_lat_pmu_hw_events hw_events;
|
||||
};
|
||||
|
||||
#define to_cmem_lat_pmu(p) \
|
||||
container_of(p, struct cmem_lat_pmu, pmu)
|
||||
|
||||
|
||||
/* Get event type from perf_event. */
|
||||
static inline u32 get_event_type(struct perf_event *event)
|
||||
{
|
||||
return (event->attr.config) & CMEM_LAT_MASK_EVENT;
|
||||
}
|
||||
|
||||
/* PMU operations. */
|
||||
static int cmem_lat_pmu_get_event_idx(struct cmem_lat_pmu_hw_events *hw_events,
|
||||
struct perf_event *event)
|
||||
{
|
||||
unsigned int idx;
|
||||
|
||||
idx = find_first_zero_bit(hw_events->used_ctrs, CMEM_LAT_MAX_ACTIVE_EVENTS);
|
||||
if (idx >= CMEM_LAT_MAX_ACTIVE_EVENTS)
|
||||
return -EAGAIN;
|
||||
|
||||
set_bit(idx, hw_events->used_ctrs);
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static bool cmem_lat_pmu_validate_event(struct pmu *pmu,
|
||||
struct cmem_lat_pmu_hw_events *hw_events,
|
||||
struct perf_event *event)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (is_software_event(event))
|
||||
return true;
|
||||
|
||||
/* Reject groups spanning multiple HW PMUs. */
|
||||
if (event->pmu != pmu)
|
||||
return false;
|
||||
|
||||
ret = cmem_lat_pmu_get_event_idx(hw_events, event);
|
||||
if (ret < 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Make sure the group of events can be scheduled at once on the PMU. */
|
||||
static bool cmem_lat_pmu_validate_group(struct perf_event *event)
|
||||
{
|
||||
struct perf_event *sibling, *leader = event->group_leader;
|
||||
struct cmem_lat_pmu_hw_events fake_hw_events;
|
||||
|
||||
if (event->group_leader == event)
|
||||
return true;
|
||||
|
||||
memset(&fake_hw_events, 0, sizeof(fake_hw_events));
|
||||
|
||||
if (!cmem_lat_pmu_validate_event(event->pmu, &fake_hw_events, leader))
|
||||
return false;
|
||||
|
||||
for_each_sibling_event(sibling, leader) {
|
||||
if (!cmem_lat_pmu_validate_event(event->pmu, &fake_hw_events, sibling))
|
||||
return false;
|
||||
}
|
||||
|
||||
return cmem_lat_pmu_validate_event(event->pmu, &fake_hw_events, event);
|
||||
}
|
||||
|
||||
static int cmem_lat_pmu_event_init(struct perf_event *event)
|
||||
{
|
||||
struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u32 event_type = get_event_type(event);
|
||||
|
||||
if (event->attr.type != event->pmu->type ||
|
||||
event_type >= CMEM_LAT_NUM_EVENTS)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* Sampling, per-process mode, and per-task counters are not supported
|
||||
* since this PMU is shared across all CPUs.
|
||||
*/
|
||||
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) {
|
||||
dev_dbg(cmem_lat_pmu->pmu.dev,
|
||||
"Can't support sampling and per-process mode\n");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (event->cpu < 0) {
|
||||
dev_dbg(cmem_lat_pmu->pmu.dev, "Can't support per-task counters\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure the CPU assignment is on one of the CPUs associated with
|
||||
* this PMU.
|
||||
*/
|
||||
if (!cpumask_test_cpu(event->cpu, &cmem_lat_pmu->associated_cpus)) {
|
||||
dev_dbg(cmem_lat_pmu->pmu.dev,
|
||||
"Requested cpu is not associated with the PMU\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Enforce the current active CPU to handle the events in this PMU. */
|
||||
event->cpu = cpumask_first(&cmem_lat_pmu->active_cpu);
|
||||
if (event->cpu >= nr_cpu_ids)
|
||||
return -EINVAL;
|
||||
|
||||
if (!cmem_lat_pmu_validate_group(event))
|
||||
return -EINVAL;
|
||||
|
||||
hwc->idx = -1;
|
||||
hwc->config = event_type;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 cmem_lat_pmu_read_status(struct cmem_lat_pmu *cmem_lat_pmu,
|
||||
unsigned int inst)
|
||||
{
|
||||
return readq(cmem_lat_pmu->base[inst] + CMEM_LAT_STATUS);
|
||||
}
|
||||
|
||||
static u64 cmem_lat_pmu_read_cycle_counter(struct perf_event *event)
|
||||
{
|
||||
const unsigned int instance = 0;
|
||||
u64 status;
|
||||
struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
|
||||
struct device *dev = cmem_lat_pmu->dev;
|
||||
|
||||
/*
|
||||
* Use the reading from first instance since all instances are
|
||||
* identical.
|
||||
*/
|
||||
status = cmem_lat_pmu_read_status(cmem_lat_pmu, instance);
|
||||
if (status & CMEM_LAT_STATUS_CYCLE_OVF)
|
||||
dev_warn(dev, "Cycle counter overflow\n");
|
||||
|
||||
return readq(cmem_lat_pmu->base[instance] + CMEM_LAT_CYCLE_CNTR);
|
||||
}
|
||||
|
||||
static u64 cmem_lat_pmu_read_req_counter(struct perf_event *event)
|
||||
{
|
||||
unsigned int i;
|
||||
u64 status, val = 0;
|
||||
struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
|
||||
struct device *dev = cmem_lat_pmu->dev;
|
||||
|
||||
/* Sum up the counts from all instances. */
|
||||
for (i = 0; i < NUM_INSTANCES; i++) {
|
||||
status = cmem_lat_pmu_read_status(cmem_lat_pmu, i);
|
||||
if (status & CMEM_LAT_STATUS_MC0_REQ_OVF)
|
||||
dev_warn(dev, "MC0 request counter overflow\n");
|
||||
if (status & CMEM_LAT_STATUS_MC1_REQ_OVF)
|
||||
dev_warn(dev, "MC1 request counter overflow\n");
|
||||
if (status & CMEM_LAT_STATUS_MC2_REQ_OVF)
|
||||
dev_warn(dev, "MC2 request counter overflow\n");
|
||||
|
||||
val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC0_REQ_CNTR);
|
||||
val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC1_REQ_CNTR);
|
||||
val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC2_REQ_CNTR);
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static u64 cmem_lat_pmu_read_aor_counter(struct perf_event *event)
|
||||
{
|
||||
unsigned int i;
|
||||
u64 status, val = 0;
|
||||
struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
|
||||
struct device *dev = cmem_lat_pmu->dev;
|
||||
|
||||
/* Sum up the counts from all instances. */
|
||||
for (i = 0; i < NUM_INSTANCES; i++) {
|
||||
status = cmem_lat_pmu_read_status(cmem_lat_pmu, i);
|
||||
if (status & CMEM_LAT_STATUS_MC0_AOR_OVF)
|
||||
dev_warn(dev, "MC0 AOR counter overflow\n");
|
||||
if (status & CMEM_LAT_STATUS_MC1_AOR_OVF)
|
||||
dev_warn(dev, "MC1 AOR counter overflow\n");
|
||||
if (status & CMEM_LAT_STATUS_MC2_AOR_OVF)
|
||||
dev_warn(dev, "MC2 AOR counter overflow\n");
|
||||
|
||||
val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC0_AOR_CNTR);
|
||||
val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC1_AOR_CNTR);
|
||||
val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC2_AOR_CNTR);
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
/*
 * Counter read routine for each event id, indexed by the CMEM_LAT_EVENT_*
 * value. The table is never modified, so it is const to keep the function
 * pointers out of writable memory.
 */
static u64 (* const read_counter_fn[CMEM_LAT_NUM_EVENTS])(struct perf_event *) = {
	[CMEM_LAT_EVENT_CYCLES] = cmem_lat_pmu_read_cycle_counter,
	[CMEM_LAT_EVENT_REQ]    = cmem_lat_pmu_read_req_counter,
	[CMEM_LAT_EVENT_AOR]    = cmem_lat_pmu_read_aor_counter,
};
|
||||
|
||||
static void cmem_lat_pmu_event_update(struct perf_event *event)
|
||||
{
|
||||
u32 event_type;
|
||||
u64 prev, now;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (hwc->state & PERF_HES_STOPPED)
|
||||
return;
|
||||
|
||||
event_type = hwc->config;
|
||||
|
||||
do {
|
||||
prev = local64_read(&hwc->prev_count);
|
||||
now = read_counter_fn[event_type](event);
|
||||
} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
|
||||
|
||||
local64_add(now - prev, &event->count);
|
||||
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
|
||||
static void cmem_lat_pmu_start(struct perf_event *event, int pmu_flags)
|
||||
{
|
||||
event->hw.state = 0;
|
||||
}
|
||||
|
||||
static void cmem_lat_pmu_stop(struct perf_event *event, int pmu_flags)
|
||||
{
|
||||
event->hw.state |= PERF_HES_STOPPED;
|
||||
}
|
||||
|
||||
static int cmem_lat_pmu_add(struct perf_event *event, int flags)
|
||||
{
|
||||
struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
|
||||
struct cmem_lat_pmu_hw_events *hw_events = &cmem_lat_pmu->hw_events;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx;
|
||||
|
||||
if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
|
||||
&cmem_lat_pmu->associated_cpus)))
|
||||
return -ENOENT;
|
||||
|
||||
idx = cmem_lat_pmu_get_event_idx(hw_events, event);
|
||||
if (idx < 0)
|
||||
return idx;
|
||||
|
||||
hw_events->events[idx] = event;
|
||||
hwc->idx = idx;
|
||||
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
|
||||
|
||||
if (flags & PERF_EF_START)
|
||||
cmem_lat_pmu_start(event, PERF_EF_RELOAD);
|
||||
|
||||
/* Propagate changes to the userspace mapping. */
|
||||
perf_event_update_userpage(event);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cmem_lat_pmu_del(struct perf_event *event, int flags)
|
||||
{
|
||||
struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
|
||||
struct cmem_lat_pmu_hw_events *hw_events = &cmem_lat_pmu->hw_events;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx = hwc->idx;
|
||||
|
||||
cmem_lat_pmu_stop(event, PERF_EF_UPDATE);
|
||||
|
||||
hw_events->events[idx] = NULL;
|
||||
|
||||
clear_bit(idx, hw_events->used_ctrs);
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
||||
/* perf read callback: bring event->count up to date with the hardware. */
static void cmem_lat_pmu_read(struct perf_event *event)
{
	cmem_lat_pmu_event_update(event);
}
|
||||
|
||||
static inline void cmem_lat_pmu_cg_ctrl(struct cmem_lat_pmu *cmem_lat_pmu,
|
||||
u64 val)
|
||||
{
|
||||
writeq(val, cmem_lat_pmu->base_broadcast + CMEM_LAT_CG_CTRL);
|
||||
}
|
||||
|
||||
/* Write @val to CMEM_LAT_CTRL through the broadcast register window. */
static inline void cmem_lat_pmu_ctrl(struct cmem_lat_pmu *cmem_lat_pmu, u64 val)
{
	void __iomem *reg = cmem_lat_pmu->base_broadcast + CMEM_LAT_CTRL;

	writeq(val, reg);
}
|
||||
|
||||
static void cmem_lat_pmu_enable(struct pmu *pmu)
|
||||
{
|
||||
bool disabled;
|
||||
struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(pmu);
|
||||
|
||||
disabled = bitmap_empty(cmem_lat_pmu->hw_events.used_ctrs,
|
||||
CMEM_LAT_MAX_ACTIVE_EVENTS);
|
||||
|
||||
if (disabled)
|
||||
return;
|
||||
|
||||
/* Enable all the counters. */
|
||||
cmem_lat_pmu_cg_ctrl(cmem_lat_pmu, CMEM_LAT_CG_CTRL_ENABLE);
|
||||
cmem_lat_pmu_ctrl(cmem_lat_pmu, CMEM_LAT_CTRL_ENABLE);
|
||||
}
|
||||
|
||||
static void cmem_lat_pmu_disable(struct pmu *pmu)
|
||||
{
|
||||
int idx;
|
||||
struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(pmu);
|
||||
|
||||
/* Disable all the counters. */
|
||||
cmem_lat_pmu_ctrl(cmem_lat_pmu, CMEM_LAT_CTRL_DISABLE);
|
||||
|
||||
/*
|
||||
* The counters will start from 0 again on restart.
|
||||
* Update the events immediately to avoid losing the counts.
|
||||
*/
|
||||
for_each_set_bit(idx, cmem_lat_pmu->hw_events.used_ctrs,
|
||||
CMEM_LAT_MAX_ACTIVE_EVENTS) {
|
||||
struct perf_event *event = cmem_lat_pmu->hw_events.events[idx];
|
||||
|
||||
if (!event)
|
||||
continue;
|
||||
|
||||
cmem_lat_pmu_event_update(event);
|
||||
|
||||
local64_set(&event->hw.prev_count, 0ULL);
|
||||
}
|
||||
|
||||
cmem_lat_pmu_ctrl(cmem_lat_pmu, CMEM_LAT_CTRL_CLR);
|
||||
cmem_lat_pmu_cg_ctrl(cmem_lat_pmu, CMEM_LAT_CG_CTRL_DISABLE);
|
||||
}
|
||||
|
||||
/* PMU identifier attribute. */
|
||||
|
||||
static ssize_t cmem_lat_pmu_identifier_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *page)
|
||||
{
|
||||
struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(dev_get_drvdata(dev));
|
||||
|
||||
return sysfs_emit(page, "%s\n", cmem_lat_pmu->identifier);
|
||||
}
|
||||
|
||||
static struct device_attribute cmem_lat_pmu_identifier_attr =
|
||||
__ATTR(identifier, 0444, cmem_lat_pmu_identifier_show, NULL);
|
||||
|
||||
static struct attribute *cmem_lat_pmu_identifier_attrs[] = {
|
||||
&cmem_lat_pmu_identifier_attr.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute_group cmem_lat_pmu_identifier_attr_group = {
|
||||
.attrs = cmem_lat_pmu_identifier_attrs,
|
||||
};
|
||||
|
||||
/* Format attributes. */
|
||||
|
||||
#define NV_PMU_EXT_ATTR(_name, _func, _config) \
|
||||
(&((struct dev_ext_attribute[]){ \
|
||||
{ \
|
||||
.attr = __ATTR(_name, 0444, _func, NULL), \
|
||||
.var = (void *)_config \
|
||||
} \
|
||||
})[0].attr.attr)
|
||||
|
||||
static struct attribute *cmem_lat_pmu_formats[] = {
|
||||
NV_PMU_EXT_ATTR(event, device_show_string, "config:0-1"),
|
||||
NULL
|
||||
};
|
||||
|
||||
static const struct attribute_group cmem_lat_pmu_format_group = {
|
||||
.name = "format",
|
||||
.attrs = cmem_lat_pmu_formats,
|
||||
};
|
||||
|
||||
/* Event attributes. */
|
||||
|
||||
static ssize_t cmem_lat_pmu_sysfs_event_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct perf_pmu_events_attr *pmu_attr;
|
||||
|
||||
pmu_attr = container_of(attr, typeof(*pmu_attr), attr);
|
||||
return sysfs_emit(buf, "event=0x%llx\n", pmu_attr->id);
|
||||
}
|
||||
|
||||
#define NV_PMU_EVENT_ATTR(_name, _config) \
|
||||
PMU_EVENT_ATTR_ID(_name, cmem_lat_pmu_sysfs_event_show, _config)
|
||||
|
||||
static struct attribute *cmem_lat_pmu_events[] = {
|
||||
NV_PMU_EVENT_ATTR(cycles, CMEM_LAT_EVENT_CYCLES),
|
||||
NV_PMU_EVENT_ATTR(rd_req, CMEM_LAT_EVENT_REQ),
|
||||
NV_PMU_EVENT_ATTR(rd_cum_outs, CMEM_LAT_EVENT_AOR),
|
||||
NULL
|
||||
};
|
||||
|
||||
static const struct attribute_group cmem_lat_pmu_events_group = {
|
||||
.name = "events",
|
||||
.attrs = cmem_lat_pmu_events,
|
||||
};
|
||||
|
||||
/* Cpumask attributes. */
|
||||
|
||||
static ssize_t cmem_lat_pmu_cpumask_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct pmu *pmu = dev_get_drvdata(dev);
|
||||
struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(pmu);
|
||||
struct dev_ext_attribute *eattr =
|
||||
container_of(attr, struct dev_ext_attribute, attr);
|
||||
unsigned long mask_id = (unsigned long)eattr->var;
|
||||
const cpumask_t *cpumask;
|
||||
|
||||
switch (mask_id) {
|
||||
case CMEM_LAT_ACTIVE_CPU_MASK:
|
||||
cpumask = &cmem_lat_pmu->active_cpu;
|
||||
break;
|
||||
case CMEM_LAT_ASSOCIATED_CPU_MASK:
|
||||
cpumask = &cmem_lat_pmu->associated_cpus;
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
return cpumap_print_to_pagebuf(true, buf, cpumask);
|
||||
}
|
||||
|
||||
#define NV_PMU_CPUMASK_ATTR(_name, _config) \
|
||||
NV_PMU_EXT_ATTR(_name, cmem_lat_pmu_cpumask_show, \
|
||||
(unsigned long)_config)
|
||||
|
||||
static struct attribute *cmem_lat_pmu_cpumask_attrs[] = {
|
||||
NV_PMU_CPUMASK_ATTR(cpumask, CMEM_LAT_ACTIVE_CPU_MASK),
|
||||
NV_PMU_CPUMASK_ATTR(associated_cpus, CMEM_LAT_ASSOCIATED_CPU_MASK),
|
||||
NULL
|
||||
};
|
||||
|
||||
static const struct attribute_group cmem_lat_pmu_cpumask_attr_group = {
|
||||
.attrs = cmem_lat_pmu_cpumask_attrs,
|
||||
};
|
||||
|
||||
/* Per PMU device attribute groups. */
|
||||
|
||||
static const struct attribute_group *cmem_lat_pmu_attr_groups[] = {
|
||||
&cmem_lat_pmu_identifier_attr_group,
|
||||
&cmem_lat_pmu_format_group,
|
||||
&cmem_lat_pmu_events_group,
|
||||
&cmem_lat_pmu_cpumask_attr_group,
|
||||
NULL
|
||||
};
|
||||
|
||||
static int cmem_lat_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
|
||||
{
|
||||
struct cmem_lat_pmu *cmem_lat_pmu =
|
||||
hlist_entry_safe(node, struct cmem_lat_pmu, node);
|
||||
|
||||
if (!cpumask_test_cpu(cpu, &cmem_lat_pmu->associated_cpus))
|
||||
return 0;
|
||||
|
||||
/* If the PMU is already managed, there is nothing to do */
|
||||
if (!cpumask_empty(&cmem_lat_pmu->active_cpu))
|
||||
return 0;
|
||||
|
||||
/* Use this CPU for event counting */
|
||||
cpumask_set_cpu(cpu, &cmem_lat_pmu->active_cpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cmem_lat_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
|
||||
{
|
||||
unsigned int dst;
|
||||
|
||||
struct cmem_lat_pmu *cmem_lat_pmu =
|
||||
hlist_entry_safe(node, struct cmem_lat_pmu, node);
|
||||
|
||||
/* Nothing to do if this CPU doesn't own the PMU */
|
||||
if (!cpumask_test_and_clear_cpu(cpu, &cmem_lat_pmu->active_cpu))
|
||||
return 0;
|
||||
|
||||
/* Choose a new CPU to migrate ownership of the PMU to */
|
||||
dst = cpumask_any_and_but(&cmem_lat_pmu->associated_cpus,
|
||||
cpu_online_mask, cpu);
|
||||
if (dst >= nr_cpu_ids)
|
||||
return 0;
|
||||
|
||||
/* Use this CPU for event counting */
|
||||
perf_pmu_migrate_context(&cmem_lat_pmu->pmu, cpu, dst);
|
||||
cpumask_set_cpu(dst, &cmem_lat_pmu->active_cpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cmem_lat_pmu_get_cpus(struct cmem_lat_pmu *cmem_lat_pmu,
|
||||
unsigned int socket)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (cpu_to_node(cpu) == socket)
|
||||
cpumask_set_cpu(cpu, &cmem_lat_pmu->associated_cpus);
|
||||
}
|
||||
|
||||
if (cpumask_empty(&cmem_lat_pmu->associated_cpus)) {
|
||||
dev_dbg(cmem_lat_pmu->dev,
|
||||
"No cpu associated with PMU socket-%u\n", socket);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cmem_lat_pmu_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct acpi_device *acpi_dev;
|
||||
struct cmem_lat_pmu *cmem_lat_pmu;
|
||||
char *name, *uid_str;
|
||||
int ret, i;
|
||||
u32 socket;
|
||||
|
||||
acpi_dev = ACPI_COMPANION(dev);
|
||||
if (!acpi_dev)
|
||||
return -ENODEV;
|
||||
|
||||
uid_str = acpi_device_uid(acpi_dev);
|
||||
if (!uid_str)
|
||||
return -ENODEV;
|
||||
|
||||
ret = kstrtou32(uid_str, 0, &socket);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
cmem_lat_pmu = devm_kzalloc(dev, sizeof(*cmem_lat_pmu), GFP_KERNEL);
|
||||
name = devm_kasprintf(dev, GFP_KERNEL, "nvidia_cmem_latency_pmu_%u", socket);
|
||||
if (!cmem_lat_pmu || !name)
|
||||
return -ENOMEM;
|
||||
|
||||
cmem_lat_pmu->dev = dev;
|
||||
cmem_lat_pmu->name = name;
|
||||
cmem_lat_pmu->identifier = acpi_device_hid(acpi_dev);
|
||||
platform_set_drvdata(pdev, cmem_lat_pmu);
|
||||
|
||||
cmem_lat_pmu->pmu = (struct pmu) {
|
||||
.parent = &pdev->dev,
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
.pmu_enable = cmem_lat_pmu_enable,
|
||||
.pmu_disable = cmem_lat_pmu_disable,
|
||||
.event_init = cmem_lat_pmu_event_init,
|
||||
.add = cmem_lat_pmu_add,
|
||||
.del = cmem_lat_pmu_del,
|
||||
.start = cmem_lat_pmu_start,
|
||||
.stop = cmem_lat_pmu_stop,
|
||||
.read = cmem_lat_pmu_read,
|
||||
.attr_groups = cmem_lat_pmu_attr_groups,
|
||||
.capabilities = PERF_PMU_CAP_NO_EXCLUDE |
|
||||
PERF_PMU_CAP_NO_INTERRUPT,
|
||||
};
|
||||
|
||||
/* Map the address of all the instances. */
|
||||
for (i = 0; i < NUM_INSTANCES; i++) {
|
||||
cmem_lat_pmu->base[i] = devm_platform_ioremap_resource(pdev, i);
|
||||
if (IS_ERR(cmem_lat_pmu->base[i])) {
|
||||
dev_err(dev, "Failed map address for instance %d\n", i);
|
||||
return PTR_ERR(cmem_lat_pmu->base[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Map broadcast address. */
|
||||
cmem_lat_pmu->base_broadcast = devm_platform_ioremap_resource(pdev,
|
||||
NUM_INSTANCES);
|
||||
if (IS_ERR(cmem_lat_pmu->base_broadcast)) {
|
||||
dev_err(dev, "Failed map broadcast address\n");
|
||||
return PTR_ERR(cmem_lat_pmu->base_broadcast);
|
||||
}
|
||||
|
||||
ret = cmem_lat_pmu_get_cpus(cmem_lat_pmu, socket);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = cpuhp_state_add_instance(cmem_lat_pmu_cpuhp_state,
|
||||
&cmem_lat_pmu->node);
|
||||
if (ret) {
|
||||
dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
cmem_lat_pmu_cg_ctrl(cmem_lat_pmu, CMEM_LAT_CG_CTRL_ENABLE);
|
||||
cmem_lat_pmu_ctrl(cmem_lat_pmu, CMEM_LAT_CTRL_CLR);
|
||||
cmem_lat_pmu_cg_ctrl(cmem_lat_pmu, CMEM_LAT_CG_CTRL_DISABLE);
|
||||
|
||||
ret = perf_pmu_register(&cmem_lat_pmu->pmu, name, -1);
|
||||
if (ret) {
|
||||
dev_err(&pdev->dev, "Failed to register PMU: %d\n", ret);
|
||||
cpuhp_state_remove_instance(cmem_lat_pmu_cpuhp_state,
|
||||
&cmem_lat_pmu->node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
dev_dbg(&pdev->dev, "Registered %s PMU\n", name);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cmem_lat_pmu_device_remove(struct platform_device *pdev)
|
||||
{
|
||||
struct cmem_lat_pmu *cmem_lat_pmu = platform_get_drvdata(pdev);
|
||||
|
||||
perf_pmu_unregister(&cmem_lat_pmu->pmu);
|
||||
cpuhp_state_remove_instance(cmem_lat_pmu_cpuhp_state,
|
||||
&cmem_lat_pmu->node);
|
||||
}
|
||||
|
||||
static const struct acpi_device_id cmem_lat_pmu_acpi_match[] = {
|
||||
{ "NVDA2021" },
|
||||
{ }
|
||||
};
|
||||
MODULE_DEVICE_TABLE(acpi, cmem_lat_pmu_acpi_match);
|
||||
|
||||
static struct platform_driver cmem_lat_pmu_driver = {
|
||||
.driver = {
|
||||
.name = "nvidia-t410-cmem-latency-pmu",
|
||||
.acpi_match_table = ACPI_PTR(cmem_lat_pmu_acpi_match),
|
||||
.suppress_bind_attrs = true,
|
||||
},
|
||||
.probe = cmem_lat_pmu_probe,
|
||||
.remove = cmem_lat_pmu_device_remove,
|
||||
};
|
||||
|
||||
/*
 * Module init: allocate a dynamic multi-instance CPU hotplug state, then
 * register the platform driver.
 *
 * Returns 0 on success or a negative errno. If the driver registration
 * fails, the hotplug state allocated here is released again so it does not
 * leak.
 */
static int __init cmem_lat_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/nvidia/cmem_latency:online",
				      cmem_lat_pmu_cpu_online,
				      cmem_lat_pmu_cpu_teardown);
	if (ret < 0)
		return ret;

	/* For CPUHP_AP_ONLINE_DYN the return value is the allocated state. */
	cmem_lat_pmu_cpuhp_state = ret;

	ret = platform_driver_register(&cmem_lat_pmu_driver);
	if (ret)
		cpuhp_remove_multi_state(cmem_lat_pmu_cpuhp_state);

	return ret;
}
|
||||
|
||||
/*
 * Module exit: unregister the platform driver first, then release the CPU
 * hotplug state allocated at init.
 */
static void __exit cmem_lat_pmu_exit(void)
{
	platform_driver_unregister(&cmem_lat_pmu_driver);
	cpuhp_remove_multi_state(cmem_lat_pmu_cpuhp_state);
}
|
||||
|
||||
module_init(cmem_lat_pmu_init);
|
||||
module_exit(cmem_lat_pmu_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("NVIDIA Tegra410 CPU Memory Latency PMU driver");
|
||||
MODULE_AUTHOR("Besar Wicaksono <bwicaksono@nvidia.com>");
|
||||
Reference in New Issue
Block a user