Files
linux/drivers/gpu/drm/xe/xe_mert.c
Michal Wajdeczko def675cf3f drm/xe/mert: Improve handling of MERT CAT errors
All MERT catastrophic errors but VF's LMTT fault are serious, so
we shouldn't limit our handling only to print debug messages.

Change CATERR message to error level and then declare the device
as wedged to match expectation from the design document. For the
LMTT faults, add a note about adding tracking of this unexpected
VF activity.

While at it, rename register field definitions to match the BSpec.
Also drop trailing include guard name from the regs.h file.

BSpec: 74625
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Lukasz Laguna <lukasz.laguna@intel.com>
Reviewed-by: Lukasz Laguna <lukasz.laguna@intel.com>
Link: https://patch.msgid.link/20260112183716.28700-1-michal.wajdeczko@intel.com
2026-01-14 16:02:50 +01:00

119 lines
2.9 KiB
C

// SPDX-License-Identifier: MIT
/*
* Copyright(c) 2025, Intel Corporation. All rights reserved.
*/
#include "regs/xe_irq_regs.h"
#include "regs/xe_mert_regs.h"
#include "xe_device.h"
#include "xe_mert.h"
#include "xe_mmio.h"
#include "xe_sriov_printk.h"
#include "xe_tile.h"
/**
* xe_mert_init_early() - Initialize MERT data
* @xe: the &xe_device with MERT to init
*/
void xe_mert_init_early(struct xe_device *xe)
{
struct xe_tile *tile = xe_device_get_root_tile(xe);
struct xe_mert *mert = &tile->mert;
spin_lock_init(&mert->lock);
init_completion(&mert->tlb_inv_done);
}
/**
 * xe_mert_invalidate_lmtt() - Invalidate MERT LMTT
 * @xe: the &xe_device with MERT
 *
 * Trigger invalidation of the MERT LMTT and wait for completion.
 *
 * If the tlb_inv_triggered flag is already set, no new request is written
 * to the hardware and this call only waits on the existing completion.
 *
 * Return: 0 on success or -ETIMEDOUT in case of a timeout.
 */
int xe_mert_invalidate_lmtt(struct xe_device *xe)
{
	struct xe_tile *root = xe_device_get_root_tile(xe);
	struct xe_mert *mert = &root->mert;
	unsigned long irq_flags;
	unsigned long remaining;

	xe_assert(xe, xe_device_has_mert(xe));

	/* The flag, the completion re-arm and the trigger write are done under the lock */
	spin_lock_irqsave(&mert->lock, irq_flags);
	if (!mert->tlb_inv_triggered) {
		mert->tlb_inv_triggered = true;
		reinit_completion(&mert->tlb_inv_done);
		xe_mmio_write32(&root->mmio, MERT_TLB_INV_DESC_A, MERT_TLB_INV_DESC_A_VALID);
	}
	spin_unlock_irqrestore(&mert->lock, irq_flags);

	/* Wait at most HZ / 4 jiffies; a zero result means the wait timed out */
	remaining = wait_for_completion_timeout(&mert->tlb_inv_done, HZ / 4);

	return remaining ? 0 : -ETIMEDOUT;
}
/*
 * Decode and handle a MERT catastrophic error, if any is reported.
 *
 * Reads MERT_TLB_CT_INTR_ERR_ID_PORT; a zero value means there is nothing
 * to handle. Otherwise the register is written back as zero and the error
 * code is decoded: an LMTT fault is only logged at debug level together
 * with the VF identifier, while an unmapped GGTT access or any unknown
 * code is logged as an error and the device is declared wedged.
 */
static void mert_handle_cat_error(struct xe_device *xe)
{
	struct xe_tile *root = xe_device_get_root_tile(xe);
	u32 err = xe_mmio_read32(&root->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT);
	u32 vfid, code;

	if (!err)
		return;

	xe_mmio_write32(&root->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT, 0);

	vfid = FIELD_GET(CATERR_VFID, err);
	code = FIELD_GET(CATERR_CODES, err);

	/* Codes that do not wedge the device return from within the switch */
	switch (code) {
	case CATERR_NO_ERROR:
		return;
	case CATERR_LMTT_FAULT:
		xe_sriov_dbg(xe, "MERT: CAT_ERR: VF%u LMTT fault!\n", vfid);
		/* XXX: track/report malicious VF activity */
		return;
	case CATERR_UNMAPPED_GGTT:
		xe_sriov_err(xe, "MERT: CAT_ERR: Access to an unmapped GGTT!\n");
		break;
	default:
		xe_sriov_err(xe, "MERT: Unexpected CAT_ERR code=%#x!\n", code);
		break;
	}

	/* Every remaining error is treated as fatal for the device */
	xe_device_declare_wedged(xe);
}
/**
 * xe_mert_irq_handler() - Handler for MERT interrupts
 * @xe: the &xe_device
 * @master_ctl: interrupt register
 *
 * Handle interrupts generated by MERT.
 *
 * Nothing is done unless SOC_H2DMEMINT_IRQ is set in @master_ctl. When it
 * is set, any reported catastrophic error is handled first, and then a
 * pending LMTT invalidation is completed once MERT_TLB_INV_DESC_A no longer
 * has its VALID bit set.
 */
void xe_mert_irq_handler(struct xe_device *xe, u32 master_ctl)
{
	struct xe_tile *root = xe_device_get_root_tile(xe);
	struct xe_mert *mert = &root->mert;
	unsigned long irq_flags;

	if (!(master_ctl & SOC_H2DMEMINT_IRQ))
		return;

	mert_handle_cat_error(xe);

	spin_lock_irqsave(&mert->lock, irq_flags);
	/* The register is read only while an invalidation is pending */
	if (mert->tlb_inv_triggered &&
	    !(xe_mmio_read32(&root->mmio, MERT_TLB_INV_DESC_A) & MERT_TLB_INV_DESC_A_VALID)) {
		mert->tlb_inv_triggered = false;
		/* Wake every waiter blocked on this invalidation */
		complete_all(&mert->tlb_inv_done);
	}
	spin_unlock_irqrestore(&mert->lock, irq_flags);
}