Files
linux/drivers/accel/ivpu/ivpu_pm.h
Karol Wachowski ade00a6c90 accel/ivpu: Perform engine reset instead of device recovery on TDR
Replace full device recovery on TDR timeout with per-context abort,
allowing individual context handling instead of resetting the entire
device.

Extend ivpu_jsm_reset_engine() to return the list of contexts impacted
by the engine reset and use that information to abort only the affected
contexts.

Only check for potentially faulty contexts when the engine reset was not
triggered by an MMU fault or a job completion error status. This prevents
misidentifying non-guilty contexts that happened to be running at the
time of the fault.

Trigger full device recovery when an engine reset caused by a job
completion timeout marks no contexts, as there is then no way to identify
the guilty one.

Add an engine reset counter to debugfs to keep track of engine resets
for debugging and testing purposes.

Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Link: https://patch.msgid.link/20260318093927.4080303-1-karol.wachowski@linux.intel.com
2026-03-20 08:03:11 +01:00

51 lines
1.5 KiB
C

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2024 Intel Corporation
*/
#ifndef __IVPU_PM_H__
#define __IVPU_PM_H__
#include <linux/rwsem.h>
#include <linux/types.h>
#include <linux/workqueue.h>
/*
 * Forward declarations: this header only uses pointers to these types, so
 * their full definitions are not needed here. Declaring struct device and
 * struct pci_dev at file scope keeps the callback prototypes below from
 * introducing prototype-scoped (and therefore incompatible) struct types
 * when this header is included before <linux/device.h> / <linux/pci.h>.
 */
struct ivpu_device;
struct device;
struct pci_dev;
/*
 * Power-management bookkeeping attached to an ivpu device.
 *
 * NOTE(review): the code that reads and writes these fields is not visible
 * in this header; per-field roles below are inferred from the member names
 * and the prototypes declared here unless stated otherwise - confirm against
 * the implementation.
 */
struct ivpu_pm_info {
/* Device this PM state refers to (presumably its owner - TODO confirm). */
struct ivpu_device *vdev;
/* Delayed work item; presumably drives job timeout detection - TODO confirm. */
struct delayed_work job_timeout_work;
/* Work item; presumably runs device recovery asynchronously - TODO confirm. */
struct work_struct recovery_work;
/* Reader/writer semaphore; presumably serializes resets against users - TODO confirm. */
struct rw_semaphore reset_lock;
/* Atomic counter; presumably counts device resets - TODO confirm. */
atomic_t reset_counter;
/* Atomic flag/counter; presumably marks a reset as in progress or requested - TODO confirm. */
atomic_t reset_pending;
/* Engine reset count, kept for debugging/testing bookkeeping (exposed via debugfs per the commit log). */
atomic_t engine_reset_counter;
/* 8-bit percentage; presumably the active share configured through ivpu_pm_dct_enable() - TODO confirm. */
u8 dct_active_percent;
};
/*
 * Public interface of the ivpu power-management code.
 *
 * NOTE(review): only declarations are visible here; the one-line summaries
 * below are inferred from the function names and signatures - confirm
 * against the definitions.
 */
/* PM setup / enable / disable for a device. */
void ivpu_pm_init(struct ivpu_device *vdev);
void ivpu_pm_enable(struct ivpu_device *vdev);
void ivpu_pm_disable(struct ivpu_device *vdev);
/* Presumably prevents further recovery from being scheduled - TODO confirm. */
void ivpu_pm_disable_recovery(struct ivpu_device *vdev);
/*
 * Suspend/resume callbacks taking the generic struct device and returning
 * an int status (presumably 0 or a negative errno, wired into the driver's
 * dev_pm_ops - TODO confirm).
 */
int ivpu_pm_suspend_cb(struct device *dev);
int ivpu_pm_resume_cb(struct device *dev);
int ivpu_pm_runtime_suspend_cb(struct device *dev);
int ivpu_pm_runtime_resume_cb(struct device *dev);
/* PCI-level reset notifications (presumably pci_error_handlers hooks - TODO confirm). */
void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev);
void ivpu_pm_reset_done_cb(struct pci_dev *pdev);
/*
 * Runtime-PM reference helpers (presumably a get/put pair - TODO confirm).
 * ivpu_rpm_get() is marked __must_check: callers must inspect its int result.
 */
int __must_check ivpu_rpm_get(struct ivpu_device *vdev);
void ivpu_rpm_put(struct ivpu_device *vdev);
/* Request device recovery; @reason is a caller-supplied string (presumably logged - TODO confirm). */
void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason);
/* Start / stop job timeout detection (presumably via job_timeout_work - TODO confirm). */
void ivpu_start_job_timeout_detection(struct ivpu_device *vdev);
void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev);
/*
 * DCT control (NOTE(review): DCT presumably means duty-cycle throttling -
 * confirm). @active_percent is passed as a u8; its valid range is not
 * visible from this header.
 */
int ivpu_pm_dct_init(struct ivpu_device *vdev);
int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent);
int ivpu_pm_dct_disable(struct ivpu_device *vdev);
/* Work handler with the standard work_struct signature (presumably run on a DCT interrupt - TODO confirm). */
void ivpu_pm_irq_dct_work_fn(struct work_struct *work);
#endif /* __IVPU_PM_H__ */