Files
linux/drivers/gpu/drm/xe/xe_guc_ct.h
Satyanarayana K V P ee4b32220a drm/xe/guc: Add devm release action to safely tear down CT
When a buffer object (BO) is allocated with the XE_BO_FLAG_GGTT_INVALIDATE
flag, the driver initiates TLB invalidation requests via the CTB mechanism
while releasing the BO. However a premature release of the CTB BO can lead
to system crashes, as observed in:

Oops: Oops: 0000 [#1] SMP NOPTI
RIP: 0010:h2g_write+0x2f3/0x7c0 [xe]
Call Trace:
 guc_ct_send_locked+0x8b/0x670 [xe]
 xe_guc_ct_send_locked+0x19/0x60 [xe]
 send_tlb_invalidation+0xb4/0x460 [xe]
 xe_gt_tlb_invalidation_ggtt+0x15e/0x2e0 [xe]
 ggtt_invalidate_gt_tlb.part.0+0x16/0x90 [xe]
 ggtt_node_remove+0x110/0x140 [xe]
 xe_ggtt_node_remove+0x40/0xa0 [xe]
 xe_ggtt_remove_bo+0x87/0x250 [xe]

Introduce a devm-managed release action during xe_guc_ct_init() and
xe_guc_ct_init_post_hwconfig() to ensure proper CTB disablement before
resource deallocation, preventing the use-after-free scenario.

Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Summers Stuart <stuart.summers@intel.com>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://lore.kernel.org/r/20250901072541.31461-1-satyanarayana.k.v.p@intel.com
2025-09-02 08:21:58 +02:00

78 lines
2.3 KiB
C

/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2022 Intel Corporation
*/
#ifndef _XE_GUC_CT_H_
#define _XE_GUC_CT_H_
#include "xe_guc_ct_types.h"
struct drm_printer;
struct xe_device;
int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct);
int xe_guc_ct_init(struct xe_guc_ct *ct);
int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct);
int xe_guc_ct_enable(struct xe_guc_ct *ct);
void xe_guc_ct_disable(struct xe_guc_ct *ct);
void xe_guc_ct_stop(struct xe_guc_ct *ct);
void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct);
void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_printer *p);
void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb);
void xe_guc_ct_fixup_messages_with_ggtt(struct xe_guc_ct *ct, s64 ggtt_shift);
static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct)
{
return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED;
}
static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
{
return ct->state == XE_GUC_CT_STATE_ENABLED;
}
static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
{
if (!xe_guc_ct_enabled(ct))
return;
wake_up_all(&ct->wq);
queue_work(ct->g2h_wq, &ct->g2h_worker);
xe_guc_ct_fast_path(ct);
}
/* Basic CT send / receives */
int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 g2h_len, u32 num_g2h);
int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 g2h_len, u32 num_g2h);
int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 *response_buffer);
static inline int
xe_guc_ct_send_block(struct xe_guc_ct *ct, const u32 *action, u32 len)
{
return xe_guc_ct_send_recv(ct, action, len, NULL);
}
/* This is only version of the send CT you can call from a G2H handler */
int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action,
u32 len);
/* Can't fail because a GT reset is in progress */
int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
u32 len, u32 *response_buffer);
static inline int
xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len)
{
return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL);
}
long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct);
#endif