drm/amdgpu: use GPU_HDP_FLUSH for sriov

Currently SRIOV runtime will use kiq to write HDP_MEM_FLUSH_CNTL for
hdp flush. This register need to be write from CPU for nbif to aware,
otherwise it will not work.

Implement amdgpu_kiq_hdp_flush and use kiq to do gpu hdp flush during
sriov runtime.

v2:
- fallback to amdgpu_asic_flush_hdp when amdgpu_kiq_hdp_flush failed
- add function amdgpu_mes_hdp_flush

v3:
- changed returned error

Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Victor Zhao
2025-10-09 10:42:48 +08:00
committed by Alex Deucher
parent e71ca1efd3
commit 6169b555db
5 changed files with 95 additions and 3 deletions

View File

@@ -33,6 +33,7 @@
#include "amdgpu_reset.h"
#include "amdgpu_xcp.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_mes.h"
#include "nvd.h"
/* delay 0.1 second to enable gfx off feature */
@@ -1194,6 +1195,75 @@ failed_kiq_write:
dev_err(adev->dev, "failed to write reg:%x\n", reg);
}
int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev)
{
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq;
struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
struct amdgpu_ring *ring = &kiq->ring;
if (amdgpu_device_skip_hw_access(adev))
return 0;
if (adev->enable_mes_kiq && adev->mes.ring[0].sched.ready)
return amdgpu_mes_hdp_flush(adev);
if (!ring->funcs->emit_hdp_flush) {
return -EOPNOTSUPP;
}
spin_lock_irqsave(&kiq->ring_lock, flags);
r = amdgpu_ring_alloc(ring, 32);
if (r)
goto failed_unlock;
amdgpu_ring_emit_hdp_flush(ring);
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
if (r)
goto failed_undo;
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
/* don't wait anymore for gpu reset case because this way may
* block gpu_recover() routine forever, e.g. this virt_kiq_rreg
* is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
* never return if we keep waiting in virt_kiq_rreg, which cause
* gpu_recover() hang there.
*
* also don't wait anymore for IRQ context
* */
if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
goto failed_kiq_hdp_flush;
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
if (amdgpu_in_reset(adev))
goto failed_kiq_hdp_flush;
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
if (cnt > MAX_KIQ_REG_TRY) {
dev_err(adev->dev, "failed to flush HDP via KIQ timeout\n");
return -ETIMEDOUT;
}
return 0;
failed_undo:
amdgpu_ring_undo(ring);
failed_unlock:
spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_hdp_flush:
dev_err(adev->dev, "failed to flush HDP via KIQ\n");
return r < 0 ? r : -EIO;
}
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
{
if (amdgpu_num_kcq == -1) {
@@ -2485,3 +2555,4 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
&amdgpu_debugfs_compute_sched_mask_fops);
#endif
}