mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 14:53:58 -04:00
drm/amdgpu: avoid dumping the MCA bank log multiple times during RAS ISR
Because the UE valid MCA count is only cleared after a GPU reset, dump the MCA log only the first time the MCA banks are fetched after a RAS interrupt is received. Signed-off-by: Yang Wang <kevinyang.wang@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -229,6 +229,8 @@ int amdgpu_mca_init(struct amdgpu_device *adev)
|
||||
struct mca_bank_cache *mca_cache;
|
||||
int i;
|
||||
|
||||
atomic_set(&mca->ue_update_flag, 0);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(mca->mca_caches); i++) {
|
||||
mca_cache = &mca->mca_caches[i];
|
||||
mutex_init(&mca_cache->lock);
|
||||
@@ -244,6 +246,8 @@ void amdgpu_mca_fini(struct amdgpu_device *adev)
|
||||
struct mca_bank_cache *mca_cache;
|
||||
int i;
|
||||
|
||||
atomic_set(&mca->ue_update_flag, 0);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(mca->mca_caches); i++) {
|
||||
mca_cache = &mca->mca_caches[i];
|
||||
amdgpu_mca_bank_set_release(&mca_cache->mca_set);
|
||||
@@ -325,6 +329,26 @@ static int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_
|
||||
return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
|
||||
}
|
||||
|
||||
static bool amdgpu_mca_bank_should_update(struct amdgpu_device *adev, enum amdgpu_mca_error_type type)
|
||||
{
|
||||
struct amdgpu_mca *mca = &adev->mca;
|
||||
bool ret = true;
|
||||
|
||||
/*
|
||||
* Because the UE Valid MCA count will only be cleared after reset,
|
||||
* in order to avoid repeated counting of the error count,
|
||||
* the aca bank is only updated once during the gpu recovery stage.
|
||||
*/
|
||||
if (type == AMDGPU_MCA_ERROR_TYPE_UE) {
|
||||
if (amdgpu_ras_intr_triggered())
|
||||
ret = atomic_cmpxchg(&mca->ue_update_flag, 0, 1) == 0;
|
||||
else
|
||||
atomic_set(&mca->ue_update_flag, 0);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set,
|
||||
struct ras_query_context *qctx)
|
||||
{
|
||||
@@ -335,6 +359,9 @@ static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mc
|
||||
if (!mca_set)
|
||||
return -EINVAL;
|
||||
|
||||
if (!amdgpu_mca_bank_should_update(adev, type))
|
||||
return 0;
|
||||
|
||||
ret = amdgpu_mca_smu_get_valid_mca_count(adev, type, &count);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
Reference in New Issue
Block a user