mirror of
https://github.com/torvalds/linux.git
synced 2026-04-21 08:13:56 -04:00
drm/amdgpu: clear MMHub RAS error counts during GPU reset
MMHub EDC becomes dirty after BACO reset. EDC registers should be cleared early on in the reset phase. Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: John Clements <john.clements@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
cb7adfd6ad
commit
43c4d57618
@@ -2742,6 +2742,9 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
|
||||
|
||||
if (adev->asic_reset_res)
|
||||
goto fail;
|
||||
|
||||
if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
|
||||
adev->mmhub.funcs->reset_ras_error_count(adev);
|
||||
} else {
|
||||
|
||||
task_barrier_full(&hive->tb);
|
||||
@@ -3910,8 +3913,15 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
|
||||
}
|
||||
}
|
||||
|
||||
if (!r && amdgpu_ras_intr_triggered())
|
||||
if (!r && amdgpu_ras_intr_triggered()) {
|
||||
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
|
||||
if (tmp_adev->mmhub.funcs &&
|
||||
tmp_adev->mmhub.funcs->reset_ras_error_count)
|
||||
tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
|
||||
}
|
||||
|
||||
amdgpu_ras_intr_cleared();
|
||||
}
|
||||
|
||||
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
|
||||
if (need_full_reset) {
|
||||
|
||||
Reference in New Issue
Block a user