mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 14:53:58 -04:00
drm/amdgpu: suspend ras module before gpu reset
During a GPU reset, all GPU-related resources are inaccessible. To avoid affecting RAS functionality, suspend the RAS module before the GPU reset and resume it after the GPU reset is complete. V2: Rename functions to avoid misunderstanding. V3: Move flush_delayed_work to amdgpu_ras_process_pause, Move schedule_delayed_work to amdgpu_ras_process_unpause. V4: Rename functions. V5: Move the function to amdgpu_ras.c. Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Acked-by: Lijo Lazar <lijo.lazar@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
d4432f16d3
commit
d95ca7f515
@@ -2921,8 +2921,12 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
|
||||
type = amdgpu_ras_get_fatal_error_event(adev);
|
||||
list_for_each_entry(remote_adev,
|
||||
device_list_handle, gmc.xgmi.head) {
|
||||
amdgpu_ras_query_err_status(remote_adev);
|
||||
amdgpu_ras_log_on_err_counter(remote_adev, type);
|
||||
if (amdgpu_uniras_enabled(remote_adev)) {
|
||||
amdgpu_ras_mgr_update_ras_ecc(remote_adev);
|
||||
} else {
|
||||
amdgpu_ras_query_err_status(remote_adev);
|
||||
amdgpu_ras_log_on_err_counter(remote_adev, type);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -5673,3 +5677,25 @@ bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void amdgpu_ras_pre_reset(struct amdgpu_device *adev,
|
||||
struct list_head *device_list)
|
||||
{
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
|
||||
list_for_each_entry(tmp_adev, device_list, reset_list) {
|
||||
if (amdgpu_uniras_enabled(tmp_adev))
|
||||
amdgpu_ras_mgr_pre_reset(tmp_adev);
|
||||
}
|
||||
}
|
||||
|
||||
void amdgpu_ras_post_reset(struct amdgpu_device *adev,
|
||||
struct list_head *device_list)
|
||||
{
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
|
||||
list_for_each_entry(tmp_adev, device_list, reset_list) {
|
||||
if (amdgpu_uniras_enabled(tmp_adev))
|
||||
amdgpu_ras_mgr_post_reset(tmp_adev);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user