mirror of
https://github.com/torvalds/linux.git
synced 2026-04-19 15:24:02 -04:00
drm/scheduler: rework job destruction
We now destroy finished jobs from the worker thread to make sure that we never destroy a job that is currently in timeout processing. This avoids holding the lock around the ring mirror list in drm_sched_stop, which should resolve a deadlock reported by a user. v2: Remove unused variable. v4: Move guilty job free into sched code. v5: Move sched->hw_rq_count to drm_sched_start to account for the counter decrement in drm_sched_stop even when we don't call resubmit jobs if the guilty job did signal. v6: Remove unused variable. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109692 Acked-by: Chunming Zhou <david1.zhou@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/1555599624-12285-3-git-send-email-andrey.grodzovsky@amd.com
This commit is contained in:
committed by
Alex Deucher
parent
b3198c38f0
commit
5918045c4e
@@ -3334,7 +3334,7 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
|
||||
if (!ring || !ring->sched.thread)
|
||||
continue;
|
||||
|
||||
drm_sched_stop(&ring->sched);
|
||||
drm_sched_stop(&ring->sched, &job->base);
|
||||
|
||||
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
|
||||
amdgpu_fence_driver_force_completion(ring);
|
||||
@@ -3343,8 +3343,6 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
|
||||
if(job)
|
||||
drm_sched_increase_karma(&job->base);
|
||||
|
||||
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
|
||||
if (!need_full_reset)
|
||||
@@ -3482,8 +3480,7 @@ end:
|
||||
return r;
|
||||
}
|
||||
|
||||
static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
|
||||
struct amdgpu_job *job)
|
||||
static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -3623,7 +3620,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
|
||||
|
||||
/* Post ASIC reset for all devs .*/
|
||||
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
|
||||
amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
|
||||
amdgpu_device_post_asic_reset(tmp_adev);
|
||||
|
||||
if (r) {
|
||||
/* bad news, how to tell it to userspace ? */
|
||||
|
||||
Reference in New Issue
Block a user