drm/amdgpu: suspend gfx userqueues

This patch adds suspend support for gfx userqueues. Specifically, it does
the following:
- adds an enable_signaling function for the eviction fence, so that it
  can trigger the userqueue suspend,
- adds a delayed work item to handle suspending of the eviction_fence,
- adds a suspend function to handle suspending of userqueues which
  suspends all the queues under this userq manager and signals the
  eviction fence,
- adds a function to replace the old eviction fence with a new one and
  attach it to each of the objects,
- adds a reference to the userq manager in the eviction fence container so
  that it can be used in the suspend function.

V2: Addressed Christian's review comments:
    - schedule suspend work immediately

V4: Addressed Christian's review comments:
    - wait for pending uq fences before starting suspend, added
      queue->last_fence for the same
    - accommodate ev_fence_mgr into existing code
    - some bug fixes and NULL checks

V5: Addressed Christian's review comments (gitlab)
    - Wait for eviction fence to get signaled in destroy,
      don't signal it
    - Wait for eviction fence to get signaled in replace fence,
      don't signal it

V6: Addressed Christian's review comments
    - Do not destroy the old eviction fence until we have it replaced
    - Change the sequence of fence replacement sub-tasks
    - reusing the ev_fence delayed work for userqueue suspend as well
      (Shashank).

V7: Addressed Christian's review comments
    - give evf_mgr as argument (instead of fpriv) to replace_fence()
    - save ptr to evf_mgr in ev_fence (instead of uq_mgr)
    - modify suspend_all_queues logic to reflect error properly
    - remove the garbage drm_exec_lock section in wait_for_signal
    - grab the userqueue mutex before starting the wait for fence
    - remove the unrelated gobj check from signal_ioctl

V8: Added race condition fixes

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Acked-by: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Shashank Sharma
2024-11-20 18:59:49 +01:00
committed by Alex Deucher
parent 30e4d78138
commit b0328087c1
5 changed files with 276 additions and 11 deletions

View File

@@ -60,6 +60,16 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
{
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
struct dma_fence *f = queue->last_fence;
int ret;
if (f && !dma_fence_is_signaled(f)) {
ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
if (ret <= 0) {
DRM_ERROR("Timed out waiting for fence f=%p\n", f);
return;
}
}
uq_funcs->mqd_destroy(uq_mgr, queue);
amdgpu_userq_fence_driver_free(queue);
@@ -67,6 +77,22 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
kfree(queue);
}
int
amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_usermode_queue *queue;
int queue_id;
int ret = 0;
mutex_lock(&uq_mgr->userq_mutex);
/* Resume all the queues for this process */
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id)
ret += queue->queue_active;
mutex_unlock(&uq_mgr->userq_mutex);
return ret;
}
#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
static struct amdgpu_usermode_queue *
amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
@@ -202,6 +228,7 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
amdgpu_bo_unpin(queue->db_obj.obj);
amdgpu_bo_unref(&queue->db_obj.obj);
amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id);
uq_mgr->num_userqs--;
mutex_unlock(&uq_mgr->userq_mutex);
return 0;
}
@@ -277,6 +304,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
goto unlock;
}
args->out.queue_id = qid;
uq_mgr->num_userqs++;
unlock:
mutex_unlock(&uq_mgr->userq_mutex);
@@ -317,11 +345,93 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
}
#endif
static int
amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs;
struct amdgpu_usermode_queue *queue;
int queue_id;
int ret = 0;
userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX];
/* Try to suspend all the queues in this process ctx */
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id)
ret += userq_funcs->suspend(uq_mgr, queue);
if (ret)
DRM_ERROR("Couldn't suspend all the queues\n");
return ret;
}
/*
 * amdgpu_userqueue_wait_for_signal - wait for the last fence of each queue
 * @uq_mgr: userq manager whose queues are checked
 *
 * For every queue in @uq_mgr's IDR that has an unsignaled last_fence, waits
 * (interruptibly) up to 100 ms for that fence.
 *
 * Returns 0 when no queue has a pending fence left, or -ETIMEDOUT as soon
 * as one wait returns a value <= 0.
 */
static int
amdgpu_userqueue_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
{
	struct amdgpu_usermode_queue *queue;
	int queue_id;

	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
		struct dma_fence *fence = queue->last_fence;
		int remaining;

		/* Only queues with a still-pending fence need a wait */
		if (fence && !dma_fence_is_signaled(fence)) {
			remaining = dma_fence_wait_timeout(fence, true,
							   msecs_to_jiffies(100));
			if (remaining <= 0) {
				DRM_ERROR("Timed out waiting for fence f=%p\n", fence);
				return -ETIMEDOUT;
			}
		}
	}

	return 0;
}
/*
 * amdgpu_userqueue_suspend - suspend all queues and retire the eviction fence
 * @uq_mgr: userq manager to suspend
 *
 * With userq_mutex held, this:
 *  1. waits for the pending last fences of all queues,
 *  2. suspends all queues,
 *  3. signals the current eviction fence and destroys the old entry.
 *
 * A failure in step 1 or 2 is logged and the remaining steps are skipped;
 * the mutex is released on every path.
 */
void
amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr)
{
	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
	struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;

	mutex_lock(&uq_mgr->userq_mutex);

	if (amdgpu_userqueue_wait_for_signal(uq_mgr)) {
		/* Pending userqueue work did not finish in time */
		DRM_ERROR("Not suspending userqueue, timeout waiting for work\n");
	} else if (amdgpu_userqueue_suspend_all(uq_mgr)) {
		DRM_ERROR("Failed to evict userqueue\n");
	} else {
		/* Signal current eviction fence */
		amdgpu_eviction_fence_signal(evf_mgr);

		/* Cleanup old eviction fence entry */
		amdgpu_eviction_fence_destroy(evf_mgr);
	}

	mutex_unlock(&uq_mgr->userq_mutex);
}
/*
 * amdgpu_userq_mgr_init - initialise a userq manager
 * @userq_mgr: manager to initialise
 * @adev: device the manager belongs to
 *
 * Sets up the manager's mutex, its queue IDR (ids start at 1), the device
 * back-pointer and the queue counter, then verifies that the eviction
 * fence of the owning file private has already been set up.
 *
 * Returns 0 on success, or -EINVAL when the eviction fence is missing.
 * The manager fields are initialised in either case.
 */
int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev)
{
	struct amdgpu_fpriv *fpriv;

	userq_mgr->adev = adev;
	userq_mgr->num_userqs = 0;
	mutex_init(&userq_mgr->userq_mutex);
	idr_init_base(&userq_mgr->userq_idr, 1);

	fpriv = uq_mgr_to_fpriv(userq_mgr);
	if (fpriv->evf_mgr.ev_fence)
		return 0;

	DRM_ERROR("Eviction fence not initialized yet\n");
	return -EINVAL;
}