drm/amdkfd: Enable GFX11 usermode queue oversubscription

Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
GART for usermode queues in order to support oversubscription. In the
case that work is submitted to an unmapped queue, MES must have a GART
wptr address to determine whether the queue should be mapped.

This change is accompanied with changes in MES and is applicable for
MES_API_VERSION >= 2.

v3:
- Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup
- Move wptr_bo refcount increment to amdgpu_amdkfd_map_gtt_bo_to_gart
- Remove list_del_init from amdgpu_amdkfd_map_gtt_bo_to_gart
- Cleanup/fix create_queue wptr_bo error handling
v4:
- Add MES version shift/mask defines to amdgpu_mes.h
- Change version check from MES_VERSION to MES_API_VERSION
- Add check in kfd_ioctl_create_queue before wptr bo pin/GART map to
ensure bo is a single page.

Signed-off-by: Graham Sider <Graham.Sider@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Graham Sider
2022-05-11 12:33:35 -04:00
committed by Alex Deucher
parent ff83e6e7ab
commit e77a541f5d
8 changed files with 125 additions and 8 deletions

View File

@@ -180,7 +180,8 @@ void pqm_uninit(struct process_queue_manager *pqm)
static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev, struct queue **q,
struct queue_properties *q_properties,
struct file *f, unsigned int qid)
struct file *f, struct amdgpu_bo *wptr_bo,
unsigned int qid)
{
int retval;
@@ -210,6 +211,7 @@ static int init_user_queue(struct process_queue_manager *pqm,
goto cleanup;
}
memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
(*q)->wptr_bo = wptr_bo;
}
pr_debug("PQM After init queue");
@@ -226,6 +228,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,
@@ -288,7 +291,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
* allocate_sdma_queue() in create_queue() has the
* corresponding check logic.
*/
retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -309,7 +312,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -435,9 +438,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
pdd->qpd.num_gws = 0;
}
if (dev->shared_resources.enable_mes)
if (dev->shared_resources.enable_mes) {
amdgpu_amdkfd_free_gtt_mem(dev->adev,
pqn->q->gang_ctx_bo);
if (pqn->q->wptr_bo)
amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
}
kfd_procfs_del_queue(pqn->q);
uninit_queue(pqn->q);
}
@@ -844,7 +851,7 @@ int kfd_criu_restore_queue(struct kfd_process *p,
print_queue_properties(&qp);
ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack,
ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
NULL);
if (ret) {
pr_err("Failed to create new queue err:%d\n", ret);