mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 14:53:58 -04:00
drm/amdgpu/userq: refcount userqueues to avoid any race conditions
To avoid race condition and avoid UAF cases, implement kref
based queues and protect the below operations using xa lock
a. Getting a queue from xarray
b. Incrementing/decrementing its refcount
Every time someone wants to access a queue, always get it via
amdgpu_userq_get to make sure we have locks in place and get
the object if active.
A userqueue is destroyed when the last refcount is dropped, which
typically would be via IOCTL or during fini.
v2: Add the missing drop in one of the conditions in the signal ioctl [Alex]
v3: remove the queue from the xarray first in the free queue ioctl path
[Christian]
- Pass queue to the amdgpu_userq_put directly.
- make amdgpu_userq_put xa_lock free since we are doing put for each get
only and final put is done via destroy and we remove the queue from xa
with lock.
- use userq_put in fini too so cleanup is done fully.
v4: Use xa_erase directly rather than doing load and erase in free
ioctl. Also remove some of the error logs which could be exploited
by the user to flood the logs [Christian]
Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 4952189b28)
Cc: <stable@vger.kernel.org> # 048c1c4e51: drm/amdgpu/userq: Consolidate wait ioctl exit path
Cc: <stable@vger.kernel.org>
This commit is contained in:
committed by
Alex Deucher
parent
048c1c4e51
commit
65b5c326ce
@@ -446,8 +446,7 @@ static int amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
|
||||
int queue_id)
|
||||
static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue)
|
||||
{
|
||||
struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
|
||||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
@@ -461,7 +460,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
|
||||
uq_funcs->mqd_destroy(queue);
|
||||
amdgpu_userq_fence_driver_free(queue);
|
||||
/* Use interrupt-safe locking since IRQ handlers may access these XArrays */
|
||||
xa_erase_irq(&uq_mgr->userq_xa, (unsigned long)queue_id);
|
||||
xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
|
||||
queue->userq_mgr = NULL;
|
||||
list_del(&queue->userq_va_list);
|
||||
@@ -470,12 +468,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
|
||||
up_read(&adev->reset_domain->sem);
|
||||
}
|
||||
|
||||
static struct amdgpu_usermode_queue *
|
||||
amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
|
||||
{
|
||||
return xa_load(&uq_mgr->userq_xa, qid);
|
||||
}
|
||||
|
||||
void
|
||||
amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_eviction_fence_mgr *evf_mgr)
|
||||
@@ -625,22 +617,13 @@ unref_bo:
|
||||
}
|
||||
|
||||
static int
|
||||
amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
|
||||
amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
|
||||
{
|
||||
struct amdgpu_fpriv *fpriv = filp->driver_priv;
|
||||
struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
|
||||
struct amdgpu_device *adev = uq_mgr->adev;
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
int r = 0;
|
||||
|
||||
cancel_delayed_work_sync(&uq_mgr->resume_work);
|
||||
mutex_lock(&uq_mgr->userq_mutex);
|
||||
queue = amdgpu_userq_find(uq_mgr, queue_id);
|
||||
if (!queue) {
|
||||
drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n");
|
||||
mutex_unlock(&uq_mgr->userq_mutex);
|
||||
return -EINVAL;
|
||||
}
|
||||
amdgpu_userq_wait_for_last_fence(queue);
|
||||
/* Cancel any pending hang detection work and cleanup */
|
||||
if (queue->hang_detect_fence) {
|
||||
@@ -672,7 +655,7 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
|
||||
drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
|
||||
queue->state = AMDGPU_USERQ_STATE_HUNG;
|
||||
}
|
||||
amdgpu_userq_cleanup(queue, queue_id);
|
||||
amdgpu_userq_cleanup(queue);
|
||||
mutex_unlock(&uq_mgr->userq_mutex);
|
||||
|
||||
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
|
||||
@@ -680,6 +663,37 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
|
||||
return r;
|
||||
}
|
||||
|
||||
static void amdgpu_userq_kref_destroy(struct kref *kref)
|
||||
{
|
||||
int r;
|
||||
struct amdgpu_usermode_queue *queue =
|
||||
container_of(kref, struct amdgpu_usermode_queue, refcount);
|
||||
struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
|
||||
|
||||
r = amdgpu_userq_destroy(uq_mgr, queue);
|
||||
if (r)
|
||||
drm_file_err(uq_mgr->file, "Failed to destroy usermode queue %d\n", r);
|
||||
}
|
||||
|
||||
struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid)
|
||||
{
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
|
||||
xa_lock(&uq_mgr->userq_xa);
|
||||
queue = xa_load(&uq_mgr->userq_xa, qid);
|
||||
if (queue)
|
||||
kref_get(&queue->refcount);
|
||||
xa_unlock(&uq_mgr->userq_xa);
|
||||
|
||||
return queue;
|
||||
}
|
||||
|
||||
void amdgpu_userq_put(struct amdgpu_usermode_queue *queue)
|
||||
{
|
||||
if (queue)
|
||||
kref_put(&queue->refcount, amdgpu_userq_kref_destroy);
|
||||
}
|
||||
|
||||
static int amdgpu_userq_priority_permit(struct drm_file *filp,
|
||||
int priority)
|
||||
{
|
||||
@@ -834,6 +848,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* drop this refcount during queue destroy */
|
||||
kref_init(&queue->refcount);
|
||||
|
||||
/* Wait for mode-1 reset to complete */
|
||||
down_read(&adev->reset_domain->sem);
|
||||
r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
|
||||
@@ -985,7 +1002,9 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *filp)
|
||||
{
|
||||
union drm_amdgpu_userq *args = data;
|
||||
int r;
|
||||
struct amdgpu_fpriv *fpriv = filp->driver_priv;
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
int r = 0;
|
||||
|
||||
if (!amdgpu_userq_enabled(dev))
|
||||
return -ENOTSUPP;
|
||||
@@ -1000,11 +1019,16 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
|
||||
drm_file_err(filp, "Failed to create usermode queue\n");
|
||||
break;
|
||||
|
||||
case AMDGPU_USERQ_OP_FREE:
|
||||
r = amdgpu_userq_destroy(filp, args->in.queue_id);
|
||||
if (r)
|
||||
drm_file_err(filp, "Failed to destroy usermode queue\n");
|
||||
case AMDGPU_USERQ_OP_FREE: {
|
||||
xa_lock(&fpriv->userq_mgr.userq_xa);
|
||||
queue = __xa_erase(&fpriv->userq_mgr.userq_xa, args->in.queue_id);
|
||||
xa_unlock(&fpriv->userq_mgr.userq_xa);
|
||||
if (!queue)
|
||||
return -ENOENT;
|
||||
|
||||
amdgpu_userq_put(queue);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
|
||||
@@ -1023,16 +1047,23 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
|
||||
|
||||
/* Resume all the queues for this process */
|
||||
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
|
||||
queue = amdgpu_userq_get(uq_mgr, queue_id);
|
||||
if (!queue)
|
||||
continue;
|
||||
|
||||
if (!amdgpu_userq_buffer_vas_mapped(queue)) {
|
||||
drm_file_err(uq_mgr->file,
|
||||
"trying restore queue without va mapping\n");
|
||||
queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
|
||||
amdgpu_userq_put(queue);
|
||||
continue;
|
||||
}
|
||||
|
||||
r = amdgpu_userq_restore_helper(queue);
|
||||
if (r)
|
||||
ret = r;
|
||||
|
||||
amdgpu_userq_put(queue);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
@@ -1266,9 +1297,13 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
|
||||
amdgpu_userq_detect_and_reset_queues(uq_mgr);
|
||||
/* Try to unmap all the queues in this process ctx */
|
||||
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
|
||||
queue = amdgpu_userq_get(uq_mgr, queue_id);
|
||||
if (!queue)
|
||||
continue;
|
||||
r = amdgpu_userq_preempt_helper(queue);
|
||||
if (r)
|
||||
ret = r;
|
||||
amdgpu_userq_put(queue);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
@@ -1301,16 +1336,24 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
|
||||
int ret;
|
||||
|
||||
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
|
||||
queue = amdgpu_userq_get(uq_mgr, queue_id);
|
||||
if (!queue)
|
||||
continue;
|
||||
|
||||
struct dma_fence *f = queue->last_fence;
|
||||
|
||||
if (!f || dma_fence_is_signaled(f))
|
||||
if (!f || dma_fence_is_signaled(f)) {
|
||||
amdgpu_userq_put(queue);
|
||||
continue;
|
||||
}
|
||||
ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
|
||||
if (ret <= 0) {
|
||||
drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
|
||||
f->context, f->seqno);
|
||||
amdgpu_userq_put(queue);
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
amdgpu_userq_put(queue);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -1361,20 +1404,23 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
|
||||
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
|
||||
{
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
unsigned long queue_id;
|
||||
unsigned long queue_id = 0;
|
||||
|
||||
cancel_delayed_work_sync(&userq_mgr->resume_work);
|
||||
for (;;) {
|
||||
xa_lock(&userq_mgr->userq_xa);
|
||||
queue = xa_find(&userq_mgr->userq_xa, &queue_id, ULONG_MAX,
|
||||
XA_PRESENT);
|
||||
if (queue)
|
||||
__xa_erase(&userq_mgr->userq_xa, queue_id);
|
||||
xa_unlock(&userq_mgr->userq_xa);
|
||||
|
||||
mutex_lock(&userq_mgr->userq_mutex);
|
||||
amdgpu_userq_detect_and_reset_queues(userq_mgr);
|
||||
xa_for_each(&userq_mgr->userq_xa, queue_id, queue) {
|
||||
amdgpu_userq_wait_for_last_fence(queue);
|
||||
amdgpu_userq_unmap_helper(queue);
|
||||
amdgpu_userq_cleanup(queue, queue_id);
|
||||
if (!queue)
|
||||
break;
|
||||
|
||||
amdgpu_userq_put(queue);
|
||||
}
|
||||
|
||||
xa_destroy(&userq_mgr->userq_xa);
|
||||
mutex_unlock(&userq_mgr->userq_mutex);
|
||||
mutex_destroy(&userq_mgr->userq_mutex);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user