drm/xe: Decouple bind queue last fence from TLB invalidations

Separate the bind queue’s last fence to apply exclusively to the bind
job, avoiding unnecessary serialization on prior TLB invalidations.
Preserve correct user fence signaling by merging bind and TLB
invalidation fences later in the pipeline.

v3:
 - Fix lockdep assert for migrate queues (CI)
 - Use individual dma fence contexts for array out fences (Testing)
 - Don't set last fence with arrays (Testing)
 - Move TLB invalid last fence under migrate lock (Testing)
 - Don't set queue last for migrate queues (Testing)

Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6047
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/20251031234050.3043507-4-matthew.brost@intel.com
This commit is contained in:
Matthew Brost
2025-10-31 16:40:47 -07:00
parent b2d7ec41f2
commit cb99e12ba8
6 changed files with 143 additions and 110 deletions

View File

@@ -1623,9 +1623,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
}
}
if (number_tiles > 1)
vm->composite_fence_ctx = dma_fence_context_alloc(1);
if (xef && xe->info.has_asid) {
u32 asid;
@@ -3107,20 +3104,26 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
struct dma_fence *fence = NULL;
struct dma_fence **fences = NULL;
struct dma_fence_array *cf = NULL;
int number_tiles = 0, current_fence = 0, err;
int number_tiles = 0, current_fence = 0, n_fence = 0, err;
u8 id;
number_tiles = vm_ops_setup_tile_args(vm, vops);
if (number_tiles == 0)
return ERR_PTR(-ENODATA);
if (number_tiles > 1) {
fences = kmalloc_array(number_tiles, sizeof(*fences),
GFP_KERNEL);
if (!fences) {
fence = ERR_PTR(-ENOMEM);
goto err_trace;
}
for_each_tile(tile, vm->xe, id)
n_fence += (1 + XE_MAX_GT_PER_TILE);
fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL);
if (!fences) {
fence = ERR_PTR(-ENOMEM);
goto err_trace;
}
cf = dma_fence_array_alloc(n_fence);
if (!cf) {
fence = ERR_PTR(-ENOMEM);
goto err_out;
}
for_each_tile(tile, vm->xe, id) {
@@ -3137,29 +3140,30 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
trace_xe_vm_ops_execute(vops);
for_each_tile(tile, vm->xe, id) {
struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q;
int i;
fence = NULL;
if (!vops->pt_update_ops[id].num_ops)
continue;
goto collect_fences;
fence = xe_pt_update_ops_run(tile, vops);
if (IS_ERR(fence))
goto err_out;
if (fences)
fences[current_fence++] = fence;
collect_fences:
fences[current_fence++] = fence ?: dma_fence_get_stub();
xe_migrate_job_lock(tile->migrate, q);
for_each_tlb_inval(i)
fences[current_fence++] =
xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
xe_migrate_job_unlock(tile->migrate, q);
}
if (fences) {
cf = dma_fence_array_create(number_tiles, fences,
vm->composite_fence_ctx,
vm->composite_fence_seqno++,
false);
if (!cf) {
--vm->composite_fence_seqno;
fence = ERR_PTR(-ENOMEM);
goto err_out;
}
fence = &cf->base;
}
xe_assert(vm->xe, current_fence == n_fence);
dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
1, false);
fence = &cf->base;
for_each_tile(tile, vm->xe, id) {
if (!vops->pt_update_ops[id].num_ops)
@@ -3220,7 +3224,6 @@ static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
struct dma_fence *fence)
{
struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
struct xe_user_fence *ufence;
struct xe_vma_op *op;
int i;
@@ -3241,7 +3244,6 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
if (fence) {
for (i = 0; i < vops->num_syncs; i++)
xe_sync_entry_signal(vops->syncs + i, fence);
xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
}
}
@@ -3435,19 +3437,19 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
struct xe_sync_entry *syncs,
int num_syncs)
{
struct dma_fence *fence;
struct dma_fence *fence = NULL;
int i, err = 0;
fence = xe_sync_in_fence_get(syncs, num_syncs,
to_wait_exec_queue(vm, q), vm);
if (IS_ERR(fence))
return PTR_ERR(fence);
if (num_syncs) {
fence = xe_sync_in_fence_get(syncs, num_syncs,
to_wait_exec_queue(vm, q), vm);
if (IS_ERR(fence))
return PTR_ERR(fence);
for (i = 0; i < num_syncs; i++)
xe_sync_entry_signal(&syncs[i], fence);
for (i = 0; i < num_syncs; i++)
xe_sync_entry_signal(&syncs[i], fence);
}
xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
fence);
dma_fence_put(fence);
return err;