linux/drivers/gpu/drm/xe/xe_bb.c

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_bb.h"

#include "instructions/xe_mi_commands.h"
#include "xe_assert.h"
#include "xe_device_types.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_vm_types.h"

static int bb_prefetch(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt))
		/*
		 * RCS and CCS require 1K, although other engines would be
		 * okay with 512.
		 */
		return SZ_1K;
	else
		return SZ_512;
}

struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
{
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_bb *bb = kmalloc_obj(*bb);
	int err;

	if (!bb)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to allocate space for the requested number of dwords,
	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
	 * space to accommodate the platform-specific hardware prefetch
	 * requirements.
	 */
	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
			      4 * (dwords + 1) + bb_prefetch(gt));
	if (IS_ERR(bb->bo)) {
		err = PTR_ERR(bb->bo);
		goto err;
	}

	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
	bb->len = 0;

	return bb;
err:
	kfree(bb);
	return ERR_PTR(err);
}

struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
			    enum xe_sriov_vf_ccs_rw_ctxs ctx_id)
{
	struct xe_bb *bb = kmalloc_obj(*bb);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_sa_manager *bb_pool;
	int err;

	if (!bb)
		return ERR_PTR(-ENOMEM);
	/*
	 * We need to allocate space for the requested number of dwords &
	 * one additional MI_BATCH_BUFFER_END dword. Since the whole SA
	 * is submitted to HW, we need to make sure that the last instruction
	 * is not over written when the last chunk of SA is allocated for BB.
	 * So, this extra DW acts as a guard here.
	 */

	bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
	bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1));

	if (IS_ERR(bb->bo)) {
		err = PTR_ERR(bb->bo);
		goto err;
	}

	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
	bb->len = 0;

	return bb;
err:
	kfree(bb);
	return ERR_PTR(err);
}

static struct xe_sched_job *
__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
{
	u32 size = drm_suballoc_size(bb->bo);

	if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END)
		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;

	xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);

	xe_sa_bo_flush_write(bb->bo);

	return xe_sched_job_create(q, addr);
}

struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
						struct xe_bb *bb,
						u64 batch_base_ofs,
						u32 second_idx)
{
	u64 addr[2] = {
		batch_base_ofs + drm_suballoc_soffset(bb->bo),
		batch_base_ofs + drm_suballoc_soffset(bb->bo) +
		4 * second_idx,
	};

	xe_gt_assert(q->gt, second_idx <= bb->len);
	xe_gt_assert(q->gt, xe_sched_job_is_migration(q));
	xe_gt_assert(q->gt, q->width == 1);

	return __xe_bb_create_job(q, bb, addr);
}

struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
				      struct xe_bb *bb)
{
	u64 addr = xe_sa_bo_gpu_addr(bb->bo);

	xe_gt_assert(q->gt, !xe_sched_job_is_migration(q));
	xe_gt_assert(q->gt, q->width == 1);
	return __xe_bb_create_job(q, bb, &addr);
}

void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
{
	if (!bb)
		return;

	xe_sa_bo_free(bb->bo, fence);
	kfree(bb);
}