Files
linux/drivers/accel/amdxdna/aie2_message.c
Max Zhen d76856beb4 accel/amdxdna: Refactor GEM BO handling and add helper APIs for address retrieval
Refactor amdxdna GEM buffer object (BO) handling to simplify address
management and unify BO type semantics.

Introduce helper APIs to retrieve commonly used BO addresses:
- User virtual address (UVA)
- Kernel virtual address (KVA)
- Device address (IOVA/PA)

These helpers centralize address lookup logic and avoid duplicating
BO-specific handling across submission and execution paths. This also
improves readability and reduces the risk of inconsistent address
handling in future changes.

As part of the refactor:
- Rename SHMEM BO type to SHARE to better reflect its usage.
- Merge CMD BO handling into SHARE, removing special-case logic for
  command buffers.
- Consolidate BO type handling paths to reduce code duplication and
  simplify maintenance.

No functional change is intended. The refactor prepares the driver for
future enhancements by providing a cleaner abstraction for BO address
management.

Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Max Zhen <max.zhen@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260320210615.1973016-1-lizhi.hou@amd.com
2026-03-20 22:12:49 -07:00

1234 lines
32 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
*/
#include <drm/amdxdna_accel.h>
#include <drm/drm_cache.h>
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <linux/bitfield.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/types.h>
#include <linux/xarray.h>
#include "aie2_msg_priv.h"
#include "aie2_pci.h"
#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_mailbox_helper.h"
#include "amdxdna_pci_drv.h"
/* Declare req/resp buffers and a ready-to-send mailbox msg for opcode @op */
#define DECLARE_AIE2_MSG(name, op) \
DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
/* Per-device table of execution-message builders (legacy vs. NPU firmware) */
#define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops)
/*
 * Send a message on the management channel and wait for completion.
 *
 * A timeout (-ETIME) is treated as fatal for the channel and tears it
 * down. On mailbox success the firmware status word carried back in the
 * notify handle is checked as well; a non-success status is reported and
 * mapped to -EINVAL.
 */
static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
				   struct xdna_mailbox_msg *msg)
{
	struct amdxdna_dev *xdna = ndev->xdna;
	struct xdna_notify *hdl = msg->handle;
	int ret;

	if (!ndev->mgmt_chann)
		return -ENODEV;

	ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
	if (ret == -ETIME)
		aie2_destroy_mgmt_chann(ndev);

	/* Test and report the same firmware status word */
	if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) {
		XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
			 msg->opcode, *hdl->data);
		ret = -EINVAL;
	}

	return ret;
}
/*
 * Allocate a buffer the firmware can DMA into.
 *
 * The requested size is rounded up to at least 8KiB and then to a whole
 * power-of-two number of pages; the final size is written back through
 * @size. Returns the CPU virtual address (or an ERR_PTR on failure) and
 * the device-visible address through @dma_addr.
 */
void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
			    dma_addr_t *dma_addr)
{
	struct amdxdna_dev *xdna = ndev->xdna;
	int order;

	*size = max(*size, SZ_8K);
	order = get_order(*size);
	if (order > MAX_PAGE_ORDER)
		return ERR_PTR(-EINVAL);
	*size = PAGE_SIZE << order;

	/* IOVA path is used when the device memory is mapped via the IOMMU */
	if (amdxdna_iova_on(xdna))
		return amdxdna_iommu_alloc(xdna, *size, dma_addr);

	return dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr,
				     DMA_FROM_DEVICE, GFP_KERNEL);
}
/* Release a buffer obtained from aie2_alloc_msg_buffer() */
void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
			  void *cpu_addr, dma_addr_t dma_addr)
{
	struct amdxdna_dev *xdna = ndev->xdna;

	/* Mirror the allocation path: IOMMU-backed vs. noncoherent DMA */
	if (!amdxdna_iova_on(xdna)) {
		dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr,
				     DMA_FROM_DEVICE);
		return;
	}

	amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr);
}
/*
 * Request firmware suspend and wait for the PSP to report wait mode.
 * Returns 0 on success or a negative error code.
 */
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
{
	DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND);
	int ret;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (!ret)
		return aie2_psp_waitmode_poll(ndev->psp_hdl);

	XDNA_ERR(ndev->xdna, "Failed to suspend fw, ret %d", ret);
	return ret;
}
/*
 * Ask the firmware to resume. The request reuses the "suspend" message
 * layout, paired with the MSG_OP_RESUME opcode.
 */
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
{
	DECLARE_AIE2_MSG(suspend, MSG_OP_RESUME);

	return aie2_send_mgmt_msg_wait(ndev, &msg);
}
/* Write one runtime configuration entry (@type = @value) to firmware */
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value)
{
	DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
	int ret;

	req.type = type;
	req.value = value;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret)
		XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret);

	return ret;
}
/* Read one runtime configuration entry of @type into *value */
int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
{
	DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG);
	int ret;

	req.type = type;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret);
		return ret;
	}

	*value = resp.value;
	return 0;
}
/* Bind @pasid to the management queue in firmware */
int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid)
{
	DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID);

	req.pasid = pasid;

	return aie2_send_mgmt_msg_wait(ndev, &msg);
}
/* Query the AIE array hardware version from firmware into @version */
int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version)
{
	DECLARE_AIE2_MSG(aie_version_info, MSG_OP_QUERY_AIE_VERSION);
	struct amdxdna_dev *xdna = ndev->xdna;
	int ret;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret)
		return ret;

	version->major = resp.major;
	version->minor = resp.minor;
	XDNA_DBG(xdna, "Query AIE version - major: %u minor: %u completed",
		 resp.major, resp.minor);
	return 0;
}
/*
 * Query AIE tile geometry from firmware and translate the wire format into
 * the driver's struct aie_metadata: overall array shape plus per-tile-type
 * (core/mem/shim) row ranges and DMA channel/lock/event register counts.
 */
int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata)
{
	DECLARE_AIE2_MSG(aie_tile_info, MSG_OP_QUERY_AIE_TILE_INFO);
	int ret;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret)
		return ret;

	metadata->size = resp.info.size;
	metadata->cols = resp.info.cols;
	metadata->rows = resp.info.rows;

	metadata->version.major = resp.info.major;
	metadata->version.minor = resp.info.minor;

	/* Core (compute) tiles */
	metadata->core.row_count = resp.info.core_rows;
	metadata->core.row_start = resp.info.core_row_start;
	metadata->core.dma_channel_count = resp.info.core_dma_channels;
	metadata->core.lock_count = resp.info.core_locks;
	metadata->core.event_reg_count = resp.info.core_events;

	/* Memory tiles */
	metadata->mem.row_count = resp.info.mem_rows;
	metadata->mem.row_start = resp.info.mem_row_start;
	metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
	metadata->mem.lock_count = resp.info.mem_locks;
	metadata->mem.event_reg_count = resp.info.mem_events;

	/* Shim (interface) tiles */
	metadata->shim.row_count = resp.info.shim_rows;
	metadata->shim.row_start = resp.info.shim_row_start;
	metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
	metadata->shim.lock_count = resp.info.shim_locks;
	metadata->shim.event_reg_count = resp.info.shim_events;

	return 0;
}
/* Fetch the running firmware version (major/minor/sub/build) into @fw_ver */
int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
				struct amdxdna_fw_ver *fw_ver)
{
	DECLARE_AIE2_MSG(firmware_version, MSG_OP_GET_FIRMWARE_VERSION);
	int ret;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (!ret) {
		fw_ver->major = resp.major;
		fw_ver->minor = resp.minor;
		fw_ver->sub = resp.sub;
		fw_ver->build = resp.build;
	}

	return ret;
}
/*
 * Ask firmware to destroy context @id. -ENODEV simply means the device
 * (and thus the context) is already gone, so it is only logged at debug
 * level; any other failure is a warning.
 */
static int aie2_destroy_context_req(struct amdxdna_dev_hdl *ndev, u32 id)
{
	DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
	struct amdxdna_dev *xdna = ndev->xdna;
	int ret;

	req.context_id = id;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret == -ENODEV)
		XDNA_DBG(xdna, "Destroy context: device already stopped");
	else if (ret)
		XDNA_WARN(xdna, "Destroy context failed, ret %d", ret);

	return ret;
}
/*
 * Map the context's QoS priority onto a firmware priority level. Without
 * preemption support every context runs at PRIORITY_HIGH; unknown QoS
 * values also fall back to PRIORITY_HIGH.
 */
static u32 aie2_get_context_priority(struct amdxdna_dev_hdl *ndev,
				     struct amdxdna_hwctx *hwctx)
{
	if (!AIE2_FEATURE_ON(ndev, AIE2_PREEMPT))
		return PRIORITY_HIGH;

	if (hwctx->qos.priority == AMDXDNA_QOS_REALTIME_PRIORITY)
		return PRIORITY_REALTIME;
	if (hwctx->qos.priority == AMDXDNA_QOS_NORMAL_PRIORITY)
		return PRIORITY_NORMAL;
	if (hwctx->qos.priority == AMDXDNA_QOS_LOW_PRIORITY)
		return PRIORITY_LOW;

	/* AMDXDNA_QOS_HIGH_PRIORITY and anything unrecognized */
	return PRIORITY_HIGH;
}
/*
 * Create a firmware context for @hwctx and bring up its mailbox channel.
 *
 * On success the firmware context id, mailbox channel and (optionally)
 * force-preempt configuration are set up and ndev->hwctx_num is bumped.
 * On any failure the firmware context is destroyed again.
 *
 * Fix: the irq number returned by pci_irq_vector() was previously kept in
 * 'ret', which xdna_mailbox_start_channel() overwrote before the
 * "Mailbox channel irq" debug print — so it always logged 0. Keep the irq
 * in its own variable.
 */
int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
{
	DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT);
	struct amdxdna_dev *xdna = ndev->xdna;
	struct xdna_mailbox_chann_res x2i;
	struct xdna_mailbox_chann_res i2x;
	struct cq_pair *cq_pair;
	u32 intr_reg;
	int irq;
	int ret;

	req.aie_type = 1;
	req.start_col = hwctx->start_col;
	req.num_col = hwctx->num_col;
	req.num_unused_col = hwctx->num_unused_col;
	req.num_cq_pairs_requested = 1;
	req.pasid = amdxdna_pasid_on(hwctx->client) ? hwctx->client->pasid : 0;
	req.context_priority = aie2_get_context_priority(ndev, hwctx);

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret)
		return ret;

	hwctx->fw_ctx_id = resp.context_id;
	if (WARN_ON_ONCE(hwctx->fw_ctx_id == -1))
		return -EINVAL;

	if (ndev->force_preempt_enabled) {
		ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id);
		if (ret) {
			XDNA_ERR(xdna, "failed to enable force preempt %d", ret);
			goto del_ctx_req;
		}
	}

	/* Translate firmware queue-pair addresses into mailbox channel resources */
	cq_pair = &resp.cq_pair[0];
	x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr);
	x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr);
	x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr);
	x2i.rb_size = cq_pair->x2i_q.buf_size;

	i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr);
	i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr);
	i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr);
	i2x.rb_size = cq_pair->i2x_q.buf_size;

	irq = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id);
	if (irq == -EINVAL) {
		XDNA_ERR(xdna, "Alloc IRQ failed %d", irq);
		ret = irq;
		goto del_ctx_req;
	}

	intr_reg = i2x.mb_head_ptr_reg + 4;
	hwctx->priv->mbox_chann = xdna_mailbox_alloc_channel(ndev->mbox);
	if (!hwctx->priv->mbox_chann) {
		XDNA_ERR(xdna, "Not able to create channel");
		ret = -EINVAL;
		goto del_ctx_req;
	}

	ret = xdna_mailbox_start_channel(hwctx->priv->mbox_chann, &x2i, &i2x,
					 intr_reg, irq);
	if (ret) {
		XDNA_ERR(xdna, "Not able to create channel");
		ret = -EINVAL;
		goto free_channel;
	}

	ndev->hwctx_num++;
	XDNA_DBG(xdna, "Mailbox channel irq: %d, msix_id: %d", irq, resp.msix_id);
	XDNA_DBG(xdna, "Created fw ctx %d pasid %d", hwctx->fw_ctx_id, hwctx->client->pasid);
	return 0;

free_channel:
	xdna_mailbox_free_channel(hwctx->priv->mbox_chann);
del_ctx_req:
	aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
	return ret;
}
/*
 * Tear down the firmware context and mailbox channel of @hwctx.
 * Ordering matters: stop the channel, destroy the firmware context,
 * then free the channel. No-op if the channel was never created.
 */
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
{
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_dev *xdna = ndev->xdna;
	int ret;

	if (!chann)
		return 0;

	xdna_mailbox_stop_channel(chann);
	ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
	xdna_mailbox_free_channel(chann);
	XDNA_DBG(xdna, "Destroyed fw ctx %d", hwctx->fw_ctx_id);

	hwctx->priv->mbox_chann = NULL;
	hwctx->fw_ctx_id = -1;
	ndev->hwctx_num--;
	return ret;
}
/* Tell firmware where the host buffer for @context_id lives (addr/size) */
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size)
{
	DECLARE_AIE2_MSG(map_host_buffer, MSG_OP_MAP_HOST_BUFFER);
	struct amdxdna_dev *xdna = ndev->xdna;
	int ret;

	req.context_id = context_id;
	req.buf_addr = addr;
	req.buf_size = size;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (!ret)
		XDNA_DBG(xdna, "fw ctx %d map host buf addr 0x%llx size 0x%llx",
			 context_id, addr, size);

	return ret;
}
/* hwctx walk callback: OR the context's column span into the bitmap @arg */
static int amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx, void *arg)
{
	u32 *col_map = arg;
	u32 first = hwctx->start_col;
	u32 last = first + hwctx->num_col - 1;

	*col_map |= GENMASK(last, first);
	return 0;
}
/*
 * Dump per-column status from the device into a DMA buffer and copy it to
 * the user buffer @buf (@size bytes). On success *cols_filled holds the
 * bitmap of AIE columns owned by any hardware context; it stays zero on
 * failure.
 */
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
		      u32 size, u32 *cols_filled)
{
	DECLARE_AIE2_MSG(aie_column_info, MSG_OP_QUERY_COL_STATUS);
	struct amdxdna_dev *xdna = ndev->xdna;
	u32 buf_sz = size, aie_bitmap = 0;
	struct amdxdna_client *client;
	dma_addr_t dma_addr;
	u8 *buff_addr;
	int ret;

	/* buf_sz may be rounded up by the allocator */
	buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
	if (IS_ERR(buff_addr))
		return PTR_ERR(buff_addr);

	/* Go through each hardware context and mark the AIE columns that are active */
	list_for_each_entry(client, &xdna->client_list, node)
		amdxdna_hwctx_walk(client, &aie_bitmap, amdxdna_hwctx_col_map);

	*cols_filled = 0;
	req.dump_buff_addr = dma_addr;
	req.dump_buff_size = buf_sz;
	req.num_cols = hweight32(aie_bitmap);
	req.aie_bitmap = aie_bitmap;

	drm_clflush_virt_range(buff_addr, size); /* device can access */
	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret) {
		XDNA_ERR(xdna, "Error during NPU query, status %d", ret);
		goto fail;
	}

	XDNA_DBG(xdna, "Query NPU status completed");

	/* Firmware reports how many bytes it actually wrote */
	if (size < resp.size) {
		ret = -EINVAL;
		XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
		goto fail;
	}

	if (copy_to_user(buf, buff_addr, resp.size)) {
		ret = -EFAULT;
		XDNA_ERR(xdna, "Failed to copy NPU status to user space");
		goto fail;
	}

	*cols_filled = aie_bitmap;
fail:
	aie2_free_msg_buffer(ndev, buf_sz, buff_addr, dma_addr);
	return ret;
}
/*
 * Fetch a telemetry dump of the type selected in @header into the user
 * buffer @buf (@size bytes), then fill in the telemetry version reported
 * by firmware in @header.
 */
int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
			 char __user *buf, u32 size,
			 struct amdxdna_drm_query_telemetry_header *header)
{
	DECLARE_AIE2_MSG(get_telemetry, MSG_OP_GET_TELEMETRY);
	struct amdxdna_dev *xdna = ndev->xdna;
	dma_addr_t dma_addr;
	u32 buf_sz = size;
	u8 *addr;
	int ret;

	/* Validate the caller-selected telemetry type before talking to fw */
	if (header->type >= MAX_TELEMETRY_TYPE)
		return -EINVAL;

	/* buf_sz may be rounded up by the allocator */
	addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
	if (IS_ERR(addr))
		return PTR_ERR(addr);

	req.buf_addr = dma_addr;
	req.buf_size = buf_sz;
	req.type = header->type;

	drm_clflush_virt_range(addr, size); /* device can access */
	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret) {
		XDNA_ERR(xdna, "Query telemetry failed, status %d", ret);
		goto free_buf;
	}

	/* Firmware reports how many bytes it actually wrote */
	if (size < resp.size) {
		ret = -EINVAL;
		XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
		goto free_buf;
	}

	if (copy_to_user(buf, addr, resp.size)) {
		ret = -EFAULT;
		XDNA_ERR(xdna, "Failed to copy telemetry to user space");
		goto free_buf;
	}

	header->major = resp.major;
	header->minor = resp.minor;

free_buf:
	aie2_free_msg_buffer(ndev, buf_sz, addr, dma_addr);
	return ret;
}
/*
 * Register a buffer for asynchronous event messages from firmware.
 * @cb is invoked when firmware posts an event; the send is fire-and-wait
 * via the management channel's normal timeout.
 */
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
				 void *handle, int (*cb)(void*, void __iomem *, size_t))
{
	struct async_event_msg_req req = {
		.buf_addr = addr,
		.buf_size = size,
	};
	struct xdna_mailbox_msg msg = {
		.send_data = (u8 *)&req,
		.send_size = sizeof(req),
		.handle = handle,
		.opcode = MSG_OP_REGISTER_ASYNC_EVENT_MSG,
		.notify_cb = cb,
	};

	XDNA_DBG(ndev->xdna, "Register addr 0x%llx size 0x%x", addr, size);
	return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
}
/*
 * Send a CONFIG_CU message describing every compute unit (CU) configured
 * on @hwctx. Each table entry packs the CU's PDI device address (shifted
 * by the device memory buffer shift) together with its function index.
 * Only AMDXDNA_BO_DEV objects are valid PDI backing BOs.
 */
int aie2_config_cu(struct amdxdna_hwctx *hwctx,
		   int (*notify_cb)(void *, void __iomem *, size_t))
{
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	u32 shift = xdna->dev_info->dev_mem_buf_shift;
	struct config_cu_req req = { 0 };
	struct xdna_mailbox_msg msg;
	struct drm_gem_object *gobj;
	struct amdxdna_gem_obj *abo;
	int i;

	if (!chann)
		return -ENODEV;

	/* Nothing configured yet is not an error */
	if (!hwctx->cus)
		return 0;

	if (hwctx->cus->num_cus > MAX_NUM_CUS) {
		XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS);
		return -EINVAL;
	}

	for (i = 0; i < hwctx->cus->num_cus; i++) {
		struct amdxdna_cu_config *cu = &hwctx->cus->cu_configs[i];

		/* Reserved pad bytes must be zero */
		if (XDNA_MBZ_DBG(xdna, cu->pad, sizeof(cu->pad)))
			return -EINVAL;

		/* Takes a reference on the GEM object; dropped below */
		gobj = drm_gem_object_lookup(hwctx->client->filp, cu->cu_bo);
		if (!gobj) {
			XDNA_ERR(xdna, "Lookup GEM object failed");
			return -EINVAL;
		}
		abo = to_xdna_obj(gobj);
		if (abo->type != AMDXDNA_BO_DEV) {
			drm_gem_object_put(gobj);
			XDNA_ERR(xdna, "Invalid BO type");
			return -EINVAL;
		}

		req.cfgs[i] = FIELD_PREP(AIE2_MSG_CFG_CU_PDI_ADDR,
					 amdxdna_gem_dev_addr(abo) >> shift);
		req.cfgs[i] |= FIELD_PREP(AIE2_MSG_CFG_CU_FUNC, cu->cu_func);
		XDNA_DBG(xdna, "CU %d full addr 0x%llx, cfg 0x%x", i,
			 amdxdna_gem_dev_addr(abo), req.cfgs[i]);
		drm_gem_object_put(gobj);
	}
	req.num_cus = hwctx->cus->num_cus;

	msg.send_data = (u8 *)&req;
	msg.send_size = sizeof(req);
	msg.handle = hwctx;
	msg.opcode = MSG_OP_CONFIG_CU;
	msg.notify_cb = notify_cb;
	return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
}
/*
 * Build an execute-buffer (CF) request from the command BO's payload.
 * Returns the request size and mailbox opcode through @size/@msg_op.
 */
static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
				 size_t *size, u32 *msg_op)
{
	struct execute_buffer_req *ebuf = req;
	u32 payload_len;
	void *payload;

	payload = amdxdna_cmd_get_payload(cmd_bo, &payload_len);
	if (payload_len > sizeof(ebuf->payload))
		return -EINVAL;

	ebuf->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (ebuf->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	memcpy(ebuf->payload, payload, payload_len);

	*size = sizeof(*ebuf);
	*msg_op = MSG_OP_EXECUTE_BUFFER_CF;
	return 0;
}
/*
 * Build a DPU execute request from the command BO's start-NPU payload.
 * Returns the request size and mailbox opcode through @size/@msg_op.
 */
static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
				  size_t *size, u32 *msg_op)
{
	struct exec_dpu_req *dpu_req = req;
	struct amdxdna_cmd_start_npu *sn;
	u32 cmd_len;

	sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
	/*
	 * If cmd_len < sizeof(*sn) the unsigned subtraction wraps to a huge
	 * value and fails this bound check, so a short payload is rejected
	 * here as well.
	 */
	if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload))
		return -EINVAL;

	dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (dpu_req->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	/* Instruction buffer location and properties come from userspace */
	dpu_req->inst_buf_addr = sn->buffer;
	dpu_req->inst_size = sn->buffer_size;
	dpu_req->inst_prop_cnt = sn->prop_count;
	memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn));

	*size = sizeof(*dpu_req);
	*msg_op = MSG_OP_EXEC_DPU;
	return 0;
}
/* Build a legacy chained-execution request over the slot buffer */
static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
{
	struct cmd_chain_req *chain = req;

	chain->buf_addr = slot_addr;
	chain->buf_size = size;
	chain->count = cmd_cnt;
}
/* Build an NPU chained-execution request; flags/reserved are cleared */
static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
{
	struct cmd_chain_npu_req *chain = req;

	chain->flags = 0;
	chain->reserved = 0;
	chain->buf_addr = slot_addr;
	chain->buf_size = size;
	chain->count = cmd_cnt;
}
/*
 * Fill one legacy CF slot from @cmd_bo. On entry *size is the space left
 * in the slot buffer; on success it is updated to the bytes consumed.
 */
static int
aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
	struct cmd_chain_slot_execbuf_cf *cf = slot;
	u32 payload_len;
	void *payload;

	payload = amdxdna_cmd_get_payload(cmd_bo, &payload_len);
	if (*size < sizeof(*cf) + payload_len)
		return -EINVAL;

	cf->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (cf->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	cf->arg_cnt = payload_len / sizeof(u32);
	memcpy(cf->args, payload, payload_len);

	/* Accurate slot size to hint firmware to do necessary copy */
	*size = sizeof(*cf) + payload_len;
	return 0;
}
/*
 * Fill one legacy DPU slot from @cmd_bo. On entry *size is the space left
 * in the slot buffer; on success it is updated to the bytes consumed.
 */
static int
aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
	struct cmd_chain_slot_dpu *dpu_slot = slot;
	struct amdxdna_cmd_start_npu *sn;
	u32 cmd_len;
	u32 arg_sz;

	sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
	/* arg_sz may wrap if cmd_len is short; the next check rejects both cases */
	arg_sz = cmd_len - sizeof(*sn);
	if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
		return -EINVAL;

	if (*size < sizeof(*dpu_slot) + arg_sz)
		return -EINVAL;

	dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (dpu_slot->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	dpu_slot->inst_buf_addr = sn->buffer;
	dpu_slot->inst_size = sn->buffer_size;
	dpu_slot->inst_prop_cnt = sn->prop_count;
	dpu_slot->arg_cnt = arg_sz / sizeof(u32);
	memcpy(dpu_slot->args, sn->prop_args, arg_sz);

	/* Accurate slot size to hint firmware to do necessary copy */
	*size = sizeof(*dpu_slot) + arg_sz;
	return 0;
}
/* Slot filler placeholder for command types a firmware interface lacks */
static int aie2_cmdlist_unsupp(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
	return -EOPNOTSUPP;
}
/*
 * Map an ERT command op to the legacy chained-execution mailbox opcode;
 * MSG_OP_MAX_OPCODE means the op cannot be chained on this firmware.
 */
static u32 aie2_get_chain_msg_op(u32 cmd_op)
{
	if (cmd_op == ERT_START_CU)
		return MSG_OP_CHAIN_EXEC_BUFFER_CF;
	if (cmd_op == ERT_START_NPU)
		return MSG_OP_CHAIN_EXEC_DPU;

	return MSG_OP_MAX_OPCODE;
}
/*
 * Execution-message builders for legacy (pre-NPU-command) firmware.
 * Preempt and ELF command types are not supported on this interface.
 */
static struct aie2_exec_msg_ops legacy_exec_message_ops = {
	.init_cu_req = aie2_init_exec_cu_req,
	.init_dpu_req = aie2_init_exec_dpu_req,
	.init_chain_req = aie2_init_exec_chain_req,
	.fill_cf_slot = aie2_cmdlist_fill_cf,
	.fill_dpu_slot = aie2_cmdlist_fill_dpu,
	.fill_preempt_slot = aie2_cmdlist_unsupp,
	.fill_elf_slot = aie2_cmdlist_unsupp,
	.get_chain_msg_op = aie2_get_chain_msg_op,
};
/*
 * Fill one NPU slot with a non-ELF (CF) command. On entry *size is the
 * space left in the slot buffer; on success it becomes the bytes consumed.
 */
static int
aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
	struct cmd_chain_slot_npu *npu = slot;
	u32 payload_len;
	void *payload;

	payload = amdxdna_cmd_get_payload(cmd_bo, &payload_len);
	if (*size < sizeof(*npu) + payload_len)
		return -EINVAL;

	memset(npu, 0, sizeof(*npu));
	npu->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (npu->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	npu->type = EXEC_NPU_TYPE_NON_ELF;
	npu->arg_cnt = payload_len / sizeof(u32);
	memcpy(npu->args, payload, payload_len);

	/* Report the exact number of bytes consumed in the slot */
	*size = sizeof(*npu) + payload_len;
	return 0;
}
/*
 * Fill one NPU slot with a partial-ELF (DPU) command. On entry *size is
 * the space left in the slot buffer; on success it becomes the bytes
 * consumed.
 */
static int
aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
	struct cmd_chain_slot_npu *npu_slot = slot;
	struct amdxdna_cmd_start_npu *sn;
	u32 cmd_len;
	u32 arg_sz;

	sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
	/* arg_sz may wrap if cmd_len is short; the next check rejects both cases */
	arg_sz = cmd_len - sizeof(*sn);
	if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
		return -EINVAL;

	if (*size < sizeof(*npu_slot) + arg_sz)
		return -EINVAL;

	memset(npu_slot, 0, sizeof(*npu_slot));
	npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (npu_slot->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF;
	npu_slot->inst_buf_addr = sn->buffer;
	npu_slot->inst_size = sn->buffer_size;
	npu_slot->inst_prop_cnt = sn->prop_count;
	npu_slot->arg_cnt = arg_sz / sizeof(u32);
	memcpy(npu_slot->args, sn->prop_args, arg_sz);

	/* Report the exact number of bytes consumed in the slot */
	*size = sizeof(*npu_slot) + arg_sz;
	return 0;
}
/*
 * Fill one NPU slot with a preemptible command. The preempt data supplies
 * instruction, save and restore buffer addresses/sizes so firmware can
 * checkpoint and resume the command.
 */
static int
aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
	struct cmd_chain_slot_npu *npu_slot = slot;
	struct amdxdna_cmd_preempt_data *pd;
	u32 cmd_len;
	u32 arg_sz;

	pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
	/* arg_sz may wrap if cmd_len is short; the next check rejects both cases */
	arg_sz = cmd_len - sizeof(*pd);
	if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
		return -EINVAL;

	if (*size < sizeof(*npu_slot) + arg_sz)
		return -EINVAL;

	memset(npu_slot, 0, sizeof(*npu_slot));
	npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (npu_slot->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	npu_slot->type = EXEC_NPU_TYPE_PREEMPT;
	npu_slot->inst_buf_addr = pd->inst_buf;
	npu_slot->save_buf_addr = pd->save_buf;
	npu_slot->restore_buf_addr = pd->restore_buf;
	npu_slot->inst_size = pd->inst_size;
	npu_slot->save_size = pd->save_size;
	npu_slot->restore_size = pd->restore_size;
	npu_slot->inst_prop_cnt = pd->inst_prop_cnt;
	npu_slot->arg_cnt = arg_sz / sizeof(u32);
	memcpy(npu_slot->args, pd->prop_args, arg_sz);

	/* Report the exact number of bytes consumed in the slot */
	*size = sizeof(*npu_slot) + arg_sz;
	return 0;
}
/*
 * Fill one NPU slot with a full-ELF command. Unlike the other fillers no
 * CU index is used and the single argument is the fixed transaction
 * opcode; user prop_args are not copied into the slot.
 */
static int
aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
	struct cmd_chain_slot_npu *npu_slot = slot;
	struct amdxdna_cmd_preempt_data *pd;
	u32 cmd_len;
	u32 arg_sz;

	pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
	/* arg_sz may wrap if cmd_len is short; the next check rejects both cases */
	arg_sz = cmd_len - sizeof(*pd);
	if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
		return -EINVAL;

	if (*size < sizeof(*npu_slot) + arg_sz)
		return -EINVAL;

	memset(npu_slot, 0, sizeof(*npu_slot));
	npu_slot->type = EXEC_NPU_TYPE_ELF;
	npu_slot->inst_buf_addr = pd->inst_buf;
	npu_slot->save_buf_addr = pd->save_buf;
	npu_slot->restore_buf_addr = pd->restore_buf;
	npu_slot->inst_size = pd->inst_size;
	npu_slot->save_size = pd->save_size;
	npu_slot->restore_size = pd->restore_size;
	npu_slot->inst_prop_cnt = pd->inst_prop_cnt;

	/* Single fixed argument: run the transaction kernel op */
	npu_slot->arg_cnt = 1;
	npu_slot->args[0] = AIE2_EXEC_BUFFER_KERNEL_OP_TXN;

	*size = struct_size(npu_slot, args, npu_slot->arg_cnt);
	return 0;
}
/* NPU firmware uses one chained-exec opcode regardless of command type */
static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
{
	return MSG_OP_CHAIN_EXEC_NPU;
}
/*
 * Execution-message builders for firmware with the unified NPU command
 * interface (AIE2_NPU_COMMAND feature), including preempt and ELF slots.
 */
static struct aie2_exec_msg_ops npu_exec_message_ops = {
	.init_cu_req = aie2_init_exec_cu_req,
	.init_dpu_req = aie2_init_exec_dpu_req,
	.init_chain_req = aie2_init_npu_chain_req,
	.fill_cf_slot = aie2_cmdlist_fill_npu_cf,
	.fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
	.fill_preempt_slot = aie2_cmdlist_fill_npu_preempt,
	.fill_elf_slot = aie2_cmdlist_fill_npu_elf,
	.get_chain_msg_op = aie2_get_npu_chain_msg_op,
};
/*
 * Dispatch single-command request building to the device's message ops
 * based on the command BO's ERT op. Fills @req and reports its size and
 * mailbox opcode through @size/@msg_op.
 */
static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo,
			      size_t *size, u32 *msg_op)
{
	struct amdxdna_dev *xdna = cmd_abo->client->xdna;
	u32 op = amdxdna_cmd_get_op(cmd_abo);
	int ret;

	switch (op) {
	case ERT_START_CU:
		ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op);
		if (ret)
			XDNA_DBG(xdna, "Init CU req failed ret %d", ret);
		break;
	case ERT_START_NPU:
		ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op);
		if (ret)
			XDNA_DBG(xdna, "Init DPU req failed ret %d", ret);
		break;
	default:
		XDNA_ERR(xdna, "Unsupported op %d", op);
		ret = -EOPNOTSUPP;
		break;
	}

	return ret;
}
/*
 * Fill one command-list slot from @cmd_abo. All commands in a chain must
 * share the same op: *cmd_op starts as ERT_INVALID_CMD and is latched to
 * the first command's op; a later mismatch fails with -EINVAL. Preempt
 * and ELF ops additionally require the AIE2_PREEMPT feature.
 */
static int
aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
		       size_t *size, u32 *cmd_op)
{
	struct amdxdna_dev *xdna = cmd_abo->client->xdna;
	int ret;
	u32 op;

	op = amdxdna_cmd_get_op(cmd_abo);
	if (*cmd_op == ERT_INVALID_CMD)
		*cmd_op = op;
	else if (op != *cmd_op)
		return -EINVAL;

	switch (op) {
	case ERT_START_CU:
		ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size);
		break;
	case ERT_START_NPU:
		ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
		break;
	case ERT_START_NPU_PREEMPT:
		if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
			return -EOPNOTSUPP;
		ret = EXEC_MSG_OPS(xdna)->fill_preempt_slot(cmd_abo, slot, size);
		break;
	case ERT_START_NPU_PREEMPT_ELF:
		if (!AIE2_FEATURE_ON(xdna->dev_handle, AIE2_PREEMPT))
			return -EOPNOTSUPP;
		ret = EXEC_MSG_OPS(xdna)->fill_elf_slot(cmd_abo, slot, size);
		break;
	default:
		XDNA_INFO(xdna, "Unsupported op %d", op);
		ret = -EOPNOTSUPP;
		break;
	}

	return ret;
}
/* Select the execution-message op table matching the firmware's interface */
void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
{
	ndev->exec_msg_ops = AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND) ?
			     &npu_exec_message_ops : &legacy_exec_message_ops;
}
/*
 * Tear down the management mailbox channel. Caller must hold
 * xdna->dev_lock (asserted below); safe to call when the channel is
 * already gone.
 */
void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev)
{
	struct amdxdna_dev *xdna = ndev->xdna;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	if (!ndev->mgmt_chann)
		return;

	/* Stop before free: in-flight traffic must drain first */
	xdna_mailbox_stop_channel(ndev->mgmt_chann);
	xdna_mailbox_free_channel(ndev->mgmt_chann);
	ndev->mgmt_chann = NULL;
}
/* Pick the per-slot kernel command buffer BO for this job's sequence number */
static inline struct amdxdna_gem_obj *
aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
{
	return job->hwctx->priv->cmd_buf[get_job_idx(job->seq)];
}
/*
 * Submit a single command directly (no command list) on the context's
 * mailbox channel. @notify_cb runs when firmware completes the command.
 */
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, void __iomem *, size_t))
{
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct xdna_mailbox_msg msg;
	union exec_req req;
	int ret;

	if (!chann)
		return -ENODEV;

	/* Build the request; also sets msg.send_size and msg.opcode */
	ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode);
	if (ret)
		return ret;

	msg.handle = job;
	msg.notify_cb = notify_cb;
	msg.send_data = (u8 *)&req;
	print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
			     0x40, false);

	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
	if (ret)
		XDNA_ERR(xdna, "Send message failed");

	return ret;
}
/*
 * Submit an ERT_CMD_CHAIN command: expand every chained command BO into a
 * slot of the per-job kernel command buffer, then send one chained-exec
 * message describing the whole buffer. All chained commands must share
 * the same op (enforced by aie2_cmdlist_fill_slot).
 */
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
			       struct amdxdna_sched_job *job,
			       int (*notify_cb)(void *, void __iomem *, size_t))
{
	struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	void *cmd_buf = amdxdna_gem_vmap(cmdbuf_abo);
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_cmd_chain *payload;
	struct xdna_mailbox_msg msg;
	union exec_chain_req req;
	u32 payload_len, ccnt;
	u32 offset = 0;
	size_t size;
	int ret;
	u32 op;
	u32 i;

	if (!cmd_buf)
		return -ENOMEM;

	op = amdxdna_cmd_get_op(cmd_abo);
	payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
	if (op != ERT_CMD_CHAIN) {
		XDNA_DBG(xdna, "Invalid op code %d", op);
		return -EINVAL;
	}
	if (!payload) {
		XDNA_DBG(xdna, "Failed to get command payload");
		return -EINVAL;
	}

	/* The payload must be large enough to hold ccnt BO handles */
	ccnt = payload->command_count;
	if (payload_len < struct_size(payload, data, ccnt)) {
		XDNA_DBG(xdna, "Invalid command count %d", ccnt);
		return -EINVAL;
	}

	/* Latched to the first command's op by aie2_cmdlist_fill_slot() */
	op = ERT_INVALID_CMD;
	for (i = 0; i < ccnt; i++) {
		u32 boh = (u32)(payload->data[i]);
		struct amdxdna_gem_obj *abo;

		/* Takes a BO reference; dropped right after the slot is filled */
		abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_SHARE);
		if (!abo) {
			XDNA_ERR(xdna, "Failed to find cmd BO %d", boh);
			return -ENOENT;
		}

		/* In: remaining space; out: bytes this slot consumed */
		size = cmdbuf_abo->mem.size - offset;
		ret = aie2_cmdlist_fill_slot(cmd_buf + offset, abo, &size, &op);
		amdxdna_gem_put_obj(abo);
		if (ret)
			return ret;

		offset += size;
	}

	XDNA_DBG(xdna, "Total %d commands:", ccnt);
	print_hex_dump_debug("cmdbufs: ", DUMP_PREFIX_OFFSET, 16, 4,
			     cmd_buf, offset, false);

	msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
	if (msg.opcode == MSG_OP_MAX_OPCODE)
		return -EOPNOTSUPP;

	/* The offset is the accumulated total size of the cmd buffer */
	EXEC_MSG_OPS(xdna)->init_chain_req(&req, amdxdna_gem_dev_addr(cmdbuf_abo),
					   offset, ccnt);
	drm_clflush_virt_range(cmd_buf, offset);

	msg.handle = job;
	msg.notify_cb = notify_cb;
	msg.send_data = (u8 *)&req;
	msg.send_size = sizeof(req);
	print_hex_dump_debug("cmdlist msg: ", DUMP_PREFIX_OFFSET, 16, 4,
			     &req, msg.send_size, false);

	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
	if (ret) {
		XDNA_ERR(xdna, "Send message failed");
		return ret;
	}

	return 0;
}
/*
 * Submit a single command through the command-list path: encode it as a
 * one-entry chain in the per-job kernel command buffer and send a
 * chained-exec message with count 1.
 */
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
				struct amdxdna_sched_job *job,
				int (*notify_cb)(void *, void __iomem *, size_t))
{
	struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	void *cmd_buf = amdxdna_gem_vmap(cmdbuf_abo);
	struct xdna_mailbox_msg msg;
	union exec_chain_req req;
	u32 op = ERT_INVALID_CMD;
	size_t size;
	int ret;

	if (!cmd_buf)
		return -ENOMEM;

	/* In: buffer capacity; out: bytes the encoded slot consumed */
	size = cmdbuf_abo->mem.size;
	ret = aie2_cmdlist_fill_slot(cmd_buf, cmd_abo, &size, &op);
	if (ret)
		return ret;

	print_hex_dump_debug("cmdbuf: ", DUMP_PREFIX_OFFSET, 16, 4, cmd_buf, size, false);

	msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
	if (msg.opcode == MSG_OP_MAX_OPCODE)
		return -EOPNOTSUPP;

	EXEC_MSG_OPS(xdna)->init_chain_req(&req, amdxdna_gem_dev_addr(cmdbuf_abo), size, 1);
	drm_clflush_virt_range(cmd_buf, size);

	msg.handle = job;
	msg.notify_cb = notify_cb;
	msg.send_data = (u8 *)&req;
	msg.send_size = sizeof(req);
	print_hex_dump_debug("cmdlist msg: ", DUMP_PREFIX_OFFSET, 16, 4,
			     &req, msg.send_size, false);

	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
	if (ret) {
		/* Use the cached xdna pointer, consistent with the multi-cmd path */
		XDNA_ERR(xdna, "Send message failed");
		return ret;
	}

	return 0;
}
/*
 * Ask firmware to sync the job's BO from device memory to host memory.
 * @notify_cb runs when the sync completes.
 */
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, void __iomem *, size_t))
{
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct xdna_mailbox_msg msg;
	struct sync_bo_req req;
	int ret;

	req.src_addr = 0;
	req.dst_addr = amdxdna_dev_bo_offset(abo);
	req.size = abo->mem.size;
	/* Device to Host */
	req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) |
		   FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM);

	XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed",
		 req.size, req.src_addr, req.dst_addr);

	msg.handle = job;
	msg.notify_cb = notify_cb;
	msg.send_data = (u8 *)&req;
	msg.send_size = sizeof(req);
	msg.opcode = MSG_OP_SYNC_BO;

	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
	if (ret)
		XDNA_ERR(xdna, "Send message failed");

	return ret;
}
/*
 * Register or unregister the job's debug BO with firmware, depending on
 * whether the driver command is ATTACH_DEBUG_BO or its detach counterpart.
 */
int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
			 int (*notify_cb)(void *, void __iomem *, size_t))
{
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct config_debug_bo_req req;
	struct xdna_mailbox_msg msg;

	req.config = job->drv_cmd->opcode == ATTACH_DEBUG_BO ?
		     DEBUG_BO_REGISTER : DEBUG_BO_UNREGISTER;
	req.offset = amdxdna_dev_bo_offset(abo);
	req.size = abo->mem.size;

	XDNA_DBG(xdna, "offset 0x%llx size 0x%llx config %d",
		 req.offset, req.size, req.config);

	msg.handle = job;
	msg.notify_cb = notify_cb;
	msg.send_data = (u8 *)&req;
	msg.send_size = sizeof(req);
	msg.opcode = MSG_OP_CONFIG_DEBUG_BO;
	return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
}
/*
 * Fetch the app health report for firmware context @context_id into
 * @report. Requires the AIE2_APP_HEALTH firmware feature. Firmware DMAs
 * the report into a scratch buffer which is then copied to the caller.
 */
int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
			  struct app_health_report *report)
{
	DECLARE_AIE2_MSG(get_app_health, MSG_OP_GET_APP_HEALTH);
	struct amdxdna_dev *xdna = ndev->xdna;
	struct app_health_report *buf;
	dma_addr_t dma_addr;
	u32 buf_size;
	int ret;

	if (!AIE2_FEATURE_ON(ndev, AIE2_APP_HEALTH)) {
		XDNA_DBG(xdna, "App health feature not supported");
		return -EOPNOTSUPP;
	}

	/* buf_size may be rounded up by the allocator */
	buf_size = sizeof(*report);
	buf = aie2_alloc_msg_buffer(ndev, &buf_size, &dma_addr);
	if (IS_ERR(buf)) {
		XDNA_ERR(xdna, "Failed to allocate buffer for app health");
		return PTR_ERR(buf);
	}

	req.buf_addr = dma_addr;
	req.context_id = context_id;
	req.buf_size = buf_size;

	drm_clflush_virt_range(buf, sizeof(*report)); /* device can access */
	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret) {
		XDNA_ERR(xdna, "Get app health failed, ret %d status 0x%x", ret, resp.status);
		goto free_buf;
	}

	/* Copy the report to caller's buffer */
	memcpy(report, buf, sizeof(*report));

free_buf:
	aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
	return ret;
}