drm/amdgpu: Init VF data exchange region from dynamic crit_region offsets

Add VF logic to initialize the data exchange region using the offsets
published in the dynamic (v2) critical region table. When the requested
init data version is GPU_CRIT_REGION_V2, derive the PF2VF, VF2PF and RAS
telemetry pointers from the crit_regn_tbl entries instead of the fixed V1
offsets, and stage the exchange data through a temporary buffer read from
VRAM in the early (BIOS) init path.

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Author: Ellen Pan
Date: 2025-10-08 15:36:50 -05:00
Committed by: Alex Deucher
Commit: 91da591310
Parent: b4a8fcc782

@@ -218,12 +218,12 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
 				      &adev->virt.mm_table.gpu_addr,
 				      (void *)&adev->virt.mm_table.cpu_addr);
 	if (r) {
-		DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
+		dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
 		return r;
 	}
 
 	memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
-	DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+	dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
		 adev->virt.mm_table.gpu_addr,
		 adev->virt.mm_table.cpu_addr);
 	return 0;
@@ -403,7 +403,9 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
 		if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
					       AMDGPU_GPU_PAGE_SIZE,
					       &bo, NULL))
-			DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
+			dev_dbg(adev->dev,
+				"RAS WARN: reserve vram for retired page %llx fail\n",
+				bp);
 		data->bps_bo[i] = bo;
 	}
 	data->last_reserved = i + 1;
@@ -671,10 +673,34 @@ out:
 	schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
 }
 
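+/*
+ * Copy the PF2VF data exchange region from VRAM into @pfvf_data, using the
+ * offset and size published in the dynamic (v2) critical region table.
+ */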
+static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
+{
+	uint32_t dataexchange_offset =
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
+	uint32_t dataexchange_size =
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
+	uint64_t pos = 0;
+
+	dev_info(adev->dev,
+		 "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+		 dataexchange_offset, dataexchange_size);
+
+	if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
+		dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
+		return -EINVAL;
+	}
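+
+	/*
+	 * amdgpu_device_vram_access() can fall back to dword-wide MMIO
+	 * accesses, hence the 4-byte alignment check above.
+	 */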
+	pos = (uint64_t)dataexchange_offset;
+	amdgpu_device_vram_access(adev, pos, pfvf_data,
+				  dataexchange_size, false);
+
+	return 0;
+}
+
 void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
 {
 	if (adev->virt.vf2pf_update_interval_ms != 0) {
-		DRM_INFO("clean up the vf2pf work item\n");
+		dev_info(adev->dev, "clean up the vf2pf work item\n");
 		cancel_delayed_work_sync(&adev->virt.vf2pf_work);
 		adev->virt.vf2pf_update_interval_ms = 0;
 	}
@@ -682,13 +708,15 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
 
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 {
+	uint32_t *pfvf_data = NULL;
+
 	adev->virt.fw_reserve.p_pf2vf = NULL;
 	adev->virt.fw_reserve.p_vf2pf = NULL;
 	adev->virt.vf2pf_update_interval_ms = 0;
 	adev->virt.vf2pf_update_retry_cnt = 0;
 
 	if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
-		DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
+		dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
 	} else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
 		/* go through this logic in ip_init and reset to init workqueue*/
 		amdgpu_virt_exchange_data(adev);
@@ -697,11 +725,34 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 		schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
 	} else if (adev->bios != NULL) {
 		/* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
-		adev->virt.fw_reserve.p_pf2vf =
-			(struct amd_sriov_msg_pf2vf_info_header *)
-			(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
-
-		amdgpu_virt_read_pf2vf_data(adev);
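+		/*
+		 * With v2 critical regions the exchange data sits at a
+		 * dynamic VRAM offset rather than at a fixed offset in the
+		 * BIOS image, so stage it through a temporary buffer and
+		 * drop the pointer again once it has been parsed.
+		 */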
+		if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+			pfvf_data =
+				kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
+					GFP_KERNEL);
+			if (!pfvf_data) {
+				dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
+				return;
+			}
+
+			if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
+				goto free_pfvf_data;
+
+			adev->virt.fw_reserve.p_pf2vf =
+				(struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
+			amdgpu_virt_read_pf2vf_data(adev);
+
+free_pfvf_data:
+			kfree(pfvf_data);
+			pfvf_data = NULL;
+			adev->virt.fw_reserve.p_pf2vf = NULL;
+		} else {
+			adev->virt.fw_reserve.p_pf2vf =
+				(struct amd_sriov_msg_pf2vf_info_header *)
+				(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+
+			amdgpu_virt_read_pf2vf_data(adev);
+		}
 	}
 }
@@ -714,14 +765,29 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
 
 	if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
 		if (adev->mman.fw_vram_usage_va) {
-			adev->virt.fw_reserve.p_pf2vf =
-				(struct amd_sriov_msg_pf2vf_info_header *)
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
-			adev->virt.fw_reserve.p_vf2pf =
-				(struct amd_sriov_msg_vf2pf_info_header *)
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
-			adev->virt.fw_reserve.ras_telemetry =
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
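+			/*
+			 * v2 layout: PF2VF sits at the data exchange region
+			 * offset, VF2PF one AMD_SRIOV_MSG_SIZE_KB above it,
+			 * and RAS telemetry at its own region offset.
+			 */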
+			if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+				adev->virt.fw_reserve.p_pf2vf =
+					(struct amd_sriov_msg_pf2vf_info_header *)
+					(adev->mman.fw_vram_usage_va +
+					 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
+				adev->virt.fw_reserve.p_vf2pf =
+					(struct amd_sriov_msg_vf2pf_info_header *)
+					(adev->mman.fw_vram_usage_va +
+					 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
+					 (AMD_SRIOV_MSG_SIZE_KB << 10));
+				adev->virt.fw_reserve.ras_telemetry =
+					(adev->mman.fw_vram_usage_va +
+					 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
+			} else {
+				adev->virt.fw_reserve.p_pf2vf =
+					(struct amd_sriov_msg_pf2vf_info_header *)
+					(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+				adev->virt.fw_reserve.p_vf2pf =
+					(struct amd_sriov_msg_vf2pf_info_header *)
+					(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+				adev->virt.fw_reserve.ras_telemetry =
+					(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+			}
 		} else if (adev->mman.drv_vram_usage_va) {
 			adev->virt.fw_reserve.p_pf2vf =
 				(struct amd_sriov_msg_pf2vf_info_header *)
@@ -829,7 +895,7 @@ static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
 			break;
 		default: /* other chip doesn't support SRIOV */
 			is_sriov = false;
-			DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
+			dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
 			break;
 		}
 	}
@@ -1510,7 +1576,7 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block block)
 	case AMDGPU_RAS_BLOCK__MPIO:
 		return RAS_TELEMETRY_GPU_BLOCK_MPIO;
 	default:
-		DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
+		dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
			      block);
 		return RAS_TELEMETRY_GPU_BLOCK_COUNT;
 	}