mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 14:53:58 -04:00
drm/amdgpu: fix vf error handling
The error handling for virtual functions assumed a single vf per VM and didn't properly account for bare metal. Make the error arrays per device and add locking. Reviewed-by: Gavin Wan <gavin.wan@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -25,30 +25,21 @@
|
||||
#include "amdgpu_vf_error.h"
|
||||
#include "mxgpu_ai.h"
|
||||
|
||||
#define AMDGPU_VF_ERROR_ENTRY_SIZE 16
|
||||
|
||||
/* struct error_entry - amdgpu VF error information. */
|
||||
struct amdgpu_vf_error_buffer {
|
||||
int read_count;
|
||||
int write_count;
|
||||
uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
|
||||
uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
|
||||
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
|
||||
};
|
||||
|
||||
struct amdgpu_vf_error_buffer admgpu_vf_errors;
|
||||
|
||||
|
||||
void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data)
|
||||
void amdgpu_vf_error_put(struct amdgpu_device *adev,
|
||||
uint16_t sub_error_code,
|
||||
uint16_t error_flags,
|
||||
uint64_t error_data)
|
||||
{
|
||||
int index;
|
||||
uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code);
|
||||
|
||||
index = admgpu_vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
|
||||
admgpu_vf_errors.code [index] = error_code;
|
||||
admgpu_vf_errors.flags [index] = error_flags;
|
||||
admgpu_vf_errors.data [index] = error_data;
|
||||
admgpu_vf_errors.write_count ++;
|
||||
mutex_lock(&adev->virt.vf_errors.lock);
|
||||
index = adev->virt.vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
|
||||
adev->virt.vf_errors.code [index] = error_code;
|
||||
adev->virt.vf_errors.flags [index] = error_flags;
|
||||
adev->virt.vf_errors.data [index] = error_data;
|
||||
adev->virt.vf_errors.write_count ++;
|
||||
mutex_unlock(&adev->virt.vf_errors.lock);
|
||||
}
|
||||
|
||||
|
||||
@@ -58,7 +49,8 @@ void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
|
||||
u32 data1, data2, data3;
|
||||
int index;
|
||||
|
||||
if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) || (!adev->virt.ops) || (!adev->virt.ops->trans_msg)) {
|
||||
if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) ||
|
||||
(!adev->virt.ops) || (!adev->virt.ops->trans_msg)) {
|
||||
return;
|
||||
}
|
||||
/*
|
||||
@@ -68,18 +60,22 @@ void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
|
||||
return;
|
||||
}
|
||||
*/
|
||||
|
||||
mutex_lock(&adev->virt.vf_errors.lock);
|
||||
/* The errors are overlay of array, correct read_count as full. */
|
||||
if (admgpu_vf_errors.write_count - admgpu_vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) {
|
||||
admgpu_vf_errors.read_count = admgpu_vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
|
||||
if (adev->virt.vf_errors.write_count - adev->virt.vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) {
|
||||
adev->virt.vf_errors.read_count = adev->virt.vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
|
||||
}
|
||||
|
||||
while (admgpu_vf_errors.read_count < admgpu_vf_errors.write_count) {
|
||||
index =admgpu_vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
|
||||
data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX (admgpu_vf_errors.code[index], admgpu_vf_errors.flags[index]);
|
||||
data2 = admgpu_vf_errors.data[index] & 0xFFFFFFFF;
|
||||
data3 = (admgpu_vf_errors.data[index] >> 32) & 0xFFFFFFFF;
|
||||
while (adev->virt.vf_errors.read_count < adev->virt.vf_errors.write_count) {
|
||||
index =adev->virt.vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
|
||||
data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(adev->virt.vf_errors.code[index],
|
||||
adev->virt.vf_errors.flags[index]);
|
||||
data2 = adev->virt.vf_errors.data[index] & 0xFFFFFFFF;
|
||||
data3 = (adev->virt.vf_errors.data[index] >> 32) & 0xFFFFFFFF;
|
||||
|
||||
adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, data1, data2, data3);
|
||||
admgpu_vf_errors.read_count ++;
|
||||
adev->virt.vf_errors.read_count ++;
|
||||
}
|
||||
mutex_unlock(&adev->virt.vf_errors.lock);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user