mirror of
https://github.com/torvalds/linux.git
synced 2026-04-27 19:12:29 -04:00
drm/amdgpu: break driver init process when it's bad GPU(v5)
When retrieving bad gpu tag from eeprom, GPU init should
fail as the GPU needs to be retired for further check.
v2: Fix spelling typo, correct the condition to detect
bad gpu tag and refine error message.
v3: Refine function argument name.
v4: Fix missing check of returning value of i2c
initialization error case.
v5: Use dev_err to print PCI information in dmesg instead
of DRM_ERROR.
Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
1d6a9d122d
commit
b82e65a935
@@ -241,7 +241,8 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
|
||||
|
||||
}
|
||||
|
||||
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
|
||||
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
|
||||
bool *exceed_err_limit)
|
||||
{
|
||||
int ret = 0;
|
||||
struct amdgpu_device *adev = to_amdgpu_device(control);
|
||||
@@ -254,6 +255,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
|
||||
.buf = buff,
|
||||
};
|
||||
|
||||
*exceed_err_limit = false;
|
||||
|
||||
/* Verify i2c adapter is initialized */
|
||||
if (!adev->pm.smu_i2c.algo)
|
||||
return -ENOENT;
|
||||
@@ -282,6 +285,11 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
|
||||
DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
|
||||
control->num_recs);
|
||||
|
||||
} else if ((hdr->header == EEPROM_TABLE_HDR_BAD) &&
|
||||
(amdgpu_bad_page_threshold != 0)) {
|
||||
*exceed_err_limit = true;
|
||||
dev_err(adev->dev, "Exceeding the bad_page_threshold parameter, "
|
||||
"disabling the GPU.\n");
|
||||
} else {
|
||||
DRM_INFO("Creating new EEPROM table");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user