mirror of
https://github.com/torvalds/linux.git
synced 2026-04-25 10:02:31 -04:00
drm/amdgpu: fix the issue of reserving bad pages failed
In amdgpu_ras_reset_gpu, because bad pages may not be freed, it has high probability to reserve bad pages failed. Change to reserve bad pages when freeing VRAM. v2: 1. avoid allocating the drm_mm node outside of amdgpu_vram_mgr.c 2. move bad page reserving into amdgpu_ras_add_bad_pages, if vram mgr reserve bad page failed, it will put it into pending list, otherwise put it into processed list; 3. remove amdgpu_ras_release_bad_pages, because retired page's info has been moved into amdgpu_vram_mgr v3: 1. formate code style; 2. rename amdgpu_vram_reserve_scope as amdgpu_vram_reservation; 3. rename scope_pending as reservations_pending; 4. rename scope_processed as reserved_pages; 5. change to iterate over all the pending ones and try to insert them with drm_mm_reserve_node(); v4: 1. rename amdgpu_vram_mgr_reserve_scope as amdgpu_vram_mgr_reserve_range; 2. remove unused include "amdgpu_ras.h"; 3. rename amdgpu_vram_mgr_check_and_reserve as amdgpu_vram_mgr_do_reserve; 4. refine amdgpu_vram_mgr_reserve_range to call amdgpu_vram_mgr_do_reserve. Reviewed-by: Christian König <christian.koenig@amd.com> Reviewed-by: Hawking Zhang <hawking.zhang@amd.com> Signed-off-by: Dennis Li <Dennis.Li@amd.com> Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -362,14 +362,10 @@ struct ras_err_data {
|
||||
struct ras_err_handler_data {
|
||||
/* point to bad page records array */
|
||||
struct eeprom_table_record *bps;
|
||||
/* point to reserved bo array */
|
||||
struct amdgpu_bo **bps_bo;
|
||||
/* the count of entries */
|
||||
int count;
|
||||
/* the space can place new entries */
|
||||
int space_left;
|
||||
/* last reserved entry's index + 1 */
|
||||
int last_reserved;
|
||||
};
|
||||
|
||||
typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
|
||||
@@ -506,15 +502,11 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
|
||||
struct eeprom_table_record *bps, int pages);
|
||||
|
||||
int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
|
||||
int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
|
||||
|
||||
static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||
|
||||
if (in_task())
|
||||
amdgpu_ras_reserve_bad_pages(adev);
|
||||
|
||||
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
|
||||
schedule_work(&ras->recovery_work);
|
||||
return 0;
|
||||
|
||||
Reference in New Issue
Block a user