drm/amdgpu: Add sysfs interface for gc reset mask

Add two sysfs interfaces for gfx and compute:
gfx_reset_mask
compute_reset_mask

These interfaces are read-only and show the resets supported by the IP.
For example, full adapter reset (mode1/mode2/BACO/etc),
soft reset, queue reset, and pipe reset.

v2: the sysfs node returns a text string instead of some flags (Christian)
v3: add a generic helper which takes the ring as parameter
    and print the strings in the order they are applied (Christian)

    check amdgpu_gpu_recovery before creating the sysfs file itself,
    and initialize supported_reset_types in IP version files (Lijo)
v4: fix uninitialized variables (Tim)

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Tim Huang <tim.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Jesse.zhang@amd.com
2024-11-05 15:22:56 +08:00
committed by Alex Deucher
parent f4a3246a2c
commit 6c8d1f4b04
9 changed files with 172 additions and 0 deletions

View File

@@ -1588,6 +1588,32 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
return count;
}
/*
 * sysfs "show" handler backing the read-only gfx_reset_mask attribute.
 *
 * @dev:  device whose driver data holds the drm_device
 * @attr: attribute being read (unused)
 * @buf:  output buffer handed to amdgpu_show_reset_mask()
 *
 * Returns the result of amdgpu_show_reset_mask() for
 * adev->gfx.gfx_supported_reset, or -ENODEV if no amdgpu device was
 * resolved.
 *
 * NOTE(review): drm_to_adev() is defined outside this view; if it is a
 * container_of()-style conversion the NULL test below may never trigger.
 * Confirm whether the drm_device pointer should be checked instead.
 */
static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
					     struct device_attribute *attr,
					     char *buf)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(drm_dev);

	if (adev)
		return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);

	return -ENODEV;
}
/*
 * sysfs "show" handler backing the read-only compute_reset_mask attribute.
 *
 * @dev:  device whose driver data holds the drm_device
 * @attr: attribute being read (unused)
 * @buf:  output buffer handed to amdgpu_show_reset_mask()
 *
 * Returns the result of amdgpu_show_reset_mask() for
 * adev->gfx.compute_supported_reset, or -ENODEV if no amdgpu device was
 * resolved.
 *
 * NOTE(review): drm_to_adev() is defined outside this view; if it is a
 * container_of()-style conversion the NULL test below may never trigger.
 * Confirm whether the drm_device pointer should be checked instead.
 */
static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
						 struct device_attribute *attr,
						 char *buf)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(drm_dev);

	if (adev)
		return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset);

	return -ENODEV;
}
/*
 * run_cleaner_shader: mode 0200 (owner write-only). No show handler is
 * registered; writes are handled by amdgpu_gfx_set_run_cleaner_shader().
 */
static DEVICE_ATTR(run_cleaner_shader, 0200,
NULL, amdgpu_gfx_set_run_cleaner_shader);
@@ -1601,6 +1627,11 @@ static DEVICE_ATTR(current_compute_partition, 0644,
/*
 * available_compute_partition: mode 0444 (read-only). Reads are served by
 * amdgpu_gfx_get_available_compute_partition(); no store handler.
 */
static DEVICE_ATTR(available_compute_partition, 0444,
amdgpu_gfx_get_available_compute_partition, NULL);
/*
 * gfx_reset_mask: mode 0444 (read-only). Reads are served by
 * amdgpu_gfx_get_gfx_reset_mask(); no store handler.
 */
static DEVICE_ATTR(gfx_reset_mask, 0444,
amdgpu_gfx_get_gfx_reset_mask, NULL);
/*
 * compute_reset_mask: mode 0444 (read-only). Reads are served by
 * amdgpu_gfx_get_compute_reset_mask(); no store handler.
 */
static DEVICE_ATTR(compute_reset_mask, 0444,
amdgpu_gfx_get_compute_reset_mask, NULL);
static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev)
{
@@ -1666,6 +1697,40 @@ static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
}
/*
 * Create the gfx_reset_mask and compute_reset_mask sysfs files.
 *
 * Nothing is created when amdgpu_gpu_recovery is zero. Each file is only
 * created when the device has at least one ring of the matching kind
 * (adev->gfx.num_gfx_rings / adev->gfx.num_compute_rings).
 *
 * Returns 0 on success (including the "nothing to create" cases) or the
 * error returned by device_create_file(). On failure no reset-mask file
 * created by this function is left behind.
 */
static int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev)
{
	int r = 0;

	if (!amdgpu_gpu_recovery)
		return 0;

	if (adev->gfx.num_gfx_rings) {
		r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask);
		if (r)
			return r;
	}

	if (adev->gfx.num_compute_rings) {
		r = device_create_file(adev->dev, &dev_attr_compute_reset_mask);
		if (r)
			goto err_remove_gfx;
	}

	return 0;

err_remove_gfx:
	/*
	 * Roll back the gfx file so a failed init does not leave a
	 * half-populated set of attributes registered on the device.
	 */
	if (adev->gfx.num_gfx_rings)
		device_remove_file(adev->dev, &dev_attr_gfx_reset_mask);

	return r;
}
/*
 * Remove the reset-mask sysfs files.
 *
 * Uses the same conditions as creation: does nothing when
 * amdgpu_gpu_recovery is zero, and only removes a file when the device
 * reports rings of the matching kind.
 */
static void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev)
{
	struct device *dev;

	if (amdgpu_gpu_recovery == 0)
		return;

	dev = adev->dev;

	if (adev->gfx.num_gfx_rings != 0)
		device_remove_file(dev, &dev_attr_gfx_reset_mask);

	if (adev->gfx.num_compute_rings != 0)
		device_remove_file(dev, &dev_attr_compute_reset_mask);
}
int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
{
int r;
@@ -1680,6 +1745,10 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
if (r)
dev_err(adev->dev, "failed to create isolation sysfs files");
r = amdgpu_gfx_sysfs_reset_mask_init(adev);
if (r)
dev_err(adev->dev, "failed to create reset mask sysfs files");
return r;
}
@@ -1687,6 +1756,7 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
{
amdgpu_gfx_sysfs_xcp_fini(adev);
amdgpu_gfx_sysfs_isolation_shader_fini(adev);
amdgpu_gfx_sysfs_reset_mask_fini(adev);
}
int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,