Merge tag 'drm-next-5.5-2019-11-01' of git://people.freedesktop.org/~agd5f/linux into drm-next

drm-next-5.5-2019-11-01:

amdgpu:
- Add EEPROM support for Arcturus
- Enable VCN encode support for Arcturus
- Misc PSP fixes
- Misc DC fixes
- swSMU cleanup

amdkfd:
- Misc cleanups
- Fix typo in cu bitmap parsing

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191101190607.3763-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie
2019-11-04 10:22:53 +10:00
100 changed files with 3128 additions and 1150 deletions

View File

@@ -107,7 +107,7 @@ struct amdgpu_mgpu_info
uint32_t num_apu;
};
#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256
#define AMDGPU_MAX_TIMEOUT_PARAM_LENGTH 256
/*
* Modules parameters.
@@ -125,7 +125,7 @@ extern int amdgpu_disp_priority;
extern int amdgpu_hw_i2c;
extern int amdgpu_pcie_gen2;
extern int amdgpu_msi;
extern char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
extern char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
extern int amdgpu_dpm;
extern int amdgpu_fw_load_type;
extern int amdgpu_aspm;
@@ -139,6 +139,7 @@ extern int amdgpu_vm_fragment_size;
extern int amdgpu_vm_fault_stop;
extern int amdgpu_vm_debug;
extern int amdgpu_vm_update_mode;
extern int amdgpu_exp_hw_support;
extern int amdgpu_dc;
extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;

View File

@@ -130,14 +130,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->gfx.mec.queue_bitmap,
KGD_MAX_QUEUES);
/* remove the KIQ bit as well */
if (adev->gfx.kiq.ring.sched.ready)
clear_bit(amdgpu_gfx_mec_queue_to_bit(adev,
adev->gfx.kiq.ring.me - 1,
adev->gfx.kiq.ring.pipe,
adev->gfx.kiq.ring.queue),
gpu_resources.queue_bitmap);
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
* nbits is not compile time constant
*/

View File

@@ -19,10 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#undef pr_fmt
#define pr_fmt(fmt) "kfd2kgd: " fmt
#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>

View File

@@ -19,9 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#undef pr_fmt
#define pr_fmt(fmt) "kfd2kgd: " fmt
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"

View File

@@ -19,9 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#define pr_fmt(fmt) "kfd2kgd: " fmt
#include <linux/mmu_context.h>
#include "amdgpu.h"

View File

@@ -19,9 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#define pr_fmt(fmt) "kfd2kgd: " fmt
#include <linux/dma-buf.h>
#include <linux/list.h>
#include <linux/pagemap.h>

View File

@@ -1089,8 +1089,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
adev->debugfs_preempt =
debugfs_create_file("amdgpu_preempt_ib", 0600,
adev->ddev->primary->debugfs_root,
(void *)adev, &fops_ib_preempt);
adev->ddev->primary->debugfs_root, adev,
&fops_ib_preempt);
if (!(adev->debugfs_preempt)) {
DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n");
return -EIO;

View File

@@ -1877,6 +1877,19 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
goto init_failed;
/*
* retired pages will be loaded from eeprom and reserved here,
* it should be called after amdgpu_device_ip_hw_init_phase2 since
* for some ASICs the RAS EEPROM code relies on SMU fully functioning
* for I2C communication which only true at this point.
* recovery_init may fail, but it can free all resources allocated by
* itself and its failure should not stop amdgpu init process.
*
* Note: theoretically, this should be called before all vram allocations
* to protect retired page from abusing
*/
amdgpu_ras_recovery_init(adev);
if (adev->gmc.xgmi.num_physical_nodes > 1)
amdgpu_xgmi_add_device(adev);
amdgpu_amdkfd_device_init(adev);
@@ -2258,6 +2271,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
/* displays are handled in phase1 */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
continue;
/* PSP lost connection when err_event_athub occurs */
if (amdgpu_ras_intr_triggered() &&
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
adev->ip_blocks[i].status.hw = false;
continue;
}
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
@@ -2615,9 +2634,9 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
else
adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
while ((timeout_setting = strsep(&input, ",")) &&
strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
ret = kstrtol(timeout_setting, 0, &timeout);
if (ret)
return ret;

View File

@@ -911,7 +911,8 @@ int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low)
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK,
low ? &clk_freq : NULL,
!low ? &clk_freq : NULL);
!low ? &clk_freq : NULL,
true);
if (ret)
return 0;
return clk_freq * 100;
@@ -928,7 +929,8 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low)
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK,
low ? &clk_freq : NULL,
!low ? &clk_freq : NULL);
!low ? &clk_freq : NULL,
true);
if (ret)
return 0;
return clk_freq * 100;

View File

@@ -298,12 +298,6 @@ enum amdgpu_pcie_gen {
#define amdgpu_dpm_get_current_power_state(adev) \
((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle))
#define amdgpu_smu_get_current_power_state(adev) \
((adev)->smu.ppt_funcs->get_current_power_state(&((adev)->smu)))
#define amdgpu_smu_set_power_state(adev) \
((adev)->smu.ppt_funcs->set_power_state(&((adev)->smu)))
#define amdgpu_dpm_get_pp_num_states(adev, data) \
((adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data))

View File

@@ -84,9 +84,10 @@
* - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS.
* - 3.34.0 - Non-DC can flip correctly between buffers with different pitches
* - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask
* - 3.36.0 - Allow reading more status registers on si/cik
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 35
#define KMS_DRIVER_MINOR 36
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@@ -101,7 +102,7 @@ int amdgpu_disp_priority = 0;
int amdgpu_hw_i2c = 0;
int amdgpu_pcie_gen2 = -1;
int amdgpu_msi = -1;
char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
int amdgpu_dpm = -1;
int amdgpu_fw_load_type = -1;
int amdgpu_aspm = -1;

View File

@@ -319,8 +319,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
return r;
}
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq)
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
amdgpu_ring_fini(ring);

View File

@@ -330,8 +330,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq);
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq);
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,

View File

@@ -218,7 +218,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
struct dma_fence *fence = NULL, *finished;
struct amdgpu_job *job;
int r;
int r = 0;
job = to_amdgpu_job(sched_job);
finished = &job->base.s_fence->finished;
@@ -243,6 +243,8 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
job->fence = dma_fence_get(fence);
amdgpu_job_free_resources(job);
fence = r ? ERR_PTR(r) : fence;
return fence;
}

View File

@@ -161,7 +161,7 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
if (is_support_sw_smu(adev)) {
if (adev->smu.ppt_funcs->get_current_power_state)
pm = amdgpu_smu_get_current_power_state(adev);
pm = smu_get_current_power_state(&adev->smu);
else
pm = adev->pm.dpm.user_state;
} else if (adev->powerplay.pp_funcs->get_current_power_state) {
@@ -907,7 +907,7 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
return ret;
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask);
ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
@@ -954,7 +954,7 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
return ret;
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask);
ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
@@ -994,7 +994,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
return ret;
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask);
ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask);
@@ -1034,7 +1034,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
return ret;
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask);
ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask);
@@ -1074,7 +1074,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
return ret;
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask);
ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask);
@@ -1114,7 +1114,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
return ret;
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask);
ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
@@ -1306,7 +1306,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
}
parameter[parameter_size] = profile_mode;
if (is_support_sw_smu(adev))
ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size);
ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true);
else if (adev->powerplay.pp_funcs->set_power_profile_mode)
ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);
if (!ret)
@@ -2015,7 +2015,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
uint32_t limit = 0;
if (is_support_sw_smu(adev)) {
smu_get_power_limit(&adev->smu, &limit, true);
smu_get_power_limit(&adev->smu, &limit, true, true);
return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true);
@@ -2033,7 +2033,7 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
uint32_t limit = 0;
if (is_support_sw_smu(adev)) {
smu_get_power_limit(&adev->smu, &limit, false);
smu_get_power_limit(&adev->smu, &limit, false, true);
return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false);
@@ -2830,6 +2830,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
DRM_ERROR("failed to create device file pp_dpm_sclk\n");
return ret;
}
/* Arcturus does not support standalone mclk/socclk/fclk level setting */
if (adev->asic_type == CHIP_ARCTURUS) {
dev_attr_pp_dpm_mclk.attr.mode &= ~S_IWUGO;
dev_attr_pp_dpm_mclk.store = NULL;
dev_attr_pp_dpm_socclk.attr.mode &= ~S_IWUGO;
dev_attr_pp_dpm_socclk.store = NULL;
dev_attr_pp_dpm_fclk.attr.mode &= ~S_IWUGO;
dev_attr_pp_dpm_fclk.store = NULL;
}
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_mclk\n");
@@ -3013,7 +3026,8 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
struct smu_dpm_context *smu_dpm = &adev->smu.smu_dpm;
smu_handle_task(&adev->smu,
smu_dpm->dpm_level,
AMD_PP_TASK_DISPLAY_CONFIG_CHANGE);
AMD_PP_TASK_DISPLAY_CONFIG_CHANGE,
true);
} else {
if (adev->powerplay.pp_funcs->dispatch_tasks) {
if (!amdgpu_device_has_dc_support(adev)) {

View File

@@ -34,6 +34,8 @@
#include "psp_v11_0.h"
#include "psp_v12_0.h"
#include "amdgpu_ras.h"
static void psp_set_funcs(struct amdgpu_device *adev);
static int psp_early_init(void *handle)
@@ -90,7 +92,7 @@ static int psp_sw_init(void *handle)
ret = psp_mem_training_init(psp);
if (ret) {
DRM_ERROR("Failed to initliaze memory training!\n");
DRM_ERROR("Failed to initialize memory training!\n");
return ret;
}
ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT);
@@ -167,6 +169,13 @@ psp_cmd_submit_buf(struct psp_context *psp,
while (*((unsigned int *)psp->fence_buf) != index) {
if (--timeout == 0)
break;
/*
* Shouldn't wait for timeout when err_event_athub occurs,
* because gpu reset thread triggered and lock resource should
* be released for psp resume sequence.
*/
if (amdgpu_ras_intr_triggered())
break;
msleep(1);
amdgpu_asic_invalidate_hdp(psp->adev, NULL);
}

View File

@@ -68,6 +68,11 @@ const char *ras_block_string[] = {
/* inject address is 52 bits */
#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52)
enum amdgpu_ras_retire_page_reservation {
AMDGPU_RAS_RETIRE_PAGE_RESERVED,
AMDGPU_RAS_RETIRE_PAGE_PENDING,
AMDGPU_RAS_RETIRE_PAGE_FAULT,
};
atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
@@ -153,8 +158,6 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
op = 1;
else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
op = 2;
else if (sscanf(str, "reboot %32s", block_name) == 1)
op = 3;
else if (str[0] && str[1] && str[2] && str[3])
/* ascii string, but commands are not matched. */
return -EINVAL;
@@ -218,12 +221,11 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* value to the address.
*
* Second member: struct ras_debug_if::op.
* It has four kinds of operations.
* It has three kinds of operations.
*
* - 0: disable RAS on the block. Take ::head as its data.
* - 1: enable RAS on the block. Take ::head as its data.
* - 2: inject errors on the block. Take ::inject as its data.
* - 3: reboot on unrecoverable error
*
* How to use the interface?
* programs:
@@ -305,9 +307,6 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
/* data.inject.address is offset instead of absolute gpu address */
ret = amdgpu_ras_error_inject(adev, &data.inject);
break;
case 3:
amdgpu_ras_get_context(adev)->reboot = true;
break;
default:
ret = -EINVAL;
break;
@@ -559,15 +558,17 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
return 0;
ret = psp_ras_enable_features(&adev->psp, &info, enable);
if (ret) {
DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
enable ? "enable":"disable",
ras_block_str(head->block),
ret);
if (ret == TA_RAS_STATUS__RESET_NEEDED)
return -EAGAIN;
return -EINVAL;
if (!amdgpu_ras_intr_triggered()) {
ret = psp_ras_enable_features(&adev->psp, &info, enable);
if (ret) {
DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
enable ? "enable":"disable",
ras_block_str(head->block),
ret);
if (ret == TA_RAS_STATUS__RESET_NEEDED)
return -EAGAIN;
return -EINVAL;
}
}
/* setup the obj */
@@ -815,11 +816,11 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
{
switch (flags) {
case 0:
case AMDGPU_RAS_RETIRE_PAGE_RESERVED:
return "R";
case 1:
case AMDGPU_RAS_RETIRE_PAGE_PENDING:
return "P";
case 2:
case AMDGPU_RAS_RETIRE_PAGE_FAULT:
default:
return "F";
};
@@ -1037,6 +1038,17 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
adev, &amdgpu_ras_debugfs_ctrl_ops);
debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir,
adev, &amdgpu_ras_debugfs_eeprom_ops);
/*
* After one uncorrectable error happens, usually GPU recovery will
* be scheduled. But due to the known problem in GPU recovery failing
* to bring GPU back, below interface provides one direct way to
* user to reboot system automatically in such case within
* ERREVENT_ATHUB_INTERRUPT generated. Normal GPU recovery routine
* will never be called.
*/
debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, con->dir,
&con->reboot);
}
void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
@@ -1289,13 +1301,13 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
(*bps)[i] = (struct ras_badpage){
.bp = data->bps[i].retired_page,
.size = AMDGPU_GPU_PAGE_SIZE,
.flags = 0,
.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
};
if (data->last_reserved <= i)
(*bps)[i].flags = 1;
(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
else if (data->bps_bo[i] == NULL)
(*bps)[i].flags = 2;
(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
}
*count = data->count;

View File

@@ -216,6 +216,10 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor);
break;
case CHIP_ARCTURUS:
ret = smu_i2c_eeprom_init(&adev->smu, &control->eeprom_accessor);
break;
default:
return 0;
}
@@ -260,6 +264,9 @@ void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control)
case CHIP_VEGA20:
smu_v11_0_i2c_eeprom_control_fini(&control->eeprom_accessor);
break;
case CHIP_ARCTURUS:
smu_i2c_eeprom_fini(&adev->smu, &control->eeprom_accessor);
break;
default:
return;
@@ -364,7 +371,7 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
struct eeprom_table_record *record;
struct amdgpu_device *adev = to_amdgpu_device(control);
if (adev->asic_type != CHIP_VEGA20)
if (adev->asic_type != CHIP_VEGA20 && adev->asic_type != CHIP_ARCTURUS)
return 0;
buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE,

View File

@@ -486,15 +486,12 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
{
struct amdgpu_device *adev;
struct ttm_mem_reg *old_mem = &bo->mem;
struct ttm_mem_reg tmp_mem;
struct ttm_place placements;
struct ttm_placement placement;
int r;
adev = amdgpu_ttm_adev(bo->bdev);
/* create space/pages for new_mem in GTT space */
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
@@ -545,15 +542,12 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
{
struct amdgpu_device *adev;
struct ttm_mem_reg *old_mem = &bo->mem;
struct ttm_mem_reg tmp_mem;
struct ttm_placement placement;
struct ttm_place placements;
int r;
adev = amdgpu_ttm_adev(bo->bdev);
/* make space in GTT for old_mem buffer */
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
@@ -1220,11 +1214,8 @@ static struct ttm_backend_func amdgpu_backend_func = {
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
struct amdgpu_device *adev;
struct amdgpu_ttm_tt *gtt;
adev = amdgpu_ttm_adev(bo->bdev);
gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
if (gtt == NULL) {
return NULL;
@@ -1810,17 +1801,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
adev->gmc.visible_vram_size);
#endif
/*
* retired pages will be loaded from eeprom and reserved here,
* it should be called after ttm init since new bo may be created,
* recovery_init may fail, but it can free all resources allocated by
* itself and its failure should not stop amdgpu init process.
*
* Note: theoretically, this should be called before all vram allocations
* to protect retired page from abusing
*/
amdgpu_ras_recovery_init(adev);
/*
*The reserved vram for firmware must be pinned to the specified
*place on the VRAM, so reserve it early.

View File

@@ -39,6 +39,8 @@
#include "cikd.h"
#include "uvd/uvd_4_2_d.h"
#include "amdgpu_ras.h"
/* 1 second timeout */
#define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000)
@@ -372,7 +374,13 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
if (!adev->uvd.inst[j].saved_bo)
return -ENOMEM;
memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
/* re-write 0 since err_event_athub will corrupt VCPU buffer */
if (amdgpu_ras_intr_triggered()) {
DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
memset(adev->uvd.inst[j].saved_bo, 0, size);
} else {
memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
}
}
return 0;
}

View File

@@ -966,6 +966,25 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
{mmGRBM_STATUS},
{mmGRBM_STATUS2},
{mmGRBM_STATUS_SE0},
{mmGRBM_STATUS_SE1},
{mmGRBM_STATUS_SE2},
{mmGRBM_STATUS_SE3},
{mmSRBM_STATUS},
{mmSRBM_STATUS2},
{mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET},
{mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET},
{mmCP_STAT},
{mmCP_STALLED_STAT1},
{mmCP_STALLED_STAT2},
{mmCP_STALLED_STAT3},
{mmCP_CPF_BUSY_STAT},
{mmCP_CPF_STALLED_STAT1},
{mmCP_CPF_STATUS},
{mmCP_CPC_BUSY_STAT},
{mmCP_CPC_STALLED_STAT1},
{mmCP_CPC_STATUS},
{mmGB_ADDR_CONFIG},
{mmMC_ARB_RAMCFG},
{mmGB_TILE_MODE0},

View File

@@ -93,7 +93,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -140,7 +140,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -179,7 +179,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0xc0000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -1442,7 +1442,7 @@ static int gfx_v10_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
amdgpu_gfx_mqd_sw_fini(adev);
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
gfx_v10_0_pfp_fini(adev);
@@ -3106,6 +3106,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd));
/* reset the ring */
ring->wptr = 0;
adev->wb.wb[ring->wptr_offs] = 0;
amdgpu_ring_clear_ring(ring);
#ifdef BRING_UP_DEBUG
mutex_lock(&adev->srbm_mutex);

View File

@@ -2103,7 +2103,7 @@ static int gfx_v8_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
amdgpu_gfx_mqd_sw_fini(adev);
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
gfx_v8_0_mec_fini(adev);

View File

@@ -703,6 +703,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
};
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
@@ -2153,7 +2154,7 @@ static int gfx_v9_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
amdgpu_gfx_mqd_sw_fini(adev);
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
gfx_v9_0_mec_fini(adev);
@@ -3736,8 +3737,10 @@ static int gfx_v9_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
/* disable KCQ to avoid CPC touch memory not valid anymore */
gfx_v9_0_kcq_disable(adev);
/* DF freeze and kcq disable will fail */
if (!amdgpu_ras_intr_triggered())
/* disable KCQ to avoid CPC touch memory not valid anymore */
gfx_v9_0_kcq_disable(adev);
if (amdgpu_sriov_vf(adev)) {
gfx_v9_0_cp_gfx_enable(adev, false);

View File

@@ -155,6 +155,15 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp);
tmp = mmGCVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
} else {
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp);
tmp = mmGCVM_L2_CNTL4_DEFAULT;

View File

@@ -309,6 +309,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
job->vm_needs_flush = true;
job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);

View File

@@ -142,6 +142,15 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp);
tmp = mmMMVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
} else {
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp);
tmp = mmMMVM_L2_CNTL4_DEFAULT;

View File

@@ -178,6 +178,7 @@ static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
@@ -298,7 +299,7 @@ nv_asic_reset_method(struct amdgpu_device *adev)
{
struct smu_context *smu = &adev->smu;
if (smu_baco_is_support(smu))
if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu))
return AMD_RESET_METHOD_BACO;
else
return AMD_RESET_METHOD_MODE1;

View File

@@ -244,7 +244,7 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
/* Copy PSP KDB binary to memory */
memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size);
/* Provide the sys driver to bootloader */
/* Provide the PSP KDB to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE;
@@ -467,6 +467,14 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
0x80000000, 0x8000FFFF, false);
} else {
/* Wait for sOS ready for ring creation */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
0x80000000, 0x80000000, false);
if (ret) {
DRM_ERROR("Failed to wait for sOS ready for ring creation\n");
return ret;
}
/* Write low address of the ring to C2PMSG_69 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);

View File

@@ -254,6 +254,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
};
static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,

View File

@@ -975,6 +975,17 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
{GRBM_STATUS},
{mmGRBM_STATUS2},
{mmGRBM_STATUS_SE0},
{mmGRBM_STATUS_SE1},
{mmSRBM_STATUS},
{mmSRBM_STATUS2},
{DMA_STATUS_REG + DMA0_REGISTER_OFFSET},
{DMA_STATUS_REG + DMA1_REGISTER_OFFSET},
{mmCP_STAT},
{mmCP_STALLED_STAT1},
{mmCP_STALLED_STAT2},
{mmCP_STALLED_STAT3},
{GB_ADDR_CONFIG},
{MC_ARB_RAMCFG},
{GB_TILE_MODE0},

View File

@@ -339,6 +339,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},

View File

@@ -265,9 +265,6 @@ static int vcn_v2_5_hw_init(void *handle)
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
ring = &adev->vcn.inst[j].ring_enc[i];
/* disable encode rings till the robustness of the FW */
ring->sched.ready = false;
continue;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;