drm/amdkfd: Introduce kfd_node struct (v5)

Introduce a new structure, kfd_node, which will now represent
a compute node. kfd_node is carved out of kfd_dev structure.
kfd_dev struct now will become the parent of kfd_node, and will
store common resources such as doorbells, GTT sub-alloctor etc.
kfd_node struct will store all resources specific to a compute
node, such as device queue manager, interrupt handling etc.

This is the first step in adding compute partition support in KFD.

v2: introduce kfd_node struct to gc v11 (Hawking)
v3: make reference to kfd_dev struct through kfd_node (Morris)
v4: use kfd_node instead for kfd isr/mqd functions (Morris)
v5: rebase (Alex)

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Tested-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Morris Zhang <Shiwu.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Mukul Joshi
2022-09-14 16:39:48 +08:00
committed by Alex Deucher
parent 5cf1675591
commit 8dc1db3172
38 changed files with 574 additions and 496 deletions

View File

@@ -269,7 +269,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
int cu_cnt;
int wave_cnt;
int max_waves_per_cu;
struct kfd_dev *dev = NULL;
struct kfd_node *dev = NULL;
struct kfd_process *proc = NULL;
struct kfd_process_device *pdd = NULL;
@@ -691,7 +691,7 @@ void kfd_process_destroy_wq(void)
static void kfd_process_free_gpuvm(struct kgd_mem *mem,
struct kfd_process_device *pdd, void **kptr)
{
struct kfd_dev *dev = pdd->dev;
struct kfd_node *dev = pdd->dev;
if (kptr && *kptr) {
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
@@ -713,7 +713,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
uint64_t gpu_va, uint32_t size,
uint32_t flags, struct kgd_mem **mem, void **kptr)
{
struct kfd_dev *kdev = pdd->dev;
struct kfd_node *kdev = pdd->dev;
int err;
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
@@ -982,7 +982,7 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
{
struct kfd_process_device *pdd;
struct kfd_dev *kdev;
struct kfd_node *kdev;
void *mem;
kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
@@ -1040,9 +1040,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
bitmap_free(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
kfd_free_process_doorbells(pdd->dev->kfd, pdd->doorbell_index);
if (pdd->dev->shared_resources.enable_mes)
if (pdd->dev->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
pdd->proc_ctx_bo);
/*
@@ -1259,10 +1259,10 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
int i;
for (i = 0; i < p->n_pdds; i++) {
struct kfd_dev *dev = p->pdds[i]->dev;
struct kfd_node *dev = p->pdds[i]->dev;
struct qcm_process_device *qpd = &p->pdds[i]->qpd;
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
continue;
offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
@@ -1279,7 +1279,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
return err;
}
memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size);
qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
@@ -1291,7 +1291,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
struct kfd_dev *dev = pdd->dev;
struct kfd_node *dev = pdd->dev;
struct qcm_process_device *qpd = &pdd->qpd;
uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
@@ -1300,7 +1300,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
void *kaddr;
int ret;
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
return 0;
/* cwsr_base is only set for dGPU */
@@ -1313,7 +1313,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
qpd->cwsr_kaddr = kaddr;
qpd->tba_addr = qpd->cwsr_base;
memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size);
qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
@@ -1324,10 +1324,10 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
{
struct kfd_dev *dev = pdd->dev;
struct kfd_node *dev = pdd->dev;
struct qcm_process_device *qpd = &pdd->qpd;
if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
if (!dev->kfd->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
return;
kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr);
@@ -1371,7 +1371,7 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
* support retry.
*/
for (i = 0; i < p->n_pdds; i++) {
struct kfd_dev *dev = p->pdds[i]->dev;
struct kfd_node *dev = p->pdds[i]->dev;
/* Only consider GFXv9 and higher GPUs. Older GPUs don't
* support the SVM APIs and don't need to be considered
@@ -1394,7 +1394,7 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
return false;
if (dev->noretry)
if (dev->kfd->noretry)
return false;
}
@@ -1528,7 +1528,7 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
return 0;
}
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev,
struct kfd_process *p)
{
int i;
@@ -1540,7 +1540,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
return NULL;
}
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
struct kfd_process *p)
{
struct kfd_process_device *pdd = NULL;
@@ -1552,7 +1552,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
if (!pdd)
return NULL;
if (init_doorbell_bitmap(&pdd->qpd, dev)) {
if (init_doorbell_bitmap(&pdd->qpd, dev->kfd)) {
pr_err("Failed to init doorbell for process\n");
goto err_free_pdd;
}
@@ -1573,7 +1573,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
pdd->user_gpu_id = dev->id;
atomic64_set(&pdd->evict_duration_counter, 0);
if (dev->shared_resources.enable_mes) {
if (dev->kfd->shared_resources.enable_mes) {
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
AMDGPU_MES_PROC_CTX_SIZE,
&pdd->proc_ctx_bo,
@@ -1619,7 +1619,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
struct amdgpu_fpriv *drv_priv;
struct amdgpu_vm *avm;
struct kfd_process *p;
struct kfd_dev *dev;
struct kfd_node *dev;
int ret;
if (!drm_file)
@@ -1679,7 +1679,7 @@ err_reserve_ib_mem:
*
* Assumes that the process lock is held.
*/
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
struct kfd_process *p)
{
struct kfd_process_device *pdd;
@@ -1811,7 +1811,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
kfd_smi_event_queue_eviction(pdd->dev->kfd, p->lead_thread->pid,
trigger);
r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
@@ -1839,7 +1839,7 @@ fail:
if (n_evicted == 0)
break;
kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
kfd_smi_event_queue_restore(pdd->dev->kfd, p->lead_thread->pid);
if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd))
@@ -1860,7 +1860,7 @@ int kfd_process_restore_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
kfd_smi_event_queue_restore(pdd->dev->kfd, p->lead_thread->pid);
r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd);
@@ -2016,7 +2016,7 @@ int kfd_resume_all_processes(void)
return ret;
}
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
struct kfd_process_device *pdd;
@@ -2051,7 +2051,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
{
struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
struct kfd_dev *dev = pdd->dev;
struct kfd_node *dev = pdd->dev;
/*
* It can be that we race and lose here, but that is extremely unlikely