mirror of
https://github.com/torvalds/linux.git
synced 2026-04-26 18:42:25 -04:00
drm/amdkfd: Introduce kfd_node struct (v5)
Introduce a new structure, kfd_node, which will now represent a compute node. kfd_node is carved out of kfd_dev structure. kfd_dev struct now will become the parent of kfd_node, and will store common resources such as doorbells, GTT sub-allocator etc. kfd_node struct will store all resources specific to a compute node, such as device queue manager, interrupt handling etc. This is the first step in adding compute partition support in KFD. v2: introduce kfd_node struct to gc v11 (Hawking) v3: make reference to kfd_dev struct through kfd_node (Morris) v4: use kfd_node instead for kfd isr/mqd functions (Morris) v5: rebase (Alex) Signed-off-by: Mukul Joshi <mukul.joshi@amd.com> Tested-by: Amber Lin <Amber.Lin@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Morris Zhang <Shiwu.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
5cf1675591
commit
8dc1db3172
@@ -269,7 +269,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
|
||||
int cu_cnt;
|
||||
int wave_cnt;
|
||||
int max_waves_per_cu;
|
||||
struct kfd_dev *dev = NULL;
|
||||
struct kfd_node *dev = NULL;
|
||||
struct kfd_process *proc = NULL;
|
||||
struct kfd_process_device *pdd = NULL;
|
||||
|
||||
@@ -691,7 +691,7 @@ void kfd_process_destroy_wq(void)
|
||||
static void kfd_process_free_gpuvm(struct kgd_mem *mem,
|
||||
struct kfd_process_device *pdd, void **kptr)
|
||||
{
|
||||
struct kfd_dev *dev = pdd->dev;
|
||||
struct kfd_node *dev = pdd->dev;
|
||||
|
||||
if (kptr && *kptr) {
|
||||
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
|
||||
@@ -713,7 +713,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
|
||||
uint64_t gpu_va, uint32_t size,
|
||||
uint32_t flags, struct kgd_mem **mem, void **kptr)
|
||||
{
|
||||
struct kfd_dev *kdev = pdd->dev;
|
||||
struct kfd_node *kdev = pdd->dev;
|
||||
int err;
|
||||
|
||||
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
|
||||
@@ -982,7 +982,7 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
|
||||
static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
struct kfd_dev *kdev;
|
||||
struct kfd_node *kdev;
|
||||
void *mem;
|
||||
|
||||
kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
|
||||
@@ -1040,9 +1040,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
|
||||
bitmap_free(pdd->qpd.doorbell_bitmap);
|
||||
idr_destroy(&pdd->alloc_idr);
|
||||
|
||||
kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
|
||||
kfd_free_process_doorbells(pdd->dev->kfd, pdd->doorbell_index);
|
||||
|
||||
if (pdd->dev->shared_resources.enable_mes)
|
||||
if (pdd->dev->kfd->shared_resources.enable_mes)
|
||||
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
|
||||
pdd->proc_ctx_bo);
|
||||
/*
|
||||
@@ -1259,10 +1259,10 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < p->n_pdds; i++) {
|
||||
struct kfd_dev *dev = p->pdds[i]->dev;
|
||||
struct kfd_node *dev = p->pdds[i]->dev;
|
||||
struct qcm_process_device *qpd = &p->pdds[i]->qpd;
|
||||
|
||||
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
|
||||
if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
|
||||
continue;
|
||||
|
||||
offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
|
||||
@@ -1279,7 +1279,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
|
||||
return err;
|
||||
}
|
||||
|
||||
memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
|
||||
memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size);
|
||||
|
||||
qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
|
||||
pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
|
||||
@@ -1291,7 +1291,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
|
||||
|
||||
static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
|
||||
{
|
||||
struct kfd_dev *dev = pdd->dev;
|
||||
struct kfd_node *dev = pdd->dev;
|
||||
struct qcm_process_device *qpd = &pdd->qpd;
|
||||
uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
|
||||
| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
|
||||
@@ -1300,7 +1300,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
|
||||
void *kaddr;
|
||||
int ret;
|
||||
|
||||
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
|
||||
if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
|
||||
return 0;
|
||||
|
||||
/* cwsr_base is only set for dGPU */
|
||||
@@ -1313,7 +1313,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
|
||||
qpd->cwsr_kaddr = kaddr;
|
||||
qpd->tba_addr = qpd->cwsr_base;
|
||||
|
||||
memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
|
||||
memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size);
|
||||
|
||||
qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
|
||||
pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
|
||||
@@ -1324,10 +1324,10 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
|
||||
|
||||
static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
|
||||
{
|
||||
struct kfd_dev *dev = pdd->dev;
|
||||
struct kfd_node *dev = pdd->dev;
|
||||
struct qcm_process_device *qpd = &pdd->qpd;
|
||||
|
||||
if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
|
||||
if (!dev->kfd->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
|
||||
return;
|
||||
|
||||
kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr);
|
||||
@@ -1371,7 +1371,7 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
|
||||
* support retry.
|
||||
*/
|
||||
for (i = 0; i < p->n_pdds; i++) {
|
||||
struct kfd_dev *dev = p->pdds[i]->dev;
|
||||
struct kfd_node *dev = p->pdds[i]->dev;
|
||||
|
||||
/* Only consider GFXv9 and higher GPUs. Older GPUs don't
|
||||
* support the SVM APIs and don't need to be considered
|
||||
@@ -1394,7 +1394,7 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
|
||||
if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
|
||||
return false;
|
||||
|
||||
if (dev->noretry)
|
||||
if (dev->kfd->noretry)
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1528,7 +1528,7 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
|
||||
struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev,
|
||||
struct kfd_process *p)
|
||||
{
|
||||
int i;
|
||||
@@ -1540,7 +1540,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
|
||||
struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
|
||||
struct kfd_process *p)
|
||||
{
|
||||
struct kfd_process_device *pdd = NULL;
|
||||
@@ -1552,7 +1552,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
|
||||
if (!pdd)
|
||||
return NULL;
|
||||
|
||||
if (init_doorbell_bitmap(&pdd->qpd, dev)) {
|
||||
if (init_doorbell_bitmap(&pdd->qpd, dev->kfd)) {
|
||||
pr_err("Failed to init doorbell for process\n");
|
||||
goto err_free_pdd;
|
||||
}
|
||||
@@ -1573,7 +1573,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
|
||||
pdd->user_gpu_id = dev->id;
|
||||
atomic64_set(&pdd->evict_duration_counter, 0);
|
||||
|
||||
if (dev->shared_resources.enable_mes) {
|
||||
if (dev->kfd->shared_resources.enable_mes) {
|
||||
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
|
||||
AMDGPU_MES_PROC_CTX_SIZE,
|
||||
&pdd->proc_ctx_bo,
|
||||
@@ -1619,7 +1619,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
|
||||
struct amdgpu_fpriv *drv_priv;
|
||||
struct amdgpu_vm *avm;
|
||||
struct kfd_process *p;
|
||||
struct kfd_dev *dev;
|
||||
struct kfd_node *dev;
|
||||
int ret;
|
||||
|
||||
if (!drm_file)
|
||||
@@ -1679,7 +1679,7 @@ err_reserve_ib_mem:
|
||||
*
|
||||
* Assumes that the process lock is held.
|
||||
*/
|
||||
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
|
||||
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
|
||||
struct kfd_process *p)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
@@ -1811,7 +1811,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
|
||||
for (i = 0; i < p->n_pdds; i++) {
|
||||
struct kfd_process_device *pdd = p->pdds[i];
|
||||
|
||||
kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
|
||||
kfd_smi_event_queue_eviction(pdd->dev->kfd, p->lead_thread->pid,
|
||||
trigger);
|
||||
|
||||
r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
|
||||
@@ -1839,7 +1839,7 @@ fail:
|
||||
if (n_evicted == 0)
|
||||
break;
|
||||
|
||||
kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
|
||||
kfd_smi_event_queue_restore(pdd->dev->kfd, p->lead_thread->pid);
|
||||
|
||||
if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
|
||||
&pdd->qpd))
|
||||
@@ -1860,7 +1860,7 @@ int kfd_process_restore_queues(struct kfd_process *p)
|
||||
for (i = 0; i < p->n_pdds; i++) {
|
||||
struct kfd_process_device *pdd = p->pdds[i];
|
||||
|
||||
kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
|
||||
kfd_smi_event_queue_restore(pdd->dev->kfd, p->lead_thread->pid);
|
||||
|
||||
r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
|
||||
&pdd->qpd);
|
||||
@@ -2016,7 +2016,7 @@ int kfd_resume_all_processes(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
|
||||
int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
@@ -2051,7 +2051,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
|
||||
{
|
||||
struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
|
||||
uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
|
||||
struct kfd_dev *dev = pdd->dev;
|
||||
struct kfd_node *dev = pdd->dev;
|
||||
|
||||
/*
|
||||
* It can be that we race and lose here, but that is extremely unlikely
|
||||
|
||||
Reference in New Issue
Block a user