diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
24 files changed, 427 insertions, 341 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 177d1e5329a5..9f59ba93cfe0 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -33,7 +33,9 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; const struct kfd2kgd_calls *f2g = dev->kfd2kgd; - unsigned int vmid, pasid; + unsigned int vmid; + uint16_t pasid; + bool ret; /* This workaround is due to HW/FW limitation on Hawaii that * VMID and PASID are not written into ih_ring_entry @@ -48,13 +50,13 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, *tmp_ihre = *ihre; vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd); - pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid); + ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid); tmp_ihre->ring_id &= 0x000000ff; tmp_ihre->ring_id |= vmid << 8; tmp_ihre->ring_id |= pasid << 16; - return (pasid != 0) && + return ret && (pasid != 0) && vmid >= dev->vm_info.first_vmid_kfd && vmid <= dev->vm_info.last_vmid_kfd; } diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 901fe3590165..d3400da6ab64 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -905,7 +905,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x7a5d0000, 0x807c817c, 0x807aff7a, 0x00000080, 0xbf0a717c, 0xbf85fff8, - 0xbf820141, 0xbef4037e, + 0xbf820142, 0xbef4037e, 0x8775ff7f, 0x0000ffff, 0x8875ff75, 0x00040000, 0xbef60380, 0xbef703ff, @@ -967,7 +967,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x725d0000, 0xe0304080, 0x725d0100, 0xe0304100, 0x725d0200, 0xe0304180, - 0x725d0300, 0xbf820031, + 0x725d0300, 0xbf820032, 0xbef603ff, 0x01000000, 0xbef20378, 0x8078ff78, 0x00000400, 0xbefc0384, @@ -992,83 +992,84 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x725d0000, 0xe0304100, 0x725d0100, 0xe0304200, 0x725d0200, 0xe0304300, - 0x725d0300, 0xb9782a05, - 0x80788178, 0x907c9973, - 0x877c817c, 0xbf06817c, - 0xbf850002, 0x8f788978, - 0xbf820001, 0x8f788a78, - 0xb9721e06, 0x8f728a72, - 0x80787278, 0x8078ff78, - 0x00000200, 0x80f8ff78, - 0x00000050, 0xbef603ff, - 0x01000000, 0xbefc03ff, - 0x0000006c, 0x80f89078, - 0xf429003a, 0xf0000000, - 0xbf8cc07f, 0x80fc847c, - 0xbf800000, 0xbe803100, - 0xbe823102, 0x80f8a078, - 0xf42d003a, 0xf0000000, - 0xbf8cc07f, 0x80fc887c, - 0xbf800000, 0xbe803100, - 0xbe823102, 0xbe843104, - 0xbe863106, 0x80f8c078, - 0xf431003a, 0xf0000000, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe803100, - 0xbe823102, 0xbe843104, - 0xbe863106, 0xbe883108, - 0xbe8a310a, 0xbe8c310c, - 0xbe8e310e, 0xbf06807c, - 0xbf84fff0, 0xb9782a05, - 0x80788178, 0x907c9973, - 0x877c817c, 0xbf06817c, - 0xbf850002, 0x8f788978, - 0xbf820001, 0x8f788a78, - 0xb9721e06, 0x8f728a72, - 0x80787278, 0x8078ff78, - 0x00000200, 0xbef603ff, - 0x01000000, 0xf4211bfa, + 0x725d0300, 0xbf8c3f70, + 0xb9782a05, 0x80788178, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbf850002, + 0x8f788978, 0xbf820001, + 0x8f788a78, 0xb9721e06, + 0x8f728a72, 0x80787278, + 0x8078ff78, 0x00000200, + 0x80f8ff78, 0x00000050, + 0xbef603ff, 0x01000000, + 0xbefc03ff, 0x0000006c, + 0x80f89078, 0xf429003a, + 0xf0000000, 0xbf8cc07f, + 0x80fc847c, 0xbf800000, + 0xbe803100, 0xbe823102, + 0x80f8a078, 0xf42d003a, + 0xf0000000, 0xbf8cc07f, + 0x80fc887c, 0xbf800000, + 0xbe803100, 0xbe823102, + 0xbe843104, 0xbe863106, + 0x80f8c078, 0xf431003a, + 0xf0000000, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe803100, 0xbe823102, + 0xbe843104, 0xbe863106, + 0xbe883108, 0xbe8a310a, + 0xbe8c310c, 0xbe8e310e, + 0xbf06807c, 0xbf84fff0, + 0xb9782a05, 0x80788178, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbf850002, + 0x8f788978, 0xbf820001, + 0x8f788a78, 0xb9721e06, + 0x8f728a72, 0x80787278, + 0x8078ff78, 0x00000200, + 0xbef603ff, 0x01000000, + 0xf4211bfa, 0xf0000000, + 0x80788478, 0xf4211b3a, 0xf0000000, 0x80788478, - 0xf4211b3a, 0xf0000000, - 0x80788478, 0xf4211b7a, + 0xf4211b7a, 0xf0000000, + 0x80788478, 0xf4211eba, 0xf0000000, 0x80788478, - 0xf4211eba, 0xf0000000, - 0x80788478, 0xf4211efa, + 0xf4211efa, 0xf0000000, + 0x80788478, 0xf4211c3a, 0xf0000000, 0x80788478, - 0xf4211c3a, 0xf0000000, - 0x80788478, 0xf4211c7a, + 0xf4211c7a, 0xf0000000, + 0x80788478, 0xf4211e7a, 0xf0000000, 0x80788478, - 0xf4211e7a, 0xf0000000, - 0x80788478, 0xf4211cfa, + 0xf4211cfa, 0xf0000000, + 0x80788478, 0xf4211bba, 0xf0000000, 0x80788478, + 0xbf8cc07f, 0xb9eef814, 0xf4211bba, 0xf0000000, 0x80788478, 0xbf8cc07f, - 0xb9eef814, 0xf4211bba, - 0xf0000000, 0x80788478, - 0xbf8cc07f, 0xb9eef815, - 0xbef2036d, 0x876dff72, - 0x0000ffff, 0xbefc036f, - 0xbefe037a, 0xbeff037b, - 0x876f71ff, 0x000003ff, - 0xb9ef4803, 0xb9f9f816, - 0x876f71ff, 0xfffff800, - 0x906f8b6f, 0xb9efa2c3, - 0xb9f3f801, 0x876fff72, - 0xfc000000, 0x906f9a6f, - 0x8f6f906f, 0xbef30380, + 0xb9eef815, 0xbef2036d, + 0x876dff72, 0x0000ffff, + 0xbefc036f, 0xbefe037a, + 0xbeff037b, 0x876f71ff, + 0x000003ff, 0xb9ef4803, + 0xb9f9f816, 0x876f71ff, + 0xfffff800, 0x906f8b6f, + 0xb9efa2c3, 0xb9f3f801, + 0x876fff72, 0xfc000000, + 0x906f9a6f, 0x8f6f906f, + 0xbef30380, 0x88736f73, + 0x876fff72, 0x02000000, + 0x906f996f, 0x8f6f8f6f, 0x88736f73, 0x876fff72, - 0x02000000, 0x906f996f, - 0x8f6f8f6f, 0x88736f73, - 0x876fff72, 0x01000000, - 0x906f986f, 0x8f6f996f, - 0x88736f73, 0x876fff70, - 0x00800000, 0x906f976f, - 0xb9f3f807, 0x87fe7e7e, - 0x87ea6a6a, 0xb9f0f802, - 0xbf8a0000, 0xbe80226c, - 0xbf810000, 0xbf9f0000, + 0x01000000, 0x906f986f, + 0x8f6f996f, 0x88736f73, + 0x876fff70, 0x00800000, + 0x906f976f, 0xb9f3f807, + 0x87fe7e7e, 0x87ea6a6a, + 0xb9f0f802, 0xbf8a0000, + 0xbe80226c, 0xbf810000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0x00000000, }; static const uint32_t cwsr_trap_arcturus_hex[] = { 0xbf820001, 0xbf8202c4, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index cdaa523ce6be..4433bda2ce25 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -758,6 +758,7 @@ L_RESTORE_V0: buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 + s_waitcnt vmcnt(0) /* restore SGPRs */ //will be 2+8+16*6 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 1d3cd5c50d5f..1544007af34a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -49,7 +49,7 @@ static const char kfd_dev_name[] = "kfd"; static const struct file_operations kfd_fops = { .owner = THIS_MODULE, .unlocked_ioctl = kfd_ioctl, - .compat_ioctl = kfd_ioctl, + .compat_ioctl = compat_ptr_ioctl, .open = kfd_open, .mmap = kfd_mmap, }; @@ -282,7 +282,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, goto err_bind_process; } - pr_debug("Creating queue for PASID %d on gpu 0x%x\n", + pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n", p->pasid, dev->id); @@ -332,7 +332,7 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, int retval; struct kfd_ioctl_destroy_queue_args *args = data; - pr_debug("Destroying queue id %d for pasid %d\n", + pr_debug("Destroying queue id %d for pasid 0x%x\n", args->queue_id, p->pasid); @@ -378,7 +378,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, properties.queue_percent = args->queue_percentage; properties.priority = args->queue_priority; - pr_debug("Updating queue id %d for pasid %d\n", + pr_debug("Updating queue id %d for pasid 0x%x\n", args->queue_id, p->pasid); mutex_lock(&p->mutex); @@ -855,7 +855,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, struct kfd_process_device_apertures *pAperture; struct kfd_process_device *pdd; - dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid); + dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid); args->num_of_nodes = 0; @@ -913,7 +913,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, uint32_t nodes = 0; int ret; - dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid); + dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid); if (args->num_of_nodes == 0) { /* Return number of nodes, so that user space can alloacate @@ -1128,7 +1128,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep, mutex_unlock(&p->mutex); if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && - pdd->qpd.vmid != 0) + pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va) dev->kfd2kgd->set_scratch_backing_va( dev->kgd, args->va_addr, pdd->qpd.vmid); @@ -1801,7 +1801,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) } else goto err_i1; - dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg); + dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg); process = kfd_get_process(current); if (IS_ERR(process)) { @@ -1856,7 +1856,8 @@ err_i1: kfree(kdata); if (retcode) - dev_dbg(kfd_device, "ret = %d\n", retcode); + dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n", + nr, arg, retcode); return retcode; } @@ -1877,7 +1878,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - pr_debug("Process %d mapping mmio page\n" + pr_debug("pasid 0x%x mapping mmio page\n" " target user address == 0x%08llX\n" " physical address == 0x%08llX\n" " vm_flags == 0x%04lX\n" diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 66387caf966e..de9f68d5c312 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -138,6 +138,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { /* TODO - check & update Vega10 cache details */ #define vega10_cache_info carrizo_cache_info #define raven_cache_info carrizo_cache_info +#define renoir_cache_info carrizo_cache_info /* TODO - check & update Navi10 cache details */ #define navi10_cache_info carrizo_cache_info @@ -670,7 +671,13 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = raven_cache_info; num_of_cache_types = ARRAY_SIZE(raven_cache_info); break; + case CHIP_RENOIR: + pcache_info = renoir_cache_info; + num_of_cache_types = ARRAY_SIZE(renoir_cache_info); + break; case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: pcache_info = navi10_cache_info; num_of_cache_types = ARRAY_SIZE(navi10_cache_info); break; @@ -703,7 +710,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info, cu_info, mem_available, - cu_info->cu_bitmap[i][j], + cu_info->cu_bitmap[i % 4][j + i / 4], ct, cu_processor_id, k); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index a3441b0e385b..d59f2cd056c6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -761,6 +761,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) { int status = 0; unsigned int vmid; + uint16_t queried_pasid; union SQ_CMD_BITS reg_sq_cmd; union GRBM_GFX_INDEX_BITS reg_gfx_index; struct kfd_process_device *pdd; @@ -782,19 +783,18 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) */ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { - if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid - (dev->kgd, vmid)) { - if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid - (dev->kgd, vmid) == p->pasid) { - pr_debug("Killing wave fronts of vmid %d and pasid %d\n", - vmid, p->pasid); - break; - } + status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info + (dev->kgd, vmid, &queried_pasid); + + if (status && queried_pasid == p->pasid) { + pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", + vmid, p->pasid); + break; } } if (vmid > last_vmid_to_scan) { - pr_err("Didn't find vmid for pasid %d\n", p->pasid); + pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); return -EFAULT; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c index 9d4af961c5d1..9bfa50633654 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c @@ -96,7 +96,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) { if (pmgr->pasid != 0) { - pr_debug("H/W debugger is already active using pasid %d\n", + pr_debug("H/W debugger is already active using pasid 0x%x\n", pmgr->pasid); return -EBUSY; } @@ -117,7 +117,7 @@ long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) { /* Is the requests coming from the already registered process? */ if (pmgr->pasid != p->pasid) { - pr_debug("H/W debugger is not registered by calling pasid %d\n", + pr_debug("H/W debugger is not registered by calling pasid 0x%x\n", p->pasid); return -EINVAL; } @@ -134,7 +134,7 @@ long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, { /* Is the requests coming from the already registered process? */ if (pmgr->pasid != wac_info->process->pasid) { - pr_debug("H/W debugger support was not registered for requester pasid %d\n", + pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n", wac_info->process->pasid); return -EINVAL; } @@ -147,7 +147,7 @@ long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, { /* Is the requests coming from the already registered process? */ if (pmgr->pasid != adw_info->process->pasid) { - pr_debug("H/W debugger support was not registered for requester pasid %d\n", + pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n", adw_info->process->pasid); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0dc1084b5e82..4fa8834ce7cb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -39,6 +39,41 @@ */ static atomic_t kfd_locked = ATOMIC_INIT(0); +#ifdef CONFIG_DRM_AMDGPU_CIK +extern const struct kfd2kgd_calls gfx_v7_kfd2kgd; +#endif +extern const struct kfd2kgd_calls gfx_v8_kfd2kgd; +extern const struct kfd2kgd_calls gfx_v9_kfd2kgd; +extern const struct kfd2kgd_calls arcturus_kfd2kgd; +extern const struct kfd2kgd_calls gfx_v10_kfd2kgd; + +static const struct kfd2kgd_calls *kfd2kgd_funcs[] = { +#ifdef KFD_SUPPORT_IOMMU_V2 +#ifdef CONFIG_DRM_AMDGPU_CIK + [CHIP_KAVERI] = &gfx_v7_kfd2kgd, +#endif + [CHIP_CARRIZO] = &gfx_v8_kfd2kgd, + [CHIP_RAVEN] = &gfx_v9_kfd2kgd, +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + [CHIP_HAWAII] = &gfx_v7_kfd2kgd, +#endif + [CHIP_TONGA] = &gfx_v8_kfd2kgd, + [CHIP_FIJI] = &gfx_v8_kfd2kgd, + [CHIP_POLARIS10] = &gfx_v8_kfd2kgd, + [CHIP_POLARIS11] = &gfx_v8_kfd2kgd, + [CHIP_POLARIS12] = &gfx_v8_kfd2kgd, + [CHIP_VEGAM] = &gfx_v8_kfd2kgd, + [CHIP_VEGA10] = &gfx_v9_kfd2kgd, + [CHIP_VEGA12] = &gfx_v9_kfd2kgd, + [CHIP_VEGA20] = &gfx_v9_kfd2kgd, + [CHIP_RENOIR] = &gfx_v9_kfd2kgd, + [CHIP_ARCTURUS] = &arcturus_kfd2kgd, + [CHIP_NAVI10] = &gfx_v10_kfd2kgd, + [CHIP_NAVI12] = &gfx_v10_kfd2kgd, + [CHIP_NAVI14] = &gfx_v10_kfd2kgd, +}; + #ifdef KFD_SUPPORT_IOMMU_V2 static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, @@ -351,6 +386,24 @@ static const struct kfd_device_info arcturus_device_info = { .num_sdma_queues_per_engine = 8, }; +static const struct kfd_device_info renoir_device_info = { + .asic_family = CHIP_RENOIR, + .asic_name = "renoir", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 1, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, +}; + static const struct kfd_device_info navi10_device_info = { .asic_family = CHIP_NAVI10, .asic_name = "navi10", @@ -369,133 +422,64 @@ static const struct kfd_device_info navi10_device_info = { .num_sdma_queues_per_engine = 8, }; -struct kfd_deviceid { - unsigned short did; - const struct kfd_device_info *device_info; +static const struct kfd_device_info navi12_device_info = { + .asic_family = CHIP_NAVI12, + .asic_name = "navi12", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .needs_iommu_device = false, + .supports_cwsr = true, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, +}; + +static const struct kfd_device_info navi14_device_info = { + .asic_family = CHIP_NAVI14, + .asic_name = "navi14", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .needs_iommu_device = false, + .supports_cwsr = true, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, }; -static const struct kfd_deviceid supported_devices[] = { +/* For each entry, [0] is regular and [1] is virtualisation device. */ +static const struct kfd_device_info *kfd_supported_devices[][2] = { #ifdef KFD_SUPPORT_IOMMU_V2 - { 0x1304, &kaveri_device_info }, /* Kaveri */ - { 0x1305, &kaveri_device_info }, /* Kaveri */ - { 0x1306, &kaveri_device_info }, /* Kaveri */ - { 0x1307, &kaveri_device_info }, /* Kaveri */ - { 0x1309, &kaveri_device_info }, /* Kaveri */ - { 0x130A, &kaveri_device_info }, /* Kaveri */ - { 0x130B, &kaveri_device_info }, /* Kaveri */ - { 0x130C, &kaveri_device_info }, /* Kaveri */ - { 0x130D, &kaveri_device_info }, /* Kaveri */ - { 0x130E, &kaveri_device_info }, /* Kaveri */ - { 0x130F, &kaveri_device_info }, /* Kaveri */ - { 0x1310, &kaveri_device_info }, /* Kaveri */ - { 0x1311, &kaveri_device_info }, /* Kaveri */ - { 0x1312, &kaveri_device_info }, /* Kaveri */ - { 0x1313, &kaveri_device_info }, /* Kaveri */ - { 0x1315, &kaveri_device_info }, /* Kaveri */ - { 0x1316, &kaveri_device_info }, /* Kaveri */ - { 0x1317, &kaveri_device_info }, /* Kaveri */ - { 0x1318, &kaveri_device_info }, /* Kaveri */ - { 0x131B, &kaveri_device_info }, /* Kaveri */ - { 0x131C, &kaveri_device_info }, /* Kaveri */ - { 0x131D, &kaveri_device_info }, /* Kaveri */ - { 0x9870, &carrizo_device_info }, /* Carrizo */ - { 0x9874, &carrizo_device_info }, /* Carrizo */ - { 0x9875, &carrizo_device_info }, /* Carrizo */ - { 0x9876, &carrizo_device_info }, /* Carrizo */ - { 0x9877, &carrizo_device_info }, /* Carrizo */ - { 0x15DD, &raven_device_info }, /* Raven */ - { 0x15D8, &raven_device_info }, /* Raven */ + [CHIP_KAVERI] = {&kaveri_device_info, NULL}, + [CHIP_CARRIZO] = {&carrizo_device_info, NULL}, + [CHIP_RAVEN] = {&raven_device_info, NULL}, #endif - { 0x67A0, &hawaii_device_info }, /* Hawaii */ - { 0x67A1, &hawaii_device_info }, /* Hawaii */ - { 0x67A2, &hawaii_device_info }, /* Hawaii */ - { 0x67A8, &hawaii_device_info }, /* Hawaii */ - { 0x67A9, &hawaii_device_info }, /* Hawaii */ - { 0x67AA, &hawaii_device_info }, /* Hawaii */ - { 0x67B0, &hawaii_device_info }, /* Hawaii */ - { 0x67B1, &hawaii_device_info }, /* Hawaii */ - { 0x67B8, &hawaii_device_info }, /* Hawaii */ - { 0x67B9, &hawaii_device_info }, /* Hawaii */ - { 0x67BA, &hawaii_device_info }, /* Hawaii */ - { 0x67BE, &hawaii_device_info }, /* Hawaii */ - { 0x6920, &tonga_device_info }, /* Tonga */ - { 0x6921, &tonga_device_info }, /* Tonga */ - { 0x6928, &tonga_device_info }, /* Tonga */ - { 0x6929, &tonga_device_info }, /* Tonga */ - { 0x692B, &tonga_device_info }, /* Tonga */ - { 0x6938, &tonga_device_info }, /* Tonga */ - { 0x6939, &tonga_device_info }, /* Tonga */ - { 0x7300, &fiji_device_info }, /* Fiji */ - { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ - { 0x67C0, &polaris10_device_info }, /* Polaris10 */ - { 0x67C1, &polaris10_device_info }, /* Polaris10 */ - { 0x67C2, &polaris10_device_info }, /* Polaris10 */ - { 0x67C4, &polaris10_device_info }, /* Polaris10 */ - { 0x67C7, &polaris10_device_info }, /* Polaris10 */ - { 0x67C8, &polaris10_device_info }, /* Polaris10 */ - { 0x67C9, &polaris10_device_info }, /* Polaris10 */ - { 0x67CA, &polaris10_device_info }, /* Polaris10 */ - { 0x67CC, &polaris10_device_info }, /* Polaris10 */ - { 0x67CF, &polaris10_device_info }, /* Polaris10 */ - { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ - { 0x67DF, &polaris10_device_info }, /* Polaris10 */ - { 0x6FDF, &polaris10_device_info }, /* Polaris10 */ - { 0x67E0, &polaris11_device_info }, /* Polaris11 */ - { 0x67E1, &polaris11_device_info }, /* Polaris11 */ - { 0x67E3, &polaris11_device_info }, /* Polaris11 */ - { 0x67E7, &polaris11_device_info }, /* Polaris11 */ - { 0x67E8, &polaris11_device_info }, /* Polaris11 */ - { 0x67E9, &polaris11_device_info }, /* Polaris11 */ - { 0x67EB, &polaris11_device_info }, /* Polaris11 */ - { 0x67EF, &polaris11_device_info }, /* Polaris11 */ - { 0x67FF, &polaris11_device_info }, /* Polaris11 */ - { 0x6980, &polaris12_device_info }, /* Polaris12 */ - { 0x6981, &polaris12_device_info }, /* Polaris12 */ - { 0x6985, &polaris12_device_info }, /* Polaris12 */ - { 0x6986, &polaris12_device_info }, /* Polaris12 */ - { 0x6987, &polaris12_device_info }, /* Polaris12 */ - { 0x6995, &polaris12_device_info }, /* Polaris12 */ - { 0x6997, &polaris12_device_info }, /* Polaris12 */ - { 0x699F, &polaris12_device_info }, /* Polaris12 */ - { 0x694C, &vegam_device_info }, /* VegaM */ - { 0x694E, &vegam_device_info }, /* VegaM */ - { 0x694F, &vegam_device_info }, /* VegaM */ - { 0x6860, &vega10_device_info }, /* Vega10 */ - { 0x6861, &vega10_device_info }, /* Vega10 */ - { 0x6862, &vega10_device_info }, /* Vega10 */ - { 0x6863, &vega10_device_info }, /* Vega10 */ - { 0x6864, &vega10_device_info }, /* Vega10 */ - { 0x6867, &vega10_device_info }, /* Vega10 */ - { 0x6868, &vega10_device_info }, /* Vega10 */ - { 0x6869, &vega10_device_info }, /* Vega10 */ - { 0x686A, &vega10_device_info }, /* Vega10 */ - { 0x686B, &vega10_device_info }, /* Vega10 */ - { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/ - { 0x686D, &vega10_device_info }, /* Vega10 */ - { 0x686E, &vega10_device_info }, /* Vega10 */ - { 0x686F, &vega10_device_info }, /* Vega10 */ - { 0x687F, &vega10_device_info }, /* Vega10 */ - { 0x69A0, &vega12_device_info }, /* Vega12 */ - { 0x69A1, &vega12_device_info }, /* Vega12 */ - { 0x69A2, &vega12_device_info }, /* Vega12 */ - { 0x69A3, &vega12_device_info }, /* Vega12 */ - { 0x69AF, &vega12_device_info }, /* Vega12 */ - { 0x66a0, &vega20_device_info }, /* Vega20 */ - { 0x66a1, &vega20_device_info }, /* Vega20 */ - { 0x66a2, &vega20_device_info }, /* Vega20 */ - { 0x66a3, &vega20_device_info }, /* Vega20 */ - { 0x66a4, &vega20_device_info }, /* Vega20 */ - { 0x66a7, &vega20_device_info }, /* Vega20 */ - { 0x66af, &vega20_device_info }, /* Vega20 */ - { 0x738C, &arcturus_device_info }, /* Arcturus */ - { 0x7388, &arcturus_device_info }, /* Arcturus */ - { 0x738E, &arcturus_device_info }, /* Arcturus */ - { 0x7390, &arcturus_device_info }, /* Arcturus vf */ - { 0x7310, &navi10_device_info }, /* Navi10 */ - { 0x7312, &navi10_device_info }, /* Navi10 */ - { 0x7318, &navi10_device_info }, /* Navi10 */ - { 0x731a, &navi10_device_info }, /* Navi10 */ - { 0x731f, &navi10_device_info }, /* Navi10 */ + [CHIP_HAWAII] = {&hawaii_device_info, NULL}, + [CHIP_TONGA] = {&tonga_device_info, NULL}, + [CHIP_FIJI] = {&fiji_device_info, &fiji_vf_device_info}, + [CHIP_POLARIS10] = {&polaris10_device_info, &polaris10_vf_device_info}, + [CHIP_POLARIS11] = {&polaris11_device_info, NULL}, + [CHIP_POLARIS12] = {&polaris12_device_info, NULL}, + [CHIP_VEGAM] = {&vegam_device_info, NULL}, + [CHIP_VEGA10] = {&vega10_device_info, &vega10_vf_device_info}, + [CHIP_VEGA12] = {&vega12_device_info, NULL}, + [CHIP_VEGA20] = {&vega20_device_info, NULL}, + [CHIP_RENOIR] = {&renoir_device_info, NULL}, + [CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info}, + [CHIP_NAVI10] = {&navi10_device_info, NULL}, + [CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info}, + [CHIP_NAVI14] = {&navi14_device_info, NULL}, }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -504,32 +488,25 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd); static int kfd_resume(struct kfd_dev *kfd); -static const struct kfd_device_info *lookup_device_info(unsigned short did) +struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, + struct pci_dev *pdev, unsigned int asic_type, bool vf) { - size_t i; + struct kfd_dev *kfd; + const struct kfd_device_info *device_info; + const struct kfd2kgd_calls *f2g; - for (i = 0; i < ARRAY_SIZE(supported_devices); i++) { - if (supported_devices[i].did == did) { - WARN_ON(!supported_devices[i].device_info); - return supported_devices[i].device_info; - } + if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void *) * 2) + || asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) { + dev_err(kfd_device, "asic_type %d out of range\n", asic_type); + return NULL; /* asic_type out of range */ } - dev_warn(kfd_device, "DID %04x is missing in supported_devices\n", - did); + device_info = kfd_supported_devices[asic_type][vf]; + f2g = kfd2kgd_funcs[asic_type]; - return NULL; -} - -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, - struct pci_dev *pdev, const struct kfd2kgd_calls *f2g) -{ - struct kfd_dev *kfd; - const struct kfd_device_info *device_info = - lookup_device_info(pdev->device); - - if (!device_info) { - dev_err(kfd_device, "kgd2kfd_probe failed\n"); + if (!device_info || !f2g) { + dev_err(kfd_device, "%s %s not supported in kfd\n", + amdgpu_asic_name[asic_type], vf ? "VF" : ""); return NULL; } @@ -593,10 +570,12 @@ static void kfd_cwsr_init(struct kfd_dev *kfd) } bool kgd2kfd_device_init(struct kfd_dev *kfd, + struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) { unsigned int size; + kfd->ddev = ddev; kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, KGD_ENGINE_MEC1); kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, @@ -751,9 +730,6 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd) return 0; kgd2kfd_suspend(kfd); - /* hold dqm->lock to prevent further execution*/ - dqm_lock(kfd->dqm); - kfd_signal_reset_event(kfd); return 0; } @@ -771,8 +747,6 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) if (!kfd->init_complete) return 0; - dqm_unlock(kfd->dqm); - ret = kfd_resume(kfd); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d985e31fcc1e..984c2f2b24b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -195,20 +195,30 @@ static int allocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { - int bit, allocated_vmid; + int allocated_vmid = -1, i; - if (dqm->vmid_bitmap == 0) - return -ENOMEM; + for (i = dqm->dev->vm_info.first_vmid_kfd; + i <= dqm->dev->vm_info.last_vmid_kfd; i++) { + if (!dqm->vmid_pasid[i]) { + allocated_vmid = i; + break; + } + } + + if (allocated_vmid < 0) { + pr_err("no more vmid to allocate\n"); + return -ENOSPC; + } + + pr_debug("vmid allocated: %d\n", allocated_vmid); + + dqm->vmid_pasid[allocated_vmid] = q->process->pasid; - bit = ffs(dqm->vmid_bitmap) - 1; - dqm->vmid_bitmap &= ~(1 << bit); + set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid); - allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd; - pr_debug("vmid allocation %d\n", allocated_vmid); qpd->vmid = allocated_vmid; q->properties.vmid = allocated_vmid; - set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid); program_sh_mem_settings(dqm, qpd); /* qpd->page_table_base is set earlier when register_process() @@ -220,8 +230,9 @@ static int allocate_vmid(struct device_queue_manager *dqm, /* invalidate the VM context after pasid and vmid mapping is set up */ kfd_flush_tlb(qpd_to_pdd(qpd)); - dqm->dev->kfd2kgd->set_scratch_backing_va( - dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); + if (dqm->dev->kfd2kgd->set_scratch_backing_va) + dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd, + qpd->sh_hidden_private_base, qpd->vmid); return 0; } @@ -248,8 +259,6 @@ static void deallocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { - int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; - /* On GFX v7, CP doesn't flush TC at dequeue */ if (q->device->device_info->asic_family == CHIP_HAWAII) if (flush_texture_cache_nocpsch(q->device, qpd)) @@ -259,8 +268,8 @@ static void deallocate_vmid(struct device_queue_manager *dqm, /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); + dqm->vmid_pasid[qpd->vmid] = 0; - dqm->vmid_bitmap |= (1 << bit); qpd->vmid = 0; q->properties.vmid = 0; } @@ -331,6 +340,10 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (q->properties.is_active) { + if (!dqm->sched_running) { + WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); + goto add_queue_to_list; + } if (WARN(q->process->mm != current->mm, "should only run in user thread")) @@ -342,6 +355,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, goto out_free_mqd; } +add_queue_to_list: list_add(&q->list, &qpd->queues_list); qpd->queue_count++; if (q->properties.is_active) @@ -449,6 +463,11 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, deallocate_doorbell(qpd, q); + if (!dqm->sched_running) { + WARN_ONCE(1, "Destroy non-HWS queue while stopped\n"); + return 0; + } + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, @@ -524,6 +543,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || q->properties.type == KFD_QUEUE_TYPE_SDMA || q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { + + if (!dqm->sched_running) { + WARN_ONCE(1, "Update non-HWS queue while stopped\n"); + goto out_unlock; + } + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); @@ -579,7 +604,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, goto out; pdd = qpd_to_pdd(qpd); - pr_info_ratelimited("Evicting PASID %u queues\n", + pr_info_ratelimited("Evicting PASID 0x%x queues\n", pdd->process->pasid); /* Mark all queues as evicted. Deactivate all active queues on @@ -593,6 +618,11 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = false; + dqm->queue_count--; + + if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) + continue; + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); @@ -601,7 +631,6 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, * maintain a consistent eviction state */ ret = retval; - dqm->queue_count--; } out: @@ -621,7 +650,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, goto out; pdd = qpd_to_pdd(qpd); - pr_info_ratelimited("Evicting PASID %u queues\n", + pr_info_ratelimited("Evicting PASID 0x%x queues\n", pdd->process->pasid); /* Mark all queues as evicted. Deactivate all active queues on @@ -667,7 +696,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, goto out; } - pr_info_ratelimited("Restoring PASID %u queues\n", + pr_info_ratelimited("Restoring PASID 0x%x queues\n", pdd->process->pasid); /* Update PD Base in QPD */ @@ -702,6 +731,11 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = true; + dqm->queue_count++; + + if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) + continue; + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, &q->properties, mm); if (retval && !ret) @@ -709,7 +743,6 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, * maintain a consistent eviction state */ ret = retval; - dqm->queue_count++; } qpd->evicted = 0; out: @@ -739,7 +772,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, goto out; } - pr_info_ratelimited("Restoring PASID %u queues\n", + pr_info_ratelimited("Restoring PASID 0x%x queues\n", pdd->process->pasid); /* Update PD Base in QPD */ @@ -879,7 +912,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) dqm->allocated_queues[pipe] |= 1 << queue; } - dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; + memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); + dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); @@ -902,12 +936,20 @@ static void uninitialize(struct device_queue_manager *dqm) static int start_nocpsch(struct device_queue_manager *dqm) { init_interrupts(dqm); - return pm_init(&dqm->packets, dqm); + + if (dqm->dev->device_info->asic_family == CHIP_HAWAII) + return pm_init(&dqm->packets, dqm); + dqm->sched_running = true; + + return 0; } static int stop_nocpsch(struct device_queue_manager *dqm) { - pm_uninit(&dqm->packets); + if (dqm->dev->device_info->asic_family == CHIP_HAWAII) + pm_uninit(&dqm->packets); + dqm->sched_running = false; + return 0; } @@ -1058,6 +1100,7 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm_lock(dqm); /* clear hang status when driver try to start the hw scheduler */ dqm->is_hws_hang = false; + dqm->sched_running = true; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); dqm_unlock(dqm); @@ -1073,6 +1116,7 @@ static int stop_cpsch(struct device_queue_manager *dqm) { dqm_lock(dqm); unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + dqm->sched_running = false; dqm_unlock(dqm); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); @@ -1259,9 +1303,10 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) { int retval; + if (!dqm->sched_running) + return 0; if (dqm->queue_count <= 0 || dqm->processes_count <= 0) return 0; - if (dqm->active_runlist) return 0; @@ -1283,6 +1328,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, { int retval = 0; + if (!dqm->sched_running) + return 0; if (dqm->is_hws_hang) return -EIO; if (!dqm->active_runlist) @@ -1676,7 +1723,8 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) struct kfd_dev *dev = dqm->dev; struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * - dev->device_info->num_sdma_engines * + (dev->device_info->num_sdma_engines + + dev->device_info->num_xgmi_sdma_engines) * dev->device_info->num_sdma_queues_per_engine + dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; @@ -1786,10 +1834,13 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_RENOIR: case CHIP_ARCTURUS: device_queue_manager_init_v9(&dqm->asic_ops); break; case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: device_queue_manager_init_v10_navi10(&dqm->asic_ops); break; default: @@ -1883,6 +1934,12 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) int pipe, queue; int r = 0; + if (!dqm->sched_running) { + seq_printf(m, " Device is stopped\n"); + + return 0; + } + r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd, KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs); @@ -1917,7 +1974,8 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) } } - for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) { + for (pipe = 0; pipe < get_num_sdma_engines(dqm) + + get_num_xgmi_sdma_engines(dqm); pipe++) { for (queue = 0; queue < dqm->dev->device_info->num_sdma_queues_per_engine; queue++) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 90db2c9275f6..a8c37e6da027 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -32,6 +32,8 @@ #include "kfd_mqd_manager.h" +#define VMID_NUM 16 + struct device_process_node { struct qcm_process_device *qpd; struct list_head list; @@ -185,7 +187,8 @@ struct device_queue_manager { unsigned int *allocated_queues; uint64_t sdma_bitmap; uint64_t xgmi_sdma_bitmap; - unsigned int vmid_bitmap; + /* the pasid mapping for each kfd vmid */ + uint16_t vmid_pasid[VMID_NUM]; uint64_t pipelines_addr; struct kfd_mem_obj *pipeline_mem; uint64_t fence_gpu_addr; @@ -198,6 +201,7 @@ struct device_queue_manager { bool is_hws_hang; struct work_struct hw_exception_work; struct kfd_mem_obj hiq_sdma_mqd; + bool sched_running; }; void device_queue_manager_init_cik( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index d674d4b3340f..908081c85de1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -852,8 +852,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, if (type == KFD_EVENT_TYPE_MEMORY) { dev_warn(kfd_device, - "Sending SIGSEGV to HSA Process with PID %d ", - p->lead_thread->pid); + "Sending SIGSEGV to process %d (pasid 0x%x)", + p->lead_thread->pid, p->pasid); send_sig(SIGSEGV, p->lead_thread, 0); } @@ -861,13 +861,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, if (send_signal) { if (send_sigterm) { dev_warn(kfd_device, - "Sending SIGTERM to HSA Process with PID %d ", - p->lead_thread->pid); + "Sending SIGTERM to process %d (pasid 0x%x)", + p->lead_thread->pid, p->pasid); send_sig(SIGTERM, p->lead_thread, 0); } else { dev_err(kfd_device, - "HSA Process (PID %d) got unhandled exception", - p->lead_thread->pid); + "Process %d (pasid 0x%x) got unhandled exception", + p->lead_thread->pid, p->pasid); } } } @@ -936,7 +936,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, /* Workaround on Raven to not kill the process when memory is freed * before IOMMU is able to finish processing all the excessive PPRs */ - if (dev->device_info->asic_family != CHIP_RAVEN) { + if (dev->device_info->asic_family != CHIP_RAVEN && + dev->device_info->asic_family != CHIP_RENOIR) { mutex_lock(&p->event_mutex); /* Lookup events by type and signal them */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 9dc4bff8085e..bb77b8890e77 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -369,8 +369,13 @@ int kfd_init_apertures(struct kfd_process *process) /*Iterating over all devices*/ while (kfd_topology_enum_kfd_devices(id, &dev) == 0) { - if (!dev) { - id++; /* Skip non GPU devices */ + if (!dev || kfd_devcgroup_check_permission(dev)) { + /* Skip non GPU devices and devices to which the + * current process have no access to. Access can be + * limited by placing the process in a specific + * cgroup hierarchy + */ + id++; continue; } @@ -405,8 +410,11 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_RENOIR: case CHIP_ARCTURUS: case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: kfd_init_apertures_v9(pdd, id); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 3ef67d2e0d9f..e05d75ecda21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -54,8 +54,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, memcpy(patched_ihre, ih_ring_entry, dev->device_info->ih_ring_entry_size); - pasid = dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid( - dev->kgd, vmid); + pasid = dev->dqm->vmid_pasid[vmid]; /* Patch the pasid field */ patched_ihre[3] = cpu_to_le32((le32_to_cpu(patched_ihre[3]) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index c56ac47cd318..bc47f6a44456 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -62,6 +62,11 @@ int kfd_interrupt_init(struct kfd_dev *kfd) } kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1); + if (unlikely(!kfd->ih_wq)) { + kfifo_free(&kfd->ih_fifo); + dev_err(kfd_chardev(), "Failed to allocate KFD IH workqueue\n"); + return -ENOMEM; + } spin_lock_init(&kfd->interrupt_lock); INIT_WORK(&kfd->interrupt_work, interrupt_wq); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index 5f35df23fb18..193e2835bd4d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -160,7 +160,7 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) if (!p) return; - pr_debug("Unbinding process %d from IOMMU\n", pasid); + pr_debug("Unbinding process 0x%x from IOMMU\n", pasid); mutex_lock(kfd_get_dbgmgr_mutex()); @@ -194,7 +194,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, struct kfd_dev *dev; dev_warn_ratelimited(kfd_device, - "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", + "Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X", PCI_BUS_NUM(pdev->devfn), PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), @@ -235,7 +235,7 @@ static int kfd_bind_processes_to_device(struct kfd_dev *kfd) err = amd_iommu_bind_pasid(kfd->pdev, p->pasid, p->lead_thread); if (err < 0) { - pr_err("Unexpected pasid %d binding failure\n", + pr_err("Unexpected pasid 0x%x binding failure\n", p->pasid); mutex_unlock(&p->mutex); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 8b4564f71a7a..11d244891393 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -330,10 +330,13 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_RENOIR: case CHIP_ARCTURUS: kernel_queue_init_v9(&kq->ops_asic_specific); break; case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: kernel_queue_init_v10(&kq->ops_asic_specific); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 986ff52d5750..f4b7f7e6c40e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -82,7 +82,7 @@ static void kfd_exit(void) kfd_chardev_exit(); } -int kgd2kfd_init() +int kgd2kfd_init(void) { return kfd_init(); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 9cd3eb2d90bd..4a236b2c2354 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -69,35 +69,13 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, struct queue_properties *q) { - int retval; - struct kfd_mem_obj *mqd_mem_obj = NULL; + struct kfd_mem_obj *mqd_mem_obj; - /* From V9, for CWSR, the control stack is located on the next page - * boundary after the mqd, we will use the gtt allocation function - * instead of sub-allocation function. - */ - if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { - mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); - if (!mqd_mem_obj) - return NULL; - retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, - ALIGN(q->ctl_stack_size, PAGE_SIZE) + - ALIGN(sizeof(struct v10_compute_mqd), PAGE_SIZE), - &(mqd_mem_obj->gtt_mem), - &(mqd_mem_obj->gpu_addr), - (void *)&(mqd_mem_obj->cpu_ptr), true); - } else { - retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd), - &mqd_mem_obj); - } - - if (retval) { - kfree(mqd_mem_obj); + if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd), + &mqd_mem_obj)) return NULL; - } return mqd_mem_obj; - } static void init_mqd(struct mqd_manager *mm, void **mqd, @@ -250,14 +228,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, static void free_mqd(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { - struct kfd_dev *kfd = mm->dev; - - if (mqd_mem_obj->gtt_mem) { - amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem); - kfree(mqd_mem_obj); - } else { - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); - } + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); } static bool is_occupied(struct mqd_manager *mm, void *mqd, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 2c8624c5b42c..83ef4b3dd2fb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -239,10 +239,13 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_RENOIR: case CHIP_ARCTURUS: pm->pmf = &kfd_v9_pm_funcs; break; case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: pm->pmf = &kfd_v10_pm_funcs; break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index c89326125d71..060a9e8b301e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -36,6 +36,10 @@ #include <linux/seq_file.h> #include <linux/kref.h> #include <linux/sysfs.h> +#include <linux/device_cgroup.h> +#include <drm/drm_file.h> +#include <drm/drm_drv.h> +#include <drm/drm_device.h> #include <kgd_kfd_interface.h> #include "amd_shared.h" @@ -179,10 +183,6 @@ enum cache_policy { cache_policy_noncoherent }; -#define KFD_IS_VI(chip) ((chip) >= CHIP_CARRIZO && (chip) <= CHIP_POLARIS11) -#define KFD_IS_DGPU(chip) (((chip) >= CHIP_TONGA && \ - (chip) <= CHIP_NAVI10) || \ - (chip) == CHIP_HAWAII) #define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10) struct kfd_event_interrupt_class { @@ -230,6 +230,7 @@ struct kfd_dev { const struct kfd_device_info *device_info; struct pci_dev *pdev; + struct drm_device *ddev; unsigned int id; /* topology stub index */ @@ -687,7 +688,7 @@ struct kfd_process { /* We want to receive a notification when the mm_struct is destroyed */ struct mmu_notifier mmu_notifier; - unsigned int pasid; + uint16_t pasid; unsigned int doorbell_index; /* @@ -1040,6 +1041,21 @@ bool kfd_is_locked(void); void kfd_inc_compute_active(struct kfd_dev *dev); void kfd_dec_compute_active(struct kfd_dev *dev); +/* Cgroup Support */ +/* Check with device cgroup if @kfd device is accessible */ +static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd) +{ +#if defined(CONFIG_CGROUP_DEVICE) + struct drm_device *ddev = kfd->ddev; + + return devcgroup_check_permission(DEVCG_DEV_CHAR, ddev->driver->major, + ddev->render->index, + DEVCG_ACC_WRITE | DEVCG_ACC_READ); +#else + return 0; +#endif +} + /* Debugfs */ #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 40e3fc0c6942..10f9af5784f2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -416,7 +416,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) list_for_each_entry_safe(pdd, temp, &p->per_device_data, per_device_list) { - pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", + pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n", pdd->dev->id, p->pasid); if (pdd->drm_file) { @@ -687,6 +687,8 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, struct kfd_dev *dev) { unsigned int i; + int range_start = dev->shared_resources.non_cp_doorbells_start; + int range_end = dev->shared_resources.non_cp_doorbells_end; if (!KFD_IS_SOC15(dev->device_info->asic_family)) return 0; @@ -698,14 +700,16 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, return -ENOMEM; /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */ + pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end); + pr_debug("reserved doorbell 0x%03x - 0x%03x\n", + range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, + range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET); + for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) { - if (i >= dev->shared_resources.non_cp_doorbells_start - && i <= dev->shared_resources.non_cp_doorbells_end) { + if (i >= range_start && i <= range_end) { set_bit(i, qpd->doorbell_bitmap); set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, qpd->doorbell_bitmap); - pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i, - i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET); } } @@ -1020,7 +1024,7 @@ static void evict_process_worker(struct work_struct *work) */ flush_delayed_work(&p->restore_work); - pr_debug("Started evicting pasid %d\n", p->pasid); + pr_debug("Started evicting pasid 0x%x\n", p->pasid); ret = kfd_process_evict_queues(p); if (!ret) { dma_fence_signal(p->ef); @@ -1029,9 +1033,9 @@ static void evict_process_worker(struct work_struct *work) queue_delayed_work(kfd_restore_wq, &p->restore_work, msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); - pr_debug("Finished evicting pasid %d\n", p->pasid); + pr_debug("Finished evicting pasid 0x%x\n", p->pasid); } else - pr_err("Failed to evict queues of pasid %d\n", p->pasid); + pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid); } static void restore_process_worker(struct work_struct *work) @@ -1046,7 +1050,7 @@ static void restore_process_worker(struct work_struct *work) * lifetime of this thread, kfd_process p will be valid */ p = container_of(dwork, struct kfd_process, restore_work); - pr_debug("Started restoring pasid %d\n", p->pasid); + pr_debug("Started restoring pasid 0x%x\n", p->pasid); /* Setting last_restore_timestamp before successful restoration. * Otherwise this would have to be set by KGD (restore_process_bos) @@ -1062,7 +1066,7 @@ static void restore_process_worker(struct work_struct *work) ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info, &p->ef); if (ret) { - pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n", + pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n", p->pasid, PROCESS_BACK_OFF_TIME_MS); ret = queue_delayed_work(kfd_restore_wq, &p->restore_work, msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); @@ -1072,9 +1076,9 @@ static void restore_process_worker(struct work_struct *work) ret = kfd_process_restore_queues(p); if (!ret) - pr_debug("Finished restoring pasid %d\n", p->pasid); + pr_debug("Finished restoring pasid 0x%x\n", p->pasid); else - pr_err("Failed to restore queues of pasid %d\n", p->pasid); + pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid); } void kfd_suspend_all_processes(void) @@ -1088,7 +1092,7 @@ void kfd_suspend_all_processes(void) cancel_delayed_work_sync(&p->restore_work); if (kfd_process_evict_queues(p)) - pr_err("Failed to suspend process %d\n", p->pasid); + pr_err("Failed to suspend process 0x%x\n", p->pasid); dma_fence_signal(p->ef); dma_fence_put(p->ef); p->ef = NULL; @@ -1171,7 +1175,7 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data) int idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - seq_printf(m, "Process %d PASID %d:\n", + seq_printf(m, "Process %d PASID 0x%x:\n", p->lead_thread->tgid, p->pasid); mutex_lock(&p->mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 7e6c3ee82f5b..2659d226c056 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -53,7 +53,7 @@ static int find_available_queue_slot(struct process_queue_manager *pqm, pr_debug("The new slot id %lu\n", found); if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { - pr_info("Cannot open more queues for process with pasid %d\n", + pr_info("Cannot open more queues for process with pasid 0x%x\n", pqm->process->pasid); return -ENOMEM; } @@ -298,7 +298,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, } if (retval != 0) { - pr_err("Pasid %d DQM create queue %d failed. ret %d\n", + pr_err("Pasid 0x%x DQM create queue %d failed. ret %d\n", pqm->process->pasid, type, retval); goto err_create_queue; } @@ -377,7 +377,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval) { - pr_err("Pasid %d destroy queue %d failed, ret %d\n", + pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n", pqm->process->pasid, pqn->q->properties.queue_id, retval); if (retval != -ETIME) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 7551761f2aa9..69bd0628fdc6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -269,6 +269,8 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, buffer[0] = 0; iolink = container_of(attr, struct kfd_iolink_properties, attr); + if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu)) + return -EPERM; sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type); sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj); sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min); @@ -305,6 +307,8 @@ static ssize_t mem_show(struct kobject *kobj, struct attribute *attr, buffer[0] = 0; mem = container_of(attr, struct kfd_mem_properties, attr); + if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu)) + return -EPERM; sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type); sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes); sysfs_show_32bit_prop(buffer, "flags", mem->flags); @@ -334,6 +338,8 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, buffer[0] = 0; cache = container_of(attr, struct kfd_cache_properties, attr); + if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) + return -EPERM; sysfs_show_32bit_prop(buffer, "processor_id_low", cache->processor_id_low); sysfs_show_32bit_prop(buffer, "level", cache->cache_level); @@ -414,6 +420,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, if (strcmp(attr->name, "gpu_id") == 0) { dev = container_of(attr, struct kfd_topology_device, attr_gpuid); + if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) + return -EPERM; return sysfs_show_32bit_val(buffer, dev->gpu_id); } @@ -421,11 +429,15 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev = container_of(attr, struct kfd_topology_device, attr_name); + if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) + return -EPERM; return sysfs_show_str_val(buffer, dev->node_props.name); } dev = container_of(attr, struct kfd_topology_device, attr_props); + if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) + return -EPERM; sysfs_show_32bit_prop(buffer, "cpu_cores_count", dev->node_props.cpu_cores_count); sysfs_show_32bit_prop(buffer, "simd_count", @@ -1098,6 +1110,9 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) { struct kfd_topology_device *dev; struct kfd_topology_device *out_dev = NULL; + struct kfd_mem_properties *mem; + struct kfd_cache_properties *cache; + struct kfd_iolink_properties *iolink; down_write(&topology_lock); list_for_each_entry(dev, &topology_device_list, list) { @@ -1111,6 +1126,13 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) if (!dev->gpu && (dev->node_props.simd_count > 0)) { dev->gpu = gpu; out_dev = dev; + + list_for_each_entry(mem, &dev->mem_props, list) + mem->gpu = dev->gpu; + list_for_each_entry(cache, &dev->cache_props, list) + cache->gpu = dev->gpu; + list_for_each_entry(iolink, &dev->io_link_props, list) + iolink->gpu = dev->gpu; break; } } @@ -1317,8 +1339,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_RENOIR: case CHIP_ARCTURUS: case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index d4718d58d0f2..15843e0fc756 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -102,6 +102,7 @@ struct kfd_mem_properties { uint32_t flags; uint32_t width; uint32_t mem_clk_max; + struct kfd_dev *gpu; struct kobject *kobj; struct attribute attr; }; @@ -123,6 +124,7 @@ struct kfd_cache_properties { uint32_t cache_latency; uint32_t cache_type; uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; + struct kfd_dev *gpu; struct kobject *kobj; struct attribute attr; }; @@ -141,6 +143,7 @@ struct kfd_iolink_properties { uint32_t max_bandwidth; uint32_t rec_transfer_size; uint32_t flags; + struct kfd_dev *gpu; struct kobject *kobj; struct attribute attr; }; |