aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h139
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c21
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c272
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c108
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c15
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c12
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c37
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h26
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c32
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c25
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h3
25 files changed, 428 insertions, 342 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index a1a35d4d594b..ba0e68057a89 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -5,7 +5,7 @@
config HSA_AMD
bool "HSA kernel driver for AMD GPU devices"
- depends on DRM_AMDGPU && (X86_64 || ARM64)
+ depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
imply AMD_IOMMU_V2 if X86_64
select MMU_NOTIFIER
help
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 177d1e5329a5..9f59ba93cfe0 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -33,7 +33,9 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
- unsigned int vmid, pasid;
+ unsigned int vmid;
+ uint16_t pasid;
+ bool ret;
/* This workaround is due to HW/FW limitation on Hawaii that
* VMID and PASID are not written into ih_ring_entry
@@ -48,13 +50,13 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
*tmp_ihre = *ihre;
vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
- pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid);
+ ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid);
tmp_ihre->ring_id &= 0x000000ff;
tmp_ihre->ring_id |= vmid << 8;
tmp_ihre->ring_id |= pasid << 16;
- return (pasid != 0) &&
+ return ret && (pasid != 0) &&
vmid >= dev->vm_info.first_vmid_kfd &&
vmid <= dev->vm_info.last_vmid_kfd;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 901fe3590165..d3400da6ab64 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -905,7 +905,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x7a5d0000, 0x807c817c,
0x807aff7a, 0x00000080,
0xbf0a717c, 0xbf85fff8,
- 0xbf820141, 0xbef4037e,
+ 0xbf820142, 0xbef4037e,
0x8775ff7f, 0x0000ffff,
0x8875ff75, 0x00040000,
0xbef60380, 0xbef703ff,
@@ -967,7 +967,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x725d0000, 0xe0304080,
0x725d0100, 0xe0304100,
0x725d0200, 0xe0304180,
- 0x725d0300, 0xbf820031,
+ 0x725d0300, 0xbf820032,
0xbef603ff, 0x01000000,
0xbef20378, 0x8078ff78,
0x00000400, 0xbefc0384,
@@ -992,83 +992,84 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x725d0000, 0xe0304100,
0x725d0100, 0xe0304200,
0x725d0200, 0xe0304300,
- 0x725d0300, 0xb9782a05,
- 0x80788178, 0x907c9973,
- 0x877c817c, 0xbf06817c,
- 0xbf850002, 0x8f788978,
- 0xbf820001, 0x8f788a78,
- 0xb9721e06, 0x8f728a72,
- 0x80787278, 0x8078ff78,
- 0x00000200, 0x80f8ff78,
- 0x00000050, 0xbef603ff,
- 0x01000000, 0xbefc03ff,
- 0x0000006c, 0x80f89078,
- 0xf429003a, 0xf0000000,
- 0xbf8cc07f, 0x80fc847c,
- 0xbf800000, 0xbe803100,
- 0xbe823102, 0x80f8a078,
- 0xf42d003a, 0xf0000000,
- 0xbf8cc07f, 0x80fc887c,
- 0xbf800000, 0xbe803100,
- 0xbe823102, 0xbe843104,
- 0xbe863106, 0x80f8c078,
- 0xf431003a, 0xf0000000,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe803100,
- 0xbe823102, 0xbe843104,
- 0xbe863106, 0xbe883108,
- 0xbe8a310a, 0xbe8c310c,
- 0xbe8e310e, 0xbf06807c,
- 0xbf84fff0, 0xb9782a05,
- 0x80788178, 0x907c9973,
- 0x877c817c, 0xbf06817c,
- 0xbf850002, 0x8f788978,
- 0xbf820001, 0x8f788a78,
- 0xb9721e06, 0x8f728a72,
- 0x80787278, 0x8078ff78,
- 0x00000200, 0xbef603ff,
- 0x01000000, 0xf4211bfa,
+ 0x725d0300, 0xbf8c3f70,
+ 0xb9782a05, 0x80788178,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbf850002,
+ 0x8f788978, 0xbf820001,
+ 0x8f788a78, 0xb9721e06,
+ 0x8f728a72, 0x80787278,
+ 0x8078ff78, 0x00000200,
+ 0x80f8ff78, 0x00000050,
+ 0xbef603ff, 0x01000000,
+ 0xbefc03ff, 0x0000006c,
+ 0x80f89078, 0xf429003a,
+ 0xf0000000, 0xbf8cc07f,
+ 0x80fc847c, 0xbf800000,
+ 0xbe803100, 0xbe823102,
+ 0x80f8a078, 0xf42d003a,
+ 0xf0000000, 0xbf8cc07f,
+ 0x80fc887c, 0xbf800000,
+ 0xbe803100, 0xbe823102,
+ 0xbe843104, 0xbe863106,
+ 0x80f8c078, 0xf431003a,
+ 0xf0000000, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe803100, 0xbe823102,
+ 0xbe843104, 0xbe863106,
+ 0xbe883108, 0xbe8a310a,
+ 0xbe8c310c, 0xbe8e310e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb9782a05, 0x80788178,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbf850002,
+ 0x8f788978, 0xbf820001,
+ 0x8f788a78, 0xb9721e06,
+ 0x8f728a72, 0x80787278,
+ 0x8078ff78, 0x00000200,
+ 0xbef603ff, 0x01000000,
+ 0xf4211bfa, 0xf0000000,
+ 0x80788478, 0xf4211b3a,
0xf0000000, 0x80788478,
- 0xf4211b3a, 0xf0000000,
- 0x80788478, 0xf4211b7a,
+ 0xf4211b7a, 0xf0000000,
+ 0x80788478, 0xf4211eba,
0xf0000000, 0x80788478,
- 0xf4211eba, 0xf0000000,
- 0x80788478, 0xf4211efa,
+ 0xf4211efa, 0xf0000000,
+ 0x80788478, 0xf4211c3a,
0xf0000000, 0x80788478,
- 0xf4211c3a, 0xf0000000,
- 0x80788478, 0xf4211c7a,
+ 0xf4211c7a, 0xf0000000,
+ 0x80788478, 0xf4211e7a,
0xf0000000, 0x80788478,
- 0xf4211e7a, 0xf0000000,
- 0x80788478, 0xf4211cfa,
+ 0xf4211cfa, 0xf0000000,
+ 0x80788478, 0xf4211bba,
0xf0000000, 0x80788478,
+ 0xbf8cc07f, 0xb9eef814,
0xf4211bba, 0xf0000000,
0x80788478, 0xbf8cc07f,
- 0xb9eef814, 0xf4211bba,
- 0xf0000000, 0x80788478,
- 0xbf8cc07f, 0xb9eef815,
- 0xbef2036d, 0x876dff72,
- 0x0000ffff, 0xbefc036f,
- 0xbefe037a, 0xbeff037b,
- 0x876f71ff, 0x000003ff,
- 0xb9ef4803, 0xb9f9f816,
- 0x876f71ff, 0xfffff800,
- 0x906f8b6f, 0xb9efa2c3,
- 0xb9f3f801, 0x876fff72,
- 0xfc000000, 0x906f9a6f,
- 0x8f6f906f, 0xbef30380,
+ 0xb9eef815, 0xbef2036d,
+ 0x876dff72, 0x0000ffff,
+ 0xbefc036f, 0xbefe037a,
+ 0xbeff037b, 0x876f71ff,
+ 0x000003ff, 0xb9ef4803,
+ 0xb9f9f816, 0x876f71ff,
+ 0xfffff800, 0x906f8b6f,
+ 0xb9efa2c3, 0xb9f3f801,
+ 0x876fff72, 0xfc000000,
+ 0x906f9a6f, 0x8f6f906f,
+ 0xbef30380, 0x88736f73,
+ 0x876fff72, 0x02000000,
+ 0x906f996f, 0x8f6f8f6f,
0x88736f73, 0x876fff72,
- 0x02000000, 0x906f996f,
- 0x8f6f8f6f, 0x88736f73,
- 0x876fff72, 0x01000000,
- 0x906f986f, 0x8f6f996f,
- 0x88736f73, 0x876fff70,
- 0x00800000, 0x906f976f,
- 0xb9f3f807, 0x87fe7e7e,
- 0x87ea6a6a, 0xb9f0f802,
- 0xbf8a0000, 0xbe80226c,
- 0xbf810000, 0xbf9f0000,
+ 0x01000000, 0x906f986f,
+ 0x8f6f996f, 0x88736f73,
+ 0x876fff70, 0x00800000,
+ 0x906f976f, 0xb9f3f807,
+ 0x87fe7e7e, 0x87ea6a6a,
+ 0xb9f0f802, 0xbf8a0000,
+ 0xbe80226c, 0xbf810000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0x00000000,
};
static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf820001, 0xbf8202c4,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index cdaa523ce6be..4433bda2ce25 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -758,6 +758,7 @@ L_RESTORE_V0:
buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
+ s_waitcnt vmcnt(0)
/* restore SGPRs */
//will be 2+8+16*6
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 1d3cd5c50d5f..1544007af34a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -49,7 +49,7 @@ static const char kfd_dev_name[] = "kfd";
static const struct file_operations kfd_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = kfd_ioctl,
- .compat_ioctl = kfd_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.open = kfd_open,
.mmap = kfd_mmap,
};
@@ -282,7 +282,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
goto err_bind_process;
}
- pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
+ pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
p->pasid,
dev->id);
@@ -332,7 +332,7 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
int retval;
struct kfd_ioctl_destroy_queue_args *args = data;
- pr_debug("Destroying queue id %d for pasid %d\n",
+ pr_debug("Destroying queue id %d for pasid 0x%x\n",
args->queue_id,
p->pasid);
@@ -378,7 +378,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
properties.queue_percent = args->queue_percentage;
properties.priority = args->queue_priority;
- pr_debug("Updating queue id %d for pasid %d\n",
+ pr_debug("Updating queue id %d for pasid 0x%x\n",
args->queue_id, p->pasid);
mutex_lock(&p->mutex);
@@ -855,7 +855,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
struct kfd_process_device_apertures *pAperture;
struct kfd_process_device *pdd;
- dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+ dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
args->num_of_nodes = 0;
@@ -913,7 +913,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
uint32_t nodes = 0;
int ret;
- dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+ dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
if (args->num_of_nodes == 0) {
/* Return number of nodes, so that user space can alloacate
@@ -1128,7 +1128,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
mutex_unlock(&p->mutex);
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
- pdd->qpd.vmid != 0)
+ pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
dev->kfd2kgd->set_scratch_backing_va(
dev->kgd, args->va_addr, pdd->qpd.vmid);
@@ -1801,7 +1801,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
} else
goto err_i1;
- dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
+ dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
process = kfd_get_process(current);
if (IS_ERR(process)) {
@@ -1856,7 +1856,8 @@ err_i1:
kfree(kdata);
if (retcode)
- dev_dbg(kfd_device, "ret = %d\n", retcode);
+ dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
+ nr, arg, retcode);
return retcode;
}
@@ -1877,7 +1878,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pr_debug("Process %d mapping mmio page\n"
+ pr_debug("pasid 0x%x mapping mmio page\n"
" target user address == 0x%08llX\n"
" physical address == 0x%08llX\n"
" vm_flags == 0x%04lX\n"
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 66387caf966e..de9f68d5c312 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -138,6 +138,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
/* TODO - check & update Vega10 cache details */
#define vega10_cache_info carrizo_cache_info
#define raven_cache_info carrizo_cache_info
+#define renoir_cache_info carrizo_cache_info
/* TODO - check & update Navi10 cache details */
#define navi10_cache_info carrizo_cache_info
@@ -670,7 +671,13 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info = raven_cache_info;
num_of_cache_types = ARRAY_SIZE(raven_cache_info);
break;
+ case CHIP_RENOIR:
+ pcache_info = renoir_cache_info;
+ num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
+ break;
case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_NAVI14:
pcache_info = navi10_cache_info;
num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
break;
@@ -703,7 +710,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info,
cu_info,
mem_available,
- cu_info->cu_bitmap[i][j],
+ cu_info->cu_bitmap[i % 4][j + i / 4],
ct,
cu_processor_id,
k);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index a3441b0e385b..d59f2cd056c6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -761,6 +761,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
{
int status = 0;
unsigned int vmid;
+ uint16_t queried_pasid;
union SQ_CMD_BITS reg_sq_cmd;
union GRBM_GFX_INDEX_BITS reg_gfx_index;
struct kfd_process_device *pdd;
@@ -782,19 +783,18 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
*/
for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
- if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
- (dev->kgd, vmid)) {
- if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
- (dev->kgd, vmid) == p->pasid) {
- pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
- vmid, p->pasid);
- break;
- }
+ status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
+ (dev->kgd, vmid, &queried_pasid);
+
+ if (status && queried_pasid == p->pasid) {
+ pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
+ vmid, p->pasid);
+ break;
}
}
if (vmid > last_vmid_to_scan) {
- pr_err("Didn't find vmid for pasid %d\n", p->pasid);
+ pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
return -EFAULT;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
index 9d4af961c5d1..9bfa50633654 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -96,7 +96,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
{
if (pmgr->pasid != 0) {
- pr_debug("H/W debugger is already active using pasid %d\n",
+ pr_debug("H/W debugger is already active using pasid 0x%x\n",
pmgr->pasid);
return -EBUSY;
}
@@ -117,7 +117,7 @@ long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
{
/* Is the requests coming from the already registered process? */
if (pmgr->pasid != p->pasid) {
- pr_debug("H/W debugger is not registered by calling pasid %d\n",
+ pr_debug("H/W debugger is not registered by calling pasid 0x%x\n",
p->pasid);
return -EINVAL;
}
@@ -134,7 +134,7 @@ long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
{
/* Is the requests coming from the already registered process? */
if (pmgr->pasid != wac_info->process->pasid) {
- pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+ pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n",
wac_info->process->pasid);
return -EINVAL;
}
@@ -147,7 +147,7 @@ long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
{
/* Is the requests coming from the already registered process? */
if (pmgr->pasid != adw_info->process->pasid) {
- pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+ pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n",
adw_info->process->pasid);
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 0dc1084b5e82..4fa8834ce7cb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -39,6 +39,41 @@
*/
static atomic_t kfd_locked = ATOMIC_INIT(0);
+#ifdef CONFIG_DRM_AMDGPU_CIK
+extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
+#endif
+extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
+extern const struct kfd2kgd_calls arcturus_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
+
+static const struct kfd2kgd_calls *kfd2kgd_funcs[] = {
+#ifdef KFD_SUPPORT_IOMMU_V2
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ [CHIP_KAVERI] = &gfx_v7_kfd2kgd,
+#endif
+ [CHIP_CARRIZO] = &gfx_v8_kfd2kgd,
+ [CHIP_RAVEN] = &gfx_v9_kfd2kgd,
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ [CHIP_HAWAII] = &gfx_v7_kfd2kgd,
+#endif
+ [CHIP_TONGA] = &gfx_v8_kfd2kgd,
+ [CHIP_FIJI] = &gfx_v8_kfd2kgd,
+ [CHIP_POLARIS10] = &gfx_v8_kfd2kgd,
+ [CHIP_POLARIS11] = &gfx_v8_kfd2kgd,
+ [CHIP_POLARIS12] = &gfx_v8_kfd2kgd,
+ [CHIP_VEGAM] = &gfx_v8_kfd2kgd,
+ [CHIP_VEGA10] = &gfx_v9_kfd2kgd,
+ [CHIP_VEGA12] = &gfx_v9_kfd2kgd,
+ [CHIP_VEGA20] = &gfx_v9_kfd2kgd,
+ [CHIP_RENOIR] = &gfx_v9_kfd2kgd,
+ [CHIP_ARCTURUS] = &arcturus_kfd2kgd,
+ [CHIP_NAVI10] = &gfx_v10_kfd2kgd,
+ [CHIP_NAVI12] = &gfx_v10_kfd2kgd,
+ [CHIP_NAVI14] = &gfx_v10_kfd2kgd,
+};
+
#ifdef KFD_SUPPORT_IOMMU_V2
static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
@@ -351,6 +386,24 @@ static const struct kfd_device_info arcturus_device_info = {
.num_sdma_queues_per_engine = 8,
};
+static const struct kfd_device_info renoir_device_info = {
+ .asic_family = CHIP_RENOIR,
+ .asic_name = "renoir",
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = true,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 1,
+ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+};
+
static const struct kfd_device_info navi10_device_info = {
.asic_family = CHIP_NAVI10,
.asic_name = "navi10",
@@ -369,133 +422,64 @@ static const struct kfd_device_info navi10_device_info = {
.num_sdma_queues_per_engine = 8,
};
-struct kfd_deviceid {
- unsigned short did;
- const struct kfd_device_info *device_info;
+static const struct kfd_device_info navi12_device_info = {
+ .asic_family = CHIP_NAVI12,
+ .asic_name = "navi12",
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .needs_iommu_device = false,
+ .supports_cwsr = true,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 8,
+};
+
+static const struct kfd_device_info navi14_device_info = {
+ .asic_family = CHIP_NAVI14,
+ .asic_name = "navi14",
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .needs_iommu_device = false,
+ .supports_cwsr = true,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 8,
};
-static const struct kfd_deviceid supported_devices[] = {
+/* For each entry, [0] is regular and [1] is virtualisation device. */
+static const struct kfd_device_info *kfd_supported_devices[][2] = {
#ifdef KFD_SUPPORT_IOMMU_V2
- { 0x1304, &kaveri_device_info }, /* Kaveri */
- { 0x1305, &kaveri_device_info }, /* Kaveri */
- { 0x1306, &kaveri_device_info }, /* Kaveri */
- { 0x1307, &kaveri_device_info }, /* Kaveri */
- { 0x1309, &kaveri_device_info }, /* Kaveri */
- { 0x130A, &kaveri_device_info }, /* Kaveri */
- { 0x130B, &kaveri_device_info }, /* Kaveri */
- { 0x130C, &kaveri_device_info }, /* Kaveri */
- { 0x130D, &kaveri_device_info }, /* Kaveri */
- { 0x130E, &kaveri_device_info }, /* Kaveri */
- { 0x130F, &kaveri_device_info }, /* Kaveri */
- { 0x1310, &kaveri_device_info }, /* Kaveri */
- { 0x1311, &kaveri_device_info }, /* Kaveri */
- { 0x1312, &kaveri_device_info }, /* Kaveri */
- { 0x1313, &kaveri_device_info }, /* Kaveri */
- { 0x1315, &kaveri_device_info }, /* Kaveri */
- { 0x1316, &kaveri_device_info }, /* Kaveri */
- { 0x1317, &kaveri_device_info }, /* Kaveri */
- { 0x1318, &kaveri_device_info }, /* Kaveri */
- { 0x131B, &kaveri_device_info }, /* Kaveri */
- { 0x131C, &kaveri_device_info }, /* Kaveri */
- { 0x131D, &kaveri_device_info }, /* Kaveri */
- { 0x9870, &carrizo_device_info }, /* Carrizo */
- { 0x9874, &carrizo_device_info }, /* Carrizo */
- { 0x9875, &carrizo_device_info }, /* Carrizo */
- { 0x9876, &carrizo_device_info }, /* Carrizo */
- { 0x9877, &carrizo_device_info }, /* Carrizo */
- { 0x15DD, &raven_device_info }, /* Raven */
- { 0x15D8, &raven_device_info }, /* Raven */
+ [CHIP_KAVERI] = {&kaveri_device_info, NULL},
+ [CHIP_CARRIZO] = {&carrizo_device_info, NULL},
+ [CHIP_RAVEN] = {&raven_device_info, NULL},
#endif
- { 0x67A0, &hawaii_device_info }, /* Hawaii */
- { 0x67A1, &hawaii_device_info }, /* Hawaii */
- { 0x67A2, &hawaii_device_info }, /* Hawaii */
- { 0x67A8, &hawaii_device_info }, /* Hawaii */
- { 0x67A9, &hawaii_device_info }, /* Hawaii */
- { 0x67AA, &hawaii_device_info }, /* Hawaii */
- { 0x67B0, &hawaii_device_info }, /* Hawaii */
- { 0x67B1, &hawaii_device_info }, /* Hawaii */
- { 0x67B8, &hawaii_device_info }, /* Hawaii */
- { 0x67B9, &hawaii_device_info }, /* Hawaii */
- { 0x67BA, &hawaii_device_info }, /* Hawaii */
- { 0x67BE, &hawaii_device_info }, /* Hawaii */
- { 0x6920, &tonga_device_info }, /* Tonga */
- { 0x6921, &tonga_device_info }, /* Tonga */
- { 0x6928, &tonga_device_info }, /* Tonga */
- { 0x6929, &tonga_device_info }, /* Tonga */
- { 0x692B, &tonga_device_info }, /* Tonga */
- { 0x6938, &tonga_device_info }, /* Tonga */
- { 0x6939, &tonga_device_info }, /* Tonga */
- { 0x7300, &fiji_device_info }, /* Fiji */
- { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/
- { 0x67C0, &polaris10_device_info }, /* Polaris10 */
- { 0x67C1, &polaris10_device_info }, /* Polaris10 */
- { 0x67C2, &polaris10_device_info }, /* Polaris10 */
- { 0x67C4, &polaris10_device_info }, /* Polaris10 */
- { 0x67C7, &polaris10_device_info }, /* Polaris10 */
- { 0x67C8, &polaris10_device_info }, /* Polaris10 */
- { 0x67C9, &polaris10_device_info }, /* Polaris10 */
- { 0x67CA, &polaris10_device_info }, /* Polaris10 */
- { 0x67CC, &polaris10_device_info }, /* Polaris10 */
- { 0x67CF, &polaris10_device_info }, /* Polaris10 */
- { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/
- { 0x67DF, &polaris10_device_info }, /* Polaris10 */
- { 0x6FDF, &polaris10_device_info }, /* Polaris10 */
- { 0x67E0, &polaris11_device_info }, /* Polaris11 */
- { 0x67E1, &polaris11_device_info }, /* Polaris11 */
- { 0x67E3, &polaris11_device_info }, /* Polaris11 */
- { 0x67E7, &polaris11_device_info }, /* Polaris11 */
- { 0x67E8, &polaris11_device_info }, /* Polaris11 */
- { 0x67E9, &polaris11_device_info }, /* Polaris11 */
- { 0x67EB, &polaris11_device_info }, /* Polaris11 */
- { 0x67EF, &polaris11_device_info }, /* Polaris11 */
- { 0x67FF, &polaris11_device_info }, /* Polaris11 */
- { 0x6980, &polaris12_device_info }, /* Polaris12 */
- { 0x6981, &polaris12_device_info }, /* Polaris12 */
- { 0x6985, &polaris12_device_info }, /* Polaris12 */
- { 0x6986, &polaris12_device_info }, /* Polaris12 */
- { 0x6987, &polaris12_device_info }, /* Polaris12 */
- { 0x6995, &polaris12_device_info }, /* Polaris12 */
- { 0x6997, &polaris12_device_info }, /* Polaris12 */
- { 0x699F, &polaris12_device_info }, /* Polaris12 */
- { 0x694C, &vegam_device_info }, /* VegaM */
- { 0x694E, &vegam_device_info }, /* VegaM */
- { 0x694F, &vegam_device_info }, /* VegaM */
- { 0x6860, &vega10_device_info }, /* Vega10 */
- { 0x6861, &vega10_device_info }, /* Vega10 */
- { 0x6862, &vega10_device_info }, /* Vega10 */
- { 0x6863, &vega10_device_info }, /* Vega10 */
- { 0x6864, &vega10_device_info }, /* Vega10 */
- { 0x6867, &vega10_device_info }, /* Vega10 */
- { 0x6868, &vega10_device_info }, /* Vega10 */
- { 0x6869, &vega10_device_info }, /* Vega10 */
- { 0x686A, &vega10_device_info }, /* Vega10 */
- { 0x686B, &vega10_device_info }, /* Vega10 */
- { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/
- { 0x686D, &vega10_device_info }, /* Vega10 */
- { 0x686E, &vega10_device_info }, /* Vega10 */
- { 0x686F, &vega10_device_info }, /* Vega10 */
- { 0x687F, &vega10_device_info }, /* Vega10 */
- { 0x69A0, &vega12_device_info }, /* Vega12 */
- { 0x69A1, &vega12_device_info }, /* Vega12 */
- { 0x69A2, &vega12_device_info }, /* Vega12 */
- { 0x69A3, &vega12_device_info }, /* Vega12 */
- { 0x69AF, &vega12_device_info }, /* Vega12 */
- { 0x66a0, &vega20_device_info }, /* Vega20 */
- { 0x66a1, &vega20_device_info }, /* Vega20 */
- { 0x66a2, &vega20_device_info }, /* Vega20 */
- { 0x66a3, &vega20_device_info }, /* Vega20 */
- { 0x66a4, &vega20_device_info }, /* Vega20 */
- { 0x66a7, &vega20_device_info }, /* Vega20 */
- { 0x66af, &vega20_device_info }, /* Vega20 */
- { 0x738C, &arcturus_device_info }, /* Arcturus */
- { 0x7388, &arcturus_device_info }, /* Arcturus */
- { 0x738E, &arcturus_device_info }, /* Arcturus */
- { 0x7390, &arcturus_device_info }, /* Arcturus vf */
- { 0x7310, &navi10_device_info }, /* Navi10 */
- { 0x7312, &navi10_device_info }, /* Navi10 */
- { 0x7318, &navi10_device_info }, /* Navi10 */
- { 0x731a, &navi10_device_info }, /* Navi10 */
- { 0x731f, &navi10_device_info }, /* Navi10 */
+ [CHIP_HAWAII] = {&hawaii_device_info, NULL},
+ [CHIP_TONGA] = {&tonga_device_info, NULL},
+ [CHIP_FIJI] = {&fiji_device_info, &fiji_vf_device_info},
+ [CHIP_POLARIS10] = {&polaris10_device_info, &polaris10_vf_device_info},
+ [CHIP_POLARIS11] = {&polaris11_device_info, NULL},
+ [CHIP_POLARIS12] = {&polaris12_device_info, NULL},
+ [CHIP_VEGAM] = {&vegam_device_info, NULL},
+ [CHIP_VEGA10] = {&vega10_device_info, &vega10_vf_device_info},
+ [CHIP_VEGA12] = {&vega12_device_info, NULL},
+ [CHIP_VEGA20] = {&vega20_device_info, NULL},
+ [CHIP_RENOIR] = {&renoir_device_info, NULL},
+ [CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
+ [CHIP_NAVI10] = {&navi10_device_info, NULL},
+ [CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
+ [CHIP_NAVI14] = {&navi14_device_info, NULL},
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -504,32 +488,25 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
static int kfd_resume(struct kfd_dev *kfd);
-static const struct kfd_device_info *lookup_device_info(unsigned short did)
+struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
+ struct pci_dev *pdev, unsigned int asic_type, bool vf)
{
- size_t i;
+ struct kfd_dev *kfd;
+ const struct kfd_device_info *device_info;
+ const struct kfd2kgd_calls *f2g;
- for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
- if (supported_devices[i].did == did) {
- WARN_ON(!supported_devices[i].device_info);
- return supported_devices[i].device_info;
- }
+ if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void *) * 2)
+ || asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
+ dev_err(kfd_device, "asic_type %d out of range\n", asic_type);
+ return NULL; /* asic_type out of range */
}
- dev_warn(kfd_device, "DID %04x is missing in supported_devices\n",
- did);
+ device_info = kfd_supported_devices[asic_type][vf];
+ f2g = kfd2kgd_funcs[asic_type];
- return NULL;
-}
-
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
- struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
-{
- struct kfd_dev *kfd;
- const struct kfd_device_info *device_info =
- lookup_device_info(pdev->device);
-
- if (!device_info) {
- dev_err(kfd_device, "kgd2kfd_probe failed\n");
+ if (!device_info || !f2g) {
+ dev_err(kfd_device, "%s %s not supported in kfd\n",
+ amdgpu_asic_name[asic_type], vf ? "VF" : "");
return NULL;
}
@@ -593,10 +570,12 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
}
bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources)
{
unsigned int size;
+ kfd->ddev = ddev;
kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
KGD_ENGINE_MEC1);
kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
@@ -751,9 +730,6 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
return 0;
kgd2kfd_suspend(kfd);
- /* hold dqm->lock to prevent further execution*/
- dqm_lock(kfd->dqm);
-
kfd_signal_reset_event(kfd);
return 0;
}
@@ -771,8 +747,6 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
- dqm_unlock(kfd->dqm);
-
ret = kfd_resume(kfd);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d985e31fcc1e..984c2f2b24b6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -195,20 +195,30 @@ static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
- int bit, allocated_vmid;
+ int allocated_vmid = -1, i;
- if (dqm->vmid_bitmap == 0)
- return -ENOMEM;
+ for (i = dqm->dev->vm_info.first_vmid_kfd;
+ i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
+ if (!dqm->vmid_pasid[i]) {
+ allocated_vmid = i;
+ break;
+ }
+ }
+
+ if (allocated_vmid < 0) {
+ pr_err("no more vmid to allocate\n");
+ return -ENOSPC;
+ }
+
+ pr_debug("vmid allocated: %d\n", allocated_vmid);
+
+ dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
- bit = ffs(dqm->vmid_bitmap) - 1;
- dqm->vmid_bitmap &= ~(1 << bit);
+ set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
- allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
- pr_debug("vmid allocation %d\n", allocated_vmid);
qpd->vmid = allocated_vmid;
q->properties.vmid = allocated_vmid;
- set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
program_sh_mem_settings(dqm, qpd);
/* qpd->page_table_base is set earlier when register_process()
@@ -220,8 +230,9 @@ static int allocate_vmid(struct device_queue_manager *dqm,
/* invalidate the VM context after pasid and vmid mapping is set up */
kfd_flush_tlb(qpd_to_pdd(qpd));
- dqm->dev->kfd2kgd->set_scratch_backing_va(
- dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);
+ if (dqm->dev->kfd2kgd->set_scratch_backing_va)
+ dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
+ qpd->sh_hidden_private_base, qpd->vmid);
return 0;
}
@@ -248,8 +259,6 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
- int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
-
/* On GFX v7, CP doesn't flush TC at dequeue */
if (q->device->device_info->asic_family == CHIP_HAWAII)
if (flush_texture_cache_nocpsch(q->device, qpd))
@@ -259,8 +268,8 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
/* Release the vmid mapping */
set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
+ dqm->vmid_pasid[qpd->vmid] = 0;
- dqm->vmid_bitmap |= (1 << bit);
qpd->vmid = 0;
q->properties.vmid = 0;
}
@@ -331,6 +340,10 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (q->properties.is_active) {
+ if (!dqm->sched_running) {
+ WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
+ goto add_queue_to_list;
+ }
if (WARN(q->process->mm != current->mm,
"should only run in user thread"))
@@ -342,6 +355,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
goto out_free_mqd;
}
+add_queue_to_list:
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
if (q->properties.is_active)
@@ -449,6 +463,11 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
deallocate_doorbell(qpd, q);
+ if (!dqm->sched_running) {
+ WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
+ return 0;
+ }
+
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
KFD_UNMAP_LATENCY_MS,
@@ -524,6 +543,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+
+ if (!dqm->sched_running) {
+ WARN_ONCE(1, "Update non-HWS queue while stopped\n");
+ goto out_unlock;
+ }
+
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
@@ -579,7 +604,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
goto out;
pdd = qpd_to_pdd(qpd);
- pr_info_ratelimited("Evicting PASID %u queues\n",
+ pr_info_ratelimited("Evicting PASID 0x%x queues\n",
pdd->process->pasid);
/* Mark all queues as evicted. Deactivate all active queues on
@@ -593,6 +618,11 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = false;
+ dqm->queue_count--;
+
+ if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
+ continue;
+
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
@@ -601,7 +631,6 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
* maintain a consistent eviction state
*/
ret = retval;
- dqm->queue_count--;
}
out:
@@ -621,7 +650,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
goto out;
pdd = qpd_to_pdd(qpd);
- pr_info_ratelimited("Evicting PASID %u queues\n",
+ pr_info_ratelimited("Evicting PASID 0x%x queues\n",
pdd->process->pasid);
/* Mark all queues as evicted. Deactivate all active queues on
@@ -667,7 +696,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
goto out;
}
- pr_info_ratelimited("Restoring PASID %u queues\n",
+ pr_info_ratelimited("Restoring PASID 0x%x queues\n",
pdd->process->pasid);
/* Update PD Base in QPD */
@@ -702,6 +731,11 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = true;
+ dqm->queue_count++;
+
+ if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
+ continue;
+
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
q->queue, &q->properties, mm);
if (retval && !ret)
@@ -709,7 +743,6 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
* maintain a consistent eviction state
*/
ret = retval;
- dqm->queue_count++;
}
qpd->evicted = 0;
out:
@@ -739,7 +772,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
goto out;
}
- pr_info_ratelimited("Restoring PASID %u queues\n",
+ pr_info_ratelimited("Restoring PASID 0x%x queues\n",
pdd->process->pasid);
/* Update PD Base in QPD */
@@ -879,7 +912,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
dqm->allocated_queues[pipe] |= 1 << queue;
}
- dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
+ memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
+
dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
@@ -902,12 +936,20 @@ static void uninitialize(struct device_queue_manager *dqm)
static int start_nocpsch(struct device_queue_manager *dqm)
{
init_interrupts(dqm);
- return pm_init(&dqm->packets, dqm);
+
+ if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
+ return pm_init(&dqm->packets, dqm);
+ dqm->sched_running = true;
+
+ return 0;
}
static int stop_nocpsch(struct device_queue_manager *dqm)
{
- pm_uninit(&dqm->packets);
+ if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
+ pm_uninit(&dqm->packets);
+ dqm->sched_running = false;
+
return 0;
}
@@ -1058,6 +1100,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm_lock(dqm);
/* clear hang status when driver try to start the hw scheduler */
dqm->is_hws_hang = false;
+ dqm->sched_running = true;
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
dqm_unlock(dqm);
@@ -1073,6 +1116,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
{
dqm_lock(dqm);
unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ dqm->sched_running = false;
dqm_unlock(dqm);
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
@@ -1259,9 +1303,10 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
{
int retval;
+ if (!dqm->sched_running)
+ return 0;
if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
return 0;
-
if (dqm->active_runlist)
return 0;
@@ -1283,6 +1328,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
{
int retval = 0;
+ if (!dqm->sched_running)
+ return 0;
if (dqm->is_hws_hang)
return -EIO;
if (!dqm->active_runlist)
@@ -1676,7 +1723,8 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
struct kfd_dev *dev = dqm->dev;
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
- dev->device_info->num_sdma_engines *
+ (dev->device_info->num_sdma_engines +
+ dev->device_info->num_xgmi_sdma_engines) *
dev->device_info->num_sdma_queues_per_engine +
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
@@ -1786,10 +1834,13 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+ case CHIP_RENOIR:
case CHIP_ARCTURUS:
device_queue_manager_init_v9(&dqm->asic_ops);
break;
case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_NAVI14:
device_queue_manager_init_v10_navi10(&dqm->asic_ops);
break;
default:
@@ -1883,6 +1934,12 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
int pipe, queue;
int r = 0;
+ if (!dqm->sched_running) {
+ seq_printf(m, " Device is stopped\n");
+
+ return 0;
+ }
+
r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
&dump, &n_regs);
@@ -1917,7 +1974,8 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
}
}
- for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
+ for (pipe = 0; pipe < get_num_sdma_engines(dqm) +
+ get_num_xgmi_sdma_engines(dqm); pipe++) {
for (queue = 0;
queue < dqm->dev->device_info->num_sdma_queues_per_engine;
queue++) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 90db2c9275f6..a8c37e6da027 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -32,6 +32,8 @@
#include "kfd_mqd_manager.h"
+#define VMID_NUM 16
+
struct device_process_node {
struct qcm_process_device *qpd;
struct list_head list;
@@ -185,7 +187,8 @@ struct device_queue_manager {
unsigned int *allocated_queues;
uint64_t sdma_bitmap;
uint64_t xgmi_sdma_bitmap;
- unsigned int vmid_bitmap;
+ /* the pasid mapping for each kfd vmid */
+ uint16_t vmid_pasid[VMID_NUM];
uint64_t pipelines_addr;
struct kfd_mem_obj *pipeline_mem;
uint64_t fence_gpu_addr;
@@ -198,6 +201,7 @@ struct device_queue_manager {
bool is_hws_hang;
struct work_struct hw_exception_work;
struct kfd_mem_obj hiq_sdma_mqd;
+ bool sched_running;
};
void device_queue_manager_init_cik(
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index d674d4b3340f..908081c85de1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -852,8 +852,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
if (type == KFD_EVENT_TYPE_MEMORY) {
dev_warn(kfd_device,
- "Sending SIGSEGV to HSA Process with PID %d ",
- p->lead_thread->pid);
+ "Sending SIGSEGV to process %d (pasid 0x%x)",
+ p->lead_thread->pid, p->pasid);
send_sig(SIGSEGV, p->lead_thread, 0);
}
@@ -861,13 +861,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
if (send_signal) {
if (send_sigterm) {
dev_warn(kfd_device,
- "Sending SIGTERM to HSA Process with PID %d ",
- p->lead_thread->pid);
+ "Sending SIGTERM to process %d (pasid 0x%x)",
+ p->lead_thread->pid, p->pasid);
send_sig(SIGTERM, p->lead_thread, 0);
} else {
dev_err(kfd_device,
- "HSA Process (PID %d) got unhandled exception",
- p->lead_thread->pid);
+ "Process %d (pasid 0x%x) got unhandled exception",
+ p->lead_thread->pid, p->pasid);
}
}
}
@@ -936,7 +936,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
/* Workaround on Raven to not kill the process when memory is freed
* before IOMMU is able to finish processing all the excessive PPRs
*/
- if (dev->device_info->asic_family != CHIP_RAVEN) {
+ if (dev->device_info->asic_family != CHIP_RAVEN &&
+ dev->device_info->asic_family != CHIP_RENOIR) {
mutex_lock(&p->event_mutex);
/* Lookup events by type and signal them */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 9dc4bff8085e..bb77b8890e77 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -369,8 +369,13 @@ int kfd_init_apertures(struct kfd_process *process)
/*Iterating over all devices*/
while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
- if (!dev) {
- id++; /* Skip non GPU devices */
+ if (!dev || kfd_devcgroup_check_permission(dev)) {
+ /* Skip non GPU devices and devices to which the
+ * current process have no access to. Access can be
+ * limited by placing the process in a specific
+ * cgroup hierarchy
+ */
+ id++;
continue;
}
@@ -405,8 +410,11 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+ case CHIP_RENOIR:
case CHIP_ARCTURUS:
case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_NAVI14:
kfd_init_apertures_v9(pdd, id);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 3ef67d2e0d9f..e05d75ecda21 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -54,8 +54,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
memcpy(patched_ihre, ih_ring_entry,
dev->device_info->ih_ring_entry_size);
- pasid = dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid(
- dev->kgd, vmid);
+ pasid = dev->dqm->vmid_pasid[vmid];
/* Patch the pasid field */
patched_ihre[3] = cpu_to_le32((le32_to_cpu(patched_ihre[3])
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index c56ac47cd318..bc47f6a44456 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -62,6 +62,11 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
}
kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
+ if (unlikely(!kfd->ih_wq)) {
+ kfifo_free(&kfd->ih_fifo);
+ dev_err(kfd_chardev(), "Failed to allocate KFD IH workqueue\n");
+ return -ENOMEM;
+ }
spin_lock_init(&kfd->interrupt_lock);
INIT_WORK(&kfd->interrupt_work, interrupt_wq);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index 5f35df23fb18..193e2835bd4d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -160,7 +160,7 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
if (!p)
return;
- pr_debug("Unbinding process %d from IOMMU\n", pasid);
+ pr_debug("Unbinding process 0x%x from IOMMU\n", pasid);
mutex_lock(kfd_get_dbgmgr_mutex());
@@ -194,7 +194,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
struct kfd_dev *dev;
dev_warn_ratelimited(kfd_device,
- "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
+ "Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X",
PCI_BUS_NUM(pdev->devfn),
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn),
@@ -235,7 +235,7 @@ static int kfd_bind_processes_to_device(struct kfd_dev *kfd)
err = amd_iommu_bind_pasid(kfd->pdev, p->pasid,
p->lead_thread);
if (err < 0) {
- pr_err("Unexpected pasid %d binding failure\n",
+ pr_err("Unexpected pasid 0x%x binding failure\n",
p->pasid);
mutex_unlock(&p->mutex);
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 8b4564f71a7a..11d244891393 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -330,10 +330,13 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+ case CHIP_RENOIR:
case CHIP_ARCTURUS:
kernel_queue_init_v9(&kq->ops_asic_specific);
break;
case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_NAVI14:
kernel_queue_init_v10(&kq->ops_asic_specific);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 986ff52d5750..f4b7f7e6c40e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -82,7 +82,7 @@ static void kfd_exit(void)
kfd_chardev_exit();
}
-int kgd2kfd_init()
+int kgd2kfd_init(void)
{
return kfd_init();
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 9cd3eb2d90bd..4a236b2c2354 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -69,35 +69,13 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
struct queue_properties *q)
{
- int retval;
- struct kfd_mem_obj *mqd_mem_obj = NULL;
+ struct kfd_mem_obj *mqd_mem_obj;
- /* From V9, for CWSR, the control stack is located on the next page
- * boundary after the mqd, we will use the gtt allocation function
- * instead of sub-allocation function.
- */
- if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
- mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
- if (!mqd_mem_obj)
- return NULL;
- retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
- ALIGN(q->ctl_stack_size, PAGE_SIZE) +
- ALIGN(sizeof(struct v10_compute_mqd), PAGE_SIZE),
- &(mqd_mem_obj->gtt_mem),
- &(mqd_mem_obj->gpu_addr),
- (void *)&(mqd_mem_obj->cpu_ptr), true);
- } else {
- retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
- &mqd_mem_obj);
- }
-
- if (retval) {
- kfree(mqd_mem_obj);
+ if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
+ &mqd_mem_obj))
return NULL;
- }
return mqd_mem_obj;
-
}
static void init_mqd(struct mqd_manager *mm, void **mqd,
@@ -250,14 +228,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
static void free_mqd(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
- struct kfd_dev *kfd = mm->dev;
-
- if (mqd_mem_obj->gtt_mem) {
- amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
- kfree(mqd_mem_obj);
- } else {
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
- }
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
static bool is_occupied(struct mqd_manager *mm, void *mqd,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 2c8624c5b42c..83ef4b3dd2fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -239,10 +239,13 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+ case CHIP_RENOIR:
case CHIP_ARCTURUS:
pm->pmf = &kfd_v9_pm_funcs;
break;
case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_NAVI14:
pm->pmf = &kfd_v10_pm_funcs;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index c89326125d71..060a9e8b301e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -36,6 +36,10 @@
#include <linux/seq_file.h>
#include <linux/kref.h>
#include <linux/sysfs.h>
+#include <linux/device_cgroup.h>
+#include <drm/drm_file.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_device.h>
#include <kgd_kfd_interface.h>
#include "amd_shared.h"
@@ -179,10 +183,6 @@ enum cache_policy {
cache_policy_noncoherent
};
-#define KFD_IS_VI(chip) ((chip) >= CHIP_CARRIZO && (chip) <= CHIP_POLARIS11)
-#define KFD_IS_DGPU(chip) (((chip) >= CHIP_TONGA && \
- (chip) <= CHIP_NAVI10) || \
- (chip) == CHIP_HAWAII)
#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
struct kfd_event_interrupt_class {
@@ -230,6 +230,7 @@ struct kfd_dev {
const struct kfd_device_info *device_info;
struct pci_dev *pdev;
+ struct drm_device *ddev;
unsigned int id; /* topology stub index */
@@ -687,7 +688,7 @@ struct kfd_process {
/* We want to receive a notification when the mm_struct is destroyed */
struct mmu_notifier mmu_notifier;
- unsigned int pasid;
+ uint16_t pasid;
unsigned int doorbell_index;
/*
@@ -1040,6 +1041,21 @@ bool kfd_is_locked(void);
void kfd_inc_compute_active(struct kfd_dev *dev);
void kfd_dec_compute_active(struct kfd_dev *dev);
+/* Cgroup Support */
+/* Check with device cgroup if @kfd device is accessible */
+static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd)
+{
+#if defined(CONFIG_CGROUP_DEVICE)
+ struct drm_device *ddev = kfd->ddev;
+
+ return devcgroup_check_permission(DEVCG_DEV_CHAR, ddev->driver->major,
+ ddev->render->index,
+ DEVCG_ACC_WRITE | DEVCG_ACC_READ);
+#else
+ return 0;
+#endif
+}
+
/* Debugfs */
#if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 40e3fc0c6942..10f9af5784f2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -416,7 +416,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
list_for_each_entry_safe(pdd, temp, &p->per_device_data,
per_device_list) {
- pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
+ pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
pdd->dev->id, p->pasid);
if (pdd->drm_file) {
@@ -687,6 +687,8 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
struct kfd_dev *dev)
{
unsigned int i;
+ int range_start = dev->shared_resources.non_cp_doorbells_start;
+ int range_end = dev->shared_resources.non_cp_doorbells_end;
if (!KFD_IS_SOC15(dev->device_info->asic_family))
return 0;
@@ -698,14 +700,16 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
return -ENOMEM;
/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
+ pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
+ pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
+ range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+ range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
+
for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
- if (i >= dev->shared_resources.non_cp_doorbells_start
- && i <= dev->shared_resources.non_cp_doorbells_end) {
+ if (i >= range_start && i <= range_end) {
set_bit(i, qpd->doorbell_bitmap);
set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
qpd->doorbell_bitmap);
- pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i,
- i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
}
}
@@ -1020,7 +1024,7 @@ static void evict_process_worker(struct work_struct *work)
*/
flush_delayed_work(&p->restore_work);
- pr_debug("Started evicting pasid %d\n", p->pasid);
+ pr_debug("Started evicting pasid 0x%x\n", p->pasid);
ret = kfd_process_evict_queues(p);
if (!ret) {
dma_fence_signal(p->ef);
@@ -1029,9 +1033,9 @@ static void evict_process_worker(struct work_struct *work)
queue_delayed_work(kfd_restore_wq, &p->restore_work,
msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
- pr_debug("Finished evicting pasid %d\n", p->pasid);
+ pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
} else
- pr_err("Failed to evict queues of pasid %d\n", p->pasid);
+ pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
}
static void restore_process_worker(struct work_struct *work)
@@ -1046,7 +1050,7 @@ static void restore_process_worker(struct work_struct *work)
* lifetime of this thread, kfd_process p will be valid
*/
p = container_of(dwork, struct kfd_process, restore_work);
- pr_debug("Started restoring pasid %d\n", p->pasid);
+ pr_debug("Started restoring pasid 0x%x\n", p->pasid);
/* Setting last_restore_timestamp before successful restoration.
* Otherwise this would have to be set by KGD (restore_process_bos)
@@ -1062,7 +1066,7 @@ static void restore_process_worker(struct work_struct *work)
ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
&p->ef);
if (ret) {
- pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
+ pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
p->pasid, PROCESS_BACK_OFF_TIME_MS);
ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
@@ -1072,9 +1076,9 @@ static void restore_process_worker(struct work_struct *work)
ret = kfd_process_restore_queues(p);
if (!ret)
- pr_debug("Finished restoring pasid %d\n", p->pasid);
+ pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
else
- pr_err("Failed to restore queues of pasid %d\n", p->pasid);
+ pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}
void kfd_suspend_all_processes(void)
@@ -1088,7 +1092,7 @@ void kfd_suspend_all_processes(void)
cancel_delayed_work_sync(&p->restore_work);
if (kfd_process_evict_queues(p))
- pr_err("Failed to suspend process %d\n", p->pasid);
+ pr_err("Failed to suspend process 0x%x\n", p->pasid);
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
p->ef = NULL;
@@ -1171,7 +1175,7 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- seq_printf(m, "Process %d PASID %d:\n",
+ seq_printf(m, "Process %d PASID 0x%x:\n",
p->lead_thread->tgid, p->pasid);
mutex_lock(&p->mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 7e6c3ee82f5b..2659d226c056 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -53,7 +53,7 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
pr_debug("The new slot id %lu\n", found);
if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
- pr_info("Cannot open more queues for process with pasid %d\n",
+ pr_info("Cannot open more queues for process with pasid 0x%x\n",
pqm->process->pasid);
return -ENOMEM;
}
@@ -298,7 +298,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
if (retval != 0) {
- pr_err("Pasid %d DQM create queue %d failed. ret %d\n",
+ pr_err("Pasid 0x%x DQM create queue %d failed. ret %d\n",
pqm->process->pasid, type, retval);
goto err_create_queue;
}
@@ -377,7 +377,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
- pr_err("Pasid %d destroy queue %d failed, ret %d\n",
+ pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
pqm->process->pasid,
pqn->q->properties.queue_id, retval);
if (retval != -ETIME)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 7551761f2aa9..69bd0628fdc6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -269,6 +269,8 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
buffer[0] = 0;
iolink = container_of(attr, struct kfd_iolink_properties, attr);
+ if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu))
+ return -EPERM;
sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type);
sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj);
sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min);
@@ -305,6 +307,8 @@ static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
buffer[0] = 0;
mem = container_of(attr, struct kfd_mem_properties, attr);
+ if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu))
+ return -EPERM;
sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type);
sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes);
sysfs_show_32bit_prop(buffer, "flags", mem->flags);
@@ -334,6 +338,8 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
buffer[0] = 0;
cache = container_of(attr, struct kfd_cache_properties, attr);
+ if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
+ return -EPERM;
sysfs_show_32bit_prop(buffer, "processor_id_low",
cache->processor_id_low);
sysfs_show_32bit_prop(buffer, "level", cache->cache_level);
@@ -414,6 +420,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
if (strcmp(attr->name, "gpu_id") == 0) {
dev = container_of(attr, struct kfd_topology_device,
attr_gpuid);
+ if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+ return -EPERM;
return sysfs_show_32bit_val(buffer, dev->gpu_id);
}
@@ -421,11 +429,15 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev = container_of(attr, struct kfd_topology_device,
attr_name);
+ if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+ return -EPERM;
return sysfs_show_str_val(buffer, dev->node_props.name);
}
dev = container_of(attr, struct kfd_topology_device,
attr_props);
+ if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+ return -EPERM;
sysfs_show_32bit_prop(buffer, "cpu_cores_count",
dev->node_props.cpu_cores_count);
sysfs_show_32bit_prop(buffer, "simd_count",
@@ -1098,6 +1110,9 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
{
struct kfd_topology_device *dev;
struct kfd_topology_device *out_dev = NULL;
+ struct kfd_mem_properties *mem;
+ struct kfd_cache_properties *cache;
+ struct kfd_iolink_properties *iolink;
down_write(&topology_lock);
list_for_each_entry(dev, &topology_device_list, list) {
@@ -1111,6 +1126,13 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
if (!dev->gpu && (dev->node_props.simd_count > 0)) {
dev->gpu = gpu;
out_dev = dev;
+
+ list_for_each_entry(mem, &dev->mem_props, list)
+ mem->gpu = dev->gpu;
+ list_for_each_entry(cache, &dev->cache_props, list)
+ cache->gpu = dev->gpu;
+ list_for_each_entry(iolink, &dev->io_link_props, list)
+ iolink->gpu = dev->gpu;
break;
}
}
@@ -1317,8 +1339,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
+ case CHIP_RENOIR:
case CHIP_ARCTURUS:
case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_NAVI14:
dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index d4718d58d0f2..15843e0fc756 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -102,6 +102,7 @@ struct kfd_mem_properties {
uint32_t flags;
uint32_t width;
uint32_t mem_clk_max;
+ struct kfd_dev *gpu;
struct kobject *kobj;
struct attribute attr;
};
@@ -123,6 +124,7 @@ struct kfd_cache_properties {
uint32_t cache_latency;
uint32_t cache_type;
uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE];
+ struct kfd_dev *gpu;
struct kobject *kobj;
struct attribute attr;
};
@@ -141,6 +143,7 @@ struct kfd_iolink_properties {
uint32_t max_bandwidth;
uint32_t rec_transfer_size;
uint32_t flags;
+ struct kfd_dev *gpu;
struct kobject *kobj;
struct attribute attr;
};