Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 54
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 148
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 998
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 149
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_events.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 31
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 46
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 14
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 35
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 22
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 66
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 449
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 98
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 46
31 files changed, 930 insertions(+), 1379 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index f6233019f042..d60576ce10cd 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -43,15 +43,15 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
*/
if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
- dev->device_info->asic_family == CHIP_HAWAII) {
+ dev->adev->asic_type == CHIP_HAWAII) {
struct cik_ih_ring_entry *tmp_ihre =
(struct cik_ih_ring_entry *)patched_ihre;
*patched_flag = true;
*tmp_ihre = *ihre;
- vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
- ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid);
+ vmid = f2g->read_vmid_from_vmfault_reg(dev->adev);
+ ret = f2g->get_atc_vmid_pasid_mapping_info(dev->adev, vmid, &pasid);
tmp_ihre->ring_id &= 0x000000ff;
tmp_ihre->ring_id |= vmid << 8;
@@ -113,7 +113,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
kfd_process_vm_fault(dev->dqm, pasid);
memset(&info, 0, sizeof(info));
- amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->kgd, &info);
+ amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info);
if (!info.page_addr && !info.status)
return;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 24ebd61395d8..4bfc0c8ab764 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -321,7 +321,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
- if (KFD_IS_SOC15(dev->device_info->asic_family))
+ if (KFD_IS_SOC15(dev))
/* On SOC15 ASICs, include the doorbell offset within the
* process doorbell frame, which is 2 pages.
*/
@@ -580,7 +580,7 @@ static int kfd_ioctl_dbg_register(struct file *filep,
if (!dev)
return -EINVAL;
- if (dev->device_info->asic_family == CHIP_CARRIZO) {
+ if (dev->adev->asic_type == CHIP_CARRIZO) {
pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
return -EINVAL;
}
@@ -631,7 +631,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep,
if (!dev || !dev->dbgmgr)
return -EINVAL;
- if (dev->device_info->asic_family == CHIP_CARRIZO) {
+ if (dev->adev->asic_type == CHIP_CARRIZO) {
pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
return -EINVAL;
}
@@ -676,7 +676,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep,
if (!dev)
return -EINVAL;
- if (dev->device_info->asic_family == CHIP_CARRIZO) {
+ if (dev->adev->asic_type == CHIP_CARRIZO) {
pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
return -EINVAL;
}
@@ -784,7 +784,7 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep,
if (!dev)
return -EINVAL;
- if (dev->device_info->asic_family == CHIP_CARRIZO) {
+ if (dev->adev->asic_type == CHIP_CARRIZO) {
pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
return -EINVAL;
}
@@ -851,7 +851,7 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
dev = kfd_device_by_id(args->gpu_id);
if (dev)
/* Reading GPU clock counter from KGD */
- args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
+ args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->adev);
else
/* Node without GPU resource */
args->gpu_clock_counter = 0;
@@ -1041,7 +1041,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
goto out_unlock;
}
- err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
+ err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev,
mem, &kern_addr, &size);
if (err) {
pr_err("Failed to map event page to kernel\n");
@@ -1051,7 +1051,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
err = kfd_event_page_set(p, kern_addr, size);
if (err) {
pr_err("Failed to set event page\n");
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->kgd, mem);
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem);
goto out_unlock;
}
@@ -1137,7 +1137,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
dev->kfd2kgd->set_scratch_backing_va(
- dev->kgd, args->va_addr, pdd->qpd.vmid);
+ dev->adev, args->va_addr, pdd->qpd.vmid);
return 0;
@@ -1158,7 +1158,7 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
if (!dev)
return -EINVAL;
- amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
+ amdgpu_amdkfd_get_tile_config(dev->adev, &config);
args->gb_addr_config = config.gb_addr_config;
args->num_banks = config.num_banks;
@@ -1244,7 +1244,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
if (dev->use_iommu_v2)
return false;
- amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
+ amdgpu_amdkfd_get_local_mem_info(dev->adev, &mem_info);
if (mem_info.local_mem_size_private == 0 &&
mem_info.local_mem_size_public > 0)
return true;
@@ -1313,7 +1313,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
err = -EINVAL;
goto err_unlock;
}
- offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
+ offset = dev->adev->rmmio_remap.bus_addr;
if (!offset) {
err = -ENOMEM;
goto err_unlock;
@@ -1321,7 +1321,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
}
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
- dev->kgd, args->va_addr, args->size,
+ dev->adev, args->va_addr, args->size,
pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
flags);
@@ -1353,7 +1353,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
return 0;
err_free:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
pdd->drm_priv, NULL);
err_unlock:
mutex_unlock(&p->mutex);
@@ -1399,7 +1399,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
goto err_unlock;
}
- ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
+ ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev,
(struct kgd_mem *)mem, pdd->drm_priv, &size);
/* If freeing the buffer failed, leave the handle in place for
@@ -1484,7 +1484,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
goto get_mem_obj_from_handle_failed;
}
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- peer->kgd, (struct kgd_mem *)mem,
+ peer->adev, (struct kgd_mem *)mem,
peer_pdd->drm_priv, &table_freed);
if (err) {
pr_err("Failed to map to gpu %d/%d\n",
@@ -1496,7 +1496,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
mutex_unlock(&p->mutex);
- err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+ err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
@@ -1593,7 +1593,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
goto get_mem_obj_from_handle_failed;
}
err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
+ peer->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
if (err) {
pr_err("Failed to unmap from gpu %d/%d\n",
i, args->n_devices);
@@ -1603,8 +1603,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
}
mutex_unlock(&p->mutex);
- if (dev->device_info->asic_family == CHIP_ALDEBARAN) {
- err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd,
+ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) {
+ err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev,
(struct kgd_mem *) mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
@@ -1680,7 +1680,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
{
struct kfd_ioctl_get_dmabuf_info_args *args = data;
struct kfd_dev *dev = NULL;
- struct kgd_dev *dma_buf_kgd;
+ struct amdgpu_device *dmabuf_adev;
void *metadata_buffer = NULL;
uint32_t flags;
unsigned int i;
@@ -1700,15 +1700,15 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
}
/* Get dmabuf info from KGD */
- r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
- &dma_buf_kgd, &args->size,
+ r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd,
+ &dmabuf_adev, &args->size,
metadata_buffer, args->metadata_size,
&args->metadata_size, &flags);
if (r)
goto exit;
/* Reverse-lookup gpu_id from kgd pointer */
- dev = kfd_device_by_kgd(dma_buf_kgd);
+ dev = kfd_device_by_adev(dmabuf_adev);
if (!dev) {
r = -EINVAL;
goto exit;
@@ -1758,7 +1758,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
goto err_unlock;
}
- r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
+ r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->adev, dmabuf,
args->va_addr, pdd->drm_priv,
(struct kgd_mem **)&mem, &size,
NULL);
@@ -1779,7 +1779,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
return 0;
err_free:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
pdd->drm_priv, NULL);
err_unlock:
mutex_unlock(&p->mutex);
@@ -2066,7 +2066,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
- address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
+ address = dev->adev->rmmio_remap.bus_addr;
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
VM_DONTDUMP | VM_PFNMAP;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index cfedfb1e8596..f187596faf66 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1340,7 +1340,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
int ret;
unsigned int num_cu_shared;
- switch (kdev->device_info->asic_family) {
+ switch (kdev->adev->asic_type) {
case CHIP_KAVERI:
pcache_info = kaveri_cache_info;
num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
@@ -1377,67 +1377,71 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info = vegam_cache_info;
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
break;
- case CHIP_VEGA10:
- pcache_info = vega10_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
- break;
- case CHIP_VEGA12:
- pcache_info = vega12_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
- break;
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- pcache_info = vega20_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
- break;
- case CHIP_ALDEBARAN:
- pcache_info = aldebaran_cache_info;
- num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
- break;
- case CHIP_RAVEN:
- pcache_info = raven_cache_info;
- num_of_cache_types = ARRAY_SIZE(raven_cache_info);
- break;
- case CHIP_RENOIR:
- pcache_info = renoir_cache_info;
- num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
- break;
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_CYAN_SKILLFISH:
- pcache_info = navi10_cache_info;
- num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
- break;
- case CHIP_NAVI14:
- pcache_info = navi14_cache_info;
- num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
- break;
- case CHIP_SIENNA_CICHLID:
- pcache_info = sienna_cichlid_cache_info;
- num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
- break;
- case CHIP_NAVY_FLOUNDER:
- pcache_info = navy_flounder_cache_info;
- num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
- break;
- case CHIP_DIMGREY_CAVEFISH:
- pcache_info = dimgrey_cavefish_cache_info;
- num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
- break;
- case CHIP_VANGOGH:
- pcache_info = vangogh_cache_info;
- num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
- break;
- case CHIP_BEIGE_GOBY:
- pcache_info = beige_goby_cache_info;
- num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
- break;
- case CHIP_YELLOW_CARP:
- pcache_info = yellow_carp_cache_info;
- num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
- break;
default:
- return -EINVAL;
+ switch(KFD_GC_VERSION(kdev)) {
+ case IP_VERSION(9, 0, 1):
+ pcache_info = vega10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+ break;
+ case IP_VERSION(9, 2, 1):
+ pcache_info = vega12_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
+ break;
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ pcache_info = vega20_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
+ break;
+ case IP_VERSION(9, 4, 2):
+ pcache_info = aldebaran_cache_info;
+ num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ pcache_info = raven_cache_info;
+ num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+ break;
+ case IP_VERSION(9, 3, 0):
+ pcache_info = renoir_cache_info;
+ num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ pcache_info = navi10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+ break;
+ case IP_VERSION(10, 1, 1):
+ pcache_info = navi14_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
+ break;
+ case IP_VERSION(10, 3, 0):
+ pcache_info = sienna_cichlid_cache_info;
+ num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
+ break;
+ case IP_VERSION(10, 3, 2):
+ pcache_info = navy_flounder_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
+ break;
+ case IP_VERSION(10, 3, 4):
+ pcache_info = dimgrey_cavefish_cache_info;
+ num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
+ break;
+ case IP_VERSION(10, 3, 1):
+ pcache_info = vangogh_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
+ break;
+ case IP_VERSION(10, 3, 5):
+ pcache_info = beige_goby_cache_info;
+ num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
+ break;
+ case IP_VERSION(10, 3, 3):
+ pcache_info = yellow_carp_cache_info;
+ num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
+ break;
+ default:
+ return -EINVAL;
+ }
}
*size_filled = 0;
@@ -1963,8 +1967,6 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
-
*avail_size -= sizeof(struct crat_subtype_iolink);
if (*avail_size < 0)
return -ENOMEM;
@@ -1981,7 +1983,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
/* Fill in IOLINK subtype.
* TODO: Fill-in other fields of iolink subtype
*/
- if (adev->gmc.xgmi.connected_to_cpu) {
+ if (kdev->adev->gmc.xgmi.connected_to_cpu) {
/*
* with host gpu xgmi link, host can access gpu memory whether
* or not pcie bar type is large, so always create bidirectional
@@ -1990,19 +1992,19 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->num_hops_xgmi = 1;
- if (adev->asic_type == CHIP_ALDEBARAN) {
+ if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) {
sub_type_hdr->minimum_bandwidth_mbs =
amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
- kdev->kgd, NULL, true);
+ kdev->adev, NULL, true);
sub_type_hdr->maximum_bandwidth_mbs =
sub_type_hdr->minimum_bandwidth_mbs;
}
} else {
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
sub_type_hdr->minimum_bandwidth_mbs =
- amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, true);
+ amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, false);
+ amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
}
sub_type_hdr->proximity_domain_from = proximity_domain;
@@ -2044,11 +2046,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
sub_type_hdr->proximity_domain_from = proximity_domain_from;
sub_type_hdr->proximity_domain_to = proximity_domain_to;
sub_type_hdr->num_hops_xgmi =
- amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
+ amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, peer_kdev->kgd, false);
+ amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, peer_kdev->adev, false);
sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, NULL, true) : 0;
+ amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
return 0;
}
@@ -2114,7 +2116,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
cu->proximity_domain = proximity_domain;
- amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info);
+ amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
cu->num_simd_per_cu = cu_info.simd_per_cu;
cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
cu->max_waves_simd = cu_info.max_waves_per_simd;
@@ -2145,7 +2147,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* report the total FB size (public+private) as a single
* private heap.
*/
- amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info);
+ amdgpu_amdkfd_get_local_mem_info(kdev->adev, &local_mem_info);
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 159add0f5aaa..1e30717b5253 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -41,7 +41,7 @@
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
- dev->kfd2kgd->address_watch_disable(dev->kgd);
+ dev->kfd2kgd->address_watch_disable(dev->adev);
}
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
@@ -322,7 +322,7 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
pdd->dev->kfd2kgd->address_watch_execute(
- dbgdev->dev->kgd,
+ dbgdev->dev->adev,
i,
cntl.u32All,
addrHi.u32All,
@@ -420,7 +420,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
aw_reg_add_dword =
dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->kgd,
+ dbgdev->dev->adev,
i,
ADDRESS_WATCH_REG_CNTL);
@@ -431,7 +431,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
aw_reg_add_dword =
dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->kgd,
+ dbgdev->dev->adev,
i,
ADDRESS_WATCH_REG_ADDR_HI);
@@ -441,7 +441,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
aw_reg_add_dword =
dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->kgd,
+ dbgdev->dev->adev,
i,
ADDRESS_WATCH_REG_ADDR_LO);
@@ -457,7 +457,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
aw_reg_add_dword =
dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->kgd,
+ dbgdev->dev->adev,
i,
ADDRESS_WATCH_REG_CNTL);
@@ -752,7 +752,7 @@ static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
- return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
+ return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->adev,
reg_gfx_index.u32All,
reg_sq_cmd.u32All);
}
@@ -784,7 +784,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
- (dev->kgd, vmid, &queried_pasid);
+ (dev->adev, vmid, &queried_pasid);
if (status && queried_pasid == p->pasid) {
pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
@@ -811,7 +811,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
/* for non DIQ we need to patch the VMID: */
reg_sq_cmd.bits.vm_id = vmid;
- dev->kfd2kgd->wave_control_execute(dev->kgd,
+ dev->kfd2kgd->wave_control_execute(dev->adev,
reg_gfx_index.u32All,
reg_sq_cmd.u32All);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3b119db16003..127d41d0e4f0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -53,770 +53,310 @@ extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
-#ifdef KFD_SUPPORT_IOMMU_V2
-static const struct kfd_device_info kaveri_device_info = {
- .asic_family = CHIP_KAVERI,
- .asic_name = "kaveri",
- .gfx_target_version = 70000,
- .max_pasid_bits = 16,
- /* max num of queues for KV.TODO should be a dynamic value */
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = true,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info carrizo_device_info = {
- .asic_family = CHIP_CARRIZO,
- .asic_name = "carrizo",
- .gfx_target_version = 80001,
- .max_pasid_bits = 16,
- /* max num of queues for CZ.TODO should be a dynamic value */
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = true,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info raven_device_info = {
- .asic_family = CHIP_RAVEN,
- .asic_name = "raven",
- .gfx_target_version = 90002,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = true,
- .needs_pci_atomics = true,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-#endif
-
-#ifdef CONFIG_DRM_AMDGPU_CIK
-static const struct kfd_device_info hawaii_device_info = {
- .asic_family = CHIP_HAWAII,
- .asic_name = "hawaii",
- .gfx_target_version = 70001,
- .max_pasid_bits = 16,
- /* max num of queues for KV.TODO should be a dynamic value */
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-#endif
-
-static const struct kfd_device_info tonga_device_info = {
- .asic_family = CHIP_TONGA,
- .asic_name = "tonga",
- .gfx_target_version = 80002,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info fiji_device_info = {
- .asic_family = CHIP_FIJI,
- .asic_name = "fiji",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info fiji_vf_device_info = {
- .asic_family = CHIP_FIJI,
- .asic_name = "fiji",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-
-static const struct kfd_device_info polaris10_device_info = {
- .asic_family = CHIP_POLARIS10,
- .asic_name = "polaris10",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info polaris10_vf_device_info = {
- .asic_family = CHIP_POLARIS10,
- .asic_name = "polaris10",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info polaris11_device_info = {
- .asic_family = CHIP_POLARIS11,
- .asic_name = "polaris11",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info polaris12_device_info = {
- .asic_family = CHIP_POLARIS12,
- .asic_name = "polaris12",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vegam_device_info = {
- .asic_family = CHIP_VEGAM,
- .asic_name = "vegam",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega10_device_info = {
- .asic_family = CHIP_VEGA10,
- .asic_name = "vega10",
- .gfx_target_version = 90000,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega10_vf_device_info = {
- .asic_family = CHIP_VEGA10,
- .asic_name = "vega10",
- .gfx_target_version = 90000,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega12_device_info = {
- .asic_family = CHIP_VEGA12,
- .asic_name = "vega12",
- .gfx_target_version = 90004,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega20_device_info = {
- .asic_family = CHIP_VEGA20,
- .asic_name = "vega20",
- .gfx_target_version = 90006,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info arcturus_device_info = {
- .asic_family = CHIP_ARCTURUS,
- .asic_name = "arcturus",
- .gfx_target_version = 90008,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 6,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info aldebaran_device_info = {
- .asic_family = CHIP_ALDEBARAN,
- .asic_name = "aldebaran",
- .gfx_target_version = 90010,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 3,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info renoir_device_info = {
- .asic_family = CHIP_RENOIR,
- .asic_name = "renoir",
- .gfx_target_version = 90012,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info navi10_device_info = {
- .asic_family = CHIP_NAVI10,
- .asic_name = "navi10",
- .gfx_target_version = 100100,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 145,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info navi12_device_info = {
- .asic_family = CHIP_NAVI12,
- .asic_name = "navi12",
- .gfx_target_version = 100101,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 145,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info navi14_device_info = {
- .asic_family = CHIP_NAVI14,
- .asic_name = "navi14",
- .gfx_target_version = 100102,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 145,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info sienna_cichlid_device_info = {
- .asic_family = CHIP_SIENNA_CICHLID,
- .asic_name = "sienna_cichlid",
- .gfx_target_version = 100300,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 4,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info navy_flounder_device_info = {
- .asic_family = CHIP_NAVY_FLOUNDER,
- .asic_name = "navy_flounder",
- .gfx_target_version = 100301,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info vangogh_device_info = {
- .asic_family = CHIP_VANGOGH,
- .asic_name = "vangogh",
- .gfx_target_version = 100303,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info dimgrey_cavefish_device_info = {
- .asic_family = CHIP_DIMGREY_CAVEFISH,
- .asic_name = "dimgrey_cavefish",
- .gfx_target_version = 100302,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info beige_goby_device_info = {
- .asic_family = CHIP_BEIGE_GOBY,
- .asic_name = "beige_goby",
- .gfx_target_version = 100304,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info yellow_carp_device_info = {
- .asic_family = CHIP_YELLOW_CARP,
- .asic_name = "yellow_carp",
- .gfx_target_version = 100305,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info cyan_skillfish_device_info = {
- .asic_family = CHIP_CYAN_SKILLFISH,
- .asic_name = "cyan_skillfish",
- .gfx_target_version = 100103,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
static int kfd_resume(struct kfd_dev *kfd);
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
+static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
{
- struct kfd_dev *kfd;
- const struct kfd_device_info *device_info;
- const struct kfd2kgd_calls *f2g;
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
+
+ switch (sdma_version) {
+ case IP_VERSION(4, 0, 0):/* VEGA10 */
+ case IP_VERSION(4, 0, 1):/* VEGA12 */
+ case IP_VERSION(4, 1, 0):/* RAVEN */
+ case IP_VERSION(4, 1, 1):/* RAVEN */
+ case IP_VERSION(4, 1, 2):/* RENOIR */
+ case IP_VERSION(5, 2, 1):/* VANGOGH */
+ case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
+ kfd->device_info.num_sdma_queues_per_engine = 2;
+ break;
+ case IP_VERSION(4, 2, 0):/* VEGA20 */
+ case IP_VERSION(4, 2, 2):/* ARCTURUS */
+ case IP_VERSION(4, 4, 0):/* ALDEBARAN */
+ case IP_VERSION(5, 0, 0):/* NAVI10 */
+ case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
+ case IP_VERSION(5, 0, 2):/* NAVI14 */
+ case IP_VERSION(5, 0, 5):/* NAVI12 */
+ case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
+ case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */
+ case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
+ case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
+ kfd->device_info.num_sdma_queues_per_engine = 8;
+ break;
+ default:
+ dev_warn(kfd_device,
+ "Default sdma queue per engine(8) is set due to "
+ "mismatch of sdma ip block(SDMA_HWIP:0x%x).\n",
+ sdma_version);
+ kfd->device_info.num_sdma_queues_per_engine = 8;
+ }
+}
+
+static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
+{
+ uint32_t gc_version = KFD_GC_VERSION(kfd);
+
+ switch (gc_version) {
+ case IP_VERSION(9, 0, 1): /* VEGA10 */
+ case IP_VERSION(9, 1, 0): /* RAVEN */
+ case IP_VERSION(9, 2, 1): /* VEGA12 */
+ case IP_VERSION(9, 2, 2): /* RAVEN */
+ case IP_VERSION(9, 3, 0): /* RENOIR */
+ case IP_VERSION(9, 4, 0): /* VEGA20 */
+ case IP_VERSION(9, 4, 1): /* ARCTURUS */
+ case IP_VERSION(9, 4, 2): /* ALDEBARAN */
+ case IP_VERSION(10, 3, 1): /* VANGOGH */
+ case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
+ case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
+ case IP_VERSION(10, 1, 10): /* NAVI10 */
+ case IP_VERSION(10, 1, 2): /* NAVI12 */
+ case IP_VERSION(10, 1, 1): /* NAVI14 */
+ case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
+ case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
+ case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
+ case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
+ break;
+ default:
+ dev_warn(kfd_device, "v9 event interrupt handler is set due to "
+ "mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version);
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
+ }
+}
+
+static void kfd_device_info_init(struct kfd_dev *kfd,
+ bool vf, uint32_t gfx_target_version)
+{
+ uint32_t gc_version = KFD_GC_VERSION(kfd);
+ uint32_t asic_type = kfd->adev->asic_type;
+
+ kfd->device_info.max_pasid_bits = 16;
+ kfd->device_info.max_no_of_hqd = 24;
+ kfd->device_info.num_of_watch_points = 4;
+ kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED;
+ kfd->device_info.gfx_target_version = gfx_target_version;
+
+ if (KFD_IS_SOC15(kfd)) {
+ kfd->device_info.doorbell_size = 8;
+ kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
+ kfd->device_info.supports_cwsr = true;
+
+ kfd_device_info_set_sdma_queue_num(kfd);
+
+ kfd_device_info_set_event_interrupt_class(kfd);
+
+ /* Raven */
+ if (gc_version == IP_VERSION(9, 1, 0) ||
+ gc_version == IP_VERSION(9, 2, 2))
+ kfd->device_info.needs_iommu_device = true;
+
+ if (gc_version < IP_VERSION(11, 0, 0)) {
+ /* Navi2x+, Navi1x+ */
+ if (gc_version >= IP_VERSION(10, 3, 0))
+ kfd->device_info.no_atomic_fw_version = 92;
+ else if (gc_version >= IP_VERSION(10, 1, 1))
+ kfd->device_info.no_atomic_fw_version = 145;
+
+ /* Navi1x+ */
+ if (gc_version >= IP_VERSION(10, 1, 1))
+ kfd->device_info.needs_pci_atomics = true;
+ }
+ } else {
+ kfd->device_info.doorbell_size = 4;
+ kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_cik;
+ kfd->device_info.num_sdma_queues_per_engine = 2;
+
+ if (asic_type != CHIP_KAVERI &&
+ asic_type != CHIP_HAWAII &&
+ asic_type != CHIP_TONGA)
+ kfd->device_info.supports_cwsr = true;
+
+ if (asic_type == CHIP_KAVERI ||
+ asic_type == CHIP_CARRIZO)
+ kfd->device_info.needs_iommu_device = true;
+
+ if (asic_type != CHIP_HAWAII && !vf)
+ kfd->device_info.needs_pci_atomics = true;
+ }
+}
+
+struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
+{
+ struct kfd_dev *kfd = NULL;
+ const struct kfd2kgd_calls *f2g = NULL;
struct pci_dev *pdev = adev->pdev;
+ uint32_t gfx_target_version = 0;
switch (adev->asic_type) {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
- if (vf)
- device_info = NULL;
- else
- device_info = &kaveri_device_info;
- f2g = &gfx_v7_kfd2kgd;
+ gfx_target_version = 70000;
+ if (!vf)
+ f2g = &gfx_v7_kfd2kgd;
break;
#endif
case CHIP_CARRIZO:
- if (vf)
- device_info = NULL;
- else
- device_info = &carrizo_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80001;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_HAWAII:
- if (vf)
- device_info = NULL;
- else
- device_info = &hawaii_device_info;
- f2g = &gfx_v7_kfd2kgd;
+ gfx_target_version = 70001;
+ if (!amdgpu_exp_hw_support)
+ pr_info(
+ "KFD support on Hawaii is experimental. See modparam exp_hw_support\n"
+ );
+ else if (!vf)
+ f2g = &gfx_v7_kfd2kgd;
break;
#endif
case CHIP_TONGA:
- if (vf)
- device_info = NULL;
- else
- device_info = &tonga_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80002;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_FIJI:
- if (vf)
- device_info = &fiji_vf_device_info;
- else
- device_info = &fiji_device_info;
+ gfx_target_version = 80003;
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS10:
- if (vf)
- device_info = &polaris10_vf_device_info;
- else
- device_info = &polaris10_device_info;
+ gfx_target_version = 80003;
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS11:
- if (vf)
- device_info = NULL;
- else
- device_info = &polaris11_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80003;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS12:
- if (vf)
- device_info = NULL;
- else
- device_info = &polaris12_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80003;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_VEGAM:
- if (vf)
- device_info = NULL;
- else
- device_info = &vegam_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80003;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
default:
switch (adev->ip_versions[GC_HWIP][0]) {
+ /* Vega 10 */
case IP_VERSION(9, 0, 1):
- if (vf)
- device_info = &vega10_vf_device_info;
- else
- device_info = &vega10_device_info;
+ gfx_target_version = 90000;
f2g = &gfx_v9_kfd2kgd;
break;
#ifdef KFD_SUPPORT_IOMMU_V2
+ /* Raven */
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
- if (vf)
- device_info = NULL;
- else
- device_info = &raven_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90002;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
#endif
+ /* Vega12 */
case IP_VERSION(9, 2, 1):
- if (vf)
- device_info = NULL;
- else
- device_info = &vega12_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90004;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
+ /* Renoir */
case IP_VERSION(9, 3, 0):
- if (vf)
- device_info = NULL;
- else
- device_info = &renoir_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90012;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
+ /* Vega20 */
case IP_VERSION(9, 4, 0):
- if (vf)
- device_info = NULL;
- else
- device_info = &vega20_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90006;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
+ /* Arcturus */
case IP_VERSION(9, 4, 1):
- device_info = &arcturus_device_info;
+ gfx_target_version = 90008;
f2g = &arcturus_kfd2kgd;
break;
+ /* Aldebaran */
case IP_VERSION(9, 4, 2):
- device_info = &aldebaran_device_info;
+ gfx_target_version = 90010;
f2g = &aldebaran_kfd2kgd;
break;
+ /* Navi10 */
case IP_VERSION(10, 1, 10):
- if (vf)
- device_info = NULL;
- else
- device_info = &navi10_device_info;
- f2g = &gfx_v10_kfd2kgd;
+ gfx_target_version = 100100;
+ if (!vf)
+ f2g = &gfx_v10_kfd2kgd;
break;
+ /* Navi12 */
case IP_VERSION(10, 1, 2):
- device_info = &navi12_device_info;
+ gfx_target_version = 100101;
f2g = &gfx_v10_kfd2kgd;
break;
+ /* Navi14 */
case IP_VERSION(10, 1, 1):
- if (vf)
- device_info = NULL;
- else
- device_info = &navi14_device_info;
- f2g = &gfx_v10_kfd2kgd;
+ gfx_target_version = 100102;
+ if (!vf)
+ f2g = &gfx_v10_kfd2kgd;
break;
+ /* Cyan Skillfish */
case IP_VERSION(10, 1, 3):
- if (vf)
- device_info = NULL;
- else
- device_info = &cyan_skillfish_device_info;
- f2g = &gfx_v10_kfd2kgd;
+ gfx_target_version = 100103;
+ if (!vf)
+ f2g = &gfx_v10_kfd2kgd;
break;
+ /* Sienna Cichlid */
case IP_VERSION(10, 3, 0):
- device_info = &sienna_cichlid_device_info;
+ gfx_target_version = 100300;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Navy Flounder */
case IP_VERSION(10, 3, 2):
- device_info = &navy_flounder_device_info;
+ gfx_target_version = 100301;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Van Gogh */
case IP_VERSION(10, 3, 1):
- if (vf)
- device_info = NULL;
- else
- device_info = &vangogh_device_info;
- f2g = &gfx_v10_3_kfd2kgd;
+ gfx_target_version = 100303;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Dimgrey Cavefish */
case IP_VERSION(10, 3, 4):
- device_info = &dimgrey_cavefish_device_info;
+ gfx_target_version = 100302;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Beige Goby */
case IP_VERSION(10, 3, 5):
- device_info = &beige_goby_device_info;
+ gfx_target_version = 100304;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Yellow Carp */
case IP_VERSION(10, 3, 3):
- if (vf)
- device_info = NULL;
- else
- device_info = &yellow_carp_device_info;
- f2g = &gfx_v10_3_kfd2kgd;
+ gfx_target_version = 100305;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
break;
default:
- return NULL;
+ break;
}
break;
}
- if (!device_info || !f2g) {
- dev_err(kfd_device, "%s %s not supported in kfd\n",
- amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
+ if (!f2g) {
+ if (adev->ip_versions[GC_HWIP][0])
+ dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
+ adev->ip_versions[GC_HWIP][0], vf ? "VF" : "");
+ else
+ dev_err(kfd_device, "%s %s not supported in kfd\n",
+ amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
return NULL;
}
@@ -824,8 +364,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
if (!kfd)
return NULL;
- kfd->kgd = kgd;
- kfd->device_info = device_info;
+ kfd->adev = adev;
+ kfd_device_info_init(kfd, vf, gfx_target_version);
kfd->pdev = pdev;
kfd->init_complete = false;
kfd->kfd2kgd = f2g;
@@ -844,24 +384,24 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
- if (cwsr_enable && kfd->device_info->supports_cwsr) {
- if (kfd->device_info->asic_family < CHIP_VEGA10) {
+ if (cwsr_enable && kfd->device_info.supports_cwsr) {
+ if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
- } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) {
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_arcturus_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
- } else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) {
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
- } else if (kfd->device_info->asic_family < CHIP_NAVI10) {
+ } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx9_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
- } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) {
+ } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_nv1x_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
@@ -882,18 +422,17 @@ static int kfd_gws_init(struct kfd_dev *kfd)
if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
return 0;
- if (hws_gws_support
- || (kfd->device_info->asic_family == CHIP_VEGA10
- && kfd->mec2_fw_version >= 0x81b3)
- || (kfd->device_info->asic_family >= CHIP_VEGA12
- && kfd->device_info->asic_family <= CHIP_RAVEN
- && kfd->mec2_fw_version >= 0x1b3)
- || (kfd->device_info->asic_family == CHIP_ARCTURUS
- && kfd->mec2_fw_version >= 0x30)
- || (kfd->device_info->asic_family == CHIP_ALDEBARAN
- && kfd->mec2_fw_version >= 0x28))
- ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
- amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
+ if (hws_gws_support || (KFD_IS_SOC15(kfd) &&
+ ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1)
+ && kfd->mec2_fw_version >= 0x81b3) ||
+ (KFD_GC_VERSION(kfd) <= IP_VERSION(9, 4, 0)
+ && kfd->mec2_fw_version >= 0x1b3) ||
+ (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)
+ && kfd->mec2_fw_version >= 0x30) ||
+ (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)
+ && kfd->mec2_fw_version >= 0x28))))
+ ret = amdgpu_amdkfd_alloc_gws(kfd->adev,
+ kfd->adev->gds.gws_size, &kfd->gws);
return ret;
}
@@ -910,11 +449,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
unsigned int size, map_process_packet_size;
kfd->ddev = ddev;
- kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+ kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
KGD_ENGINE_MEC1);
- kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+ kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
KGD_ENGINE_MEC2);
- kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+ kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
KGD_ENGINE_SDMA1);
kfd->shared_resources = *gpu_resources;
@@ -927,16 +466,16 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
* 32 and 64-bit requests are possible and must be
* supported.
*/
- kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd);
+ kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
if (!kfd->pci_atomic_requested &&
- kfd->device_info->needs_pci_atomics &&
- (!kfd->device_info->no_atomic_fw_version ||
- kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) {
+ kfd->device_info.needs_pci_atomics &&
+ (!kfd->device_info.no_atomic_fw_version ||
+ kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
dev_info(kfd_device,
"skipped device %x:%x, PCI rejects atomics %d<%d\n",
kfd->pdev->vendor, kfd->pdev->device,
kfd->mec_fw_version,
- kfd->device_info->no_atomic_fw_version);
+ kfd->device_info.no_atomic_fw_version);
return false;
}
@@ -953,16 +492,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
- kfd->device_info->mqd_size_aligned;
+ kfd->device_info.mqd_size_aligned;
/*
* calculate max size of runlist packet.
* There can be only 2 packets at once
*/
- map_process_packet_size =
- kfd->device_info->asic_family == CHIP_ALDEBARAN ?
+ map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?
sizeof(struct pm4_mes_map_process_aldebaran) :
- sizeof(struct pm4_mes_map_process);
+ sizeof(struct pm4_mes_map_process);
size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
+ sizeof(struct pm4_mes_runlist)) * 2;
@@ -974,7 +512,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
size += 512 * 1024;
if (amdgpu_amdkfd_alloc_gtt_mem(
- kfd->kgd, size, &kfd->gtt_mem,
+ kfd->adev, size, &kfd->gtt_mem,
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
false)) {
dev_err(kfd_device, "Could not allocate %d bytes\n", size);
@@ -995,9 +533,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_doorbell_error;
}
- kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd);
+ kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
- kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd);
+ kfd->noretry = kfd->adev->gmc.noretry;
if (kfd_interrupt_init(kfd)) {
dev_err(kfd_device, "Error initializing interrupts\n");
@@ -1015,7 +553,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
*/
if (kfd_gws_init(kfd)) {
dev_err(kfd_device, "Could not allocate %d gws\n",
- amdgpu_amdkfd_get_num_gws(kfd->kgd));
+ kfd->adev->gds.gws_size);
goto gws_error;
}
@@ -1030,7 +568,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd_cwsr_init(kfd);
- svm_migrate_init((struct amdgpu_device *)kfd->kgd);
+ svm_migrate_init(kfd->adev);
if(kgd2kfd_resume_iommu(kfd))
goto device_iommu_error;
@@ -1068,10 +606,10 @@ kfd_interrupt_error:
kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
- amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
alloc_gtt_mem_failure:
if (kfd->gws)
- amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
+ amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
dev_err(kfd_device,
"device %x:%x NOT added due to errors\n",
kfd->pdev->vendor, kfd->pdev->device);
@@ -1088,9 +626,9 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_doorbell_fini(kfd);
ida_destroy(&kfd->doorbell_ida);
kfd_gtt_sa_fini(kfd);
- amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
if (kfd->gws)
- amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
+ amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
}
kfree(kfd);
@@ -1229,7 +767,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
if (!kfd->init_complete)
return;
- if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
+ if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
dev_err_once(kfd_device, "Ring entry too small\n");
return;
}
@@ -1526,7 +1064,7 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
void kfd_inc_compute_active(struct kfd_dev *kfd)
{
if (atomic_inc_return(&kfd->compute_profile) == 1)
- amdgpu_amdkfd_set_compute_idle(kfd->kgd, false);
+ amdgpu_amdkfd_set_compute_idle(kfd->adev, false);
}
void kfd_dec_compute_active(struct kfd_dev *kfd)
@@ -1534,7 +1072,7 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
int count = atomic_dec_return(&kfd->compute_profile);
if (count == 0)
- amdgpu_amdkfd_set_compute_idle(kfd->kgd, true);
+ amdgpu_amdkfd_set_compute_idle(kfd->adev, true);
WARN_ONCE(count < 0, "Compute profile ref. count error");
}
@@ -1544,6 +1082,26 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
}
+/* kfd_get_num_sdma_engines returns the number of PCIe-optimized SDMA engines and
+ * kfd_get_num_xgmi_sdma_engines returns the number of XGMI SDMA engines.
+ * When the device has more than two engines, two are reserved for PCIe to allow
+ * full-duplex transfers and the rest are used for XGMI.
+ */
+unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev)
+{
+ /* If XGMI is not supported, all SDMA engines are PCIe */
+ if (!kdev->adev->gmc.xgmi.supported)
+ return kdev->adev->sdma.num_instances;
+
+ return min(kdev->adev->sdma.num_instances, 2);
+}
+
+unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev)
+{
+	/* After the PCIe-optimized engines are reserved, the remaining engines are XGMI */
+ return kdev->adev->sdma.num_instances - kfd_get_num_sdma_engines(kdev);
+}
+
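A minimal user-space sketch of the split the two new helpers implement; num_instances and xgmi_supported are hypothetical stand-ins for adev->sdma.num_instances and adev->gmc.xgmi.supported, not part of the patch:

    #include <stdio.h>

    /* Stand-ins for adev->sdma.num_instances / adev->gmc.xgmi.supported */
    static unsigned int num_pcie_sdma(unsigned int num_instances, int xgmi_supported)
    {
            if (!xgmi_supported)
                    return num_instances;                 /* every engine serves PCIe */
            return num_instances < 2 ? num_instances : 2; /* reserve two for PCIe */
    }

    static unsigned int num_xgmi_sdma(unsigned int num_instances, int xgmi_supported)
    {
            return num_instances - num_pcie_sdma(num_instances, xgmi_supported);
    }

    int main(void)
    {
            /* a hypothetical XGMI-capable part with 5 SDMA engines: 2 PCIe + 3 XGMI */
            printf("pcie=%u xgmi=%u\n", num_pcie_sdma(5, 1), num_xgmi_sdma(5, 1));
            return 0;
    }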
#if defined(CONFIG_DEBUG_FS)
/* This function will send a package to HIQ to hang the HWS
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 003ba6a373ff..19890e350107 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -47,7 +47,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param);
+ uint32_t filter_param, bool reset);
static int map_queues_cpsch(struct device_queue_manager *dqm);
@@ -99,38 +99,29 @@ unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
return dqm->dev->shared_resources.num_pipe_per_mec;
}
-static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
-{
- return dqm->dev->device_info->num_sdma_engines;
-}
-
-static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
-{
- return dqm->dev->device_info->num_xgmi_sdma_engines;
-}
-
static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
{
- return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
+ return kfd_get_num_sdma_engines(dqm->dev) +
+ kfd_get_num_xgmi_sdma_engines(dqm->dev);
}
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
- return dqm->dev->device_info->num_sdma_engines
- * dqm->dev->device_info->num_sdma_queues_per_engine;
+ return kfd_get_num_sdma_engines(dqm->dev) *
+ dqm->dev->device_info.num_sdma_queues_per_engine;
}
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
- return dqm->dev->device_info->num_xgmi_sdma_engines
- * dqm->dev->device_info->num_sdma_queues_per_engine;
+ return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
+ dqm->dev->device_info.num_sdma_queues_per_engine;
}
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
return dqm->dev->kfd2kgd->program_sh_mem_settings(
- dqm->dev->kgd, qpd->vmid,
+ dqm->dev->adev, qpd->vmid,
qpd->sh_mem_config,
qpd->sh_mem_ape1_base,
qpd->sh_mem_ape1_limit,
@@ -157,7 +148,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
struct kfd_dev *dev = qpd->dqm->dev;
- if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
+ if (!KFD_IS_SOC15(dev)) {
/* On pre-SOC15 chips we need to use the queue ID to
* preserve the user mode ABI.
*/
@@ -202,7 +193,7 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
unsigned int old;
struct kfd_dev *dev = qpd->dqm->dev;
- if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
+ if (!KFD_IS_SOC15(dev) ||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
return;
@@ -216,7 +207,7 @@ static void program_trap_handler_settings(struct device_queue_manager *dqm,
{
if (dqm->dev->kfd2kgd->program_trap_handler_settings)
dqm->dev->kfd2kgd->program_trap_handler_settings(
- dqm->dev->kgd, qpd->vmid,
+ dqm->dev->adev, qpd->vmid,
qpd->tba_addr, qpd->tma_addr);
}
@@ -250,21 +241,20 @@ static int allocate_vmid(struct device_queue_manager *dqm,
program_sh_mem_settings(dqm, qpd);
- if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 &&
- dqm->dev->cwsr_enabled)
+ if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled)
program_trap_handler_settings(dqm, qpd);
/* qpd->page_table_base is set earlier when register_process()
* is called, i.e. when the first queue is created.
*/
- dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
+ dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
qpd->vmid,
qpd->page_table_base);
/* invalidate the VM context after pasid and vmid mapping is set up */
kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
if (dqm->dev->kfd2kgd->set_scratch_backing_va)
- dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
+ dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
qpd->sh_hidden_private_base, qpd->vmid);
return 0;
@@ -283,7 +273,7 @@ static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
if (ret)
return ret;
- return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
+ return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
pmf->release_mem_size / sizeof(uint32_t));
}
@@ -293,7 +283,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
struct queue *q)
{
/* On GFX v7, CP doesn't flush TC at dequeue */
- if (q->device->device_info->asic_family == CHIP_HAWAII)
+ if (q->device->adev->asic_type == CHIP_HAWAII)
if (flush_texture_cache_nocpsch(q->device, qpd))
pr_err("Failed to flush TC\n");
@@ -580,7 +570,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
/* Make sure the queue is unmapped before updating the MQD */
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
retval = unmap_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
if (retval) {
pr_err("unmap queue failed\n");
goto out_unlock;
@@ -776,7 +766,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
if (!list_empty(&qpd->queues_list)) {
dqm->dev->kfd2kgd->set_vm_context_page_table_base(
- dqm->dev->kgd,
+ dqm->dev->adev,
qpd->vmid,
qpd->page_table_base);
kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
@@ -954,7 +944,7 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
unsigned int vmid)
{
return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
- dqm->dev->kgd, pasid, vmid);
+ dqm->dev->adev, pasid, vmid);
}
static void init_interrupts(struct device_queue_manager *dqm)
@@ -963,7 +953,7 @@ static void init_interrupts(struct device_queue_manager *dqm)
for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
if (is_pipe_enabled(dqm, 0, i))
- dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
+ dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i);
}
static int initialize_nocpsch(struct device_queue_manager *dqm)
@@ -1017,7 +1007,7 @@ static int start_nocpsch(struct device_queue_manager *dqm)
pr_info("SW scheduler is used");
init_interrupts(dqm);
- if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
+ if (dqm->dev->adev->asic_type == CHIP_HAWAII)
return pm_init(&dqm->packet_mgr, dqm);
dqm->sched_running = true;
@@ -1026,7 +1016,7 @@ static int start_nocpsch(struct device_queue_manager *dqm)
static int stop_nocpsch(struct device_queue_manager *dqm)
{
- if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
+ if (dqm->dev->adev->asic_type == CHIP_HAWAII)
pm_uninit(&dqm->packet_mgr, false);
dqm->sched_running = false;
@@ -1055,9 +1045,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
dqm->sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
q->properties.sdma_engine_id = q->sdma_id %
- get_num_sdma_engines(dqm);
+ kfd_get_num_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
- get_num_sdma_engines(dqm);
+ kfd_get_num_sdma_engines(dqm->dev);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
if (dqm->xgmi_sdma_bitmap == 0) {
pr_err("No more XGMI SDMA queue to allocate\n");
@@ -1072,10 +1062,11 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
* assumes the first N engines are always
* PCIe-optimized ones
*/
- q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
- q->sdma_id % get_num_xgmi_sdma_engines(dqm);
+ q->properties.sdma_engine_id =
+ kfd_get_num_sdma_engines(dqm->dev) +
+ q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
- get_num_xgmi_sdma_engines(dqm);
+ kfd_get_num_xgmi_sdma_engines(dqm->dev);
}
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
@@ -1132,7 +1123,7 @@ static int set_sched_resources(struct device_queue_manager *dqm)
res.queue_mask |= 1ull
<< amdgpu_queue_mask_bit_to_set_resource_bit(
- (struct amdgpu_device *)dqm->dev->kgd, i);
+ dqm->dev->adev, i);
}
res.gws_mask = ~0ull;
res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
@@ -1226,8 +1217,13 @@ static int stop_cpsch(struct device_queue_manager *dqm)
bool hanging;
dqm_lock(dqm);
+ if (!dqm->sched_running) {
+ dqm_unlock(dqm);
+ return 0;
+ }
+
if (!dqm->is_hws_hang)
- unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
hanging = dqm->is_hws_hang || dqm->is_resetting;
dqm->sched_running = false;
@@ -1423,7 +1419,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param)
+ uint32_t filter_param, bool reset)
{
int retval = 0;
struct mqd_manager *mqd_mgr;
@@ -1436,7 +1432,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
return retval;
retval = pm_send_unmap_queue(&dqm->packet_mgr, KFD_QUEUE_TYPE_COMPUTE,
- filter, filter_param, false, 0);
+ filter, filter_param, reset, 0);
if (retval)
return retval;
@@ -1480,6 +1476,21 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
return retval;
}
+/* Only for compute queues */
+static int reset_queues_cpsch(struct device_queue_manager *dqm,
+ uint16_t pasid)
+{
+ int retval;
+
+ dqm_lock(dqm);
+
+ retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
+ pasid, true);
+
+ dqm_unlock(dqm);
+ return retval;
+}
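A caller-side sketch of the new hook; it mirrors the interrupt-handler hunk later in this patch, and the NULL check matters because only the HWS scheduling policy wires the callback up:

    int ret = -EINVAL;

    if (dev->dqm->ops.reset_queues)
            ret = dev->dqm->ops.reset_queues(dev->dqm, pasid);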
+
/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
@@ -1489,7 +1500,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
if (dqm->is_hws_hang)
return -EIO;
- retval = unmap_queues_cpsch(dqm, filter, filter_param);
+ retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
if (retval)
return retval;
@@ -1842,10 +1853,10 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
get_num_all_sdma_engines(dqm) *
- dev->device_info->num_sdma_queues_per_engine +
+ dev->device_info.num_sdma_queues_per_engine +
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
- retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
(void *)&(mem_obj->cpu_ptr), false);
@@ -1862,7 +1873,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
if (!dqm)
return NULL;
- switch (dev->device_info->asic_family) {
+ switch (dev->adev->asic_type) {
/* HWS is not available on Hawaii. */
case CHIP_HAWAII:
/* HWS depends on CWSR for timely dequeue. CWSR is not
@@ -1900,6 +1911,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.evict_process_queues = evict_process_queues_cpsch;
dqm->ops.restore_process_queues = restore_process_queues_cpsch;
dqm->ops.get_wave_state = get_wave_state;
+ dqm->ops.reset_queues = reset_queues_cpsch;
break;
case KFD_SCHED_POLICY_NO_HWS:
/* initialize dqm for no cp scheduling */
@@ -1925,7 +1937,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
goto out_free;
}
- switch (dev->device_info->asic_family) {
+ switch (dev->adev->asic_type) {
case CHIP_CARRIZO:
device_queue_manager_init_vi(&dqm->asic_ops);
break;
@@ -1947,31 +1959,16 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
- device_queue_manager_init_v9(&dqm->asic_ops);
- break;
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- device_queue_manager_init_v10_navi10(&dqm->asic_ops);
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dev->device_info->asic_family);
- goto out_free;
+ if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
+ device_queue_manager_init_v10_navi10(&dqm->asic_ops);
+ else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
+ device_queue_manager_init_v9(&dqm->asic_ops);
+ else {
+ WARN(1, "Unexpected ASIC family %u",
+ dev->adev->asic_type);
+ goto out_free;
+ }
}
if (init_mqd_managers(dqm))
@@ -1995,7 +1992,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
{
WARN(!mqd, "No hiq sdma mqd trunk to free");
- amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
}
void device_queue_manager_uninit(struct device_queue_manager *dqm)
@@ -2026,7 +2023,7 @@ static void kfd_process_hw_exception(struct work_struct *work)
{
struct device_queue_manager *dqm = container_of(work,
struct device_queue_manager, hw_exception_work);
- amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
+ amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}
#if defined(CONFIG_DEBUG_FS)
@@ -2065,7 +2062,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
return 0;
}
- r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
+ r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
&dump, &n_regs);
if (!r) {
@@ -2087,7 +2084,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
continue;
r = dqm->dev->kfd2kgd->hqd_dump(
- dqm->dev->kgd, pipe, queue, &dump, &n_regs);
+ dqm->dev->adev, pipe, queue, &dump, &n_regs);
if (r)
break;
@@ -2101,10 +2098,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
for (queue = 0;
- queue < dqm->dev->device_info->num_sdma_queues_per_engine;
+ queue < dqm->dev->device_info.num_sdma_queues_per_engine;
queue++) {
r = dqm->dev->kfd2kgd->hqd_sdma_dump(
- dqm->dev->kgd, pipe, queue, &dump, &n_regs);
+ dqm->dev->adev, pipe, queue, &dump, &n_regs);
if (r)
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 499fc0ea387f..e145e4deb53a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -81,6 +81,8 @@ struct device_process_node {
*
* @get_wave_state: Retrieves context save state and optionally copies the
* control stack, if kept in the MQD, to the given userspace address.
+ *
+ * @reset_queues: reset queues of the process which consumed RAS poison
*/
struct device_queue_manager_ops {
@@ -134,6 +136,9 @@ struct device_queue_manager_ops {
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size);
+
+ int (*reset_queues)(struct device_queue_manager *dqm,
+ uint16_t pasid);
};
struct device_queue_manager_asic_ops {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index b5c3d13643f1..f20434d9980e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -62,7 +62,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
- if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) {
+ if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2)) {
/* Aldebaran can safely support different XNACK modes
* per process
*/
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 768d153acff4..0dbcf54657ed 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -48,7 +48,7 @@
/* # of doorbell bytes allocated for each process. */
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{
- return roundup(kfd->device_info->doorbell_size *
+ return roundup(kfd->device_info.doorbell_size *
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
PAGE_SIZE);
}
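Worked example of the size this computes (the constants are assumptions, not taken from this hunk): with 8-byte SOC15 doorbells and the usual KFD_MAX_NUM_OF_QUEUES_PER_PROCESS of 1024, the slice is roundup(8 * 1024, 4096) = 8192 bytes, i.e. two 4 KiB pages per process; with 4-byte doorbells it is a single page.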
@@ -180,7 +180,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
- inx *= kfd->device_info->doorbell_size / sizeof(u32);
+ inx *= kfd->device_info.doorbell_size / sizeof(u32);
/*
* Calculating the kernel doorbell offset using the first
@@ -201,7 +201,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
unsigned int inx;
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
- * sizeof(u32) / kfd->device_info->doorbell_size;
+ * sizeof(u32) / kfd->device_info.doorbell_size;
mutex_lock(&kfd->doorbell_mutex);
__clear_bit(inx, kfd->doorbell_available_index);
@@ -239,7 +239,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
return kfd->doorbell_base_dw_offset +
pdd->doorbell_index
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
- doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
+ doorbell_id * kfd->device_info.doorbell_size / sizeof(u32);
}
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 3eea4edee355..afe72dd11325 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -935,8 +935,10 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
/* Workaround on Raven to not kill the process when memory is freed
* before IOMMU is able to finish processing all the excessive PPRs
*/
- if (dev->device_info->asic_family != CHIP_RAVEN &&
- dev->device_info->asic_family != CHIP_RENOIR) {
+
+ if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) {
mutex_lock(&p->event_mutex);
/* Lookup events by type and signal them */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index d1388896f9c1..2e2b7ceb71db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -394,7 +394,7 @@ int kfd_init_apertures(struct kfd_process *process)
pdd->gpuvm_base = pdd->gpuvm_limit = 0;
pdd->scratch_base = pdd->scratch_limit = 0;
} else {
- switch (dev->device_info->asic_family) {
+ switch (dev->adev->asic_type) {
case CHIP_KAVERI:
case CHIP_HAWAII:
case CHIP_CARRIZO:
@@ -406,29 +406,14 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_VEGAM:
kfd_init_apertures_vi(pdd, id);
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- kfd_init_apertures_v9(pdd, id);
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dev->device_info->asic_family);
- return -EINVAL;
+ if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
+ kfd_init_apertures_v9(pdd, id);
+ else {
+ WARN(1, "Unexpected ASIC family %u",
+ dev->adev->asic_type);
+ return -EINVAL;
+ }
}
if (!dev->use_iommu_v2) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 543e7ea75593..b8ac28fb1231 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -89,6 +89,44 @@ enum SQ_INTERRUPT_ERROR_TYPE {
#define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000
#define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20
+static void event_interrupt_poison_consumption(struct kfd_dev *dev,
+ uint16_t pasid, uint16_t source_id)
+{
+ int ret = -EINVAL;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+ if (!p)
+ return;
+
+	/* All queues of a process are unmapped at once on the first poison event */
+ if (atomic_read(&p->poison)) {
+ kfd_unref_process(p);
+ return;
+ }
+
+ atomic_set(&p->poison, 1);
+ kfd_unref_process(p);
+
+ switch (source_id) {
+ case SOC15_INTSRC_SQ_INTERRUPT_MSG:
+ if (dev->dqm->ops.reset_queues)
+ ret = dev->dqm->ops.reset_queues(dev->dqm, pasid);
+ break;
+ case SOC15_INTSRC_SDMA_ECC:
+ default:
+ break;
+ }
+
+ kfd_signal_poison_consumed_event(dev, pasid);
+
+	/* If resetting the queues succeeded, do page retirement without a GPU
+	 * reset; otherwise fall back to a full GPU reset.
+	 */
+ if (!ret)
+ amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
+ else
+ amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
+}
+
static bool event_interrupt_isr_v9(struct kfd_dev *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre,
@@ -135,7 +173,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
*patched_flag = true;
memcpy(patched_ihre, ih_ring_entry,
- dev->device_info->ih_ring_entry_size);
+ dev->device_info.ih_ring_entry_size);
pasid = dev->dqm->vmid_pasid[vmid];
@@ -230,8 +268,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
sq_intr_err);
if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
- kfd_signal_poison_consumed_event(dev, pasid);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd);
+ event_interrupt_poison_consumption(dev, pasid, source_id);
return;
}
break;
@@ -252,8 +289,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
if (source_id == SOC15_INTSRC_SDMA_TRAP) {
kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
} else if (source_id == SOC15_INTSRC_SDMA_ECC) {
- kfd_signal_poison_consumed_event(dev, pasid);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd);
+ event_interrupt_poison_consumption(dev, pasid, source_id);
return;
}
} else if (client_id == SOC15_IH_CLIENTID_VMC ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index bc47f6a44456..81887c2013c9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -54,7 +54,7 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
int r;
r = kfifo_alloc(&kfd->ih_fifo,
- KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
+ KFD_IH_NUM_ENTRIES * kfd->device_info.ih_ring_entry_size,
GFP_KERNEL);
if (r) {
dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
@@ -114,8 +114,8 @@ bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
int count;
count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
- kfd->device_info->ih_ring_entry_size);
- if (count != kfd->device_info->ih_ring_entry_size) {
+ kfd->device_info.ih_ring_entry_size);
+ if (count != kfd->device_info.ih_ring_entry_size) {
dev_err_ratelimited(kfd_chardev(),
"Interrupt ring overflow, dropping interrupt %d\n",
count);
@@ -133,11 +133,11 @@ static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
int count;
count = kfifo_out(&kfd->ih_fifo, ih_ring_entry,
- kfd->device_info->ih_ring_entry_size);
+ kfd->device_info.ih_ring_entry_size);
- WARN_ON(count && count != kfd->device_info->ih_ring_entry_size);
+ WARN_ON(count && count != kfd->device_info.ih_ring_entry_size);
- return count == kfd->device_info->ih_ring_entry_size;
+ return count == kfd->device_info.ih_ring_entry_size;
}
static void interrupt_wq(struct work_struct *work)
@@ -146,13 +146,13 @@ static void interrupt_wq(struct work_struct *work)
interrupt_work);
uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
- if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
+ if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
dev_err_once(kfd_chardev(), "Ring entry too small\n");
return;
}
while (dequeue_ih_ring_entry(dev, ih_ring_entry))
- dev->device_info->event_interrupt_class->interrupt_wq(dev,
+ dev->device_info.event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
}
@@ -163,7 +163,7 @@ bool interrupt_is_wanted(struct kfd_dev *dev,
/* integer and bitwise OR so there is no boolean short-circuiting */
unsigned int wanted = 0;
- wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
+ wanted |= dev->device_info.event_interrupt_class->interrupt_isr(dev,
ih_ring_entry, patched_ihre, flag);
return wanted != 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index 73f2257acc23..66ad8d0b8f7f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -89,7 +89,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
}
pasid_limit = min_t(unsigned int,
- (unsigned int)(1 << kfd->device_info->max_pasid_bits),
+ (unsigned int)(1 << kfd->device_info.max_pasid_bits),
iommu_info.max_pasids);
if (!kfd_set_pasid_limit(pasid_limit)) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 64b4ac339904..16f8bc4ca7f6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -91,7 +91,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->pq_gpu_addr = kq->pq->gpu_addr;
/* For CIK family asics, kq->eop_mem is not needed */
- if (dev->device_info->asic_family > CHIP_MULLINS) {
+ if (dev->adev->asic_type > CHIP_MULLINS) {
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
if (retval != 0)
goto err_eop_allocate_vidmem;
@@ -111,7 +111,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
- retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
+ retval = kfd_gtt_sa_allocate(dev, dev->device_info.doorbell_size,
&kq->wptr_mem);
if (retval != 0)
@@ -297,7 +297,7 @@ void kq_submit_packet(struct kernel_queue *kq)
}
pr_debug("\n");
#endif
- if (kq->dev->device_info->doorbell_size == 8) {
+ if (kq->dev->device_info.doorbell_size == 8) {
*kq->wptr64_kernel = kq->pending_wptr64;
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
kq->pending_wptr64);
@@ -310,7 +310,7 @@ void kq_submit_packet(struct kernel_queue *kq)
void kq_rollback_packet(struct kernel_queue *kq)
{
- if (kq->dev->device_info->doorbell_size == 8) {
+ if (kq->dev->device_info.doorbell_size == 8) {
kq->pending_wptr64 = *kq->wptr64_kernel;
kq->pending_wptr = *kq->wptr_kernel %
(kq->queue->properties.queue_size / 4);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 9b9c2b9bf2ef..ed5385137f48 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -108,8 +108,8 @@ error_free:
* svm_migrate_copy_memory_gart - sdma copy data between ram and vram
*
* @adev: amdgpu device the sdma ring running
- * @src: source page address array
- * @dst: destination page address array
+ * @sys: system DMA pointer to be copied
+ * @vram: vram destination DMA pointer
* @npages: number of pages to copy
* @direction: enum MIGRATION_COPY_DIR
* @mfence: output, sdma fence to signal after sdma is done
@@ -549,7 +549,7 @@ static void svm_migrate_page_free(struct page *page)
if (svm_bo) {
pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref));
- svm_range_bo_unref(svm_bo);
+ svm_range_bo_unref_async(svm_bo);
}
}
@@ -938,7 +938,7 @@ int svm_migrate_init(struct amdgpu_device *adev)
void *r;
/* Page migration works on Vega10 or newer */
- if (kfddev->device_info->asic_family < CHIP_VEGA10)
+ if (!KFD_IS_SOC15(kfddev))
return -EINVAL;
pgmap = &kfddev->pgmap;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index c021519af810..e2825ad4d699 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -71,7 +71,7 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
return NULL;
offset = (q->sdma_engine_id *
- dev->device_info->num_sdma_queues_per_engine +
+ dev->device_info.num_sdma_queues_per_engine +
q->sdma_queue_id) *
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
@@ -100,7 +100,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
struct kfd_cu_info cu_info;
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
int i, se, sh, cu;
- amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info);
+ amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
if (cu_mask_count > cu_info.cu_active_number)
cu_mask_count = cu_info.cu_active_number;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 8128f4d312f1..e9a8e21e144e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -171,7 +171,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
- return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, wptr_mask, mms);
}
@@ -180,7 +180,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
(uint32_t __user *)p->write_ptr,
mms);
}
@@ -276,7 +276,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
unsigned int timeout, uint32_t pipe_id,
uint32_t queue_id)
{
- return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout,
+ return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,
pipe_id, queue_id);
}
@@ -289,7 +289,7 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
unsigned int timeout, uint32_t pipe_id,
uint32_t queue_id)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+ return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
}
static bool is_occupied(struct mqd_manager *mm, void *mqd,
@@ -297,7 +297,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
uint32_t queue_id)
{
- return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
+ return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,
pipe_id, queue_id);
}
@@ -306,7 +306,7 @@ static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+ return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
}
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 270160fc401b..d74d8a6ac27a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -148,7 +148,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
- r = mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, 0, mms);
return r;
@@ -158,7 +158,7 @@ static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
{
- return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
+ return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
queue_id, p->doorbell_off);
}
@@ -239,7 +239,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
uint32_t queue_id)
{
return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->kgd, mqd, type, timeout,
+ (mm->dev->adev, mqd, type, timeout,
pipe_id, queue_id);
}
@@ -254,7 +254,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
uint32_t queue_id)
{
return mm->dev->kfd2kgd->hqd_is_occupied(
- mm->dev->kgd, queue_address,
+ mm->dev->adev, queue_address,
pipe_id, queue_id);
}
@@ -320,7 +320,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
(uint32_t __user *)p->write_ptr,
mms);
}
@@ -363,14 +363,14 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
unsigned int timeout, uint32_t pipe_id,
uint32_t queue_id)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+ return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
}
static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+ return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
}
#if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 4e5932f54b5a..326eb2285029 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -108,7 +108,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
return NULL;
- retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
+ retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->adev,
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
&(mqd_mem_obj->gtt_mem),
@@ -199,7 +199,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
- return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, 0, mms);
}
@@ -208,7 +208,7 @@ static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
{
- return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
+ return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
queue_id, p->doorbell_off);
}
@@ -291,7 +291,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
uint32_t queue_id)
{
return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->kgd, mqd, type, timeout,
+ (mm->dev->adev, mqd, type, timeout,
pipe_id, queue_id);
}
@@ -301,7 +301,7 @@ static void free_mqd(struct mqd_manager *mm, void *mqd,
struct kfd_dev *kfd = mm->dev;
if (mqd_mem_obj->gtt_mem) {
- amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(kfd->adev, mqd_mem_obj->gtt_mem);
kfree(mqd_mem_obj);
} else {
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
@@ -313,7 +313,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
uint32_t queue_id)
{
return mm->dev->kfd2kgd->hqd_is_occupied(
- mm->dev->kgd, queue_address,
+ mm->dev->adev, queue_address,
pipe_id, queue_id);
}
@@ -375,7 +375,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
(uint32_t __user *)p->write_ptr,
mms);
}
@@ -418,14 +418,14 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
unsigned int timeout, uint32_t pipe_id,
uint32_t queue_id)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+ return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
}
static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+ return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
}
#if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index cd9220eb8a7a..d456e950ce1d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -162,7 +162,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
- return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, wptr_mask, mms);
}
@@ -265,7 +265,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
uint32_t queue_id)
{
return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->kgd, mqd, type, timeout,
+ (mm->dev->adev, mqd, type, timeout,
pipe_id, queue_id);
}
@@ -280,7 +280,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
uint32_t queue_id)
{
return mm->dev->kfd2kgd->hqd_is_occupied(
- mm->dev->kgd, queue_address,
+ mm->dev->adev, queue_address,
pipe_id, queue_id);
}
@@ -347,7 +347,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
(uint32_t __user *)p->write_ptr,
mms);
}
@@ -389,14 +389,14 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
unsigned int timeout, uint32_t pipe_id,
uint32_t queue_id)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+ return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
}
static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+ return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
}
#if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index e547f1f8c49f..1439420925a0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -223,7 +223,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
- switch (dqm->dev->device_info->asic_family) {
+ switch (dqm->dev->adev->asic_type) {
case CHIP_KAVERI:
case CHIP_HAWAII:
/* PM4 packet structures on CIK are the same as on VI */
@@ -236,31 +236,16 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_VEGAM:
pm->pmf = &kfd_vi_pm_funcs;
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- pm->pmf = &kfd_v9_pm_funcs;
- break;
- case CHIP_ALDEBARAN:
- pm->pmf = &kfd_aldebaran_pm_funcs;
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dqm->dev->device_info->asic_family);
- return -EINVAL;
+ if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2))
+ pm->pmf = &kfd_aldebaran_pm_funcs;
+ else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
+ pm->pmf = &kfd_v9_pm_funcs;
+ else {
+ WARN(1, "Unexpected ASIC family %u",
+ dqm->dev->adev->asic_type);
+ return -EINVAL;
+ }
}
pm->dqm = dqm;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index 08442e7d9944..3c0658e32e93 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -110,8 +110,8 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
-int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
- struct scheduling_resources *res)
+static int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res)
{
struct pm4_mes_set_resources *packet;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 94e92c0812db..ea68f3b3a4e9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -183,7 +183,8 @@ enum cache_policy {
cache_policy_noncoherent
};
-#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
+#define KFD_GC_VERSION(dev) ((dev)->adev->ip_versions[GC_HWIP][0])
+#define KFD_IS_SOC15(dev) ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1)))
struct kfd_event_interrupt_class {
bool (*interrupt_isr)(struct kfd_dev *dev,
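These two macros carry the rest of the conversion: equality against a specific IP_VERSION replaces single-ASIC checks (Aldebaran is GC 9.4.2, as the device_queue_manager_v9 hunk above shows) and >= comparisons replace the old asic_family ranges. Adapted from the packet-manager hunk earlier in this diff, with dev standing in for dqm->dev:

    if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2))      /* Aldebaran */
            pm->pmf = &kfd_aldebaran_pm_funcs;
    else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) /* any SOC15 part */
            pm->pmf = &kfd_v9_pm_funcs;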
@@ -194,8 +195,6 @@ struct kfd_event_interrupt_class {
};
struct kfd_device_info {
- enum amd_asic_type asic_family;
- const char *asic_name;
uint32_t gfx_target_version;
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
@@ -208,11 +207,12 @@ struct kfd_device_info {
bool needs_iommu_device;
bool needs_pci_atomics;
uint32_t no_atomic_fw_version;
- unsigned int num_sdma_engines;
- unsigned int num_xgmi_sdma_engines;
unsigned int num_sdma_queues_per_engine;
};
+unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev);
+unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev);
+
struct kfd_mem_obj {
uint32_t range_start;
uint32_t range_end;
@@ -228,9 +228,9 @@ struct kfd_vmid_info {
};
struct kfd_dev {
- struct kgd_dev *kgd;
+ struct amdgpu_device *adev;
- const struct kfd_device_info *device_info;
+ struct kfd_device_info device_info;
struct pci_dev *pdev;
struct drm_device *ddev;
@@ -766,7 +766,7 @@ struct svm_range_list {
struct list_head deferred_range_list;
spinlock_t deferred_list_lock;
atomic_t evicted_ranges;
- bool drain_pagefaults;
+ atomic_t drain_pagefaults;
struct delayed_work restore_work;
DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
struct task_struct *faulting_task;
@@ -856,6 +856,8 @@ struct kfd_process {
struct svm_range_list svms;
bool xnack_enabled;
+
+ atomic_t poison;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -891,7 +893,7 @@ struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
-int kfd_process_gpuid_from_kgd(struct kfd_process *p,
+int kfd_process_gpuid_from_adev(struct kfd_process *p,
struct amdgpu_device *adev, uint32_t *gpuid,
uint32_t *gpuidx);
static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
@@ -984,7 +986,7 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
-struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
+struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev);
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index b993011cfa64..f1930ff2c74a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -251,14 +251,13 @@ cleanup:
}
/**
- * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device
+ * kfd_get_cu_occupancy - Collect number of waves in-flight on this device
* by current process. Translates acquired wave count into number of compute units
* that are occupied.
*
- * @atr: Handle of attribute that allows reporting of wave count. The attribute
+ * @attr: Handle of attribute that allows reporting of wave count. The attribute
* handle encapsulates GPU device it is associated with, thereby allowing collection
* of waves in flight, etc
- *
* @buffer: Handle of user provided buffer updated with wave count
*
* Return: Number of bytes written to user buffer or an error value
@@ -288,7 +287,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
/* Collect wave count from device if it supports */
wave_cnt = 0;
max_waves_per_cu = 0;
- dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt,
+ dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,
&max_waves_per_cu);
/* Translate wave count to number of compute units */
@@ -692,12 +691,12 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
struct kfd_dev *dev = pdd->dev;
if (kptr) {
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->kgd, mem);
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->adev, mem);
kptr = NULL;
}
- amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
+ amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, mem, pdd->drm_priv,
NULL);
}
@@ -714,24 +713,24 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
struct kfd_dev *kdev = pdd->dev;
int err;
- err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
+ err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
pdd->drm_priv, mem, NULL, flags);
if (err)
goto err_alloc_mem;
- err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, *mem,
+ err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem,
pdd->drm_priv, NULL);
if (err)
goto err_map_mem;
- err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem, true);
+ err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->adev, *mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
}
if (kptr) {
- err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
+ err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->adev,
(struct kgd_mem *)*mem, kptr, NULL);
if (err) {
pr_debug("Map GTT BO to kernel failed\n");
@@ -742,10 +741,10 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
return err;
sync_memory_failed:
- amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd, *mem, pdd->drm_priv);
+ amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->adev, *mem, pdd->drm_priv);
err_map_mem:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem, pdd->drm_priv,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, *mem, pdd->drm_priv,
NULL);
err_alloc_mem:
*mem = NULL;
@@ -940,10 +939,10 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
if (!peer_pdd->drm_priv)
continue;
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- peer_pdd->dev->kgd, mem, peer_pdd->drm_priv);
+ peer_pdd->dev->adev, mem, peer_pdd->drm_priv);
}
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem,
pdd->drm_priv, NULL);
kfd_process_device_remove_obj_handle(pdd, id);
}
@@ -974,7 +973,7 @@ static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
if (!mem)
goto out;
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd, mem);
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->adev, mem);
out:
mutex_unlock(&p->mutex);
@@ -1003,7 +1002,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
if (pdd->drm_file) {
amdgpu_amdkfd_gpuvm_release_process_vm(
- pdd->dev->kgd, pdd->drm_priv);
+ pdd->dev->adev, pdd->drm_priv);
fput(pdd->drm_file);
}
@@ -1011,7 +1010,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
get_order(KFD_CWSR_TBA_TMA_SIZE));
- kfree(pdd->qpd.doorbell_bitmap);
+ bitmap_free(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
@@ -1317,14 +1316,13 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
* support the SVM APIs and don't need to be considered
* for the XNACK mode selection.
*/
- if (dev->device_info->asic_family < CHIP_VEGA10)
+ if (!KFD_IS_SOC15(dev))
continue;
/* Aldebaran can always support XNACK because it can support
* per-process XNACK mode selection. But let the dev->noretry
* setting still influence the default XNACK mode.
*/
- if (supported &&
- dev->device_info->asic_family == CHIP_ALDEBARAN)
+ if (supported && KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2))
continue;
/* GFXv10 and later GPUs do not support shader preemption
@@ -1332,7 +1330,7 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
* management and memory-manager-related preemptions or
* even deadlocks.
*/
- if (dev->device_info->asic_family >= CHIP_NAVI10)
+ if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
return false;
if (dev->noretry)
@@ -1431,12 +1429,11 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
int range_start = dev->shared_resources.non_cp_doorbells_start;
int range_end = dev->shared_resources.non_cp_doorbells_end;
- if (!KFD_IS_SOC15(dev->device_info->asic_family))
+ if (!KFD_IS_SOC15(dev))
return 0;
- qpd->doorbell_bitmap =
- kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
- BITS_PER_BYTE), GFP_KERNEL);
+ qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ GFP_KERNEL);
if (!qpd->doorbell_bitmap)
return -ENOMEM;
@@ -1448,9 +1445,9 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
if (i >= range_start && i <= range_end) {
- set_bit(i, qpd->doorbell_bitmap);
- set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
- qpd->doorbell_bitmap);
+ __set_bit(i, qpd->doorbell_bitmap);
+ __set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+ qpd->doorbell_bitmap);
}
}
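The open-coded kzalloc/DIV_ROUND_UP allocation becomes the dedicated bitmap helpers; a kernel-style sketch of the pairing used here (the function and variable names are illustrative only):

    #include <linux/bitmap.h>
    #include <linux/gfp.h>

    static int example_alloc_bitmap(unsigned long **out, unsigned int nbits)
    {
            unsigned long *bm = bitmap_zalloc(nbits, GFP_KERNEL);   /* zeroed, sized in bits */

            if (!bm)
                    return -ENOMEM;

            __set_bit(0, bm);       /* non-atomic variant, fine before the bitmap is shared */
            *out = bm;
            return 0;
    }

    static void example_free_bitmap(unsigned long *bm)
    {
            bitmap_free(bm);        /* pairs with bitmap_zalloc; NULL-safe */
    }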
@@ -1547,7 +1544,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
dev = pdd->dev;
ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
- dev->kgd, drm_file, p->pasid,
+ dev->adev, drm_file, p->pasid,
&p->kgd_process_info, &p->ef);
if (ret) {
pr_err("Failed to create process VM object\n");
@@ -1779,14 +1776,13 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
}
int
-kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
+kfd_process_gpuid_from_adev(struct kfd_process *p, struct amdgpu_device *adev,
uint32_t *gpuid, uint32_t *gpuidx)
{
- struct kgd_dev *kgd = (struct kgd_dev *)adev;
int i;
for (i = 0; i < p->n_pdds; i++)
- if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
+ if (p->pdds[i] && p->pdds[i]->dev->adev == adev) {
*gpuid = p->pdds[i]->dev->id;
*gpuidx = i;
return 0;
@@ -1951,10 +1947,10 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
* only happens when the first queue is created.
*/
if (pdd->qpd.vmid)
- amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
+ amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
pdd->qpd.vmid);
} else {
- amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
+ amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev,
pdd->process->pasid, type);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 3627e7ac161b..5e5c84a8e1ef 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -118,7 +118,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
return ret;
pqn->q->gws = mem;
- pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0;
+ pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
pqn->q, NULL);
@@ -135,9 +135,8 @@ void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
{
INIT_LIST_HEAD(&pqm->queues);
- pqm->queue_slot_bitmap =
- kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
- BITS_PER_BYTE), GFP_KERNEL);
+ pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ GFP_KERNEL);
if (!pqm->queue_slot_bitmap)
return -ENOMEM;
pqm->process = p;
@@ -159,7 +158,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
kfree(pqn);
}
- kfree(pqm->queue_slot_bitmap);
+ bitmap_free(pqm->queue_slot_bitmap);
pqm->queue_slot_bitmap = NULL;
}
@@ -220,7 +219,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
* Hence we also check the type as well
*/
if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
- max_queues = dev->device_info->max_no_of_hqd/2;
+ max_queues = dev->device_info.max_no_of_hqd/2;
if (pdd->qpd.queue_count >= max_queues)
return -ENOSPC;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index ed4bc5f844ce..deae12dc777d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -207,7 +207,6 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint64_t throttle_bitmask)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
/*
* ThermalThrottle msg = throttle_bitmask(8):
* thermal_interrupt_count(16):
@@ -223,14 +222,13 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
- atomic64_read(&adev->smu.throttle_int_counter));
+ atomic64_read(&dev->adev->smu.throttle_int_counter));
add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
}
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
struct amdgpu_task_info task_info;
/* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
/* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
@@ -243,7 +241,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
return;
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, pasid, &task_info);
+ amdgpu_vm_get_task_info(dev->adev, pasid, &task_info);
/* Report VM faults from user applications, not retry from kernel */
if (!task_info.pid)
return;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 16137c4247bb..aa5ee91cd595 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -193,7 +193,6 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
struct kfd_process_device *pdd;
- struct amdgpu_device *adev;
pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
@@ -201,9 +200,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
pr_debug("failed to find device idx %d\n", gpuidx);
return -EINVAL;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- r = svm_range_dma_map_dev(adev, prange, offset, npages,
+ r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,
hmm_pfns, gpuidx);
if (r)
break;
@@ -334,6 +332,8 @@ static void svm_range_bo_release(struct kref *kref)
struct svm_range_bo *svm_bo;
svm_bo = container_of(kref, struct svm_range_bo, kref);
+ pr_debug("svm_bo 0x%p\n", svm_bo);
+
spin_lock(&svm_bo->list_lock);
while (!list_empty(&svm_bo->range_list)) {
struct svm_range *prange =
@@ -367,12 +367,33 @@ static void svm_range_bo_release(struct kref *kref)
kfree(svm_bo);
}
-void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+static void svm_range_bo_wq_release(struct work_struct *work)
{
- if (!svm_bo)
- return;
+ struct svm_range_bo *svm_bo;
+
+ svm_bo = container_of(work, struct svm_range_bo, release_work);
+ svm_range_bo_release(&svm_bo->kref);
+}
+
+static void svm_range_bo_release_async(struct kref *kref)
+{
+ struct svm_range_bo *svm_bo;
+
+ svm_bo = container_of(kref, struct svm_range_bo, kref);
+ pr_debug("svm_bo 0x%p\n", svm_bo);
+ INIT_WORK(&svm_bo->release_work, svm_range_bo_wq_release);
+ schedule_work(&svm_bo->release_work);
+}
+
+void svm_range_bo_unref_async(struct svm_range_bo *svm_bo)
+{
+ kref_put(&svm_bo->kref, svm_range_bo_release_async);
+}
- kref_put(&svm_bo->kref, svm_range_bo_release);
+static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+{
+ if (svm_bo)
+ kref_put(&svm_bo->kref, svm_range_bo_release);
}
static bool
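
svm_range_bo_unref_async() above lets the final reference be dropped from contexts where running svm_range_bo_release() directly would be unsafe, by bouncing the release into a workqueue. A self-contained sketch of this kref-plus-work_struct pattern, with invented names (demo_obj and friends are not kfd symbols):

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_obj {
	struct kref kref;
	struct work_struct release_work;
};

static void demo_obj_release(struct kref *kref)
{
	struct demo_obj *obj = container_of(kref, struct demo_obj, kref);

	kfree(obj);
}

static void demo_obj_wq_release(struct work_struct *work)
{
	struct demo_obj *obj = container_of(work, struct demo_obj, release_work);

	/* Run the normal release path from worker (process) context */
	demo_obj_release(&obj->kref);
}

static void demo_obj_release_async(struct kref *kref)
{
	struct demo_obj *obj = container_of(kref, struct demo_obj, kref);

	INIT_WORK(&obj->release_work, demo_obj_wq_release);
	schedule_work(&obj->release_work);
}

/* Synchronous put: last reference frees the object immediately */
static void demo_obj_unref(struct demo_obj *obj)
{
	if (obj)
		kref_put(&obj->kref, demo_obj_release);
}

/* Asynchronous put: last reference defers the free to a system workqueue */
static void demo_obj_unref_async(struct demo_obj *obj)
{
	if (obj)
		kref_put(&obj->kref, demo_obj_release_async);
}
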
@@ -581,7 +602,7 @@ svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id)
return NULL;
}
- return (struct amdgpu_device *)pdd->dev->kgd;
+ return pdd->dev->adev;
}
struct kfd_process_device *
@@ -593,7 +614,7 @@ svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev)
p = container_of(prange->svms, struct kfd_process, svms);
- r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpu_idx);
+ r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpu_idx);
if (r) {
pr_debug("failed to get device id by adev %p\n", adev);
return NULL;
@@ -706,6 +727,61 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
}
}
+static bool
+svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+ uint32_t i;
+ int gpuidx;
+
+ for (i = 0; i < nattr; i++) {
+ switch (attrs[i].type) {
+ case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+ if (prange->preferred_loc != attrs[i].value)
+ return false;
+ break;
+ case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+ /* Prefetch should always trigger a migration even
+ * if the value of the attribute didn't change.
+ */
+ return false;
+ case KFD_IOCTL_SVM_ATTR_ACCESS:
+ case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+ case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+ gpuidx = kfd_process_gpuidx_from_gpuid(p,
+ attrs[i].value);
+ if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
+ if (test_bit(gpuidx, prange->bitmap_access) ||
+ test_bit(gpuidx, prange->bitmap_aip))
+ return false;
+ } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) {
+ if (!test_bit(gpuidx, prange->bitmap_access))
+ return false;
+ } else {
+ if (!test_bit(gpuidx, prange->bitmap_aip))
+ return false;
+ }
+ break;
+ case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ if ((prange->flags & attrs[i].value) != attrs[i].value)
+ return false;
+ break;
+ case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+ if ((prange->flags & attrs[i].value) != 0)
+ return false;
+ break;
+ case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+ if (prange->granularity != attrs[i].value)
+ return false;
+ break;
+ default:
+ WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
+ }
+ }
+
+ return true;
+}
+
/**
* svm_range_debug_dump - print all range information from svms
* @svms: svm range list header
@@ -743,14 +819,6 @@ static void svm_range_debug_dump(struct svm_range_list *svms)
}
}
-static bool
-svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new)
-{
- return (old->prefetch_loc == new->prefetch_loc &&
- old->flags == new->flags &&
- old->granularity == new->granularity);
-}
-
static int
svm_range_split_array(void *ppnew, void *ppold, size_t size,
uint64_t old_start, uint64_t old_n,
@@ -943,7 +1011,7 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
}
static int
-svm_range_split_tail(struct svm_range *prange, struct svm_range *new,
+svm_range_split_tail(struct svm_range *prange,
uint64_t new_last, struct list_head *insert_list)
{
struct svm_range *tail;
@@ -955,7 +1023,7 @@ svm_range_split_tail(struct svm_range *prange, struct svm_range *new,
}
static int
-svm_range_split_head(struct svm_range *prange, struct svm_range *new,
+svm_range_split_head(struct svm_range *prange,
uint64_t new_start, struct list_head *insert_list)
{
struct svm_range *head;
@@ -1053,8 +1121,8 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
if (domain == SVM_RANGE_VRAM_DOMAIN)
bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
- switch (adev->asic_type) {
- case CHIP_ARCTURUS:
+ switch (KFD_GC_VERSION(adev->kfd.dev)) {
+ case IP_VERSION(9, 4, 1):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_adev == adev) {
mapping_flags |= coherent ?
@@ -1070,7 +1138,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
break;
- case CHIP_ALDEBARAN:
+ case IP_VERSION(9, 4, 2):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_adev == adev) {
mapping_flags |= coherent ?
@@ -1129,7 +1197,6 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
struct kfd_process_device *pdd;
struct dma_fence *fence = NULL;
- struct amdgpu_device *adev;
struct kfd_process *p;
uint32_t gpuidx;
int r = 0;
@@ -1145,9 +1212,9 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
pr_debug("failed to find device idx %d\n", gpuidx);
return -EINVAL;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- r = svm_range_unmap_from_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
+ r = svm_range_unmap_from_gpu(pdd->dev->adev,
+ drm_priv_to_vm(pdd->drm_priv),
start, last, &fence);
if (r)
break;
@@ -1159,7 +1226,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
if (r)
break;
}
- amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
+ amdgpu_amdkfd_flush_gpu_tlb_pasid(pdd->dev->adev,
p->pasid, TLB_FLUSH_HEAVYWEIGHT);
}
@@ -1172,7 +1239,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned long npages, bool readonly, dma_addr_t *dma_addr,
struct amdgpu_device *bo_adev, struct dma_fence **fence)
{
- struct amdgpu_bo_va bo_va;
bool table_freed = false;
uint64_t pte_flags;
unsigned long last_start;
@@ -1185,9 +1251,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
last_start, last_start + npages - 1, readonly);
- if (prange->svm_bo && prange->ttm_res)
- bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
-
for (i = offset; i < offset + npages; i++) {
last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
@@ -1243,8 +1306,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct kfd_process *p;
p = container_of(prange->svms, struct kfd_process, svms);
- amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
- p->pasid, TLB_FLUSH_LEGACY);
+ amdgpu_amdkfd_flush_gpu_tlb_pasid(adev, p->pasid, TLB_FLUSH_LEGACY);
}
out:
return r;
@@ -1257,7 +1319,6 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
{
struct kfd_process_device *pdd;
struct amdgpu_device *bo_adev;
- struct amdgpu_device *adev;
struct kfd_process *p;
struct dma_fence *fence = NULL;
uint32_t gpuidx;
@@ -1276,19 +1337,18 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
pr_debug("failed to find device idx %d\n", gpuidx);
return -EINVAL;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd))
return -EINVAL;
- if (bo_adev && adev != bo_adev &&
- !amdgpu_xgmi_same_hive(adev, bo_adev)) {
+ if (bo_adev && pdd->dev->adev != bo_adev &&
+ !amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) {
pr_debug("cannot map to device idx %d\n", gpuidx);
continue;
}
- r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
+ r = svm_range_map_to_gpu(pdd->dev->adev, drm_priv_to_vm(pdd->drm_priv),
prange, offset, npages, readonly,
prange->dma_addr[gpuidx],
bo_adev, wait ? &fence : NULL);
@@ -1322,7 +1382,6 @@ struct svm_validate_context {
static int svm_range_reserve_bos(struct svm_validate_context *ctx)
{
struct kfd_process_device *pdd;
- struct amdgpu_device *adev;
struct amdgpu_vm *vm;
uint32_t gpuidx;
int r;
@@ -1334,7 +1393,6 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
pr_debug("failed to find device idx %d\n", gpuidx);
return -EINVAL;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
vm = drm_priv_to_vm(pdd->drm_priv);
ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
@@ -1356,9 +1414,9 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
r = -EINVAL;
goto unreserve_out;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- r = amdgpu_vm_validate_pt_bos(adev, drm_priv_to_vm(pdd->drm_priv),
+ r = amdgpu_vm_validate_pt_bos(pdd->dev->adev,
+ drm_priv_to_vm(pdd->drm_priv),
svm_range_bo_validate, NULL);
if (r) {
pr_debug("failed %d validate pt bos\n", r);
@@ -1381,12 +1439,10 @@ static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
{
struct kfd_process_device *pdd;
- struct amdgpu_device *adev;
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- return SVM_ADEV_PGMAP_OWNER(adev);
+ return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev);
}
/*
@@ -1574,7 +1630,6 @@ retry_flush_work:
static void svm_range_restore_work(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
- struct amdkfd_process_info *process_info;
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
@@ -1594,12 +1649,10 @@ static void svm_range_restore_work(struct work_struct *work)
* the lifetime of this thread, kfd_process and mm will be valid.
*/
p = container_of(svms, struct kfd_process, svms);
- process_info = p->kgd_process_info;
mm = p->mm;
if (!mm)
return;
- mutex_lock(&process_info->lock);
svm_range_list_lock_and_flush_work(svms, mm);
mutex_lock(&svms->lock);
@@ -1652,7 +1705,6 @@ static void svm_range_restore_work(struct work_struct *work)
out_reschedule:
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
- mutex_unlock(&process_info->lock);
/* If validation failed, reschedule another attempt */
if (evicted_ranges) {
@@ -1664,6 +1716,10 @@ out_reschedule:
/**
* svm_range_evict - evict svm range
+ * @prange: svm range structure
+ * @mm: current process mm_struct
+ * @start: first page of the address range being evicted
+ * @last: last page of the address range being evicted
*
* Stop all queues of the process to ensure GPU doesn't access the memory, then
* return to let CPU evict the buffer and proceed CPU pagetable update.
@@ -1768,46 +1824,49 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
}
/**
- * svm_range_handle_overlap - split overlap ranges
- * @svms: svm range list header
- * @new: range added with this attributes
- * @start: range added start address, in pages
- * @last: range last address, in pages
- * @update_list: output, the ranges attributes are updated. For set_attr, this
- * will do validation and map to GPUs. For unmap, this will be
- * removed and unmap from GPUs
- * @insert_list: output, the ranges will be inserted into svms, attributes are
- * not changes. For set_attr, this will add into svms.
- * @remove_list:output, the ranges will be removed from svms
- * @left: the remaining range after overlap, For set_attr, this will be added
- * as new range.
+ * svm_range_add - add svm range and handle overlap
+ * @p: the process this range is added to
+ * @start: page size aligned
+ * @size: page size aligned
+ * @nattr: number of attributes
+ * @attrs: array of attributes
+ * @update_list: output, the ranges that need validation and GPU mapping update
+ * @insert_list: output, the ranges that need to be inserted into svms
+ * @remove_list: output, the ranges that are replaced and need removing from svms
*
- * Total have 5 overlap cases.
+ * Check if the virtual address range overlaps any existing ranges, split the
+ * partly overlapping ranges and add new ranges in the gaps. All changes should
+ * be applied to the range_list and interval tree transactionally. If any range
+ * split or allocation fails, the entire update fails. Therefore any existing
+ * overlapping svm_ranges are cloned and the original svm_ranges are left
+ * unchanged.
*
- * This function handles overlap of an address interval with existing
- * struct svm_ranges for applying new attributes. This may require
- * splitting existing struct svm_ranges. All changes should be applied to
- * the range_list and interval tree transactionally. If any split operation
- * fails, the entire update fails. Therefore the existing overlapping
- * svm_ranges are cloned and the original svm_ranges left unchanged. If the
- * transaction succeeds, the modified clones are added and the originals
- * freed. Otherwise the clones are removed and the old svm_ranges remain.
+ * If the transaction succeeds, the caller can update and insert clones and
+ * new ranges, then free the originals.
*
- * Context: The caller must hold svms->lock
+ * Otherwise the caller can free the clones and new ranges, while the old
+ * svm_ranges remain unchanged.
+ *
+ * Context: Process context, caller must hold svms->lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
*/
static int
-svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
- unsigned long start, unsigned long last,
- struct list_head *update_list,
- struct list_head *insert_list,
- struct list_head *remove_list,
- unsigned long *left)
+svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+ struct list_head *update_list, struct list_head *insert_list,
+ struct list_head *remove_list)
{
+ unsigned long last = start + size - 1UL;
+ struct svm_range_list *svms = &p->svms;
struct interval_tree_node *node;
struct svm_range *prange;
struct svm_range *tmp;
int r = 0;
+ pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last);
+
INIT_LIST_HEAD(update_list);
INIT_LIST_HEAD(insert_list);
INIT_LIST_HEAD(remove_list);
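
The kernel-doc above describes how the reworked svm_range_add() walks every existing range that overlaps [start, last] and fills the update/insert/remove lists transactionally. The interval-tree iteration it builds on looks roughly like this (a sketch only; struct my_range and walk_overlaps() are placeholders, not kfd types):

#include <linux/interval_tree.h>
#include <linux/kernel.h>

struct my_range {
	struct interval_tree_node it_node;	/* .start/.last hold the interval */
};

static void walk_overlaps(struct rb_root_cached *root,
			  unsigned long start, unsigned long last)
{
	struct interval_tree_node *node;

	node = interval_tree_iter_first(root, start, last);
	while (node) {
		struct interval_tree_node *next;
		struct my_range *r;
		unsigned long next_start;

		r = container_of(node, struct my_range, it_node);
		/* Look up the next overlap before this node is cloned/replaced */
		next = interval_tree_iter_next(node, start, last);
		next_start = min(node->last, last) + 1;

		/* svm_range_add() decides here: keep r unchanged if the attributes
		 * already match, clone and split it if it straddles a boundary,
		 * and create a new range for any gap [start, node->start - 1].
		 */
		(void)r;

		node = next;
		start = next_start;
	}
	/* A trailing gap [start, last] still needs one final new range. */
}
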
@@ -1815,18 +1874,24 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
node = interval_tree_iter_first(&svms->objects, start, last);
while (node) {
struct interval_tree_node *next;
- struct svm_range *old;
unsigned long next_start;
pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start,
node->last);
- old = container_of(node, struct svm_range, it_node);
+ prange = container_of(node, struct svm_range, it_node);
next = interval_tree_iter_next(node, start, last);
next_start = min(node->last, last) + 1;
- if (node->start < start || node->last > last) {
- /* node intersects the updated range, clone+split it */
+ if (svm_range_is_same_attrs(p, prange, nattr, attrs)) {
+ /* nothing to do */
+ } else if (node->start < start || node->last > last) {
+ /* node intersects the update range and its attributes
+ * will change. Clone and split it, apply updates only
+ * to the overlapping part
+ */
+ struct svm_range *old = prange;
+
prange = svm_range_clone(old);
if (!prange) {
r = -ENOMEM;
@@ -1835,17 +1900,18 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
list_add(&old->remove_list, remove_list);
list_add(&prange->insert_list, insert_list);
+ list_add(&prange->update_list, update_list);
if (node->start < start) {
pr_debug("change old range start\n");
- r = svm_range_split_head(prange, new, start,
+ r = svm_range_split_head(prange, start,
insert_list);
if (r)
goto out;
}
if (node->last > last) {
pr_debug("change old range last\n");
- r = svm_range_split_tail(prange, new, last,
+ r = svm_range_split_tail(prange, last,
insert_list);
if (r)
goto out;
@@ -1854,16 +1920,12 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
/* The node is contained within start..last,
* just update it
*/
- prange = old;
- }
-
- if (!svm_range_is_same_attrs(prange, new))
list_add(&prange->update_list, update_list);
+ }
/* insert a new node if needed */
if (node->start > start) {
- prange = svm_range_new(prange->svms, start,
- node->start - 1);
+ prange = svm_range_new(svms, start, node->start - 1);
if (!prange) {
r = -ENOMEM;
goto out;
@@ -1877,8 +1939,16 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
start = next_start;
}
- if (left && start <= last)
- *left = last - start + 1;
+ /* add a final range at the end if needed */
+ if (start <= last) {
+ prange = svm_range_new(svms, start, last);
+ if (!prange) {
+ r = -ENOMEM;
+ goto out;
+ }
+ list_add(&prange->insert_list, insert_list);
+ list_add(&prange->update_list, update_list);
+ }
out:
if (r)
@@ -1966,23 +2036,30 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
static void svm_range_drain_retry_fault(struct svm_range_list *svms)
{
struct kfd_process_device *pdd;
- struct amdgpu_device *adev;
struct kfd_process *p;
+ int drain;
uint32_t i;
p = container_of(svms, struct kfd_process, svms);
+restart:
+ drain = atomic_read(&svms->drain_pagefaults);
+ if (!drain)
+ return;
+
for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
pdd = p->pdds[i];
if (!pdd)
continue;
pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
+ amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
+ &pdd->dev->adev->irq.ih1);
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
}
+ if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
+ goto restart;
}
static void svm_range_deferred_list_work(struct work_struct *work)
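
The restart loop added to svm_range_drain_retry_fault() above treats drain_pagefaults as a counter of outstanding drain requests: requesters atomic_inc() it, the drainer snapshots the value, does one full drain pass, and only clears the counter if nothing new arrived in the meantime. The handshake in isolation (names invented for illustration):

#include <linux/atomic.h>

static atomic_t drain_requests = ATOMIC_INIT(0);

/* Requester side: ask for (another) drain pass */
static void request_drain(void)
{
	atomic_inc(&drain_requests);
}

/* Drainer side: keep draining until no request raced in during a pass */
static void drain_until_idle(void)
{
	int seen;

restart:
	seen = atomic_read(&drain_requests);
	if (!seen)
		return;

	/* ... do one complete drain pass here ... */

	/* Reset only if the counter still holds the value we acted on */
	if (atomic_cmpxchg(&drain_requests, seen, 0) != seen)
		goto restart;
}
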
@@ -1990,43 +2067,41 @@ static void svm_range_deferred_list_work(struct work_struct *work)
struct svm_range_list *svms;
struct svm_range *prange;
struct mm_struct *mm;
+ struct kfd_process *p;
svms = container_of(work, struct svm_range_list, deferred_list_work);
pr_debug("enter svms 0x%p\n", svms);
+ p = container_of(svms, struct kfd_process, svms);
+ /* Pin mm so it cannot go away while mmu notifiers are being inserted */
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_debug("svms 0x%p process mm gone\n", svms);
+ return;
+ }
+retry:
+ mmap_write_lock(mm);
+
+ /* Checking for the need to drain retry faults must be inside
+ * mmap write lock to serialize with munmap notifiers.
+ */
+ if (unlikely(atomic_read(&svms->drain_pagefaults))) {
+ mmap_write_unlock(mm);
+ svm_range_drain_retry_fault(svms);
+ goto retry;
+ }
+
spin_lock(&svms->deferred_list_lock);
while (!list_empty(&svms->deferred_range_list)) {
prange = list_first_entry(&svms->deferred_range_list,
struct svm_range, deferred_list);
+ list_del_init(&prange->deferred_list);
spin_unlock(&svms->deferred_list_lock);
+
pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
prange->start, prange->last, prange->work_item.op);
- mm = prange->work_item.mm;
-retry:
- mmap_write_lock(mm);
mutex_lock(&svms->lock);
-
- /* Checking for the need to drain retry faults must be in
- * mmap write lock to serialize with munmap notifiers.
- *
- * Remove from deferred_list must be inside mmap write lock,
- * otherwise, svm_range_list_lock_and_flush_work may hold mmap
- * write lock, and continue because deferred_list is empty, then
- * deferred_list handle is blocked by mmap write lock.
- */
- spin_lock(&svms->deferred_list_lock);
- if (unlikely(svms->drain_pagefaults)) {
- svms->drain_pagefaults = false;
- spin_unlock(&svms->deferred_list_lock);
- mutex_unlock(&svms->lock);
- mmap_write_unlock(mm);
- svm_range_drain_retry_fault(svms);
- goto retry;
- }
- list_del_init(&prange->deferred_list);
- spin_unlock(&svms->deferred_list_lock);
-
mutex_lock(&prange->migrate_mutex);
while (!list_empty(&prange->child_list)) {
struct svm_range *pchild;
@@ -2042,12 +2117,13 @@ retry:
svm_range_handle_list_op(svms, prange);
mutex_unlock(&svms->lock);
- mmap_write_unlock(mm);
spin_lock(&svms->deferred_list_lock);
}
spin_unlock(&svms->deferred_list_lock);
+ mmap_write_unlock(mm);
+ mmput(mm);
pr_debug("exit svms 0x%p\n", svms);
}
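
The deferred-list worker above now pins the process address space once with get_task_mm() for the whole walk instead of reusing an mm pointer stashed in each work item, so the mm cannot disappear between items. The lifetime rule it relies on, reduced to a sketch (work_on_task_mm() is a made-up name):

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>

static void work_on_task_mm(struct task_struct *task)
{
	struct mm_struct *mm;

	/* Takes a reference on the mm, or returns NULL if it is already gone */
	mm = get_task_mm(task);
	if (!mm)
		return;

	mmap_write_lock(mm);
	/* ... safe to walk VMAs and touch mmu notifiers for this mm here ... */
	mmap_write_unlock(mm);

	mmput(mm);	/* drop the reference taken by get_task_mm() */
}
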
@@ -2056,12 +2132,6 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
struct mm_struct *mm, enum svm_work_list_ops op)
{
spin_lock(&svms->deferred_list_lock);
- /* Make sure pending page faults are drained in the deferred worker
- * before the range is freed to avoid straggler interrupts on
- * unmapped memory causing "phantom faults".
- */
- if (op == SVM_OP_UNMAP_RANGE)
- svms->drain_pagefaults = true;
/* if prange is on the deferred list */
if (!list_empty(&prange->deferred_list)) {
pr_debug("update exist prange 0x%p work op %d\n", prange, op);
@@ -2140,6 +2210,12 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
prange, prange->start, prange->last, start, last);
+ /* Make sure pending page faults are drained in the deferred worker
+ * before the range is freed to avoid straggler interrupts on
+ * unmapped memory causing "phantom faults".
+ */
+ atomic_inc(&svms->drain_pagefaults);
+
unmap_parent = start <= prange->start && last >= prange->last;
list_for_each_entry(pchild, &prange->child_list, child_list) {
@@ -2169,6 +2245,9 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
/**
* svm_range_cpu_invalidate_pagetables - interval notifier callback
+ * @mni: mmu_interval_notifier struct
+ * @range: mmu_notifier_range struct
+ * @cur_seq: value to pass to mmu_interval_set_seq()
*
* If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it
* is from migration, or CPU page invalidation callback.
@@ -2198,8 +2277,8 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
start = mni->interval_tree.start;
last = mni->interval_tree.last;
- start = (start > range->start ? start : range->start) >> PAGE_SHIFT;
- last = (last < (range->end - 1) ? last : range->end - 1) >> PAGE_SHIFT;
+ start = max(start, range->start) >> PAGE_SHIFT;
+ last = min(last, range->end - 1) >> PAGE_SHIFT;
pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n",
start, last, range->start >> PAGE_SHIFT,
(range->end - 1) >> PAGE_SHIFT,
@@ -2301,7 +2380,7 @@ svm_range_best_restore_location(struct svm_range *prange,
p = container_of(prange->svms, struct kfd_process, svms);
- r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx);
+ r = kfd_process_gpuid_from_adev(p, adev, &gpuid, gpuidx);
if (r < 0) {
pr_debug("failed to get gpuid from kgd\n");
return -1;
@@ -2478,7 +2557,7 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
pr_debug("Failed to create prange in address [0x%llx]\n", addr);
return NULL;
}
- if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) {
+ if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) {
pr_debug("failed to get gpuid from kgd\n");
svm_range_free(prange);
return NULL;
@@ -2545,7 +2624,7 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
uint32_t gpuid;
int r;
- r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx);
+ r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx);
if (r < 0)
return;
}
@@ -2559,20 +2638,13 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
}
static bool
-svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault)
+svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
{
unsigned long requested = VM_READ;
- struct vm_area_struct *vma;
if (write_fault)
requested |= VM_WRITE;
- vma = find_vma(mm, addr << PAGE_SHIFT);
- if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
- pr_debug("address 0x%llx VMA is removed\n", addr);
- return true;
- }
-
pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
vma->vm_flags);
return (vma->vm_flags & requested) == requested;
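
With the change above, svm_fault_allowed() only checks permissions; the caller is responsible for looking up the VMA and treating a missing one as a stale fault. The combined lookup-and-check logic is essentially the following (sketch; the helper names are invented):

#include <linux/mm.h>

/* Requires read access, plus write access when the fault was a write */
static bool vma_allows_fault(struct vm_area_struct *vma, bool write_fault)
{
	unsigned long requested = VM_READ;

	if (write_fault)
		requested |= VM_WRITE;

	return (vma->vm_flags & requested) == requested;
}

/* Caller must hold the mmap lock for find_vma() */
static bool addr_allows_fault(struct mm_struct *mm, unsigned long addr,
			      bool write_fault)
{
	struct vm_area_struct *vma = find_vma(mm, addr);

	/* find_vma() returns the first VMA ending above addr; it may begin
	 * above addr, which means the address itself is unmapped.
	 */
	if (!vma || addr < vma->vm_start)
		return false;

	return vma_allows_fault(vma, write_fault);
}
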
@@ -2590,6 +2662,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
int32_t best_loc;
int32_t gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
+ struct vm_area_struct *vma;
int r = 0;
if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
@@ -2600,7 +2673,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
p = kfd_lookup_process_by_pasid(pasid);
if (!p) {
pr_debug("kfd process not founded pasid 0x%x\n", pasid);
- return -ESRCH;
+ return 0;
}
if (!p->xnack_enabled) {
pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
@@ -2611,10 +2684,19 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
+ if (atomic_read(&svms->drain_pagefaults)) {
+ pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+ r = 0;
+ goto out;
+ }
+
+ /* p->lead_thread is available because kfd_process_wq_release flushes this
+ * work before releasing the task ref.
+ */
mm = get_task_mm(p->lead_thread);
if (!mm) {
pr_debug("svms 0x%p failed to get mm\n", svms);
- r = -ESRCH;
+ r = 0;
goto out;
}
@@ -2652,6 +2734,7 @@ retry_write_locked:
if (svm_range_skip_recover(prange)) {
amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+ r = 0;
goto out_unlock_range;
}
@@ -2660,10 +2743,21 @@ retry_write_locked:
if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
svms, prange->start, prange->last);
+ r = 0;
goto out_unlock_range;
}
- if (!svm_fault_allowed(mm, addr, write_fault)) {
+ /* __do_munmap already removed the VMA; return success since this is just a
+ * stale retry fault.
+ */
+ vma = find_vma(mm, addr << PAGE_SHIFT);
+ if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+ pr_debug("address 0x%llx VMA is removed\n", addr);
+ r = 0;
+ goto out_unlock_range;
+ }
+
+ if (!svm_fault_allowed(vma, write_fault)) {
pr_debug("fault addr 0x%llx no %s permission\n", addr,
write_fault ? "write" : "read");
r = -EPERM;
@@ -2741,6 +2835,14 @@ void svm_range_list_fini(struct kfd_process *p)
/* Ensure list work is finished before process is destroyed */
flush_work(&p->svms.deferred_list_work);
+ /*
+ * Ensure no retry fault comes in afterwards, because the page fault handler
+ * will no longer find the kfd process or be able to take the mm lock needed
+ * to recover the fault.
+ */
+ atomic_inc(&p->svms.drain_pagefaults);
+ svm_range_drain_retry_fault(&p->svms);
+
list_for_each_entry_safe(prange, next, &p->svms.list, list) {
svm_range_unlink(prange);
svm_range_remove_notifier(prange);
@@ -2761,6 +2863,7 @@ int svm_range_list_init(struct kfd_process *p)
mutex_init(&svms->lock);
INIT_LIST_HEAD(&svms->list);
atomic_set(&svms->evicted_ranges, 0);
+ atomic_set(&svms->drain_pagefaults, 0);
INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
INIT_LIST_HEAD(&svms->deferred_range_list);
@@ -2868,59 +2971,6 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
}
/**
- * svm_range_add - add svm range and handle overlap
- * @p: the range add to this process svms
- * @start: page size aligned
- * @size: page size aligned
- * @nattr: number of attributes
- * @attrs: array of attributes
- * @update_list: output, the ranges need validate and update GPU mapping
- * @insert_list: output, the ranges need insert to svms
- * @remove_list: output, the ranges are replaced and need remove from svms
- *
- * Check if the virtual address range has overlap with the registered ranges,
- * split the overlapped range, copy and adjust pages address and vram nodes in
- * old and new ranges.
- *
- * Context: Process context, caller must hold svms->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-static int
-svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
- struct list_head *update_list, struct list_head *insert_list,
- struct list_head *remove_list)
-{
- uint64_t last = start + size - 1UL;
- struct svm_range_list *svms;
- struct svm_range new = {0};
- struct svm_range *prange;
- unsigned long left = 0;
- int r = 0;
-
- pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last);
-
- svm_range_apply_attrs(p, &new, nattr, attrs);
-
- svms = &p->svms;
-
- r = svm_range_handle_overlap(svms, &new, start, last, update_list,
- insert_list, remove_list, &left);
- if (r)
- return r;
-
- if (left) {
- prange = svm_range_new(svms, last - left + 1, last);
- list_add(&prange->insert_list, insert_list);
- list_add(&prange->update_list, update_list);
- }
-
- return 0;
-}
-
-/**
* svm_range_best_prefetch_location - decide the best prefetch location
* @prange: svm range structure
*
@@ -2953,7 +3003,6 @@ svm_range_best_prefetch_location(struct svm_range *prange)
uint32_t best_loc = prange->prefetch_loc;
struct kfd_process_device *pdd;
struct amdgpu_device *bo_adev;
- struct amdgpu_device *adev;
struct kfd_process *p;
uint32_t gpuidx;
@@ -2981,12 +3030,11 @@ svm_range_best_prefetch_location(struct svm_range *prange)
pr_debug("failed to get device by idx 0x%x\n", gpuidx);
continue;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- if (adev == bo_adev)
+ if (pdd->dev->adev == bo_adev)
continue;
- if (!amdgpu_xgmi_same_hive(adev, bo_adev)) {
+ if (!amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) {
best_loc = 0;
break;
}
@@ -3150,7 +3198,6 @@ static int
svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
{
- struct amdkfd_process_info *process_info = p->kgd_process_info;
struct mm_struct *mm = current->mm;
struct list_head update_list;
struct list_head insert_list;
@@ -3169,8 +3216,6 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
svms = &p->svms;
- mutex_lock(&process_info->lock);
-
svm_range_list_lock_and_flush_work(svms, mm);
r = svm_range_is_valid(p, start, size);
@@ -3246,8 +3291,6 @@ out_unlock_range:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
out:
- mutex_unlock(&process_info->lock);
-
pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
&p->svms, start, start + size - 1, r);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 6dc91c33e80f..2f8a95e86dcb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -48,6 +48,7 @@ struct svm_range_bo {
struct work_struct eviction_work;
struct svm_range_list *svms;
uint32_t evicting;
+ struct work_struct release_work;
};
enum svm_work_list_ops {
@@ -195,7 +196,7 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
*/
#define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0)
-void svm_range_bo_unref(struct svm_range_bo *svm_bo);
+void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
#else
struct kfd_process;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index dd593ad0614a..948fbb39336e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -113,7 +113,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
return device;
}
-struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
+struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev)
{
struct kfd_topology_device *top_dev;
struct kfd_dev *device = NULL;
@@ -121,7 +121,7 @@ struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
down_read(&topology_lock);
list_for_each_entry(top_dev, &topology_device_list, list)
- if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
+ if (top_dev->gpu && top_dev->gpu->adev == adev) {
device = top_dev->gpu;
break;
}
@@ -503,7 +503,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
if (dev->gpu) {
log_max_watch_addr =
- __ilog2_u32(dev->gpu->device_info->num_of_watch_points);
+ __ilog2_u32(dev->gpu->device_info.num_of_watch_points);
if (log_max_watch_addr) {
dev->node_props.capability |=
@@ -515,7 +515,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
}
- if (dev->gpu->device_info->asic_family == CHIP_TONGA)
+ if (dev->gpu->adev->asic_type == CHIP_TONGA)
dev->node_props.capability |=
HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
@@ -531,7 +531,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
dev->gpu->sdma_fw_version);
sysfs_show_64bit_prop(buffer, offs, "unique_id",
- amdgpu_amdkfd_get_unique_id(dev->gpu->kgd));
+ dev->gpu->adev->unique_id);
}
@@ -1106,7 +1106,7 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
if (!gpu)
return 0;
- amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info);
+ amdgpu_amdkfd_get_local_mem_info(gpu->adev, &local_mem_info);
local_mem_size = local_mem_info.local_mem_size_private +
local_mem_info.local_mem_size_public;
@@ -1189,7 +1189,7 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
* for APUs - If CRAT from ACPI reports more than one bank, then
* all the banks will report the same mem_clk_max information
*/
- amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info);
+ amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info);
list_for_each_entry(mem, &dev->mem_props, list)
mem->mem_clk_max = local_mem_info.mem_clk_max;
@@ -1217,8 +1217,7 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
/* set gpu (dev) flags. */
} else {
if (!dev->gpu->pci_atomic_requested ||
- dev->gpu->device_info->asic_family ==
- CHIP_HAWAII)
+ dev->gpu->adev->asic_type == CHIP_HAWAII)
link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
}
@@ -1239,7 +1238,7 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev,
*/
if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS ||
(inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
- to_dev->gpu->device_info->asic_family == CHIP_VEGA20)) {
+ KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) {
outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
}
@@ -1286,7 +1285,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
void *crat_image = NULL;
size_t image_size = 0;
int proximity_domain;
- struct amdgpu_device *adev;
+ int i;
+ const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
INIT_LIST_HEAD(&temp_topology_device_list);
@@ -1296,10 +1296,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
- adev = (struct amdgpu_device *)(gpu->kgd);
-
/* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */
- if (gpu->hive_id && adev->gmc.xgmi.connected_to_cpu) {
+ if (gpu->hive_id && gpu->adev->gmc.xgmi.connected_to_cpu) {
struct kfd_topology_device *top_dev;
down_read(&topology_lock);
@@ -1372,45 +1370,48 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
* needed for the topology
*/
- amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info);
+ amdgpu_amdkfd_get_cu_info(dev->gpu->adev, &cu_info);
- strncpy(dev->node_props.name, gpu->device_info->asic_name,
- KFD_TOPOLOGY_PUBLIC_NAME_SIZE);
+ for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) {
+ dev->node_props.name[i] = __tolower(asic_name[i]);
+ if (asic_name[i] == '\0')
+ break;
+ }
+ dev->node_props.name[i] = '\0';
dev->node_props.simd_arrays_per_engine =
cu_info.num_shader_arrays_per_engine;
- dev->node_props.gfx_target_version = gpu->device_info->gfx_target_version;
+ dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version;
dev->node_props.vendor_id = gpu->pdev->vendor;
dev->node_props.device_id = gpu->pdev->device;
dev->node_props.capability |=
- ((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) <<
- HSA_CAP_ASIC_REVISION_SHIFT) &
+ ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) &
HSA_CAP_ASIC_REVISION_MASK);
dev->node_props.location_id = pci_dev_id(gpu->pdev);
dev->node_props.domain = pci_domain_nr(gpu->pdev->bus);
dev->node_props.max_engine_clk_fcompute =
- amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
+ amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev);
dev->node_props.max_engine_clk_ccompute =
cpufreq_quick_get_max(0) / 1000;
dev->node_props.drm_render_minor =
gpu->shared_resources.drm_render_minor;
dev->node_props.hive_id = gpu->hive_id;
- dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
+ dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu);
dev->node_props.num_sdma_xgmi_engines =
- gpu->device_info->num_xgmi_sdma_engines;
+ kfd_get_num_xgmi_sdma_engines(gpu);
dev->node_props.num_sdma_queues_per_engine =
- gpu->device_info->num_sdma_queues_per_engine;
+ gpu->device_info.num_sdma_queues_per_engine;
dev->node_props.num_gws = (dev->gpu->gws &&
dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
- amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
+ dev->gpu->adev->gds.gws_size : 0;
dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
kfd_fill_mem_clk_max_info(dev);
kfd_fill_iolink_non_crat_info(dev);
- switch (dev->gpu->device_info->asic_family) {
+ switch (dev->gpu->adev->asic_type) {
case CHIP_KAVERI:
case CHIP_HAWAII:
case CHIP_TONGA:
@@ -1429,30 +1430,14 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
- HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
- HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dev->gpu->device_info->asic_family);
+ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 0, 1))
+ dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+ else
+ WARN(1, "Unexpected ASIC family %u",
+ dev->gpu->adev->asic_type);
}
/*
@@ -1469,7 +1454,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
* because it doesn't consider masked out CUs
* max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
*/
- if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
+ if (dev->gpu->adev->asic_type == CHIP_CARRIZO) {
dev->node_props.simd_count =
cu_info.simd_per_cu * cu_info.cu_active_number;
dev->node_props.max_waves_per_simd = 10;
@@ -1477,16 +1462,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
dev->node_props.capability |=
- ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
+ ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
HSA_CAP_SRAM_EDCSUPPORTED : 0;
- dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
+ dev->node_props.capability |=
+ ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
HSA_CAP_MEM_EDCSUPPORTED : 0;
- if (adev->asic_type != CHIP_VEGA10)
- dev->node_props.capability |= (adev->ras_enabled != 0) ?
+ if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1))
+ dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ?
HSA_CAP_RASEVENTNOTIFY : 0;
- if (KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev))
+ if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev->kfd.dev))
dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
kfd_debug_print_topology();
@@ -1592,7 +1578,7 @@ void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
gpu->use_iommu_v2 = false;
- if (!gpu->device_info->needs_iommu_device)
+ if (!gpu->device_info.needs_iommu_device)
return;
down_read(&topology_lock);
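
Several hunks in this series, including the doorbell and RAS capability checks above and the PTE-flag switch in kfd_svm.c, replace per-ASIC CHIP_* comparisons with KFD_GC_VERSION() tests against IP_VERSION() values, so one range check such as >= IP_VERSION(9, 0, 1) covers every GFX9-and-newer part. A toy userspace illustration of why a packed version number supports ordered comparisons; the macro body mirrors the usual amdgpu major/minor/revision packing and is stated here as an assumption, not quoted from the headers:

#include <stdint.h>
#include <stdio.h>

/* Assumed packing: major in bits 31:16, minor in 15:8, revision in 7:0 */
#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))

int main(void)
{
	uint32_t gc_9_0_1 = IP_VERSION(9, 0, 1);	/* Vega10-class GC */
	uint32_t gc_9_4_1 = IP_VERSION(9, 4, 1);	/* Arcturus-class GC */
	uint32_t gc_9_4_2 = IP_VERSION(9, 4, 2);	/* Aldebaran-class GC */

	/* Higher major/minor/revision always compares higher numerically */
	printf("%d %d\n", gc_9_4_2 > gc_9_4_1, gc_9_4_1 >= gc_9_0_1);	/* 1 1 */
	return 0;
}
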
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index a8db017c9b8e..f0cc59d2fd5d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -25,38 +25,11 @@
#include <linux/types.h>
#include <linux/list.h>
+#include <linux/kfd_sysfs.h>
#include "kfd_crat.h"
#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32
-#define HSA_CAP_HOT_PLUGGABLE 0x00000001
-#define HSA_CAP_ATS_PRESENT 0x00000002
-#define HSA_CAP_SHARED_WITH_GRAPHICS 0x00000004
-#define HSA_CAP_QUEUE_SIZE_POW2 0x00000008
-#define HSA_CAP_QUEUE_SIZE_32BIT 0x00000010
-#define HSA_CAP_QUEUE_IDLE_EVENT 0x00000020
-#define HSA_CAP_VA_LIMIT 0x00000040
-#define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080
-#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00
-#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8
-#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000
-#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12
-
-#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0
-#define HSA_CAP_DOORBELL_TYPE_1_0 0x1
-#define HSA_CAP_DOORBELL_TYPE_2_0 0x2
-#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000
-
-#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x00080000 /* Old buggy user mode depends on this being 0 */
-#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000
-#define HSA_CAP_RASEVENTNOTIFY 0x00200000
-#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000
-#define HSA_CAP_ASIC_REVISION_SHIFT 22
-#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000
-#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000
-#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000
-#define HSA_CAP_RESERVED 0xe00f8000
-
struct kfd_node_properties {
uint64_t hive_id;
uint32_t cpu_cores_count;
@@ -93,17 +66,6 @@ struct kfd_node_properties {
char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
};
-#define HSA_MEM_HEAP_TYPE_SYSTEM 0
-#define HSA_MEM_HEAP_TYPE_FB_PUBLIC 1
-#define HSA_MEM_HEAP_TYPE_FB_PRIVATE 2
-#define HSA_MEM_HEAP_TYPE_GPU_GDS 3
-#define HSA_MEM_HEAP_TYPE_GPU_LDS 4
-#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5
-
-#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001
-#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002
-#define HSA_MEM_FLAGS_RESERVED 0xfffffffc
-
struct kfd_mem_properties {
struct list_head list;
uint32_t heap_type;
@@ -116,12 +78,6 @@ struct kfd_mem_properties {
struct attribute attr;
};
-#define HSA_CACHE_TYPE_DATA 0x00000001
-#define HSA_CACHE_TYPE_INSTRUCTION 0x00000002
-#define HSA_CACHE_TYPE_CPU 0x00000004
-#define HSA_CACHE_TYPE_HSACU 0x00000008
-#define HSA_CACHE_TYPE_RESERVED 0xfffffff0
-
struct kfd_cache_properties {
struct list_head list;
uint32_t processor_id_low;