aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_device.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c186
1 files changed, 119 insertions, 67 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 127d41d0e4f0..65a1d4f9004b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -52,6 +53,7 @@ extern const struct kfd2kgd_calls arcturus_kfd2kgd;
extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v11_kfd2kgd;
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size);
@@ -59,39 +61,63 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
static int kfd_resume(struct kfd_dev *kfd);
-static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
+static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
{
uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
switch (sdma_version) {
- case IP_VERSION(4, 0, 0):/* VEGA10 */
- case IP_VERSION(4, 0, 1):/* VEGA12 */
- case IP_VERSION(4, 1, 0):/* RAVEN */
- case IP_VERSION(4, 1, 1):/* RAVEN */
- case IP_VERSION(4, 1, 2):/* RENIOR */
- case IP_VERSION(5, 2, 1):/* VANGOGH */
- case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
- kfd->device_info.num_sdma_queues_per_engine = 2;
- break;
- case IP_VERSION(4, 2, 0):/* VEGA20 */
- case IP_VERSION(4, 2, 2):/* ARCTUTUS */
- case IP_VERSION(4, 4, 0):/* ALDEBARAN */
- case IP_VERSION(5, 0, 0):/* NAVI10 */
- case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
- case IP_VERSION(5, 0, 2):/* NAVI14 */
- case IP_VERSION(5, 0, 5):/* NAVI12 */
- case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
- case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */
- case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
- case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
- kfd->device_info.num_sdma_queues_per_engine = 8;
- break;
- default:
- dev_warn(kfd_device,
- "Default sdma queue per engine(8) is set due to "
- "mismatch of sdma ip block(SDMA_HWIP:0x%x).\n",
- sdma_version);
- kfd->device_info.num_sdma_queues_per_engine = 8;
+ case IP_VERSION(4, 0, 0):/* VEGA10 */
+ case IP_VERSION(4, 0, 1):/* VEGA12 */
+ case IP_VERSION(4, 1, 0):/* RAVEN */
+ case IP_VERSION(4, 1, 1):/* RAVEN */
+ case IP_VERSION(4, 1, 2):/* RENOIR */
+ case IP_VERSION(5, 2, 1):/* VANGOGH */
+ case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
+ case IP_VERSION(5, 2, 6):/* GC 10.3.6 */
+ case IP_VERSION(5, 2, 7):/* GC 10.3.7 */
+ kfd->device_info.num_sdma_queues_per_engine = 2;
+ break;
+ case IP_VERSION(4, 2, 0):/* VEGA20 */
+ case IP_VERSION(4, 2, 2):/* ARCTURUS */
+ case IP_VERSION(4, 4, 0):/* ALDEBARAN */
+ case IP_VERSION(5, 0, 0):/* NAVI10 */
+ case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
+ case IP_VERSION(5, 0, 2):/* NAVI14 */
+ case IP_VERSION(5, 0, 5):/* NAVI12 */
+ case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
+ case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */
+ case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
+ case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ kfd->device_info.num_sdma_queues_per_engine = 8;
+ break;
+ default:
+ dev_warn(kfd_device,
+ "Default sdma queue per engine(8) is set due to mismatch of sdma ip block(SDMA_HWIP:0x%x).\n",
+ sdma_version);
+ kfd->device_info.num_sdma_queues_per_engine = 8;
+ }
+
+ switch (sdma_version) {
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ /* Reserve 1 for paging and 1 for gfx */
+ kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
+ /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */
+ kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL;
+ break;
+ case IP_VERSION(6, 0, 1):
+ /* Reserve 1 for paging and 1 for gfx */
+ kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
+ /* BIT(0)=engine-0 queue-0; BIT(1)=engine-0 queue-1; ... */
+ kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL;
+ break;
+ default:
+ break;
}
}
@@ -110,7 +136,10 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(9, 4, 2): /* ALDEBARAN */
case IP_VERSION(10, 3, 1): /* VANGOGH */
case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
+ case IP_VERSION(10, 3, 6): /* GC 10.3.6 */
+ case IP_VERSION(10, 3, 7): /* GC 10.3.7 */
case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
+ case IP_VERSION(10, 1, 4):
case IP_VERSION(10, 1, 10): /* NAVI10 */
case IP_VERSION(10, 1, 2): /* NAVI12 */
case IP_VERSION(10, 1, 1): /* NAVI14 */
@@ -120,6 +149,12 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
+ break;
default:
dev_warn(kfd_device, "v9 event interrupt handler is set due to "
"mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version);
@@ -144,7 +179,7 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
kfd->device_info.supports_cwsr = true;
- kfd_device_info_set_sdma_queue_num(kfd);
+ kfd_device_info_set_sdma_info(kfd);
kfd_device_info_set_event_interrupt_class(kfd);
@@ -155,7 +190,11 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
if (gc_version < IP_VERSION(11, 0, 0)) {
/* Navi2x+, Navi1x+ */
- if (gc_version >= IP_VERSION(10, 3, 0))
+ if (gc_version == IP_VERSION(10, 3, 6))
+ kfd->device_info.no_atomic_fw_version = 14;
+ else if (gc_version == IP_VERSION(10, 3, 7))
+ kfd->device_info.no_atomic_fw_version = 3;
+ else if (gc_version >= IP_VERSION(10, 3, 0))
kfd->device_info.no_atomic_fw_version = 92;
else if (gc_version >= IP_VERSION(10, 1, 1))
kfd->device_info.no_atomic_fw_version = 145;
@@ -308,6 +347,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
break;
/* Cyan Skillfish */
case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
gfx_target_version = 100103;
if (!vf)
f2g = &gfx_v10_kfd2kgd;
@@ -344,6 +384,29 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
if (!vf)
f2g = &gfx_v10_3_kfd2kgd;
break;
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ gfx_target_version = 100306;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
+ case IP_VERSION(11, 0, 0):
+ gfx_target_version = 110000;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 0, 1):
+ gfx_target_version = 110003;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 0, 2):
+ gfx_target_version = 110002;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 0, 3):
+ /* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */
+ gfx_target_version = 110001;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
default:
break;
}
@@ -405,10 +468,14 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_nv1x_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
- } else {
+ } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx10_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
+ } else {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx11_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
}
kfd->cwsr_enabled = true;
@@ -437,7 +504,8 @@ static int kfd_gws_init(struct kfd_dev *kfd)
return ret;
}
-static void kfd_smi_init(struct kfd_dev *dev) {
+static void kfd_smi_init(struct kfd_dev *dev)
+{
INIT_LIST_HEAD(&dev->smi_clients);
spin_lock_init(&dev->smi_lock);
}
@@ -480,15 +548,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
}
/* Verify module parameters regarding mapped process number*/
- if ((hws_max_conc_proc < 0)
- || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
- dev_err(kfd_device,
- "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
- hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
- kfd->vm_info.vmid_num_kfd);
+ if (hws_max_conc_proc >= 0)
+ kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd);
+ else
kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
- } else
- kfd->max_proc_per_quantum = hws_max_conc_proc;
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
@@ -533,7 +596,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_doorbell_error;
}
- kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
+ if (amdgpu_use_xgmi_p2p)
+ kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
kfd->noretry = kfd->adev->gmc.noretry;
@@ -570,13 +634,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
svm_migrate_init(kfd->adev);
- if(kgd2kfd_resume_iommu(kfd))
+ if (kgd2kfd_resume_iommu(kfd))
goto device_iommu_error;
if (kfd_resume(kfd))
goto kfd_resume_error;
- kfd->dbgmgr = NULL;
+ amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info);
if (kfd_topology_add_device(kfd)) {
dev_err(kfd_device, "Error adding device to topology\n");
@@ -784,7 +848,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}
-int kgd2kfd_quiesce_mm(struct mm_struct *mm)
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
{
struct kfd_process *p;
int r;
@@ -798,7 +862,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
return -ESRCH;
WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
- r = kfd_process_evict_queues(p);
+ r = kfd_process_evict_queues(p, trigger);
kfd_unref_process(p);
return r;
@@ -876,8 +940,6 @@ out:
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size)
{
- unsigned int num_of_longs;
-
if (WARN_ON(buf_size < chunk_size))
return -EINVAL;
if (WARN_ON(buf_size == 0))
@@ -888,11 +950,8 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
kfd->gtt_sa_chunk_size = chunk_size;
kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
- num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
- BITS_PER_LONG;
-
- kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);
-
+ kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks,
+ GFP_KERNEL);
if (!kfd->gtt_sa_bitmap)
return -ENOMEM;
@@ -902,13 +961,12 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
mutex_init(&kfd->gtt_sa_lock);
return 0;
-
}
static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
mutex_destroy(&kfd->gtt_sa_lock);
- kfree(kfd->gtt_sa_bitmap);
+ bitmap_free(kfd->gtt_sa_bitmap);
}
static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
@@ -976,7 +1034,7 @@ kfd_gtt_restart_search:
/* If we need only one chunk, mark it as allocated and get out */
if (size <= kfd->gtt_sa_chunk_size) {
pr_debug("Single bit\n");
- set_bit(found, kfd->gtt_sa_bitmap);
+ __set_bit(found, kfd->gtt_sa_bitmap);
goto kfd_gtt_out;
}
@@ -1014,10 +1072,8 @@ kfd_gtt_restart_search:
(*mem_obj)->range_start, (*mem_obj)->range_end);
/* Mark the chunks as allocated */
- for (found = (*mem_obj)->range_start;
- found <= (*mem_obj)->range_end;
- found++)
- set_bit(found, kfd->gtt_sa_bitmap);
+ bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start,
+ (*mem_obj)->range_end - (*mem_obj)->range_start + 1);
kfd_gtt_out:
mutex_unlock(&kfd->gtt_sa_lock);
@@ -1032,8 +1088,6 @@ kfd_gtt_no_free_chunk:
int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
- unsigned int bit;
-
/* Act like kfree when trying to free a NULL object */
if (!mem_obj)
return 0;
@@ -1044,10 +1098,8 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
mutex_lock(&kfd->gtt_sa_lock);
/* Mark the chunks as free */
- for (bit = mem_obj->range_start;
- bit <= mem_obj->range_end;
- bit++)
- clear_bit(bit, kfd->gtt_sa_bitmap);
+ bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start,
+ mem_obj->range_end - mem_obj->range_start + 1);
mutex_unlock(&kfd->gtt_sa_lock);