Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Kconfig  14
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Makefile  5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c  10
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h  2975
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm  421
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm  244
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  1746
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.c  352
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.h  5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c  845
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h  230
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c  158
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h  293
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c  7
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c  1120
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  832
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  72
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c  3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c  12
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c  81
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c  5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c  3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c  75
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_events.c  516
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_events.h  6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c  36
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c  384
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c  104
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c  40
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_iommu.c  17
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_iommu.h  3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c  11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h  3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c  239
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h  5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_module.c  3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c  91
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  44
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  128
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  157
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  454
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c  179
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  153
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c  46
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c  52
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c  33
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pasid.c  3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h  3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h  3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h  3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h  3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h  7
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h  3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  249
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c  253
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c  446
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_queue.c  3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c  223
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h  24
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.c  1183
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.h  56
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c  538
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.h  53
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/soc15_int.h  3
64 files changed, 9326 insertions(+), 5942 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 8cc0a76ddf9f..93bd4eda0d94 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -25,3 +25,17 @@ config HSA_AMD_SVM
preemptions and one based on page faults. To enable page fault
based memory management on most GFXv9 GPUs, set the module
parameter amdgpu.noretry=0.
+
+config HSA_AMD_P2P
+ bool "HSA kernel driver support for peer-to-peer for AMD GPU devices"
+ depends on HSA_AMD && PCI_P2PDMA && DMABUF_MOVE_NOTIFY
+ help
+ Enable peer-to-peer (P2P) communication between AMD GPUs over
+ the PCIe bus. This can improve performance of multi-GPU compute
+ applications and libraries by enabling GPUs to access data directly
+ in peer GPUs' memory without intermediate copies in system memory.
+
+ This P2P feature is only enabled on compatible chipsets, and between
+ GPUs with large memory BARs that expose the entire VRAM in PCIe bus
+ address space within the physical address limits of the GPUs.
+
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index c4f3aff11072..e758c2a24cd0 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -37,6 +37,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_mqd_manager_vi.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager_v11.o \
$(AMDKFD_PATH)/kfd_kernel_queue.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
$(AMDKFD_PATH)/kfd_packet_manager_vi.o \
@@ -47,12 +48,12 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_vi.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_v9.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_v10.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager_v11.o \
$(AMDKFD_PATH)/kfd_interrupt.o \
$(AMDKFD_PATH)/kfd_events.o \
$(AMDKFD_PATH)/cik_event_interrupt.o \
$(AMDKFD_PATH)/kfd_int_process_v9.o \
- $(AMDKFD_PATH)/kfd_dbgdev.o \
- $(AMDKFD_PATH)/kfd_dbgmgr.o \
+ $(AMDKFD_PATH)/kfd_int_process_v11.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
$(AMDKFD_PATH)/kfd_crat.o
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index f6233019f042..5c8023cba196 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -43,15 +43,15 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
*/
if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
- dev->device_info->asic_family == CHIP_HAWAII) {
+ dev->adev->asic_type == CHIP_HAWAII) {
struct cik_ih_ring_entry *tmp_ihre =
(struct cik_ih_ring_entry *)patched_ihre;
*patched_flag = true;
*tmp_ihre = *ihre;
- vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
- ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid);
+ vmid = f2g->read_vmid_from_vmfault_reg(dev->adev);
+ ret = f2g->get_atc_vmid_pasid_mapping_info(dev->adev, vmid, &pasid);
tmp_ihre->ring_id &= 0x000000ff;
tmp_ihre->ring_id |= vmid << 8;
@@ -110,10 +110,10 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
struct kfd_vm_fault_info info;
kfd_smi_event_update_vmfault(dev, pasid);
- kfd_process_vm_fault(dev->dqm, pasid);
+ kfd_dqm_evict_pasid(dev->dqm, pasid);
memset(&info, 0, sizeof(info));
- amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->kgd, &info);
+ amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info);
if (!info.page_addr && !info.status)
return;
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 475f89700c74..0c4c5499bb5c 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -166,7 +166,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
0x807c847c, 0x806eff6e,
0x00000400, 0xbf0a757c,
0xbf85ffef, 0xbf9c0000,
- 0xbf8200cd, 0xbef8007e,
+ 0xbf8200ce, 0xbef8007e,
0x8679ff7f, 0x0000ffff,
0x8779ff79, 0x00040000,
0xbefa0080, 0xbefb00ff,
@@ -212,304 +212,310 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
0x761e0000, 0xe0524100,
0x761e0100, 0xe0524200,
0x761e0200, 0xe0524300,
- 0x761e0300, 0xb8f22a05,
- 0x80728172, 0x8e728a72,
- 0xb8f61605, 0x80768176,
- 0x8e768676, 0x80727672,
- 0x80f2c072, 0xb8f31605,
- 0x80738173, 0x8e738473,
- 0x8e7a8273, 0xbefa00ff,
- 0x01000000, 0xbefc0073,
- 0xc031003c, 0x00000072,
- 0x80f2c072, 0xbf8c007f,
- 0x80fc907c, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff1, 0xb8f22a05,
- 0x80728172, 0x8e728a72,
- 0xb8f61605, 0x80768176,
- 0x8e768676, 0x80727672,
- 0xbefa0084, 0xbefa00ff,
- 0x01000000, 0xc0211cfc,
+ 0x761e0300, 0xbf8c0f70,
+ 0xb8f22a05, 0x80728172,
+ 0x8e728a72, 0xb8f61605,
+ 0x80768176, 0x8e768676,
+ 0x80727672, 0x80f2c072,
+ 0xb8f31605, 0x80738173,
+ 0x8e738473, 0x8e7a8273,
+ 0xbefa00ff, 0x01000000,
+ 0xbefc0073, 0xc031003c,
+ 0x00000072, 0x80f2c072,
+ 0xbf8c007f, 0x80fc907c,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff1,
+ 0xb8f22a05, 0x80728172,
+ 0x8e728a72, 0xb8f61605,
+ 0x80768176, 0x8e768676,
+ 0x80727672, 0xbefa0084,
+ 0xbefa00ff, 0x01000000,
+ 0xc0211cfc, 0x00000072,
+ 0x80728472, 0xc0211c3c,
0x00000072, 0x80728472,
- 0xc0211c3c, 0x00000072,
- 0x80728472, 0xc0211c7c,
+ 0xc0211c7c, 0x00000072,
+ 0x80728472, 0xc0211bbc,
0x00000072, 0x80728472,
- 0xc0211bbc, 0x00000072,
- 0x80728472, 0xc0211bfc,
+ 0xc0211bfc, 0x00000072,
+ 0x80728472, 0xc0211d3c,
0x00000072, 0x80728472,
- 0xc0211d3c, 0x00000072,
- 0x80728472, 0xc0211d7c,
+ 0xc0211d7c, 0x00000072,
+ 0x80728472, 0xc0211a3c,
0x00000072, 0x80728472,
- 0xc0211a3c, 0x00000072,
- 0x80728472, 0xc0211a7c,
+ 0xc0211a7c, 0x00000072,
+ 0x80728472, 0xc0211dfc,
0x00000072, 0x80728472,
- 0xc0211dfc, 0x00000072,
- 0x80728472, 0xc0211b3c,
+ 0xc0211b3c, 0x00000072,
+ 0x80728472, 0xc0211b7c,
0x00000072, 0x80728472,
- 0xc0211b7c, 0x00000072,
- 0x80728472, 0xbf8c007f,
- 0xbefc0073, 0xbefe006e,
- 0xbeff006f, 0x867375ff,
- 0x000003ff, 0xb9734803,
- 0x867375ff, 0xfffff800,
- 0x8f738b73, 0xb973a2c3,
- 0xb977f801, 0x8673ff71,
- 0xf0000000, 0x8f739c73,
- 0x8e739073, 0xbef60080,
- 0x87767376, 0x8673ff71,
- 0x08000000, 0x8f739b73,
- 0x8e738f73, 0x87767376,
- 0x8673ff74, 0x00800000,
- 0x8f739773, 0xb976f807,
- 0x8671ff71, 0x0000ffff,
- 0x86fe7e7e, 0x86ea6a6a,
- 0x8f768374, 0xb976e0c2,
- 0xbf800002, 0xb9740002,
- 0xbf8a0000, 0x95807370,
- 0xbf810000, 0x00000000,
+ 0xbf8c007f, 0xbefc0073,
+ 0xbefe006e, 0xbeff006f,
+ 0x867375ff, 0x000003ff,
+ 0xb9734803, 0x867375ff,
+ 0xfffff800, 0x8f738b73,
+ 0xb973a2c3, 0xb977f801,
+ 0x8673ff71, 0xf0000000,
+ 0x8f739c73, 0x8e739073,
+ 0xbef60080, 0x87767376,
+ 0x8673ff71, 0x08000000,
+ 0x8f739b73, 0x8e738f73,
+ 0x87767376, 0x8673ff74,
+ 0x00800000, 0x8f739773,
+ 0xb976f807, 0x8671ff71,
+ 0x0000ffff, 0x86fe7e7e,
+ 0x86ea6a6a, 0x8f768374,
+ 0xb976e0c2, 0xbf800002,
+ 0xb9740002, 0xbf8a0000,
+ 0x95807370, 0xbf810000,
};
static const uint32_t cwsr_trap_gfx9_hex[] = {
- 0xbf820001, 0xbf820248,
- 0xb8f8f802, 0x89788678,
- 0xb8eef801, 0x866eff6e,
- 0x00000800, 0xbf840003,
+ 0xbf820001, 0xbf820254,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
- 0xbf840016, 0xb8fbf803,
+ 0xbf840009, 0x866eff6d,
+ 0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf85003b, 0x866eff7b,
- 0x00000800, 0xbf850003,
- 0x866eff7b, 0x00000100,
- 0xbf84000c, 0x866eff78,
- 0x00002000, 0xbf840005,
- 0xbf8e0010, 0xb8eef803,
- 0x866eff6e, 0x00000400,
- 0xbf84fffb, 0x8778ff78,
- 0x00002000, 0x80ec886c,
- 0x82ed806d, 0xb8eef807,
- 0x866fff6e, 0x001f8000,
- 0x8e6f8b6f, 0x8977ff77,
- 0xfc000000, 0x87776f77,
- 0x896eff6e, 0x001f8000,
- 0xb96ef807, 0xb8faf812,
+ 0xbf850051, 0xbf8e0010,
+ 0xb8fbf803, 0xbf82fffa,
+ 0x866eff7b, 0x00000900,
+ 0xbf850015, 0x866eff7b,
+ 0x000071ff, 0xbf840008,
+ 0x866fff7b, 0x00007080,
+ 0xbf840001, 0xbeee1a87,
+ 0xb8eff801, 0x8e6e8c6e,
+ 0x866e6f6e, 0xbf85000a,
+ 0x866eff6d, 0x00ff0000,
+ 0xbf850007, 0xb8eef801,
+ 0x866eff6e, 0x00000800,
+ 0xbf850003, 0x866eff7b,
+ 0x00000400, 0xbf850036,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8977ff77, 0xfc000000,
+ 0x87777a77, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
- 0xc0071bbd, 0x00000000,
- 0xbf8cc07f, 0xc0071ebd,
- 0x00000008, 0xbf8cc07f,
- 0x86ee6e6e, 0xbf840001,
- 0xbe801d6e, 0xb8fbf803,
- 0x867bff7b, 0x000001ff,
+ 0xc0031bbd, 0x00000010,
+ 0xbf8cc07f, 0x8e6e976e,
+ 0x8977ff77, 0x00800000,
+ 0x87776e77, 0xc0071bbd,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0x866eff6d, 0x01ff0000,
+ 0xbf850005, 0x8778ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x866eff6d, 0x01000000,
0xbf850002, 0x806c846c,
0x826d806d, 0x866dff6d,
- 0x0000ffff, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x86fe7e7e,
+ 0x0000ffff, 0x8f7a8b77,
+ 0x867aff7a, 0x001f8000,
+ 0xb97af807, 0x86fe7e7e,
0x86ea6a6a, 0x8f6e8378,
0xb96ee0c2, 0xbf800002,
0xb9780002, 0xbe801f6c,
0x866dff6d, 0x0000ffff,
0xbefa0080, 0xb97a0283,
- 0xb8fa2407, 0x8e7a9b7a,
- 0x876d7a6d, 0xb8fa03c7,
- 0x8e7a9a7a, 0x876d7a6d,
0xb8faf807, 0x867aff7a,
- 0x00007fff, 0xb97af807,
- 0xbeee007e, 0xbeef007f,
- 0xbefe0180, 0xbf900004,
- 0x877a8478, 0xb97af802,
- 0xbf8e0002, 0xbf88fffe,
- 0xb8fa2a05, 0x807a817a,
- 0x8e7a8a7a, 0xb8fb1605,
- 0x807b817b, 0x8e7b867b,
- 0x807a7b7a, 0x807a7e7a,
- 0x827b807f, 0x867bff7b,
- 0x0000ffff, 0xc04b1c3d,
- 0x00000050, 0xbf8cc07f,
- 0xc04b1d3d, 0x00000060,
- 0xbf8cc07f, 0xc0431e7d,
- 0x00000074, 0xbf8cc07f,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x867aff7f, 0x08000000,
- 0x8f7a837a, 0x87777a77,
- 0x867aff7f, 0x70000000,
- 0x8f7a817a, 0x87777a77,
- 0xbef1007c, 0xbef00080,
- 0xb8f02a05, 0x80708170,
- 0x8e708a70, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xbefe007c, 0xbefc0070,
- 0xc0611c7a, 0x0000007c,
- 0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611b3a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8977ff77, 0xfc000000,
+ 0x87777a77, 0xba7ff807,
+ 0x00000000, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2a05,
+ 0x807a817a, 0x8e7a8a7a,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b867b, 0x807a7b7a,
+ 0x807a7e7a, 0x827b807f,
+ 0x867bff7b, 0x0000ffff,
+ 0xc04b1c3d, 0x00000050,
+ 0xbf8cc07f, 0xc04b1d3d,
+ 0x00000060, 0xbf8cc07f,
+ 0xc0431e7d, 0x00000074,
+ 0xbf8cc07f, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0xbef1007c,
+ 0xbef00080, 0xb8f02a05,
+ 0x80708170, 0x8e708a70,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611b7a, 0x0000007c,
+ 0xc0611b3a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611bba,
+ 0xbefc0070, 0xc0611b7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611bfa, 0x0000007c,
+ 0xc0611bba, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611e3a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8fbf803, 0xbefe007c,
- 0xbefc0070, 0xc0611efa,
+ 0xbefc0070, 0xc0611bfa,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611a3a, 0x0000007c,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611a7a,
+ 0xbefc0070, 0xc0611a3a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
- 0xb8f1f801, 0xbefe007c,
- 0xbefc0070, 0xc0611c7a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0x867aff7f, 0x04000000,
- 0xbeef0080, 0x876f6f7a,
- 0xb8f02a05, 0x80708170,
- 0x8e708a70, 0xb8fb1605,
- 0x807b817b, 0x8e7b847b,
- 0x8e76827b, 0xbef600ff,
- 0x01000000, 0xbef20174,
- 0x80747074, 0x82758075,
- 0xbefc0080, 0xbf800000,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003a, 0x00000000,
- 0xbf8cc07f, 0xc06b013a,
- 0x00000010, 0xbf8cc07f,
- 0xc06b023a, 0x00000020,
- 0xbf8cc07f, 0xc06b033a,
- 0x00000030, 0xbf8cc07f,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0a7b7c,
- 0xbf85ffe7, 0xbef40172,
- 0xbef00080, 0xbefe00c1,
- 0xbeff00c1, 0xbee80080,
- 0xbee90080, 0xbef600ff,
- 0x01000000, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf85004d,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0x867aff7f,
+ 0x04000000, 0xbeef0080,
+ 0x876f6f7a, 0xb8f02a05,
+ 0x80708170, 0x8e708a70,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf85004d, 0xbe840080,
+ 0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
+ 0x00000900, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0xbf820008, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb4306,
- 0x867bc17b, 0xbf840063,
- 0xbf8a0000, 0x867aff6f,
- 0x04000000, 0xbf84005f,
- 0x8e7b867b, 0x8e7b827b,
- 0xbef6007b, 0xb8f02a05,
- 0x80708170, 0x8e708a70,
- 0xb8fa1605, 0x807a817a,
- 0x8e7a867a, 0x80707a70,
- 0x8070ff70, 0x00000080,
- 0xbef600ff, 0x01000000,
- 0xbefc0080, 0xd28c0002,
- 0x000100c1, 0xd28d0003,
- 0x000204c1, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850030,
- 0x24040682, 0xd86e4000,
- 0x00000002, 0xbf8cc07f,
0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8fb4306, 0x867bc17b,
+ 0xbf840063, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf84005f, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0xb8fa1605,
+ 0x807a817a, 0x8e7a867a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xd28c0002, 0x000100c1,
+ 0xd28d0003, 0x000204c1,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850030, 0x24040682,
+ 0xd86e4000, 0x00000002,
+ 0xbf8cc07f, 0xbe840080,
+ 0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
+ 0x00000900, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x680404ff, 0x00000200,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x680404ff,
+ 0x00000200, 0xd0c9006a,
+ 0x0000f702, 0xbf87ffd2,
+ 0xbf820015, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe800077,
+ 0x8677ff77, 0xff7fffff,
+ 0x8777ff77, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8cc07f, 0xe0765000,
+ 0x701d0002, 0x68040702,
0xd0c9006a, 0x0000f702,
- 0xbf87ffd2, 0xbf820015,
- 0xd1060002, 0x00011103,
- 0x7e0602ff, 0x00000200,
- 0xbefc00ff, 0x00010000,
- 0xbe800077, 0x8677ff77,
- 0xff7fffff, 0x8777ff77,
- 0x00058000, 0xd8ec0000,
- 0x00000002, 0xbf8cc07f,
- 0xe0765000, 0x701d0002,
- 0x68040702, 0xd0c9006a,
- 0x0000f702, 0xbf87fff7,
- 0xbef70000, 0xbef000ff,
- 0x00000400, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb2a05,
- 0x807b817b, 0x8e7b827b,
- 0x8e76887b, 0xbef600ff,
+ 0xbf87fff7, 0xbef70000,
+ 0xbef000ff, 0x00000400,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8fb2a05, 0x807b817b,
+ 0x8e7b827b, 0xbef600ff,
0x01000000, 0xbefc0084,
0xbf0a7b7c, 0xbf84006d,
0xbf11017c, 0x807bff7b,
@@ -566,15 +572,11 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0x701d0300, 0x807c847c,
0x8070ff70, 0x00000400,
0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xbf8200da,
+ 0xbf9c0000, 0xbf8200c7,
0xbef4007e, 0x8675ff7f,
0x0000ffff, 0x8775ff75,
0x00040000, 0xbef60080,
0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x08000000,
- 0x8f6e836e, 0x87776e77,
- 0x866eff7f, 0x70000000,
- 0x8f6e816e, 0x87776e77,
0x866eff7f, 0x04000000,
0xbf84001e, 0xbefe00c1,
0xbeff00c1, 0xb8ef4306,
@@ -591,28 +593,28 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0x781d0000, 0x807cff7c,
0x00000200, 0x8078ff78,
0x00000200, 0xbf0a6f7c,
- 0xbf85fff6, 0xbef80080,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8ef2a05, 0x806f816f,
- 0x8e6f826f, 0x8e76886f,
- 0xbef600ff, 0x01000000,
- 0xbeee0078, 0x8078ff78,
- 0x00000400, 0xbefc0084,
- 0xbf11087c, 0x806fff6f,
- 0x00008000, 0xe0524000,
- 0x781d0000, 0xe0524100,
- 0x781d0100, 0xe0524200,
- 0x781d0200, 0xe0524300,
- 0x781d0300, 0xbf8c0f70,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
- 0x807c847c, 0x8078ff78,
- 0x00000400, 0xbf0a6f7c,
- 0xbf85ffee, 0xbf9c0000,
- 0xe0524000, 0x6e1d0000,
- 0xe0524100, 0x6e1d0100,
- 0xe0524200, 0x6e1d0200,
- 0xe0524300, 0x6e1d0300,
+ 0xbf85fff6, 0xbefe00c1,
+ 0xbeff00c1, 0xbef600ff,
+ 0x01000000, 0xb8ef2a05,
+ 0x806f816f, 0x8e6f826f,
+ 0x806fff6f, 0x00008000,
+ 0xbef80080, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefc0084, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffee,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
0xb8f82a05, 0x80788178,
0x8e788a78, 0xb8ee1605,
0x806e816e, 0x8e6e866e,
@@ -663,90 +665,101 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xc00b1c37, 0x00000050,
0xc00b1d37, 0x00000060,
0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x866fff6d,
- 0xf8000000, 0x8f6f9b6f,
- 0x8e6f906f, 0xbeee0080,
- 0x876e6f6e, 0x866fff6d,
- 0x04000000, 0x8f6f9a6f,
- 0x8e6f8f6f, 0x876e6f6e,
- 0x866fff7a, 0x00800000,
- 0x8f6f976f, 0xb96ef807,
- 0x866dff6d, 0x0000ffff,
- 0x86fe7e7e, 0x86ea6a6a,
- 0x8f6e837a, 0xb96ee0c2,
- 0xbf800002, 0xb97a0002,
- 0xbf8a0000, 0x95806f6c,
- 0xbf810000, 0x00000000,
+ 0xbf8cc07f, 0x8f6e8b77,
+ 0x866eff6e, 0x001f8000,
+ 0xb96ef807, 0x866dff6d,
+ 0x0000ffff, 0x86fe7e7e,
+ 0x86ea6a6a, 0x8f6e837a,
+ 0xb96ee0c2, 0xbf800002,
+ 0xb97a0002, 0xbf8a0000,
+ 0xbe801f6c, 0xbf810000,
};
static const uint32_t cwsr_trap_nv1x_hex[] = {
- 0xbf820001, 0xbf8201cd,
+ 0xbf820001, 0xbf8201f1,
0xb0804004, 0xb978f802,
- 0x8a788678, 0xb96ef801,
- 0x876eff6e, 0x00000800,
- 0xbf840003, 0x876eff78,
+ 0x8a78ff78, 0x00020006,
+ 0xb97bf803, 0x876eff78,
0x00002000, 0xbf840009,
- 0xb97bf803, 0x876eff7b,
- 0x00000400, 0xbf850033,
- 0x876eff7b, 0x00000100,
- 0xbf840002, 0x8878ff78,
- 0x00002000, 0x8a77ff77,
- 0xff000000, 0xb96ef807,
- 0x876fff6e, 0x02000000,
- 0x8f6f866f, 0x88776f77,
- 0x876fff6e, 0x003f8000,
- 0x8f6f896f, 0x88776f77,
- 0x8a6eff6e, 0x023f8000,
- 0xb9eef807, 0xb97af812,
+ 0x876eff6d, 0x00ff0000,
+ 0xbf85001e, 0x876eff7b,
+ 0x00000400, 0xbf850057,
+ 0xbf8e0010, 0xb97bf803,
+ 0xbf82fffa, 0x876eff7b,
+ 0x00000900, 0xbf850015,
+ 0x876eff7b, 0x000071ff,
+ 0xbf840008, 0x876fff7b,
+ 0x00007080, 0xbf840001,
+ 0xbeee1d87, 0xb96ff801,
+ 0x8f6e8c6e, 0x876e6f6e,
+ 0xbf85000a, 0x876eff6d,
+ 0x00ff0000, 0xbf850007,
+ 0xb96ef801, 0x876eff6e,
+ 0x00000800, 0xbf850003,
+ 0x876eff7b, 0x00000400,
+ 0xbf85003c, 0x8a77ff77,
+ 0xff000000, 0xb97af807,
+ 0x877bff7a, 0x02000000,
+ 0x8f7b867b, 0x88777b77,
+ 0x877bff7a, 0x003f8000,
+ 0x8f7b897b, 0x88777b77,
+ 0x8a7aff7a, 0x023f8000,
+ 0xb9faf807, 0xb97af812,
0xb97bf813, 0x8ffa887a,
- 0xf4051bbd, 0xfa000000,
- 0xbf8cc07f, 0xf4051ebd,
- 0xfa000008, 0xbf8cc07f,
- 0x87ee6e6e, 0xbf840001,
- 0xbe80206e, 0xb97bf803,
- 0x877bff7b, 0x000001ff,
+ 0xf4011bbd, 0xfa000010,
+ 0xbf8cc07f, 0x8f6e976e,
+ 0x8a77ff77, 0x00800000,
+ 0x88776e77, 0xf4051bbd,
+ 0xfa000000, 0xbf8cc07f,
+ 0xf4051ebd, 0xfa000008,
+ 0xbf8cc07f, 0x87ee6e6e,
+ 0xbf840001, 0xbe80206e,
+ 0x876eff6d, 0x01ff0000,
+ 0xbf850005, 0x8878ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x876eff6d, 0x01000000,
0xbf850002, 0x806c846c,
0x826d806d, 0x876dff6d,
- 0x0000ffff, 0x906e8977,
- 0x876fff6e, 0x003f8000,
- 0x906e8677, 0x876eff6e,
- 0x02000000, 0x886e6f6e,
- 0xb9eef807, 0x87fe7e7e,
+ 0x0000ffff, 0x907a8977,
+ 0x877bff7a, 0x003f8000,
+ 0x907a8677, 0x877aff7a,
+ 0x02000000, 0x887a7b7a,
+ 0xb9faf807, 0x87fe7e7e,
0x87ea6a6a, 0xb9f8f802,
0xbe80226c, 0x876dff6d,
0x0000ffff, 0xbefa0380,
- 0xb9fa0283, 0xb97a2c07,
- 0x8f7a9a7a, 0x886d7a6d,
- 0xb97a03c7, 0x8f7a997a,
- 0x886d7a6d, 0xb97a0647,
- 0x8f7a987a, 0x886d7a6d,
- 0xb97af807, 0x877aff7a,
- 0x00007fff, 0xb9faf807,
- 0xbeee037e, 0xbeef037f,
- 0xbefe0480, 0xbf900004,
- 0xbf8e0002, 0xbf88fffe,
- 0xb97b02dc, 0x8f7b997b,
- 0x887b7b7f, 0xb97a2a05,
+ 0xb9fa0283, 0x8a77ff77,
+ 0xff000000, 0xb97af807,
+ 0x877bff7a, 0x02000000,
+ 0x8f7b867b, 0x88777b77,
+ 0x877bff7a, 0x003f8000,
+ 0x8f7b897b, 0x88777b77,
+ 0x8a7aff7a, 0x023f8000,
+ 0xb9faf807, 0xbeee037e,
+ 0xbeef037f, 0xbefe0480,
+ 0xbf900004, 0xbf8e0002,
+ 0xbf88fffe, 0x877aff7f,
+ 0x04000000, 0x8f7a857a,
+ 0x886d7a6d, 0xb97b02dc,
+ 0x8f7b997b, 0xb97a3a05,
0x807a817a, 0xbf0d997b,
0xbf850002, 0x8f7a897a,
0xbf820001, 0x8f7a8a7a,
- 0x877bff7f, 0x0000ffff,
- 0x807aff7a, 0x00000200,
- 0x807a7e7a, 0x827b807b,
- 0xf4491c3d, 0xfa000050,
- 0xf4491d3d, 0xfa000060,
- 0xf4411e7d, 0xfa000074,
- 0xbef4037e, 0x8775ff7f,
- 0x0000ffff, 0x8875ff75,
- 0x00040000, 0xbef60380,
- 0xbef703ff, 0x10807fac,
- 0x877aff7f, 0x08000000,
- 0x907a837a, 0x88777a77,
- 0x877aff7f, 0x70000000,
- 0x907a817a, 0x88777a77,
- 0xbef1037c, 0xbef00380,
- 0xb97302dc, 0x8f739973,
- 0x8873737f, 0xb97bf816,
+ 0xb97b1e06, 0x8f7b8a7b,
+ 0x807a7b7a, 0x877bff7f,
+ 0x0000ffff, 0x807aff7a,
+ 0x00000200, 0x807a7e7a,
+ 0x827b807b, 0xf4491c3d,
+ 0xfa000050, 0xf4491d3d,
+ 0xfa000060, 0xf4411e7d,
+ 0xfa000074, 0xbef4037e,
+ 0x8775ff7f, 0x0000ffff,
+ 0x8875ff75, 0x00040000,
+ 0xbef60380, 0xbef703ff,
+ 0x10807fac, 0xbef1037c,
+ 0xbef00380, 0xb97302dc,
+ 0x8f739973, 0xb97bf816,
0xba80f816, 0x00000000,
0xbefe03c1, 0x907c9973,
0x877c817c, 0xbf06817c,
@@ -763,7 +776,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xe0704100, 0x705d0100,
0xe0704200, 0x705d0200,
0xe0704300, 0x705d0300,
- 0xb9702a05, 0x80708170,
+ 0xb9703a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
@@ -776,8 +789,9 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbefe037c, 0xbefc0370,
0xf4611b3a, 0xf8000000,
0x80708470, 0xbefc037e,
+ 0x8a7aff6d, 0x80000000,
0xbefe037c, 0xbefc0370,
- 0xf4611b7a, 0xf8000000,
+ 0xf4611eba, 0xf8000000,
0x80708470, 0xbefc037e,
0xbefe037c, 0xbefc0370,
0xf4611bba, 0xf8000000,
@@ -805,7 +819,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbefe037c, 0xbefc0370,
0xf4611c7a, 0xf8000000,
0x80708470, 0xbefc037e,
- 0xb9702a05, 0x80708170,
+ 0xb9703a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
@@ -838,10 +852,10 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf820001, 0xbeff03c1,
0xb97b4306, 0x877bc17b,
0xbf840044, 0xbf8a0000,
- 0x877aff73, 0x04000000,
+ 0x877aff6d, 0x80000000,
0xbf840040, 0x8f7b867b,
0x8f7b827b, 0xbef6037b,
- 0xb9702a05, 0x80708170,
+ 0xb9703a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
@@ -877,7 +891,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbef003ff, 0x00000200,
0xbeff0380, 0xbf820003,
0xbef003ff, 0x00000400,
- 0xbeff03c1, 0xb97b2a05,
+ 0xbeff03c1, 0xb97b3a05,
0x807b817b, 0x8f7b827b,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850017,
@@ -894,7 +908,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf0a7b7c, 0xbf85ffef,
0xbf820025, 0xbef603ff,
0x01000000, 0xbefc0384,
- 0xbf0a7b7c, 0xbf840020,
+ 0xbf0a7b7c, 0xbf840011,
0x7e008700, 0x7e028701,
0x7e048702, 0x7e068703,
0xe0704000, 0x705d0000,
@@ -911,71 +925,69 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x705d0000, 0x807c817c,
0x8070ff70, 0x00000080,
0xbf0a7b7c, 0xbf85fff8,
- 0xbf820151, 0xbef4037e,
+ 0xbf820144, 0xbef4037e,
0x8775ff7f, 0x0000ffff,
0x8875ff75, 0x00040000,
0xbef60380, 0xbef703ff,
- 0x10807fac, 0x876eff7f,
- 0x08000000, 0x906e836e,
- 0x88776e77, 0x876eff7f,
- 0x70000000, 0x906e816e,
- 0x88776e77, 0xb97202dc,
- 0x8f729972, 0x8872727f,
- 0x876eff7f, 0x04000000,
- 0xbf840034, 0xbefe03c1,
- 0x907c9972, 0x877c817c,
- 0xbf06817c, 0xbf850002,
- 0xbeff0380, 0xbf820001,
- 0xbeff03c1, 0xb96f4306,
- 0x876fc16f, 0xbf840029,
- 0x8f6f866f, 0x8f6f826f,
- 0xbef6036f, 0xb9782a05,
- 0x80788178, 0xbf0d9972,
- 0xbf850002, 0x8f788978,
- 0xbf820001, 0x8f788a78,
- 0xb96e1e06, 0x8f6e8a6e,
- 0x80786e78, 0x8078ff78,
- 0x00000200, 0x8078ff78,
- 0x00000080, 0xbef603ff,
- 0x01000000, 0x907c9972,
- 0x877c817c, 0xbf06817c,
- 0xbefc0380, 0xbf850009,
- 0xe0310000, 0x781d0000,
- 0x807cff7c, 0x00000080,
- 0x8078ff78, 0x00000080,
- 0xbf0a6f7c, 0xbf85fff8,
- 0xbf820008, 0xe0310000,
- 0x781d0000, 0x807cff7c,
- 0x00000100, 0x8078ff78,
- 0x00000100, 0xbf0a6f7c,
- 0xbf85fff8, 0xbef80380,
+ 0x10807fac, 0xb97202dc,
+ 0x8f729972, 0x876eff7f,
+ 0x04000000, 0xbf840034,
0xbefe03c1, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
- 0xb96f2a05, 0x806f816f,
- 0x8f6f826f, 0x907c9972,
- 0x877c817c, 0xbf06817c,
- 0xbf850021, 0xbef603ff,
- 0x01000000, 0xbeee0378,
+ 0xb96f4306, 0x876fc16f,
+ 0xbf840029, 0x8f6f866f,
+ 0x8f6f826f, 0xbef6036f,
+ 0xb9783a05, 0x80788178,
+ 0xbf0d9972, 0xbf850002,
+ 0x8f788978, 0xbf820001,
+ 0x8f788a78, 0xb96e1e06,
+ 0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
- 0xbefc0384, 0xe0304000,
- 0x785d0000, 0xe0304080,
- 0x785d0100, 0xe0304100,
- 0x785d0200, 0xe0304180,
- 0x785d0300, 0xbf8c3f70,
- 0x7e008500, 0x7e028501,
- 0x7e048502, 0x7e068503,
- 0x807c847c, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85ffee, 0xe0304000,
- 0x6e5d0000, 0xe0304080,
- 0x6e5d0100, 0xe0304100,
- 0x6e5d0200, 0xe0304180,
- 0x6e5d0300, 0xbf820032,
+ 0x8078ff78, 0x00000080,
+ 0xbef603ff, 0x01000000,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbefc0380,
+ 0xbf850009, 0xe0310000,
+ 0x781d0000, 0x807cff7c,
+ 0x00000080, 0x8078ff78,
+ 0x00000080, 0xbf0a6f7c,
+ 0xbf85fff8, 0xbf820008,
+ 0xe0310000, 0x781d0000,
+ 0x807cff7c, 0x00000100,
+ 0x8078ff78, 0x00000100,
+ 0xbf0a6f7c, 0xbf85fff8,
+ 0xbef80380, 0xbefe03c1,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbf850002,
+ 0xbeff0380, 0xbf820001,
+ 0xbeff03c1, 0xb96f3a05,
+ 0x806f816f, 0x8f6f826f,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbf850024,
+ 0xbef603ff, 0x01000000,
+ 0xbeee0378, 0x8078ff78,
+ 0x00000200, 0xbefc0384,
+ 0xbf0a6f7c, 0xbf840050,
+ 0xe0304000, 0x785d0000,
+ 0xe0304080, 0x785d0100,
+ 0xe0304100, 0x785d0200,
+ 0xe0304180, 0x785d0300,
+ 0xbf8c3f70, 0x7e008500,
+ 0x7e028501, 0x7e048502,
+ 0x7e068503, 0x807c847c,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85ffee,
+ 0xe0304000, 0x6e5d0000,
+ 0xe0304080, 0x6e5d0100,
+ 0xe0304100, 0x6e5d0200,
+ 0xe0304180, 0x6e5d0300,
+ 0xbf8c3f70, 0xbf820034,
0xbef603ff, 0x01000000,
0xbeee0378, 0x8078ff78,
0x00000400, 0xbefc0384,
+ 0xbf0a6f7c, 0xbf840012,
0xe0304000, 0x785d0000,
0xe0304100, 0x785d0100,
0xe0304200, 0x785d0200,
@@ -998,7 +1010,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x6e5d0100, 0xe0304200,
0x6e5d0200, 0xe0304300,
0x6e5d0300, 0xbf8c3f70,
- 0xb9782a05, 0x80788178,
+ 0xb9783a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb96e1e06,
@@ -1025,7 +1037,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbe8c310c, 0xbe8e310e,
0xbf06807c, 0xbf84fff0,
0xba80f801, 0x00000000,
- 0xbf8a0000, 0xb9782a05,
+ 0xbf8a0000, 0xb9783a05,
0x80788178, 0xbf0d9972,
0xbf850002, 0x8f788978,
0xbf820001, 0x8f788a78,
@@ -1057,273 +1069,275 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xb9f9f816, 0x876f7bff,
0xfffff800, 0x906f8b6f,
0xb9efa2c3, 0xb9f3f801,
- 0xb96e2a05, 0x806e816e,
+ 0xb96e3a05, 0x806e816e,
0xbf0d9972, 0xbf850002,
0x8f6e896e, 0xbf820001,
- 0x8f6e8a6e, 0x806eff6e,
- 0x00000200, 0x806e746e,
- 0x826f8075, 0x876fff6f,
- 0x0000ffff, 0xf4091c37,
- 0xfa000050, 0xf4091d37,
- 0xfa000060, 0xf4011e77,
- 0xfa000074, 0xbf8cc07f,
- 0x876fff6d, 0xfc000000,
- 0x906f9a6f, 0x8f6f906f,
- 0xbeee0380, 0x886e6f6e,
- 0x876fff6d, 0x02000000,
- 0x906f996f, 0x8f6f8f6f,
- 0x886e6f6e, 0x876fff6d,
- 0x01000000, 0x906f986f,
- 0x8f6f996f, 0x886e6f6e,
- 0x876fff7a, 0x00800000,
- 0x906f976f, 0xb9eef807,
- 0x876dff6d, 0x0000ffff,
- 0x87fe7e7e, 0x87ea6a6a,
- 0xb9faf802, 0xbe80226c,
- 0xbf810000, 0xbf9f0000,
+ 0x8f6e8a6e, 0xb96f1e06,
+ 0x8f6f8a6f, 0x806e6f6e,
+ 0x806eff6e, 0x00000200,
+ 0x806e746e, 0x826f8075,
+ 0x876fff6f, 0x0000ffff,
+ 0xf4091c37, 0xfa000050,
+ 0xf4091d37, 0xfa000060,
+ 0xf4011e77, 0xfa000074,
+ 0xbf8cc07f, 0x906e8977,
+ 0x876fff6e, 0x003f8000,
+ 0x906e8677, 0x876eff6e,
+ 0x02000000, 0x886e6f6e,
+ 0xb9eef807, 0x876dff6d,
+ 0x0000ffff, 0x87fe7e7e,
+ 0x87ea6a6a, 0xb9faf802,
+ 0xbe80226c, 0xbf810000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0x00000000,
};
static const uint32_t cwsr_trap_arcturus_hex[] = {
- 0xbf820001, 0xbf8202c4,
- 0xb8f8f802, 0x89788678,
- 0xb8eef801, 0x866eff6e,
- 0x00000800, 0xbf840003,
+ 0xbf820001, 0xbf8202d0,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
- 0xbf840016, 0xb8fbf803,
+ 0xbf840009, 0x866eff6d,
+ 0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf85003b, 0x866eff7b,
- 0x00000800, 0xbf850003,
- 0x866eff7b, 0x00000100,
- 0xbf84000c, 0x866eff78,
- 0x00002000, 0xbf840005,
- 0xbf8e0010, 0xb8eef803,
- 0x866eff6e, 0x00000400,
- 0xbf84fffb, 0x8778ff78,
- 0x00002000, 0x80ec886c,
- 0x82ed806d, 0xb8eef807,
- 0x866fff6e, 0x001f8000,
- 0x8e6f8b6f, 0x8977ff77,
- 0xfc000000, 0x87776f77,
- 0x896eff6e, 0x001f8000,
- 0xb96ef807, 0xb8faf812,
+ 0xbf850051, 0xbf8e0010,
+ 0xb8fbf803, 0xbf82fffa,
+ 0x866eff7b, 0x00000900,
+ 0xbf850015, 0x866eff7b,
+ 0x000071ff, 0xbf840008,
+ 0x866fff7b, 0x00007080,
+ 0xbf840001, 0xbeee1a87,
+ 0xb8eff801, 0x8e6e8c6e,
+ 0x866e6f6e, 0xbf85000a,
+ 0x866eff6d, 0x00ff0000,
+ 0xbf850007, 0xb8eef801,
+ 0x866eff6e, 0x00000800,
+ 0xbf850003, 0x866eff7b,
+ 0x00000400, 0xbf850036,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8977ff77, 0xfc000000,
+ 0x87777a77, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
- 0xc0071bbd, 0x00000000,
- 0xbf8cc07f, 0xc0071ebd,
- 0x00000008, 0xbf8cc07f,
- 0x86ee6e6e, 0xbf840001,
- 0xbe801d6e, 0xb8fbf803,
- 0x867bff7b, 0x000001ff,
+ 0xc0031bbd, 0x00000010,
+ 0xbf8cc07f, 0x8e6e976e,
+ 0x8977ff77, 0x00800000,
+ 0x87776e77, 0xc0071bbd,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0x866eff6d, 0x01ff0000,
+ 0xbf850005, 0x8778ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x866eff6d, 0x01000000,
0xbf850002, 0x806c846c,
0x826d806d, 0x866dff6d,
- 0x0000ffff, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x86fe7e7e,
+ 0x0000ffff, 0x8f7a8b77,
+ 0x867aff7a, 0x001f8000,
+ 0xb97af807, 0x86fe7e7e,
0x86ea6a6a, 0x8f6e8378,
0xb96ee0c2, 0xbf800002,
0xb9780002, 0xbe801f6c,
0x866dff6d, 0x0000ffff,
0xbefa0080, 0xb97a0283,
- 0xb8fa2407, 0x8e7a9b7a,
- 0x876d7a6d, 0xb8fa03c7,
- 0x8e7a9a7a, 0x876d7a6d,
0xb8faf807, 0x867aff7a,
- 0x00007fff, 0xb97af807,
- 0xbeee007e, 0xbeef007f,
- 0xbefe0180, 0xbf900004,
- 0x877a8478, 0xb97af802,
- 0xbf8e0002, 0xbf88fffe,
- 0xb8fa2a05, 0x807a817a,
- 0x8e7a8a7a, 0x8e7a817a,
- 0xb8fb1605, 0x807b817b,
- 0x8e7b867b, 0x807a7b7a,
- 0x807a7e7a, 0x827b807f,
- 0x867bff7b, 0x0000ffff,
- 0xc04b1c3d, 0x00000050,
- 0xbf8cc07f, 0xc04b1d3d,
- 0x00000060, 0xbf8cc07f,
- 0xc0431e7d, 0x00000074,
- 0xbf8cc07f, 0xbef4007e,
- 0x8675ff7f, 0x0000ffff,
- 0x8775ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x00807fac, 0x867aff7f,
- 0x08000000, 0x8f7a837a,
- 0x87777a77, 0x867aff7f,
- 0x70000000, 0x8f7a817a,
- 0x87777a77, 0xbef1007c,
- 0xbef00080, 0xb8f02a05,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xbefe007c, 0xbefc0070,
- 0xc0611c7a, 0x0000007c,
- 0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611b3a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8977ff77, 0xfc000000,
+ 0x87777a77, 0xba7ff807,
+ 0x00000000, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2a05,
+ 0x807a817a, 0x8e7a8a7a,
+ 0x8e7a817a, 0xb8fb1605,
+ 0x807b817b, 0x8e7b867b,
+ 0x807a7b7a, 0x807a7e7a,
+ 0x827b807f, 0x867bff7b,
+ 0x0000ffff, 0xc04b1c3d,
+ 0x00000050, 0xbf8cc07f,
+ 0xc04b1d3d, 0x00000060,
+ 0xbf8cc07f, 0xc0431e7d,
+ 0x00000074, 0xbf8cc07f,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0xbef1007c, 0xbef00080,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611b7a, 0x0000007c,
+ 0xc0611b3a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611bba,
+ 0xbefc0070, 0xc0611b7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611bfa, 0x0000007c,
+ 0xc0611bba, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611e3a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8fbf803, 0xbefe007c,
- 0xbefc0070, 0xc0611efa,
+ 0xbefc0070, 0xc0611bfa,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611a3a, 0x0000007c,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611a7a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8f1f801, 0xbefe007c,
- 0xbefc0070, 0xc0611c7a,
+ 0xbefc0070, 0xc0611a3a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
- 0x867aff7f, 0x04000000,
- 0xbeef0080, 0x876f6f7a,
- 0xb8f02a05, 0x80708170,
- 0x8e708a70, 0x8e708170,
- 0xb8fb1605, 0x807b817b,
- 0x8e7b847b, 0x8e76827b,
- 0xbef600ff, 0x01000000,
- 0xbef20174, 0x80747074,
- 0x82758075, 0xbefc0080,
- 0xbf800000, 0xbe802b00,
- 0xbe822b02, 0xbe842b04,
- 0xbe862b06, 0xbe882b08,
- 0xbe8a2b0a, 0xbe8c2b0c,
- 0xbe8e2b0e, 0xc06b003a,
- 0x00000000, 0xbf8cc07f,
- 0xc06b013a, 0x00000010,
- 0xbf8cc07f, 0xc06b023a,
- 0x00000020, 0xbf8cc07f,
- 0xc06b033a, 0x00000030,
- 0xbf8cc07f, 0x8074c074,
- 0x82758075, 0x807c907c,
- 0xbf0a7b7c, 0xbf85ffe7,
- 0xbef40172, 0xbef00080,
- 0xbefe00c1, 0xbeff00c1,
- 0xbee80080, 0xbee90080,
- 0xbef600ff, 0x01000000,
- 0x867aff78, 0x00400000,
- 0xbf850003, 0xb8faf803,
- 0x897a7aff, 0x10000000,
- 0xbf85004d, 0xbe840080,
- 0xd2890000, 0x00000900,
- 0x80048104, 0xd2890001,
- 0x00000900, 0x80048104,
- 0xd2890002, 0x00000900,
- 0x80048104, 0xd2890003,
- 0x00000900, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0x867aff7f,
+ 0x04000000, 0xbeef0080,
+ 0x876f6f7a, 0xb8f02a05,
+ 0x80708170, 0x8e708a70,
+ 0x8e708170, 0xb8fb1605,
+ 0x807b817b, 0x8e7b847b,
+ 0x8e76827b, 0xbef600ff,
+ 0x01000000, 0xbef20174,
+ 0x80747074, 0x82758075,
+ 0xbefc0080, 0xbf800000,
+ 0xbe802b00, 0xbe822b02,
+ 0xbe842b04, 0xbe862b06,
+ 0xbe882b08, 0xbe8a2b0a,
+ 0xbe8c2b0c, 0xbe8e2b0e,
+ 0xc06b003a, 0x00000000,
+ 0xbf8cc07f, 0xc06b013a,
+ 0x00000010, 0xbf8cc07f,
+ 0xc06b023a, 0x00000020,
+ 0xbf8cc07f, 0xc06b033a,
+ 0x00000030, 0xbf8cc07f,
+ 0x8074c074, 0x82758075,
+ 0x807c907c, 0xbf0a7b7c,
+ 0xbf85ffe7, 0xbef40172,
+ 0xbef00080, 0xbefe00c1,
+ 0xbeff00c1, 0xbee80080,
+ 0xbee90080, 0xbef600ff,
+ 0x01000000, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf85004d,
0xbe840080, 0xd2890000,
- 0x00000901, 0x80048104,
- 0xd2890001, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
0x80048104, 0xd2890002,
- 0x00000901, 0x80048104,
- 0xd2890003, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000902,
+ 0xd2890000, 0x00000901,
0x80048104, 0xd2890001,
- 0x00000902, 0x80048104,
- 0xd2890002, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
0x80048104, 0xd2890003,
- 0x00000902, 0x80048104,
+ 0x00000901, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000903, 0x80048104,
- 0xd2890001, 0x00000903,
- 0x80048104, 0xd2890002,
- 0x00000903, 0x80048104,
- 0xd2890003, 0x00000903,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbf820008,
- 0xe0724000, 0x701d0000,
- 0xe0724100, 0x701d0100,
- 0xe0724200, 0x701d0200,
- 0xe0724300, 0x701d0300,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8fb4306, 0x867bc17b,
- 0xbf840064, 0xbf8a0000,
- 0x867aff6f, 0x04000000,
- 0xbf840060, 0x8e7b867b,
- 0x8e7b827b, 0xbef6007b,
- 0xb8f02a05, 0x80708170,
- 0x8e708a70, 0x8e708170,
- 0xb8fa1605, 0x807a817a,
- 0x8e7a867a, 0x80707a70,
- 0x8070ff70, 0x00000080,
- 0xbef600ff, 0x01000000,
- 0xbefc0080, 0xd28c0002,
- 0x000100c1, 0xd28d0003,
- 0x000204c1, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850030,
- 0x24040682, 0xd86e4000,
- 0x00000002, 0xbf8cc07f,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
+ 0xd2890000, 0x00000903,
0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbf820008, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb4306,
+ 0x867bc17b, 0xbf840064,
+ 0xbf8a0000, 0x867aff6f,
+ 0x04000000, 0xbf840060,
+ 0x8e7b867b, 0x8e7b827b,
+ 0xbef6007b, 0xb8f02a05,
+ 0x80708170, 0x8e708a70,
+ 0x8e708170, 0xb8fa1605,
+ 0x807a817a, 0x8e7a867a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xd28c0002, 0x000100c1,
+ 0xd28d0003, 0x000204c1,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850030, 0x24040682,
+ 0xd86e4000, 0x00000002,
+ 0xbf8cc07f, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x680404ff, 0x00000200,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x680404ff,
+ 0x00000200, 0xd0c9006a,
+ 0x0000f702, 0xbf87ffd2,
+ 0xbf820015, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe800077,
+ 0x8677ff77, 0xff7fffff,
+ 0x8777ff77, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8cc07f, 0xe0765000,
+ 0x701d0002, 0x68040702,
0xd0c9006a, 0x0000f702,
- 0xbf87ffd2, 0xbf820015,
- 0xd1060002, 0x00011103,
- 0x7e0602ff, 0x00000200,
- 0xbefc00ff, 0x00010000,
- 0xbe800077, 0x8677ff77,
- 0xff7fffff, 0x8777ff77,
- 0x00058000, 0xd8ec0000,
- 0x00000002, 0xbf8cc07f,
- 0xe0765000, 0x701d0002,
- 0x68040702, 0xd0c9006a,
- 0x0000f702, 0xbf87fff7,
- 0xbef70000, 0xbef000ff,
- 0x00000400, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb2a05,
- 0x807b817b, 0x8e7b827b,
- 0x8e76887b, 0xbef600ff,
+ 0xbf87fff7, 0xbef70000,
+ 0xbef000ff, 0x00000400,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8fb2a05, 0x807b817b,
+ 0x8e7b827b, 0xbef600ff,
0x01000000, 0xbefc0084,
0xbf0a7b7c, 0xbf84006d,
0xbf11017c, 0x807bff7b,
@@ -1440,15 +1454,11 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0x701d0300, 0x807c847c,
0x8070ff70, 0x00000400,
0xbf0a7b7c, 0xbf85ffeb,
- 0xbf9c0000, 0xbf820106,
+ 0xbf9c0000, 0xbf8200e3,
0xbef4007e, 0x8675ff7f,
0x0000ffff, 0x8775ff75,
0x00040000, 0xbef60080,
0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x08000000,
- 0x8f6e836e, 0x87776e77,
- 0x866eff7f, 0x70000000,
- 0x8f6e816e, 0x87776e77,
0x866eff7f, 0x04000000,
0xbf84001f, 0xbefe00c1,
0xbeff00c1, 0xb8ef4306,
@@ -1466,26 +1476,14 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0x807cff7c, 0x00000200,
0x8078ff78, 0x00000200,
0xbf0a6f7c, 0xbf85fff6,
- 0xbef80080, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef2a05,
- 0x806f816f, 0x8e6f826f,
- 0x8e76886f, 0xbef90076,
+ 0xbefe00c1, 0xbeff00c1,
0xbef600ff, 0x01000000,
+ 0xb8ef2a05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
0xbeee0078, 0x8078ff78,
- 0x00000400, 0xbef30079,
- 0x8079ff79, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0x806fff6f, 0x00008000,
- 0xe0524000, 0x791d0000,
- 0xe0524100, 0x791d0100,
- 0xe0524200, 0x791d0200,
- 0xe0524300, 0x791d0300,
- 0x8079ff79, 0x00000400,
- 0xbf8c0f70, 0xd3d94000,
- 0x18000100, 0xd3d94001,
- 0x18000101, 0xd3d94002,
- 0x18000102, 0xd3d94003,
- 0x18000103, 0xe0524000,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0xe0524000,
0x781d0000, 0xe0524100,
0x781d0100, 0xe0524200,
0x781d0200, 0xe0524300,
@@ -1494,20 +1492,24 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0x7e040302, 0x7e060303,
0x807c847c, 0x8078ff78,
0x00000400, 0xbf0a6f7c,
- 0xbf85ffdb, 0xbf9c0000,
- 0xe0524000, 0x731d0000,
- 0xe0524100, 0x731d0100,
- 0xe0524200, 0x731d0200,
- 0xe0524300, 0x731d0300,
- 0xbf8c0f70, 0xd3d94000,
- 0x18000100, 0xd3d94001,
- 0x18000101, 0xd3d94002,
- 0x18000102, 0xd3d94003,
- 0x18000103, 0xe0524000,
- 0x6e1d0000, 0xe0524100,
- 0x6e1d0100, 0xe0524200,
- 0x6e1d0200, 0xe0524300,
- 0x6e1d0300, 0xb8f82a05,
+ 0xbf85ffee, 0xbefc0080,
+ 0xbf11087c, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0xd3d94000, 0x18000100,
+ 0xd3d94001, 0x18000101,
+ 0xd3d94002, 0x18000102,
+ 0xd3d94003, 0x18000103,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffea, 0xbf9c0000,
+ 0xe0524000, 0x6e1d0000,
+ 0xe0524100, 0x6e1d0100,
+ 0xe0524200, 0x6e1d0200,
+ 0xe0524300, 0x6e1d0300,
+ 0xbf8c0f70, 0xb8f82a05,
0x80788178, 0x8e788a78,
0x8e788178, 0xb8ee1605,
0x806e816e, 0x8e6e866e,
@@ -1559,224 +1561,268 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xc00b1c37, 0x00000050,
0xc00b1d37, 0x00000060,
0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x866fff6d,
- 0xf8000000, 0x8f6f9b6f,
- 0x8e6f906f, 0xbeee0080,
- 0x876e6f6e, 0x866fff6d,
- 0x04000000, 0x8f6f9a6f,
- 0x8e6f8f6f, 0x876e6f6e,
- 0x866fff7a, 0x00800000,
- 0x8f6f976f, 0xb96ef807,
- 0x866dff6d, 0x0000ffff,
- 0x86fe7e7e, 0x86ea6a6a,
- 0x8f6e837a, 0xb96ee0c2,
- 0xbf800002, 0xb97a0002,
- 0xbf8a0000, 0x95806f6c,
- 0xbf810000, 0x00000000,
+ 0xbf8cc07f, 0x8f6e8b77,
+ 0x866eff6e, 0x001f8000,
+ 0xb96ef807, 0x866dff6d,
+ 0x0000ffff, 0x86fe7e7e,
+ 0x86ea6a6a, 0x8f6e837a,
+ 0xb96ee0c2, 0xbf800002,
+ 0xb97a0002, 0xbf8a0000,
+ 0xbe801f6c, 0xbf810000,
};
static const uint32_t cwsr_trap_aldebaran_hex[] = {
- 0xbf820001, 0xbf8202ce,
- 0xb8f8f802, 0x89788678,
- 0xb8eef801, 0x866eff6e,
- 0x00000800, 0xbf840003,
+ 0xbf820001, 0xbf8202db,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
- 0xbf840016, 0xb8fbf803,
+ 0xbf840009, 0x866eff6d,
+ 0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf85003b, 0x866eff7b,
- 0x00000800, 0xbf850003,
- 0x866eff7b, 0x00000100,
- 0xbf84000c, 0x866eff78,
- 0x00002000, 0xbf840005,
- 0xbf8e0010, 0xb8eef803,
- 0x866eff6e, 0x00000400,
- 0xbf84fffb, 0x8778ff78,
- 0x00002000, 0x80ec886c,
- 0x82ed806d, 0xb8eef807,
- 0x866fff6e, 0x001f8000,
- 0x8e6f8b6f, 0x8977ff77,
- 0xfc000000, 0x87776f77,
- 0x896eff6e, 0x001f8000,
- 0xb96ef807, 0xb8faf812,
+ 0xbf850051, 0xbf8e0010,
+ 0xb8fbf803, 0xbf82fffa,
+ 0x866eff7b, 0x00000900,
+ 0xbf850015, 0x866eff7b,
+ 0x000071ff, 0xbf840008,
+ 0x866fff7b, 0x00007080,
+ 0xbf840001, 0xbeee1a87,
+ 0xb8eff801, 0x8e6e8c6e,
+ 0x866e6f6e, 0xbf85000a,
+ 0x866eff6d, 0x00ff0000,
+ 0xbf850007, 0xb8eef801,
+ 0x866eff6e, 0x00000800,
+ 0xbf850003, 0x866eff7b,
+ 0x00000400, 0xbf850036,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8977ff77, 0xfc000000,
+ 0x87777a77, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
- 0xc0071bbd, 0x00000000,
- 0xbf8cc07f, 0xc0071ebd,
- 0x00000008, 0xbf8cc07f,
- 0x86ee6e6e, 0xbf840001,
- 0xbe801d6e, 0xb8fbf803,
- 0x867bff7b, 0x000001ff,
+ 0xc0031bbd, 0x00000010,
+ 0xbf8cc07f, 0x8e6e976e,
+ 0x8977ff77, 0x00800000,
+ 0x87776e77, 0xc0071bbd,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0x866eff6d, 0x01ff0000,
+ 0xbf850005, 0x8778ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x866eff6d, 0x01000000,
0xbf850002, 0x806c846c,
0x826d806d, 0x866dff6d,
- 0x0000ffff, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x86fe7e7e,
+ 0x0000ffff, 0x8f7a8b77,
+ 0x867aff7a, 0x001f8000,
+ 0xb97af807, 0x86fe7e7e,
0x86ea6a6a, 0x8f6e8378,
0xb96ee0c2, 0xbf800002,
0xb9780002, 0xbe801f6c,
0x866dff6d, 0x0000ffff,
0xbefa0080, 0xb97a0283,
- 0xb8fa2407, 0x8e7a9b7a,
- 0x876d7a6d, 0xb8fa03c7,
- 0x8e7a9a7a, 0x876d7a6d,
0xb8faf807, 0x867aff7a,
- 0x00007fff, 0xb97af807,
- 0xbeee007e, 0xbeef007f,
- 0xbefe0180, 0xbf900004,
- 0x877a8478, 0xb97af802,
- 0xbf8e0002, 0xbf88fffe,
- 0xb8fa2985, 0x807a817a,
- 0x8e7a8a7a, 0x8e7a817a,
- 0xb8fb1605, 0x807b817b,
- 0x8e7b867b, 0x807a7b7a,
- 0x807a7e7a, 0x827b807f,
- 0x867bff7b, 0x0000ffff,
- 0xc04b1c3d, 0x00000050,
- 0xbf8cc07f, 0xc04b1d3d,
- 0x00000060, 0xbf8cc07f,
- 0xc0431e7d, 0x00000074,
- 0xbf8cc07f, 0xbef4007e,
- 0x8675ff7f, 0x0000ffff,
- 0x8775ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x00807fac, 0x867aff7f,
- 0x08000000, 0x8f7a837a,
- 0x87777a77, 0x867aff7f,
- 0x70000000, 0x8f7a817a,
- 0x87777a77, 0xbef1007c,
- 0xbef00080, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xbefe007c, 0xbefc0070,
- 0xc0611c7a, 0x0000007c,
- 0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611b3a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8977ff77, 0xfc000000,
+ 0x87777a77, 0xba7ff807,
+ 0x00000000, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2985,
+ 0x807a817a, 0x8e7a8a7a,
+ 0x8e7a817a, 0xb8fb1605,
+ 0x807b817b, 0x8e7b867b,
+ 0x807a7b7a, 0x807a7e7a,
+ 0x827b807f, 0x867bff7b,
+ 0x0000ffff, 0xc04b1c3d,
+ 0x00000050, 0xbf8cc07f,
+ 0xc04b1d3d, 0x00000060,
+ 0xbf8cc07f, 0xc0431e7d,
+ 0x00000074, 0xbf8cc07f,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0xbef1007c, 0xbef00080,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611b7a, 0x0000007c,
+ 0xc0611b3a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611bba,
+ 0xbefc0070, 0xc0611b7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611bfa, 0x0000007c,
+ 0xc0611bba, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611e3a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8fbf803, 0xbefe007c,
- 0xbefc0070, 0xc0611efa,
+ 0xbefc0070, 0xc0611bfa,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611a3a, 0x0000007c,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611a7a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8f1f801, 0xbefe007c,
- 0xbefc0070, 0xc0611c7a,
+ 0xbefc0070, 0xc0611a3a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
- 0x867aff7f, 0x04000000,
- 0xbeef0080, 0x876f6f7a,
- 0xb8f02985, 0x80708170,
- 0x8e708a70, 0x8e708170,
- 0xb8fb1605, 0x807b817b,
- 0x8e7b847b, 0x8e76827b,
- 0xbef600ff, 0x01000000,
- 0xbef20174, 0x80747074,
- 0x82758075, 0xbefc0080,
- 0xbf800000, 0xbe802b00,
- 0xbe822b02, 0xbe842b04,
- 0xbe862b06, 0xbe882b08,
- 0xbe8a2b0a, 0xbe8c2b0c,
- 0xbe8e2b0e, 0xc06b003a,
- 0x00000000, 0xbf8cc07f,
- 0xc06b013a, 0x00000010,
- 0xbf8cc07f, 0xc06b023a,
- 0x00000020, 0xbf8cc07f,
- 0xc06b033a, 0x00000030,
- 0xbf8cc07f, 0x8074c074,
- 0x82758075, 0x807c907c,
- 0xbf0a7b7c, 0xbf85ffe7,
- 0xbef40172, 0xbef00080,
- 0xbefe00c1, 0xbeff00c1,
- 0xbee80080, 0xbee90080,
- 0xbef600ff, 0x01000000,
- 0x867aff78, 0x00400000,
- 0xbf850003, 0xb8faf803,
- 0x897a7aff, 0x10000000,
- 0xbf85004d, 0xbe840080,
- 0xd2890000, 0x00000900,
- 0x80048104, 0xd2890001,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0x867aff7f,
+ 0x04000000, 0xbeef0080,
+ 0x876f6f7a, 0xb8f02985,
+ 0x80708170, 0x8e708a70,
+ 0x8e708170, 0xb8fb1605,
+ 0x807b817b, 0x8e7b847b,
+ 0x8e76827b, 0xbef600ff,
+ 0x01000000, 0xbef20174,
+ 0x80747074, 0x82758075,
+ 0xbefc0080, 0xbf800000,
+ 0xbe802b00, 0xbe822b02,
+ 0xbe842b04, 0xbe862b06,
+ 0xbe882b08, 0xbe8a2b0a,
+ 0xbe8c2b0c, 0xbe8e2b0e,
+ 0xc06b003a, 0x00000000,
+ 0xbf8cc07f, 0xc06b013a,
+ 0x00000010, 0xbf8cc07f,
+ 0xc06b023a, 0x00000020,
+ 0xbf8cc07f, 0xc06b033a,
+ 0x00000030, 0xbf8cc07f,
+ 0x8074c074, 0x82758075,
+ 0x807c907c, 0xbf0a7b7c,
+ 0xbf85ffe7, 0xbef40172,
+ 0xbef00080, 0xbefe00c1,
+ 0xbeff00c1, 0xbee80080,
+ 0xbee90080, 0xbef600ff,
+ 0x01000000, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf85004d,
+ 0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
- 0xd2890002, 0x00000900,
- 0x80048104, 0xd2890003,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000901, 0x80048104,
- 0xd2890001, 0x00000901,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
0x80048104, 0xd2890002,
- 0x00000901, 0x80048104,
- 0xd2890003, 0x00000901,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000902,
+ 0xd2890000, 0x00000903,
0x80048104, 0xd2890001,
- 0x00000902, 0x80048104,
- 0xd2890002, 0x00000902,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
0x80048104, 0xd2890003,
- 0x00000902, 0x80048104,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbf820008, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb4306,
+ 0x867bc17b, 0xbf840064,
+ 0xbf8a0000, 0x867aff6f,
+ 0x04000000, 0xbf840060,
+ 0x8e7b867b, 0x8e7b827b,
+ 0xbef6007b, 0xb8f02985,
+ 0x80708170, 0x8e708a70,
+ 0x8e708170, 0xb8fa1605,
+ 0x807a817a, 0x8e7a867a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xd28c0002, 0x000100c1,
+ 0xd28d0003, 0x000204c1,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850030, 0x24040682,
+ 0xd86e4000, 0x00000002,
+ 0xbf8cc07f, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000903, 0x80048104,
- 0xd2890001, 0x00000903,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000903, 0x80048104,
- 0xd2890003, 0x00000903,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbf820008,
- 0xe0724000, 0x701d0000,
- 0xe0724100, 0x701d0100,
- 0xe0724200, 0x701d0200,
- 0xe0724300, 0x701d0300,
+ 0xbf84ffee, 0x680404ff,
+ 0x00000200, 0xd0c9006a,
+ 0x0000f702, 0xbf87ffd2,
+ 0xbf820015, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe800077,
+ 0x8677ff77, 0xff7fffff,
+ 0x8777ff77, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8cc07f, 0xe0765000,
+ 0x701d0002, 0x68040702,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87fff7, 0xbef70000,
+ 0xbef000ff, 0x00000400,
0xbefe00c1, 0xbeff00c1,
- 0xb8fb4306, 0x867bc17b,
- 0xbf840064, 0xbf8a0000,
- 0x867aff6f, 0x04000000,
- 0xbf840060, 0x8e7b867b,
- 0x8e7b827b, 0xbef6007b,
- 0xb8f02985, 0x80708170,
- 0x8e708a70, 0x8e708170,
- 0xb8fa1605, 0x807a817a,
- 0x8e7a867a, 0x80707a70,
- 0x8070ff70, 0x00000080,
- 0xbef600ff, 0x01000000,
- 0xbefc0080, 0xd28c0002,
- 0x000100c1, 0xd28d0003,
- 0x000204c1, 0x867aff78,
+ 0xb8fb2b05, 0x807b817b,
+ 0x8e7b827b, 0xbef600ff,
+ 0x01000000, 0xbefc0084,
+ 0xbf0a7b7c, 0xbf84006d,
+ 0xbf11017c, 0x807bff7b,
+ 0x00001000, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850030,
- 0x24040682, 0xd86e4000,
- 0x00000002, 0xbf8cc07f,
+ 0x10000000, 0xbf850051,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -1796,31 +1842,51 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x680404ff, 0x00000200,
- 0xd0c9006a, 0x0000f702,
- 0xbf87ffd2, 0xbf820015,
- 0xd1060002, 0x00011103,
- 0x7e0602ff, 0x00000200,
- 0xbefc00ff, 0x00010000,
- 0xbe800077, 0x8677ff77,
- 0xff7fffff, 0x8777ff77,
- 0x00058000, 0xd8ec0000,
- 0x00000002, 0xbf8cc07f,
- 0xe0765000, 0x701d0002,
- 0x68040702, 0xd0c9006a,
- 0x0000f702, 0xbf87fff7,
- 0xbef70000, 0xbef000ff,
- 0x00000400, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb2b05,
- 0x807b817b, 0x8e7b827b,
- 0xbef600ff, 0x01000000,
- 0xbefc0084, 0xbf0a7b7c,
- 0xbf84006d, 0xbf11017c,
+ 0xbe840080, 0xd2890000,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
+ 0x80048104, 0xd2890002,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
+ 0x80048104, 0xd2890001,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
+ 0x80048104, 0xd2890003,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffb1, 0xbf9c0000,
+ 0xbf820012, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffef,
+ 0xbf9c0000, 0xb8fb2985,
+ 0x807b817b, 0x8e7b837b,
+ 0xb8fa2b05, 0x807a817a,
+ 0x8e7a827a, 0x80fb7a7b,
+ 0x867b7b7b, 0xbf84007a,
0x807bff7b, 0x00001000,
+ 0xbefc0080, 0xbf11017c,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850051, 0xbe840080,
+ 0xbf850059, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -1859,233 +1925,178 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0x807c847c,
- 0xbf0a7b7c, 0xbf85ffb1,
- 0xbf9c0000, 0xbf820012,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
+ 0xbf0a7b7c, 0xbf85ffa9,
+ 0xbf9c0000, 0xbf820016,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
0xe0724000, 0x701d0000,
0xe0724100, 0x701d0100,
0xe0724200, 0x701d0200,
0xe0724300, 0x701d0300,
0x807c847c, 0x8070ff70,
0x00000400, 0xbf0a7b7c,
- 0xbf85ffef, 0xbf9c0000,
- 0xb8fb2985, 0x807b817b,
- 0x8e7b837b, 0xb8fa2b05,
- 0x807a817a, 0x8e7a827a,
- 0x80fb7a7b, 0x867b7b7b,
- 0xbf84007a, 0x807bff7b,
- 0x00001000, 0xbefc0080,
- 0xbf11017c, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850059,
- 0xd3d84000, 0x18000100,
- 0xd3d84001, 0x18000101,
- 0xd3d84002, 0x18000102,
- 0xd3d84003, 0x18000103,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
- 0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
- 0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffa9, 0xbf9c0000,
- 0xbf820016, 0xd3d84000,
- 0x18000100, 0xd3d84001,
- 0x18000101, 0xd3d84002,
- 0x18000102, 0xd3d84003,
- 0x18000103, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffeb,
- 0xbf9c0000, 0xbf820101,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x08000000,
- 0x8f6e836e, 0x87776e77,
- 0x866eff7f, 0x70000000,
- 0x8f6e816e, 0x87776e77,
- 0x866eff7f, 0x04000000,
- 0xbf84001f, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef4306,
- 0x866fc16f, 0xbf84001a,
- 0x8e6f866f, 0x8e6f826f,
- 0xbef6006f, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x8078ff78,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xe0510000, 0x781d0000,
- 0xe0510100, 0x781d0000,
- 0x807cff7c, 0x00000200,
- 0x8078ff78, 0x00000200,
- 0xbf0a6f7c, 0xbf85fff6,
+ 0xbf85ffeb, 0xbf9c0000,
+ 0xbf8200ee, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x866eff7f,
+ 0x04000000, 0xbf84001f,
0xbefe00c1, 0xbeff00c1,
+ 0xb8ef4306, 0x866fc16f,
+ 0xbf84001a, 0x8e6f866f,
+ 0x8e6f826f, 0xbef6006f,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
- 0xb8ef2b05, 0x806f816f,
- 0x8e6f826f, 0x806fff6f,
- 0x00008000, 0xbef80080,
- 0xbeee0078, 0x8078ff78,
- 0x00000400, 0xbefc0084,
+ 0xbefc0080, 0xe0510000,
+ 0x781d0000, 0xe0510100,
+ 0x781d0000, 0x807cff7c,
+ 0x00000200, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85fff6, 0xbefe00c1,
+ 0xbeff00c1, 0xbef600ff,
+ 0x01000000, 0xb8ef2b05,
+ 0x806f816f, 0x8e6f826f,
+ 0x806fff6f, 0x00008000,
+ 0xbef80080, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefc0084, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffee,
+ 0xb8ef2985, 0x806f816f,
+ 0x8e6f836f, 0xb8f92b05,
+ 0x80798179, 0x8e798279,
+ 0x80ef796f, 0x866f6f6f,
+ 0xbf84001a, 0x806fff6f,
+ 0x00008000, 0xbefc0080,
0xbf11087c, 0xe0524000,
0x781d0000, 0xe0524100,
0x781d0100, 0xe0524200,
0x781d0200, 0xe0524300,
0x781d0300, 0xbf8c0f70,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
+ 0xd3d94000, 0x18000100,
+ 0xd3d94001, 0x18000101,
+ 0xd3d94002, 0x18000102,
+ 0xd3d94003, 0x18000103,
0x807c847c, 0x8078ff78,
0x00000400, 0xbf0a6f7c,
- 0xbf85ffee, 0xb8ef2985,
- 0x806f816f, 0x8e6f836f,
- 0xb8f92b05, 0x80798179,
- 0x8e798279, 0x80ef796f,
- 0x866f6f6f, 0xbf84001a,
- 0x806fff6f, 0x00008000,
- 0xbefc0080, 0xbf11087c,
- 0xe0524000, 0x781d0000,
- 0xe0524100, 0x781d0100,
- 0xe0524200, 0x781d0200,
- 0xe0524300, 0x781d0300,
- 0xbf8c0f70, 0xd3d94000,
- 0x18000100, 0xd3d94001,
- 0x18000101, 0xd3d94002,
- 0x18000102, 0xd3d94003,
- 0x18000103, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffea,
- 0xbf9c0000, 0xe0524000,
- 0x6e1d0000, 0xe0524100,
- 0x6e1d0100, 0xe0524200,
- 0x6e1d0200, 0xe0524300,
- 0x6e1d0300, 0xbf8c0f70,
- 0xb8f82985, 0x80788178,
- 0x8e788a78, 0x8e788178,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0x80f8c078, 0xb8ef1605,
- 0x806f816f, 0x8e6f846f,
- 0x8e76826f, 0xbef600ff,
- 0x01000000, 0xbefc006f,
- 0xc031003a, 0x00000078,
- 0x80f8c078, 0xbf8cc07f,
- 0x80fc907c, 0xbf800000,
- 0xbe802d00, 0xbe822d02,
- 0xbe842d04, 0xbe862d06,
- 0xbe882d08, 0xbe8a2d0a,
- 0xbe8c2d0c, 0xbe8e2d0e,
- 0xbf06807c, 0xbf84fff0,
- 0xb8f82985, 0x80788178,
- 0x8e788a78, 0x8e788178,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0xbef60084, 0xbef600ff,
- 0x01000000, 0xc0211bfa,
+ 0xbf85ffea, 0xbf9c0000,
+ 0xe0524000, 0x6e1d0000,
+ 0xe0524100, 0x6e1d0100,
+ 0xe0524200, 0x6e1d0200,
+ 0xe0524300, 0x6e1d0300,
+ 0xbf8c0f70, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x80f8c078,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f846f, 0x8e76826f,
+ 0xbef600ff, 0x01000000,
+ 0xbefc006f, 0xc031003a,
+ 0x00000078, 0x80f8c078,
+ 0xbf8cc07f, 0x80fc907c,
+ 0xbf800000, 0xbe802d00,
+ 0xbe822d02, 0xbe842d04,
+ 0xbe862d06, 0xbe882d08,
+ 0xbe8a2d0a, 0xbe8c2d0c,
+ 0xbe8e2d0e, 0xbf06807c,
+ 0xbf84fff0, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0xbef60084,
+ 0xbef600ff, 0x01000000,
+ 0xc0211bfa, 0x00000078,
+ 0x80788478, 0xc0211b3a,
0x00000078, 0x80788478,
- 0xc0211b3a, 0x00000078,
- 0x80788478, 0xc0211b7a,
+ 0xc0211b7a, 0x00000078,
+ 0x80788478, 0xc0211c3a,
0x00000078, 0x80788478,
- 0xc0211c3a, 0x00000078,
- 0x80788478, 0xc0211c7a,
+ 0xc0211c7a, 0x00000078,
+ 0x80788478, 0xc0211eba,
0x00000078, 0x80788478,
- 0xc0211eba, 0x00000078,
- 0x80788478, 0xc0211efa,
+ 0xc0211efa, 0x00000078,
+ 0x80788478, 0xc0211a3a,
0x00000078, 0x80788478,
- 0xc0211a3a, 0x00000078,
- 0x80788478, 0xc0211a7a,
+ 0xc0211a7a, 0x00000078,
+ 0x80788478, 0xc0211cfa,
0x00000078, 0x80788478,
- 0xc0211cfa, 0x00000078,
- 0x80788478, 0xbf8cc07f,
- 0xbefc006f, 0xbefe0070,
- 0xbeff0071, 0x866f7bff,
- 0x000003ff, 0xb96f4803,
- 0x866f7bff, 0xfffff800,
- 0x8f6f8b6f, 0xb96fa2c3,
- 0xb973f801, 0xb8ee2985,
- 0x806e816e, 0x8e6e8a6e,
- 0x8e6e816e, 0xb8ef1605,
- 0x806f816f, 0x8e6f866f,
- 0x806e6f6e, 0x806e746e,
- 0x826f8075, 0x866fff6f,
- 0x0000ffff, 0xc00b1c37,
- 0x00000050, 0xc00b1d37,
- 0x00000060, 0xc0031e77,
- 0x00000074, 0xbf8cc07f,
- 0x866fff6d, 0xf8000000,
- 0x8f6f9b6f, 0x8e6f906f,
- 0xbeee0080, 0x876e6f6e,
- 0x866fff6d, 0x04000000,
- 0x8f6f9a6f, 0x8e6f8f6f,
- 0x876e6f6e, 0x866fff7a,
- 0x00800000, 0x8f6f976f,
+ 0xbf8cc07f, 0xbefc006f,
+ 0xbefe0070, 0xbeff0071,
+ 0x866f7bff, 0x000003ff,
+ 0xb96f4803, 0x866f7bff,
+ 0xfffff800, 0x8f6f8b6f,
+ 0xb96fa2c3, 0xb973f801,
+ 0xb8ee2985, 0x806e816e,
+ 0x8e6e8a6e, 0x8e6e816e,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f866f, 0x806e6f6e,
+ 0x806e746e, 0x826f8075,
+ 0x866fff6f, 0x0000ffff,
+ 0xc00b1c37, 0x00000050,
+ 0xc00b1d37, 0x00000060,
+ 0xc0031e77, 0x00000074,
+ 0xbf8cc07f, 0x8f6e8b77,
+ 0x866eff6e, 0x001f8000,
0xb96ef807, 0x866dff6d,
0x0000ffff, 0x86fe7e7e,
0x86ea6a6a, 0x8f6e837a,
0xb96ee0c2, 0xbf800002,
0xb97a0002, 0xbf8a0000,
- 0x95806f6c, 0xbf810000,
+ 0xbe801f6c, 0xbf810000,
};
static const uint32_t cwsr_trap_gfx10_hex[] = {
- 0xbf820001, 0xbf8201cf,
+ 0xbf820001, 0xbf82021c,
0xb0804004, 0xb978f802,
- 0x8a788678, 0xb96ef801,
- 0x876eff6e, 0x00000800,
- 0xbf840003, 0x876eff78,
+ 0x8a78ff78, 0x00020006,
+ 0xb97bf803, 0x876eff78,
0x00002000, 0xbf840009,
- 0xb97bf803, 0x876eff7b,
- 0x00000400, 0xbf85001d,
- 0x876eff7b, 0x00000100,
- 0xbf840002, 0x8878ff78,
- 0x00002000, 0xb97af812,
+ 0x876eff6d, 0x00ff0000,
+ 0xbf85001e, 0x876eff7b,
+ 0x00000400, 0xbf850041,
+ 0xbf8e0010, 0xb97bf803,
+ 0xbf82fffa, 0x876eff7b,
+ 0x00000900, 0xbf850015,
+ 0x876eff7b, 0x000071ff,
+ 0xbf840008, 0x876fff7b,
+ 0x00007080, 0xbf840001,
+ 0xbeee1d87, 0xb96ff801,
+ 0x8f6e8c6e, 0x876e6f6e,
+ 0xbf85000a, 0x876eff6d,
+ 0x00ff0000, 0xbf850007,
+ 0xb96ef801, 0x876eff6e,
+ 0x00000800, 0xbf850003,
+ 0x876eff7b, 0x00000400,
+ 0xbf850026, 0xb97af812,
0xb97bf813, 0x8ffa887a,
- 0xf4051bbd, 0xfa000000,
- 0xbf8cc07f, 0xf4051ebd,
- 0xfa000008, 0xbf8cc07f,
- 0x87ee6e6e, 0xbf840001,
- 0xbe80206e, 0xb97bf803,
- 0x877bff7b, 0x000001ff,
+ 0xf4011bbd, 0xfa000010,
+ 0xbf8cc07f, 0x8f6e976e,
+ 0x8a77ff77, 0x00800000,
+ 0x88776e77, 0xf4051bbd,
+ 0xfa000000, 0xbf8cc07f,
+ 0xf4051ebd, 0xfa000008,
+ 0xbf8cc07f, 0x87ee6e6e,
+ 0xbf840001, 0xbe80206e,
+ 0x876eff6d, 0x01ff0000,
+ 0xbf850005, 0x8878ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x876eff6d, 0x01000000,
0xbf850002, 0x806c846c,
0x826d806d, 0x876dff6d,
0x0000ffff, 0x87fe7e7e,
@@ -2095,40 +2106,58 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xb9fa0283, 0xbeee037e,
0xbeef037f, 0xbefe0480,
0xbf900004, 0xbf8cc07f,
+ 0x877aff7f, 0x04000000,
+ 0x8f7a857a, 0x886d7a6d,
+ 0xbefa037e, 0x877bff7f,
+ 0x0000ffff, 0xbefe03c1,
+ 0xbeff03c1, 0xdc5f8000,
+ 0x007a0000, 0x7e000280,
+ 0xbefe037a, 0xbeff037b,
0xb97b02dc, 0x8f7b997b,
- 0x887b7b7f, 0xb97a2a05,
- 0x807a817a, 0xbf0d997b,
- 0xbf850002, 0x8f7a897a,
- 0xbf820001, 0x8f7a8a7a,
+ 0xb97a3a05, 0x807a817a,
+ 0xbf0d997b, 0xbf850002,
+ 0x8f7a897a, 0xbf820001,
+ 0x8f7a8a7a, 0xb97b1e06,
+ 0x8f7b8a7b, 0x807a7b7a,
0x877bff7f, 0x0000ffff,
0x807aff7a, 0x00000200,
0x807a7e7a, 0x827b807b,
- 0xbef4037e, 0x8775ff7f,
- 0x0000ffff, 0x8875ff75,
- 0x00040000, 0xbef60380,
- 0xbef703ff, 0x10807fac,
- 0x877aff7f, 0x08000000,
- 0x907a837a, 0x88777a77,
- 0x877aff7f, 0x70000000,
- 0x907a817a, 0x88777a77,
- 0xbef1037c, 0xbef00380,
- 0xb97302dc, 0x8f739973,
- 0x8873737f, 0xbefe03c1,
+ 0xd7610000, 0x00010870,
+ 0xd7610000, 0x00010a71,
+ 0xd7610000, 0x00010c72,
+ 0xd7610000, 0x00010e73,
+ 0xd7610000, 0x00011074,
+ 0xd7610000, 0x00011275,
+ 0xd7610000, 0x00011476,
+ 0xd7610000, 0x00011677,
+ 0xd7610000, 0x00011a79,
+ 0xd7610000, 0x00011c7e,
+ 0xd7610000, 0x00011e7f,
+ 0xbefe03ff, 0x00003fff,
+ 0xbeff0380, 0xdc5f8040,
+ 0x007a0000, 0xd760007a,
+ 0x00011d00, 0xd760007b,
+ 0x00011f00, 0xbefe037a,
+ 0xbeff037b, 0xbef4037e,
+ 0x8775ff7f, 0x0000ffff,
+ 0x8875ff75, 0x00040000,
+ 0xbef60380, 0xbef703ff,
+ 0x10807fac, 0xbef1037c,
+ 0xbef00380, 0xb97302dc,
+ 0x8f739973, 0xbefe03c1,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850002,
0xbeff0380, 0xbf820002,
- 0xbeff03c1, 0xbf82000b,
+ 0xbeff03c1, 0xbf820009,
0xbef603ff, 0x01000000,
- 0xe0704000, 0x705d0000,
0xe0704080, 0x705d0100,
0xe0704100, 0x705d0200,
0xe0704180, 0x705d0300,
- 0xbf82000a, 0xbef603ff,
- 0x01000000, 0xe0704000,
- 0x705d0000, 0xe0704100,
+ 0xbf820008, 0xbef603ff,
+ 0x01000000, 0xe0704100,
0x705d0100, 0xe0704200,
0x705d0200, 0xe0704300,
- 0x705d0300, 0xb9702a05,
+ 0x705d0300, 0xb9703a05,
0x80708170, 0xbf0d9973,
0xbf850002, 0x8f708970,
0xbf820001, 0x8f708a70,
@@ -2140,8 +2169,9 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbefc0380, 0xd7610002,
0x0000f871, 0x807c817c,
0xd7610002, 0x0000f86c,
- 0x807c817c, 0xd7610002,
- 0x0000f86d, 0x807c817c,
+ 0x807c817c, 0x8a7aff6d,
+ 0x80000000, 0xd7610002,
+ 0x0000f87a, 0x807c817c,
0xd7610002, 0x0000f86e,
0x807c817c, 0xd7610002,
0x0000f86f, 0x807c817c,
@@ -2156,160 +2186,157 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x0000f871, 0x807c817c,
0xb971f815, 0xd7610002,
0x0000f871, 0x807c817c,
+ 0xbefe03ff, 0x0000ffff,
0xbeff0380, 0xe0704000,
- 0x705d0200, 0xb9702a05,
- 0x80708170, 0xbf0d9973,
- 0xbf850002, 0x8f708970,
- 0xbf820001, 0x8f708a70,
- 0xb97a1e06, 0x8f7a8a7a,
- 0x80707a70, 0xbef603ff,
- 0x01000000, 0xbef90380,
- 0xbefc0380, 0xbf800000,
- 0xbe802f00, 0xbe822f02,
- 0xbe842f04, 0xbe862f06,
- 0xbe882f08, 0xbe8a2f0a,
- 0xbe8c2f0c, 0xbe8e2f0e,
- 0xd7610002, 0x0000f200,
- 0x80798179, 0xd7610002,
- 0x0000f201, 0x80798179,
- 0xd7610002, 0x0000f202,
- 0x80798179, 0xd7610002,
- 0x0000f203, 0x80798179,
- 0xd7610002, 0x0000f204,
+ 0x705d0200, 0xbefe03c1,
+ 0xb9703a05, 0x80708170,
+ 0xbf0d9973, 0xbf850002,
+ 0x8f708970, 0xbf820001,
+ 0x8f708a70, 0xb97a1e06,
+ 0x8f7a8a7a, 0x80707a70,
+ 0xbef603ff, 0x01000000,
+ 0xbef90380, 0xbefc0380,
+ 0xbf800000, 0xbe802f00,
+ 0xbe822f02, 0xbe842f04,
+ 0xbe862f06, 0xbe882f08,
+ 0xbe8a2f0a, 0xbe8c2f0c,
+ 0xbe8e2f0e, 0xd7610002,
+ 0x0000f200, 0x80798179,
+ 0xd7610002, 0x0000f201,
0x80798179, 0xd7610002,
- 0x0000f205, 0x80798179,
- 0xd7610002, 0x0000f206,
+ 0x0000f202, 0x80798179,
+ 0xd7610002, 0x0000f203,
0x80798179, 0xd7610002,
- 0x0000f207, 0x80798179,
- 0xd7610002, 0x0000f208,
+ 0x0000f204, 0x80798179,
+ 0xd7610002, 0x0000f205,
0x80798179, 0xd7610002,
- 0x0000f209, 0x80798179,
- 0xd7610002, 0x0000f20a,
+ 0x0000f206, 0x80798179,
+ 0xd7610002, 0x0000f207,
0x80798179, 0xd7610002,
- 0x0000f20b, 0x80798179,
- 0xd7610002, 0x0000f20c,
+ 0x0000f208, 0x80798179,
+ 0xd7610002, 0x0000f209,
0x80798179, 0xd7610002,
- 0x0000f20d, 0x80798179,
- 0xd7610002, 0x0000f20e,
+ 0x0000f20a, 0x80798179,
+ 0xd7610002, 0x0000f20b,
0x80798179, 0xd7610002,
- 0x0000f20f, 0x80798179,
- 0xbf06a079, 0xbf840006,
- 0xe0704000, 0x705d0200,
- 0x8070ff70, 0x00000080,
- 0xbef90380, 0x7e040280,
- 0x807c907c, 0xbf0aff7c,
- 0x00000060, 0xbf85ffbc,
- 0xbe802f00, 0xbe822f02,
- 0xbe842f04, 0xbe862f06,
- 0xbe882f08, 0xbe8a2f0a,
- 0xd7610002, 0x0000f200,
+ 0x0000f20c, 0x80798179,
+ 0xd7610002, 0x0000f20d,
0x80798179, 0xd7610002,
- 0x0000f201, 0x80798179,
- 0xd7610002, 0x0000f202,
+ 0x0000f20e, 0x80798179,
+ 0xd7610002, 0x0000f20f,
+ 0x80798179, 0xbf06a079,
+ 0xbf840006, 0xe0704000,
+ 0x705d0200, 0x8070ff70,
+ 0x00000080, 0xbef90380,
+ 0x7e040280, 0x807c907c,
+ 0xbf0aff7c, 0x00000060,
+ 0xbf85ffbc, 0xbe802f00,
+ 0xbe822f02, 0xbe842f04,
+ 0xbe862f06, 0xbe882f08,
+ 0xbe8a2f0a, 0xd7610002,
+ 0x0000f200, 0x80798179,
+ 0xd7610002, 0x0000f201,
0x80798179, 0xd7610002,
- 0x0000f203, 0x80798179,
- 0xd7610002, 0x0000f204,
+ 0x0000f202, 0x80798179,
+ 0xd7610002, 0x0000f203,
0x80798179, 0xd7610002,
- 0x0000f205, 0x80798179,
- 0xd7610002, 0x0000f206,
+ 0x0000f204, 0x80798179,
+ 0xd7610002, 0x0000f205,
0x80798179, 0xd7610002,
- 0x0000f207, 0x80798179,
- 0xd7610002, 0x0000f208,
+ 0x0000f206, 0x80798179,
+ 0xd7610002, 0x0000f207,
0x80798179, 0xd7610002,
- 0x0000f209, 0x80798179,
- 0xd7610002, 0x0000f20a,
+ 0x0000f208, 0x80798179,
+ 0xd7610002, 0x0000f209,
0x80798179, 0xd7610002,
- 0x0000f20b, 0x80798179,
- 0xe0704000, 0x705d0200,
+ 0x0000f20a, 0x80798179,
+ 0xd7610002, 0x0000f20b,
+ 0x80798179, 0xe0704000,
+ 0x705d0200, 0xbefe03c1,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbf850002,
+ 0xbeff0380, 0xbf820001,
+ 0xbeff03c1, 0xb97b4306,
+ 0x877bc17b, 0xbf840044,
+ 0xbf8a0000, 0x877aff6d,
+ 0x80000000, 0xbf840040,
+ 0x8f7b867b, 0x8f7b827b,
+ 0xbef6037b, 0xb9703a05,
+ 0x80708170, 0xbf0d9973,
+ 0xbf850002, 0x8f708970,
+ 0xbf820001, 0x8f708a70,
+ 0xb97a1e06, 0x8f7a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0x8070ff70,
+ 0x00000080, 0xbef603ff,
+ 0x01000000, 0xd7650000,
+ 0x000100c1, 0xd7660000,
+ 0x000200c1, 0x16000084,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbefc0380,
+ 0xbf850012, 0xbe8303ff,
+ 0x00000080, 0xbf800000,
+ 0xbf800000, 0xbf800000,
+ 0xd8d80000, 0x01000000,
+ 0xbf8c0000, 0xe0704000,
+ 0x705d0100, 0x807c037c,
+ 0x80700370, 0xd5250000,
+ 0x0001ff00, 0x00000080,
+ 0xbf0a7b7c, 0xbf85fff4,
+ 0xbf820011, 0xbe8303ff,
+ 0x00000100, 0xbf800000,
+ 0xbf800000, 0xbf800000,
+ 0xd8d80000, 0x01000000,
+ 0xbf8c0000, 0xe0704000,
+ 0x705d0100, 0x807c037c,
+ 0x80700370, 0xd5250000,
+ 0x0001ff00, 0x00000100,
+ 0xbf0a7b7c, 0xbf85fff4,
0xbefe03c1, 0x907c9973,
0x877c817c, 0xbf06817c,
- 0xbf850002, 0xbeff0380,
- 0xbf820001, 0xbeff03c1,
- 0xb97b4306, 0x877bc17b,
- 0xbf840044, 0xbf8a0000,
- 0x877aff73, 0x04000000,
- 0xbf840040, 0x8f7b867b,
- 0x8f7b827b, 0xbef6037b,
- 0xb9702a05, 0x80708170,
- 0xbf0d9973, 0xbf850002,
- 0x8f708970, 0xbf820001,
- 0x8f708a70, 0xb97a1e06,
- 0x8f7a8a7a, 0x80707a70,
- 0x8070ff70, 0x00000200,
- 0x8070ff70, 0x00000080,
- 0xbef603ff, 0x01000000,
- 0xd7650000, 0x000100c1,
- 0xd7660000, 0x000200c1,
- 0x16000084, 0x907c9973,
+ 0xbf850004, 0xbef003ff,
+ 0x00000200, 0xbeff0380,
+ 0xbf820003, 0xbef003ff,
+ 0x00000400, 0xbeff03c1,
+ 0xb97b3a05, 0x807b817b,
+ 0x8f7b827b, 0x907c9973,
0x877c817c, 0xbf06817c,
- 0xbefc0380, 0xbf850012,
- 0xbe8303ff, 0x00000080,
- 0xbf800000, 0xbf800000,
- 0xbf800000, 0xd8d80000,
- 0x01000000, 0xbf8c0000,
- 0xe0704000, 0x705d0100,
- 0x807c037c, 0x80700370,
- 0xd5250000, 0x0001ff00,
- 0x00000080, 0xbf0a7b7c,
- 0xbf85fff4, 0xbf820011,
- 0xbe8303ff, 0x00000100,
- 0xbf800000, 0xbf800000,
- 0xbf800000, 0xd8d80000,
- 0x01000000, 0xbf8c0000,
- 0xe0704000, 0x705d0100,
- 0x807c037c, 0x80700370,
- 0xd5250000, 0x0001ff00,
- 0x00000100, 0xbf0a7b7c,
- 0xbf85fff4, 0xbefe03c1,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850004,
- 0xbef003ff, 0x00000200,
- 0xbeff0380, 0xbf820003,
- 0xbef003ff, 0x00000400,
- 0xbeff03c1, 0xb97b2a05,
- 0x807b817b, 0x8f7b827b,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850017,
+ 0xbf850017, 0xbef603ff,
+ 0x01000000, 0xbefc0384,
+ 0xbf0a7b7c, 0xbf840037,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xe0704000, 0x705d0000,
+ 0xe0704080, 0x705d0100,
+ 0xe0704100, 0x705d0200,
+ 0xe0704180, 0x705d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000200, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf820025,
0xbef603ff, 0x01000000,
0xbefc0384, 0xbf0a7b7c,
- 0xbf840037, 0x7e008700,
+ 0xbf840011, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xe0704000,
- 0x705d0000, 0xe0704080,
- 0x705d0100, 0xe0704100,
- 0x705d0200, 0xe0704180,
+ 0x705d0000, 0xe0704100,
+ 0x705d0100, 0xe0704200,
+ 0x705d0200, 0xe0704300,
0x705d0300, 0x807c847c,
- 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000400,
0xbf0a7b7c, 0xbf85ffef,
- 0xbf820025, 0xbef603ff,
- 0x01000000, 0xbefc0384,
- 0xbf0a7b7c, 0xbf840020,
- 0x7e008700, 0x7e028701,
- 0x7e048702, 0x7e068703,
+ 0xb97b1e06, 0x877bc17b,
+ 0xbf84000c, 0x8f7b837b,
+ 0x807b7c7b, 0xbefe03c1,
+ 0xbeff0380, 0x7e008700,
0xe0704000, 0x705d0000,
- 0xe0704100, 0x705d0100,
- 0xe0704200, 0x705d0200,
- 0xe0704300, 0x705d0300,
- 0x807c847c, 0x8070ff70,
- 0x00000400, 0xbf0a7b7c,
- 0xbf85ffef, 0xb97b1e06,
- 0x877bc17b, 0xbf84000c,
- 0x8f7b837b, 0x807b7c7b,
- 0xbefe03c1, 0xbeff0380,
- 0x7e008700, 0xe0704000,
- 0x705d0000, 0x807c817c,
- 0x8070ff70, 0x00000080,
- 0xbf0a7b7c, 0xbf85fff8,
- 0xbf82013c, 0xbef4037e,
- 0x8775ff7f, 0x0000ffff,
- 0x8875ff75, 0x00040000,
- 0xbef60380, 0xbef703ff,
- 0x10807fac, 0x876eff7f,
- 0x08000000, 0x906e836e,
- 0x88776e77, 0x876eff7f,
- 0x70000000, 0x906e816e,
- 0x88776e77, 0xb97202dc,
- 0x8f729972, 0x8872727f,
+ 0x807c817c, 0x8070ff70,
+ 0x00000080, 0xbf0a7b7c,
+ 0xbf85fff8, 0xbf82013b,
+ 0xbef4037e, 0x8775ff7f,
+ 0x0000ffff, 0x8875ff75,
+ 0x00040000, 0xbef60380,
+ 0xbef703ff, 0x10807fac,
+ 0xb97202dc, 0x8f729972,
0x876eff7f, 0x04000000,
0xbf840034, 0xbefe03c1,
0x907c9972, 0x877c817c,
@@ -2318,7 +2345,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbeff03c1, 0xb96f4306,
0x876fc16f, 0xbf840029,
0x8f6f866f, 0x8f6f826f,
- 0xbef6036f, 0xb9782a05,
+ 0xbef6036f, 0xb9783a05,
0x80788178, 0xbf0d9972,
0xbf850002, 0x8f788978,
0xbf820001, 0x8f788a78,
@@ -2342,13 +2369,14 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
- 0xb96f2a05, 0x806f816f,
+ 0xb96f3a05, 0x806f816f,
0x8f6f826f, 0x907c9972,
0x877c817c, 0xbf06817c,
- 0xbf850021, 0xbef603ff,
+ 0xbf850024, 0xbef603ff,
0x01000000, 0xbeee0378,
0x8078ff78, 0x00000200,
- 0xbefc0384, 0xe0304000,
+ 0xbefc0384, 0xbf0a6f7c,
+ 0xbf840050, 0xe0304000,
0x785d0000, 0xe0304080,
0x785d0100, 0xe0304100,
0x785d0200, 0xe0304180,
@@ -2361,94 +2389,97 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x6e5d0000, 0xe0304080,
0x6e5d0100, 0xe0304100,
0x6e5d0200, 0xe0304180,
- 0x6e5d0300, 0xbf820032,
- 0xbef603ff, 0x01000000,
- 0xbeee0378, 0x8078ff78,
- 0x00000400, 0xbefc0384,
+ 0x6e5d0300, 0xbf8c3f70,
+ 0xbf820034, 0xbef603ff,
+ 0x01000000, 0xbeee0378,
+ 0x8078ff78, 0x00000400,
+ 0xbefc0384, 0xbf0a6f7c,
+ 0xbf840012, 0xe0304000,
+ 0x785d0000, 0xe0304100,
+ 0x785d0100, 0xe0304200,
+ 0x785d0200, 0xe0304300,
+ 0x785d0300, 0xbf8c3f70,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xb96f1e06,
+ 0x876fc16f, 0xbf84000e,
+ 0x8f6f836f, 0x806f7c6f,
+ 0xbefe03c1, 0xbeff0380,
0xe0304000, 0x785d0000,
- 0xe0304100, 0x785d0100,
- 0xe0304200, 0x785d0200,
- 0xe0304300, 0x785d0300,
0xbf8c3f70, 0x7e008500,
- 0x7e028501, 0x7e048502,
- 0x7e068503, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xb96f1e06, 0x876fc16f,
- 0xbf84000e, 0x8f6f836f,
- 0x806f7c6f, 0xbefe03c1,
- 0xbeff0380, 0xe0304000,
- 0x785d0000, 0xbf8c3f70,
- 0x7e008500, 0x807c817c,
- 0x8078ff78, 0x00000080,
- 0xbf0a6f7c, 0xbf85fff7,
- 0xbeff03c1, 0xe0304000,
- 0x6e5d0000, 0xe0304100,
- 0x6e5d0100, 0xe0304200,
- 0x6e5d0200, 0xe0304300,
- 0x6e5d0300, 0xbf8c3f70,
- 0xb9782a05, 0x80788178,
+ 0x807c817c, 0x8078ff78,
+ 0x00000080, 0xbf0a6f7c,
+ 0xbf85fff7, 0xbeff03c1,
+ 0xe0304000, 0x6e5d0000,
+ 0xe0304100, 0x6e5d0100,
+ 0xe0304200, 0x6e5d0200,
+ 0xe0304300, 0x6e5d0300,
+ 0xbf8c3f70, 0xb9783a05,
+ 0x80788178, 0xbf0d9972,
+ 0xbf850002, 0x8f788978,
+ 0xbf820001, 0x8f788a78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x80f8ff78,
+ 0x00000050, 0xbef603ff,
+ 0x01000000, 0xbefc03ff,
+ 0x0000006c, 0x80f89078,
+ 0xf429003a, 0xf0000000,
+ 0xbf8cc07f, 0x80fc847c,
+ 0xbf800000, 0xbe803100,
+ 0xbe823102, 0x80f8a078,
+ 0xf42d003a, 0xf0000000,
+ 0xbf8cc07f, 0x80fc887c,
+ 0xbf800000, 0xbe803100,
+ 0xbe823102, 0xbe843104,
+ 0xbe863106, 0x80f8c078,
+ 0xf431003a, 0xf0000000,
+ 0xbf8cc07f, 0x80fc907c,
+ 0xbf800000, 0xbe803100,
+ 0xbe823102, 0xbe843104,
+ 0xbe863106, 0xbe883108,
+ 0xbe8a310a, 0xbe8c310c,
+ 0xbe8e310e, 0xbf06807c,
+ 0xbf84fff0, 0xba80f801,
+ 0x00000000, 0xbf8a0000,
+ 0xb9783a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb96e1e06,
0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
- 0x80f8ff78, 0x00000050,
0xbef603ff, 0x01000000,
- 0xbefc03ff, 0x0000006c,
- 0x80f89078, 0xf429003a,
- 0xf0000000, 0xbf8cc07f,
- 0x80fc847c, 0xbf800000,
- 0xbe803100, 0xbe823102,
- 0x80f8a078, 0xf42d003a,
- 0xf0000000, 0xbf8cc07f,
- 0x80fc887c, 0xbf800000,
- 0xbe803100, 0xbe823102,
- 0xbe843104, 0xbe863106,
- 0x80f8c078, 0xf431003a,
- 0xf0000000, 0xbf8cc07f,
- 0x80fc907c, 0xbf800000,
- 0xbe803100, 0xbe823102,
- 0xbe843104, 0xbe863106,
- 0xbe883108, 0xbe8a310a,
- 0xbe8c310c, 0xbe8e310e,
- 0xbf06807c, 0xbf84fff0,
- 0xba80f801, 0x00000000,
- 0xbf8a0000, 0xb9782a05,
- 0x80788178, 0xbf0d9972,
- 0xbf850002, 0x8f788978,
- 0xbf820001, 0x8f788a78,
- 0xb96e1e06, 0x8f6e8a6e,
- 0x80786e78, 0x8078ff78,
- 0x00000200, 0xbef603ff,
- 0x01000000, 0xf4211bfa,
+ 0xf4211bfa, 0xf0000000,
+ 0x80788478, 0xf4211b3a,
0xf0000000, 0x80788478,
- 0xf4211b3a, 0xf0000000,
- 0x80788478, 0xf4211b7a,
+ 0xf4211b7a, 0xf0000000,
+ 0x80788478, 0xf4211c3a,
0xf0000000, 0x80788478,
- 0xf4211c3a, 0xf0000000,
- 0x80788478, 0xf4211c7a,
+ 0xf4211c7a, 0xf0000000,
+ 0x80788478, 0xf4211eba,
0xf0000000, 0x80788478,
- 0xf4211eba, 0xf0000000,
- 0x80788478, 0xf4211efa,
+ 0xf4211efa, 0xf0000000,
+ 0x80788478, 0xf4211e7a,
0xf0000000, 0x80788478,
- 0xf4211e7a, 0xf0000000,
- 0x80788478, 0xf4211cfa,
+ 0xf4211cfa, 0xf0000000,
+ 0x80788478, 0xf4211bba,
0xf0000000, 0x80788478,
+ 0xbf8cc07f, 0xb9eef814,
0xf4211bba, 0xf0000000,
0x80788478, 0xbf8cc07f,
- 0xb9eef814, 0xf4211bba,
- 0xf0000000, 0x80788478,
- 0xbf8cc07f, 0xb9eef815,
- 0xbefc036f, 0xbefe0370,
- 0xbeff0371, 0x876f7bff,
- 0x000003ff, 0xb9ef4803,
- 0x876f7bff, 0xfffff800,
- 0x906f8b6f, 0xb9efa2c3,
- 0xb9f3f801, 0xb96e2a05,
- 0x806e816e, 0xbf0d9972,
- 0xbf850002, 0x8f6e896e,
- 0xbf820001, 0x8f6e8a6e,
+ 0xb9eef815, 0xbefc036f,
+ 0xbefe0370, 0xbeff0371,
+ 0x876f7bff, 0x000003ff,
+ 0xb9ef4803, 0x876f7bff,
+ 0xfffff800, 0x906f8b6f,
+ 0xb9efa2c3, 0xb9f3f801,
+ 0xb96e3a05, 0x806e816e,
+ 0xbf0d9972, 0xbf850002,
+ 0x8f6e896e, 0xbf820001,
+ 0x8f6e8a6e, 0xb96f1e06,
+ 0x8f6f8a6f, 0x806e6f6e,
0x806eff6e, 0x00000200,
0x806e746e, 0x826f8075,
0x876fff6f, 0x0000ffff,
@@ -2463,3 +2494,445 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0x00000000,
};
+static const uint32_t cwsr_trap_gfx11_hex[] = {
+ 0xbfa00001, 0xbfa00221,
+ 0xb0804006, 0xb8f8f802,
+ 0x9178ff78, 0x00020006,
+ 0xb8fbf803, 0xbf0d9e6d,
+ 0xbfa10001, 0xbfbd0000,
+ 0xbf0d9f6d, 0xbfa20006,
+ 0x8b6eff78, 0x00002000,
+ 0xbfa10009, 0x8b6eff6d,
+ 0x00ff0000, 0xbfa2001e,
+ 0x8b6eff7b, 0x00000400,
+ 0xbfa20041, 0xbf830010,
+ 0xb8fbf803, 0xbfa0fffa,
+ 0x8b6eff7b, 0x00000900,
+ 0xbfa20015, 0x8b6eff7b,
+ 0x000071ff, 0xbfa10008,
+ 0x8b6fff7b, 0x00007080,
+ 0xbfa10001, 0xbeee1287,
+ 0xb8eff801, 0x846e8c6e,
+ 0x8b6e6f6e, 0xbfa2000a,
+ 0x8b6eff6d, 0x00ff0000,
+ 0xbfa20007, 0xb8eef801,
+ 0x8b6eff6e, 0x00000800,
+ 0xbfa20003, 0x8b6eff7b,
+ 0x00000400, 0xbfa20026,
+ 0xbefa4d82, 0xbf89fc07,
+ 0x84fa887a, 0xf4005bbd,
+ 0xf8000010, 0xbf89fc07,
+ 0x846e976e, 0x9177ff77,
+ 0x00800000, 0x8c776e77,
+ 0xf4045bbd, 0xf8000000,
+ 0xbf89fc07, 0xf4045ebd,
+ 0xf8000008, 0xbf89fc07,
+ 0x8bee6e6e, 0xbfa10001,
+ 0xbe80486e, 0x8b6eff6d,
+ 0x01ff0000, 0xbfa20005,
+ 0x8c78ff78, 0x00002000,
+ 0x80ec886c, 0x82ed806d,
+ 0xbfa00005, 0x8b6eff6d,
+ 0x01000000, 0xbfa20002,
+ 0x806c846c, 0x826d806d,
+ 0x8b6dff6d, 0x0000ffff,
+ 0x8bfe7e7e, 0x8bea6a6a,
+ 0xb978f802, 0xbe804a6c,
+ 0x8b6dff6d, 0x0000ffff,
+ 0xbefa0080, 0xb97a0283,
+ 0xbeee007e, 0xbeef007f,
+ 0xbefe0180, 0xbefe4d84,
+ 0xbf89fc07, 0x8b7aff7f,
+ 0x04000000, 0x847a857a,
+ 0x8c6d7a6d, 0xbefa007e,
+ 0x8b7bff7f, 0x0000ffff,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xdca6c000, 0x007a0000,
+ 0x7e000280, 0xbefe007a,
+ 0xbeff007b, 0xb8fb02dc,
+ 0x847b997b, 0xb8fa3b05,
+ 0x807a817a, 0xbf0d997b,
+ 0xbfa20002, 0x847a897a,
+ 0xbfa00001, 0x847a8a7a,
+ 0xb8fb1e06, 0x847b8a7b,
+ 0x807a7b7a, 0x8b7bff7f,
+ 0x0000ffff, 0x807aff7a,
+ 0x00000200, 0x807a7e7a,
+ 0x827b807b, 0xd7610000,
+ 0x00010870, 0xd7610000,
+ 0x00010a71, 0xd7610000,
+ 0x00010c72, 0xd7610000,
+ 0x00010e73, 0xd7610000,
+ 0x00011074, 0xd7610000,
+ 0x00011275, 0xd7610000,
+ 0x00011476, 0xd7610000,
+ 0x00011677, 0xd7610000,
+ 0x00011a79, 0xd7610000,
+ 0x00011c7e, 0xd7610000,
+ 0x00011e7f, 0xbefe00ff,
+ 0x00003fff, 0xbeff0080,
+ 0xdca6c040, 0x007a0000,
+ 0xd760007a, 0x00011d00,
+ 0xd760007b, 0x00011f00,
+ 0xbefe007a, 0xbeff007b,
+ 0xbef4007e, 0x8b75ff7f,
+ 0x0000ffff, 0x8c75ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x10807fac,
+ 0xbef1007d, 0xbef00080,
+ 0xb8f302dc, 0x84739973,
+ 0xbefe00c1, 0x857d9973,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00002, 0xbeff00c1,
+ 0xbfa00009, 0xbef600ff,
+ 0x01000000, 0xe0685080,
+ 0x701d0100, 0xe0685100,
+ 0x701d0200, 0xe0685180,
+ 0x701d0300, 0xbfa00008,
+ 0xbef600ff, 0x01000000,
+ 0xe0685100, 0x701d0100,
+ 0xe0685200, 0x701d0200,
+ 0xe0685300, 0x701d0300,
+ 0xb8f03b05, 0x80708170,
+ 0xbf0d9973, 0xbfa20002,
+ 0x84708970, 0xbfa00001,
+ 0x84708a70, 0xb8fa1e06,
+ 0x847a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0xbef600ff, 0x01000000,
+ 0x7e000280, 0x7e020280,
+ 0x7e040280, 0xbefd0080,
+ 0xd7610002, 0x0000fa71,
+ 0x807d817d, 0xd7610002,
+ 0x0000fa6c, 0x807d817d,
+ 0x917aff6d, 0x80000000,
+ 0xd7610002, 0x0000fa7a,
+ 0x807d817d, 0xd7610002,
+ 0x0000fa6e, 0x807d817d,
+ 0xd7610002, 0x0000fa6f,
+ 0x807d817d, 0xd7610002,
+ 0x0000fa78, 0x807d817d,
+ 0xb8faf803, 0xd7610002,
+ 0x0000fa7a, 0x807d817d,
+ 0xd7610002, 0x0000fa7b,
+ 0x807d817d, 0xb8f1f801,
+ 0xd7610002, 0x0000fa71,
+ 0x807d817d, 0xb8f1f814,
+ 0xd7610002, 0x0000fa71,
+ 0x807d817d, 0xb8f1f815,
+ 0xd7610002, 0x0000fa71,
+ 0x807d817d, 0xbefe00ff,
+ 0x0000ffff, 0xbeff0080,
+ 0xe0685000, 0x701d0200,
+ 0xbefe00c1, 0xb8f03b05,
+ 0x80708170, 0xbf0d9973,
+ 0xbfa20002, 0x84708970,
+ 0xbfa00001, 0x84708a70,
+ 0xb8fa1e06, 0x847a8a7a,
+ 0x80707a70, 0xbef600ff,
+ 0x01000000, 0xbef90080,
+ 0xbefd0080, 0xbf800000,
+ 0xbe804100, 0xbe824102,
+ 0xbe844104, 0xbe864106,
+ 0xbe884108, 0xbe8a410a,
+ 0xbe8c410c, 0xbe8e410e,
+ 0xd7610002, 0x0000f200,
+ 0x80798179, 0xd7610002,
+ 0x0000f201, 0x80798179,
+ 0xd7610002, 0x0000f202,
+ 0x80798179, 0xd7610002,
+ 0x0000f203, 0x80798179,
+ 0xd7610002, 0x0000f204,
+ 0x80798179, 0xd7610002,
+ 0x0000f205, 0x80798179,
+ 0xd7610002, 0x0000f206,
+ 0x80798179, 0xd7610002,
+ 0x0000f207, 0x80798179,
+ 0xd7610002, 0x0000f208,
+ 0x80798179, 0xd7610002,
+ 0x0000f209, 0x80798179,
+ 0xd7610002, 0x0000f20a,
+ 0x80798179, 0xd7610002,
+ 0x0000f20b, 0x80798179,
+ 0xd7610002, 0x0000f20c,
+ 0x80798179, 0xd7610002,
+ 0x0000f20d, 0x80798179,
+ 0xd7610002, 0x0000f20e,
+ 0x80798179, 0xd7610002,
+ 0x0000f20f, 0x80798179,
+ 0xbf06a079, 0xbfa10006,
+ 0xe0685000, 0x701d0200,
+ 0x8070ff70, 0x00000080,
+ 0xbef90080, 0x7e040280,
+ 0x807d907d, 0xbf0aff7d,
+ 0x00000060, 0xbfa2ffbc,
+ 0xbe804100, 0xbe824102,
+ 0xbe844104, 0xbe864106,
+ 0xbe884108, 0xbe8a410a,
+ 0xd7610002, 0x0000f200,
+ 0x80798179, 0xd7610002,
+ 0x0000f201, 0x80798179,
+ 0xd7610002, 0x0000f202,
+ 0x80798179, 0xd7610002,
+ 0x0000f203, 0x80798179,
+ 0xd7610002, 0x0000f204,
+ 0x80798179, 0xd7610002,
+ 0x0000f205, 0x80798179,
+ 0xd7610002, 0x0000f206,
+ 0x80798179, 0xd7610002,
+ 0x0000f207, 0x80798179,
+ 0xd7610002, 0x0000f208,
+ 0x80798179, 0xd7610002,
+ 0x0000f209, 0x80798179,
+ 0xd7610002, 0x0000f20a,
+ 0x80798179, 0xd7610002,
+ 0x0000f20b, 0x80798179,
+ 0xe0685000, 0x701d0200,
+ 0xbefe00c1, 0x857d9973,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8fb4306, 0x8b7bc17b,
+ 0xbfa10044, 0xbfbd0000,
+ 0x8b7aff6d, 0x80000000,
+ 0xbfa10040, 0x847b867b,
+ 0x847b827b, 0xbef6007b,
+ 0xb8f03b05, 0x80708170,
+ 0xbf0d9973, 0xbfa20002,
+ 0x84708970, 0xbfa00001,
+ 0x84708a70, 0xb8fa1e06,
+ 0x847a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xd71f0000, 0x000100c1,
+ 0xd7200000, 0x000200c1,
+ 0x16000084, 0x857d9973,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa20012,
+ 0xbe8300ff, 0x00000080,
+ 0xbf800000, 0xbf800000,
+ 0xbf800000, 0xd8d80000,
+ 0x01000000, 0xbf890000,
+ 0xe0685000, 0x701d0100,
+ 0x807d037d, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000080, 0xbf0a7b7d,
+ 0xbfa2fff4, 0xbfa00011,
+ 0xbe8300ff, 0x00000100,
+ 0xbf800000, 0xbf800000,
+ 0xbf800000, 0xd8d80000,
+ 0x01000000, 0xbf890000,
+ 0xe0685000, 0x701d0100,
+ 0x807d037d, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000100, 0xbf0a7b7d,
+ 0xbfa2fff4, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20004,
+ 0xbef000ff, 0x00000200,
+ 0xbeff0080, 0xbfa00003,
+ 0xbef000ff, 0x00000400,
+ 0xbeff00c1, 0xb8fb3b05,
+ 0x807b817b, 0x847b827b,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20017,
+ 0xbef600ff, 0x01000000,
+ 0xbefd0084, 0xbf0a7b7d,
+ 0xbfa10037, 0x7e008700,
+ 0x7e028701, 0x7e048702,
+ 0x7e068703, 0xe0685000,
+ 0x701d0000, 0xe0685080,
+ 0x701d0100, 0xe0685100,
+ 0x701d0200, 0xe0685180,
+ 0x701d0300, 0x807d847d,
+ 0x8070ff70, 0x00000200,
+ 0xbf0a7b7d, 0xbfa2ffef,
+ 0xbfa00025, 0xbef600ff,
+ 0x01000000, 0xbefd0084,
+ 0xbf0a7b7d, 0xbfa10011,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xe0685000, 0x701d0000,
+ 0xe0685100, 0x701d0100,
+ 0xe0685200, 0x701d0200,
+ 0xe0685300, 0x701d0300,
+ 0x807d847d, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7d,
+ 0xbfa2ffef, 0xb8fb1e06,
+ 0x8b7bc17b, 0xbfa1000c,
+ 0x847b837b, 0x807b7d7b,
+ 0xbefe00c1, 0xbeff0080,
+ 0x7e008700, 0xe0685000,
+ 0x701d0000, 0x807d817d,
+ 0x8070ff70, 0x00000080,
+ 0xbf0a7b7d, 0xbfa2fff8,
+ 0xbfa00146, 0xbef4007e,
+ 0x8b75ff7f, 0x0000ffff,
+ 0x8c75ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x10807fac, 0xb8f202dc,
+ 0x84729972, 0x8b6eff7f,
+ 0x04000000, 0xbfa1003a,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef4306, 0x8b6fc16f,
+ 0xbfa1002f, 0x846f866f,
+ 0x846f826f, 0xbef6006f,
+ 0xb8f83b05, 0x80788178,
+ 0xbf0d9972, 0xbfa20002,
+ 0x84788978, 0xbfa00001,
+ 0x84788a78, 0xb8ee1e06,
+ 0x846e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0x8078ff78, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0x857d9972, 0x8b7d817d,
+ 0xbf06817d, 0xbefd0080,
+ 0xbfa2000c, 0xe0500000,
+ 0x781d0000, 0xbf8903f7,
+ 0xdac00000, 0x00000000,
+ 0x807dff7d, 0x00000080,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7d, 0xbfa2fff5,
+ 0xbfa0000b, 0xe0500000,
+ 0x781d0000, 0xbf8903f7,
+ 0xdac00000, 0x00000000,
+ 0x807dff7d, 0x00000100,
+ 0x8078ff78, 0x00000100,
+ 0xbf0a6f7d, 0xbfa2fff5,
+ 0xbef80080, 0xbefe00c1,
+ 0x857d9972, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20002,
+ 0xbeff0080, 0xbfa00001,
+ 0xbeff00c1, 0xb8ef3b05,
+ 0x806f816f, 0x846f826f,
+ 0x857d9972, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20024,
+ 0xbef600ff, 0x01000000,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000200, 0xbefd0084,
+ 0xbf0a6f7d, 0xbfa10050,
+ 0xe0505000, 0x781d0000,
+ 0xe0505080, 0x781d0100,
+ 0xe0505100, 0x781d0200,
+ 0xe0505180, 0x781d0300,
+ 0xbf8903f7, 0x7e008500,
+ 0x7e028501, 0x7e048502,
+ 0x7e068503, 0x807d847d,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7d, 0xbfa2ffee,
+ 0xe0505000, 0x6e1d0000,
+ 0xe0505080, 0x6e1d0100,
+ 0xe0505100, 0x6e1d0200,
+ 0xe0505180, 0x6e1d0300,
+ 0xbf8903f7, 0xbfa00034,
+ 0xbef600ff, 0x01000000,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefd0084,
+ 0xbf0a6f7d, 0xbfa10012,
+ 0xe0505000, 0x781d0000,
+ 0xe0505100, 0x781d0100,
+ 0xe0505200, 0x781d0200,
+ 0xe0505300, 0x781d0300,
+ 0xbf8903f7, 0x7e008500,
+ 0x7e028501, 0x7e048502,
+ 0x7e068503, 0x807d847d,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7d, 0xbfa2ffee,
+ 0xb8ef1e06, 0x8b6fc16f,
+ 0xbfa1000e, 0x846f836f,
+ 0x806f7d6f, 0xbefe00c1,
+ 0xbeff0080, 0xe0505000,
+ 0x781d0000, 0xbf8903f7,
+ 0x7e008500, 0x807d817d,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7d, 0xbfa2fff7,
+ 0xbeff00c1, 0xe0505000,
+ 0x6e1d0000, 0xe0505100,
+ 0x6e1d0100, 0xe0505200,
+ 0x6e1d0200, 0xe0505300,
+ 0x6e1d0300, 0xbf8903f7,
+ 0xb8f83b05, 0x80788178,
+ 0xbf0d9972, 0xbfa20002,
+ 0x84788978, 0xbfa00001,
+ 0x84788a78, 0xb8ee1e06,
+ 0x846e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0x80f8ff78, 0x00000050,
+ 0xbef600ff, 0x01000000,
+ 0xbefd00ff, 0x0000006c,
+ 0x80f89078, 0xf428403a,
+ 0xf0000000, 0xbf89fc07,
+ 0x80fd847d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0x80f8a078, 0xf42c403a,
+ 0xf0000000, 0xbf89fc07,
+ 0x80fd887d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0xbe844304, 0xbe864306,
+ 0x80f8c078, 0xf430403a,
+ 0xf0000000, 0xbf89fc07,
+ 0x80fd907d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0xbe844304, 0xbe864306,
+ 0xbe884308, 0xbe8a430a,
+ 0xbe8c430c, 0xbe8e430e,
+ 0xbf06807d, 0xbfa1fff0,
+ 0xb980f801, 0x00000000,
+ 0xbfbd0000, 0xb8f83b05,
+ 0x80788178, 0xbf0d9972,
+ 0xbfa20002, 0x84788978,
+ 0xbfa00001, 0x84788a78,
+ 0xb8ee1e06, 0x846e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0xbef600ff,
+ 0x01000000, 0xf4205bfa,
+ 0xf0000000, 0x80788478,
+ 0xf4205b3a, 0xf0000000,
+ 0x80788478, 0xf4205b7a,
+ 0xf0000000, 0x80788478,
+ 0xf4205c3a, 0xf0000000,
+ 0x80788478, 0xf4205c7a,
+ 0xf0000000, 0x80788478,
+ 0xf4205eba, 0xf0000000,
+ 0x80788478, 0xf4205efa,
+ 0xf0000000, 0x80788478,
+ 0xf4205e7a, 0xf0000000,
+ 0x80788478, 0xf4205cfa,
+ 0xf0000000, 0x80788478,
+ 0xf4205bba, 0xf0000000,
+ 0x80788478, 0xbf89fc07,
+ 0xb96ef814, 0xf4205bba,
+ 0xf0000000, 0x80788478,
+ 0xbf89fc07, 0xb96ef815,
+ 0xbefd006f, 0xbefe0070,
+ 0xbeff0071, 0x8b6f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x8b6f7bff, 0xfffff800,
+ 0x856f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee3b05,
+ 0x806e816e, 0xbf0d9972,
+ 0xbfa20002, 0x846e896e,
+ 0xbfa00001, 0x846e8a6e,
+ 0xb8ef1e06, 0x846f8a6f,
+ 0x806e6f6e, 0x806eff6e,
+ 0x00000200, 0x806e746e,
+ 0x826f8075, 0x8b6fff6f,
+ 0x0000ffff, 0xf4085c37,
+ 0xf8000050, 0xf4085d37,
+ 0xf8000060, 0xf4005e77,
+ 0xf8000074, 0xbf89fc07,
+ 0x8b6dff6d, 0x0000ffff,
+ 0x8bfe7e7e, 0x8bea6a6a,
+ 0xb8eef802, 0xbf0d866e,
+ 0xbfa20002, 0xb97af802,
+ 0xbe80486c, 0xb97af802,
+ 0xbe804a6c, 0xbfb00000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0x00000000,
+};
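The arrays above are the raw ISA blobs for the context save/restore (CWSR) trap handler; at device initialization the KFD driver installs whichever blob matches the GPU generation. The sketch below is illustrative only and is not part of this patch: the struct, enum and pick_cwsr_blob() names are invented for the example (the real selection logic lives in kfd_device.c), and it assumes this header is included so the static arrays are visible in the translation unit.

#include <stddef.h>
#include <stdint.h>

struct cwsr_blob {
	const uint32_t *isa;	/* trap handler machine code */
	size_t size;		/* size of the blob in bytes */
};

enum gfx_gen { GFX9_ALDEBARAN, GFX10, GFX11 };	/* hypothetical selector */

static struct cwsr_blob pick_cwsr_blob(enum gfx_gen gen)
{
	switch (gen) {
	case GFX9_ALDEBARAN:
		return (struct cwsr_blob){ cwsr_trap_aldebaran_hex,
					   sizeof(cwsr_trap_aldebaran_hex) };
	case GFX10:
		return (struct cwsr_blob){ cwsr_trap_gfx10_hex,
					   sizeof(cwsr_trap_gfx10_hex) };
	case GFX11:
	default:
		return (struct cwsr_blob){ cwsr_trap_gfx11_hex,
					   sizeof(cwsr_trap_gfx11_hex) };
	}
}

Whatever selects the blob also needs its size, since the handler is copied into a fixed per-device buffer and the chosen array has to fit within it.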
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 5081f91190b8..8b92c33c2a7c 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -23,37 +23,54 @@
/* To compile this assembly code:
*
* Navi1x:
- * cpp -DASIC_TARGET_NAVI1X=1 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
- * sp3-nv1x nv1x.sp3 -hex nv1x.hex
+ * cpp -DASIC_FAMILY=CHIP_NAVI10 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
+ * sp3 nv1x.sp3 -hex nv1x.hex
*
- * Others:
- * cpp -DASIC_TARGET_NAVI1X=0 cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
- * sp3-gfx10 gfx10.sp3 -hex gfx10.hex
+ * gfx10:
+ * cpp -DASIC_FAMILY=CHIP_SIENNA_CICHLID cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
+ * sp3 gfx10.sp3 -hex gfx10.hex
+ *
+ * gfx11:
+ * cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3
+ * sp3 gfx11.sp3 -hex gfx11.hex
*/
-#define NO_SQC_STORE !ASIC_TARGET_NAVI1X
+#define CHIP_NAVI10 26
+#define CHIP_SIENNA_CICHLID 30
+#define CHIP_PLUM_BONITO 36
+
+#define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID)
+#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
+#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
+#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
+#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO)
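These feature macros drive most of the per-target conditionals in the rest of the file. As a quick reference, the small self-contained C program below (purely illustrative, not part of the patch) evaluates the same expressions for each supported family and prints which code paths that target compiles in:

#include <stdio.h>

#define CHIP_NAVI10         26
#define CHIP_SIENNA_CICHLID 30
#define CHIP_PLUM_BONITO    36

static void report(const char *name, int family)
{
	int no_sqc_store         = family >= CHIP_SIENNA_CICHLID;
	int have_xnack           = family <  CHIP_SIENNA_CICHLID;
	int have_sendmsg_rtn     = family >= CHIP_PLUM_BONITO;
	int have_buffer_lds_load = family <  CHIP_PLUM_BONITO;
	int sw_sa_trap           = family >= CHIP_PLUM_BONITO;

	printf("%s: NO_SQC_STORE=%d HAVE_XNACK=%d HAVE_SENDMSG_RTN=%d "
	       "HAVE_BUFFER_LDS_LOAD=%d SW_SA_TRAP=%d\n",
	       name, no_sqc_store, have_xnack, have_sendmsg_rtn,
	       have_buffer_lds_load, sw_sa_trap);
}

int main(void)
{
	report("Navi1x",         CHIP_NAVI10);
	report("Sienna Cichlid", CHIP_SIENNA_CICHLID);
	report("Plum Bonito",    CHIP_PLUM_BONITO);
	return 0;
}

For Navi1x this yields the XNACK save/restore, SQC store and buffer-based LDS paths; Sienna Cichlid drops XNACK handling and SQC stores; Plum Bonito additionally switches to s_sendmsg_rtn, drops buffer LDS loads, and enables the software shader-array trap.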
var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
-var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
-var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
+var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
+var SQ_WAVE_STATUS_TRAP_EN_SHIFT = 6
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
-var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
-var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
-var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
-var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 4
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8
var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24
var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4
var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11
var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1
+#if ASIC_FAMILY < CHIP_PLUM_BONITO
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
+#else
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
+#endif
+
var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
-var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF
+var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7
var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
@@ -63,46 +80,37 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
+var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000
+
+var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12
+var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19
-var SQ_WAVE_IB_STS_RCNT_SHIFT = 16
var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15
var SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT = 25
-var SQ_WAVE_IB_STS_REPLAY_W64H_SIZE = 1
var SQ_WAVE_IB_STS_REPLAY_W64H_MASK = 0x02000000
-var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1
-var SQ_WAVE_IB_STS_RCNT_SIZE = 6
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000
-var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
-var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
-var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
-
// bits [31:24] unused by SPI debug data
var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31
var TTMP11_SAVE_REPLAY_W64H_MASK = 0x80000000
var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 24
var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0x7F000000
+var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23
+var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14]
// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000
var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC
-
-var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000
-var S_SAVE_SPI_INIT_ATC_SHIFT = 27
-var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000
-var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
+var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
+var S_SAVE_PC_HI_HT_MASK = 0x01000000
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
-var S_SAVE_PC_HI_RCNT_SHIFT = 26
-var S_SAVE_PC_HI_RCNT_MASK = 0xFC000000
-var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 25
-var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x02000000
-var S_SAVE_PC_HI_REPLAY_W64H_SHIFT = 24
-var S_SAVE_PC_HI_REPLAY_W64H_MASK = 0x01000000
+var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000
+var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31
var s_sgpr_save_num = 108
@@ -130,19 +138,10 @@ var s_save_ttmps_hi = s_save_trapsts
var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
-var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000
-var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
-var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000
-var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
var S_WAVE_SIZE = 25
-var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
-var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
-var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
-var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
-
var s_restore_spi_init_lo = exec_lo
var s_restore_spi_init_hi = exec_hi
var s_restore_mem_offset = ttmp12
@@ -179,84 +178,146 @@ L_JUMP_TO_RESTORE:
L_SKIP_RESTORE:
s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
- s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK
-if SINGLE_STEP_MISSED_WORKAROUND
- // No single step exceptions if MODE.DEBUG_EN=0.
- s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
- s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
- s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND
+ // Clear SPI_PRIO: do not save with elevated priority.
+ // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+ s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
+
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+
+#if SW_SA_TRAP
+ // If ttmp1[30] is set then issue s_barrier to unblock dependent waves.
+ s_bitcmp1_b32 s_save_pc_hi, 30
+ s_cbranch_scc0 L_TRAP_NO_BARRIER
+ s_barrier
+
+L_TRAP_NO_BARRIER:
+ // If ttmp1[31] is set then trap may occur early.
+ // Spin wait until SAVECTX exception is raised.
+ s_bitcmp1_b32 s_save_pc_hi, 31
+ s_cbranch_scc1 L_CHECK_SAVE
+#endif
- // Second-level trap already handled exception if STATUS.HALT=1.
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ s_cbranch_scc0 L_NOT_HALTED
+
+L_HALTED:
+ // Host trap may occur while wave is halted.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+L_CHECK_SAVE:
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+ s_cbranch_scc1 L_SAVE
+
+ // Wave is halted but neither host trap nor SAVECTX is raised.
+ // Caused by instruction fetch memory violation.
+ // Spin wait until context saved to prevent interrupt storm.
+ s_sleep 0x10
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_branch L_CHECK_SAVE
+
+L_NOT_HALTED:
+ // Let second-level handle non-SAVECTX exception or trap.
+ // Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+ // Check non-maskable exceptions. memory_violation, illegal_instruction
+ // and xnack_error exceptions always cause the wave to enter the trap
+ // handler.
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+ // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
+ // Maskable exceptions only cause the wave to enter the trap handler if
+ // their respective bit in mode.excp_en is set.
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+ s_cbranch_scc0 L_CHECK_TRAP_ID
+
+ s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+ s_cbranch_scc0 L_NOT_ADDR_WATCH
+ s_bitset1_b32 ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch
+
+L_NOT_ADDR_WATCH:
+ s_getreg_b32 ttmp3, hwreg(HW_REG_MODE)
+ s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
+ s_and_b32 ttmp2, ttmp2, ttmp3
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
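The gating above is compact, so a C restatement may help when reading it. This is a sketch for readability only, not driver code; it uses the SQ_WAVE_* values defined earlier in this diff and models only the maskable-exception check (trap_id and the non-maskable exceptions are handled by the surrounding branches).

#define SQ_WAVE_TRAPSTS_EXCP_MASK        0x1FF
#define SQ_WAVE_TRAPSTS_EXCP_HI_MASK     0x7000
#define SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK  0x80
#define SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT 7
#define SQ_WAVE_MODE_EXCP_EN_SHIFT       12

/* Returns nonzero if a pending maskable exception is enabled in mode.excp_en
 * and should therefore be forwarded to the second-level trap handler. */
static int maskable_excp_wants_2nd_trap(unsigned int trapsts, unsigned int mode)
{
	unsigned int excp = trapsts & (SQ_WAVE_TRAPSTS_EXCP_MASK |
				       SQ_WAVE_TRAPSTS_EXCP_HI_MASK);

	if (!excp)
		return 0;	/* nothing pending; fall through to trap_id check */

	/* addr_watch0 (excp bit 7) and addr_watch1..3 (excp_hi) all map onto
	 * the single excp_en.addr_watch enable bit. */
	if (trapsts & (SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK |
		       SQ_WAVE_TRAPSTS_EXCP_HI_MASK))
		excp |= 1u << SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT;

	/* Align the exception bits with mode.excp_en and test the overlap. */
	return (excp << SQ_WAVE_MODE_EXCP_EN_SHIFT) & mode;
}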
+
+L_CHECK_TRAP_ID:
+ // Check trap_id != 0
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+if SINGLE_STEP_MISSED_WORKAROUND
// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
- s_cbranch_scc0 L_FETCH_2ND_TRAP
-
-L_NO_SINGLE_STEP_WORKAROUND:
+ s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
end
-
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
s_cbranch_scc1 L_SAVE
- // If STATUS.MEM_VIOL is asserted then halt the wave to prevent
- // the exception raising again and blocking context save.
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
- s_cbranch_scc0 L_FETCH_2ND_TRAP
- s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
-
L_FETCH_2ND_TRAP:
-
-#if ASIC_TARGET_NAVI1X
- // Preserve and clear scalar XNACK state before issuing scalar loads.
- // Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
- // unused space ttmp11[31:24].
- s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK)
- s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS)
- s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
- s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
- s_or_b32 ttmp11, ttmp11, ttmp3
- s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
- s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
- s_or_b32 ttmp11, ttmp11, ttmp3
- s_andn2_b32 ttmp2, ttmp2, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
- s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+#if HAVE_XNACK
+ save_and_clear_ib_sts(ttmp14, ttmp15)
#endif
// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
// ttmp12 holds SQ_WAVE_STATUS
+#if HAVE_SENDMSG_RTN
+ s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
+ s_waitcnt lgkmcnt(0)
+#else
s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO)
s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI)
+#endif
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
+ s_waitcnt lgkmcnt(0)
+ s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
+ s_or_b32 ttmp11, ttmp11, ttmp2
+
s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
s_waitcnt lgkmcnt(0)
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
s_waitcnt lgkmcnt(0)
+
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
	s_cbranch_scc0	L_NO_NEXT_TRAP						// second-level trap handler has not been set

s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
L_NO_NEXT_TRAP:
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK
- s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
- s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
- s_addc_u32 ttmp1, ttmp1, 0
-L_EXCP_CASE:
+ // If not caused by trap then halt wave to prevent re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
+ s_cbranch_scc1 L_TRAP_CASE
+ s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
+ // Rewind the PC to prevent this from occurring.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+ s_branch L_EXIT_TRAP
+
+L_TRAP_CASE:
+ // Host trap will not cause trap re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
+ s_cbranch_scc1 L_EXIT_TRAP
+
+ // Advance past trap instruction to prevent re-entry.
+ s_add_u32 ttmp0, ttmp0, 0x4
+ s_addc_u32 ttmp1, ttmp1, 0x0
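The three exits above can be summarized in plain C. This is a sketch for readability, not driver code; the handler itself performs the same adjustment with 64-bit add/sub-with-carry on ttmp0/ttmp1, and the masks are the S_SAVE_PC_HI_* and SQ_WAVE_STATUS_HALT values defined earlier in this file.

#define S_SAVE_PC_HI_TRAP_ID_MASK 0x00FF0000
#define S_SAVE_PC_HI_HT_MASK      0x01000000
#define SQ_WAVE_STATUS_HALT_MASK  0x2000

/* pc is the 64-bit {ttmp1,ttmp0} return address; pc_hi carries the trap id. */
static unsigned long long fixup_return_pc(unsigned long long pc,
					  unsigned int pc_hi,
					  unsigned int *status)
{
	if (!(pc_hi & (S_SAVE_PC_HI_TRAP_ID_MASK | S_SAVE_PC_HI_HT_MASK))) {
		/* Exception, not a trap: halt the wave and back the PC off a
		 * possible S_ENDPGM so the later context save cannot fail. */
		*status |= SQ_WAVE_STATUS_HALT_MASK;
		return pc - 8;
	}

	if (pc_hi & S_SAVE_PC_HI_HT_MASK)
		return pc;	/* host trap: no re-entry risk, PC unchanged */

	return pc + 4;		/* s_trap: step past the trap instruction */
}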
+
+L_EXIT_TRAP:
s_and_b32 ttmp1, ttmp1, 0xFFFF
-#if ASIC_TARGET_NAVI1X
- // Restore SQ_WAVE_IB_STS.
- s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
- s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
- s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
- s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
- s_or_b32 ttmp2, ttmp2, ttmp3
- s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+#if HAVE_XNACK
+ restore_ib_sts(ttmp14, ttmp15)
#endif
// Restore SQ_WAVE_STATUS.
@@ -271,20 +332,8 @@ L_SAVE:
s_mov_b32 s_save_tmp, 0
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
-#if ASIC_TARGET_NAVI1X
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)
- s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
- s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE)
- s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
- s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT, SQ_WAVE_IB_STS_REPLAY_W64H_SIZE)
- s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
- s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY and REPLAY_W64H in IB_STS
- s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
-
- s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
+#if HAVE_XNACK
+ save_and_clear_ib_sts(s_save_tmp, s_save_trapsts)
#endif
/* inform SPI the readiness and wait for SPI's go signal */
@@ -292,9 +341,13 @@ L_SAVE:
s_mov_b32 s_save_exec_hi, exec_hi
s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+#if HAVE_SENDMSG_RTN
+ s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
+#else
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
+#endif
-#if ASIC_TARGET_NAVI1X
+#if ASIC_FAMILY < CHIP_SIENNA_CICHLID
L_SLEEP:
// sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause
// SQ hang, since the 7,8th wave could not get arbit to exec inst, while
@@ -305,16 +358,57 @@ L_SLEEP:
s_waitcnt lgkmcnt(0)
#endif
+ // Save first_wave flag so we can clear high bits of save address.
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+ s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+
+#if NO_SQC_STORE
+ // Trap temporaries must be saved via VGPR but all VGPRs are in use.
+ // There is no ttmp space to hold the resource constant for VGPR save.
+ // Save v0 by itself since it requires only two SGPRs.
+ s_mov_b32 s_save_ttmps_lo, exec_lo
+ s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] slc:1 glc:1
+ v_mov_b32 v0, 0x0
+ s_mov_b32 exec_lo, s_save_ttmps_lo
+ s_mov_b32 exec_hi, s_save_ttmps_hi
+#endif
+
// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
- // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
get_wave_size(s_save_ttmps_hi)
get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
+ get_svgpr_size_bytes(s_save_ttmps_hi)
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
-#if ASIC_TARGET_NAVI1X
+#if NO_SQC_STORE
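+	// Pack ttmp4-ttmp11 into v0 lanes 4-11 and ttmp13 into lane 13, and stash
+	// EXEC in lanes 14-15 so it can be restored after the vector store below.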
+ v_writelane_b32 v0, ttmp4, 0x4
+ v_writelane_b32 v0, ttmp5, 0x5
+ v_writelane_b32 v0, ttmp6, 0x6
+ v_writelane_b32 v0, ttmp7, 0x7
+ v_writelane_b32 v0, ttmp8, 0x8
+ v_writelane_b32 v0, ttmp9, 0x9
+ v_writelane_b32 v0, ttmp10, 0xA
+ v_writelane_b32 v0, ttmp11, 0xB
+ v_writelane_b32 v0, ttmp13, 0xD
+ v_writelane_b32 v0, exec_lo, 0xE
+ v_writelane_b32 v0, exec_hi, 0xF
+
+ s_mov_b32 exec_lo, 0x3FFF
+ s_mov_b32 exec_hi, 0x0
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 slc:1 glc:1
+ v_readlane_b32 ttmp14, v0, 0xE
+ v_readlane_b32 ttmp15, v0, 0xF
+ s_mov_b32 exec_lo, ttmp14
+ s_mov_b32 exec_hi, ttmp15
+#else
s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
@@ -326,12 +420,6 @@ L_SLEEP:
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
 	s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not necessarily inited
s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
- s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
- s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
- s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
- s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
s_mov_b32 s_save_m0, m0
@@ -339,7 +427,7 @@ L_SLEEP:
s_mov_b32 s_save_mem_offset, 0x0
get_wave_size(s_wave_size)
-#if ASIC_TARGET_NAVI1X
+#if HAVE_XNACK
// Save and clear vector XNACK state late to free up SGPRs.
s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
s_setreg_imm32_b32 hwreg(HW_REG_SHADER_XNACK_MASK), 0x0
@@ -361,7 +449,9 @@ L_SAVE_4VGPR_WAVE32:
// VGPR Allocated in 4-GPR granularity
+#if !NO_SQC_STORE
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+#endif
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
@@ -372,7 +462,9 @@ L_SAVE_4VGPR_WAVE64:
// VGPR Allocated in 4-GPR granularity
+#if !NO_SQC_STORE
buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+#endif
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
@@ -397,7 +489,8 @@ L_SAVE_HWREG:
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
- write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+ s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
@@ -418,9 +511,13 @@ L_SAVE_HWREG:
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
#if NO_SQC_STORE
- // Write HWREG/SGPRs with 32 VGPR lanes, wave32 is common case.
+ // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
+ s_mov_b32 exec_lo, 0xFFFF
s_mov_b32 exec_hi, 0x0
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+ // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
+ s_mov_b32 exec_lo, 0xFFFFFFFF
#endif
/* save SGPRs */
@@ -506,7 +603,7 @@ L_SAVE_LDS_NORMAL:
s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
s_barrier //LDS is used? wait for other waves in the same TG
- s_and_b32 s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+ s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
s_cbranch_scc0 L_SAVE_LDS_DONE
// first wave do LDS save;
@@ -628,7 +725,7 @@ L_SAVE_VGPR_WAVE64:
// VGPR store using dw burst
s_mov_b32 m0, 0x4 //VGPR initial index value =4
s_cmp_lt_u32 m0, s_save_alloc_size
- s_cbranch_scc0 L_SAVE_VGPR_END
+ s_cbranch_scc0 L_SAVE_SHARED_VGPR
L_SAVE_VGPR_W64_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
@@ -646,6 +743,7 @@ L_SAVE_VGPR_W64_LOOP:
s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete?
+L_SAVE_SHARED_VGPR:
//Below part will be the save shared vgpr part (new for gfx10)
s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
@@ -674,12 +772,7 @@ L_RESTORE:
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
- s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
- s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
- s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
- s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
+
//determine it is wave32 or wave64
get_wave_size(s_restore_size)
@@ -722,7 +815,13 @@ L_RESTORE_LDS_NORMAL:
s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64
L_RESTORE_LDS_LOOP_W32:
+#if HAVE_BUFFER_LDS_LOAD
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
+#else
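+	// Without buffer-to-LDS loads, read into v0 and write it to LDS with a DS store.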
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ s_waitcnt vmcnt(0)
+ ds_store_addtid_b32 v0
+#endif
s_add_u32 m0, m0, 128 // 128 DW
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
@@ -730,7 +829,13 @@ L_RESTORE_LDS_LOOP_W32:
s_branch L_RESTORE_VGPR
L_RESTORE_LDS_LOOP_W64:
+#if HAVE_BUFFER_LDS_LOAD
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
+#else
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ s_waitcnt vmcnt(0)
+ ds_store_addtid_b32 v0
+#endif
s_add_u32 m0, m0, 256 // 256 DW
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
@@ -765,6 +870,8 @@ L_RESTORE_VGPR_NORMAL:
s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4
s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SGPR
L_RESTORE_VGPR_WAVE32_LOOP:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
@@ -786,6 +893,7 @@ L_RESTORE_VGPR_WAVE32_LOOP:
buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128
buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*2
buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*3
+ s_waitcnt vmcnt(0)
s_branch L_RESTORE_SGPR
@@ -796,6 +904,8 @@ L_RESTORE_VGPR_WAVE64:
s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SHARED_VGPR
L_RESTORE_VGPR_WAVE64_LOOP:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
@@ -812,6 +922,7 @@ L_RESTORE_VGPR_WAVE64_LOOP:
s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
+L_RESTORE_SHARED_VGPR:
//Below part will be the restore shared vgpr part (new for gfx10)
s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size
s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
@@ -935,7 +1046,7 @@ L_RESTORE_HWREG:
s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
-#if ASIC_TARGET_NAVI1X
+#if HAVE_XNACK
s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
#endif
@@ -945,8 +1056,10 @@ L_RESTORE_HWREG:
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
- // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
+ get_svgpr_size_bytes(s_restore_ttmps_hi)
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
@@ -956,31 +1069,27 @@ L_RESTORE_HWREG:
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
s_waitcnt lgkmcnt(0)
-#if ASIC_TARGET_NAVI1X
- s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
- s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
- s_mov_b32 s_restore_tmp, 0x0
- s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
- s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
- s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
- s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
- s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_REPLAY_W64H_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
- s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT
- s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
-
- s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
- s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+#if HAVE_XNACK
+ restore_ib_sts(s_restore_tmp, s_restore_m0)
#endif
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+#if SW_SA_TRAP
+ // If traps are enabled then return to the shader with PRIV=0.
+ // Otherwise retain PRIV=1 for subsequent context save requests.
+ s_getreg_b32 s_restore_tmp, hwreg(HW_REG_STATUS)
+ s_bitcmp1_b32 s_restore_tmp, SQ_WAVE_STATUS_TRAP_EN_SHIFT
+ s_cbranch_scc1 L_RETURN_WITHOUT_PRIV
+
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
+ s_setpc_b64 [s_restore_pc_lo, s_restore_pc_hi]
+L_RETURN_WITHOUT_PRIV:
+#endif
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
L_END_PGM:
@@ -1089,5 +1198,29 @@ end
function get_wave_size(s_reg)
s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
- s_or_b32 s_reg, s_save_spi_init_hi, s_reg //share with exec_hi, it's at bit25
+end
+
+function save_and_clear_ib_sts(tmp1, tmp2)
+ // Preserve and clear scalar XNACK state before issuing scalar loads.
+ // Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
+ // unused space ttmp11[31:24].
+ s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK)
+ s_getreg_b32 tmp1, hwreg(HW_REG_IB_STS)
+ s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
+ s_lshl_b32 tmp2, tmp2, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
+ s_or_b32 ttmp11, ttmp11, tmp2
+ s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_lshl_b32 tmp2, tmp2, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+ s_or_b32 ttmp11, ttmp11, tmp2
+ s_andn2_b32 tmp1, tmp1, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
+ s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1
+end
+
+function restore_ib_sts(tmp1, tmp2)
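+	// Rebuild IB_STS.{REPLAY_W64H,RCNT,FIRST_REPLAY} from the bits stashed in ttmp11.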
+ s_lshr_b32 tmp1, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+ s_and_b32 tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_lshr_b32 tmp1, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
+ s_and_b32 tmp1, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
+ s_or_b32 tmp1, tmp1, tmp2
+ s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1
end
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index eed78a04e7c7..6770cbe3250a 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -46,8 +46,6 @@ var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN
/**************************************************************************/
/* variables */
/**************************************************************************/
-var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
-var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
@@ -56,6 +54,7 @@ var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1
var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3
var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29
var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000
+var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
@@ -72,8 +71,10 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
#endif
var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
-var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask
+var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7
var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
@@ -83,37 +84,30 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
+var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000
var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000
-var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
+var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12
+var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19
+
var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000
-var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
-var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
-var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
-
var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data
var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000
+var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23
+var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
/* Save */
var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes
var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
-
-var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
-var S_SAVE_SPI_INIT_ATC_SHIFT = 27
-var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
-var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
+var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
+var S_SAVE_PC_HI_HT_MASK = 0x01000000
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
-var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
-var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME
-var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME
-var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME
-
var s_save_spi_init_lo = exec_lo
var s_save_spi_init_hi = exec_hi
@@ -140,18 +134,9 @@ var s_save_ttmps_hi = s_save_trapsts //no conflict
var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
-var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
-var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
-var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
-var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
-var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
-var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
-var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
-var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
-
var s_restore_spi_init_lo = exec_lo
var s_restore_spi_init_hi = exec_hi
@@ -199,71 +184,77 @@ L_JUMP_TO_RESTORE:
L_SKIP_RESTORE:
s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
- s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save
-if SINGLE_STEP_MISSED_WORKAROUND
- // No single step exceptions if MODE.DEBUG_EN=0.
- s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
- s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
- s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND
+ // Clear SPI_PRIO: do not save with elevated priority.
+ // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+ s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
- // Second-level trap already handled exception if STATUS.HALT=1.
- s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- // Prioritize single step exception over context save.
- // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
- s_cbranch_scc0 L_FETCH_2ND_TRAP
+ s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ s_cbranch_scc0 L_NOT_HALTED
-L_NO_SINGLE_STEP_WORKAROUND:
-end
+L_HALTED:
+ // Host trap may occur while wave is halted.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+L_CHECK_SAVE:
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
s_cbranch_scc1 L_SAVE //this is the operation for save
- // ********* Handle non-CWSR traps *******************
-
- // Illegal instruction is a non-maskable exception which blocks context save.
- // Halt the wavefront and return from the trap.
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
- s_cbranch_scc1 L_HALT_WAVE
-
- // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA.
- // Instead, halt the wavefront and return from the trap.
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
- s_cbranch_scc0 L_FETCH_2ND_TRAP
-
-L_HALT_WAVE:
- // If STATUS.HALT is set then this fault must come from SQC instruction fetch.
- // We cannot prevent further faults. Spin wait until context saved.
- s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
- s_cbranch_scc0 L_NOT_ALREADY_HALTED
-
-L_WAIT_CTX_SAVE:
+ // Wave is halted but neither host trap nor SAVECTX is raised.
+ // Caused by instruction fetch memory violation.
+ // Spin wait until context saved to prevent interrupt storm.
s_sleep 0x10
- s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
- s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
- s_cbranch_scc0 L_WAIT_CTX_SAVE
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_branch L_CHECK_SAVE
+
+L_NOT_HALTED:
+ // Let second-level handle non-SAVECTX exception or trap.
+ // Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+ // Check non-maskable exceptions. memory_violation, illegal_instruction
+ // and xnack_error exceptions always cause the wave to enter the trap
+ // handler.
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+ // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
+ // Maskable exceptions only cause the wave to enter the trap handler if
+ // their respective bit in mode.excp_en is set.
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+ s_cbranch_scc0 L_CHECK_TRAP_ID
+
+ s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+ s_cbranch_scc0 L_NOT_ADDR_WATCH
+ s_bitset1_b32 ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch
+
+L_NOT_ADDR_WATCH:
+ s_getreg_b32 ttmp3, hwreg(HW_REG_MODE)
+ s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
+ s_and_b32 ttmp2, ttmp2, ttmp3
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+ // Check trap_id != 0
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
-L_NOT_ALREADY_HALTED:
- s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+if SINGLE_STEP_MISSED_WORKAROUND
+ // Prioritize single step exception over context save.
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+end
- // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
- // Rewind the PC to prevent this from occurring. The debugger compensates for this.
- s_sub_u32 ttmp0, ttmp0, 0x8
- s_subb_u32 ttmp1, ttmp1, 0x0
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+ s_cbranch_scc1 L_SAVE
L_FETCH_2ND_TRAP:
// Preserve and clear scalar XNACK state before issuing scalar reads.
- // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
- s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS)
- s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
- s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
- s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
- s_or_b32 ttmp11, ttmp11, ttmp3
-
- s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
- s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+ save_and_clear_ib_sts(ttmp14)
// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
@@ -271,27 +262,48 @@ L_FETCH_2ND_TRAP:
s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
+ s_waitcnt lgkmcnt(0)
+ s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
+ s_or_b32 ttmp11, ttmp11, ttmp2
+
s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
s_waitcnt lgkmcnt(0)
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
s_waitcnt lgkmcnt(0)
+
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
 	s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler has not been set
s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
L_NO_NEXT_TRAP:
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
- s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
- s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
- s_addc_u32 ttmp1, ttmp1, 0
-L_EXCP_CASE:
+ // If not caused by trap then halt wave to prevent re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
+ s_cbranch_scc1 L_TRAP_CASE
+ s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
+ // Rewind the PC to prevent this from occurring.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+ s_branch L_EXIT_TRAP
+
+L_TRAP_CASE:
+ // Host trap will not cause trap re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
+ s_cbranch_scc1 L_EXIT_TRAP
+
+ // Advance past trap instruction to prevent re-entry.
+ s_add_u32 ttmp0, ttmp0, 0x4
+ s_addc_u32 ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
s_and_b32 ttmp1, ttmp1, 0xFFFF
- // Restore SQ_WAVE_IB_STS.
- s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
- s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
- s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+ restore_ib_sts(ttmp14)
// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@@ -312,16 +324,7 @@ L_SAVE:
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT
- s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
- s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY
- s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
- s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS
- s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
-
- s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
+ save_and_clear_ib_sts(s_save_tmp)
/* inform SPI the readiness and wait for SPI's go signal */
s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
@@ -360,12 +363,6 @@ L_SAVE:
s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
 	s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not necessarily inited
s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
- s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
- s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
- s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
- s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
//FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?)
s_mov_b32 s_save_m0, m0 //save M0
@@ -690,12 +687,6 @@ L_RESTORE:
s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
- s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
- s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
- s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
- s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
/* global mem offset */
// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0
@@ -889,19 +880,7 @@ L_RESTORE:
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
s_waitcnt lgkmcnt(0)
- //reuse s_restore_m0 as a temp register
- s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
- s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
- s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero
- s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
- s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
- s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
- s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
- s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
- s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
- s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+ restore_ib_sts(s_restore_tmp)
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@@ -910,8 +889,7 @@ L_RESTORE:
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
-// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
- s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc
+ s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
/**************************************************************************/
@@ -1078,3 +1056,19 @@ function set_status_without_spi_prio(status, tmp)
s_nop 0x2 // avoid S_SETREG => S_SETREG hazard
s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
end
+
+function save_and_clear_ib_sts(tmp)
+ // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
+ s_getreg_b32 tmp, hwreg(HW_REG_IB_STS)
+ s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_lshl_b32 tmp, tmp, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
+ s_or_b32 ttmp11, ttmp11, tmp
+ s_setreg_imm32_b32 hwreg(HW_REG_IB_STS), 0x0
+end
+
+function restore_ib_sts(tmp)
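+	// Move the RCNT/FIRST_REPLAY bits stashed in ttmp11 back into IB_STS.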
+ s_lshr_b32 tmp, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+ s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_setreg_b32 hwreg(HW_REG_IB_STS), tmp
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 24ebd61395d8..6d291aa6386b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -33,14 +34,16 @@
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
+#include <linux/ptrace.h>
#include <linux/dma-buf.h>
-#include <asm/processor.h>
+#include <linux/fdtable.h>
+#include <linux/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
-#include "kfd_dbgmgr.h"
#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
+#include "amdgpu_dma_buf.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -62,6 +65,25 @@ static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;
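+/* Look up the per-process device data for gpu_id and return it with p->mutex held.
+ * Returns NULL, with the mutex dropped, if the process has no such device.
+ */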
+static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
+{
+ struct kfd_process_device *pdd;
+
+ mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, gpu_id);
+
+ if (pdd)
+ return pdd;
+
+ mutex_unlock(&p->mutex);
+ return NULL;
+}
+
+static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
+{
+ mutex_unlock(&pdd->process->mutex);
+}
+
int kfd_chardev_init(void)
{
int err = 0;
@@ -101,11 +123,6 @@ void kfd_chardev_exit(void)
kfd_device = NULL;
}
-struct device *kfd_chardev(void)
-{
- return kfd_device;
-}
-
static int kfd_open(struct inode *inode, struct file *filep)
{
@@ -282,6 +299,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
struct kfd_process_device *pdd;
struct queue_properties q_properties;
uint32_t doorbell_offset_in_process = 0;
+ struct amdgpu_bo *wptr_bo = NULL;
memset(&q_properties, 0, sizeof(struct queue_properties));
@@ -292,26 +310,72 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
return err;
pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev) {
- pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
- return -EINVAL;
- }
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+ err = -EINVAL;
+ goto err_pdd;
+ }
+ dev = pdd->dev;
+
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
err = -ESRCH;
goto err_bind_process;
}
+ if (!pdd->doorbell_index &&
+ kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
+ err = -ENOMEM;
+ goto err_alloc_doorbells;
+ }
+
+ /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
+ * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
+ */
+ if (dev->shared_resources.enable_mes &&
+ ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
+ >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
+ struct amdgpu_bo_va_mapping *wptr_mapping;
+ struct amdgpu_vm *wptr_vm;
+
+ wptr_vm = drm_priv_to_vm(pdd->drm_priv);
+ err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+ if (err)
+ goto err_wptr_map_gart;
+
+ wptr_mapping = amdgpu_vm_bo_lookup_mapping(
+ wptr_vm, args->write_pointer_address >> PAGE_SHIFT);
+ amdgpu_bo_unreserve(wptr_vm->root.bo);
+ if (!wptr_mapping) {
+ pr_err("Failed to lookup wptr bo\n");
+ err = -EINVAL;
+ goto err_wptr_map_gart;
+ }
+
+ wptr_bo = wptr_mapping->bo_va->base.bo;
+ if (wptr_bo->tbo.base.size > PAGE_SIZE) {
+ pr_err("Requested GART mapping for wptr bo larger than one page\n");
+ err = -EINVAL;
+ goto err_wptr_map_gart;
+ }
+
+ err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);
+ if (err) {
+ pr_err("Failed to map wptr bo to GART\n");
+ goto err_wptr_map_gart;
+ }
+ }
+
pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
p->pasid,
dev->id);
- err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
- &doorbell_offset_in_process);
+ err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo,
+ NULL, NULL, NULL, &doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
@@ -321,7 +385,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
- if (KFD_IS_SOC15(dev->device_info->asic_family))
+ if (KFD_IS_SOC15(dev))
/* On SOC15 ASICs, include the doorbell offset within the
* process doorbell frame, which is 2 pages.
*/
@@ -343,7 +407,12 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
return 0;
err_create_queue:
+ if (wptr_bo)
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
+err_wptr_map_gart:
+err_alloc_doorbells:
err_bind_process:
+err_pdd:
mutex_unlock(&p->mutex);
return err;
}
@@ -490,7 +559,6 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_set_memory_policy_args *args = data;
- struct kfd_dev *dev;
int err = 0;
struct kfd_process_device *pdd;
enum cache_policy default_policy, alternate_policy;
@@ -505,13 +573,15 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
return -EINVAL;
}
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+ err = -EINVAL;
+ goto err_pdd;
+ }
- pdd = kfd_bind_process_to_device(dev, p);
+ pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd)) {
err = -ESRCH;
goto out;
@@ -524,7 +594,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
? cache_policy_coherent : cache_policy_noncoherent;
- if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
+ if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,
&pdd->qpd,
default_policy,
alternate_policy,
@@ -533,6 +603,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
err = -EINVAL;
out:
+err_pdd:
mutex_unlock(&p->mutex);
return err;
@@ -542,17 +613,18 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_set_trap_handler_args *args = data;
- struct kfd_dev *dev;
int err = 0;
struct kfd_process_device *pdd;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
mutex_lock(&p->mutex);
- pdd = kfd_bind_process_to_device(dev, p);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ err = -EINVAL;
+ goto err_pdd;
+ }
+
+ pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd)) {
err = -ESRCH;
goto out;
@@ -561,6 +633,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);
out:
+err_pdd:
mutex_unlock(&p->mutex);
return err;
@@ -569,289 +642,40 @@ out:
static int kfd_ioctl_dbg_register(struct file *filep,
struct kfd_process *p, void *data)
{
- struct kfd_ioctl_dbg_register_args *args = data;
- struct kfd_dev *dev;
- struct kfd_dbgmgr *dbgmgr_ptr;
- struct kfd_process_device *pdd;
- bool create_ok;
- long status = 0;
-
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
- if (dev->device_info->asic_family == CHIP_CARRIZO) {
- pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
- return -EINVAL;
- }
-
- mutex_lock(&p->mutex);
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- /*
- * make sure that we have pdd, if this the first queue created for
- * this process
- */
- pdd = kfd_bind_process_to_device(dev, p);
- if (IS_ERR(pdd)) {
- status = PTR_ERR(pdd);
- goto out;
- }
-
- if (!dev->dbgmgr) {
- /* In case of a legal call, we have no dbgmgr yet */
- create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
- if (create_ok) {
- status = kfd_dbgmgr_register(dbgmgr_ptr, p);
- if (status != 0)
- kfd_dbgmgr_destroy(dbgmgr_ptr);
- else
- dev->dbgmgr = dbgmgr_ptr;
- }
- } else {
- pr_debug("debugger already registered\n");
- status = -EINVAL;
- }
-
-out:
- mutex_unlock(kfd_get_dbgmgr_mutex());
- mutex_unlock(&p->mutex);
-
- return status;
+ return -EPERM;
}
static int kfd_ioctl_dbg_unregister(struct file *filep,
struct kfd_process *p, void *data)
{
- struct kfd_ioctl_dbg_unregister_args *args = data;
- struct kfd_dev *dev;
- long status;
-
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev || !dev->dbgmgr)
- return -EINVAL;
-
- if (dev->device_info->asic_family == CHIP_CARRIZO) {
- pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
- return -EINVAL;
- }
-
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
- if (!status) {
- kfd_dbgmgr_destroy(dev->dbgmgr);
- dev->dbgmgr = NULL;
- }
-
- mutex_unlock(kfd_get_dbgmgr_mutex());
-
- return status;
+ return -EPERM;
}
-/*
- * Parse and generate variable size data structure for address watch.
- * Total size of the buffer and # watch points is limited in order
- * to prevent kernel abuse. (no bearing to the much smaller HW limitation
- * which is enforced by dbgdev module)
- * please also note that the watch address itself are not "copied from user",
- * since it be set into the HW in user mode values.
- *
- */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
struct kfd_process *p, void *data)
{
- struct kfd_ioctl_dbg_address_watch_args *args = data;
- struct kfd_dev *dev;
- struct dbg_address_watch_info aw_info;
- unsigned char *args_buff;
- long status;
- void __user *cmd_from_user;
- uint64_t watch_mask_value = 0;
- unsigned int args_idx = 0;
-
- memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
-
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
- if (dev->device_info->asic_family == CHIP_CARRIZO) {
- pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
- return -EINVAL;
- }
-
- cmd_from_user = (void __user *) args->content_ptr;
-
- /* Validate arguments */
-
- if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
- (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
- (cmd_from_user == NULL))
- return -EINVAL;
-
- /* this is the actual buffer to work with */
- args_buff = memdup_user(cmd_from_user,
- args->buf_size_in_bytes - sizeof(*args));
- if (IS_ERR(args_buff))
- return PTR_ERR(args_buff);
-
- aw_info.process = p;
-
- aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
- args_idx += sizeof(aw_info.num_watch_points);
-
- aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
- args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
-
- /*
- * set watch address base pointer to point on the array base
- * within args_buff
- */
- aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
-
- /* skip over the addresses buffer */
- args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
-
- if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
- status = -EINVAL;
- goto out;
- }
-
- watch_mask_value = (uint64_t) args_buff[args_idx];
-
- if (watch_mask_value > 0) {
- /*
- * There is an array of masks.
- * set watch mask base pointer to point on the array base
- * within args_buff
- */
- aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
-
- /* skip over the masks buffer */
- args_idx += sizeof(aw_info.watch_mask) *
- aw_info.num_watch_points;
- } else {
- /* just the NULL mask, set to NULL and skip over it */
- aw_info.watch_mask = NULL;
- args_idx += sizeof(aw_info.watch_mask);
- }
-
- if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
- status = -EINVAL;
- goto out;
- }
-
- /* Currently HSA Event is not supported for DBG */
- aw_info.watch_event = NULL;
-
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
-
- mutex_unlock(kfd_get_dbgmgr_mutex());
-
-out:
- kfree(args_buff);
-
- return status;
+ return -EPERM;
}
/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
struct kfd_process *p, void *data)
{
- struct kfd_ioctl_dbg_wave_control_args *args = data;
- struct kfd_dev *dev;
- struct dbg_wave_control_info wac_info;
- unsigned char *args_buff;
- uint32_t computed_buff_size;
- long status;
- void __user *cmd_from_user;
- unsigned int args_idx = 0;
-
- memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
-
- /* we use compact form, independent of the packing attribute value */
- computed_buff_size = sizeof(*args) +
- sizeof(wac_info.mode) +
- sizeof(wac_info.operand) +
- sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
- sizeof(wac_info.dbgWave_msg.MemoryVA) +
- sizeof(wac_info.trapId);
-
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
- if (dev->device_info->asic_family == CHIP_CARRIZO) {
- pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
- return -EINVAL;
- }
-
- /* input size must match the computed "compact" size */
- if (args->buf_size_in_bytes != computed_buff_size) {
- pr_debug("size mismatch, computed : actual %u : %u\n",
- args->buf_size_in_bytes, computed_buff_size);
- return -EINVAL;
- }
-
- cmd_from_user = (void __user *) args->content_ptr;
-
- if (cmd_from_user == NULL)
- return -EINVAL;
-
- /* copy the entire buffer from user */
-
- args_buff = memdup_user(cmd_from_user,
- args->buf_size_in_bytes - sizeof(*args));
- if (IS_ERR(args_buff))
- return PTR_ERR(args_buff);
-
- /* move ptr to the start of the "pay-load" area */
- wac_info.process = p;
-
- wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
- args_idx += sizeof(wac_info.operand);
-
- wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
- args_idx += sizeof(wac_info.mode);
-
- wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
- args_idx += sizeof(wac_info.trapId);
-
- wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
- *((uint32_t *)(&args_buff[args_idx]));
- wac_info.dbgWave_msg.MemoryVA = NULL;
-
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
- wac_info.process, wac_info.operand,
- wac_info.mode, wac_info.trapId,
- wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
-
- status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
-
- pr_debug("Returned status of dbg manager is %ld\n", status);
-
- mutex_unlock(kfd_get_dbgmgr_mutex());
-
- kfree(args_buff);
-
- return status;
+ return -EPERM;
}
static int kfd_ioctl_get_clock_counters(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_get_clock_counters_args *args = data;
- struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
- dev = kfd_device_by_id(args->gpu_id);
- if (dev)
+ mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ mutex_unlock(&p->mutex);
+ if (pdd)
/* Reading GPU clock counter from KGD */
- args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
+ args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->adev);
else
/* Node without GPU resource */
args->gpu_clock_counter = 0;
@@ -1007,57 +831,11 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
* through the event_page_offset field.
*/
if (args->event_page_offset) {
- struct kfd_dev *kfd;
- struct kfd_process_device *pdd;
- void *mem, *kern_addr;
- uint64_t size;
-
- kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
- if (!kfd) {
- pr_err("Getting device by id failed in %s\n", __func__);
- return -EINVAL;
- }
-
mutex_lock(&p->mutex);
-
- if (p->signal_page) {
- pr_err("Event page is already set\n");
- err = -EINVAL;
- goto out_unlock;
- }
-
- pdd = kfd_bind_process_to_device(kfd, p);
- if (IS_ERR(pdd)) {
- err = PTR_ERR(pdd);
- goto out_unlock;
- }
-
- mem = kfd_process_device_translate_handle(pdd,
- GET_IDR_HANDLE(args->event_page_offset));
- if (!mem) {
- pr_err("Can't find BO, offset is 0x%llx\n",
- args->event_page_offset);
- err = -EINVAL;
- goto out_unlock;
- }
-
- err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
- mem, &kern_addr, &size);
- if (err) {
- pr_err("Failed to map event page to kernel\n");
- goto out_unlock;
- }
-
- err = kfd_event_page_set(p, kern_addr, size);
- if (err) {
- pr_err("Failed to set event page\n");
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->kgd, mem);
- goto out_unlock;
- }
-
- p->signal_handle = args->event_page_offset;
-
+ err = kfd_kmap_event_page(p, args->event_page_offset);
mutex_unlock(&p->mutex);
+ if (err)
+ return err;
}
err = kfd_event_create(filp, p, args->event_type,
@@ -1066,10 +844,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
&args->event_page_offset,
&args->event_slot_index);
- return err;
-
-out_unlock:
- mutex_unlock(&p->mutex);
+ pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
return err;
}
@@ -1101,14 +876,11 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_wait_events_args *args = data;
- int err;
- err = kfd_wait_on_events(p, args->num_events,
+ return kfd_wait_on_events(p, args->num_events,
(void __user *)args->events_ptr,
(args->wait_for_all != 0),
- args->timeout, &args->wait_result);
-
- return err;
+ &args->timeout, &args->wait_result);
}
static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
struct kfd_process *p, void *data)
@@ -1118,11 +890,13 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
struct kfd_dev *dev;
long err;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ err = -EINVAL;
+ goto err_pdd;
+ }
+ dev = pdd->dev;
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
@@ -1137,11 +911,12 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
dev->kfd2kgd->set_scratch_backing_va(
- dev->kgd, args->va_addr, pdd->qpd.vmid);
+ dev->adev, args->va_addr, pdd->qpd.vmid);
return 0;
bind_process_to_device_fail:
+err_pdd:
mutex_unlock(&p->mutex);
return err;
}
@@ -1150,15 +925,17 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_get_tile_config_args *args = data;
- struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
struct tile_config config;
int err = 0;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
+ mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ mutex_unlock(&p->mutex);
+ if (!pdd)
return -EINVAL;
- amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
+ amdgpu_amdkfd_get_tile_config(pdd->dev->adev, &config);
args->gb_addr_config = config.gb_addr_config;
args->num_banks = config.num_banks;
@@ -1193,40 +970,37 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
{
struct kfd_ioctl_acquire_vm_args *args = data;
struct kfd_process_device *pdd;
- struct kfd_dev *dev;
struct file *drm_file;
int ret;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
drm_file = fget(args->drm_fd);
if (!drm_file)
return -EINVAL;
mutex_lock(&p->mutex);
-
- pdd = kfd_get_process_device_data(dev, p);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
if (!pdd) {
ret = -EINVAL;
- goto err_unlock;
+ goto err_pdd;
}
if (pdd->drm_file) {
ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
- goto err_unlock;
+ goto err_drm_file;
}
ret = kfd_process_device_init_vm(pdd, drm_file);
if (ret)
goto err_unlock;
+
/* On success, the PDD keeps the drm_file reference */
mutex_unlock(&p->mutex);
return 0;
err_unlock:
+err_pdd:
+err_drm_file:
mutex_unlock(&p->mutex);
fput(drm_file);
return ret;
@@ -1234,8 +1008,6 @@ err_unlock:
bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
- struct kfd_local_mem_info mem_info;
-
if (debug_largebar) {
pr_debug("Simulate large-bar allocation on non large-bar machine\n");
return true;
@@ -1244,13 +1016,25 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
if (dev->use_iommu_v2)
return false;
- amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
- if (mem_info.local_mem_size_private == 0 &&
- mem_info.local_mem_size_public > 0)
+ if (dev->local_mem_info.local_mem_size_private == 0 &&
+ dev->local_mem_info.local_mem_size_public > 0)
return true;
return false;
}
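+/* Report the amount of memory still available for allocation on the GPU identified by gpu_id. */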
+static int kfd_ioctl_get_available_memory(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_get_available_memory_args *args = data;
+ struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);
+
+ if (!pdd)
+ return -EINVAL;
+ args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev);
+ kfd_unlock_pdd(pdd);
+ return 0;
+}
+
static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -1283,19 +1067,23 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
}
mutex_unlock(&p->svms.lock);
#endif
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
+ mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ err = -EINVAL;
+ goto err_pdd;
+ }
+
+ dev = pdd->dev;
if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
!kfd_dev_is_large_bar(dev)) {
pr_err("Alloc host visible vram on small bar is not allowed\n");
- return -EINVAL;
+ err = -EINVAL;
+ goto err_large_bar;
}
- mutex_lock(&p->mutex);
-
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
err = PTR_ERR(pdd);
@@ -1308,12 +1096,16 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
goto err_unlock;
}
offset = kfd_get_process_doorbells(pdd);
+ if (!offset) {
+ err = -ENOMEM;
+ goto err_unlock;
+ }
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
if (args->size != PAGE_SIZE) {
err = -EINVAL;
goto err_unlock;
}
- offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
+ offset = dev->adev->rmmio_remap.bus_addr;
if (!offset) {
err = -ENOMEM;
goto err_unlock;
@@ -1321,9 +1113,9 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
}
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
- dev->kgd, args->va_addr, args->size,
+ dev->adev, args->va_addr, args->size,
pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
- flags);
+ flags, false);
if (err)
goto err_unlock;
@@ -1353,9 +1145,11 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
return 0;
err_free:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
pdd->drm_priv, NULL);
err_unlock:
+err_pdd:
+err_large_bar:
mutex_unlock(&p->mutex);
return err;
}
@@ -1366,14 +1160,9 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
struct kfd_ioctl_free_memory_of_gpu_args *args = data;
struct kfd_process_device *pdd;
void *mem;
- struct kfd_dev *dev;
int ret;
uint64_t size = 0;
- dev = kfd_device_by_id(GET_GPU_ID(args->handle));
- if (!dev)
- return -EINVAL;
-
mutex_lock(&p->mutex);
/*
* Safeguard to prevent user space from freeing signal BO.
@@ -1385,11 +1174,11 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
goto err_unlock;
}
- pdd = kfd_get_process_device_data(dev, p);
+ pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
if (!pdd) {
pr_err("Process device data doesn't exist\n");
ret = -EINVAL;
- goto err_unlock;
+ goto err_pdd;
}
mem = kfd_process_device_translate_handle(
@@ -1399,7 +1188,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
goto err_unlock;
}
- ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
+ ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,
(struct kgd_mem *)mem, pdd->drm_priv, &size);
/* If freeing the buffer failed, leave the handle in place for
@@ -1412,6 +1201,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
err_unlock:
+err_pdd:
mutex_unlock(&p->mutex);
return ret;
}
@@ -1422,15 +1212,10 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
struct kfd_ioctl_map_memory_to_gpu_args *args = data;
struct kfd_process_device *pdd, *peer_pdd;
void *mem;
- struct kfd_dev *dev, *peer;
+ struct kfd_dev *dev;
long err = 0;
int i;
uint32_t *devices_arr = NULL;
- bool table_freed = false;
-
- dev = kfd_device_by_id(GET_GPU_ID(args->handle));
- if (!dev)
- return -EINVAL;
if (!args->n_devices) {
pr_debug("Device IDs array empty\n");
@@ -1455,6 +1240,12 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
}
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
+ if (!pdd) {
+ err = -EINVAL;
+ goto get_process_device_data_failed;
+ }
+ dev = pdd->dev;
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
@@ -1470,25 +1261,33 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
}
for (i = args->n_success; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (!peer) {
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+ if (!peer_pdd) {
pr_debug("Getting device by id failed for 0x%x\n",
devices_arr[i]);
err = -EINVAL;
goto get_mem_obj_from_handle_failed;
}
- peer_pdd = kfd_bind_process_to_device(peer, p);
+ peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p);
if (IS_ERR(peer_pdd)) {
err = PTR_ERR(peer_pdd);
goto get_mem_obj_from_handle_failed;
}
+
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- peer->kgd, (struct kgd_mem *)mem,
- peer_pdd->drm_priv, &table_freed);
+ peer_pdd->dev->adev, (struct kgd_mem *)mem,
+ peer_pdd->drm_priv);
if (err) {
- pr_err("Failed to map to gpu %d/%d\n",
- i, args->n_devices);
+ struct pci_dev *pdev = peer_pdd->dev->adev->pdev;
+
+ dev_err(dev->adev->dev,
+ "Failed to map peer:%04x:%02x:%02x.%d mem_domain:%d\n",
+ pci_domain_nr(pdev->bus),
+ pdev->bus->number,
+ PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn),
+ ((struct kgd_mem *)mem)->domain);
goto map_memory_to_gpu_failed;
}
args->n_success = i+1;
@@ -1496,28 +1295,24 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
mutex_unlock(&p->mutex);
- err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+ err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
}
/* Flush TLBs after waiting for the page table updates to complete */
- if (table_freed) {
- for (i = 0; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (WARN_ON_ONCE(!peer))
- continue;
- peer_pdd = kfd_get_process_device_data(peer, p);
- if (WARN_ON_ONCE(!peer_pdd))
- continue;
- kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
- }
+ for (i = 0; i < args->n_devices; i++) {
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+ if (WARN_ON_ONCE(!peer_pdd))
+ continue;
+ kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
}
kfree(devices_arr);
return err;
+get_process_device_data_failed:
bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
@@ -1535,14 +1330,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
struct kfd_process_device *pdd, *peer_pdd;
void *mem;
- struct kfd_dev *dev, *peer;
long err = 0;
uint32_t *devices_arr = NULL, i;
- dev = kfd_device_by_id(GET_GPU_ID(args->handle));
- if (!dev)
- return -EINVAL;
-
if (!args->n_devices) {
pr_debug("Device IDs array empty\n");
return -EINVAL;
@@ -1566,8 +1356,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
}
mutex_lock(&p->mutex);
-
- pdd = kfd_get_process_device_data(dev, p);
+ pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
if (!pdd) {
err = -EINVAL;
goto bind_process_to_device_failed;
@@ -1581,19 +1370,13 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
}
for (i = args->n_success; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (!peer) {
- err = -EINVAL;
- goto get_mem_obj_from_handle_failed;
- }
-
- peer_pdd = kfd_get_process_device_data(peer, p);
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
if (!peer_pdd) {
- err = -ENODEV;
+ err = -EINVAL;
goto get_mem_obj_from_handle_failed;
}
err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
+ peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
if (err) {
pr_err("Failed to unmap from gpu %d/%d\n",
i, args->n_devices);
@@ -1603,8 +1386,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
}
mutex_unlock(&p->mutex);
- if (dev->device_info->asic_family == CHIP_ALDEBARAN) {
- err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd,
+ if (kfd_flush_tlb_after_unmap(pdd->dev)) {
+ err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
(struct kgd_mem *) mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
@@ -1613,10 +1396,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
/* Flush TLBs after waiting for the page table updates to complete */
for (i = 0; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (WARN_ON_ONCE(!peer))
- continue;
- peer_pdd = kfd_get_process_device_data(peer, p);
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
if (WARN_ON_ONCE(!peer_pdd))
continue;
kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
@@ -1680,7 +1460,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
{
struct kfd_ioctl_get_dmabuf_info_args *args = data;
struct kfd_dev *dev = NULL;
- struct kgd_dev *dma_buf_kgd;
+ struct amdgpu_device *dmabuf_adev;
void *metadata_buffer = NULL;
uint32_t flags;
unsigned int i;
@@ -1700,15 +1480,15 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
}
/* Get dmabuf info from KGD */
- r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
- &dma_buf_kgd, &args->size,
+ r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd,
+ &dmabuf_adev, &args->size,
metadata_buffer, args->metadata_size,
&args->metadata_size, &flags);
if (r)
goto exit;
/* Reverse-lookup gpu_id from kgd pointer */
- dev = kfd_device_by_kgd(dma_buf_kgd);
+ dev = kfd_device_by_adev(dmabuf_adev);
if (!dev) {
r = -EINVAL;
goto exit;
@@ -1736,29 +1516,29 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
struct kfd_ioctl_import_dmabuf_args *args = data;
struct kfd_process_device *pdd;
struct dma_buf *dmabuf;
- struct kfd_dev *dev;
int idr_handle;
uint64_t size;
void *mem;
int r;
- dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
- return -EINVAL;
-
dmabuf = dma_buf_get(args->dmabuf_fd);
if (IS_ERR(dmabuf))
return PTR_ERR(dmabuf);
mutex_lock(&p->mutex);
+ pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+ if (!pdd) {
+ r = -EINVAL;
+ goto err_unlock;
+ }
- pdd = kfd_bind_process_to_device(dev, p);
+ pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd)) {
r = PTR_ERR(pdd);
goto err_unlock;
}
- r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
+ r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf,
args->va_addr, pdd->drm_priv,
(struct kgd_mem **)&mem, &size,
NULL);
@@ -1779,7 +1559,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
return 0;
err_free:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, (struct kgd_mem *)mem,
pdd->drm_priv, NULL);
err_unlock:
mutex_unlock(&p->mutex);
@@ -1792,15 +1572,20 @@ static int kfd_ioctl_smi_events(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_smi_events_args *args = data;
- struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
- dev = kfd_device_by_id(args->gpuid);
- if (!dev)
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_process_device_data_by_id(p, args->gpuid);
+ mutex_unlock(&p->mutex);
+ if (!pdd)
return -EINVAL;
- return kfd_smi_event_open(dev, &args->anon_fd);
+ return kfd_smi_event_open(pdd->dev, &args->anon_fd);
}
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
static int kfd_ioctl_set_xnack_mode(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -1811,22 +1596,29 @@ static int kfd_ioctl_set_xnack_mode(struct file *filep,
if (args->xnack_enabled >= 0) {
if (!list_empty(&p->pqm.queues)) {
pr_debug("Process has user queues running\n");
- mutex_unlock(&p->mutex);
- return -EBUSY;
+ r = -EBUSY;
+ goto out_unlock;
}
- if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
+
+ if (p->xnack_enabled == args->xnack_enabled)
+ goto out_unlock;
+
+ if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) {
r = -EPERM;
- else
- p->xnack_enabled = args->xnack_enabled;
+ goto out_unlock;
+ }
+
+ r = svm_range_switch_xnack_reserve_mem(p, args->xnack_enabled);
} else {
args->xnack_enabled = p->xnack_enabled;
}
+
+out_unlock:
mutex_unlock(&p->mutex);
return r;
}
-#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
{
struct kfd_ioctl_svm_args *args = data;
@@ -1840,22 +1632,1014 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
if (!args->start_addr || !args->size)
return -EINVAL;
- mutex_lock(&p->mutex);
-
r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
args->attrs);
- mutex_unlock(&p->mutex);
-
return r;
}
#else
+static int kfd_ioctl_set_xnack_mode(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ return -EPERM;
+}
static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
{
return -EPERM;
}
#endif
+static int criu_checkpoint_process(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ struct kfd_criu_process_priv_data process_priv;
+ int ret;
+
+ memset(&process_priv, 0, sizeof(process_priv));
+
+ process_priv.version = KFD_CRIU_PRIV_VERSION;
+ /* For CR, we don't consider the negative xnack mode, which is only used
+ * for querying without changing it. Here 0 simply means XNACK is disabled
+ * and 1 means it is enabled, so faults are retried to find a valid PTE.
+ */
+ process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;
+
+ ret = copy_to_user(user_priv_data + *priv_offset,
+ &process_priv, sizeof(process_priv));
+
+ if (ret) {
+ pr_err("Failed to copy process information to user\n");
+ ret = -EFAULT;
+ }
+
+ *priv_offset += sizeof(process_priv);
+ return ret;
+}
+
+static int criu_checkpoint_devices(struct kfd_process *p,
+ uint32_t num_devices,
+ uint8_t __user *user_addr,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ struct kfd_criu_device_priv_data *device_priv = NULL;
+ struct kfd_criu_device_bucket *device_buckets = NULL;
+ int ret = 0, i;
+
+ device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL);
+ if (!device_buckets) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);
+ if (!device_priv) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ for (i = 0; i < num_devices; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ device_buckets[i].user_gpu_id = pdd->user_gpu_id;
+ device_buckets[i].actual_gpu_id = pdd->dev->id;
+
+ /*
+ * priv_data does not contain useful information for now and is reserved for
+ * future use, so we do not set its contents.
+ */
+ }
+
+ ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets));
+ if (ret) {
+ pr_err("Failed to copy device information to user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ ret = copy_to_user(user_priv_data + *priv_offset,
+ device_priv,
+ num_devices * sizeof(*device_priv));
+ if (ret) {
+ pr_err("Failed to copy device information to user\n");
+ ret = -EFAULT;
+ }
+ *priv_offset += num_devices * sizeof(*device_priv);
+
+exit:
+ kvfree(device_buckets);
+ kvfree(device_priv);
+ return ret;
+}
+
+static uint32_t get_process_num_bos(struct kfd_process *p)
+{
+ uint32_t num_of_bos = 0;
+ int i;
+
+ /* Run over all PDDs of the process */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+ void *mem;
+ int id;
+
+ idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+ struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+
+ if ((uint64_t)kgd_mem->va > pdd->gpuvm_base)
+ num_of_bos++;
+ }
+ }
+ return num_of_bos;
+}
+
+static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags,
+ u32 *shared_fd)
+{
+ struct dma_buf *dmabuf;
+ int ret;
+
+ dmabuf = amdgpu_gem_prime_export(gobj, flags);
+ if (IS_ERR(dmabuf)) {
+ ret = PTR_ERR(dmabuf);
+ pr_err("dmabuf export failed for the BO\n");
+ return ret;
+ }
+
+ ret = dma_buf_fd(dmabuf, flags);
+ if (ret < 0) {
+ pr_err("dmabuf create fd failed, ret:%d\n", ret);
+ goto out_free_dmabuf;
+ }
+
+ *shared_fd = ret;
+ return 0;
+
+out_free_dmabuf:
+ dma_buf_put(dmabuf);
+ return ret;
+}
+
+static int criu_checkpoint_bos(struct kfd_process *p,
+ uint32_t num_bos,
+ uint8_t __user *user_bos,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ struct kfd_criu_bo_bucket *bo_buckets;
+ struct kfd_criu_bo_priv_data *bo_privs;
+ int ret = 0, pdd_index, bo_index = 0, id;
+ void *mem;
+
+ bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
+ if (!bo_buckets)
+ return -ENOMEM;
+
+ bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);
+ if (!bo_privs) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
+ struct kfd_process_device *pdd = p->pdds[pdd_index];
+ struct amdgpu_bo *dumper_bo;
+ struct kgd_mem *kgd_mem;
+
+ idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+ struct kfd_criu_bo_bucket *bo_bucket;
+ struct kfd_criu_bo_priv_data *bo_priv;
+ int i, dev_idx = 0;
+
+ if (!mem) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ kgd_mem = (struct kgd_mem *)mem;
+ dumper_bo = kgd_mem->bo;
+
+ if ((uint64_t)kgd_mem->va <= pdd->gpuvm_base)
+ continue;
+
+ bo_bucket = &bo_buckets[bo_index];
+ bo_priv = &bo_privs[bo_index];
+
+ bo_bucket->gpu_id = pdd->user_gpu_id;
+ bo_bucket->addr = (uint64_t)kgd_mem->va;
+ bo_bucket->size = amdgpu_bo_size(dumper_bo);
+ bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;
+ bo_priv->idr_handle = id;
+
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,
+ &bo_priv->user_addr);
+ if (ret) {
+ pr_err("Failed to obtain user address for user-pointer bo\n");
+ goto exit;
+ }
+ }
+ if (bo_bucket->alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
+ ret = criu_get_prime_handle(&dumper_bo->tbo.base,
+ bo_bucket->alloc_flags &
+ KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
+ &bo_bucket->dmabuf_fd);
+ if (ret)
+ goto exit;
+ } else {
+ bo_bucket->dmabuf_fd = KFD_INVALID_FD;
+ }
+
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
+ bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
+ KFD_MMAP_GPU_ID(pdd->dev->id);
+ else if (bo_bucket->alloc_flags &
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
+ bo_bucket->offset = KFD_MMAP_TYPE_MMIO |
+ KFD_MMAP_GPU_ID(pdd->dev->id);
+ else
+ bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);
+
+ for (i = 0; i < p->n_pdds; i++) {
+ if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem))
+ bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;
+ }
+
+ pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"
+ "gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x",
+ bo_bucket->size,
+ bo_bucket->addr,
+ bo_bucket->offset,
+ bo_bucket->gpu_id,
+ bo_bucket->alloc_flags,
+ bo_priv->idr_handle);
+ bo_index++;
+ }
+ }
+
+ ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));
+ if (ret) {
+ pr_err("Failed to copy BO information to user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs));
+ if (ret) {
+ pr_err("Failed to copy BO priv information to user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ *priv_offset += num_bos * sizeof(*bo_privs);
+
+exit:
+ while (ret && bo_index--) {
+ if (bo_buckets[bo_index].alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
+ close_fd(bo_buckets[bo_index].dmabuf_fd);
+ }
+
+ kvfree(bo_buckets);
+ kvfree(bo_privs);
+ return ret;
+}
+
+static int criu_get_process_object_info(struct kfd_process *p,
+ uint32_t *num_devices,
+ uint32_t *num_bos,
+ uint32_t *num_objects,
+ uint64_t *objs_priv_size)
+{
+ uint64_t queues_priv_data_size, svm_priv_data_size, priv_size;
+ uint32_t num_queues, num_events, num_svm_ranges;
+ int ret;
+
+ *num_devices = p->n_pdds;
+ *num_bos = get_process_num_bos(p);
+
+ ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size);
+ if (ret)
+ return ret;
+
+ num_events = kfd_get_num_events(p);
+
+ ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
+ if (ret)
+ return ret;
+
+ *num_objects = num_queues + num_events + num_svm_ranges;
+
+ if (objs_priv_size) {
+ priv_size = sizeof(struct kfd_criu_process_priv_data);
+ priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data);
+ priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
+ priv_size += queues_priv_data_size;
+ priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);
+ priv_size += svm_priv_data_size;
+ *objs_priv_size = priv_size;
+ }
+ return 0;
+}
+
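
criu_get_process_object_info() above sizes a single flat priv_data blob that criu_checkpoint() below and the restore path walk in the same order, each helper advancing priv_offset past what it consumed. A rough sketch of that layout, inferred from the size calculation above and the checkpoint order below (not a struct defined by this patch), is:

/*
 * priv_data (opaque to user space; priv_offset walks through it):
 *
 *   kfd_criu_process_priv_data                  criu_checkpoint_process()
 *   kfd_criu_device_priv_data  x num_devices    criu_checkpoint_devices()
 *   kfd_criu_bo_priv_data      x num_bos        reserved early, filled last by
 *                                               criu_checkpoint_bos()
 *   queue private data         (variable size)  kfd_criu_checkpoint_queues()
 *   kfd_criu_event_priv_data   x num_events     kfd_criu_checkpoint_events()
 *   svm range private data     (variable size)  kfd_criu_checkpoint_svm()
 */
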
+static int criu_checkpoint(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ int ret;
+ uint32_t num_devices, num_bos, num_objects;
+ uint64_t priv_size, priv_offset = 0, bo_priv_offset;
+
+ if (!args->devices || !args->bos || !args->priv_data)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ if (!p->n_pdds) {
+ pr_err("No pdd for given process\n");
+ ret = -ENODEV;
+ goto exit_unlock;
+ }
+
+ /* Confirm all process queues are evicted */
+ if (!p->queues_paused) {
+ pr_err("Cannot dump process when queues are not in evicted state\n");
+ /* CRIU plugin did not call op PROCESS_INFO before checkpointing */
+ ret = -EINVAL;
+ goto exit_unlock;
+ }
+
+ ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size);
+ if (ret)
+ goto exit_unlock;
+
+ if (num_devices != args->num_devices ||
+ num_bos != args->num_bos ||
+ num_objects != args->num_objects ||
+ priv_size != args->priv_data_size) {
+
+ ret = -EINVAL;
+ goto exit_unlock;
+ }
+
+ /* each function will store private data inside priv_data and adjust priv_offset */
+ ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,
+ (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ /* Leave room for BOs in the private data. They need to be restored
+ * before events, but we checkpoint them last to simplify the error
+ * handling.
+ */
+ bo_priv_offset = priv_offset;
+ priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);
+
+ if (num_objects) {
+ ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
+ &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
+ &priv_offset);
+ if (ret)
+ goto exit_unlock;
+
+ ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
+ if (ret)
+ goto exit_unlock;
+ }
+
+ /* This must be the last thing in this function that can fail.
+ * Otherwise we leak dmabuf file descriptors.
+ */
+ ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
+ (uint8_t __user *)args->priv_data, &bo_priv_offset);
+
+exit_unlock:
+ mutex_unlock(&p->mutex);
+ if (ret)
+ pr_err("Failed to dump CRIU ret:%d\n", ret);
+ else
+ pr_debug("CRIU dump ret:%d\n", ret);
+
+ return ret;
+}
+
+static int criu_restore_process(struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ int ret = 0;
+ struct kfd_criu_process_priv_data process_priv;
+
+ if (*priv_offset + sizeof(process_priv) > max_priv_data_size)
+ return -EINVAL;
+
+ ret = copy_from_user(&process_priv,
+ (void __user *)(args->priv_data + *priv_offset),
+ sizeof(process_priv));
+ if (ret) {
+ pr_err("Failed to copy process private information from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_offset += sizeof(process_priv);
+
+ if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
+ pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",
+ process_priv.version, KFD_CRIU_PRIV_VERSION);
+ return -EINVAL;
+ }
+
+ pr_debug("Setting XNACK mode\n");
+ if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {
+ pr_err("xnack mode cannot be set\n");
+ ret = -EPERM;
+ goto exit;
+ } else {
+ pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);
+ p->xnack_enabled = process_priv.xnack_mode;
+ }
+
+exit:
+ return ret;
+}
+
+static int criu_restore_devices(struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ struct kfd_criu_device_bucket *device_buckets;
+ struct kfd_criu_device_priv_data *device_privs;
+ int ret = 0;
+ uint32_t i;
+
+ if (args->num_devices != p->n_pdds)
+ return -EINVAL;
+
+ if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)
+ return -EINVAL;
+
+ device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL);
+ if (!device_buckets)
+ return -ENOMEM;
+
+ ret = copy_from_user(device_buckets, (void __user *)args->devices,
+ args->num_devices * sizeof(*device_buckets));
+ if (ret) {
+ pr_err("Failed to copy devices buckets from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ for (i = 0; i < args->num_devices; i++) {
+ struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
+ struct file *drm_file;
+
+ /* device private data is not currently used */
+
+ if (!device_buckets[i].user_gpu_id) {
+ pr_err("Invalid user gpu_id\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);
+ if (!dev) {
+ pr_err("Failed to find device with gpu_id = %x\n",
+ device_buckets[i].actual_gpu_id);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+ pr_err("Failed to get pdd for gpu_id = %x\n",
+ device_buckets[i].actual_gpu_id);
+ ret = -EINVAL;
+ goto exit;
+ }
+ pdd->user_gpu_id = device_buckets[i].user_gpu_id;
+
+ drm_file = fget(device_buckets[i].drm_fd);
+ if (!drm_file) {
+ pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
+ device_buckets[i].drm_fd);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (pdd->drm_file) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /* create the vm using render nodes for kfd pdd */
+ if (kfd_process_device_init_vm(pdd, drm_file)) {
+ pr_err("could not init vm for given pdd\n");
+ /* On success, the PDD keeps the drm_file reference */
+ fput(drm_file);
+ ret = -EINVAL;
+ goto exit;
+ }
+ /*
+ * The pdd already has the VM bound to the render node, so the call below
+ * won't create a new exclusive KFD mapping but will reuse the existing one
+ * with renderDXXX. It is still needed for IOMMU v2 binding and runtime pm.
+ */
+ pdd = kfd_bind_process_to_device(dev, p);
+ if (IS_ERR(pdd)) {
+ ret = PTR_ERR(pdd);
+ goto exit;
+ }
+
+ if (!pdd->doorbell_index &&
+ kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+ }
+
+ /*
+ * We are not copying device private data from user as we are not using the
+ * data for now, but we still advance priv_offset past it.
+ */
+ *priv_offset += args->num_devices * sizeof(*device_privs);
+
+exit:
+ kfree(device_buckets);
+ return ret;
+}
+
+static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
+ struct kfd_criu_bo_bucket *bo_bucket,
+ struct kfd_criu_bo_priv_data *bo_priv,
+ struct kgd_mem **kgd_mem)
+{
+ int idr_handle;
+ int ret;
+ const bool criu_resume = true;
+ u64 offset;
+
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+ if (bo_bucket->size != kfd_doorbell_process_slice(pdd->dev))
+ return -EINVAL;
+
+ offset = kfd_get_process_doorbells(pdd);
+ if (!offset)
+ return -ENOMEM;
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+ /* MMIO BOs need remapped bus address */
+ if (bo_bucket->size != PAGE_SIZE) {
+ pr_err("Invalid page size\n");
+ return -EINVAL;
+ }
+ offset = pdd->dev->adev->rmmio_remap.bus_addr;
+ if (!offset) {
+ pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
+ return -ENOMEM;
+ }
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ offset = bo_priv->user_addr;
+ }
+ /* Create the BO */
+ ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
+ bo_bucket->size, pdd->drm_priv, kgd_mem,
+ &offset, bo_bucket->alloc_flags, criu_resume);
+ if (ret) {
+ pr_err("Could not create the BO\n");
+ return ret;
+ }
+ pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n",
+ bo_bucket->size, bo_bucket->addr, offset);
+
+ /* Restore previous IDR handle */
+ pr_debug("Restoring old IDR handle for the BO");
+ idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle,
+ bo_priv->idr_handle + 1, GFP_KERNEL);
+
+ if (idr_handle < 0) {
+ pr_err("Could not allocate idr\n");
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv,
+ NULL);
+ return -ENOMEM;
+ }
+
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
+ bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id);
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+ bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id);
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ bo_bucket->restored_offset = offset;
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ bo_bucket->restored_offset = offset;
+ /* Update the VRAM usage count */
+ WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+ }
+ return 0;
+}
+
+static int criu_restore_bo(struct kfd_process *p,
+ struct kfd_criu_bo_bucket *bo_bucket,
+ struct kfd_criu_bo_priv_data *bo_priv)
+{
+ struct kfd_process_device *pdd;
+ struct kgd_mem *kgd_mem;
+ int ret;
+ int j;
+
+ pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n",
+ bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags,
+ bo_priv->idr_handle);
+
+ pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);
+ if (!pdd) {
+ pr_err("Failed to get pdd\n");
+ return -ENODEV;
+ }
+
+ ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem);
+ if (ret)
+ return ret;
+
+ /* now map these BOs to GPU/s */
+ for (j = 0; j < p->n_pdds; j++) {
+ struct kfd_dev *peer;
+ struct kfd_process_device *peer_pdd;
+
+ if (!bo_priv->mapped_gpuids[j])
+ break;
+
+ peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);
+ if (!peer_pdd)
+ return -EINVAL;
+
+ peer = peer_pdd->dev;
+
+ peer_pdd = kfd_bind_process_to_device(peer, p);
+ if (IS_ERR(peer_pdd))
+ return PTR_ERR(peer_pdd);
+
+ ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem,
+ peer_pdd->drm_priv);
+ if (ret) {
+ pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
+ return ret;
+ }
+ }
+
+ pr_debug("map memory was successful for the BO\n");
+ /* create the dmabuf object and export the bo */
+ if (bo_bucket->alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
+ ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, DRM_RDWR,
+ &bo_bucket->dmabuf_fd);
+ if (ret)
+ return ret;
+ } else {
+ bo_bucket->dmabuf_fd = KFD_INVALID_FD;
+ }
+
+ return 0;
+}
+
+static int criu_restore_bos(struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ struct kfd_criu_bo_bucket *bo_buckets = NULL;
+ struct kfd_criu_bo_priv_data *bo_privs = NULL;
+ int ret = 0;
+ uint32_t i = 0;
+
+ if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
+ return -EINVAL;
+
+ /* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
+ amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
+
+ bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
+ if (!bo_buckets)
+ return -ENOMEM;
+
+ ret = copy_from_user(bo_buckets, (void __user *)args->bos,
+ args->num_bos * sizeof(*bo_buckets));
+ if (ret) {
+ pr_err("Failed to copy BOs information from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
+ if (!bo_privs) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
+ args->num_bos * sizeof(*bo_privs));
+ if (ret) {
+ pr_err("Failed to copy BOs information from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_offset += args->num_bos * sizeof(*bo_privs);
+
+ /* Create and map new BOs */
+ for (; i < args->num_bos; i++) {
+ ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
+ if (ret) {
+ pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
+ goto exit;
+ }
+ } /* done */
+
+ /* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
+ ret = copy_to_user((void __user *)args->bos,
+ bo_buckets,
+ (args->num_bos * sizeof(*bo_buckets)));
+ if (ret)
+ ret = -EFAULT;
+
+exit:
+ while (ret && i--) {
+ if (bo_buckets[i].alloc_flags
+ & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
+ close_fd(bo_buckets[i].dmabuf_fd);
+ }
+ kvfree(bo_buckets);
+ kvfree(bo_privs);
+ return ret;
+}
+
+static int criu_restore_objects(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ int ret = 0;
+ uint32_t i;
+
+ BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));
+ BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));
+ BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type));
+
+ for (i = 0; i < args->num_objects; i++) {
+ uint32_t object_type;
+
+ if (*priv_offset + sizeof(object_type) > max_priv_data_size) {
+ pr_err("Invalid private data size\n");
+ return -EINVAL;
+ }
+
+ ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset));
+ if (ret) {
+ pr_err("Failed to copy private information from user\n");
+ goto exit;
+ }
+
+ switch (object_type) {
+ case KFD_CRIU_OBJECT_TYPE_QUEUE:
+ ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data,
+ priv_offset, max_priv_data_size);
+ if (ret)
+ goto exit;
+ break;
+ case KFD_CRIU_OBJECT_TYPE_EVENT:
+ ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,
+ priv_offset, max_priv_data_size);
+ if (ret)
+ goto exit;
+ break;
+ case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
+ ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
+ priv_offset, max_priv_data_size);
+ if (ret)
+ goto exit;
+ break;
+ default:
+ pr_err("Invalid object type:%u at index:%d\n", object_type, i);
+ ret = -EINVAL;
+ goto exit;
+ }
+ }
+exit:
+ return ret;
+}
+
+static int criu_restore(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ uint64_t priv_offset = 0;
+ int ret = 0;
+
+ pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
+ args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
+
+ if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
+ !args->num_devices || !args->num_bos)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ /*
+ * Set the process to evicted state to avoid running any new queues before all the memory
+ * mappings are ready.
+ */
+ ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
+ if (ret)
+ goto exit_unlock;
+
+ /* Each function will adjust priv_offset based on how many bytes they consumed */
+ ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ if (priv_offset != args->priv_data_size) {
+ pr_err("Invalid private data size\n");
+ ret = -EINVAL;
+ }
+
+exit_unlock:
+ mutex_unlock(&p->mutex);
+ if (ret)
+ pr_err("Failed to restore CRIU ret:%d\n", ret);
+ else
+ pr_debug("CRIU restore successful\n");
+
+ return ret;
+}
+
+static int criu_unpause(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ int ret;
+
+ mutex_lock(&p->mutex);
+
+ if (!p->queues_paused) {
+ mutex_unlock(&p->mutex);
+ return -EINVAL;
+ }
+
+ ret = kfd_process_restore_queues(p);
+ if (ret)
+ pr_err("Failed to unpause queues ret:%d\n", ret);
+ else
+ p->queues_paused = false;
+
+ mutex_unlock(&p->mutex);
+
+ return ret;
+}
+
+static int criu_resume(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ struct kfd_process *target = NULL;
+ struct pid *pid = NULL;
+ int ret = 0;
+
+ pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
+ args->pid);
+
+ pid = find_get_pid(args->pid);
+ if (!pid) {
+ pr_err("Cannot find pid info for %i\n", args->pid);
+ return -ESRCH;
+ }
+
+ pr_debug("calling kfd_lookup_process_by_pid\n");
+ target = kfd_lookup_process_by_pid(pid);
+
+ put_pid(pid);
+
+ if (!target) {
+ pr_debug("Cannot find process info for %i\n", args->pid);
+ return -ESRCH;
+ }
+
+ mutex_lock(&target->mutex);
+ ret = kfd_criu_resume_svm(target);
+ if (ret) {
+ pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);
+ goto exit;
+ }
+
+ ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info);
+ if (ret)
+ pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);
+
+exit:
+ mutex_unlock(&target->mutex);
+
+ kfd_unref_process(target);
+ return ret;
+}
+
+static int criu_process_info(struct file *filep,
+ struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args)
+{
+ int ret = 0;
+
+ mutex_lock(&p->mutex);
+
+ if (!p->n_pdds) {
+ pr_err("No pdd for given process\n");
+ ret = -ENODEV;
+ goto err_unlock;
+ }
+
+ ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
+ if (ret)
+ goto err_unlock;
+
+ p->queues_paused = true;
+
+ args->pid = task_pid_nr_ns(p->lead_thread,
+ task_active_pid_ns(p->lead_thread));
+
+ ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos,
+ &args->num_objects, &args->priv_data_size);
+ if (ret)
+ goto err_unlock;
+
+ dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n",
+ args->num_devices, args->num_bos, args->num_objects,
+ args->priv_data_size);
+
+err_unlock:
+ if (ret) {
+ kfd_process_restore_queues(p);
+ p->queues_paused = false;
+ }
+ mutex_unlock(&p->mutex);
+ return ret;
+}
+
+static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_criu_args *args = data;
+ int ret;
+
+ dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
+ switch (args->op) {
+ case KFD_CRIU_OP_PROCESS_INFO:
+ ret = criu_process_info(filep, p, args);
+ break;
+ case KFD_CRIU_OP_CHECKPOINT:
+ ret = criu_checkpoint(filep, p, args);
+ break;
+ case KFD_CRIU_OP_UNPAUSE:
+ ret = criu_unpause(filep, p, args);
+ break;
+ case KFD_CRIU_OP_RESTORE:
+ ret = criu_restore(filep, p, args);
+ break;
+ case KFD_CRIU_OP_RESUME:
+ ret = criu_resume(filep, p, args);
+ break;
+ default:
+ dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret)
+ dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);
+
+ return ret;
+}
+
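
Taken together, the dispatcher above implies a fixed call order from the CRIU plugin's side. The sketch below uses only the op values and the args struct from this patch; the wrapper itself and the plugin's fd/buffer handling are assumptions.

/* Hypothetical plugin-side wrapper around the new ioctl. */
static int kfd_criu_op(int kfd_fd, struct kfd_ioctl_criu_args *args, uint32_t op)
{
	args->op = op;
	return ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, args);
}

/* Checkpoint: KFD_CRIU_OP_PROCESS_INFO  (evicts queues, returns counts and priv_data_size)
 *          -> KFD_CRIU_OP_CHECKPOINT    (fills the devices/bos/priv_data buffers)
 *          -> KFD_CRIU_OP_UNPAUSE       (restores queues if the original process lives on)
 * Restore:   KFD_CRIU_OP_RESTORE        (recreates devices, BOs, queues, events, SVM ranges)
 *          -> KFD_CRIU_OP_RESUME        (unblocks MMU notifications so queues can run)
 */

Per the kfd_ioctl() changes further down, the caller needs CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN for these ops, and the plugin may issue them while ptrace-attached to the target instead of being the target's own thread group.
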
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -1898,16 +2682,16 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
kfd_ioctl_wait_events, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED,
kfd_ioctl_dbg_register, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED,
kfd_ioctl_dbg_unregister, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED,
kfd_ioctl_dbg_address_watch, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED,
kfd_ioctl_dbg_wave_control, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
@@ -1959,6 +2743,12 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
kfd_ioctl_set_xnack_mode, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
+ kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
+ kfd_ioctl_get_available_memory, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
@@ -1973,6 +2763,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
char *kdata = NULL;
unsigned int usize, asize;
int retcode = -EINVAL;
+ bool ptrace_attached = false;
if (nr >= AMDKFD_CORE_IOCTL_COUNT)
goto err_i1;
@@ -1998,7 +2789,15 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
* processes need to create their own KFD device context.
*/
process = filep->private_data;
- if (process->lead_thread != current->group_leader) {
+
+ rcu_read_lock();
+ if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
+ ptrace_parent(process->lead_thread) == current)
+ ptrace_attached = true;
+ rcu_read_unlock();
+
+ if (process->lead_thread != current->group_leader
+ && !ptrace_attached) {
dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
retcode = -EBADF;
goto err_i1;
@@ -2013,6 +2812,19 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
goto err_i1;
}
+ /*
+ * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support
+ * CAP_CHECKPOINT_RESTORE, so we also allow access to processes with
+ * CAP_SYS_ADMIN, which is a more privileged capability.
+ */
+ if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
+ if (!capable(CAP_CHECKPOINT_RESTORE) &&
+ !capable(CAP_SYS_ADMIN)) {
+ retcode = -EACCES;
+ goto err_i1;
+ }
+ }
+
if (cmd & (IOC_IN | IOC_OUT)) {
if (asize <= sizeof(stack_kdata)) {
kdata = stack_kdata;
@@ -2061,12 +2873,11 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
phys_addr_t address;
- int ret;
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
- address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
+ address = dev->adev->rmmio_remap.bus_addr;
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
VM_DONTDUMP | VM_PFNMAP;
@@ -2081,12 +2892,11 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
process->pasid, (unsigned long long) vma->vm_start,
address, vma->vm_flags, PAGE_SIZE);
- ret = io_remap_pfn_range(vma,
+ return io_remap_pfn_range(vma,
vma->vm_start,
address >> PAGE_SHIFT,
PAGE_SIZE,
vma->vm_page_prot);
- return ret;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index cfedfb1e8596..8bfdfd062ff6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2015-2017 Advanced Micro Devices, Inc.
+ * Copyright 2015-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -794,6 +795,102 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
},
};
+static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 256,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+};
+
+static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 256,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+};
+
static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
struct crat_subtype_computeunit *cu)
{
@@ -1039,7 +1136,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
props->rec_transfer_size =
iolink->recommended_transfer_size;
- dev->io_link_count++;
dev->node_props.io_links_count++;
list_add_tail(&props->list, &dev->io_link_props);
break;
@@ -1055,15 +1151,17 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
* table, add corresponded reversed direction link now.
*/
if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
- to_dev = kfd_topology_device_by_proximity_domain(id_to);
+ to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
if (!to_dev)
return -ENODEV;
/* same everything but the other direction */
props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
+ if (!props2)
+ return -ENOMEM;
+
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
- to_dev->io_link_count++;
to_dev->node_props.io_links_count++;
list_add_tail(&props2->list, &to_dev->io_link_props);
}
@@ -1311,6 +1409,80 @@ static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
return 1;
}
+#define KFD_MAX_CACHE_TYPES 6
+
+static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
+ struct kfd_gpu_cache_info *pcache_info)
+{
+ struct amdgpu_device *adev = kdev->adev;
+ int i = 0;
+
+ /* TCP L1 Cache per CU */
+ if (adev->gfx.config.gc_tcp_l1_size) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
+ i++;
+ }
+ /* Scalar L1 Instruction Cache per SQC */
+ if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
+ pcache_info[i].cache_size =
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ i++;
+ }
+ /* Scalar L1 Data Cache per SQC */
+ if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ i++;
+ }
+ /* GL1 Data Cache per SA */
+ if (adev->gfx.config.gc_gl1c_per_sa &&
+ adev->gfx.config.gc_gl1c_size_per_instance) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
+ adev->gfx.config.gc_gl1c_size_per_instance;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ i++;
+ }
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ if (adev->gfx.config.gc_gl2c_per_gpu) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
+ pcache_info[i].cache_level = 2;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ i++;
+ }
+ /* L3 Data Cache per GPU */
+ if (adev->gmc.mall_size) {
+ pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
+ pcache_info[i].cache_level = 3;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ i++;
+ }
+ return i;
+}
+
/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
* tables
*
@@ -1332,6 +1504,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
int *num_of_entries)
{
struct kfd_gpu_cache_info *pcache_info;
+ struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
int num_of_cache_types = 0;
int i, j, k;
int ct = 0;
@@ -1340,7 +1513,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
int ret;
unsigned int num_cu_shared;
- switch (kdev->device_info->asic_family) {
+ switch (kdev->adev->asic_type) {
case CHIP_KAVERI:
pcache_info = kaveri_cache_info;
num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
@@ -1377,67 +1550,88 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info = vegam_cache_info;
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
break;
- case CHIP_VEGA10:
- pcache_info = vega10_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
- break;
- case CHIP_VEGA12:
- pcache_info = vega12_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
- break;
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- pcache_info = vega20_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
- break;
- case CHIP_ALDEBARAN:
- pcache_info = aldebaran_cache_info;
- num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
- break;
- case CHIP_RAVEN:
- pcache_info = raven_cache_info;
- num_of_cache_types = ARRAY_SIZE(raven_cache_info);
- break;
- case CHIP_RENOIR:
- pcache_info = renoir_cache_info;
- num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
- break;
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_CYAN_SKILLFISH:
- pcache_info = navi10_cache_info;
- num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
- break;
- case CHIP_NAVI14:
- pcache_info = navi14_cache_info;
- num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
- break;
- case CHIP_SIENNA_CICHLID:
- pcache_info = sienna_cichlid_cache_info;
- num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
- break;
- case CHIP_NAVY_FLOUNDER:
- pcache_info = navy_flounder_cache_info;
- num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
- break;
- case CHIP_DIMGREY_CAVEFISH:
- pcache_info = dimgrey_cavefish_cache_info;
- num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
- break;
- case CHIP_VANGOGH:
- pcache_info = vangogh_cache_info;
- num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
- break;
- case CHIP_BEIGE_GOBY:
- pcache_info = beige_goby_cache_info;
- num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
- break;
- case CHIP_YELLOW_CARP:
- pcache_info = yellow_carp_cache_info;
- num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
- break;
default:
- return -EINVAL;
+ switch (KFD_GC_VERSION(kdev)) {
+ case IP_VERSION(9, 0, 1):
+ pcache_info = vega10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+ break;
+ case IP_VERSION(9, 2, 1):
+ pcache_info = vega12_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
+ break;
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ pcache_info = vega20_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
+ break;
+ case IP_VERSION(9, 4, 2):
+ pcache_info = aldebaran_cache_info;
+ num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ pcache_info = raven_cache_info;
+ num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+ break;
+ case IP_VERSION(9, 3, 0):
+ pcache_info = renoir_cache_info;
+ num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ pcache_info = navi10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+ break;
+ case IP_VERSION(10, 1, 1):
+ pcache_info = navi14_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
+ break;
+ case IP_VERSION(10, 3, 0):
+ pcache_info = sienna_cichlid_cache_info;
+ num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
+ break;
+ case IP_VERSION(10, 3, 2):
+ pcache_info = navy_flounder_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
+ break;
+ case IP_VERSION(10, 3, 4):
+ pcache_info = dimgrey_cavefish_cache_info;
+ num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
+ break;
+ case IP_VERSION(10, 3, 1):
+ pcache_info = vangogh_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
+ break;
+ case IP_VERSION(10, 3, 5):
+ pcache_info = beige_goby_cache_info;
+ num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
+ break;
+ case IP_VERSION(10, 3, 3):
+ pcache_info = yellow_carp_cache_info;
+ num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
+ break;
+ case IP_VERSION(10, 3, 6):
+ pcache_info = gc_10_3_6_cache_info;
+ num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
+ break;
+ case IP_VERSION(10, 3, 7):
+ pcache_info = gfx1037_cache_info;
+ num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ pcache_info = cache_info;
+ num_of_cache_types =
+ kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
+ break;
+ default:
+ return -EINVAL;
+ }
}
*size_filled = 0;
@@ -1560,7 +1754,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
/* Fetch the CRAT table from ACPI */
status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
if (status == AE_NOT_FOUND) {
- pr_warn("CRAT table not found\n");
+ pr_info("CRAT table not found\n");
return -ENODATA;
} else if (ACPI_FAILURE(status)) {
const char *err = acpi_format_exception(status);
@@ -1963,8 +2157,6 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
-
*avail_size -= sizeof(struct crat_subtype_iolink);
if (*avail_size < 0)
return -ENOMEM;
@@ -1981,7 +2173,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
/* Fill in IOLINK subtype.
* TODO: Fill-in other fields of iolink subtype
*/
- if (adev->gmc.xgmi.connected_to_cpu) {
+ if (kdev->adev->gmc.xgmi.connected_to_cpu) {
/*
* with host gpu xgmi link, host can access gpu memory whether
* or not pcie bar type is large, so always create bidirectional
@@ -1990,19 +2182,19 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->num_hops_xgmi = 1;
- if (adev->asic_type == CHIP_ALDEBARAN) {
+ if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) {
sub_type_hdr->minimum_bandwidth_mbs =
amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
- kdev->kgd, NULL, true);
+ kdev->adev, NULL, true);
sub_type_hdr->maximum_bandwidth_mbs =
sub_type_hdr->minimum_bandwidth_mbs;
}
} else {
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
sub_type_hdr->minimum_bandwidth_mbs =
- amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, true);
+ amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, false);
+ amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
}
sub_type_hdr->proximity_domain_from = proximity_domain;
@@ -2044,11 +2236,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
sub_type_hdr->proximity_domain_from = proximity_domain_from;
sub_type_hdr->proximity_domain_to = proximity_domain_to;
sub_type_hdr->num_hops_xgmi =
- amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
+ amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, peer_kdev->kgd, false);
+ amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, peer_kdev->adev, false);
sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, NULL, true) : 0;
+ amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
return 0;
}
@@ -2114,7 +2306,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
cu->proximity_domain = proximity_domain;
- amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info);
+ amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
cu->num_simd_per_cu = cu_info.simd_per_cu;
cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
cu->max_waves_simd = cu_info.max_waves_per_simd;
@@ -2145,7 +2337,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* report the total FB size (public+private) as a single
* private heap.
*/
- amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info);
+ local_mem_info = kdev->local_mem_info;
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length);
@@ -2194,7 +2386,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
/* Fill in Subtype: IO_LINKS
* Only direct links are added here which is Link from GPU to
- * to its NUMA node. Indirect links are added by userspace.
+ * its NUMA node. Indirect links are added by userspace.
*/
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
cache_mem_filled);
@@ -2218,7 +2410,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
*/
if (kdev->hive_id) {
for (nid = 0; nid < proximity_domain; ++nid) {
- peer_dev = kfd_topology_device_by_proximity_domain(nid);
+ peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
if (!peer_dev->gpu)
continue;
if (peer_dev->gpu->hive_id != kdev->hive_id)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index d54ceebd346b..482ba84a728d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -232,7 +233,7 @@ struct crat_subtype_ccompute {
#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2)
#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3)
#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4)
-#define CRAT_IOLINK_FLAGS_BI_DIRECTIONAL (1 << 31)
+#define CRAT_IOLINK_FLAGS_BI_DIRECTIONAL (1 << 31)
#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0x7fffffe0
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
deleted file mode 100644
index 159add0f5aaa..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ /dev/null
@@ -1,845 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/log2.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/device.h>
-
-#include "kfd_pm4_headers.h"
-#include "kfd_pm4_headers_diq.h"
-#include "kfd_kernel_queue.h"
-#include "kfd_priv.h"
-#include "kfd_pm4_opcodes.h"
-#include "cik_regs.h"
-#include "kfd_dbgmgr.h"
-#include "kfd_dbgdev.h"
-#include "kfd_device_queue_manager.h"
-
-static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
-{
- dev->kfd2kgd->address_watch_disable(dev->kgd);
-}
-
-static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
- u32 pasid, uint64_t vmid0_address,
- uint32_t *packet_buff, size_t size_in_bytes)
-{
- struct pm4__release_mem *rm_packet;
- struct pm4__indirect_buffer_pasid *ib_packet;
- struct kfd_mem_obj *mem_obj;
- size_t pq_packets_size_in_bytes;
- union ULARGE_INTEGER *largep;
- union ULARGE_INTEGER addr;
- struct kernel_queue *kq;
- uint64_t *rm_state;
- unsigned int *ib_packet_buff;
- int status;
-
- if (WARN_ON(!size_in_bytes))
- return -EINVAL;
-
- kq = dbgdev->kq;
-
- pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
- sizeof(struct pm4__indirect_buffer_pasid);
-
- /*
-	 * We acquire a buffer from the DIQ.
-	 * The received packet buffer will sit in the Indirect Buffer,
-	 * and in the PQ we put the IB packet + sync packet(s).
- */
- status = kq_acquire_packet_buffer(kq,
- pq_packets_size_in_bytes / sizeof(uint32_t),
- &ib_packet_buff);
- if (status) {
- pr_err("kq_acquire_packet_buffer failed\n");
- return status;
- }
-
- memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
-
- ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
-
- ib_packet->header.count = 3;
- ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
- ib_packet->header.type = PM4_TYPE_3;
-
- largep = (union ULARGE_INTEGER *) &vmid0_address;
-
- ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
- ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
-
- ib_packet->control = (1 << 23) | (1 << 31) |
- ((size_in_bytes / 4) & 0xfffff);
-
- ib_packet->bitfields5.pasid = pasid;
-
- /*
-	 * For now we use release mem for GPU-CPU synchronization;
-	 * consider WaitRegMem + WriteData as a better alternative.
-	 * We get a GART allocation (gpu/cpu mapping)
-	 * for the sync variable, and wait until:
- * (a) Sync with HW
- * (b) Sync var is written by CP to mem.
- */
- rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
- (sizeof(struct pm4__indirect_buffer_pasid) /
- sizeof(unsigned int)));
-
- status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
- &mem_obj);
-
- if (status) {
- pr_err("Failed to allocate GART memory\n");
- kq_rollback_packet(kq);
- return status;
- }
-
- rm_state = (uint64_t *) mem_obj->cpu_ptr;
-
- *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
-
- rm_packet->header.opcode = IT_RELEASE_MEM;
- rm_packet->header.type = PM4_TYPE_3;
- rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;
-
- rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
- rm_packet->bitfields2.event_index =
- event_index___release_mem__end_of_pipe;
-
- rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
- rm_packet->bitfields2.atc = 0;
- rm_packet->bitfields2.tc_wb_action_ena = 1;
-
- addr.quad_part = mem_obj->gpu_addr;
-
- rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
- rm_packet->address_hi = addr.u.high_part;
-
- rm_packet->bitfields3.data_sel =
- data_sel___release_mem__send_64_bit_data;
-
- rm_packet->bitfields3.int_sel =
- int_sel___release_mem__send_data_after_write_confirm;
-
- rm_packet->bitfields3.dst_sel =
- dst_sel___release_mem__memory_controller;
-
- rm_packet->data_lo = QUEUESTATE__ACTIVE;
-
- kq_submit_packet(kq);
-
- /* Wait till CP writes sync code: */
- status = amdkfd_fence_wait_timeout(
- rm_state,
- QUEUESTATE__ACTIVE, 1500);
-
- kfd_gtt_sa_free(dbgdev->dev, mem_obj);
-
- return status;
-}
-
-static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
-{
- /*
- * no action is needed in this case,
- * just make sure diq will not be used
- */
-
- dbgdev->kq = NULL;
-
- return 0;
-}
-
-static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
-{
- struct queue_properties properties;
- unsigned int qid;
- struct kernel_queue *kq = NULL;
- int status;
-
- properties.type = KFD_QUEUE_TYPE_DIQ;
-
- status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
- &properties, &qid, NULL);
-
- if (status) {
- pr_err("Failed to create DIQ\n");
- return status;
- }
-
- pr_debug("DIQ Created with queue id: %d\n", qid);
-
- kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
-
- if (!kq) {
- pr_err("Error getting DIQ\n");
- pqm_destroy_queue(dbgdev->pqm, qid);
- return -EFAULT;
- }
-
- dbgdev->kq = kq;
-
- return status;
-}
-
-static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
-{
- /* disable watch address */
- dbgdev_address_watch_disable_nodiq(dbgdev->dev);
- return 0;
-}
-
-static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
-{
- /* todo - disable address watch */
- int status;
-
- status = pqm_destroy_queue(dbgdev->pqm,
- dbgdev->kq->queue->properties.queue_id);
- dbgdev->kq = NULL;
-
- return status;
-}
-
-static void dbgdev_address_watch_set_registers(
- const struct dbg_address_watch_info *adw_info,
- union TCP_WATCH_ADDR_H_BITS *addrHi,
- union TCP_WATCH_ADDR_L_BITS *addrLo,
- union TCP_WATCH_CNTL_BITS *cntl,
- unsigned int index, unsigned int vmid)
-{
- union ULARGE_INTEGER addr;
-
- addr.quad_part = 0;
- addrHi->u32All = 0;
- addrLo->u32All = 0;
- cntl->u32All = 0;
-
- if (adw_info->watch_mask)
- cntl->bitfields.mask =
- (uint32_t) (adw_info->watch_mask[index] &
- ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
- else
- cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
-
- addr.quad_part = (unsigned long long) adw_info->watch_address[index];
-
- addrHi->bitfields.addr = addr.u.high_part &
- ADDRESS_WATCH_REG_ADDHIGH_MASK;
- addrLo->bitfields.addr =
- (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
-
- cntl->bitfields.mode = adw_info->watch_mode[index];
- cntl->bitfields.vmid = (uint32_t) vmid;
- /* for now assume it is an ATC address */
- cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
-
- pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
- pr_debug("\t\t%20s %08x\n", "set reg add high :",
- addrHi->bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "set reg add low :",
- addrLo->bitfields.addr);
-}
-
-static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
- struct dbg_address_watch_info *adw_info)
-{
- union TCP_WATCH_ADDR_H_BITS addrHi;
- union TCP_WATCH_ADDR_L_BITS addrLo;
- union TCP_WATCH_CNTL_BITS cntl;
- struct kfd_process_device *pdd;
- unsigned int i;
-
-	/* taking the vmid for that process the safe way, using the pdd */
- pdd = kfd_get_process_device_data(dbgdev->dev,
- adw_info->process);
- if (!pdd) {
- pr_err("Failed to get pdd for wave control no DIQ\n");
- return -EFAULT;
- }
-
- addrHi.u32All = 0;
- addrLo.u32All = 0;
- cntl.u32All = 0;
-
- if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
- (adw_info->num_watch_points == 0)) {
- pr_err("num_watch_points is invalid\n");
- return -EINVAL;
- }
-
- if (!adw_info->watch_mode || !adw_info->watch_address) {
- pr_err("adw_info fields are not valid\n");
- return -EINVAL;
- }
-
- for (i = 0; i < adw_info->num_watch_points; i++) {
- dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
- &cntl, i, pdd->qpd.vmid);
-
- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
- pr_debug("\t\t%20s %08x\n", "register index :", i);
- pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
- pr_debug("\t\t%20s %08x\n", "Address Low is :",
- addrLo.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Address high is :",
- addrHi.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Address high is :",
- addrHi.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Control Mask is :",
- cntl.bitfields.mask);
- pr_debug("\t\t%20s %08x\n", "Control Mode is :",
- cntl.bitfields.mode);
- pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
- cntl.bitfields.vmid);
- pr_debug("\t\t%20s %08x\n", "Control atc is :",
- cntl.bitfields.atc);
- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
-
- pdd->dev->kfd2kgd->address_watch_execute(
- dbgdev->dev->kgd,
- i,
- cntl.u32All,
- addrHi.u32All,
- addrLo.u32All);
- }
-
- return 0;
-}
-
-static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
- struct dbg_address_watch_info *adw_info)
-{
- struct pm4__set_config_reg *packets_vec;
- union TCP_WATCH_ADDR_H_BITS addrHi;
- union TCP_WATCH_ADDR_L_BITS addrLo;
- union TCP_WATCH_CNTL_BITS cntl;
- struct kfd_mem_obj *mem_obj;
- unsigned int aw_reg_add_dword;
- uint32_t *packet_buff_uint;
- unsigned int i;
- int status;
- size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
-	/* we do not control the vmid in DIQ mode, just a placeholder */
- unsigned int vmid = 0;
-
- addrHi.u32All = 0;
- addrLo.u32All = 0;
- cntl.u32All = 0;
-
- if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
- (adw_info->num_watch_points == 0)) {
- pr_err("num_watch_points is invalid\n");
- return -EINVAL;
- }
-
- if (!adw_info->watch_mode || !adw_info->watch_address) {
- pr_err("adw_info fields are not valid\n");
- return -EINVAL;
- }
-
- status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
-
- if (status) {
- pr_err("Failed to allocate GART memory\n");
- return status;
- }
-
- packet_buff_uint = mem_obj->cpu_ptr;
-
- memset(packet_buff_uint, 0, ib_size);
-
- packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
-
- packets_vec[0].header.count = 1;
- packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
- packets_vec[0].header.type = PM4_TYPE_3;
- packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
- packets_vec[0].bitfields2.insert_vmid = 1;
- packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
- packets_vec[1].bitfields2.insert_vmid = 0;
- packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
- packets_vec[2].bitfields2.insert_vmid = 0;
- packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
- packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
- packets_vec[3].bitfields2.insert_vmid = 1;
-
- for (i = 0; i < adw_info->num_watch_points; i++) {
- dbgdev_address_watch_set_registers(adw_info,
- &addrHi,
- &addrLo,
- &cntl,
- i,
- vmid);
-
- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
- pr_debug("\t\t%20s %08x\n", "register index :", i);
- pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
- pr_debug("\t\t%20s %p\n", "Add ptr is :",
- adw_info->watch_address);
- pr_debug("\t\t%20s %08llx\n", "Add is :",
- adw_info->watch_address[i]);
- pr_debug("\t\t%20s %08x\n", "Address Low is :",
- addrLo.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Address high is :",
- addrHi.bitfields.addr);
- pr_debug("\t\t%20s %08x\n", "Control Mask is :",
- cntl.bitfields.mask);
- pr_debug("\t\t%20s %08x\n", "Control Mode is :",
- cntl.bitfields.mode);
- pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
- cntl.bitfields.vmid);
- pr_debug("\t\t%20s %08x\n", "Control atc is :",
- cntl.bitfields.atc);
- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
-
- aw_reg_add_dword =
- dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->kgd,
- i,
- ADDRESS_WATCH_REG_CNTL);
-
- packets_vec[0].bitfields2.reg_offset =
- aw_reg_add_dword - AMD_CONFIG_REG_BASE;
-
- packets_vec[0].reg_data[0] = cntl.u32All;
-
- aw_reg_add_dword =
- dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->kgd,
- i,
- ADDRESS_WATCH_REG_ADDR_HI);
-
- packets_vec[1].bitfields2.reg_offset =
- aw_reg_add_dword - AMD_CONFIG_REG_BASE;
- packets_vec[1].reg_data[0] = addrHi.u32All;
-
- aw_reg_add_dword =
- dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->kgd,
- i,
- ADDRESS_WATCH_REG_ADDR_LO);
-
- packets_vec[2].bitfields2.reg_offset =
- aw_reg_add_dword - AMD_CONFIG_REG_BASE;
- packets_vec[2].reg_data[0] = addrLo.u32All;
-
-		/* enable watch flag if address is not zero */
- if (adw_info->watch_address[i] > 0)
- cntl.bitfields.valid = 1;
- else
- cntl.bitfields.valid = 0;
-
- aw_reg_add_dword =
- dbgdev->dev->kfd2kgd->address_watch_get_offset(
- dbgdev->dev->kgd,
- i,
- ADDRESS_WATCH_REG_CNTL);
-
- packets_vec[3].bitfields2.reg_offset =
- aw_reg_add_dword - AMD_CONFIG_REG_BASE;
- packets_vec[3].reg_data[0] = cntl.u32All;
-
- status = dbgdev_diq_submit_ib(
- dbgdev,
- adw_info->process->pasid,
- mem_obj->gpu_addr,
- packet_buff_uint,
- ib_size);
-
- if (status) {
- pr_err("Failed to submit IB to DIQ\n");
- break;
- }
- }
-
- kfd_gtt_sa_free(dbgdev->dev, mem_obj);
- return status;
-}
-
-static int dbgdev_wave_control_set_registers(
- struct dbg_wave_control_info *wac_info,
- union SQ_CMD_BITS *in_reg_sq_cmd,
- union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
-{
- int status = 0;
- union SQ_CMD_BITS reg_sq_cmd;
- union GRBM_GFX_INDEX_BITS reg_gfx_index;
- struct HsaDbgWaveMsgAMDGen2 *pMsg;
-
- reg_sq_cmd.u32All = 0;
- reg_gfx_index.u32All = 0;
- pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
-
- switch (wac_info->mode) {
- /* Send command to single wave */
- case HSA_DBG_WAVEMODE_SINGLE:
- /*
- * Limit access to the process waves only,
- * by setting vmid check
- */
- reg_sq_cmd.bits.check_vmid = 1;
- reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
- reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
- reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
-
- reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
- reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
- reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
-
- break;
-
- /* Send command to all waves with matching VMID */
- case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
-
- reg_gfx_index.bits.sh_broadcast_writes = 1;
- reg_gfx_index.bits.se_broadcast_writes = 1;
- reg_gfx_index.bits.instance_broadcast_writes = 1;
-
- reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
-
- break;
-
- /* Send command to all CU waves with matching VMID */
- case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
-
- reg_sq_cmd.bits.check_vmid = 1;
- reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
-
- reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
- reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
- reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
-
- break;
-
- default:
- return -EINVAL;
- }
-
- switch (wac_info->operand) {
- case HSA_DBG_WAVEOP_HALT:
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
- break;
-
- case HSA_DBG_WAVEOP_RESUME:
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
- break;
-
- case HSA_DBG_WAVEOP_KILL:
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
- break;
-
- case HSA_DBG_WAVEOP_DEBUG:
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
- break;
-
- case HSA_DBG_WAVEOP_TRAP:
- if (wac_info->trapId < MAX_TRAPID) {
- reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
- reg_sq_cmd.bits.trap_id = wac_info->trapId;
- } else {
- status = -EINVAL;
- }
- break;
-
- default:
- status = -EINVAL;
- break;
- }
-
- if (status == 0) {
- *in_reg_sq_cmd = reg_sq_cmd;
- *in_reg_gfx_index = reg_gfx_index;
- }
-
- return status;
-}
-
-static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
- struct dbg_wave_control_info *wac_info)
-{
-
- int status;
- union SQ_CMD_BITS reg_sq_cmd;
- union GRBM_GFX_INDEX_BITS reg_gfx_index;
- struct kfd_mem_obj *mem_obj;
- uint32_t *packet_buff_uint;
- struct pm4__set_config_reg *packets_vec;
- size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
-
- reg_sq_cmd.u32All = 0;
-
- status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
- &reg_gfx_index);
- if (status) {
- pr_err("Failed to set wave control registers\n");
- return status;
- }
-
- /* we do not control the VMID in DIQ, so reset it to a known value */
- reg_sq_cmd.bits.vm_id = 0;
-
- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
-
- pr_debug("\t\t mode is: %u\n", wac_info->mode);
- pr_debug("\t\t operand is: %u\n", wac_info->operand);
- pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
- pr_debug("\t\t msg value is: %u\n",
- wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
- pr_debug("\t\t vmid is: N/A\n");
-
- pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
- pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
- pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
- pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
- pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
- pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
- pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
-
- pr_debug("\t\t ibw is : %u\n",
- reg_gfx_index.bitfields.instance_broadcast_writes);
- pr_debug("\t\t ii is : %u\n",
- reg_gfx_index.bitfields.instance_index);
- pr_debug("\t\t sebw is : %u\n",
- reg_gfx_index.bitfields.se_broadcast_writes);
- pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
- pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
- pr_debug("\t\t sbw is : %u\n",
- reg_gfx_index.bitfields.sh_broadcast_writes);
-
- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
-
- status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
-
- if (status != 0) {
- pr_err("Failed to allocate GART memory\n");
- return status;
- }
-
- packet_buff_uint = mem_obj->cpu_ptr;
-
- memset(packet_buff_uint, 0, ib_size);
-
- packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
- packets_vec[0].header.count = 1;
- packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
- packets_vec[0].header.type = PM4_TYPE_3;
- packets_vec[0].bitfields2.reg_offset =
- GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
-
- packets_vec[0].bitfields2.insert_vmid = 0;
- packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
-
- packets_vec[1].header.count = 1;
- packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
- packets_vec[1].header.type = PM4_TYPE_3;
- packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
-
- packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
- packets_vec[1].bitfields2.insert_vmid = 1;
- packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
-
- /* Restore the GRBM_GFX_INDEX register */
-
- reg_gfx_index.u32All = 0;
- reg_gfx_index.bits.sh_broadcast_writes = 1;
- reg_gfx_index.bits.instance_broadcast_writes = 1;
- reg_gfx_index.bits.se_broadcast_writes = 1;
-
-
- packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
- packets_vec[2].bitfields2.reg_offset =
- GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
-
- packets_vec[2].bitfields2.insert_vmid = 0;
- packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
-
- status = dbgdev_diq_submit_ib(
- dbgdev,
- wac_info->process->pasid,
- mem_obj->gpu_addr,
- packet_buff_uint,
- ib_size);
-
- if (status)
- pr_err("Failed to submit IB to DIQ\n");
-
- kfd_gtt_sa_free(dbgdev->dev, mem_obj);
-
- return status;
-}
-
-static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
- struct dbg_wave_control_info *wac_info)
-{
- int status;
- union SQ_CMD_BITS reg_sq_cmd;
- union GRBM_GFX_INDEX_BITS reg_gfx_index;
- struct kfd_process_device *pdd;
-
- reg_sq_cmd.u32All = 0;
-
-	/* taking the VMID for that process the safe way, using the PDD */
- pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
-
- if (!pdd) {
- pr_err("Failed to get pdd for wave control no DIQ\n");
- return -EFAULT;
- }
- status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
- &reg_gfx_index);
- if (status) {
- pr_err("Failed to set wave control registers\n");
- return status;
- }
-
- /* for non DIQ we need to patch the VMID: */
-
- reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
-
- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
-
- pr_debug("\t\t mode is: %u\n", wac_info->mode);
- pr_debug("\t\t operand is: %u\n", wac_info->operand);
- pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
- pr_debug("\t\t msg value is: %u\n",
- wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
- pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);
-
- pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
- pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
- pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
- pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
- pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
- pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
- pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
-
- pr_debug("\t\t ibw is : %u\n",
- reg_gfx_index.bitfields.instance_broadcast_writes);
- pr_debug("\t\t ii is : %u\n",
- reg_gfx_index.bitfields.instance_index);
- pr_debug("\t\t sebw is : %u\n",
- reg_gfx_index.bitfields.se_broadcast_writes);
- pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
- pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
- pr_debug("\t\t sbw is : %u\n",
- reg_gfx_index.bitfields.sh_broadcast_writes);
-
- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
-
- return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
- reg_gfx_index.u32All,
- reg_sq_cmd.u32All);
-}
-
-int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
-{
- int status = 0;
- unsigned int vmid;
- uint16_t queried_pasid;
- union SQ_CMD_BITS reg_sq_cmd;
- union GRBM_GFX_INDEX_BITS reg_gfx_index;
- struct kfd_process_device *pdd;
- struct dbg_wave_control_info wac_info;
- int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
- int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
-
- reg_sq_cmd.u32All = 0;
- status = 0;
-
- wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
- wac_info.operand = HSA_DBG_WAVEOP_KILL;
-
- pr_debug("Killing all process wavefronts\n");
-
- /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
- * ATC_VMID15_PASID_MAPPING
- * to check which VMID the current process is mapped to.
- */
-
- for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
- status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
- (dev->kgd, vmid, &queried_pasid);
-
- if (status && queried_pasid == p->pasid) {
- pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
- vmid, p->pasid);
- break;
- }
- }
-
- if (vmid > last_vmid_to_scan) {
- pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
- return -EFAULT;
- }
-
-	/* taking the VMID for that process the safe way, using the PDD */
- pdd = kfd_get_process_device_data(dev, p);
- if (!pdd)
- return -EFAULT;
-
- status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
- &reg_gfx_index);
- if (status != 0)
- return -EINVAL;
-
- /* for non DIQ we need to patch the VMID: */
- reg_sq_cmd.bits.vm_id = vmid;
-
- dev->kfd2kgd->wave_control_execute(dev->kgd,
- reg_gfx_index.u32All,
- reg_sq_cmd.u32All);
-
- return 0;
-}
-
-void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
- enum DBGDEV_TYPE type)
-{
- pdbgdev->dev = pdev;
- pdbgdev->kq = NULL;
- pdbgdev->type = type;
- pdbgdev->pqm = NULL;
-
- switch (type) {
- case DBGDEV_TYPE_NODIQ:
- pdbgdev->dbgdev_register = dbgdev_register_nodiq;
- pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
- pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
- pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
- break;
- case DBGDEV_TYPE_DIQ:
- default:
- pdbgdev->dbgdev_register = dbgdev_register_diq;
- pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
- pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
- pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
- break;
- }
-
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
deleted file mode 100644
index 0619c777b47e..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef KFD_DBGDEV_H_
-#define KFD_DBGDEV_H_
-
-enum {
- SQ_CMD_VMID_OFFSET = 28,
- ADDRESS_WATCH_CNTL_OFFSET = 24
-};
-
-enum {
- PRIV_QUEUE_SYNC_TIME_MS = 200
-};
-
-/* CONTEXT reg space definition */
-enum {
- CONTEXT_REG_BASE = 0xA000,
- CONTEXT_REG_END = 0xA400,
- CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE
-};
-
-/* USER CONFIG reg space definition */
-enum {
- USERCONFIG_REG_BASE = 0xC000,
- USERCONFIG_REG_END = 0x10000,
- USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE
-};
-
-/* CONFIG reg space definition */
-enum {
- AMD_CONFIG_REG_BASE = 0x2000, /* in dwords */
- AMD_CONFIG_REG_END = 0x2B00,
- AMD_CONFIG_REG_SIZE = AMD_CONFIG_REG_END - AMD_CONFIG_REG_BASE
-};
-
-/* SH reg space definition */
-enum {
- SH_REG_BASE = 0x2C00,
- SH_REG_END = 0x3000,
- SH_REG_SIZE = SH_REG_END - SH_REG_BASE
-};
-
-/* SQ_CMD definitions */
-#define SQ_CMD 0x8DEC
-
-enum SQ_IND_CMD_CMD {
- SQ_IND_CMD_CMD_NULL = 0x00000000,
- SQ_IND_CMD_CMD_HALT = 0x00000001,
- SQ_IND_CMD_CMD_RESUME = 0x00000002,
- SQ_IND_CMD_CMD_KILL = 0x00000003,
- SQ_IND_CMD_CMD_DEBUG = 0x00000004,
- SQ_IND_CMD_CMD_TRAP = 0x00000005,
-};
-
-enum SQ_IND_CMD_MODE {
- SQ_IND_CMD_MODE_SINGLE = 0x00000000,
- SQ_IND_CMD_MODE_BROADCAST = 0x00000001,
- SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002,
- SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003,
- SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004,
-};
-
-union SQ_IND_INDEX_BITS {
- struct {
- uint32_t wave_id:4;
- uint32_t simd_id:2;
- uint32_t thread_id:6;
- uint32_t:1;
- uint32_t force_read:1;
- uint32_t read_timeout:1;
- uint32_t unindexed:1;
- uint32_t index:16;
-
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union SQ_IND_CMD_BITS {
- struct {
- uint32_t data:32;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union SQ_CMD_BITS {
- struct {
- uint32_t cmd:3;
- uint32_t:1;
- uint32_t mode:3;
- uint32_t check_vmid:1;
- uint32_t trap_id:3;
- uint32_t:5;
- uint32_t wave_id:4;
- uint32_t simd_id:2;
- uint32_t:2;
- uint32_t queue_id:3;
- uint32_t:1;
- uint32_t vm_id:4;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union SQ_IND_DATA_BITS {
- struct {
- uint32_t data:32;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union GRBM_GFX_INDEX_BITS {
- struct {
- uint32_t instance_index:8;
- uint32_t sh_index:8;
- uint32_t se_index:8;
- uint32_t:5;
- uint32_t sh_broadcast_writes:1;
- uint32_t instance_broadcast_writes:1;
- uint32_t se_broadcast_writes:1;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union TCP_WATCH_ADDR_H_BITS {
- struct {
- uint32_t addr:16;
- uint32_t:16;
-
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-union TCP_WATCH_ADDR_L_BITS {
- struct {
- uint32_t:6;
- uint32_t addr:26;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-enum {
- QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */
- QUEUESTATE__ACTIVE_COMPLETION_PENDING,
- QUEUESTATE__ACTIVE
-};
-
-union ULARGE_INTEGER {
- struct {
- uint32_t low_part;
- uint32_t high_part;
- } u;
- unsigned long long quad_part;
-};
-
-
-#define KFD_CIK_VMID_START_OFFSET (8)
-#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8))
-
-
-void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
- enum DBGDEV_TYPE type);
-
-union TCP_WATCH_CNTL_BITS {
- struct {
- uint32_t mask:24;
- uint32_t vmid:4;
- uint32_t atc:1;
- uint32_t mode:2;
- uint32_t valid:1;
- } bitfields, bits;
- uint32_t u32All;
- signed int i32All;
- float f32All;
-};
-
-enum {
- ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
- ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
- ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
- /* extend the mask to 26 bits in order to match the low address field */
- ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
- ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
-};
-
-enum {
- MAX_TRAPID = 8, /* 3 bits in the bitfield. */
- MAX_WATCH_ADDRESSES = 4
-};
-
-enum {
- ADDRESS_WATCH_REG_ADDR_HI = 0,
- ADDRESS_WATCH_REG_ADDR_LO,
- ADDRESS_WATCH_REG_CNTL,
- ADDRESS_WATCH_REG_MAX
-};
-
-#endif /* KFD_DBGDEV_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
deleted file mode 100644
index 9bfa50633654..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/log2.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-
-#include "kfd_priv.h"
-#include "cik_regs.h"
-#include "kfd_pm4_headers.h"
-#include "kfd_pm4_headers_diq.h"
-#include "kfd_dbgmgr.h"
-#include "kfd_dbgdev.h"
-#include "kfd_device_queue_manager.h"
-
-static DEFINE_MUTEX(kfd_dbgmgr_mutex);
-
-struct mutex *kfd_get_dbgmgr_mutex(void)
-{
- return &kfd_dbgmgr_mutex;
-}
-
-
-static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
-{
- kfree(pmgr->dbgdev);
-
- pmgr->dbgdev = NULL;
- pmgr->pasid = 0;
- pmgr->dev = NULL;
-}
-
-void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
-{
- if (pmgr) {
- kfd_dbgmgr_uninitialize(pmgr);
- kfree(pmgr);
- }
-}
-
-bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
-{
- enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
- struct kfd_dbgmgr *new_buff;
-
- if (WARN_ON(!pdev->init_complete))
- return false;
-
- new_buff = kfd_alloc_struct(new_buff);
- if (!new_buff) {
- pr_err("Failed to allocate dbgmgr instance\n");
- return false;
- }
-
- new_buff->pasid = 0;
- new_buff->dev = pdev;
- new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev);
- if (!new_buff->dbgdev) {
- pr_err("Failed to allocate dbgdev instance\n");
- kfree(new_buff);
- return false;
- }
-
-	/* determine the actual DBGDevice type: DIQ (cpsch) or not */
- if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
- type = DBGDEV_TYPE_NODIQ;
-
- kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
- *ppmgr = new_buff;
-
- return true;
-}
-
-long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
-{
- if (pmgr->pasid != 0) {
- pr_debug("H/W debugger is already active using pasid 0x%x\n",
- pmgr->pasid);
- return -EBUSY;
- }
-
- /* remember pasid */
- pmgr->pasid = p->pasid;
-
- /* provide the pqm for diq generation */
- pmgr->dbgdev->pqm = &p->pqm;
-
- /* activate the actual registering */
- pmgr->dbgdev->dbgdev_register(pmgr->dbgdev);
-
- return 0;
-}
-
-long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
-{
-	/* Is the request coming from the already registered process? */
- if (pmgr->pasid != p->pasid) {
- pr_debug("H/W debugger is not registered by calling pasid 0x%x\n",
- p->pasid);
- return -EINVAL;
- }
-
- pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev);
-
- pmgr->pasid = 0;
-
- return 0;
-}
-
-long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
- struct dbg_wave_control_info *wac_info)
-{
-	/* Is the request coming from the already registered process? */
- if (pmgr->pasid != wac_info->process->pasid) {
- pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n",
- wac_info->process->pasid);
- return -EINVAL;
- }
-
- return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info);
-}
-
-long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
- struct dbg_address_watch_info *adw_info)
-{
-	/* Is the request coming from the already registered process? */
- if (pmgr->pasid != adw_info->process->pasid) {
- pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n",
- adw_info->process->pasid);
- return -EINVAL;
- }
-
- return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev,
- adw_info);
-}
-
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
deleted file mode 100644
index f9c6df1fdc5c..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef KFD_DBGMGR_H_
-#define KFD_DBGMGR_H_
-
-#include "kfd_priv.h"
-
-/* must align with hsakmttypes definition */
-#pragma pack(push, 4)
-
-enum HSA_DBG_WAVEOP {
- HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */
- HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */
- HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */
- HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter dbg mode */
- HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */
- HSA_DBG_NUM_WAVEOP = 5,
- HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
-};
-
-enum HSA_DBG_WAVEMODE {
- /* send command to a single wave */
- HSA_DBG_WAVEMODE_SINGLE = 0,
- /*
- * Broadcast to all wavefronts of all processes is not
- * supported for HSA user mode
- */
-
- /* send to waves within current process */
- HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2,
- /* send to waves within current process on CU */
- HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,
- HSA_DBG_NUM_WAVEMODE = 3,
- HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
-};
-
-enum HSA_DBG_WAVEMSG_TYPE {
- HSA_DBG_WAVEMSG_AUTO = 0,
- HSA_DBG_WAVEMSG_USER = 1,
- HSA_DBG_WAVEMSG_ERROR = 2,
- HSA_DBG_NUM_WAVEMSG,
- HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
-};
-
-enum HSA_DBG_WATCH_MODE {
- HSA_DBG_WATCH_READ = 0, /* Read operations only */
- HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */
- HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */
- HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */
- HSA_DBG_WATCH_NUM,
- HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
-};
-
-/* This structure is hardware specific and may change in the future */
-struct HsaDbgWaveMsgAMDGen2 {
- union {
- struct ui32 {
- uint32_t UserData:8; /* user data */
- uint32_t ShaderArray:1; /* Shader array */
- uint32_t Priv:1; /* Privileged */
- uint32_t Reserved0:4; /* Reserved, should be 0 */
- uint32_t WaveId:4; /* wave id */
- uint32_t SIMD:2; /* SIMD id */
- uint32_t HSACU:4; /* Compute unit */
- uint32_t ShaderEngine:2;/* Shader engine */
- uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */
- uint32_t Reserved1:4; /* Reserved, should be 0 */
- } ui32;
- uint32_t Value;
- };
- uint32_t Reserved2;
-};
-
-union HsaDbgWaveMessageAMD {
- struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
- /* for future HsaDbgWaveMsgAMDGen3; */
-};
-
-struct HsaDbgWaveMessage {
- void *MemoryVA; /* ptr to associated host-accessible data */
- union HsaDbgWaveMessageAMD DbgWaveMsg;
-};
-
-/*
- * TODO: These definitions are to be MOVED to kfd_event, once it is implemented.
- *
- * HSA sync primitive, Event and HW Exception notification API definitions.
- * The API functions allow the runtime to define a so-called sync-primitive,
- * a SW object combining a user-mode provided "syncvar" and a scheduler event
- * that can be signaled through a defined GPU interrupt. A syncvar is
- * a process virtual memory location of a certain size that can be accessed
- * by CPU and GPU shader code within the process to set and query the content
- * within that memory. The definition of the content is determined by the HSA
- * runtime and potentially GPU shader code interfacing with the HSA runtime.
- * The syncvar values may be commonly written through a PM4 WRITE_DATA packet
- * in the user mode instruction stream. The OS scheduler event is typically
- * associated and signaled by an interrupt issued by the GPU, but other HSA
- * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced
- * by the KFD by this mechanism, too.
- */
-
-/* these are the new definitions for events */
-enum HSA_EVENTTYPE {
- HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */
- HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */
- HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change
- * (start/stop)
- */
- HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */
- HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */
- HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */
- HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */
- HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state
- * (EOP pm4)
- */
- /* ... */
- HSA_EVENTTYPE_MAXID,
- HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
-};
-
-/* Sub-definitions for various event types: Syncvar */
-struct HsaSyncVar {
- union SyncVar {
- void *UserData; /* pointer to user mode data */
- uint64_t UserDataPtrValue; /* 64bit compatibility of value */
- } SyncVar;
- uint64_t SyncVarSize;
-};
-
-/* Sub-definitions for various event types: NodeChange */
-
-enum HSA_EVENTTYPE_NODECHANGE_FLAGS {
- HSA_EVENTTYPE_NODECHANGE_ADD = 0,
- HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
- HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
-};
-
-struct HsaNodeChange {
- /* HSA node added/removed on the platform */
- enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;
-};
-
-/* Sub-definitions for various event types: DeviceStateChange */
-enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS {
- /* device started (and available) */
- HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0,
- /* device stopped (i.e. unavailable) */
- HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1,
- HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
-};
-
-enum HSA_DEVICE {
- HSA_DEVICE_CPU = 0,
- HSA_DEVICE_GPU = 1,
- MAX_HSA_DEVICE = 2
-};
-
-struct HsaDeviceStateChange {
- uint32_t NodeId; /* F-NUMA node that contains the device */
- enum HSA_DEVICE Device; /* device type: GPU or CPU */
- enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */
-};
-
-struct HsaEventData {
- enum HSA_EVENTTYPE EventType; /* event type */
- union EventData {
- /*
- * return data associated with HSA_EVENTTYPE_SIGNAL
- * and other events
- */
- struct HsaSyncVar SyncVar;
-
- /* data associated with HSA_EVENTTYPE_NODE_CHANGE */
- struct HsaNodeChange NodeChangeState;
-
- /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */
- struct HsaDeviceStateChange DeviceState;
- } EventData;
-
- /* the following data entries are internal to the KFD & thunk itself */
-
- /* internal thunk store for Event data (OsEventHandle) */
- uint64_t HWData1;
- /* internal thunk store for Event data (HWAddress) */
- uint64_t HWData2;
- /* internal thunk store for Event data (HWData) */
- uint32_t HWData3;
-};
-
-struct HsaEventDescriptor {
- /* event type to allocate */
- enum HSA_EVENTTYPE EventType;
- /* H-NUMA node containing GPU device that is event source */
- uint32_t NodeId;
- /* pointer to user mode syncvar data, syncvar->UserDataPtrValue
- * may be NULL
- */
- struct HsaSyncVar SyncVar;
-};
-
-struct HsaEvent {
- uint32_t EventId;
- struct HsaEventData EventData;
-};
-
-#pragma pack(pop)
-
-enum DBGDEV_TYPE {
- DBGDEV_TYPE_ILLEGAL = 0,
- DBGDEV_TYPE_NODIQ = 1,
- DBGDEV_TYPE_DIQ = 2,
- DBGDEV_TYPE_TEST = 3
-};
-
-struct dbg_address_watch_info {
- struct kfd_process *process;
- enum HSA_DBG_WATCH_MODE *watch_mode;
- uint64_t *watch_address;
- uint64_t *watch_mask;
- struct HsaEvent *watch_event;
- uint32_t num_watch_points;
-};
-
-struct dbg_wave_control_info {
- struct kfd_process *process;
- uint32_t trapId;
- enum HSA_DBG_WAVEOP operand;
- enum HSA_DBG_WAVEMODE mode;
- struct HsaDbgWaveMessage dbgWave_msg;
-};
-
-struct kfd_dbgdev {
-
- /* The device that owns this data. */
- struct kfd_dev *dev;
-
- /* kernel queue for DIQ */
- struct kernel_queue *kq;
-
- /* a pointer to the pqm of the calling process */
- struct process_queue_manager *pqm;
-
- /* type of debug device ( DIQ, non DIQ, etc. ) */
- enum DBGDEV_TYPE type;
-
- /* virtualized function pointers to device dbg */
- int (*dbgdev_register)(struct kfd_dbgdev *dbgdev);
- int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev);
- int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev,
- struct dbg_address_watch_info *adw_info);
- int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev,
- struct dbg_wave_control_info *wac_info);
-
-};
-
-struct kfd_dbgmgr {
- u32 pasid;
- struct kfd_dev *dev;
- struct kfd_dbgdev *dbgdev;
-};
-
-/* prototypes for debug manager functions */
-struct mutex *kfd_get_dbgmgr_mutex(void);
-void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr);
-bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev);
-long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
-long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
-long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
- struct dbg_wave_control_info *wac_info);
-long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
- struct dbg_address_watch_info *adw_info);
-#endif /* KFD_DBGMGR_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
index 673d5e34f213..ad5a40a685ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2017 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -35,7 +36,7 @@ static int kfd_debugfs_open(struct inode *inode, struct file *file)
}
static int kfd_debugfs_hang_hws_read(struct seq_file *m, void *data)
{
- seq_printf(m, "echo gpu_id > hang_hws\n");
+ seq_puts(m, "echo gpu_id > hang_hws\n");
return 0;
}
@@ -100,6 +101,8 @@ void kfd_debugfs_init(void)
kfd_debugfs_rls_by_device, &kfd_debugfs_fops);
debugfs_create_file("hang_hws", S_IFREG | 0200, debugfs_root,
kfd_debugfs_hang_hws_read, &kfd_debugfs_hang_hws_fops);
+ debugfs_create_file("mem_limit", S_IFREG | 0200, debugfs_root,
+ kfd_debugfs_kfd_mem_limits, &kfd_debugfs_fops);
}
void kfd_debugfs_fini(void)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3b119db16003..65a1d4f9004b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -52,574 +53,7 @@ extern const struct kfd2kgd_calls arcturus_kfd2kgd;
extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
-
-#ifdef KFD_SUPPORT_IOMMU_V2
-static const struct kfd_device_info kaveri_device_info = {
- .asic_family = CHIP_KAVERI,
- .asic_name = "kaveri",
- .gfx_target_version = 70000,
- .max_pasid_bits = 16,
- /* max num of queues for KV.TODO should be a dynamic value */
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = true,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info carrizo_device_info = {
- .asic_family = CHIP_CARRIZO,
- .asic_name = "carrizo",
- .gfx_target_version = 80001,
- .max_pasid_bits = 16,
- /* max num of queues for CZ.TODO should be a dynamic value */
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = true,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info raven_device_info = {
- .asic_family = CHIP_RAVEN,
- .asic_name = "raven",
- .gfx_target_version = 90002,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = true,
- .needs_pci_atomics = true,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-#endif
-
-#ifdef CONFIG_DRM_AMDGPU_CIK
-static const struct kfd_device_info hawaii_device_info = {
- .asic_family = CHIP_HAWAII,
- .asic_name = "hawaii",
- .gfx_target_version = 70001,
- .max_pasid_bits = 16,
- /* max num of queues for KV.TODO should be a dynamic value */
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-#endif
-
-static const struct kfd_device_info tonga_device_info = {
- .asic_family = CHIP_TONGA,
- .asic_name = "tonga",
- .gfx_target_version = 80002,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info fiji_device_info = {
- .asic_family = CHIP_FIJI,
- .asic_name = "fiji",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info fiji_vf_device_info = {
- .asic_family = CHIP_FIJI,
- .asic_name = "fiji",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-
-static const struct kfd_device_info polaris10_device_info = {
- .asic_family = CHIP_POLARIS10,
- .asic_name = "polaris10",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info polaris10_vf_device_info = {
- .asic_family = CHIP_POLARIS10,
- .asic_name = "polaris10",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info polaris11_device_info = {
- .asic_family = CHIP_POLARIS11,
- .asic_name = "polaris11",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info polaris12_device_info = {
- .asic_family = CHIP_POLARIS12,
- .asic_name = "polaris12",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vegam_device_info = {
- .asic_family = CHIP_VEGAM,
- .asic_name = "vegam",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega10_device_info = {
- .asic_family = CHIP_VEGA10,
- .asic_name = "vega10",
- .gfx_target_version = 90000,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega10_vf_device_info = {
- .asic_family = CHIP_VEGA10,
- .asic_name = "vega10",
- .gfx_target_version = 90000,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega12_device_info = {
- .asic_family = CHIP_VEGA12,
- .asic_name = "vega12",
- .gfx_target_version = 90004,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega20_device_info = {
- .asic_family = CHIP_VEGA20,
- .asic_name = "vega20",
- .gfx_target_version = 90006,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info arcturus_device_info = {
- .asic_family = CHIP_ARCTURUS,
- .asic_name = "arcturus",
- .gfx_target_version = 90008,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 6,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info aldebaran_device_info = {
- .asic_family = CHIP_ALDEBARAN,
- .asic_name = "aldebaran",
- .gfx_target_version = 90010,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 3,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info renoir_device_info = {
- .asic_family = CHIP_RENOIR,
- .asic_name = "renoir",
- .gfx_target_version = 90012,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info navi10_device_info = {
- .asic_family = CHIP_NAVI10,
- .asic_name = "navi10",
- .gfx_target_version = 100100,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 145,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info navi12_device_info = {
- .asic_family = CHIP_NAVI12,
- .asic_name = "navi12",
- .gfx_target_version = 100101,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 145,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info navi14_device_info = {
- .asic_family = CHIP_NAVI14,
- .asic_name = "navi14",
- .gfx_target_version = 100102,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 145,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info sienna_cichlid_device_info = {
- .asic_family = CHIP_SIENNA_CICHLID,
- .asic_name = "sienna_cichlid",
- .gfx_target_version = 100300,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 4,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info navy_flounder_device_info = {
- .asic_family = CHIP_NAVY_FLOUNDER,
- .asic_name = "navy_flounder",
- .gfx_target_version = 100301,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info vangogh_device_info = {
- .asic_family = CHIP_VANGOGH,
- .asic_name = "vangogh",
- .gfx_target_version = 100303,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info dimgrey_cavefish_device_info = {
- .asic_family = CHIP_DIMGREY_CAVEFISH,
- .asic_name = "dimgrey_cavefish",
- .gfx_target_version = 100302,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info beige_goby_device_info = {
- .asic_family = CHIP_BEIGE_GOBY,
- .asic_name = "beige_goby",
- .gfx_target_version = 100304,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info yellow_carp_device_info = {
- .asic_family = CHIP_YELLOW_CARP,
- .asic_name = "yellow_carp",
- .gfx_target_version = 100305,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_engines = 1,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info cyan_skillfish_device_info = {
- .asic_family = CHIP_CYAN_SKILLFISH,
- .asic_name = "cyan_skillfish",
- .gfx_target_version = 100103,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .num_sdma_engines = 2,
- .num_xgmi_sdma_engines = 0,
- .num_sdma_queues_per_engine = 8,
-};
+extern const struct kfd2kgd_calls gfx_v11_kfd2kgd;
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size);
@@ -627,196 +61,365 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
static int kfd_resume(struct kfd_dev *kfd);
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
+static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
+{
+ uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
+
+ switch (sdma_version) {
+ case IP_VERSION(4, 0, 0):/* VEGA10 */
+ case IP_VERSION(4, 0, 1):/* VEGA12 */
+ case IP_VERSION(4, 1, 0):/* RAVEN */
+ case IP_VERSION(4, 1, 1):/* RAVEN */
+ case IP_VERSION(4, 1, 2):/* RENOIR */
+ case IP_VERSION(5, 2, 1):/* VANGOGH */
+ case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
+ case IP_VERSION(5, 2, 6):/* GC 10.3.6 */
+ case IP_VERSION(5, 2, 7):/* GC 10.3.7 */
+ kfd->device_info.num_sdma_queues_per_engine = 2;
+ break;
+ case IP_VERSION(4, 2, 0):/* VEGA20 */
+ case IP_VERSION(4, 2, 2):/* ARCTURUS */
+ case IP_VERSION(4, 4, 0):/* ALDEBARAN */
+ case IP_VERSION(5, 0, 0):/* NAVI10 */
+ case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
+ case IP_VERSION(5, 0, 2):/* NAVI14 */
+ case IP_VERSION(5, 0, 5):/* NAVI12 */
+ case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
+ case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */
+ case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
+ case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ kfd->device_info.num_sdma_queues_per_engine = 8;
+ break;
+ default:
+ dev_warn(kfd_device,
+ "Default sdma queue per engine(8) is set due to mismatch of sdma ip block(SDMA_HWIP:0x%x).\n",
+ sdma_version);
+ kfd->device_info.num_sdma_queues_per_engine = 8;
+ }
+
+ switch (sdma_version) {
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ /* Reserve 1 for paging and 1 for gfx */
+ kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
+ /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */
+ kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL;
+ break;
+ case IP_VERSION(6, 0, 1):
+ /* Reserve 1 for paging and 1 for gfx */
+ kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
+ /* BIT(0)=engine-0 queue-0; BIT(1)=engine-0 queue-1; ... */
+ kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL;
+ break;
+ default:
+ break;
+ }
+}
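The reserved-queue bitmaps above interleave engines and queues: for the two-engine SDMA 6.0.0/6.0.2/6.0.3 layout the bit index is queue * num_engines + engine, so 0xF reserves queue 0 and queue 1 on both engines (one for paging, one for gfx), while the single-engine 6.0.1 layout simply reserves bits 0 and 1. A minimal lookup sketch under that assumption (hypothetical helper, not part of the patch):

static bool kfd_sdma_queue_is_reserved(uint64_t reserved_bitmap,
				       unsigned int num_engines,
				       unsigned int engine,
				       unsigned int queue)
{
	/* Bit layout per the comments above: engine varies fastest. */
	return reserved_bitmap & (1ULL << (queue * num_engines + engine));
}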
+
+static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
{
- struct kfd_dev *kfd;
- const struct kfd_device_info *device_info;
- const struct kfd2kgd_calls *f2g;
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ uint32_t gc_version = KFD_GC_VERSION(kfd);
+
+ switch (gc_version) {
+ case IP_VERSION(9, 0, 1): /* VEGA10 */
+ case IP_VERSION(9, 1, 0): /* RAVEN */
+ case IP_VERSION(9, 2, 1): /* VEGA12 */
+ case IP_VERSION(9, 2, 2): /* RAVEN */
+ case IP_VERSION(9, 3, 0): /* RENOIR */
+ case IP_VERSION(9, 4, 0): /* VEGA20 */
+ case IP_VERSION(9, 4, 1): /* ARCTURUS */
+ case IP_VERSION(9, 4, 2): /* ALDEBARAN */
+ case IP_VERSION(10, 3, 1): /* VANGOGH */
+ case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
+ case IP_VERSION(10, 3, 6): /* GC 10.3.6 */
+ case IP_VERSION(10, 3, 7): /* GC 10.3.7 */
+ case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 1, 10): /* NAVI10 */
+ case IP_VERSION(10, 1, 2): /* NAVI12 */
+ case IP_VERSION(10, 1, 1): /* NAVI14 */
+ case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
+ case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
+ case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
+ case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
+ break;
+ default:
+ dev_warn(kfd_device, "v9 event interrupt handler is set due to "
+ "mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version);
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
+ }
+}
+
+static void kfd_device_info_init(struct kfd_dev *kfd,
+ bool vf, uint32_t gfx_target_version)
+{
+ uint32_t gc_version = KFD_GC_VERSION(kfd);
+ uint32_t asic_type = kfd->adev->asic_type;
+
+ kfd->device_info.max_pasid_bits = 16;
+ kfd->device_info.max_no_of_hqd = 24;
+ kfd->device_info.num_of_watch_points = 4;
+ kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED;
+ kfd->device_info.gfx_target_version = gfx_target_version;
+
+ if (KFD_IS_SOC15(kfd)) {
+ kfd->device_info.doorbell_size = 8;
+ kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
+ kfd->device_info.supports_cwsr = true;
+
+ kfd_device_info_set_sdma_info(kfd);
+
+ kfd_device_info_set_event_interrupt_class(kfd);
+
+ /* Raven */
+ if (gc_version == IP_VERSION(9, 1, 0) ||
+ gc_version == IP_VERSION(9, 2, 2))
+ kfd->device_info.needs_iommu_device = true;
+
+ if (gc_version < IP_VERSION(11, 0, 0)) {
+ /* Navi2x+, Navi1x+ */
+ if (gc_version == IP_VERSION(10, 3, 6))
+ kfd->device_info.no_atomic_fw_version = 14;
+ else if (gc_version == IP_VERSION(10, 3, 7))
+ kfd->device_info.no_atomic_fw_version = 3;
+ else if (gc_version >= IP_VERSION(10, 3, 0))
+ kfd->device_info.no_atomic_fw_version = 92;
+ else if (gc_version >= IP_VERSION(10, 1, 1))
+ kfd->device_info.no_atomic_fw_version = 145;
+
+ /* Navi1x+ */
+ if (gc_version >= IP_VERSION(10, 1, 1))
+ kfd->device_info.needs_pci_atomics = true;
+ }
+ } else {
+ kfd->device_info.doorbell_size = 4;
+ kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_cik;
+ kfd->device_info.num_sdma_queues_per_engine = 2;
+
+ if (asic_type != CHIP_KAVERI &&
+ asic_type != CHIP_HAWAII &&
+ asic_type != CHIP_TONGA)
+ kfd->device_info.supports_cwsr = true;
+
+ if (asic_type == CHIP_KAVERI ||
+ asic_type == CHIP_CARRIZO)
+ kfd->device_info.needs_iommu_device = true;
+
+ if (asic_type != CHIP_HAWAII && !vf)
+ kfd->device_info.needs_pci_atomics = true;
+ }
+}
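Taken together these helpers reproduce the values that the removed per-ASIC tables carried. For illustration only (not part of the patch), a sanity check for a Vega20-class device (GC 9.4.0, SDMA 4.2.0) against the old vega20_device_info entries:

static void kfd_check_vega20_defaults(const struct kfd_dev *kfd)
{
	/* Values taken from the removed vega20_device_info table. */
	WARN_ON(kfd->device_info.doorbell_size != 8);
	WARN_ON(kfd->device_info.ih_ring_entry_size != 8 * sizeof(uint32_t));
	WARN_ON(!kfd->device_info.supports_cwsr);
	WARN_ON(kfd->device_info.num_sdma_queues_per_engine != 8);
	WARN_ON(kfd->device_info.needs_pci_atomics);
	WARN_ON(kfd->device_info.event_interrupt_class !=
		&event_interrupt_class_v9);
}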
+
+struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
+{
+ struct kfd_dev *kfd = NULL;
+ const struct kfd2kgd_calls *f2g = NULL;
struct pci_dev *pdev = adev->pdev;
+ uint32_t gfx_target_version = 0;
switch (adev->asic_type) {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
- if (vf)
- device_info = NULL;
- else
- device_info = &kaveri_device_info;
- f2g = &gfx_v7_kfd2kgd;
+ gfx_target_version = 70000;
+ if (!vf)
+ f2g = &gfx_v7_kfd2kgd;
break;
#endif
case CHIP_CARRIZO:
- if (vf)
- device_info = NULL;
- else
- device_info = &carrizo_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80001;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_HAWAII:
- if (vf)
- device_info = NULL;
- else
- device_info = &hawaii_device_info;
- f2g = &gfx_v7_kfd2kgd;
+ gfx_target_version = 70001;
+ if (!amdgpu_exp_hw_support)
+ pr_info(
+ "KFD support on Hawaii is experimental. See modparam exp_hw_support\n"
+ );
+ else if (!vf)
+ f2g = &gfx_v7_kfd2kgd;
break;
#endif
case CHIP_TONGA:
- if (vf)
- device_info = NULL;
- else
- device_info = &tonga_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80002;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_FIJI:
- if (vf)
- device_info = &fiji_vf_device_info;
- else
- device_info = &fiji_device_info;
+ gfx_target_version = 80003;
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS10:
- if (vf)
- device_info = &polaris10_vf_device_info;
- else
- device_info = &polaris10_device_info;
+ gfx_target_version = 80003;
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS11:
- if (vf)
- device_info = NULL;
- else
- device_info = &polaris11_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80003;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS12:
- if (vf)
- device_info = NULL;
- else
- device_info = &polaris12_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80003;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_VEGAM:
- if (vf)
- device_info = NULL;
- else
- device_info = &vegam_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80003;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
default:
switch (adev->ip_versions[GC_HWIP][0]) {
+ /* Vega 10 */
case IP_VERSION(9, 0, 1):
- if (vf)
- device_info = &vega10_vf_device_info;
- else
- device_info = &vega10_device_info;
+ gfx_target_version = 90000;
f2g = &gfx_v9_kfd2kgd;
break;
#ifdef KFD_SUPPORT_IOMMU_V2
+ /* Raven */
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
- if (vf)
- device_info = NULL;
- else
- device_info = &raven_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90002;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
#endif
+ /* Vega12 */
case IP_VERSION(9, 2, 1):
- if (vf)
- device_info = NULL;
- else
- device_info = &vega12_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90004;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
+ /* Renoir */
case IP_VERSION(9, 3, 0):
- if (vf)
- device_info = NULL;
- else
- device_info = &renoir_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90012;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
+ /* Vega20 */
case IP_VERSION(9, 4, 0):
- if (vf)
- device_info = NULL;
- else
- device_info = &vega20_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90006;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
+ /* Arcturus */
case IP_VERSION(9, 4, 1):
- device_info = &arcturus_device_info;
+ gfx_target_version = 90008;
f2g = &arcturus_kfd2kgd;
break;
+ /* Aldebaran */
case IP_VERSION(9, 4, 2):
- device_info = &aldebaran_device_info;
+ gfx_target_version = 90010;
f2g = &aldebaran_kfd2kgd;
break;
+ /* Navi10 */
case IP_VERSION(10, 1, 10):
- if (vf)
- device_info = NULL;
- else
- device_info = &navi10_device_info;
- f2g = &gfx_v10_kfd2kgd;
+ gfx_target_version = 100100;
+ if (!vf)
+ f2g = &gfx_v10_kfd2kgd;
break;
+ /* Navi12 */
case IP_VERSION(10, 1, 2):
- device_info = &navi12_device_info;
+ gfx_target_version = 100101;
f2g = &gfx_v10_kfd2kgd;
break;
+ /* Navi14 */
case IP_VERSION(10, 1, 1):
- if (vf)
- device_info = NULL;
- else
- device_info = &navi14_device_info;
- f2g = &gfx_v10_kfd2kgd;
+ gfx_target_version = 100102;
+ if (!vf)
+ f2g = &gfx_v10_kfd2kgd;
break;
+ /* Cyan Skillfish */
case IP_VERSION(10, 1, 3):
- if (vf)
- device_info = NULL;
- else
- device_info = &cyan_skillfish_device_info;
- f2g = &gfx_v10_kfd2kgd;
+ case IP_VERSION(10, 1, 4):
+ gfx_target_version = 100103;
+ if (!vf)
+ f2g = &gfx_v10_kfd2kgd;
break;
+ /* Sienna Cichlid */
case IP_VERSION(10, 3, 0):
- device_info = &sienna_cichlid_device_info;
+ gfx_target_version = 100300;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Navy Flounder */
case IP_VERSION(10, 3, 2):
- device_info = &navy_flounder_device_info;
+ gfx_target_version = 100301;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Van Gogh */
case IP_VERSION(10, 3, 1):
- if (vf)
- device_info = NULL;
- else
- device_info = &vangogh_device_info;
- f2g = &gfx_v10_3_kfd2kgd;
+ gfx_target_version = 100303;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Dimgrey Cavefish */
case IP_VERSION(10, 3, 4):
- device_info = &dimgrey_cavefish_device_info;
+ gfx_target_version = 100302;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Beige Goby */
case IP_VERSION(10, 3, 5):
- device_info = &beige_goby_device_info;
+ gfx_target_version = 100304;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Yellow Carp */
case IP_VERSION(10, 3, 3):
- if (vf)
- device_info = NULL;
- else
- device_info = &yellow_carp_device_info;
- f2g = &gfx_v10_3_kfd2kgd;
+ gfx_target_version = 100305;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ gfx_target_version = 100306;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
+ case IP_VERSION(11, 0, 0):
+ gfx_target_version = 110000;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 0, 1):
+ gfx_target_version = 110003;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 0, 2):
+ gfx_target_version = 110002;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 0, 3):
+ /* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */
+ gfx_target_version = 110001;
+ f2g = &gfx_v11_kfd2kgd;
break;
default:
- return NULL;
+ break;
}
break;
}
- if (!device_info || !f2g) {
- dev_err(kfd_device, "%s %s not supported in kfd\n",
- amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
+ if (!f2g) {
+ if (adev->ip_versions[GC_HWIP][0])
+ dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
+ adev->ip_versions[GC_HWIP][0], vf ? "VF" : "");
+ else
+ dev_err(kfd_device, "%s %s not supported in kfd\n",
+ amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
return NULL;
}
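The gfx_target_version values assigned above appear to encode the target ISA as major * 10000 + minor * 100 + stepping (90000 -> gfx900, 100102 -> gfx1012, 110001 -> gfx1101). A small decode sketch under that assumption:

static void kfd_decode_gfx_target_version(uint32_t v, uint32_t *major,
					  uint32_t *minor, uint32_t *stepping)
{
	*major = v / 10000;		/* 100102 -> 10 */
	*minor = (v / 100) % 100;	/* 100102 ->  1 */
	*stepping = v % 100;		/* 100102 ->  2 */
}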
@@ -824,8 +427,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
if (!kfd)
return NULL;
- kfd->kgd = kgd;
- kfd->device_info = device_info;
+ kfd->adev = adev;
+ kfd_device_info_init(kfd, vf, gfx_target_version);
kfd->pdev = pdev;
kfd->init_complete = false;
kfd->kfd2kgd = f2g;
@@ -844,31 +447,35 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
- if (cwsr_enable && kfd->device_info->supports_cwsr) {
- if (kfd->device_info->asic_family < CHIP_VEGA10) {
+ if (cwsr_enable && kfd->device_info.supports_cwsr) {
+ if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
- } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) {
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_arcturus_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
- } else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) {
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
- } else if (kfd->device_info->asic_family < CHIP_NAVI10) {
+ } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx9_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
- } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) {
+ } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_nv1x_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
- } else {
+ } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx10_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
+ } else {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx11_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
}
kfd->cwsr_enabled = true;
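The checks above are ordered by GC version, so the first matching range wins: for example, GC 10.3.0 (Sienna Cichlid) fails the < 10.1.1 and < 10.3.0 tests and lands in the < 11.0.0 branch, selecting cwsr_trap_gfx10_hex, while GC 9.4.1 (Arcturus) is caught by its exact-match case before the generic gfx9 fallback.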
@@ -882,23 +489,23 @@ static int kfd_gws_init(struct kfd_dev *kfd)
if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
return 0;
- if (hws_gws_support
- || (kfd->device_info->asic_family == CHIP_VEGA10
- && kfd->mec2_fw_version >= 0x81b3)
- || (kfd->device_info->asic_family >= CHIP_VEGA12
- && kfd->device_info->asic_family <= CHIP_RAVEN
- && kfd->mec2_fw_version >= 0x1b3)
- || (kfd->device_info->asic_family == CHIP_ARCTURUS
- && kfd->mec2_fw_version >= 0x30)
- || (kfd->device_info->asic_family == CHIP_ALDEBARAN
- && kfd->mec2_fw_version >= 0x28))
- ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
- amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
+ if (hws_gws_support || (KFD_IS_SOC15(kfd) &&
+ ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1)
+ && kfd->mec2_fw_version >= 0x81b3) ||
+ (KFD_GC_VERSION(kfd) <= IP_VERSION(9, 4, 0)
+ && kfd->mec2_fw_version >= 0x1b3) ||
+ (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)
+ && kfd->mec2_fw_version >= 0x30) ||
+ (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)
+ && kfd->mec2_fw_version >= 0x28))))
+ ret = amdgpu_amdkfd_alloc_gws(kfd->adev,
+ kfd->adev->gds.gws_size, &kfd->gws);
return ret;
}
-static void kfd_smi_init(struct kfd_dev *dev) {
+static void kfd_smi_init(struct kfd_dev *dev)
+{
INIT_LIST_HEAD(&dev->smi_clients);
spin_lock_init(&dev->smi_lock);
}
@@ -910,11 +517,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
unsigned int size, map_process_packet_size;
kfd->ddev = ddev;
- kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+ kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
KGD_ENGINE_MEC1);
- kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+ kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
KGD_ENGINE_MEC2);
- kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+ kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
KGD_ENGINE_SDMA1);
kfd->shared_resources = *gpu_resources;
@@ -927,42 +534,36 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
* 32 and 64-bit requests are possible and must be
* supported.
*/
- kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd);
+ kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
if (!kfd->pci_atomic_requested &&
- kfd->device_info->needs_pci_atomics &&
- (!kfd->device_info->no_atomic_fw_version ||
- kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) {
+ kfd->device_info.needs_pci_atomics &&
+ (!kfd->device_info.no_atomic_fw_version ||
+ kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
dev_info(kfd_device,
"skipped device %x:%x, PCI rejects atomics %d<%d\n",
kfd->pdev->vendor, kfd->pdev->device,
kfd->mec_fw_version,
- kfd->device_info->no_atomic_fw_version);
+ kfd->device_info.no_atomic_fw_version);
return false;
}
/* Verify module parameters regarding mapped process number*/
- if ((hws_max_conc_proc < 0)
- || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
- dev_err(kfd_device,
- "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
- hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
- kfd->vm_info.vmid_num_kfd);
+ if (hws_max_conc_proc >= 0)
+ kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd);
+ else
kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
- } else
- kfd->max_proc_per_quantum = hws_max_conc_proc;
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
- kfd->device_info->mqd_size_aligned;
+ kfd->device_info.mqd_size_aligned;
/*
* calculate max size of runlist packet.
* There can be only 2 packets at once
*/
- map_process_packet_size =
- kfd->device_info->asic_family == CHIP_ALDEBARAN ?
+ map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?
sizeof(struct pm4_mes_map_process_aldebaran) :
- sizeof(struct pm4_mes_map_process);
+ sizeof(struct pm4_mes_map_process);
size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
+ sizeof(struct pm4_mes_runlist)) * 2;
@@ -974,7 +575,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
size += 512 * 1024;
if (amdgpu_amdkfd_alloc_gtt_mem(
- kfd->kgd, size, &kfd->gtt_mem,
+ kfd->adev, size, &kfd->gtt_mem,
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
false)) {
dev_err(kfd_device, "Could not allocate %d bytes\n", size);
@@ -995,9 +596,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_doorbell_error;
}
- kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd);
+ if (amdgpu_use_xgmi_p2p)
+ kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
- kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd);
+ kfd->noretry = kfd->adev->gmc.noretry;
if (kfd_interrupt_init(kfd)) {
dev_err(kfd_device, "Error initializing interrupts\n");
@@ -1015,7 +617,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
*/
if (kfd_gws_init(kfd)) {
dev_err(kfd_device, "Could not allocate %d gws\n",
- amdgpu_amdkfd_get_num_gws(kfd->kgd));
+ kfd->adev->gds.gws_size);
goto gws_error;
}
@@ -1030,15 +632,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd_cwsr_init(kfd);
- svm_migrate_init((struct amdgpu_device *)kfd->kgd);
+ svm_migrate_init(kfd->adev);
- if(kgd2kfd_resume_iommu(kfd))
+ if (kgd2kfd_resume_iommu(kfd))
goto device_iommu_error;
if (kfd_resume(kfd))
goto kfd_resume_error;
- kfd->dbgmgr = NULL;
+ amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info);
if (kfd_topology_add_device(kfd)) {
dev_err(kfd_device, "Error adding device to topology\n");
@@ -1068,10 +670,10 @@ kfd_interrupt_error:
kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
- amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
alloc_gtt_mem_failure:
if (kfd->gws)
- amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
+ amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
dev_err(kfd_device,
"device %x:%x NOT added due to errors\n",
kfd->pdev->vendor, kfd->pdev->device);
@@ -1088,9 +690,9 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_doorbell_fini(kfd);
ida_destroy(&kfd->doorbell_ida);
kfd_gtt_sa_fini(kfd);
- amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
if (kfd->gws)
- amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
+ amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
}
kfree(kfd);
@@ -1229,7 +831,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
if (!kfd->init_complete)
return;
- if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
+ if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
dev_err_once(kfd_device, "Ring entry too small\n");
return;
}
@@ -1246,7 +848,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}
-int kgd2kfd_quiesce_mm(struct mm_struct *mm)
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
{
struct kfd_process *p;
int r;
@@ -1260,7 +862,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
return -ESRCH;
WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
- r = kfd_process_evict_queues(p);
+ r = kfd_process_evict_queues(p, trigger);
kfd_unref_process(p);
return r;
@@ -1338,8 +940,6 @@ out:
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size)
{
- unsigned int num_of_longs;
-
if (WARN_ON(buf_size < chunk_size))
return -EINVAL;
if (WARN_ON(buf_size == 0))
@@ -1350,11 +950,8 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
kfd->gtt_sa_chunk_size = chunk_size;
kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
- num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
- BITS_PER_LONG;
-
- kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);
-
+ kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks,
+ GFP_KERNEL);
if (!kfd->gtt_sa_bitmap)
return -ENOMEM;
@@ -1364,13 +961,12 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
mutex_init(&kfd->gtt_sa_lock);
return 0;
-
}
static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
mutex_destroy(&kfd->gtt_sa_lock);
- kfree(kfd->gtt_sa_bitmap);
+ bitmap_free(kfd->gtt_sa_bitmap);
}
static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
@@ -1438,7 +1034,7 @@ kfd_gtt_restart_search:
/* If we need only one chunk, mark it as allocated and get out */
if (size <= kfd->gtt_sa_chunk_size) {
pr_debug("Single bit\n");
- set_bit(found, kfd->gtt_sa_bitmap);
+ __set_bit(found, kfd->gtt_sa_bitmap);
goto kfd_gtt_out;
}
@@ -1476,10 +1072,8 @@ kfd_gtt_restart_search:
(*mem_obj)->range_start, (*mem_obj)->range_end);
/* Mark the chunks as allocated */
- for (found = (*mem_obj)->range_start;
- found <= (*mem_obj)->range_end;
- found++)
- set_bit(found, kfd->gtt_sa_bitmap);
+ bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start,
+ (*mem_obj)->range_end - (*mem_obj)->range_start + 1);
kfd_gtt_out:
mutex_unlock(&kfd->gtt_sa_lock);
@@ -1494,8 +1088,6 @@ kfd_gtt_no_free_chunk:
int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
- unsigned int bit;
-
/* Act like kfree when trying to free a NULL object */
if (!mem_obj)
return 0;
@@ -1506,10 +1098,8 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
mutex_lock(&kfd->gtt_sa_lock);
/* Mark the chunks as free */
- for (bit = mem_obj->range_start;
- bit <= mem_obj->range_end;
- bit++)
- clear_bit(bit, kfd->gtt_sa_bitmap);
+ bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start,
+ mem_obj->range_end - mem_obj->range_start + 1);
mutex_unlock(&kfd->gtt_sa_lock);
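The open-coded set_bit()/clear_bit() loops are replaced with the <linux/bitmap.h> range helpers, which take a start bit and a bit count, hence the range_end - range_start + 1 length. A minimal usage sketch with hypothetical chunk numbers (illustration only):

static void kfd_gtt_sa_bitmap_example(struct kfd_dev *kfd)
{
	/* Mark chunks 3..5 as allocated, then free them again. */
	bitmap_set(kfd->gtt_sa_bitmap, 3, 3);
	bitmap_clear(kfd->gtt_sa_bitmap, 3, 3);
}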
@@ -1526,7 +1116,7 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
void kfd_inc_compute_active(struct kfd_dev *kfd)
{
if (atomic_inc_return(&kfd->compute_profile) == 1)
- amdgpu_amdkfd_set_compute_idle(kfd->kgd, false);
+ amdgpu_amdkfd_set_compute_idle(kfd->adev, false);
}
void kfd_dec_compute_active(struct kfd_dev *kfd)
@@ -1534,7 +1124,7 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
int count = atomic_dec_return(&kfd->compute_profile);
if (count == 0)
- amdgpu_amdkfd_set_compute_idle(kfd->kgd, true);
+ amdgpu_amdkfd_set_compute_idle(kfd->adev, true);
WARN_ONCE(count < 0, "Compute profile ref. count error");
}
@@ -1544,6 +1134,26 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
}
+/* kfd_get_num_sdma_engines returns the number of PCIe-optimized SDMA engines
+ * and kfd_get_num_xgmi_sdma_engines returns the number of XGMI-optimized SDMA
+ * engines.
+ * When the device has more than two engines, we reserve two for PCIe to enable
+ * full-duplex and the rest are used as XGMI.
+ */
+unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev)
+{
+ /* If XGMI is not supported, all SDMA engines are PCIe */
+ if (!kdev->adev->gmc.xgmi.supported)
+ return kdev->adev->sdma.num_instances;
+
+ return min(kdev->adev->sdma.num_instances, 2);
+}
+
+unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev)
+{
+ /* After reserved for PCIe, the rest of engines are XGMI */
+ return kdev->adev->sdma.num_instances - kfd_get_num_sdma_engines(kdev);
+}
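This split reproduces the counts from the removed per-ASIC tables: Arcturus exposes 8 SDMA instances with XGMI supported and gets 2 PCIe + 6 XGMI engines, Aldebaran's 5 instances split 2 + 3, and a non-XGMI part such as Navi10 keeps all of its instances as PCIe engines. A minimal caller sketch (illustration only):

static void kfd_log_sdma_split(struct kfd_dev *kdev)
{
	/* e.g. Arcturus: 2 PCIe + 6 XGMI; Aldebaran: 2 PCIe + 3 XGMI */
	dev_info(kfd_device, "SDMA engines: %u PCIe, %u XGMI\n",
		 kfd_get_num_sdma_engines(kdev),
		 kfd_get_num_xgmi_sdma_engines(kdev));
}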
+
#if defined(CONFIG_DEBUG_FS)
/* This function will send a package to HIQ to hang the HWS
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 93e33dd84dd4..ecb4c3abc629 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -34,6 +35,7 @@
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
+#include "mes_api_def.h"
/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
@@ -47,7 +49,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param);
+ uint32_t filter_param, bool reset);
static int map_queues_cpsch(struct device_queue_manager *dqm);
@@ -58,7 +60,7 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
struct queue *q);
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
- struct queue *q);
+ struct queue *q, const uint32_t *restore_sdma_id);
static void kfd_process_hw_exception(struct work_struct *work);
static inline
@@ -99,68 +101,245 @@ unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
return dqm->dev->shared_resources.num_pipe_per_mec;
}
-static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
-{
- return dqm->dev->device_info->num_sdma_engines;
-}
-
-static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
-{
- return dqm->dev->device_info->num_xgmi_sdma_engines;
-}
-
static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
{
- return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
+ return kfd_get_num_sdma_engines(dqm->dev) +
+ kfd_get_num_xgmi_sdma_engines(dqm->dev);
}
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
- return dqm->dev->device_info->num_sdma_engines
- * dqm->dev->device_info->num_sdma_queues_per_engine;
+ return kfd_get_num_sdma_engines(dqm->dev) *
+ dqm->dev->device_info.num_sdma_queues_per_engine;
}
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
- return dqm->dev->device_info->num_xgmi_sdma_engines
- * dqm->dev->device_info->num_sdma_queues_per_engine;
+ return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
+ dqm->dev->device_info.num_sdma_queues_per_engine;
+}
+
+static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm)
+{
+ return dqm->dev->device_info.reserved_sdma_queues_bitmap;
}
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
return dqm->dev->kfd2kgd->program_sh_mem_settings(
- dqm->dev->kgd, qpd->vmid,
+ dqm->dev->adev, qpd->vmid,
qpd->sh_mem_config,
qpd->sh_mem_ape1_base,
qpd->sh_mem_ape1_limit,
qpd->sh_mem_bases);
}
+static void kfd_hws_hang(struct device_queue_manager *dqm)
+{
+ /*
+ * Issue a GPU reset if HWS is unresponsive
+ */
+ dqm->is_hws_hang = true;
+
+ /* It's possible we're detecting a HWS hang in the
+ * middle of a GPU reset. No need to schedule another
+ * reset in this case.
+ */
+ if (!dqm->is_resetting)
+ schedule_work(&dqm->hw_exception_work);
+}
+
+static int convert_to_mes_queue_type(int queue_type)
+{
+ int mes_queue_type;
+
+ switch (queue_type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ mes_queue_type = MES_QUEUE_TYPE_SDMA;
+ break;
+ default:
+ WARN(1, "Invalid queue type %d", queue_type);
+ mes_queue_type = -EINVAL;
+ break;
+ }
+
+ return mes_queue_type;
+}
+
+static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+ struct mes_add_queue_input queue_input;
+ int r, queue_type;
+ uint64_t wptr_addr_off;
+
+ if (dqm->is_hws_hang)
+ return -EIO;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
+ queue_input.process_id = qpd->pqm->process->pasid;
+ queue_input.page_table_base_addr = qpd->page_table_base;
+ queue_input.process_va_start = 0;
+ queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
+ /* MES unit for quantum is 100ns */
+ queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. */
+ queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
+ queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
+ queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
+ queue_input.inprocess_gang_priority = q->properties.priority;
+ queue_input.gang_global_priority_level =
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+ queue_input.doorbell_offset = q->properties.doorbell_off;
+ queue_input.mqd_addr = q->gart_mqd_addr;
+ queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
+
+ if (q->wptr_bo) {
+ wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va;
+ queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
+ }
+
+ queue_input.is_kfd_process = 1;
+ queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
+ queue_input.queue_size = q->properties.queue_size >> 2;
+
+ queue_input.paging = false;
+ queue_input.tba_addr = qpd->tba_addr;
+ queue_input.tma_addr = qpd->tma_addr;
+
+ queue_type = convert_to_mes_queue_type(q->properties.type);
+ if (queue_type < 0) {
+ pr_err("Queue type not supported with MES, queue:%d\n",
+ q->properties.type);
+ return -EINVAL;
+ }
+ queue_input.queue_type = (uint32_t)queue_type;
+
+ if (q->gws) {
+ queue_input.gws_base = 0;
+ queue_input.gws_size = qpd->num_gws;
+ }
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r) {
+ pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
+ q->properties.doorbell_off);
+ pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+ kfd_hws_hang(dqm);
+	}
+
+ return r;
+}
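The wptr_mc_addr computation above translates the queue's write-pointer GPU VA into an MC address MES can poll: wptr_addr_off is the write pointer's offset inside the wptr BO, and adding it to the BO's backing page (tbo.resource->start << PAGE_SHIFT) gives the physical location. For illustration, with a hypothetical BO mapped at VA 0x7f0000000000, a write pointer at VA 0x7f0000000008 and the BO resident at page frame 0x1000 on a 4 KiB page size, the offset is 8 and wptr_mc_addr becomes (0x1000 << 12) + 8 = 0x1000008.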
+
+static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+ int r;
+ struct mes_remove_queue_input queue_input;
+
+ if (dqm->is_hws_hang)
+ return -EIO;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
+ queue_input.doorbell_offset = q->properties.doorbell_off;
+ queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+
+ if (r) {
+ pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
+ q->properties.doorbell_off);
+ pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+ kfd_hws_hang(dqm);
+ }
+
+ return r;
+}
+
+static int remove_all_queues_mes(struct device_queue_manager *dqm)
+{
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+ int retval = 0;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.is_active) {
+ retval = remove_queue_mes(dqm, q, qpd);
+ if (retval) {
+ pr_err("%s: Failed to remove queue %d for dev %d",
+ __func__,
+ q->properties.queue_id,
+ dqm->dev->id);
+ return retval;
+ }
+ }
+ }
+ }
+
+ return retval;
+}
+
static void increment_queue_count(struct device_queue_manager *dqm,
- enum kfd_queue_type type)
+ struct qcm_process_device *qpd,
+ struct queue *q)
{
dqm->active_queue_count++;
- if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+ q->properties.type == KFD_QUEUE_TYPE_DIQ)
dqm->active_cp_queue_count++;
+
+ if (q->properties.is_gws) {
+ dqm->gws_queue_count++;
+ qpd->mapped_gws_queue = true;
+ }
}
static void decrement_queue_count(struct device_queue_manager *dqm,
- enum kfd_queue_type type)
+ struct qcm_process_device *qpd,
+ struct queue *q)
{
dqm->active_queue_count--;
- if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+ q->properties.type == KFD_QUEUE_TYPE_DIQ)
dqm->active_cp_queue_count--;
+
+ if (q->properties.is_gws) {
+ dqm->gws_queue_count--;
+ qpd->mapped_gws_queue = false;
+ }
}
-static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
+/*
+ * Allocate a doorbell ID to this queue.
+ * If doorbell_id is passed in, make sure requested ID is valid then allocate it.
+ */
+static int allocate_doorbell(struct qcm_process_device *qpd,
+ struct queue *q,
+ uint32_t const *restore_id)
{
struct kfd_dev *dev = qpd->dqm->dev;
- if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
+ if (!KFD_IS_SOC15(dev)) {
/* On pre-SOC15 chips we need to use the queue ID to
* preserve the user mode ABI.
*/
+
+ if (restore_id && *restore_id != q->properties.queue_id)
+ return -EINVAL;
+
q->doorbell_id = q->properties.queue_id;
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
@@ -169,25 +348,37 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
	 * The doorbell index distance between RLC (2*i) and (2*i+1)
* for a SDMA engine is 512.
*/
- uint32_t *idx_offset =
- dev->shared_resources.sdma_doorbell_idx;
- q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
- + (q->properties.sdma_queue_id & 1)
- * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
- + (q->properties.sdma_queue_id >> 1);
+ uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx;
+ uint32_t valid_id = idx_offset[q->properties.sdma_engine_id]
+ + (q->properties.sdma_queue_id & 1)
+ * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
+ + (q->properties.sdma_queue_id >> 1);
+
+ if (restore_id && *restore_id != valid_id)
+ return -EINVAL;
+ q->doorbell_id = valid_id;
} else {
- /* For CP queues on SOC15 reserve a free doorbell ID */
- unsigned int found;
-
- found = find_first_zero_bit(qpd->doorbell_bitmap,
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
- if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
- pr_debug("No doorbells available");
- return -EBUSY;
+ /* For CP queues on SOC15 */
+ if (restore_id) {
+ /* make sure that ID is free */
+ if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
+ return -EINVAL;
+
+ q->doorbell_id = *restore_id;
+ } else {
+ /* or reserve a free doorbell ID */
+ unsigned int found;
+
+ found = find_first_zero_bit(qpd->doorbell_bitmap,
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+ if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+ pr_debug("No doorbells available");
+ return -EBUSY;
+ }
+ set_bit(found, qpd->doorbell_bitmap);
+ q->doorbell_id = found;
}
- set_bit(found, qpd->doorbell_bitmap);
- q->doorbell_id = found;
}
q->properties.doorbell_off =
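Per the comment above, the RLC doorbell pairs of an SDMA engine sit 512 indices apart (KFD_QUEUE_DOORBELL_MIRROR_OFFSET), so the static assignment reduces to the arithmetic below; e.g. engine 1, sdma_queue_id 3 maps to idx_offset[1] + 512 + 1. A sketch of just the index math, assuming the 512 mirror offset:

static uint32_t kfd_sdma_static_doorbell(const uint32_t *idx_offset,
					 uint32_t engine, uint32_t queue)
{
	/* Odd queues land in the mirrored block 512 indices higher. */
	return idx_offset[engine] + (queue & 1) * 512 + (queue >> 1);
}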
@@ -202,7 +393,7 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
unsigned int old;
struct kfd_dev *dev = qpd->dqm->dev;
- if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
+ if (!KFD_IS_SOC15(dev) ||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
return;
@@ -216,7 +407,7 @@ static void program_trap_handler_settings(struct device_queue_manager *dqm,
{
if (dqm->dev->kfd2kgd->program_trap_handler_settings)
dqm->dev->kfd2kgd->program_trap_handler_settings(
- dqm->dev->kgd, qpd->vmid,
+ dqm->dev->adev, qpd->vmid,
qpd->tba_addr, qpd->tma_addr);
}
@@ -250,21 +441,20 @@ static int allocate_vmid(struct device_queue_manager *dqm,
program_sh_mem_settings(dqm, qpd);
- if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 &&
- dqm->dev->cwsr_enabled)
+ if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled)
program_trap_handler_settings(dqm, qpd);
/* qpd->page_table_base is set earlier when register_process()
* is called, i.e. when the first queue is created.
*/
- dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
+ dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
qpd->vmid,
qpd->page_table_base);
/* invalidate the VM context after pasid and vmid mapping is set up */
kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
if (dqm->dev->kfd2kgd->set_scratch_backing_va)
- dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
+ dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
qpd->sh_hidden_private_base, qpd->vmid);
return 0;
@@ -283,7 +473,7 @@ static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
if (ret)
return ret;
- return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
+ return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
pmf->release_mem_size / sizeof(uint32_t));
}
@@ -293,7 +483,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
struct queue *q)
{
/* On GFX v7, CP doesn't flush TC at dequeue */
- if (q->device->device_info->asic_family == CHIP_HAWAII)
+ if (q->device->adev->asic_type == CHIP_HAWAII)
if (flush_texture_cache_nocpsch(q->device, qpd))
pr_err("Failed to flush TC\n");
@@ -309,7 +499,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
static int create_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
- struct qcm_process_device *qpd)
+ struct qcm_process_device *qpd,
+ const struct kfd_criu_queue_priv_data *qd,
+ const void *restore_mqd, const void *restore_ctl_stack)
{
struct mqd_manager *mqd_mgr;
int retval;
@@ -349,13 +541,13 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
q->pipe, q->queue);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- retval = allocate_sdma_queue(dqm, q);
+ retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
if (retval)
goto deallocate_vmid;
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
}
- retval = allocate_doorbell(qpd, q);
+ retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
if (retval)
goto out_deallocate_hqd;
@@ -368,8 +560,15 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
retval = -ENOMEM;
goto out_deallocate_doorbell;
}
- mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
- &q->gart_mqd_addr, &q->properties);
+
+ if (qd)
+ mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
+ &q->properties, restore_mqd, restore_ctl_stack,
+ qd->ctl_stack_size);
+ else
+ mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+ &q->gart_mqd_addr, &q->properties);
+
if (q->properties.is_active) {
if (!dqm->sched_running) {
WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
@@ -390,7 +589,7 @@ add_queue_to_list:
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
if (q->properties.is_active)
- increment_queue_count(dqm, q->properties.type);
+ increment_queue_count(dqm, qpd, q);
/*
* Unconditionally increment this counter, regardless of the queue's
@@ -459,6 +658,70 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}
+#define SQ_IND_CMD_CMD_KILL 0x00000003
+#define SQ_IND_CMD_MODE_BROADCAST 0x00000001
+
+static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
+{
+ int status = 0;
+ unsigned int vmid;
+ uint16_t queried_pasid;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+ struct kfd_process_device *pdd;
+ int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
+ int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
+
+ reg_sq_cmd.u32All = 0;
+ reg_gfx_index.u32All = 0;
+
+ pr_debug("Killing all process wavefronts\n");
+
+ if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
+ pr_err("no vmid pasid mapping supported \n");
+ return -EOPNOTSUPP;
+ }
+
+ /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
+ * ATC_VMID15_PASID_MAPPING
+ * to check which VMID the current process is mapped to.
+ */
+
+ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
+ status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
+ (dev->adev, vmid, &queried_pasid);
+
+ if (status && queried_pasid == p->pasid) {
+ pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
+ vmid, p->pasid);
+ break;
+ }
+ }
+
+ if (vmid > last_vmid_to_scan) {
+ pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
+ return -EFAULT;
+ }
+
+	/* take the VMID for this process the safe way, via its PDD */
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd)
+ return -EFAULT;
+
+ reg_gfx_index.bits.sh_broadcast_writes = 1;
+ reg_gfx_index.bits.se_broadcast_writes = 1;
+ reg_gfx_index.bits.instance_broadcast_writes = 1;
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
+ reg_sq_cmd.bits.vm_id = vmid;
+
+ dev->kfd2kgd->wave_control_execute(dev->adev,
+ reg_gfx_index.u32All,
+ reg_sq_cmd.u32All);
+
+ return 0;
+}
+
/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
* to avoid asynchronized access
*/
@@ -515,13 +778,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
deallocate_vmid(dqm, qpd, q);
}
qpd->queue_count--;
- if (q->properties.is_active) {
- decrement_queue_count(dqm, q->properties.type);
- if (q->properties.is_gws) {
- dqm->gws_queue_count--;
- qpd->mapped_gws_queue = false;
- }
- }
+ if (q->properties.is_active)
+ decrement_queue_count(dqm, qpd, q);
return retval;
}
@@ -579,8 +837,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
/* Make sure the queue is unmapped before updating the MQD */
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
- retval = unmap_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (!dqm->dev->shared_resources.enable_mes)
+ retval = unmap_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
+ else if (prev_active)
+ retval = remove_queue_mes(dqm, q, &pdd->qpd);
+
if (retval) {
pr_err("unmap queue failed\n");
goto out_unlock;
@@ -596,9 +858,9 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
}
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
- (dqm->dev->cwsr_enabled?
- KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
- KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
+ (dqm->dev->cwsr_enabled ?
+ KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
+ KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
if (retval) {
pr_err("destroy mqd failed\n");
@@ -614,12 +876,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
* dqm->active_queue_count to determine whether a new runlist must be
* uploaded.
*/
- if (q->properties.is_active && !prev_active)
- increment_queue_count(dqm, q->properties.type);
- else if (!q->properties.is_active && prev_active)
- decrement_queue_count(dqm, q->properties.type);
-
- if (q->gws && !q->properties.is_gws) {
+ if (q->properties.is_active && !prev_active) {
+ increment_queue_count(dqm, &pdd->qpd, q);
+ } else if (!q->properties.is_active && prev_active) {
+ decrement_queue_count(dqm, &pdd->qpd, q);
+ } else if (q->gws && !q->properties.is_gws) {
if (q->properties.is_active) {
dqm->gws_queue_count++;
pdd->qpd.mapped_gws_queue = true;
@@ -633,9 +894,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
q->properties.is_gws = false;
}
- if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
- retval = map_queues_cpsch(dqm);
- else if (q->properties.is_active &&
+ if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
+ if (!dqm->dev->shared_resources.enable_mes)
+ retval = map_queues_cpsch(dqm);
+ else if (q->properties.is_active)
+ retval = add_queue_mes(dqm, q, &pdd->qpd);
+ } else if (q->properties.is_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
@@ -681,19 +945,15 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = false;
- decrement_queue_count(dqm, q->properties.type);
- if (q->properties.is_gws) {
- dqm->gws_queue_count--;
- qpd->mapped_gws_queue = false;
- }
+ decrement_queue_count(dqm, qpd, q);
if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
continue;
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
- (dqm->dev->cwsr_enabled?
- KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
- KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
+ (dqm->dev->cwsr_enabled ?
+ KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
+ KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
if (retval && !ret)
/* Return the first error, but keep going to
@@ -731,13 +991,23 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
continue;
q->properties.is_active = false;
- decrement_queue_count(dqm, q->properties.type);
+ decrement_queue_count(dqm, qpd, q);
+
+ if (dqm->dev->shared_resources.enable_mes) {
+ retval = remove_queue_mes(dqm, q, qpd);
+ if (retval) {
+ pr_err("Failed to evict queue %d\n",
+ q->properties.queue_id);
+ goto out;
+ }
+ }
}
pdd->last_evict_timestamp = get_jiffies_64();
- retval = execute_queues_cpsch(dqm,
- qpd->is_debug ?
- KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (!dqm->dev->shared_resources.enable_mes)
+ retval = execute_queues_cpsch(dqm,
+ qpd->is_debug ?
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
out:
dqm_unlock(dqm);
@@ -776,7 +1046,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
if (!list_empty(&qpd->queues_list)) {
dqm->dev->kfd2kgd->set_vm_context_page_table_base(
- dqm->dev->kgd,
+ dqm->dev->adev,
qpd->vmid,
qpd->page_table_base);
kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
@@ -802,11 +1072,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = true;
- increment_queue_count(dqm, q->properties.type);
- if (q->properties.is_gws) {
- dqm->gws_queue_count++;
- qpd->mapped_gws_queue = true;
- }
+ increment_queue_count(dqm, qpd, q);
if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
continue;
@@ -864,10 +1130,20 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
continue;
q->properties.is_active = true;
- increment_queue_count(dqm, q->properties.type);
+ increment_queue_count(dqm, &pdd->qpd, q);
+
+ if (dqm->dev->shared_resources.enable_mes) {
+ retval = add_queue_mes(dqm, q, qpd);
+ if (retval) {
+ pr_err("Failed to restore queue %d\n",
+ q->properties.queue_id);
+ goto out;
+ }
+ }
}
- retval = execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (!dqm->dev->shared_resources.enable_mes)
+ retval = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
qpd->evicted = 0;
eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
atomic64_add(eviction_duration, &pdd->evict_duration_counter);
@@ -954,7 +1230,7 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
unsigned int vmid)
{
return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
- dqm->dev->kgd, pasid, vmid);
+ dqm->dev->adev, pasid, vmid);
}
static void init_interrupts(struct device_queue_manager *dqm)
@@ -963,7 +1239,25 @@ static void init_interrupts(struct device_queue_manager *dqm)
for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
if (is_pipe_enabled(dqm, 0, i))
- dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
+ dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i);
+}
+
+static void init_sdma_bitmaps(struct device_queue_manager *dqm)
+{
+ unsigned int num_sdma_queues =
+ min_t(unsigned int, sizeof(dqm->sdma_bitmap)*8,
+ get_num_sdma_queues(dqm));
+ unsigned int num_xgmi_sdma_queues =
+ min_t(unsigned int, sizeof(dqm->xgmi_sdma_bitmap)*8,
+ get_num_xgmi_sdma_queues(dqm));
+
+ if (num_sdma_queues)
+ dqm->sdma_bitmap = GENMASK_ULL(num_sdma_queues-1, 0);
+ if (num_xgmi_sdma_queues)
+ dqm->xgmi_sdma_bitmap = GENMASK_ULL(num_xgmi_sdma_queues-1, 0);
+
+ dqm->sdma_bitmap &= ~get_reserved_sdma_queues_bitmap(dqm);
+ pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
}
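As a worked example, assuming a two-engine SDMA 6.0.0 part with 8 queues per engine: get_num_sdma_queues() is 16, so sdma_bitmap starts as GENMASK_ULL(15, 0) = 0xFFFF; masking out the 0xF reserved bitmap set in kfd_device_info_set_sdma_info leaves 0xFFF0, i.e. queues 0 and 1 on both engines stay reserved for paging and gfx and 12 queues remain allocatable to user processes.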
static int initialize_nocpsch(struct device_queue_manager *dqm)
@@ -994,8 +1288,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
- dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
- dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
+ init_sdma_bitmaps(dqm);
return 0;
}
@@ -1014,19 +1307,22 @@ static void uninitialize(struct device_queue_manager *dqm)
static int start_nocpsch(struct device_queue_manager *dqm)
{
+ int r = 0;
+
pr_info("SW scheduler is used");
init_interrupts(dqm);
-
- if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
- return pm_init(&dqm->packet_mgr, dqm);
- dqm->sched_running = true;
- return 0;
+ if (dqm->dev->adev->asic_type == CHIP_HAWAII)
+ r = pm_init(&dqm->packet_mgr, dqm);
+ if (!r)
+ dqm->sched_running = true;
+
+ return r;
}
static int stop_nocpsch(struct device_queue_manager *dqm)
{
- if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
+ if (dqm->dev->adev->asic_type == CHIP_HAWAII)
pm_uninit(&dqm->packet_mgr, false);
dqm->sched_running = false;
@@ -1041,7 +1337,7 @@ static void pre_reset(struct device_queue_manager *dqm)
}
static int allocate_sdma_queue(struct device_queue_manager *dqm,
- struct queue *q)
+ struct queue *q, const uint32_t *restore_sdma_id)
{
int bit;
@@ -1051,31 +1347,54 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
return -ENOMEM;
}
- bit = __ffs64(dqm->sdma_bitmap);
- dqm->sdma_bitmap &= ~(1ULL << bit);
- q->sdma_id = bit;
+ if (restore_sdma_id) {
+ /* Re-use existing sdma_id */
+ if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) {
+ pr_err("SDMA queue already in use\n");
+ return -EBUSY;
+ }
+ dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id);
+ q->sdma_id = *restore_sdma_id;
+ } else {
+ /* Find first available sdma_id */
+ bit = __ffs64(dqm->sdma_bitmap);
+ dqm->sdma_bitmap &= ~(1ULL << bit);
+ q->sdma_id = bit;
+ }
+
q->properties.sdma_engine_id = q->sdma_id %
- get_num_sdma_engines(dqm);
+ kfd_get_num_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
- get_num_sdma_engines(dqm);
+ kfd_get_num_sdma_engines(dqm->dev);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
if (dqm->xgmi_sdma_bitmap == 0) {
pr_err("No more XGMI SDMA queue to allocate\n");
return -ENOMEM;
}
- bit = __ffs64(dqm->xgmi_sdma_bitmap);
- dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
- q->sdma_id = bit;
+ if (restore_sdma_id) {
+ /* Re-use existing sdma_id */
+ if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) {
+ pr_err("SDMA queue already in use\n");
+ return -EBUSY;
+ }
+ dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id);
+ q->sdma_id = *restore_sdma_id;
+ } else {
+ bit = __ffs64(dqm->xgmi_sdma_bitmap);
+ dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
+ q->sdma_id = bit;
+ }
/* sdma_engine_id is sdma id including
* both PCIe-optimized SDMAs and XGMI-
* optimized SDMAs. The calculation below
* assumes the first N engines are always
* PCIe-optimized ones
*/
- q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
- q->sdma_id % get_num_xgmi_sdma_engines(dqm);
+ q->properties.sdma_engine_id =
+ kfd_get_num_sdma_engines(dqm->dev) +
+ q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
- get_num_xgmi_sdma_engines(dqm);
+ kfd_get_num_xgmi_sdma_engines(dqm->dev);
}
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
@@ -1132,7 +1451,7 @@ static int set_sched_resources(struct device_queue_manager *dqm)
res.queue_mask |= 1ull
<< amdgpu_queue_mask_bit_to_set_resource_bit(
- (struct amdgpu_device *)dqm->dev->kgd, i);
+ dqm->dev->adev, i);
}
res.gws_mask = ~0ull;
res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
@@ -1147,9 +1466,6 @@ static int set_sched_resources(struct device_queue_manager *dqm)
static int initialize_cpsch(struct device_queue_manager *dqm)
{
- uint64_t num_sdma_queues;
- uint64_t num_xgmi_sdma_queues;
-
pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
mutex_init(&dqm->lock_hidden);
@@ -1158,21 +1474,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
dqm->active_cp_queue_count = 0;
dqm->gws_queue_count = 0;
dqm->active_runlist = false;
-
- num_sdma_queues = get_num_sdma_queues(dqm);
- if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
- dqm->sdma_bitmap = ULLONG_MAX;
- else
- dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
-
- num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
- if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
- dqm->xgmi_sdma_bitmap = ULLONG_MAX;
- else
- dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1);
-
INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
+ init_sdma_bitmaps(dqm);
+
return 0;
}
@@ -1183,14 +1488,16 @@ static int start_cpsch(struct device_queue_manager *dqm)
retval = 0;
dqm_lock(dqm);
- retval = pm_init(&dqm->packet_mgr, dqm);
- if (retval)
- goto fail_packet_manager_init;
- retval = set_sched_resources(dqm);
- if (retval)
- goto fail_set_sched_resources;
+ if (!dqm->dev->shared_resources.enable_mes) {
+ retval = pm_init(&dqm->packet_mgr, dqm);
+ if (retval)
+ goto fail_packet_manager_init;
+ retval = set_sched_resources(dqm);
+ if (retval)
+ goto fail_set_sched_resources;
+ }
pr_debug("Allocating fence memory\n");
/* allocate fence memory on the gart */
@@ -1209,13 +1516,15 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm->is_hws_hang = false;
dqm->is_resetting = false;
dqm->sched_running = true;
- execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (!dqm->dev->shared_resources.enable_mes)
+ execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
dqm_unlock(dqm);
return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
- pm_uninit(&dqm->packet_mgr, false);
+ if (!dqm->dev->shared_resources.enable_mes)
+ pm_uninit(&dqm->packet_mgr, false);
fail_packet_manager_init:
dqm_unlock(dqm);
return retval;
@@ -1231,15 +1540,22 @@ static int stop_cpsch(struct device_queue_manager *dqm)
return 0;
}
- if (!dqm->is_hws_hang)
- unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ if (!dqm->is_hws_hang) {
+ if (!dqm->dev->shared_resources.enable_mes)
+ unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
+ else
+ remove_all_queues_mes(dqm);
+ }
+
hanging = dqm->is_hws_hang || dqm->is_resetting;
dqm->sched_running = false;
- pm_release_ib(&dqm->packet_mgr);
+ if (!dqm->dev->shared_resources.enable_mes)
+ pm_release_ib(&dqm->packet_mgr);
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
- pm_uninit(&dqm->packet_mgr, hanging);
+ if (!dqm->dev->shared_resources.enable_mes)
+ pm_uninit(&dqm->packet_mgr, hanging);
dqm_unlock(dqm);
return 0;
@@ -1266,7 +1582,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
dqm->total_queue_count);
list_add(&kq->list, &qpd->priv_queue_list);
- increment_queue_count(dqm, kq->queue->properties.type);
+ increment_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = true;
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
dqm_unlock(dqm);
@@ -1280,7 +1596,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
{
dqm_lock(dqm);
list_del(&kq->list);
- decrement_queue_count(dqm, kq->queue->properties.type);
+ decrement_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = false;
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
/*
@@ -1294,7 +1610,9 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
}
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd)
+ struct qcm_process_device *qpd,
+ const struct kfd_criu_queue_priv_data *qd,
+ const void *restore_mqd, const void *restore_ctl_stack)
{
int retval;
struct mqd_manager *mqd_mgr;
@@ -1309,13 +1627,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm_lock(dqm);
- retval = allocate_sdma_queue(dqm, q);
+ retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
dqm_unlock(dqm);
if (retval)
goto out;
}
- retval = allocate_doorbell(qpd, q);
+ retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
if (retval)
goto out_deallocate_sdma_queue;
@@ -1340,17 +1658,28 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
* updates the is_evicted flag but is a no-op otherwise.
*/
q->properties.is_evicted = !!qpd->evicted;
- mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
- &q->gart_mqd_addr, &q->properties);
+
+ if (qd)
+ mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
+ &q->properties, restore_mqd, restore_ctl_stack,
+ qd->ctl_stack_size);
+ else
+ mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+ &q->gart_mqd_addr, &q->properties);
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
if (q->properties.is_active) {
- increment_queue_count(dqm, q->properties.type);
+ increment_queue_count(dqm, qpd, q);
- execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (!dqm->dev->shared_resources.enable_mes)
+ retval = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ else
+ retval = add_queue_mes(dqm, q, qpd);
+ if (retval)
+ goto cleanup_queue;
}
/*
@@ -1365,6 +1694,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
dqm_unlock(dqm);
return retval;
+cleanup_queue:
+ qpd->queue_count--;
+ list_del(&q->list);
+ if (q->properties.is_active)
+ decrement_queue_count(dqm, qpd, q);
+ mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+ dqm_unlock(dqm);
out_deallocate_doorbell:
deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
@@ -1428,7 +1764,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param)
+ uint32_t filter_param, bool reset)
{
int retval = 0;
struct mqd_manager *mqd_mgr;
@@ -1440,8 +1776,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
if (!dqm->active_runlist)
return retval;
- retval = pm_send_unmap_queue(&dqm->packet_mgr, KFD_QUEUE_TYPE_COMPUTE,
- filter, filter_param, false, 0);
+ retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
if (retval)
return retval;
@@ -1453,13 +1788,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
queue_preemption_timeout_ms);
if (retval) {
pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
- dqm->is_hws_hang = true;
- /* It's possible we're detecting a HWS hang in the
- * middle of a GPU reset. No need to schedule another
- * reset in this case.
- */
- if (!dqm->is_resetting)
- schedule_work(&dqm->hw_exception_work);
+ kfd_hws_hang(dqm);
return retval;
}
@@ -1485,6 +1814,21 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
return retval;
}
+/* Only for compute queues */
+static int reset_queues_cpsch(struct device_queue_manager *dqm,
+ uint16_t pasid)
+{
+ int retval;
+
+ dqm_lock(dqm);
+
+ retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
+ pasid, true);
+
+ dqm_unlock(dqm);
+ return retval;
+}
+
/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
@@ -1494,7 +1838,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
if (dqm->is_hws_hang)
return -EIO;
- retval = unmap_queues_cpsch(dqm, filter, filter_param);
+ retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
if (retval)
return retval;
@@ -1549,14 +1893,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
list_del(&q->list);
qpd->queue_count--;
if (q->properties.is_active) {
- decrement_queue_count(dqm, q->properties.type);
- retval = execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
- if (retval == -ETIME)
- qpd->reset_wavefronts = true;
- if (q->properties.is_gws) {
- dqm->gws_queue_count--;
- qpd->mapped_gws_queue = false;
+ if (!dqm->dev->shared_resources.enable_mes) {
+ decrement_queue_count(dqm, qpd, q);
+ retval = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ if (retval == -ETIME)
+ qpd->reset_wavefronts = true;
+ } else {
+ retval = remove_queue_mes(dqm, q, qpd);
}
}
@@ -1729,6 +2073,56 @@ static int get_wave_state(struct device_queue_manager *dqm,
ctl_stack_used_size, save_area_used_size);
}
+static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
+ const struct queue *q,
+ u32 *mqd_size,
+ u32 *ctl_stack_size)
+{
+ struct mqd_manager *mqd_mgr;
+ enum KFD_MQD_TYPE mqd_type =
+ get_mqd_type_from_queue_type(q->properties.type);
+
+ dqm_lock(dqm);
+ mqd_mgr = dqm->mqd_mgrs[mqd_type];
+ *mqd_size = mqd_mgr->mqd_size;
+ *ctl_stack_size = 0;
+
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
+ mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
+
+ dqm_unlock(dqm);
+}
+
+static int checkpoint_mqd(struct device_queue_manager *dqm,
+ const struct queue *q,
+ void *mqd,
+ void *ctl_stack)
+{
+ struct mqd_manager *mqd_mgr;
+ int r = 0;
+ enum KFD_MQD_TYPE mqd_type =
+ get_mqd_type_from_queue_type(q->properties.type);
+
+ dqm_lock(dqm);
+
+ if (q->properties.is_active || !q->device->cwsr_enabled) {
+ r = -EINVAL;
+ goto dqm_unlock;
+ }
+
+ mqd_mgr = dqm->mqd_mgrs[mqd_type];
+ if (!mqd_mgr->checkpoint_mqd) {
+ r = -EOPNOTSUPP;
+ goto dqm_unlock;
+ }
+
+ mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);
+
+dqm_unlock:
+ dqm_unlock(dqm);
+ return r;
+}
+
static int process_termination_cpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -1748,7 +2142,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
/* Clean all kernel queues */
list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
list_del(&kq->list);
- decrement_queue_count(dqm, kq->queue->properties.type);
+ decrement_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = false;
dqm->total_queue_count--;
filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
@@ -1762,10 +2156,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
deallocate_sdma_queue(dqm, q);
if (q->properties.is_active) {
- decrement_queue_count(dqm, q->properties.type);
- if (q->properties.is_gws) {
- dqm->gws_queue_count--;
- qpd->mapped_gws_queue = false;
+ decrement_queue_count(dqm, qpd, q);
+
+ if (dqm->dev->shared_resources.enable_mes) {
+ retval = remove_queue_mes(dqm, q, qpd);
+ if (retval)
+ pr_err("Failed to remove queue %d\n",
+ q->properties.queue_id);
}
}
@@ -1783,7 +2180,9 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
}
}
- retval = execute_queues_cpsch(dqm, filter, 0);
+ if (!dqm->dev->shared_resources.enable_mes)
+ retval = execute_queues_cpsch(dqm, filter, 0);
+
if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
@@ -1847,10 +2246,10 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
get_num_all_sdma_engines(dqm) *
- dev->device_info->num_sdma_queues_per_engine +
+ dev->device_info.num_sdma_queues_per_engine +
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
- retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
(void *)&(mem_obj->cpu_ptr), false);
@@ -1867,7 +2266,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
if (!dqm)
return NULL;
- switch (dev->device_info->asic_family) {
+ switch (dev->adev->asic_type) {
/* HWS is not available on Hawaii. */
case CHIP_HAWAII:
/* HWS depends on CWSR for timely dequeue. CWSR is not
@@ -1905,6 +2304,9 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.evict_process_queues = evict_process_queues_cpsch;
dqm->ops.restore_process_queues = restore_process_queues_cpsch;
dqm->ops.get_wave_state = get_wave_state;
+ dqm->ops.reset_queues = reset_queues_cpsch;
+ dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
+ dqm->ops.checkpoint_mqd = checkpoint_mqd;
break;
case KFD_SCHED_POLICY_NO_HWS:
/* initialize dqm for no cp scheduling */
@@ -1924,13 +2326,15 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.restore_process_queues =
restore_process_queues_nocpsch;
dqm->ops.get_wave_state = get_wave_state;
+ dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
+ dqm->ops.checkpoint_mqd = checkpoint_mqd;
break;
default:
pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
goto out_free;
}
- switch (dev->device_info->asic_family) {
+ switch (dev->adev->asic_type) {
case CHIP_CARRIZO:
device_queue_manager_init_vi(&dqm->asic_ops);
break;
@@ -1952,31 +2356,18 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
- device_queue_manager_init_v9(&dqm->asic_ops);
- break;
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- device_queue_manager_init_v10_navi10(&dqm->asic_ops);
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dev->device_info->asic_family);
- goto out_free;
+ if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
+ device_queue_manager_init_v11(&dqm->asic_ops);
+ else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
+ device_queue_manager_init_v10_navi10(&dqm->asic_ops);
+ else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
+ device_queue_manager_init_v9(&dqm->asic_ops);
+ else {
+ WARN(1, "Unexpected ASIC family %u",
+ dev->adev->asic_type);
+ goto out_free;
+ }
}
if (init_mqd_managers(dqm))
@@ -2000,7 +2391,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
{
WARN(!mqd, "No hiq sdma mqd trunk to free");
- amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
}
void device_queue_manager_uninit(struct device_queue_manager *dqm)
@@ -2010,7 +2401,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
kfree(dqm);
}
-int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid)
+int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
{
struct kfd_process_device *pdd;
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
@@ -2031,7 +2422,7 @@ static void kfd_process_hw_exception(struct work_struct *work)
{
struct device_queue_manager *dqm = container_of(work,
struct device_queue_manager, hw_exception_work);
- amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
+ amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}
#if defined(CONFIG_DEBUG_FS)
@@ -2065,12 +2456,11 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
int r = 0;
if (!dqm->sched_running) {
- seq_printf(m, " Device is stopped\n");
-
+ seq_puts(m, " Device is stopped\n");
return 0;
}
- r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
+ r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
&dump, &n_regs);
if (!r) {
@@ -2092,7 +2482,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
continue;
r = dqm->dev->kfd2kgd->hqd_dump(
- dqm->dev->kgd, pipe, queue, &dump, &n_regs);
+ dqm->dev->adev, pipe, queue, &dump, &n_regs);
if (r)
break;
@@ -2106,10 +2496,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
for (queue = 0;
- queue < dqm->dev->device_info->num_sdma_queues_per_engine;
+ queue < dqm->dev->device_info.num_sdma_queues_per_engine;
queue++) {
r = dqm->dev->kfd2kgd->hqd_sdma_dump(
- dqm->dev->kgd, pipe, queue, &dump, &n_regs);
+ dqm->dev->adev, pipe, queue, &dump, &n_regs);
if (r)
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 499fc0ea387f..a537b9ef3e16 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -34,11 +35,49 @@
#define VMID_NUM 16
+#define KFD_MES_PROCESS_QUANTUM 100000
+#define KFD_MES_GANG_QUANTUM 10000
+
struct device_process_node {
struct qcm_process_device *qpd;
struct list_head list;
};
+union SQ_CMD_BITS {
+ struct {
+ uint32_t cmd:3;
+ uint32_t:1;
+ uint32_t mode:3;
+ uint32_t check_vmid:1;
+ uint32_t trap_id:3;
+ uint32_t:5;
+ uint32_t wave_id:4;
+ uint32_t simd_id:2;
+ uint32_t:2;
+ uint32_t queue_id:3;
+ uint32_t:1;
+ uint32_t vm_id:4;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union GRBM_GFX_INDEX_BITS {
+ struct {
+ uint32_t instance_index:8;
+ uint32_t sh_index:8;
+ uint32_t se_index:8;
+ uint32_t:5;
+ uint32_t sh_broadcast_writes:1;
+ uint32_t instance_broadcast_writes:1;
+ uint32_t se_broadcast_writes:1;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
/**
* struct device_queue_manager_ops
*
@@ -56,7 +95,7 @@ struct device_process_node {
*
* @initialize: Initializes the pipelines and memory module for that device.
*
- * @start: Initializes the resources/modules the the device needs for queues
+ * @start: Initializes the resources/modules the device needs for queues
* execution. This function is called on device initialization and after the
* system woke up after suspension.
*
@@ -77,16 +116,24 @@ struct device_process_node {
*
* @evict_process_queues: Evict all active queues of a process
*
- * @restore_process_queues: Restore all evicted queues queues of a process
+ * @restore_process_queues: Restore all evicted queues of a process
*
* @get_wave_state: Retrieves context save state and optionally copies the
* control stack, if kept in the MQD, to the given userspace address.
+ *
+ * @reset_queues: Reset queues which have consumed RAS poison
+ * @get_queue_checkpoint_info: Retrieves queue size information for CRIU checkpoint.
+ *
+ * @checkpoint_mqd: checkpoint queue MQD contents for CRIU.
*/
struct device_queue_manager_ops {
int (*create_queue)(struct device_queue_manager *dqm,
struct queue *q,
- struct qcm_process_device *qpd);
+ struct qcm_process_device *qpd,
+ const struct kfd_criu_queue_priv_data *qd,
+ const void *restore_mqd,
+ const void *restore_ctl_stack);
int (*destroy_queue)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
@@ -134,6 +181,17 @@ struct device_queue_manager_ops {
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size);
+
+ int (*reset_queues)(struct device_queue_manager *dqm,
+ uint16_t pasid);
+ void (*get_queue_checkpoint_info)(struct device_queue_manager *dqm,
+ const struct queue *q, u32 *mqd_size,
+ u32 *ctl_stack_size);
+
+ int (*checkpoint_mqd)(struct device_queue_manager *dqm,
+ const struct queue *q,
+ void *mqd,
+ void *ctl_stack);
};
struct device_queue_manager_asic_ops {
@@ -212,6 +270,8 @@ void device_queue_manager_init_v9(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_v10_navi10(
struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v11(
+ struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
unsigned int get_cp_queues_num(struct device_queue_manager *dqm);
@@ -248,9 +308,7 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val)
{
- /*
- * SDMA activity counter is stored at queue's RPTR + 0x8 location.
- */
+ /* SDMA activity counter is stored at queue's RPTR + 0x8 location. */
return get_user(*val, q_rptr + 1);
}
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 0d26506798cf..b1ab5b0775e1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index ad0593342333..f1a1f5753e65 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -61,15 +62,6 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
(SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-#if 0
- /* TODO:
- * This shouldn't be an issue with Navi10. Verify.
- */
- if (vega10_noretry)
- qpd->sh_mem_config |=
- 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
-#endif
-
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
new file mode 100644
index 000000000000..2e129da7acb4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "soc21_enum.h"
+
+static int update_qpd_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd);
+
+void device_queue_manager_init_v11(
+ struct device_queue_manager_asic_ops *asic_ops)
+{
+ asic_ops->update_qpd = update_qpd_v11;
+ asic_ops->init_sdma_vm = init_sdma_vm_v11;
+ asic_ops->mqd_manager_init = mqd_manager_init_v11;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+ uint32_t shared_base = pdd->lds_base >> 48;
+ uint32_t private_base = pdd->scratch_base >> 48;
+
+ return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ private_base;
+}
+
+static int update_qpd_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ struct kfd_process_device *pdd;
+
+ pdd = qpd_to_pdd(qpd);
+
+ /* check if sh_mem_config register already configured */
+ if (qpd->sh_mem_config == 0) {
+ qpd->sh_mem_config =
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ }
+
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+
+ pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+
+ return 0;
+}
+
+static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ /* Not needed on SDMAv4 and later */
+ q->properties.sdma_vm_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index b5c3d13643f1..d119070956fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -62,7 +63,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
- if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) {
+ if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2)) {
/* Aldebaran can safely support different XNACK modes
* per process
*/
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 3a7cb2f88366..d7d45832df0f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 768d153acff4..cd4e61bf0493 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -48,9 +49,13 @@
/* # of doorbell bytes allocated for each process. */
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{
- return roundup(kfd->device_info->doorbell_size *
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
- PAGE_SIZE);
+ if (!kfd->shared_resources.enable_mes)
+ return roundup(kfd->device_info.doorbell_size *
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ PAGE_SIZE);
+ else
+ return amdgpu_mes_doorbell_process_slice(
+ (struct amdgpu_device *)kfd->adev);
}
/* Doorbell calculations for device init. */
@@ -61,6 +66,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
size_t doorbell_process_limit;
/*
+ * With MES enabled, just set the doorbell base as it is needed
+ * to calculate doorbell physical address.
+ */
+ if (kfd->shared_resources.enable_mes) {
+ kfd->doorbell_base =
+ kfd->shared_resources.doorbell_physical_address;
+ return 0;
+ }
+
+ /*
* We start with calculations in bytes because the input data might
* only be byte-aligned.
* Only after we have done the rounding can we assume any alignment.
@@ -142,6 +157,8 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
/* Calculate physical address of doorbell */
address = kfd_get_process_doorbells(pdd);
+ if (!address)
+ return -ENOMEM;
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
VM_DONTDUMP | VM_PFNMAP;
@@ -180,7 +197,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
- inx *= kfd->device_info->doorbell_size / sizeof(u32);
+ inx *= kfd->device_info.doorbell_size / sizeof(u32);
/*
* Calculating the kernel doorbell offset using the first
@@ -201,7 +218,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
unsigned int inx;
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
- * sizeof(u32) / kfd->device_info->doorbell_size;
+ * sizeof(u32) / kfd->device_info.doorbell_size;
mutex_lock(&kfd->doorbell_mutex);
__clear_bit(inx, kfd->doorbell_available_index);
@@ -236,10 +253,16 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
* the process's doorbells. The offset returned is in dword
* units regardless of the ASIC-dependent doorbell size.
*/
- return kfd->doorbell_base_dw_offset +
- pdd->doorbell_index
- * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
- doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
+ if (!kfd->shared_resources.enable_mes)
+ return kfd->doorbell_base_dw_offset +
+ pdd->doorbell_index
+ * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
+ doorbell_id *
+ kfd->device_info.doorbell_size / sizeof(u32);
+ else
+ return amdgpu_mes_get_doorbell_dw_offset_in_bar(
+ (struct amdgpu_device *)kfd->adev,
+ pdd->doorbell_index, doorbell_id);
}
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
@@ -254,22 +277,46 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
{
+ if (!pdd->doorbell_index) {
+ int r = kfd_alloc_process_doorbells(pdd->dev,
+ &pdd->doorbell_index);
+ if (r)
+ return 0;
+ }
+
return pdd->dev->doorbell_base +
pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
}
int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
{
- int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
- GFP_KERNEL);
+ int r = 0;
+
+ if (!kfd->shared_resources.enable_mes)
+ r = ida_simple_get(&kfd->doorbell_ida, 1,
+ kfd->max_doorbell_slices, GFP_KERNEL);
+ else
+ r = amdgpu_mes_alloc_process_doorbells(
+ (struct amdgpu_device *)kfd->adev,
+ doorbell_index);
+
if (r > 0)
*doorbell_index = r;
+ if (r < 0)
+ pr_err("Failed to allocate process doorbells\n");
+
return r;
}
void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
{
- if (doorbell_index)
- ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
+ if (doorbell_index) {
+ if (!kfd->shared_resources.enable_mes)
+ ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
+ else
+ amdgpu_mes_free_process_doorbells(
+ (struct amdgpu_device *)kfd->adev,
+ doorbell_index);
+ }
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 3eea4edee355..729d26d648af 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -55,7 +56,6 @@ struct kfd_signal_page {
bool need_to_free_pages;
};
-
static uint64_t *page_slots(struct kfd_signal_page *page)
{
return page->kernel_address;
@@ -92,7 +92,8 @@ fail_alloc_signal_store:
}
static int allocate_event_notification_slot(struct kfd_process *p,
- struct kfd_event *ev)
+ struct kfd_event *ev,
+ const int *restore_id)
{
int id;
@@ -104,14 +105,19 @@ static int allocate_event_notification_slot(struct kfd_process *p,
p->signal_mapped_size = 256*8;
}
- /*
- * Compatibility with old user mode: Only use signal slots
- * user mode has mapped, may be less than
- * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
- * of the event limit without breaking user mode.
- */
- id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
- GFP_KERNEL);
+ if (restore_id) {
+ id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
+ GFP_KERNEL);
+ } else {
+ /*
+ * Compatibility with old user mode: Only use signal slots
+ * user mode has mapped, may be less than
+ * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
+ * of the event limit without breaking user mode.
+ */
+ id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
+ GFP_KERNEL);
+ }
if (id < 0)
return id;
@@ -122,8 +128,8 @@ static int allocate_event_notification_slot(struct kfd_process *p,
}
/*
- * Assumes that p->event_mutex is held and of course that p is not going
- * away (current or locked).
+ * Assumes that p->event_mutex or rcu_readlock is held and of course that p is
+ * not going away.
*/
static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
{
@@ -178,9 +184,8 @@ static struct kfd_event *lookup_signaled_event_by_partial_id(
return ev;
}
-static int create_signal_event(struct file *devkfd,
- struct kfd_process *p,
- struct kfd_event *ev)
+static int create_signal_event(struct file *devkfd, struct kfd_process *p,
+ struct kfd_event *ev, const int *restore_id)
{
int ret;
@@ -193,7 +198,7 @@ static int create_signal_event(struct file *devkfd,
return -ENOSPC;
}
- ret = allocate_event_notification_slot(p, ev);
+ ret = allocate_event_notification_slot(p, ev, restore_id);
if (ret) {
pr_warn("Signal event wasn't created because out of kernel memory\n");
return ret;
@@ -209,16 +214,22 @@ static int create_signal_event(struct file *devkfd,
return 0;
}
-static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
+static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const int *restore_id)
{
- /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
- * intentional integer overflow to -1 without a compiler
- * warning. idr_alloc treats a negative value as "maximum
- * signed integer".
- */
- int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
- (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
- GFP_KERNEL);
+ int id;
+
+ if (restore_id)
+ id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
+ GFP_KERNEL);
+ else
+ /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
+ * intentional integer overflow to -1 without a compiler
+ * warning. idr_alloc treats a negative value as "maximum
+ * signed integer".
+ */
+ id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
+ (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
+ GFP_KERNEL);
if (id < 0)
return id;
@@ -227,12 +238,24 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
return 0;
}
-void kfd_event_init_process(struct kfd_process *p)
+int kfd_event_init_process(struct kfd_process *p)
{
+ int id;
+
mutex_init(&p->event_mutex);
idr_init(&p->event_idr);
p->signal_page = NULL;
- p->signal_event_count = 0;
+ p->signal_event_count = 1;
+ /* Allocate event ID 0. It is used for a fast path to ignore bogus events
+ * that are sent by the CP without a context ID
+ */
+ id = idr_alloc(&p->event_idr, NULL, 0, 1, GFP_KERNEL);
+ if (id < 0) {
+ idr_destroy(&p->event_idr);
+ mutex_destroy(&p->event_mutex);
+ return id;
+ }
+ return 0;
}
static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
@@ -240,16 +263,18 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
struct kfd_event_waiter *waiter;
/* Wake up pending waiters. They will return failure */
+ spin_lock(&ev->lock);
list_for_each_entry(waiter, &ev->wq.head, wait.entry)
- waiter->event = NULL;
+ WRITE_ONCE(waiter->event, NULL);
wake_up_all(&ev->wq);
+ spin_unlock(&ev->lock);
if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
ev->type == KFD_EVENT_TYPE_DEBUG)
p->signal_event_count--;
idr_remove(&p->event_idr, ev->event_id);
- kfree(ev);
+ kfree_rcu(ev, rcu);
}
static void destroy_events(struct kfd_process *p)
@@ -258,8 +283,10 @@ static void destroy_events(struct kfd_process *p)
uint32_t id;
idr_for_each_entry(&p->event_idr, ev, id)
- destroy_event(p, ev);
+ if (ev)
+ destroy_event(p, ev);
idr_destroy(&p->event_idr);
+ mutex_destroy(&p->event_mutex);
}
/*
@@ -295,8 +322,8 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
return ev->type == KFD_EVENT_TYPE_SIGNAL;
}
-int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
- uint64_t size)
+static int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
+ uint64_t size, uint64_t user_handle)
{
struct kfd_signal_page *page;
@@ -315,10 +342,56 @@ int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
p->signal_page = page;
p->signal_mapped_size = size;
-
+ p->signal_handle = user_handle;
return 0;
}
+int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
+{
+ struct kfd_dev *kfd;
+ struct kfd_process_device *pdd;
+ void *mem, *kern_addr;
+ uint64_t size;
+ int err = 0;
+
+ if (p->signal_page) {
+ pr_err("Event page is already set\n");
+ return -EINVAL;
+ }
+
+ pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(event_page_offset));
+ if (!pdd) {
+ pr_err("Getting device by id failed in %s\n", __func__);
+ return -EINVAL;
+ }
+ kfd = pdd->dev;
+
+ pdd = kfd_bind_process_to_device(kfd, p);
+ if (IS_ERR(pdd))
+ return PTR_ERR(pdd);
+
+ mem = kfd_process_device_translate_handle(pdd,
+ GET_IDR_HANDLE(event_page_offset));
+ if (!mem) {
+ pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
+ return -EINVAL;
+ }
+
+ err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(mem, &kern_addr, &size);
+ if (err) {
+ pr_err("Failed to map event page to kernel\n");
+ return err;
+ }
+
+ err = kfd_event_page_set(p, kern_addr, size, event_page_offset);
+ if (err) {
+ pr_err("Failed to set event page\n");
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
+ return err;
+ }
+ return err;
+}
+
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data,
@@ -334,6 +407,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
ev->auto_reset = auto_reset;
ev->signaled = false;
+ spin_lock_init(&ev->lock);
init_waitqueue_head(&ev->wq);
*event_page_offset = 0;
@@ -343,14 +417,14 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
switch (event_type) {
case KFD_EVENT_TYPE_SIGNAL:
case KFD_EVENT_TYPE_DEBUG:
- ret = create_signal_event(devkfd, p, ev);
+ ret = create_signal_event(devkfd, p, ev, NULL);
if (!ret) {
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
*event_slot_index = ev->event_id;
}
break;
default:
- ret = create_other_event(p, ev);
+ ret = create_other_event(p, ev, NULL);
break;
}
@@ -366,6 +440,166 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
return ret;
}
+int kfd_criu_restore_event(struct file *devkfd,
+ struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size)
+{
+ struct kfd_criu_event_priv_data *ev_priv;
+ struct kfd_event *ev = NULL;
+ int ret = 0;
+
+ ev_priv = kmalloc(sizeof(*ev_priv), GFP_KERNEL);
+ if (!ev_priv)
+ return -ENOMEM;
+
+ ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+ if (!ev) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ if (*priv_data_offset + sizeof(*ev_priv) > max_priv_data_size) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ ret = copy_from_user(ev_priv, user_priv_ptr + *priv_data_offset, sizeof(*ev_priv));
+ if (ret) {
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_data_offset += sizeof(*ev_priv);
+
+ if (ev_priv->user_handle) {
+ ret = kfd_kmap_event_page(p, ev_priv->user_handle);
+ if (ret)
+ goto exit;
+ }
+
+ ev->type = ev_priv->type;
+ ev->auto_reset = ev_priv->auto_reset;
+ ev->signaled = ev_priv->signaled;
+
+ spin_lock_init(&ev->lock);
+ init_waitqueue_head(&ev->wq);
+
+ mutex_lock(&p->event_mutex);
+ switch (ev->type) {
+ case KFD_EVENT_TYPE_SIGNAL:
+ case KFD_EVENT_TYPE_DEBUG:
+ ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id);
+ break;
+ case KFD_EVENT_TYPE_MEMORY:
+ memcpy(&ev->memory_exception_data,
+ &ev_priv->memory_exception_data,
+ sizeof(struct kfd_hsa_memory_exception_data));
+
+ ret = create_other_event(p, ev, &ev_priv->event_id);
+ break;
+ case KFD_EVENT_TYPE_HW_EXCEPTION:
+ memcpy(&ev->hw_exception_data,
+ &ev_priv->hw_exception_data,
+ sizeof(struct kfd_hsa_hw_exception_data));
+
+ ret = create_other_event(p, ev, &ev_priv->event_id);
+ break;
+ }
+ mutex_unlock(&p->event_mutex);
+
+exit:
+ if (ret)
+ kfree(ev);
+
+ kfree(ev_priv);
+
+ return ret;
+}
+
+int kfd_criu_checkpoint_events(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset)
+{
+ struct kfd_criu_event_priv_data *ev_privs;
+ int i = 0;
+ int ret = 0;
+ struct kfd_event *ev;
+ uint32_t ev_id;
+
+ uint32_t num_events = kfd_get_num_events(p);
+
+ if (!num_events)
+ return 0;
+
+ ev_privs = kvzalloc(num_events * sizeof(*ev_privs), GFP_KERNEL);
+ if (!ev_privs)
+ return -ENOMEM;
+
+
+ idr_for_each_entry(&p->event_idr, ev, ev_id) {
+ struct kfd_criu_event_priv_data *ev_priv;
+
+ /*
+ * Currently, all events have the same size of private_data, but the current
+ * ioctls and the CRIU plugin support private_data of variable sizes
+ */
+ ev_priv = &ev_privs[i];
+
+ ev_priv->object_type = KFD_CRIU_OBJECT_TYPE_EVENT;
+
+ /* We store the user_handle with the first event */
+ if (i == 0 && p->signal_page)
+ ev_priv->user_handle = p->signal_handle;
+
+ ev_priv->event_id = ev->event_id;
+ ev_priv->auto_reset = ev->auto_reset;
+ ev_priv->type = ev->type;
+ ev_priv->signaled = ev->signaled;
+
+ if (ev_priv->type == KFD_EVENT_TYPE_MEMORY)
+ memcpy(&ev_priv->memory_exception_data,
+ &ev->memory_exception_data,
+ sizeof(struct kfd_hsa_memory_exception_data));
+ else if (ev_priv->type == KFD_EVENT_TYPE_HW_EXCEPTION)
+ memcpy(&ev_priv->hw_exception_data,
+ &ev->hw_exception_data,
+ sizeof(struct kfd_hsa_hw_exception_data));
+
+ pr_debug("Checkpointed event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
+ i,
+ ev_priv->event_id,
+ ev_priv->auto_reset,
+ ev_priv->type,
+ ev_priv->signaled);
+ i++;
+ }
+
+ ret = copy_to_user(user_priv_data + *priv_data_offset,
+ ev_privs, num_events * sizeof(*ev_privs));
+ if (ret) {
+ pr_err("Failed to copy events priv to user\n");
+ ret = -EFAULT;
+ }
+
+ *priv_data_offset += num_events * sizeof(*ev_privs);
+
+ kvfree(ev_privs);
+ return ret;
+}
+
+int kfd_get_num_events(struct kfd_process *p)
+{
+ struct kfd_event *ev;
+ uint32_t id;
+ u32 num_events = 0;
+
+ idr_for_each_entry(&p->event_idr, ev, id)
+ num_events++;
+
+ return num_events;
+}
+
/* Assumes that p is current. */
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
{
@@ -391,13 +625,13 @@ static void set_event(struct kfd_event *ev)
/* Auto reset if the list is non-empty and we're waking
* someone. waitqueue_active is safe here because we're
- * protected by the p->event_mutex, which is also held when
+ * protected by the ev->lock, which is also held when
* updating the wait queues in kfd_wait_on_events.
*/
ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
list_for_each_entry(waiter, &ev->wq.head, wait.entry)
- waiter->activated = true;
+ WRITE_ONCE(waiter->activated, true);
wake_up_all(&ev->wq);
}
@@ -408,16 +642,23 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id)
int ret = 0;
struct kfd_event *ev;
- mutex_lock(&p->event_mutex);
+ rcu_read_lock();
ev = lookup_event_by_id(p, event_id);
+ if (!ev) {
+ ret = -EINVAL;
+ goto unlock_rcu;
+ }
+ spin_lock(&ev->lock);
- if (ev && event_can_be_cpu_signaled(ev))
+ if (event_can_be_cpu_signaled(ev))
set_event(ev);
else
ret = -EINVAL;
- mutex_unlock(&p->event_mutex);
+ spin_unlock(&ev->lock);
+unlock_rcu:
+ rcu_read_unlock();
return ret;
}
@@ -432,23 +673,30 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
int ret = 0;
struct kfd_event *ev;
- mutex_lock(&p->event_mutex);
+ rcu_read_lock();
ev = lookup_event_by_id(p, event_id);
+ if (!ev) {
+ ret = -EINVAL;
+ goto unlock_rcu;
+ }
+ spin_lock(&ev->lock);
- if (ev && event_can_be_cpu_signaled(ev))
+ if (event_can_be_cpu_signaled(ev))
reset_event(ev);
else
ret = -EINVAL;
- mutex_unlock(&p->event_mutex);
+ spin_unlock(&ev->lock);
+unlock_rcu:
+ rcu_read_unlock();
return ret;
}
static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
{
- page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
+ WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT);
}
static void set_event_from_interrupt(struct kfd_process *p,
@@ -456,7 +704,9 @@ static void set_event_from_interrupt(struct kfd_process *p,
{
if (ev && event_can_be_gpu_signaled(ev)) {
acknowledge_signal(p, ev);
+ spin_lock(&ev->lock);
set_event(ev);
+ spin_unlock(&ev->lock);
}
}
@@ -475,7 +725,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
if (!p)
return; /* Presumably process exited. */
- mutex_lock(&p->event_mutex);
+ rcu_read_lock();
if (valid_id_bits)
ev = lookup_signaled_event_by_partial_id(p, partial_id,
@@ -503,7 +753,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
if (id >= KFD_SIGNAL_EVENT_LIMIT)
break;
- if (slots[id] != UNSIGNALED_EVENT_SLOT)
+ if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT)
set_event_from_interrupt(p, ev);
}
} else {
@@ -511,15 +761,15 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
* iterate over the signal slots and lookup
* only signaled events from the IDR.
*/
- for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
- if (slots[id] != UNSIGNALED_EVENT_SLOT) {
+ for (id = 1; id < KFD_SIGNAL_EVENT_LIMIT; id++)
+ if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) {
ev = lookup_event_by_id(p, id);
set_event_from_interrupt(p, ev);
}
}
}
- mutex_unlock(&p->event_mutex);
+ rcu_read_unlock();
kfd_unref_process(p);
}
@@ -531,6 +781,8 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
event_waiters = kmalloc_array(num_events,
sizeof(struct kfd_event_waiter),
GFP_KERNEL);
+ if (!event_waiters)
+ return NULL;
for (i = 0; (event_waiters) && (i < num_events) ; i++) {
init_wait(&event_waiters[i].wait);
@@ -540,7 +792,7 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
return event_waiters;
}
-static int init_event_waiter_get_status(struct kfd_process *p,
+static int init_event_waiter(struct kfd_process *p,
struct kfd_event_waiter *waiter,
uint32_t event_id)
{
@@ -549,22 +801,15 @@ static int init_event_waiter_get_status(struct kfd_process *p,
if (!ev)
return -EINVAL;
+ spin_lock(&ev->lock);
waiter->event = ev;
waiter->activated = ev->signaled;
ev->signaled = ev->signaled && !ev->auto_reset;
-
- return 0;
-}
-
-static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
-{
- struct kfd_event *ev = waiter->event;
-
- /* Only add to the wait list if we actually need to
- * wait on this event.
- */
if (!waiter->activated)
add_wait_queue(&ev->wq, &waiter->wait);
+ spin_unlock(&ev->lock);
+
+ return 0;
}
/* test_event_condition - Test condition of events being waited for
@@ -584,10 +829,10 @@ static uint32_t test_event_condition(bool all, uint32_t num_events,
uint32_t activated_count = 0;
for (i = 0; i < num_events; i++) {
- if (!event_waiters[i].event)
+ if (!READ_ONCE(event_waiters[i].event))
return KFD_IOC_WAIT_RESULT_FAIL;
- if (event_waiters[i].activated) {
+ if (READ_ONCE(event_waiters[i].activated)) {
if (!all)
return KFD_IOC_WAIT_RESULT_COMPLETE;
@@ -616,6 +861,8 @@ static int copy_signaled_event_data(uint32_t num_events,
for (i = 0; i < num_events; i++) {
waiter = &event_waiters[i];
event = waiter->event;
+ if (!event)
+ return -EINVAL; /* event was destroyed */
if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
dst = &data[i].memory_exception_data;
src = &event->memory_exception_data;
@@ -626,11 +873,8 @@ static int copy_signaled_event_data(uint32_t num_events,
}
return 0;
-
}
-
-
static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
{
if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
@@ -649,21 +893,28 @@ static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
return msecs_to_jiffies(user_timeout_ms) + 1;
}
-static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
+static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters,
+ bool undo_auto_reset)
{
uint32_t i;
for (i = 0; i < num_events; i++)
- if (waiters[i].event)
+ if (waiters[i].event) {
+ spin_lock(&waiters[i].event->lock);
remove_wait_queue(&waiters[i].event->wq,
&waiters[i].wait);
+ if (undo_auto_reset && waiters[i].activated &&
+ waiters[i].event && waiters[i].event->auto_reset)
+ set_event(waiters[i].event);
+ spin_unlock(&waiters[i].event->lock);
+ }
kfree(waiters);
}
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
- bool all, uint32_t user_timeout_ms,
+ bool all, uint32_t *user_timeout_ms,
uint32_t *wait_result)
{
struct kfd_event_data __user *events =
@@ -672,7 +923,7 @@ int kfd_wait_on_events(struct kfd_process *p,
int ret = 0;
struct kfd_event_waiter *event_waiters = NULL;
- long timeout = user_timeout_to_jiffies(user_timeout_ms);
+ long timeout = user_timeout_to_jiffies(*user_timeout_ms);
event_waiters = alloc_event_waiters(num_events);
if (!event_waiters) {
@@ -680,6 +931,9 @@ int kfd_wait_on_events(struct kfd_process *p,
goto out;
}
+ /* Use p->event_mutex here to protect against concurrent creation and
+ * destruction of events while we initialize event_waiters.
+ */
mutex_lock(&p->event_mutex);
for (i = 0; i < num_events; i++) {
@@ -691,8 +945,8 @@ int kfd_wait_on_events(struct kfd_process *p,
goto out_unlock;
}
- ret = init_event_waiter_get_status(p, &event_waiters[i],
- event_data.event_id);
+ ret = init_event_waiter(p, &event_waiters[i],
+ event_data.event_id);
if (ret)
goto out_unlock;
}
@@ -710,10 +964,6 @@ int kfd_wait_on_events(struct kfd_process *p,
goto out_unlock;
}
- /* Add to wait lists if we need to wait. */
- for (i = 0; i < num_events; i++)
- init_event_waiter_add_to_waitlist(&event_waiters[i]);
-
mutex_unlock(&p->event_mutex);
while (true) {
@@ -723,15 +973,11 @@ int kfd_wait_on_events(struct kfd_process *p,
}
if (signal_pending(current)) {
- /*
- * This is wrong when a nonzero, non-infinite timeout
- * is specified. We need to use
- * ERESTARTSYS_RESTARTBLOCK, but struct restart_block
- * contains a union with data for each user and it's
- * in generic kernel code that I don't want to
- * touch yet.
- */
ret = -ERESTARTSYS;
+ if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE &&
+ *user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE)
+ *user_timeout_ms = jiffies_to_msecs(
+ max(0l, timeout-1));
break;
}
@@ -758,16 +1004,21 @@ int kfd_wait_on_events(struct kfd_process *p,
}
__set_current_state(TASK_RUNNING);
+ mutex_lock(&p->event_mutex);
/* copy_signaled_event_data may sleep. So this has to happen
* after the task state is set back to RUNNING.
+ *
+ * The event may also have been destroyed after signaling. So
+ * copy_signaled_event_data also must confirm that the event
+ * still exists. Therefore this must be under the p->event_mutex
+ * which is also held when events are destroyed.
*/
if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
ret = copy_signaled_event_data(num_events,
event_waiters, events);
- mutex_lock(&p->event_mutex);
out_unlock:
- free_waiters(num_events, event_waiters);
+ free_waiters(num_events, event_waiters, ret == -ERESTARTSYS);
mutex_unlock(&p->event_mutex);
out:
if (ret)
@@ -824,8 +1075,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
}
/*
- * Assumes that p->event_mutex is held and of course
- * that p is not going away (current or locked).
+ * Assumes that p is not going away.
*/
static void lookup_events_by_type_and_signal(struct kfd_process *p,
int type, void *event_data)
@@ -837,6 +1087,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
+ rcu_read_lock();
+
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == type) {
@@ -844,9 +1096,11 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
dev_dbg(kfd_device,
"Event found: id %X type %d",
ev->event_id, ev->type);
+ spin_lock(&ev->lock);
set_event(ev);
if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
ev->memory_exception_data = *ev_data;
+ spin_unlock(&ev->lock);
}
if (type == KFD_EVENT_TYPE_MEMORY) {
@@ -869,6 +1123,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
p->lead_thread->pid, p->pasid);
}
}
+
+ rcu_read_unlock();
}
#ifdef KFD_SUPPORT_IOMMU_V2
@@ -878,6 +1134,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
{
struct kfd_hsa_memory_exception_data memory_exception_data;
struct vm_area_struct *vma;
+ int user_gpu_id;
/*
* Because we are called from arbitrary context (workqueue) as opposed
@@ -899,12 +1156,17 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
return; /* Process is exiting */
}
+ user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ return;
+ }
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
mmap_read_lock(mm);
vma = find_vma(mm, address);
- memory_exception_data.gpu_id = dev->id;
+ memory_exception_data.gpu_id = user_gpu_id;
memory_exception_data.va = address;
/* Set failure reason */
memory_exception_data.failure.NotPresent = 1;
@@ -935,17 +1197,13 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
/* Workaround on Raven to not kill the process when memory is freed
* before IOMMU is able to finish processing all the excessive PPRs
*/
- if (dev->device_info->asic_family != CHIP_RAVEN &&
- dev->device_info->asic_family != CHIP_RENOIR) {
- mutex_lock(&p->event_mutex);
- /* Lookup events by type and signal them */
+ if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0))
lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
&memory_exception_data);
- mutex_unlock(&p->event_mutex);
- }
-
kfd_unref_process(p);
}
#endif /* KFD_SUPPORT_IOMMU_V2 */
@@ -962,12 +1220,7 @@ void kfd_signal_hw_exception_event(u32 pasid)
if (!p)
return; /* Presumably process exited. */
- mutex_lock(&p->event_mutex);
-
- /* Lookup events by type and signal them */
lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
-
- mutex_unlock(&p->event_mutex);
kfd_unref_process(p);
}
@@ -978,11 +1231,19 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
uint32_t id;
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
struct kfd_hsa_memory_exception_data memory_exception_data;
+ int user_gpu_id;
if (!p)
return; /* Presumably process exited. */
+
+ user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ return;
+ }
+
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
- memory_exception_data.gpu_id = dev->id;
+ memory_exception_data.gpu_id = user_gpu_id;
memory_exception_data.failure.imprecise = true;
/* Set failure reason */
if (info) {
@@ -995,16 +1256,19 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
info->prot_write ? 1 : 0;
memory_exception_data.failure.imprecise = 0;
}
- mutex_lock(&p->event_mutex);
+
+ rcu_read_lock();
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+ spin_lock(&ev->lock);
ev->memory_exception_data = memory_exception_data;
set_event(ev);
+ spin_unlock(&ev->lock);
}
- mutex_unlock(&p->event_mutex);
+ rcu_read_unlock();
kfd_unref_process(p);
}
@@ -1022,31 +1286,44 @@ void kfd_signal_reset_event(struct kfd_dev *dev)
/* Whole gpu reset caused by GPU hang and memory is lost */
memset(&hw_exception_data, 0, sizeof(hw_exception_data));
- hw_exception_data.gpu_id = dev->id;
hw_exception_data.memory_lost = 1;
hw_exception_data.reset_cause = reset_cause;
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC;
- memory_exception_data.gpu_id = dev->id;
memory_exception_data.failure.imprecise = true;
idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- mutex_lock(&p->event_mutex);
+ int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ continue;
+ }
+
+ rcu_read_lock();
+
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id) {
if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+ spin_lock(&ev->lock);
ev->hw_exception_data = hw_exception_data;
+ ev->hw_exception_data.gpu_id = user_gpu_id;
set_event(ev);
+ spin_unlock(&ev->lock);
}
if (ev->type == KFD_EVENT_TYPE_MEMORY &&
reset_cause == KFD_HW_EXCEPTION_ECC) {
+ spin_lock(&ev->lock);
ev->memory_exception_data = memory_exception_data;
+ ev->memory_exception_data.gpu_id = user_gpu_id;
set_event(ev);
+ spin_unlock(&ev->lock);
}
}
- mutex_unlock(&p->event_mutex);
+
+ rcu_read_unlock();
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
@@ -1058,33 +1335,46 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
struct kfd_hsa_hw_exception_data hw_exception_data;
struct kfd_event *ev;
uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+ int user_gpu_id;
if (!p)
return; /* Presumably process exited. */
+ user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ return;
+ }
+
memset(&hw_exception_data, 0, sizeof(hw_exception_data));
- hw_exception_data.gpu_id = dev->id;
+ hw_exception_data.gpu_id = user_gpu_id;
hw_exception_data.memory_lost = 1;
hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
- memory_exception_data.gpu_id = dev->id;
+ memory_exception_data.gpu_id = user_gpu_id;
memory_exception_data.failure.imprecise = true;
- mutex_lock(&p->event_mutex);
+ rcu_read_lock();
+
idr_for_each_entry_continue(&p->event_idr, ev, id) {
if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+ spin_lock(&ev->lock);
ev->hw_exception_data = hw_exception_data;
set_event(ev);
+ spin_unlock(&ev->lock);
}
if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+ spin_lock(&ev->lock);
ev->memory_exception_data = memory_exception_data;
set_event(ev);
+ spin_unlock(&ev->lock);
}
}
- mutex_unlock(&p->event_mutex);
+
+ rcu_read_unlock();
/* user application will handle SIGBUS signal */
send_sig(SIGBUS, p->lead_thread, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index c8fe5dbdad55..1c62c8dd6460 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -58,6 +59,7 @@ struct kfd_event {
int type;
+ spinlock_t lock;
wait_queue_head_t wq; /* List of event waiters. */
/* Only for signal events. */
@@ -68,6 +70,8 @@ struct kfd_event {
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
};
+
+ struct rcu_head rcu; /* for asynchronous kfree_rcu */
};
#define KFD_EVENT_TIMEOUT_IMMEDIATE 0
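One consequence of passing user_timeout_ms by reference into kfd_wait_on_events (see the hunks above): when a signal interrupts the wait, the remaining time is written back, so a restarted call does not wait for the full original timeout again. A hypothetical caller sketch under that assumption; the ioctl argument field names follow the KFD uAPI wait-events args and are assumptions here, since that file is not shown in this diff:
static int example_wait_events_ioctl(struct kfd_process *p,
		struct kfd_ioctl_wait_events_args *args)
{
	uint32_t wait_result;
	int err;

	err = kfd_wait_on_events(p, args->num_events,
				 u64_to_user_ptr(args->events_ptr),
				 (args->wait_for_all != 0),
				 &args->timeout, &wait_result);
	/* On -ERESTARTSYS, args->timeout now holds the remaining time in ms,
	 * so the automatically restarted call resumes where it left off.
	 */
	args->wait_result = wait_result;
	return err;
}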
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index d1388896f9c1..8aebe408c544 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -34,7 +35,7 @@
#include "kfd_priv.h"
#include <linux/mm.h>
#include <linux/mman.h>
-#include <asm/processor.h>
+#include <linux/processor.h>
/*
* The primary memory I/O features being added for revisions of gfxip
@@ -394,7 +395,7 @@ int kfd_init_apertures(struct kfd_process *process)
pdd->gpuvm_base = pdd->gpuvm_limit = 0;
pdd->scratch_base = pdd->scratch_limit = 0;
} else {
- switch (dev->device_info->asic_family) {
+ switch (dev->adev->asic_type) {
case CHIP_KAVERI:
case CHIP_HAWAII:
case CHIP_CARRIZO:
@@ -406,29 +407,14 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_VEGAM:
kfd_init_apertures_vi(pdd, id);
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- kfd_init_apertures_v9(pdd, id);
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dev->device_info->asic_family);
- return -EINVAL;
+ if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
+ kfd_init_apertures_v9(pdd, id);
+ else {
+ WARN(1, "Unexpected ASIC family %u",
+ dev->adev->asic_type);
+ return -EINVAL;
+ }
}
if (!dev->use_iommu_v2) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
new file mode 100644
index 000000000000..0d53f6067422
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_priv.h"
+#include "kfd_events.h"
+#include "soc15_int.h"
+#include "kfd_device_queue_manager.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+#include "kfd_smi_events.h"
+
+/*
+ * GFX11 SQ Interrupts
+ *
+ * There are 3 encoding types of interrupts sourced from SQ sent as a 44-bit
+ * packet to the Interrupt Handler:
+ * Auto - Generated by the SQG (various cmd overflows, timestamps etc)
+ * Wave - Generated by S_SENDMSG through a shader program
+ * Error - HW generated errors (Illegal instructions, Memviols, EDC etc)
+ *
+ * The 44-bit packet is mapped as {context_id1[7:0],context_id0[31:0]} plus
+ * 4-bits for VMID (SOC15_VMID_FROM_IH_ENTRY) as such:
+ *
+ * - context_id1[7:6]
+ * Encoding type (0 = Auto, 1 = Wave, 2 = Error)
+ *
+ * - context_id0[26]
+ * PRIV bit indicates that Wave S_SEND or error occurred within trap
+ *
+ * - context_id0[24:0]
+ * 25-bit data with the following layout per encoding type:
+ * Auto - only context_id0[8:0] is used, which reports various interrupts
+ * generated by SQG. The rest is 0.
+ * Wave - user data sent from m0 via S_SENDMSG (context_id0[23:0])
+ * Error - Error Type (context_id0[24:21]), Error Details (context_id0[20:0])
+ *
+ * The other context_id bits show coordinates (SE/SH/CU/SIMD/WGP) for wave
+ * S_SENDMSG and Errors. These are 0 for Auto.
+ */
+
+enum SQ_INTERRUPT_WORD_ENCODING {
+ SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0,
+ SQ_INTERRUPT_WORD_ENCODING_INST,
+ SQ_INTERRUPT_WORD_ENCODING_ERROR,
+};
+
+enum SQ_INTERRUPT_ERROR_TYPE {
+ SQ_INTERRUPT_ERROR_TYPE_EDC_FUE = 0x0,
+ SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST,
+ SQ_INTERRUPT_ERROR_TYPE_MEMVIOL,
+ SQ_INTERRUPT_ERROR_TYPE_EDC_FED,
+};
+
+/* SQ_INTERRUPT_WORD_AUTO_CTXID */
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE__SHIFT 0
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT__SHIFT 1
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF_FULL__SHIFT 2
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__REG_TIMESTAMP__SHIFT 3
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__CMD_TIMESTAMP__SHIFT 4
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_CMD_OVERFLOW__SHIFT 5
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_REG_OVERFLOW__SHIFT 6
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__IMMED_OVERFLOW__SHIFT 7
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR__SHIFT 8
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING__SHIFT 6
+
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_MASK 0x00000001
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT_MASK 0x00000002
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF_FULL_MASK 0x00000004
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__REG_TIMESTAMP_MASK 0x00000008
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__CMD_TIMESTAMP_MASK 0x00000010
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_CMD_OVERFLOW_MASK 0x00000020
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_REG_OVERFLOW_MASK 0x00000040
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__IMMED_OVERFLOW_MASK 0x00000080
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR_MASK 0x00000100
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING_MASK 0x000000c0
+
+/* SQ_INTERRUPT_WORD_WAVE_CTXID */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA__SHIFT 0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SH_ID__SHIFT 25
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV__SHIFT 26
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID__SHIFT 27
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SIMD_ID__SHIFT 0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID__SHIFT 2
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING__SHIFT 6
+
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA_MASK 0x00ffffff /* [23:0] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SH_ID_MASK 0x02000000 /* [25] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK 0x04000000 /* [26] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID_MASK 0xf8000000 /* [31:27] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SIMD_ID_MASK 0x00000003 /* [33:32] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID_MASK 0x0000003c /* [37:34] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING_MASK 0x000000c0 /* [39:38] */
+
+/* SQ_INTERRUPT_WORD_ERROR_CTXID */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__DETAIL__SHIFT 0
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__TYPE__SHIFT 21
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__SH_ID__SHIFT 25
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__PRIV__SHIFT 26
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__WAVE_ID__SHIFT 27
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__SIMD_ID__SHIFT 0
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__WGP_ID__SHIFT 2
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__ENCODING__SHIFT 6
+
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__DETAIL_MASK 0x001fffff /* [20:0] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__TYPE_MASK 0x01e00000 /* [24:21] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__SH_ID_MASK 0x02000000 /* [25] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__PRIV_MASK 0x04000000 /* [26] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__WAVE_ID_MASK 0xf8000000 /* [31:27] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__SIMD_ID_MASK 0x00000003 /* [33:32] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__WGP_ID_MASK 0x0000003c /* [37:34] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__ENCODING_MASK 0x000000c0 /* [39:38] */
+
+/*
+ * The debugger will send user data(m0) with PRIV=1 to indicate it requires
+ * notification from the KFD with the following queue id (DOORBELL_ID) and
+ * trap code (TRAP_CODE).
+ */
+#define KFD_CTXID0_TRAP_CODE_SHIFT 10
+#define KFD_CTXID0_TRAP_CODE_MASK 0xfffc00
+#define KFD_CTXID0_CP_BAD_OP_ECODE_MASK 0x3ffffff
+#define KFD_CTXID0_DOORBELL_ID_MASK 0x0003ff
+
+#define KFD_CTXID0_TRAP_CODE(ctxid0) (((ctxid0) & \
+ KFD_CTXID0_TRAP_CODE_MASK) >> \
+ KFD_CTXID0_TRAP_CODE_SHIFT)
+#define KFD_CTXID0_CP_BAD_OP_ECODE(ctxid0) (((ctxid0) & \
+ KFD_CTXID0_CP_BAD_OP_ECODE_MASK) >> \
+ KFD_CTXID0_TRAP_CODE_SHIFT)
+#define KFD_CTXID0_DOORBELL_ID(ctxid0) ((ctxid0) & \
+ KFD_CTXID0_DOORBELL_ID_MASK)
+
+static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1)
+{
+ pr_debug(
+ "sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_BUF_FULL),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, REG_TIMESTAMP),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, CMD_TIMESTAMP),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_CMD_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_REG_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, IMMED_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_UTC_ERROR));
+}
+
+static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1)
+{
+ pr_debug(
+ "sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, WAVE_ID),
+ REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SIMD_ID),
+ REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID));
+}
+
+static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1)
+{
+ pr_warn(
+ "sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, SH_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, PRIV),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, WAVE_ID),
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_ERROR_CTXID1, SIMD_ID),
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_ERROR_CTXID1, WGP_ID));
+}
+
+static void event_interrupt_poison_consumption_v11(struct kfd_dev *dev,
+ uint16_t pasid, uint16_t source_id)
+{
+ int ret = -EINVAL;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+ if (!p)
+ return;
+
+	/* all queues of a process will be unmapped at once */
+ if (atomic_read(&p->poison)) {
+ kfd_unref_process(p);
+ return;
+ }
+
+ atomic_set(&p->poison, 1);
+ kfd_unref_process(p);
+
+ switch (source_id) {
+ case SOC15_INTSRC_SQ_INTERRUPT_MSG:
+ if (dev->dqm->ops.reset_queues)
+ ret = dev->dqm->ops.reset_queues(dev->dqm, pasid);
+ break;
+ case SOC21_INTSRC_SDMA_ECC:
+ default:
+ break;
+ }
+
+ kfd_signal_poison_consumed_event(dev, pasid);
+
+	/* If resetting the queue succeeds, do page retirement without a GPU
+	 * reset; if it fails, fall back to the full GPU reset path. */
+ if (!ret)
+ amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
+ else
+ amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
+}
+
+static bool event_interrupt_isr_v11(struct kfd_dev *dev,
+ const uint32_t *ih_ring_entry,
+ uint32_t *patched_ihre,
+ bool *patched_flag)
+{
+ uint16_t source_id, client_id, pasid, vmid;
+ const uint32_t *data = ih_ring_entry;
+ uint32_t context_id0;
+
+ source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+ client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+ /* Only handle interrupts from KFD VMIDs */
+ vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+ if (/*!KFD_IRQ_IS_FENCE(client_id, source_id) &&*/
+ (vmid < dev->vm_info.first_vmid_kfd ||
+ vmid > dev->vm_info.last_vmid_kfd))
+ return false;
+
+ pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+ context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+
+ if ((source_id == SOC15_INTSRC_CP_END_OF_PIPE) &&
+ (context_id0 & AMDGPU_FENCE_MES_QUEUE_FLAG))
+ return false;
+
+ pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, vmid, pasid);
+ pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3],
+ data[4], data[5], data[6], data[7]);
+
+ /* If there is no valid PASID, it's likely a bug */
+ if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
+ return false;
+
+ /* Interrupt types we care about: various signals and faults.
+ * They will be forwarded to a work queue (see below).
+ */
+ return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
+ source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
+ source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
+ source_id == SOC21_INTSRC_SDMA_TRAP ||
+ /* KFD_IRQ_IS_FENCE(client_id, source_id) || */
+ (((client_id == SOC21_IH_CLIENTID_VMC) ||
+ ((client_id == SOC21_IH_CLIENTID_GFX) &&
+ (source_id == UTCL2_1_0__SRCID__FAULT))) &&
+ !amdgpu_no_queue_eviction_on_vm_fault);
+}
+
+static void event_interrupt_wq_v11(struct kfd_dev *dev,
+ const uint32_t *ih_ring_entry)
+{
+ uint16_t source_id, client_id, ring_id, pasid, vmid;
+ uint32_t context_id0, context_id1;
+ uint8_t sq_int_enc, sq_int_errtype, sq_int_priv;
+ struct kfd_vm_fault_info info = {0};
+ struct kfd_hsa_memory_exception_data exception_data;
+
+ source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+ client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+ ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+ pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+ vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+ context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+ context_id1 = SOC15_CONTEXT_ID1_FROM_IH_ENTRY(ih_ring_entry);
+
+ /* VMC, UTCL2 */
+ if (client_id == SOC21_IH_CLIENTID_VMC ||
+ ((client_id == SOC21_IH_CLIENTID_GFX) &&
+ (source_id == UTCL2_1_0__SRCID__FAULT))) {
+
+ info.vmid = vmid;
+ info.mc_id = client_id;
+ info.page_addr = ih_ring_entry[4] |
+ (uint64_t)(ih_ring_entry[5] & 0xf) << 32;
+ info.prot_valid = ring_id & 0x08;
+ info.prot_read = ring_id & 0x10;
+ info.prot_write = ring_id & 0x20;
+
+ memset(&exception_data, 0, sizeof(exception_data));
+ exception_data.gpu_id = dev->id;
+ exception_data.va = (info.page_addr) << PAGE_SHIFT;
+ exception_data.failure.NotPresent = info.prot_valid ? 1 : 0;
+ exception_data.failure.NoExecute = info.prot_exec ? 1 : 0;
+ exception_data.failure.ReadOnly = info.prot_write ? 1 : 0;
+ exception_data.failure.imprecise = 0;
+
+ /*kfd_set_dbg_ev_from_interrupt(dev, pasid, -1,
+ KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION),
+ &exception_data, sizeof(exception_data));*/
+ kfd_smi_event_update_vmfault(dev, pasid);
+
+ /* GRBM, SDMA, SE, PMM */
+ } else if (client_id == SOC21_IH_CLIENTID_GRBM_CP ||
+ client_id == SOC21_IH_CLIENTID_GFX) {
+
+ /* CP */
+ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
+ kfd_signal_event_interrupt(pasid, context_id0, 32);
+ /*else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
+ kfd_set_dbg_ev_from_interrupt(dev, pasid,
+ KFD_CTXID0_DOORBELL_ID(context_id0),
+ KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)),
+ NULL, 0);*/
+
+ /* SDMA */
+ else if (source_id == SOC21_INTSRC_SDMA_TRAP)
+ kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
+ else if (source_id == SOC21_INTSRC_SDMA_ECC) {
+ event_interrupt_poison_consumption_v11(dev, pasid, source_id);
+ return;
+ }
+
+ /* SQ */
+ else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) {
+ sq_int_enc = REG_GET_FIELD(context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
+ switch (sq_int_enc) {
+ case SQ_INTERRUPT_WORD_ENCODING_AUTO:
+ print_sq_intr_info_auto(context_id0, context_id1);
+ break;
+ case SQ_INTERRUPT_WORD_ENCODING_INST:
+ print_sq_intr_info_inst(context_id0, context_id1);
+ sq_int_priv = REG_GET_FIELD(context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV);
+ /*if (sq_int_priv && (kfd_set_dbg_ev_from_interrupt(dev, pasid,
+ KFD_CTXID0_DOORBELL_ID(context_id0),
+ KFD_CTXID0_TRAP_CODE(context_id0),
+ NULL, 0)))
+ return;*/
+ break;
+ case SQ_INTERRUPT_WORD_ENCODING_ERROR:
+ print_sq_intr_info_error(context_id0, context_id1);
+ sq_int_errtype = REG_GET_FIELD(context_id0,
+ SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE);
+ if (sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
+ sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
+ event_interrupt_poison_consumption_v11(
+ dev, pasid, source_id);
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+ kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24);
+ }
+
+ /*} else if (KFD_IRQ_IS_FENCE(client_id, source_id)) {
+ kfd_process_close_interrupt_drain(pasid);*/
+ }
+}
+
+const struct kfd_event_interrupt_class event_interrupt_class_v11 = {
+ .interrupt_isr = event_interrupt_isr_v11,
+ .interrupt_wq = event_interrupt_wq_v11,
+};
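To make the 44-bit packet layout described in the header comment of this new file concrete, here is a small decoding sketch built only from the masks and helpers defined above; the function and variable names are illustrative and not part of the file:
static void decode_sq_interrupt_sketch(uint32_t ctxid0, uint32_t ctxid1)
{
	/* context_id1[7:6] selects the Auto/Wave/Error encoding */
	uint32_t enc = REG_GET_FIELD(ctxid1, SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);

	if (enc == SQ_INTERRUPT_WORD_ENCODING_INST) {
		/* Wave: 24 bits of S_SENDMSG user data; PRIV flags a trap */
		uint32_t data = REG_GET_FIELD(ctxid0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA);
		uint32_t priv = REG_GET_FIELD(ctxid0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV);

		if (priv)
			pr_debug("trap code %d for doorbell %d\n",
				 KFD_CTXID0_TRAP_CODE(ctxid0),
				 KFD_CTXID0_DOORBELL_ID(ctxid0));
		else
			pr_debug("wave message data 0x%x\n", data);
	} else if (enc == SQ_INTERRUPT_WORD_ENCODING_ERROR) {
		/* Error: type in context_id0[24:21], details in [20:0] */
		pr_debug("sq error type %d detail 0x%x\n",
			 REG_GET_FIELD(ctxid0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE),
			 REG_GET_FIELD(ctxid0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL));
	}
}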
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 543e7ea75593..0b75a37b689b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -89,6 +90,76 @@ enum SQ_INTERRUPT_ERROR_TYPE {
#define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000
#define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20
+static void event_interrupt_poison_consumption_v9(struct kfd_dev *dev,
+ uint16_t pasid, uint16_t client_id)
+{
+ int old_poison, ret = -EINVAL;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+ if (!p)
+ return;
+
+	/* all queues of a process will be unmapped at once */
+ old_poison = atomic_cmpxchg(&p->poison, 0, 1);
+ kfd_unref_process(p);
+ if (old_poison)
+ return;
+
+ switch (client_id) {
+ case SOC15_IH_CLIENTID_SE0SH:
+ case SOC15_IH_CLIENTID_SE1SH:
+ case SOC15_IH_CLIENTID_SE2SH:
+ case SOC15_IH_CLIENTID_SE3SH:
+ case SOC15_IH_CLIENTID_UTCL2:
+ ret = kfd_dqm_evict_pasid(dev->dqm, pasid);
+ break;
+ case SOC15_IH_CLIENTID_SDMA0:
+ case SOC15_IH_CLIENTID_SDMA1:
+ case SOC15_IH_CLIENTID_SDMA2:
+ case SOC15_IH_CLIENTID_SDMA3:
+ case SOC15_IH_CLIENTID_SDMA4:
+ break;
+ default:
+ break;
+ }
+
+ kfd_signal_poison_consumed_event(dev, pasid);
+
+	/* If resetting the queue succeeds, do page retirement without a GPU
+	 * reset; if it fails, fall back to the full GPU reset path.
+	 */
+ if (!ret) {
+ dev_warn(dev->adev->dev,
+ "RAS poison consumption, unmap queue flow succeeded: client id %d\n",
+ client_id);
+ amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
+ } else {
+ dev_warn(dev->adev->dev,
+ "RAS poison consumption, fall back to gpu reset flow: client id %d\n",
+ client_id);
+ amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
+ }
+}
+
+static bool context_id_expected(struct kfd_dev *dev)
+{
+ switch (KFD_GC_VERSION(dev)) {
+ case IP_VERSION(9, 0, 1):
+ return dev->mec_fw_version >= 0x817a;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ return dev->mec_fw_version >= 0x17a;
+ default:
+		/* Other GFXv9 and later GPUs always send valid context IDs
+ * on legitimate events
+ */
+ return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 1);
+ }
+}
+
static bool event_interrupt_isr_v9(struct kfd_dev *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre,
@@ -135,7 +206,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
*patched_flag = true;
memcpy(patched_ihre, ih_ring_entry,
- dev->device_info->ih_ring_entry_size);
+ dev->device_info.ih_ring_entry_size);
pasid = dev->dqm->vmid_pasid[vmid];
@@ -154,11 +225,26 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
return false;
+ /* Workaround CP firmware sending bogus signals with 0 context_id.
+ * Those can be safely ignored on hardware and firmware versions that
+ * include a valid context_id on legitimate signals. This avoids the
+ * slow path in kfd_signal_event_interrupt that scans all event slots
+ * for signaled events.
+ */
+ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) {
+ uint32_t context_id =
+ SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+
+ if (context_id == 0 && context_id_expected(dev))
+ return false;
+ }
+
/* Interrupt types we care about: various signals and faults.
* They will be forwarded to a work queue (see below).
*/
return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
source_id == SOC15_INTSRC_SDMA_TRAP ||
+ source_id == SOC15_INTSRC_SDMA_ECC ||
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
((client_id == SOC15_IH_CLIENTID_VMC ||
@@ -230,8 +316,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
sq_intr_err);
if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
- kfd_signal_poison_consumed_event(dev, pasid);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd);
+ event_interrupt_poison_consumption_v9(dev, pasid, client_id);
return;
}
break;
@@ -252,8 +337,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
if (source_id == SOC15_INTSRC_SDMA_TRAP) {
kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
} else if (source_id == SOC15_INTSRC_SDMA_ECC) {
- kfd_signal_poison_consumed_event(dev, pasid);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd);
+ event_interrupt_poison_consumption_v9(dev, pasid, client_id);
return;
}
} else if (client_id == SOC15_IH_CLIENTID_VMC ||
@@ -262,6 +346,12 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
struct kfd_vm_fault_info info = {0};
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+ if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
+ amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
+ event_interrupt_poison_consumption_v9(dev, pasid, client_id);
+ return;
+ }
+
info.vmid = vmid;
info.mc_id = client_id;
info.page_addr = ih_ring_entry[4] |
@@ -271,7 +361,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
info.prot_write = ring_id & 0x20;
kfd_smi_event_update_vmfault(dev, pasid);
- kfd_process_vm_fault(dev->dqm, pasid);
+ kfd_dqm_evict_pasid(dev->dqm, pasid);
kfd_signal_vm_fault_event(dev, pasid, &info);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index bc47f6a44456..34772fe74296 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -54,17 +55,17 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
int r;
r = kfifo_alloc(&kfd->ih_fifo,
- KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
+ KFD_IH_NUM_ENTRIES * kfd->device_info.ih_ring_entry_size,
GFP_KERNEL);
if (r) {
- dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
+ dev_err(kfd->adev->dev, "Failed to allocate IH fifo\n");
return r;
}
kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
if (unlikely(!kfd->ih_wq)) {
kfifo_free(&kfd->ih_fifo);
- dev_err(kfd_chardev(), "Failed to allocate KFD IH workqueue\n");
+ dev_err(kfd->adev->dev, "Failed to allocate KFD IH workqueue\n");
return -ENOMEM;
}
spin_lock_init(&kfd->interrupt_lock);
@@ -114,9 +115,9 @@ bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
int count;
count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
- kfd->device_info->ih_ring_entry_size);
- if (count != kfd->device_info->ih_ring_entry_size) {
- dev_err_ratelimited(kfd_chardev(),
+ kfd->device_info.ih_ring_entry_size);
+ if (count != kfd->device_info.ih_ring_entry_size) {
+ dev_dbg_ratelimited(kfd->adev->dev,
"Interrupt ring overflow, dropping interrupt %d\n",
count);
return false;
@@ -133,11 +134,11 @@ static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
int count;
count = kfifo_out(&kfd->ih_fifo, ih_ring_entry,
- kfd->device_info->ih_ring_entry_size);
+ kfd->device_info.ih_ring_entry_size);
- WARN_ON(count && count != kfd->device_info->ih_ring_entry_size);
+ WARN_ON(count && count != kfd->device_info.ih_ring_entry_size);
- return count == kfd->device_info->ih_ring_entry_size;
+ return count == kfd->device_info.ih_ring_entry_size;
}
static void interrupt_wq(struct work_struct *work)
@@ -145,15 +146,24 @@ static void interrupt_wq(struct work_struct *work)
struct kfd_dev *dev = container_of(work, struct kfd_dev,
interrupt_work);
uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
+ unsigned long start_jiffies = jiffies;
- if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
- dev_err_once(kfd_chardev(), "Ring entry too small\n");
+ if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
+ dev_err_once(dev->adev->dev, "Ring entry too small\n");
return;
}
- while (dequeue_ih_ring_entry(dev, ih_ring_entry))
- dev->device_info->event_interrupt_class->interrupt_wq(dev,
+ while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
+ dev->device_info.event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
+ if (time_is_before_jiffies(start_jiffies + HZ)) {
+ /* If we spent more than a second processing signals,
+ * reschedule the worker to avoid soft-lockup warnings
+ */
+ queue_work(dev->ih_wq, &dev->interrupt_work);
+ break;
+ }
+ }
}
bool interrupt_is_wanted(struct kfd_dev *dev,
@@ -163,7 +173,7 @@ bool interrupt_is_wanted(struct kfd_dev *dev,
/* integer and bitwise OR so there is no boolean short-circuiting */
unsigned int wanted = 0;
- wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
+ wanted |= dev->device_info.event_interrupt_class->interrupt_isr(dev,
ih_ring_entry, patched_ihre, flag);
return wanted != 0;
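The interrupt_wq change above bounds how long a single work invocation may run. The same self-rescheduling pattern written out as a complete function, purely as an illustration assuming the helpers named in this file rather than a copy of the final code:
static void interrupt_wq_sketch(struct work_struct *work)
{
	struct kfd_dev *dev = container_of(work, struct kfd_dev, interrupt_work);
	uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
	unsigned long deadline = jiffies + HZ;

	while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
		dev->device_info.event_interrupt_class->interrupt_wq(dev,
								ih_ring_entry);
		if (time_after(jiffies, deadline)) {
			/* Requeue instead of looping indefinitely so the
			 * soft-lockup detector never sees a stuck worker.
			 */
			queue_work(dev->ih_wq, &dev->interrupt_work);
			break;
		}
	}
}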
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index 73f2257acc23..fbd0afe4da42 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,7 +31,6 @@
#include <linux/pci.h>
#include <linux/amd-iommu.h>
#include "kfd_priv.h"
-#include "kfd_dbgmgr.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
@@ -89,7 +89,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
}
pasid_limit = min_t(unsigned int,
- (unsigned int)(1 << kfd->device_info->max_pasid_bits),
+ (unsigned int)(1 << kfd->device_info.max_pasid_bits),
iommu_info.max_pasids);
if (!kfd_set_pasid_limit(pasid_limit)) {
@@ -163,17 +163,6 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, u32 pasid)
pr_debug("Unbinding process 0x%x from IOMMU\n", pasid);
- mutex_lock(kfd_get_dbgmgr_mutex());
-
- if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
- if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
- kfd_dbgmgr_destroy(dev->dbgmgr);
- dev->dbgmgr = NULL;
- }
- }
-
- mutex_unlock(kfd_get_dbgmgr_mutex());
-
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(dev, p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h
index afd420b01a0c..8cf0fcbe87c2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 64b4ac339904..bcf7bc3302c9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -91,7 +92,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->pq_gpu_addr = kq->pq->gpu_addr;
/* For CIK family asics, kq->eop_mem is not needed */
- if (dev->device_info->asic_family > CHIP_MULLINS) {
+ if (dev->adev->asic_type > CHIP_MULLINS) {
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
if (retval != 0)
goto err_eop_allocate_vidmem;
@@ -111,7 +112,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
- retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
+ retval = kfd_gtt_sa_allocate(dev, dev->device_info.doorbell_size,
&kq->wptr_mem);
if (retval != 0)
@@ -297,7 +298,7 @@ void kq_submit_packet(struct kernel_queue *kq)
}
pr_debug("\n");
#endif
- if (kq->dev->device_info->doorbell_size == 8) {
+ if (kq->dev->device_info.doorbell_size == 8) {
*kq->wptr64_kernel = kq->pending_wptr64;
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
kq->pending_wptr64);
@@ -310,7 +311,7 @@ void kq_submit_packet(struct kernel_queue *kq)
void kq_rollback_packet(struct kernel_queue *kq)
{
- if (kq->dev->device_info->doorbell_size == 8) {
+ if (kq->dev->device_info.doorbell_size == 8) {
kq->pending_wptr64 = *kq->wptr64_kernel;
kq->pending_wptr = *kq->wptr_kernel %
(kq->queue->properties.queue_size / 4);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index f4cfe9f1871c..383202fd1ea2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 9b9c2b9bf2ef..22b077ac9a19 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -24,6 +24,7 @@
#include <linux/hmm.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
+#include <linux/migrate.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -32,11 +33,12 @@
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"
+#include "kfd_smi_events.h"
#ifdef dev_fmt
#undef dev_fmt
#endif
-#define dev_fmt(fmt) "kfd_migrate: %s: " fmt, __func__
+#define dev_fmt(fmt) "kfd_migrate: " fmt
static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
@@ -86,10 +88,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
cpu_addr = &job->ibs[0].ptr[num_dw];
- r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
- if (r)
- goto error_free;
-
+ amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
if (r)
@@ -108,8 +107,8 @@ error_free:
* svm_migrate_copy_memory_gart - sdma copy data between ram and vram
*
* @adev: amdgpu device the sdma ring running
- * @src: source page address array
- * @dst: destination page address array
+ * @sys: system DMA pointer to be copied
+ * @vram: vram destination DMA pointer
* @npages: number of pages to copy
* @direction: enum MIGRATION_COPY_DIR
* @mfence: output, sdma fence to signal after sdma is done
@@ -224,8 +223,7 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
page = pfn_to_page(pfn);
svm_range_bo_ref(prange->svm_bo);
page->zone_device_data = prange->svm_bo;
- get_page(page);
- lock_page(page);
+ zone_device_page_init(page);
}
static void
@@ -299,7 +297,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
dma_addr_t *scratch)
{
- uint64_t npages = migrate->cpages;
+ uint64_t npages = migrate->npages;
struct device *dev = adev->dev;
struct amdgpu_res_cursor cursor;
dma_addr_t *src;
@@ -315,7 +313,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
r = svm_range_vram_node_new(adev, prange, true);
if (r) {
- dev_err(adev->dev, "fail %d to alloc vram\n", r);
+ dev_dbg(adev->dev, "fail %d to alloc vram\n", r);
goto out;
}
@@ -324,17 +322,19 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
for (i = j = 0; i < npages; i++) {
struct page *spage;
+ dst[i] = cursor.start + (j << PAGE_SHIFT);
+ migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
+ svm_migrate_get_vram_page(prange, migrate->dst[i]);
+ migrate->dst[i] = migrate_pfn(migrate->dst[i]);
+
spage = migrate_pfn_to_page(migrate->src[i]);
if (spage && !is_zone_device_page(spage)) {
- dst[i] = cursor.start + (j << PAGE_SHIFT);
- migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
- svm_migrate_get_vram_page(prange, migrate->dst[i]);
- migrate->dst[i] = migrate_pfn(migrate->dst[i]);
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
DMA_TO_DEVICE);
r = dma_mapping_error(dev, src[i]);
if (r) {
- dev_err(adev->dev, "fail %d dma_map_page\n", r);
+ dev_err(adev->dev, "%s: fail %d dma_map_page\n",
+ __func__, r);
goto out_free_vram_pages;
}
} else {
@@ -346,7 +346,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
mfence);
if (r)
goto out_free_vram_pages;
- amdgpu_res_next(&cursor, j << PAGE_SHIFT);
+ amdgpu_res_next(&cursor, (j + 1) << PAGE_SHIFT);
j = 0;
} else {
amdgpu_res_next(&cursor, PAGE_SIZE);
@@ -365,7 +365,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
if (r)
goto out_free_vram_pages;
amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
- j= 0;
+ j = 0;
} else {
j++;
}
@@ -404,15 +404,15 @@ out:
static long
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
- uint64_t end)
+ uint64_t end, uint32_t trigger)
{
+ struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
- struct migrate_vma migrate;
+ struct migrate_vma migrate = { 0 };
unsigned long cpages = 0;
dma_addr_t *scratch;
- size_t size;
void *buf;
int r = -ENOMEM;
@@ -423,9 +423,9 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
- size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
- size *= npages;
- buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ buf = kvcalloc(npages,
+ 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
+ GFP_KERNEL);
if (!buf)
goto out;
@@ -433,10 +433,15 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
+ kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ 0, adev->kfd.dev->id, prange->prefetch_loc,
+ prange->preferred_loc, trigger);
+
r = migrate_vma_setup(&migrate);
if (r) {
- dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r,
- prange->start, prange->last);
+ dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
+ __func__, r, prange->start, prange->last);
goto out_free;
}
@@ -461,6 +466,10 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
+ kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ 0, adev->kfd.dev->id, trigger);
+
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange);
@@ -482,6 +491,7 @@ out:
* @prange: range structure
* @best_loc: the device to migrate to
* @mm: the process mm structure
+ * @trigger: reason of migration
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
@@ -490,7 +500,7 @@ out:
*/
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm)
+ struct mm_struct *mm, uint32_t trigger)
{
unsigned long addr, start, end;
struct vm_area_struct *vma;
@@ -513,9 +523,6 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
prange->start, prange->last, best_loc);
- /* FIXME: workaround for page locking bug with invalid pages */
- svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev));
-
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
@@ -527,7 +534,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
break;
next = min(vma->vm_end, end);
- r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
+ r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger);
if (r < 0) {
pr_debug("failed %ld to migrate\n", r);
break;
@@ -549,7 +556,7 @@ static void svm_migrate_page_free(struct page *page)
if (svm_bo) {
pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref));
- svm_range_bo_unref(svm_bo);
+ svm_range_bo_unref_async(svm_bo);
}
}
@@ -593,7 +600,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
continue;
}
src[i] = svm_migrate_addr(adev, spage);
- if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
+ if (j > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
r = svm_migrate_copy_memory_gart(adev, dst + i - j,
src + i - j, j,
FROM_VRAM_TO_RAM,
@@ -614,7 +621,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
r = dma_mapping_error(dev, dst[i]);
if (r) {
- dev_err(adev->dev, "fail %d dma_map_page\n", r);
+ dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
goto out_oom;
}
@@ -640,18 +647,35 @@ out_oom:
return r;
}
+/**
+ * svm_migrate_vma_to_ram - migrate range inside one vma from device to system
+ *
+ * @adev: amdgpu device to migrate from
+ * @prange: svm range structure
+ * @vma: vm_area_struct that range [start, end] belongs to
+ * @start: range start virtual address in pages
+ * @end: range end virtual address in pages
+ *
+ * Context: Process context, caller must hold mmap read lock, prange->migrate_mutex
+ *
+ * Return:
+ * 0 - success with all pages migrated
+ * negative values - indicate error
+ * positive values - partial migration, number of pages not migrated
+ */
static long
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
- struct vm_area_struct *vma, uint64_t start, uint64_t end)
+ struct vm_area_struct *vma, uint64_t start, uint64_t end,
+ uint32_t trigger, struct page *fault_page)
{
+ struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
unsigned long upages = npages;
unsigned long cpages = 0;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
- struct migrate_vma migrate;
+ struct migrate_vma migrate = { 0 };
dma_addr_t *scratch;
- size_t size;
void *buf;
int r = -ENOMEM;
@@ -659,23 +683,32 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.vma = vma;
migrate.start = start;
migrate.end = end;
- migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
+ if (adev->gmc.xgmi.connected_to_cpu)
+ migrate.flags = MIGRATE_VMA_SELECT_DEVICE_COHERENT;
+ else
+ migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
- size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
- size *= npages;
- buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ buf = kvcalloc(npages,
+ 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
+ GFP_KERNEL);
if (!buf)
goto out;
migrate.src = buf;
migrate.dst = migrate.src + npages;
+ migrate.fault_page = fault_page;
scratch = (dma_addr_t *)(migrate.dst + npages);
+ kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ adev->kfd.dev->id, 0, prange->prefetch_loc,
+ prange->preferred_loc, trigger);
+
r = migrate_vma_setup(&migrate);
if (r) {
- dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r,
- prange->start, prange->last);
+ dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
+ __func__, r, prange->start, prange->last);
goto out_free;
}
@@ -702,6 +735,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
+
+ kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ adev->kfd.dev->id, 0, trigger);
+
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
out_free:
@@ -711,8 +749,6 @@ out:
pdd = svm_range_get_pdd_by_adev(prange, adev);
if (pdd)
WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
-
- return upages;
}
return r ? r : upages;
}
@@ -721,13 +757,15 @@ out:
* svm_migrate_vram_to_ram - migrate svm range from device to system
* @prange: range structure
* @mm: process mm, use current->mm if NULL
+ * @trigger: reason of migration
*
- * Context: Process context, caller hold mmap read lock, svms lock, prange lock
+ * Context: Process context, caller must hold mmap read lock, prange->migrate_mutex
*
* Return:
* 0 - OK, otherwise error code
*/
-int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+ uint32_t trigger, struct page *fault_page)
{
struct amdgpu_device *adev;
struct vm_area_struct *vma;
@@ -761,13 +799,17 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
unsigned long next;
vma = find_vma(mm, addr);
- if (!vma || addr < vma->vm_start)
+ if (!vma || addr < vma->vm_start) {
+ pr_debug("failed to find vma for prange %p\n", prange);
+ r = -EFAULT;
break;
+ }
next = min(vma->vm_end, end);
- r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
+ r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger,
+ fault_page);
if (r < 0) {
- pr_debug("failed %ld to migrate\n", r);
+ pr_debug("failed %ld to migrate prange %p\n", r, prange);
break;
} else {
upages += r;
@@ -775,7 +817,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
addr = next;
}
- if (!upages) {
+ if (r >= 0 && !upages) {
svm_range_vram_node_free(prange);
prange->actual_loc = 0;
}
@@ -788,6 +830,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
* @prange: range structure
* @best_loc: the device to migrate to
* @mm: process mm, use current->mm if NULL
+ * @trigger: reason of migration
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
@@ -796,7 +839,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
*/
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm)
+ struct mm_struct *mm, uint32_t trigger)
{
int r, retries = 3;
@@ -808,7 +851,7 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
do {
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL);
if (r)
return r;
} while (prange->actual_loc && --retries);
@@ -816,17 +859,17 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
if (prange->actual_loc)
return -EDEADLK;
- return svm_migrate_ram_to_vram(prange, best_loc, mm);
+ return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
}
int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm)
+ struct mm_struct *mm, uint32_t trigger)
{
if (!prange->actual_loc)
- return svm_migrate_ram_to_vram(prange, best_loc, mm);
+ return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
else
- return svm_migrate_vram_to_vram(prange, best_loc, mm);
+ return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
}
@@ -843,7 +886,7 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
unsigned long addr = vmf->address;
- struct vm_area_struct *vma;
+ struct svm_range_bo *svm_bo;
enum svm_work_list_ops op;
struct svm_range *parent;
struct svm_range *prange;
@@ -851,29 +894,42 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
struct mm_struct *mm;
int r = 0;
- vma = vmf->vma;
- mm = vma->vm_mm;
+ svm_bo = vmf->page->zone_device_data;
+ if (!svm_bo) {
+ pr_debug("failed get device page at addr 0x%lx\n", addr);
+ return VM_FAULT_SIGBUS;
+ }
+ if (!mmget_not_zero(svm_bo->eviction_fence->mm)) {
+ pr_debug("addr 0x%lx of process mm is destroyed\n", addr);
+ return VM_FAULT_SIGBUS;
+ }
- p = kfd_lookup_process_by_mm(vma->vm_mm);
+ mm = svm_bo->eviction_fence->mm;
+ if (mm != vmf->vma->vm_mm)
+ pr_debug("addr 0x%lx is COW mapping in child process\n", addr);
+
+ p = kfd_lookup_process_by_mm(mm);
if (!p) {
pr_debug("failed find process at fault address 0x%lx\n", addr);
- return VM_FAULT_SIGBUS;
+ r = VM_FAULT_SIGBUS;
+ goto out_mmput;
}
if (READ_ONCE(p->svms.faulting_task) == current) {
pr_debug("skipping ram migration\n");
- kfd_unref_process(p);
- return 0;
+ r = 0;
+ goto out_unref_process;
}
- addr >>= PAGE_SHIFT;
+
pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
+ addr >>= PAGE_SHIFT;
mutex_lock(&p->svms.lock);
prange = svm_range_from_addr(&p->svms, addr, &parent);
if (!prange) {
- pr_debug("cannot find svm range at 0x%lx\n", addr);
+ pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr);
r = -EFAULT;
- goto out;
+ goto out_unlock_svms;
}
mutex_lock(&parent->migrate_mutex);
@@ -895,10 +951,12 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
goto out_unlock_prange;
}
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
+ vmf->page);
if (r)
- pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
- prange, prange->start, prange->last);
+ pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n",
+ r, prange->svms, prange, prange->start, prange->last);
/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
if (p->xnack_enabled && parent == prange)
@@ -912,12 +970,13 @@ out_unlock_prange:
if (prange != parent)
mutex_unlock(&prange->migrate_mutex);
mutex_unlock(&parent->migrate_mutex);
-out:
+out_unlock_svms:
mutex_unlock(&p->svms.lock);
- kfd_unref_process(p);
-
+out_unref_process:
pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
-
+ kfd_unref_process(p);
+out_mmput:
+ mmput(mm);
return r ? VM_FAULT_SIGBUS : 0;
}
@@ -933,12 +992,12 @@ int svm_migrate_init(struct amdgpu_device *adev)
{
struct kfd_dev *kfddev = adev->kfd.dev;
struct dev_pagemap *pgmap;
- struct resource *res;
+ struct resource *res = NULL;
unsigned long size;
void *r;
/* Page migration works on Vega10 or newer */
- if (kfddev->device_info->asic_family < CHIP_VEGA10)
+ if (!KFD_IS_SOC15(kfddev))
return -EINVAL;
pgmap = &kfddev->pgmap;
@@ -948,28 +1007,34 @@ int svm_migrate_init(struct amdgpu_device *adev)
* should remove reserved size
*/
size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
- res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
- if (IS_ERR(res))
- return -ENOMEM;
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ pgmap->range.start = adev->gmc.aper_base;
+ pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 1;
+ pgmap->type = MEMORY_DEVICE_COHERENT;
+ } else {
+ res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
+ if (IS_ERR(res))
+ return -ENOMEM;
+ pgmap->range.start = res->start;
+ pgmap->range.end = res->end;
+ pgmap->type = MEMORY_DEVICE_PRIVATE;
+ }
- pgmap->type = MEMORY_DEVICE_PRIVATE;
pgmap->nr_range = 1;
- pgmap->range.start = res->start;
- pgmap->range.end = res->end;
pgmap->ops = &svm_migrate_pgmap_ops;
pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
- pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
-
+ pgmap->flags = 0;
/* Device manager releases device-specific resources, memory region and
* pgmap when driver disconnects from device.
*/
r = devm_memremap_pages(adev->dev, pgmap);
if (IS_ERR(r)) {
pr_err("failed to register HMM device memory\n");
-
/* Disable SVM support capability */
pgmap->type = 0;
- devm_release_mem_region(adev->dev, res->start, resource_size(res));
+ if (pgmap->type == MEMORY_DEVICE_PRIVATE)
+ devm_release_mem_region(adev->dev, res->start,
+ res->end - res->start + 1);
return PTR_ERR(r);
}
@@ -978,6 +1043,8 @@ int svm_migrate_init(struct amdgpu_device *adev)
amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));
+ svm_range_set_max_pages(adev);
+
pr_info("HMM registered %ldMB device memory\n", size >> 20);
return 0;
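
svm_migrate_init() now registers VRAM either as MEMORY_DEVICE_COHERENT, when the GPU's memory is directly CPU-addressable over XGMI (adev->gmc.xgmi.connected_to_cpu), or as MEMORY_DEVICE_PRIVATE backed by a reserved free physical range. A hedged sketch of just that pagemap decision; the function name and parameters are illustrative, not part of the patch:

#include <linux/memremap.h>
#include <linux/ioport.h>

static int example_pgmap_setup(struct device *dev, struct dev_pagemap *pgmap,
			       bool coherent_vram, u64 aper_base, u64 aper_size,
			       u64 vram_size)
{
	struct resource *res;

	if (coherent_vram) {
		/* CPU-coherent VRAM: expose the real aperture directly. */
		pgmap->range.start = aper_base;
		pgmap->range.end = aper_base + aper_size - 1;
		pgmap->type = MEMORY_DEVICE_COHERENT;
	} else {
		/* Discrete VRAM: reserve an unused physical range for the
		 * device-private struct pages.
		 */
		res = devm_request_free_mem_region(dev, &iomem_resource, vram_size);
		if (IS_ERR(res))
			return PTR_ERR(res);
		pgmap->range.start = res->start;
		pgmap->range.end = res->end;
		pgmap->type = MEMORY_DEVICE_PRIVATE;
	}
	pgmap->nr_range = 1;
	return 0;
}
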
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 2f5b3394c9ed..a5d7e6d22264 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -41,8 +41,9 @@ enum MIGRATION_COPY_DIR {
};
int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm);
-int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
+ struct mm_struct *mm, uint32_t trigger);
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+ uint32_t trigger, struct page *fault_page);
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
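
With the new signature, callers identify why a migration happened (the trigger) and, on the CPU fault path, hand the faulting page through for reuse by the migration code. A hypothetical call site mirroring the fault-path hunk above; callers outside a fault would be expected to pass NULL for fault_page:

/* Hedged example: prange, vmf and the trigger value come from the fault
 * path shown earlier; error handling is reduced to a debug print.
 */
int r;

r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm,
			    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, vmf->page);
if (r)
	pr_debug("failed %d to migrate range back to system memory\n", r);
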
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 5e90fe642192..09b966dc3768 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index c021519af810..623ccd227b7d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -71,7 +72,7 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
return NULL;
offset = (q->sdma_engine_id *
- dev->device_info->num_sdma_queues_per_engine +
+ dev->device_info.num_sdma_queues_per_engine +
q->sdma_queue_id) *
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
@@ -99,8 +100,11 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
{
struct kfd_cu_info cu_info;
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
- int i, se, sh, cu;
- amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info);
+ bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
+ uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
+ int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1;
+
+ amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
if (cu_mask_count > cu_info.cu_active_number)
cu_mask_count = cu_info.cu_active_number;
@@ -118,6 +122,10 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines);
return;
}
+
+ cu_bitmap_sh_mul = (KFD_GC_VERSION(mm->dev) >= IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(mm->dev) < IP_VERSION(12, 0, 0)) ? 2 : 1;
+
/* Count active CUs per SH.
*
* Some CUs in an SH may be disabled. HW expects disabled CUs to be
@@ -127,10 +135,12 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
* Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1.
*
* See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info.
+ * See note on GFX11 cu_bitmap layout in gfx_v11_0_get_cu_info.
*/
for (se = 0; se < cu_info.num_shader_engines; se++)
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
- cu_per_sh[se][sh] = hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]);
+ cu_per_sh[se][sh] = hweight32(
+ cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]);
/* Symmetrically map cu_mask to all SEs & SHs:
* se_mask programs up to 2 SH in the upper and lower 16 bits.
@@ -159,13 +169,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
se_mask[i] = 0;
i = 0;
- for (cu = 0; cu < 16; cu++) {
+ for (cu = 0; cu < 16; cu += inc) {
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
for (se = 0; se < cu_info.num_shader_engines; se++) {
if (cu_per_sh[se][sh] > cu) {
- if (cu_mask[i / 32] & (1 << (i % 32)))
- se_mask[se] |= 1 << (cu + sh * 16);
- i++;
+ if (cu_mask[i / 32] & (en_mask << (i % 32)))
+ se_mask[se] |= en_mask << (cu + sh * 16);
+ i += inc;
if (i == cu_mask_count)
return;
}
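
On GFX10 and newer the shader array is managed in workgroup processors (WGPs) of two CUs, so the loop above consumes the user mask in pairs: en_mask is 0x3 and both the CU index and the mask index advance by two. A standalone toy program (not driver code; it assumes a single shader engine and shader array) showing the resulting bit placement:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t cu_mask = 0x0000000F;	/* user enables the first 4 CUs */
	uint32_t se_mask = 0;
	int wgp = 1;			/* GFX10+: WGP (CU pair) granularity */
	uint32_t en_mask = wgp ? 0x3 : 0x1;
	int inc = wgp ? 2 : 1;
	int cu, i = 0, sh = 0;

	for (cu = 0; cu < 16 && i < 32; cu += inc) {
		if (cu_mask & (en_mask << i))
			se_mask |= en_mask << (cu + sh * 16);
		i += inc;
	}
	/* Prints 0x0000000f: WGPs 0 and 1 (CUs 0-3) enabled in SE0/SH0. */
	printf("se_mask = 0x%08x\n", se_mask);
	return 0;
}
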
@@ -173,3 +183,66 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
}
}
}
+
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
+ queue_id, p->doorbell_off);
+}
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,
+ pipe_id, queue_id);
+}
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ if (mqd_mem_obj->gtt_mem) {
+ amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem);
+ kfree(mqd_mem_obj);
+ } else {
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+ }
+}
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,
+ pipe_id, queue_id);
+}
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
+ (uint32_t __user *)p->write_ptr,
+ mms);
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+}
+
+bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+}
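
The helpers above consolidate per-ASIC copies that differed only in the device handle (now mm->dev->adev); kfd_free_mqd_cp additionally folds together the two allocation backings, freeing a dedicated GTT buffer when one exists and falling back to the sub-allocator otherwise. A hypothetical call site, loosely modelled on how the device queue manager drives the ops table once destroy_mqd/free_mqd point at these helpers; the queue variable and the 4000 ms timeout are illustrative stand-ins:

int r;

r = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
			 4000 /* ms, illustrative */, q->pipe, q->queue);
if (!r)
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
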
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 965e17c5dbb4..57f900ccaa10 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -100,6 +101,20 @@ struct mqd_manager {
u32 *ctl_stack_used_size,
u32 *save_area_used_size);
+ void (*get_checkpoint_info)(struct mqd_manager *mm, void *mqd, uint32_t *ctl_stack_size);
+
+ void (*checkpoint_mqd)(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst);
+
+ void (*restore_mqd)(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *p,
+ const void *mqd_src,
+ const void *ctl_stack_src,
+ const u32 ctl_stack_size);
+
#if defined(CONFIG_DEBUG_FS)
int (*debugfs_show_mqd)(struct seq_file *m, void *data);
#endif
@@ -122,4 +137,31 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id);
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj);
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id);
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id);
+
+bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id);
+
#endif /* KFD_MQD_MANAGER_H_ */
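
The three new hooks give the process checkpoint/restore path a uniform way to size, snapshot and re-create an MQD (plus, on GFX9, its control stack) without knowing the ASIC-specific layout. A hedged sketch of a checkpoint-side caller; the function name, buffer handling and error paths are simplified stand-ins, not code from this series:

#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_mqd_manager.h"

static int example_checkpoint_queue(struct mqd_manager *mgr, struct queue *q,
				    void **mqd_out, void **ctl_stack_out,
				    u32 *ctl_stack_size_out)
{
	u32 ctl_stack_size = 0;

	/* Only GFX9 reports a kernel-side control stack; VI reports 0. */
	if (mgr->get_checkpoint_info)
		mgr->get_checkpoint_info(mgr, q->mqd, &ctl_stack_size);

	*mqd_out = kvzalloc(mgr->mqd_size, GFP_KERNEL);
	*ctl_stack_out = ctl_stack_size ? kvzalloc(ctl_stack_size, GFP_KERNEL) : NULL;
	if (!*mqd_out || (ctl_stack_size && !*ctl_stack_out)) {
		kvfree(*mqd_out);
		kvfree(*ctl_stack_out);
		return -ENOMEM;
	}

	/* Snapshot the MQD (and, where applicable, the control stack). */
	mgr->checkpoint_mqd(mgr, q->mqd, *mqd_out, *ctl_stack_out);
	*ctl_stack_size_out = ctl_stack_size;
	return 0;
}
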
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 8128f4d312f1..4889865c725c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -156,13 +157,6 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
mm->update_mqd(mm, m, q, NULL);
}
-static void free_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
-
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
uint32_t queue_id, struct queue_properties *p,
struct mm_struct *mms)
@@ -171,20 +165,11 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
- return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, wptr_mask, mms);
}
-static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
- (uint32_t __user *)p->write_ptr,
- mms);
-}
-
static void __update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q, struct mqd_update_info *minfo,
unsigned int atc_bit)
@@ -271,42 +256,75 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
{
- return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout,
- pipe_id, queue_id);
+ struct cik_mqd *m;
+
+ m = get_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct cik_mqd));
}
-/*
- * preempt type here is ignored because there is only one way
- * to preempt sdma queue
- */
-static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+ uint64_t addr;
+ struct cik_mqd *m;
+
+ m = (struct cik_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(qp->doorbell_off);
+
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
}
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
{
+ struct cik_sdma_rlc_registers *m;
- return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
- pipe_id, queue_id);
+ m = get_sdma_mqd(mqd);
+ memcpy(mqd_dst, m, sizeof(struct cik_sdma_rlc_registers));
}
-static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+ uint64_t addr;
+ struct cik_sdma_rlc_registers *m;
+
+ m = (struct cik_sdma_rlc_registers *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ m->sdma_rlc_doorbell =
+ qp->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
}
/*
@@ -389,11 +407,13 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_CP:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -405,8 +425,8 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -416,11 +436,11 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -430,10 +450,12 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
- mqd->destroy_mqd = destroy_mqd_sdma;
- mqd->is_occupied = is_occupied_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 270160fc401b..d3e2b6a599a4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -148,20 +149,12 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
- r = mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, 0, mms);
return r;
}
-static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
- queue_id, p->doorbell_off);
-}
-
static void update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
@@ -213,7 +206,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
/* GC 10 removed WPP_CLAMP from PQ Control */
m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
- 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT ;
+ 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT;
m->cp_hqd_pq_doorbell_control |=
1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
}
@@ -233,31 +226,6 @@ static uint32_t read_doorbell_id(void *mqd)
return m->queue_doorbell_id0;
}
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->kgd, mqd, type, timeout,
- pipe_id, queue_id);
-}
-
-static void free_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_is_occupied(
- mm->dev->kgd, queue_address,
- pipe_id, queue_id);
-}
-
static int get_wave_state(struct mqd_manager *mm, void *mqd,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
@@ -285,6 +253,42 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
}
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+ struct v10_compute_mqd *m;
+
+ m = get_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v10_compute_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+ uint64_t addr;
+ struct v10_compute_mqd *m;
+
+ m = (struct v10_compute_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ m->cp_hqd_pq_doorbell_control =
+ qp->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
+}
+
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -316,15 +320,6 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
mm->update_mqd(mm, m, q, NULL);
}
-static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
- (uint32_t __user *)p->write_ptr,
- mms);
-}
-
#define SDMA_RLC_DUMMY_DEFAULT 0xf
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
@@ -354,23 +349,41 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-/*
- * * preempt type here is ignored because there is only one way
- * * to preempt sdma queue
- */
-static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+ struct v10_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v10_sdma_mqd));
}
-static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src,
+ const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+ uint64_t addr;
+ struct v10_sdma_mqd *m;
+
+ m = (struct v10_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ m->sdmax_rlcx_doorbell_offset =
+ qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
}
#if defined(CONFIG_DEBUG_FS)
@@ -410,13 +423,15 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
pr_debug("%s@%i\n", __func__, __LINE__);
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
mqd->get_wave_state = get_wave_state;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
@@ -427,10 +442,10 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_hiq_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = hiq_load_mqd_kiq;
+ mqd->load_mqd = kfd_hiq_load_mqd_kiq;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -441,11 +456,11 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -456,10 +471,12 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
- mqd->destroy_mqd = destroy_mqd_sdma;
- mqd->is_occupied = is_occupied_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct v10_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
new file mode 100644
index 000000000000..4f6390f3236e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v11_structs.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "amdgpu_amdkfd.h"
+
+static inline struct v11_compute_mqd *get_mqd(void *mqd)
+{
+ return (struct v11_compute_mqd *)mqd;
+}
+
+static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v11_sdma_mqd *)mqd;
+}
+
+static void update_cu_mask(struct mqd_manager *mm, void *mqd,
+ struct mqd_update_info *minfo)
+{
+ struct v11_compute_mqd *m;
+ uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
+
+ if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
+ !minfo->cu_mask.ptr)
+ return;
+
+ mqd_symmetrically_map_cu_mask(mm,
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
+
+ m = get_mqd(mqd);
+ m->compute_static_thread_mgmt_se0 = se_mask[0];
+ m->compute_static_thread_mgmt_se1 = se_mask[1];
+ m->compute_static_thread_mgmt_se2 = se_mask[2];
+ m->compute_static_thread_mgmt_se3 = se_mask[3];
+ m->compute_static_thread_mgmt_se4 = se_mask[4];
+ m->compute_static_thread_mgmt_se5 = se_mask[5];
+ m->compute_static_thread_mgmt_se6 = se_mask[6];
+ m->compute_static_thread_mgmt_se7 = se_mask[7];
+
+ pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
+ m->compute_static_thread_mgmt_se0,
+ m->compute_static_thread_mgmt_se1,
+ m->compute_static_thread_mgmt_se2,
+ m->compute_static_thread_mgmt_se3,
+ m->compute_static_thread_mgmt_se4,
+ m->compute_static_thread_mgmt_se5,
+ m->compute_static_thread_mgmt_se6,
+ m->compute_static_thread_mgmt_se7);
+}
+
+static void set_priority(struct v11_compute_mqd *m, struct queue_properties *q)
+{
+ m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+ m->cp_hqd_queue_priority = q->priority;
+}
+
+static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+ struct queue_properties *q)
+{
+ struct kfd_mem_obj *mqd_mem_obj;
+ int size;
+
+ /*
+ * MES writes to areas beyond the MQD size, so allocate
+ * one full PAGE_SIZE for the MQD when MES is enabled.
+ */
+ if (kfd->shared_resources.enable_mes)
+ size = PAGE_SIZE;
+ else
+ size = sizeof(struct v11_compute_mqd);
+
+ if (kfd_gtt_sa_allocate(kfd, size, &mqd_mem_obj))
+ return NULL;
+
+ return mqd_mem_obj;
+}
+
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ uint64_t addr;
+ struct v11_compute_mqd *m;
+ int size;
+
+ m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ if (mm->dev->shared_resources.enable_mes)
+ size = PAGE_SIZE;
+ else
+ size = sizeof(struct v11_compute_mqd);
+
+ memset(m, 0, size);
+
+ m->header = 0xC0310800;
+ m->compute_pipelinestat_enable = 1;
+ m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
+
+ m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+ 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+ m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+ m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+ m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+ m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_aql_control =
+ 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+ }
+
+ if (mm->dev->cwsr_enabled) {
+ m->cp_hqd_persistent_state |=
+ (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+ m->cp_hqd_ctx_save_base_addr_lo =
+ lower_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_base_addr_hi =
+ upper_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+ m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+ m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+ m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+ }
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+ mm->update_mqd(mm, m, q, NULL);
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ int r = 0;
+ /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+
+ r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
+ (uint32_t __user *)p->write_ptr,
+ wptr_shift, 0, mms);
+ return r;
+}
+
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
+{
+ struct v11_compute_mqd *m;
+
+ m = get_mqd(mqd);
+
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |=
+ ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+ pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+ m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+ m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+ m->cp_hqd_pq_doorbell_control =
+ q->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT;
+
+ /*
+ * HW does not clamp this field correctly. Maximum EOP queue size
+ * is constrained by per-SE EOP done signal count, which is 8-bit.
+ * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+ * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+ * is safe, giving a maximum field value of 0xA.
+ */
+ m->cp_hqd_eop_control = min(0xA,
+ ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+ m->cp_hqd_eop_base_addr_lo =
+ lower_32_bits(q->eop_ring_buffer_address >> 8);
+ m->cp_hqd_eop_base_addr_hi =
+ upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+ m->cp_hqd_iq_timer = 0;
+
+ m->cp_hqd_vmid = q->vmid;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ /* GC 10 removed WPP_CLAMP from PQ Control */
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+ 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT;
+ m->cp_hqd_pq_doorbell_control |=
+ 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+ }
+ if (mm->dev->cwsr_enabled)
+ m->cp_hqd_ctx_save_control = 0;
+
+ update_cu_mask(mm, mqd, minfo);
+ set_priority(m, q);
+
+ q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+static uint32_t read_doorbell_id(void *mqd)
+{
+ struct v11_compute_mqd *m = (struct v11_compute_mqd *)mqd;
+
+ return m->queue_doorbell_id0;
+}
+
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size)
+{
+ struct v11_compute_mqd *m;
+ /*struct mqd_user_context_save_area_header header;*/
+
+ m = get_mqd(mqd);
+
+ /* Control stack is written backwards, while workgroup context data
+ * is written forwards. Both start from m->cp_hqd_cntl_stack_size.
+ * Current position is at m->cp_hqd_cntl_stack_offset and
+ * m->cp_hqd_wg_state_offset, respectively.
+ */
+ *ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+ m->cp_hqd_cntl_stack_offset;
+ *save_area_used_size = m->cp_hqd_wg_state_offset -
+ m->cp_hqd_cntl_stack_size;
+
+ /* Control stack is not copied to user mode for GFXv11 because
+ * it's part of the context save area that is already
+ * accessible to user mode
+ */
+/*
+ header.control_stack_size = *ctl_stack_used_size;
+ header.wave_state_size = *save_area_used_size;
+
+ header.wave_state_offset = m->cp_hqd_wg_state_offset;
+ header.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+
+ if (copy_to_user(ctl_stack, &header, sizeof(header)))
+ return -EFAULT;
+*/
+ return 0;
+}
+
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ struct v11_compute_mqd *m;
+
+ init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+ m = get_mqd(*mqd);
+
+ m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+}
+
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ struct v11_sdma_mqd *m;
+
+ m = (struct v11_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+
+ memset(m, 0, sizeof(struct v11_sdma_mqd));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = mqd_mem_obj->gpu_addr;
+
+ mm->update_mqd(mm, m, q, NULL);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
+{
+ struct v11_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+ m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+ << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+ q->vmid << SDMA0_QUEUE0_RB_CNTL__RB_VMID__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__F32_WPTR_POLL_ENABLE__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+ m->sdmax_rlcx_doorbell_offset =
+ q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdma_engine_id = q->sdma_engine_id;
+ m->sdma_queue_id = q->sdma_queue_id;
+ m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+ q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct v11_compute_mqd), false);
+ return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct v11_sdma_mqd), false);
+ return 0;
+}
+
+#endif
+
+struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev)
+{
+ struct mqd_manager *mqd;
+
+ if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+ return NULL;
+
+ mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+ if (!mqd)
+ return NULL;
+
+ mqd->dev = dev;
+
+ switch (type) {
+ case KFD_MQD_TYPE_CP:
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ mqd->allocate_mqd = allocate_mqd;
+ mqd->init_mqd = init_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->mqd_size = sizeof(struct v11_compute_mqd);
+ mqd->get_wave_state = get_wave_state;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ break;
+ case KFD_MQD_TYPE_HIQ:
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ mqd->allocate_mqd = allocate_hiq_mqd;
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->free_mqd = free_mqd_hiq_sdma;
+ mqd->load_mqd = kfd_hiq_load_mqd_kiq;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->mqd_size = sizeof(struct v11_compute_mqd);
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ mqd->read_doorbell_id = read_doorbell_id;
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ break;
+ case KFD_MQD_TYPE_DIQ:
+ mqd->allocate_mqd = allocate_mqd;
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->free_mqd = kfd_free_mqd_cp;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->mqd_size = sizeof(struct v11_compute_mqd);
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_SDMA:
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ mqd->allocate_mqd = allocate_sdma_mqd;
+ mqd->init_mqd = init_mqd_sdma;
+ mqd->free_mqd = free_mqd_hiq_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
+ mqd->update_mqd = update_mqd_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->mqd_size = sizeof(struct v11_sdma_mqd);
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ break;
+ default:
+ kfree(mqd);
+ return NULL;
+ }
+
+ return mqd;
+}
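
update_mqd() in the new GFX11 manager above encodes ring sizes as log2(size in dwords) - 1 via ffs(), and clamps the EOP buffer field at 0xA for the hardware limit described in its comment. A standalone arithmetic check of that encoding (plain user-space C, not driver code):

#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	unsigned int queue_size = 4096;			/* bytes, power of two */
	unsigned int dwords = queue_size / 4;		/* 1024 dwords */
	int field = ffs(dwords) - 1 - 1;		/* log2(1024) - 1 = 9 */

	printf("queue_size=%u -> rb size field=%d\n", queue_size, field);

	/* The EOP buffer uses the same encoding but is clamped to 0xA,
	 * i.e. a maximum of 0x800 dwords, per the hardware limit noted above.
	 */
	unsigned int eop_dwords = 0x800;
	int eop_field = ffs(eop_dwords) - 1 - 1;	/* = 10 = 0xA */
	printf("eop max field=0x%X\n", eop_field);
	return 0;
}
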
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 4e5932f54b5a..0778e587a2d6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -108,7 +109,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
return NULL;
- retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
+ retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->adev,
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
&(mqd_mem_obj->gtt_mem),
@@ -199,19 +200,11 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
- return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, 0, mms);
}
-static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
- queue_id, p->doorbell_off);
-}
-
static void update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
@@ -285,38 +278,6 @@ static uint32_t read_doorbell_id(void *mqd)
return m->queue_doorbell_id0;
}
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->kgd, mqd, type, timeout,
- pipe_id, queue_id);
-}
-
-static void free_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- struct kfd_dev *kfd = mm->dev;
-
- if (mqd_mem_obj->gtt_mem) {
- amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
- kfree(mqd_mem_obj);
- } else {
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
- }
-}
-
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_is_occupied(
- mm->dev->kgd, queue_address,
- pipe_id, queue_id);
-}
-
static int get_wave_state(struct mqd_manager *mm, void *mqd,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
@@ -340,6 +301,57 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
}
+static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
+{
+ struct v9_mqd *m = get_mqd(mqd);
+
+ *ctl_stack_size = m->cp_hqd_cntl_stack_size;
+}
+
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+ struct v9_mqd *m;
+ /* Control stack is located one page after MQD. */
+ void *ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+
+ m = get_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v9_mqd));
+ memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size);
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, u32 ctl_stack_size)
+{
+ uint64_t addr;
+ struct v9_mqd *m;
+ void *ctl_stack;
+
+ m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ /* Control stack is located one page after MQD. */
+ ctl_stack = (void *)((uintptr_t)*mqd + PAGE_SIZE);
+ memcpy(ctl_stack, ctl_stack_src, ctl_stack_size);
+
+ m->cp_hqd_pq_doorbell_control =
+ qp->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
+}
+
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -371,15 +383,6 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
mm->update_mqd(mm, m, q, NULL);
}
-static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
- (uint32_t __user *)p->write_ptr,
- mms);
-}
-
#define SDMA_RLC_DUMMY_DEFAULT 0xf
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
@@ -409,23 +412,40 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-/*
- * * preempt type here is ignored because there is only one way
- * * to preempt sdma queue
- */
-static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+ struct v9_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v9_sdma_mqd));
}
-static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+ uint64_t addr;
+ struct v9_sdma_mqd *m;
+
+ m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ m->sdmax_rlcx_doorbell_offset =
+ qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
}
#if defined(CONFIG_DEBUG_FS)
@@ -464,12 +484,15 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_CP:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->get_wave_state = get_wave_state;
+ mqd->get_checkpoint_info = get_checkpoint_info;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -479,10 +502,10 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_hiq_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = hiq_load_mqd_kiq;
+ mqd->load_mqd = kfd_hiq_load_mqd_kiq;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -492,11 +515,11 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -506,10 +529,12 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
- mqd->destroy_mqd = destroy_mqd_sdma;
- mqd->is_occupied = is_occupied_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct v9_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
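
The GFX9 checkpoint/restore above relies on the buffer layout set up by allocate_mqd() earlier in this file: the MQD occupies the first page of the GTT allocation and the control stack follows at the next page boundary, which is why both checkpoint_mqd() and restore_mqd() compute mqd + PAGE_SIZE. A comment-only sketch of that assumed layout:

/* Layout assumed by checkpoint_mqd()/restore_mqd(), matching the single
 * GTT allocation made in allocate_mqd():
 *
 *   mqd_mem_obj->cpu_ptr + 0
 *     struct v9_mqd (padded to one page)
 *   mqd_mem_obj->cpu_ptr + PAGE_SIZE
 *     control stack, cp_hqd_cntl_stack_size bytes
 */
void *ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
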
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index cd9220eb8a7a..530ba6f5b57e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -162,7 +163,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
- return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, wptr_mask, mms);
}
@@ -259,31 +260,6 @@ static void update_mqd_tonga(struct mqd_manager *mm, void *mqd,
__update_mqd(mm, mqd, q, minfo, MTYPE_UC, 0);
}
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->kgd, mqd, type, timeout,
- pipe_id, queue_id);
-}
-
-static void free_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return mm->dev->kfd2kgd->hqd_is_occupied(
- mm->dev->kgd, queue_address,
- pipe_id, queue_id);
-}
-
static int get_wave_state(struct mqd_manager *mm, void *mqd,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
@@ -306,11 +282,54 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
}
+static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
+{
+ /* Control stack is stored in user mode */
+ *ctl_stack_size = 0;
+}
+
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+ struct vi_mqd *m;
+
+ m = get_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct vi_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+ uint64_t addr;
+ struct vi_mqd *m;
+
+ m = (struct vi_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ m->cp_hqd_pq_doorbell_control =
+ qp->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
+}
+
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
{
struct vi_mqd *m;
+
init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
m = get_mqd(*mqd);
@@ -343,15 +362,6 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
mm->update_mqd(mm, m, q, NULL);
}
-static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- struct queue_properties *p, struct mm_struct *mms)
-{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
- (uint32_t __user *)p->write_ptr,
- mms);
-}
-
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
@@ -380,27 +390,45 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-/*
- * * preempt type here is ignored because there is only one way
- * * to preempt sdma queue
- */
-static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+ void *mqd,
+ void *mqd_dst,
+ void *ctl_stack_dst)
{
- return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+ struct vi_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct vi_sdma_mqd));
}
-static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
{
- return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+ uint64_t addr;
+ struct vi_sdma_mqd *m;
+
+ m = (struct vi_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ m->sdmax_rlcx_doorbell =
+ qp->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ qp->is_active = 0;
}
#if defined(CONFIG_DEBUG_FS)
+
static int debugfs_show_mqd(struct seq_file *m, void *data)
{
seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
@@ -435,12 +463,15 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_CP:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->get_wave_state = get_wave_state;
+ mqd->get_checkpoint_info = get_checkpoint_info;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -452,8 +483,8 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -463,11 +494,11 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
- mqd->free_mqd = free_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
@@ -477,10 +508,12 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_sdma_mqd;
mqd->init_mqd = init_mqd_sdma;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd_sdma;
+ mqd->load_mqd = kfd_load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
- mqd->destroy_mqd = destroy_mqd_sdma;
- mqd->is_occupied = is_occupied_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+ mqd->restore_mqd = restore_mqd_sdma;
mqd->mqd_size = sizeof(struct vi_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index e547f1f8c49f..ed02b6d8bf63 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -223,7 +224,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
- switch (dqm->dev->device_info->asic_family) {
+ switch (dqm->dev->adev->asic_type) {
case CHIP_KAVERI:
case CHIP_HAWAII:
/* PM4 packet structures on CIK are the same as on VI */
@@ -236,31 +237,16 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_VEGAM:
pm->pmf = &kfd_vi_pm_funcs;
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- pm->pmf = &kfd_v9_pm_funcs;
- break;
- case CHIP_ALDEBARAN:
- pm->pmf = &kfd_aldebaran_pm_funcs;
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dqm->dev->device_info->asic_family);
- return -EINVAL;
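+ /* With asic_family gone, GFX9 and newer parts are matched by GC IP
+ * version; GC 9.4.2 (Aldebaran) keeps its dedicated pm_funcs.
+ */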
+ if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2))
+ pm->pmf = &kfd_aldebaran_pm_funcs;
+ else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
+ pm->pmf = &kfd_v9_pm_funcs;
+ else {
+ WARN(1, "Unexpected ASIC family %u",
+ dqm->dev->adev->asic_type);
+ return -EINVAL;
+ }
}
pm->dqm = dqm;
@@ -383,10 +369,9 @@ out:
return retval;
}
-int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+int pm_send_unmap_queue(struct packet_manager *pm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine)
+ uint32_t filter_param, bool reset)
{
uint32_t *buffer, size;
int retval = 0;
@@ -401,8 +386,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
goto out;
}
- retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
- reset, sdma_engine);
+ retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
if (!retval)
kq_submit_packet(pm->priv_queue);
else
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 7ea3f671b325..18250845a989 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -178,6 +179,11 @@ static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
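+/* The extended engine select encoding only exists on SDMA IP v5.2.0 and
+ * newer; the helper below keys the PM4 map/unmap packets off that.
+ */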
+static inline bool pm_use_ext_eng(struct kfd_dev *dev)
+{
+ return dev->adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 2, 0);
+}
+
static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static)
{
@@ -214,7 +220,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
use_static = false; /* no static queues under SDMA */
- if (q->properties.sdma_engine_id < 2)
+ if (q->properties.sdma_engine_id < 2 && !pm_use_ext_eng(q->device))
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
engine_sel__mes_map_queues__sdma0_vi;
else {
@@ -246,10 +252,8 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
}
static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
- enum kfd_queue_type type,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine)
+ uint32_t filter_param, bool reset)
{
struct pm4_mes_unmap_queues *packet;
@@ -258,31 +262,13 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
sizeof(struct pm4_mes_unmap_queues));
- switch (type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.extended_engine_sel =
- extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__compute;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- case KFD_QUEUE_TYPE_SDMA_XGMI:
- if (sdma_engine < 2) {
- packet->bitfields2.extended_engine_sel =
- extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
- } else {
- packet->bitfields2.extended_engine_sel =
- extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel;
- packet->bitfields2.engine_sel = sdma_engine;
- }
- break;
- default:
- WARN(1, "queue type %d", type);
- return -EINVAL;
- }
+
+ packet->bitfields2.extended_engine_sel = pm_use_ext_eng(pm->dqm->dev) ?
+ extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel :
+ extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
+
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__compute;
if (reset)
packet->bitfields2.action =
@@ -292,12 +278,6 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
action__mes_unmap_queues__preempt_queues;
switch (filter) {
- case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
- packet->bitfields2.num_queues = 1;
- packet->bitfields3b.doorbell_offset0 = filter_param;
- break;
case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index 08442e7d9944..4f951eaa6ee8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -110,8 +111,8 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
-int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
- struct scheduling_resources *res)
+static int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res)
{
struct pm4_mes_set_resources *packet;
@@ -197,10 +198,8 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
}
static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
- enum kfd_queue_type type,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine)
+ uint32_t filter_param, bool reset)
{
struct pm4_mes_unmap_queues *packet;
@@ -209,21 +208,9 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
sizeof(struct pm4_mes_unmap_queues));
- switch (type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.engine_sel =
+
+ packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__compute;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- case KFD_QUEUE_TYPE_SDMA_XGMI:
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
- break;
- default:
- WARN(1, "queue type %d", type);
- return -EINVAL;
- }
if (reset)
packet->bitfields2.action =
@@ -233,12 +220,6 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
action__mes_unmap_queues__preempt_queues;
switch (filter) {
- case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
- packet->bitfields2.num_queues = 1;
- packet->bitfields3b.doorbell_offset0 = filter_param;
- break;
case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
index af5816f51e55..e3b250918f39 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
index e50f73d25de6..7274edfd3f38 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
index 4d7add843746..a666710ed403 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
index f795ec815e2a..38f5cb6a222a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2020 Advanced Micro Devices, Inc.
+ * Copyright 2020-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
index a0ff34878163..f9cd28690151 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
index 5466cfe1c3cc..8147395c083b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -101,8 +102,8 @@ struct pm4_mes_set_resources {
struct pm4_mes_runlist {
union {
- union PM4_MES_TYPE_3_HEADER header; /* header */
- uint32_t ordinal1;
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
};
union {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
index b72fa3b8c2d4..5bfd0f9cbe23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8fd48d0ed240..bf610e3b683b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,6 +26,7 @@
#include <linux/hashtable.h>
#include <linux/mmu_notifier.h>
+#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/atomic.h>
@@ -121,7 +123,26 @@
*/
#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
-
+/**
+ * enum kfd_ioctl_flags - KFD ioctl flags
+ * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how
+ * userspace can use a given ioctl.
+ */
+enum kfd_ioctl_flags {
+ /*
+ * @KFD_IOC_FLAG_CHECKPOINT_RESTORE:
+ * Certain KFD ioctls such as AMDKFD_IOC_CRIU_OP can potentially
+ * perform privileged operations and load arbitrary data into MQDs and
+ * eventually HQD registers when the queue is mapped by HWS. In order to
+ * prevent this we should perform additional security checks.
+ *
+ * This is equivalent to callers with the CHECKPOINT_RESTORE capability.
+ *
+ * Note: Since earlier versions of docker do not support CHECKPOINT_RESTORE,
+ * we also allow ioctls with SYS_ADMIN capability.
+ */
+ KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0),
+};
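+
+/* Illustrative gate (a sketch, not taken from this patch): an ioctl whose
+ * descriptor carries KFD_IOC_FLAG_CHECKPOINT_RESTORE would typically only
+ * be admitted when
+ *	capable(CAP_CHECKPOINT_RESTORE) || capable(CAP_SYS_ADMIN)
+ * is true for the calling task.
+ */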
/*
* Kernel module parameter to specify maximum number of supported queues per
* device
@@ -183,7 +204,8 @@ enum cache_policy {
cache_policy_noncoherent
};
-#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
+#define KFD_GC_VERSION(dev) ((dev)->adev->ip_versions[GC_HWIP][0])
+#define KFD_IS_SOC15(dev) ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1)))
struct kfd_event_interrupt_class {
bool (*interrupt_isr)(struct kfd_dev *dev,
@@ -194,8 +216,6 @@ struct kfd_event_interrupt_class {
};
struct kfd_device_info {
- enum amd_asic_type asic_family;
- const char *asic_name;
uint32_t gfx_target_version;
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
@@ -208,11 +228,14 @@ struct kfd_device_info {
bool needs_iommu_device;
bool needs_pci_atomics;
uint32_t no_atomic_fw_version;
- unsigned int num_sdma_engines;
- unsigned int num_xgmi_sdma_engines;
unsigned int num_sdma_queues_per_engine;
+ unsigned int num_reserved_sdma_queues_per_engine;
+ uint64_t reserved_sdma_queues_bitmap;
};
+unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev);
+unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev);
+
struct kfd_mem_obj {
uint32_t range_start;
uint32_t range_end;
@@ -228,9 +251,9 @@ struct kfd_vmid_info {
};
struct kfd_dev {
- struct kgd_dev *kgd;
+ struct amdgpu_device *adev;
- const struct kfd_device_info *device_info;
+ struct kfd_device_info device_info;
struct pci_dev *pdev;
struct drm_device *ddev;
@@ -251,6 +274,7 @@ struct kfd_dev {
struct kgd2kfd_shared_resources shared_resources;
struct kfd_vmid_info vm_info;
+ struct kfd_local_mem_info local_mem_info;
const struct kfd2kgd_calls *kfd2kgd;
struct mutex doorbell_mutex;
@@ -281,9 +305,6 @@ struct kfd_dev {
*/
bool interrupts_active;
- /* Debug manager */
- struct kfd_dbgmgr *dbgmgr;
-
/* Firmware versions */
uint16_t mec_fw_version;
uint16_t mec2_fw_version;
@@ -338,25 +359,24 @@ enum kfd_mempool {
/* Character device interface */
int kfd_chardev_init(void);
void kfd_chardev_exit(void);
-struct device *kfd_chardev(void);
/**
* enum kfd_unmap_queues_filter - Enum for queue filters.
*
- * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue.
- *
* @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
* running queues list.
*
+ * @KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: Preempts all non-static queues
+ * in the run list.
+ *
 * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belong to a
 * specific process.
*
*/
enum kfd_unmap_queues_filter {
- KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE,
- KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
- KFD_UNMAP_QUEUES_FILTER_BY_PASID
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES = 1,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES = 2,
+ KFD_UNMAP_QUEUES_FILTER_BY_PASID = 3
};
/**
@@ -442,6 +462,7 @@ enum KFD_QUEUE_PRIORITY {
* it's user mode or kernel mode queue.
*
*/
+
struct queue_properties {
enum kfd_queue_type type;
enum kfd_queue_format format;
@@ -546,6 +567,12 @@ struct queue {
/* procfs */
struct kobject kobj;
+
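+ /* MES gang context buffer: GTT BO handle, GPU VA and CPU mapping */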
+ void *gang_ctx_bo;
+ uint64_t gang_ctx_gpu_addr;
+ void *gang_ctx_cpu_ptr;
+
+ struct amdgpu_bo *wptr_bo;
};
enum KFD_MQD_TYPE {
@@ -688,6 +715,7 @@ struct kfd_process_device {
/* VM context for GPUVM allocations */
struct file *drm_file;
void *drm_priv;
+ atomic64_t tlb_seq;
/* GPUVM allocations storage */
struct idr alloc_idr;
@@ -754,6 +782,16 @@ struct kfd_process_device {
uint64_t faults;
uint64_t page_in;
uint64_t page_out;
+ /*
+ * If this process has been checkpointed before, then the user
+ * application will use the original gpu_id on the
+ * checkpointed node to refer to this device.
+ */
+ uint32_t user_gpu_id;
+
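+ /* Per-process MES context buffer, allocated only when MES is enabled */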
+ void *proc_ctx_bo;
+ uint64_t proc_ctx_gpu_addr;
+ void *proc_ctx_cpu_ptr;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -764,12 +802,13 @@ struct svm_range_list {
struct list_head list;
struct work_struct deferred_list_work;
struct list_head deferred_range_list;
+ struct list_head criu_svm_metadata_list;
spinlock_t deferred_list_lock;
atomic_t evicted_ranges;
atomic_t drain_pagefaults;
struct delayed_work restore_work;
DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
- struct task_struct *faulting_task;
+ struct task_struct *faulting_task;
};
/* Process data */
@@ -856,6 +895,10 @@ struct kfd_process {
struct svm_range_list svms;
bool xnack_enabled;
+
+ atomic_t poison;
+ /* Queues are in a paused state because we are in the process of doing a CRIU checkpoint */
+ bool queues_paused;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -886,12 +929,12 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev);
int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(struct file *filep);
-struct kfd_process *kfd_get_process(const struct task_struct *);
+struct kfd_process *kfd_get_process(const struct task_struct *task);
struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
-int kfd_process_gpuid_from_kgd(struct kfd_process *p,
+int kfd_process_gpuid_from_adev(struct kfd_process *p,
struct amdgpu_device *adev, uint32_t *gpuid,
uint32_t *gpuidx);
static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
@@ -904,11 +947,16 @@ static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
}
void kfd_unref_process(struct kfd_process *p);
-int kfd_process_evict_queues(struct kfd_process *p);
+int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger);
int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
+struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *process,
+ uint32_t gpu_id);
+
+int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id);
+
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
struct file *drm_file);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
@@ -930,6 +978,7 @@ void *kfd_process_device_translate_handle(struct kfd_process_device *p,
int handle);
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
int handle);
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
/* PASIDs */
int kfd_pasid_init(void);
@@ -981,10 +1030,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu);
int kfd_topology_remove_device(struct kfd_dev *gpu);
struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
uint32_t proximity_domain);
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
+ uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
-struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
+struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev);
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);
@@ -1004,6 +1055,116 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
uint64_t tba_addr,
uint64_t tma_addr);
+/* CRIU */
+/*
+ * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of the CRIU private
+ * structures:
+ * kfd_criu_process_priv_data
+ * kfd_criu_device_priv_data
+ * kfd_criu_bo_priv_data
+ * kfd_criu_queue_priv_data
+ * kfd_criu_event_priv_data
+ * kfd_criu_svm_range_priv_data
+ */
+
+#define KFD_CRIU_PRIV_VERSION 1
+
+struct kfd_criu_process_priv_data {
+ uint32_t version;
+ uint32_t xnack_mode;
+};
+
+struct kfd_criu_device_priv_data {
+ /* For future use */
+ uint64_t reserved;
+};
+
+struct kfd_criu_bo_priv_data {
+ uint64_t user_addr;
+ uint32_t idr_handle;
+ uint32_t mapped_gpuids[MAX_GPU_INSTANCE];
+};
+
+/*
+ * The first 4 bytes of kfd_criu_queue_priv_data, kfd_criu_event_priv_data,
+ * and kfd_criu_svm_range_priv_data are the object type
+ */
+enum kfd_criu_object_type {
+ KFD_CRIU_OBJECT_TYPE_QUEUE,
+ KFD_CRIU_OBJECT_TYPE_EVENT,
+ KFD_CRIU_OBJECT_TYPE_SVM_RANGE,
+};
+
+struct kfd_criu_svm_range_priv_data {
+ uint32_t object_type;
+ uint64_t start_addr;
+ uint64_t size;
+ /* Variable length array of attributes */
+ struct kfd_ioctl_svm_attribute attrs[];
+};
+
+struct kfd_criu_queue_priv_data {
+ uint32_t object_type;
+ uint64_t q_address;
+ uint64_t q_size;
+ uint64_t read_ptr_addr;
+ uint64_t write_ptr_addr;
+ uint64_t doorbell_off;
+ uint64_t eop_ring_buffer_address;
+ uint64_t ctx_save_restore_area_address;
+ uint32_t gpu_id;
+ uint32_t type;
+ uint32_t format;
+ uint32_t q_id;
+ uint32_t priority;
+ uint32_t q_percent;
+ uint32_t doorbell_id;
+ uint32_t gws;
+ uint32_t sdma_id;
+ uint32_t eop_ring_buffer_size;
+ uint32_t ctx_save_restore_area_size;
+ uint32_t ctl_stack_size;
+ uint32_t mqd_size;
+};
+
+struct kfd_criu_event_priv_data {
+ uint32_t object_type;
+ uint64_t user_handle;
+ uint32_t event_id;
+ uint32_t auto_reset;
+ uint32_t type;
+ uint32_t signaled;
+
+ union {
+ struct kfd_hsa_memory_exception_data memory_exception_data;
+ struct kfd_hsa_hw_exception_data hw_exception_data;
+ };
+};
+
+int kfd_process_get_queue_info(struct kfd_process *p,
+ uint32_t *num_queues,
+ uint64_t *priv_data_sizes);
+
+int kfd_criu_checkpoint_queues(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset);
+
+int kfd_criu_restore_queue(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size);
+
+int kfd_criu_checkpoint_events(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset);
+
+int kfd_criu_restore_event(struct file *devkfd,
+ struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size);
+/* CRIU - End */
+
/* Queue Context Management */
int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
@@ -1022,12 +1183,14 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
-int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid);
+int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid);
/* Process Queue Manager */
struct process_queue_node {
@@ -1045,6 +1208,10 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+ struct amdgpu_bo *wptr_bo,
+ const struct kfd_criu_queue_priv_data *q_data,
+ const void *restore_mqd,
+ const void *restore_ctl_stack,
uint32_t *p_doorbell_offset_in_process);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned int qid,
@@ -1067,6 +1234,10 @@ int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
uint64_t fence_value,
unsigned int timeout_ms);
+int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
+ unsigned int qid,
+ u32 *mqd_size,
+ u32 *ctl_stack_size);
/* Packet Manager */
#define KFD_FENCE_COMPLETED (100)
@@ -1095,10 +1266,8 @@ struct packet_manager_funcs {
int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static);
int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
- enum kfd_queue_type type,
enum kfd_unmap_queues_filter mode,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine);
+ uint32_t filter_param, bool reset);
int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
uint64_t fence_address, uint64_t fence_value);
int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
@@ -1125,10 +1294,9 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint64_t fence_value);
-int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+int pm_send_unmap_queue(struct packet_manager *pm,
enum kfd_unmap_queues_filter mode,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine);
+ uint32_t filter_param, bool reset);
void pm_release_ib(struct packet_manager *pm);
@@ -1140,15 +1308,16 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v11;
extern const struct kfd_device_global_init_class device_global_init_class_cik;
-void kfd_event_init_process(struct kfd_process *p);
+int kfd_event_init_process(struct kfd_process *p);
void kfd_event_free_process(struct kfd_process *p);
int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
- bool all, uint32_t user_timeout_ms,
+ bool all, uint32_t *user_timeout_ms,
uint32_t *wait_result);
void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
uint32_t valid_id_bits);
@@ -1158,12 +1327,14 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
void kfd_signal_hw_exception_event(u32 pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
-int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
- uint64_t size);
+int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset);
+
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data,
uint64_t *event_page_offset, uint32_t *event_slot_index);
+
+int kfd_get_num_events(struct kfd_process *p);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
@@ -1175,7 +1346,13 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
-int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
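+/* GC 9.4.0 (Vega20), 9.4.1 (Arcturus with SDMA firmware 18 or newer) and
+ * 9.4.2 (Aldebaran) need an extra TLB flush after memory is unmapped.
+ */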
+static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
+{
+ return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) &&
+ dev->adev->sdma.instance[0].fw_version >= 18) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
+}
bool kfd_is_locked(void);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index b993011cfa64..951b63677248 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -40,9 +41,9 @@ struct mm_struct;
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
-#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"
#include "kfd_svm.h"
+#include "kfd_smi_events.h"
/*
* List of struct kfd_process (field kfd_process).
@@ -64,7 +65,8 @@ static struct workqueue_struct *kfd_process_wq;
*/
static struct workqueue_struct *kfd_restore_wq;
-static struct kfd_process *find_process(const struct task_struct *thread);
+static struct kfd_process *find_process(const struct task_struct *thread,
+ bool ref);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
@@ -251,14 +253,13 @@ cleanup:
}
/**
- * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device
+ * kfd_get_cu_occupancy - Collect number of waves in-flight on this device
* by current process. Translates acquired wave count into number of compute units
* that are occupied.
*
- * @atr: Handle of attribute that allows reporting of wave count. The attribute
+ * @attr: Handle of attribute that allows reporting of wave count. The attribute
* handle encapsulates GPU device it is associated with, thereby allowing collection
* of waves in flight, etc
- *
* @buffer: Handle of user provided buffer updated with wave count
*
* Return: Number of bytes written to user buffer or an error value
@@ -288,7 +289,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
/* Collect wave count from device if it supports */
wave_cnt = 0;
max_waves_per_cu = 0;
- dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt,
+ dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,
&max_waves_per_cu);
/* Translate wave count to number of compute units */
@@ -462,6 +463,7 @@ static struct attribute *procfs_queue_attrs[] = {
&attr_queue_gpuid,
NULL
};
+ATTRIBUTE_GROUPS(procfs_queue);
static const struct sysfs_ops procfs_queue_ops = {
.show = kfd_procfs_queue_show,
@@ -469,7 +471,7 @@ static const struct sysfs_ops procfs_queue_ops = {
static struct kobj_type procfs_queue_type = {
.sysfs_ops = &procfs_queue_ops,
- .default_attrs = procfs_queue_attrs,
+ .default_groups = procfs_queue_groups,
};
static const struct sysfs_ops procfs_stats_ops = {
@@ -692,12 +694,12 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
struct kfd_dev *dev = pdd->dev;
if (kptr) {
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->kgd, mem);
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
kptr = NULL;
}
- amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
+ amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv);
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, mem, pdd->drm_priv,
NULL);
}
@@ -714,24 +716,25 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
struct kfd_dev *kdev = pdd->dev;
int err;
- err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
- pdd->drm_priv, mem, NULL, flags);
+ err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
+ pdd->drm_priv, mem, NULL,
+ flags, false);
if (err)
goto err_alloc_mem;
- err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, *mem,
- pdd->drm_priv, NULL);
+ err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem,
+ pdd->drm_priv);
if (err)
goto err_map_mem;
- err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem, true);
+ err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->adev, *mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
}
if (kptr) {
- err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
+ err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(
(struct kgd_mem *)*mem, kptr, NULL);
if (err) {
pr_debug("Map GTT BO to kernel failed\n");
@@ -742,10 +745,10 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
return err;
sync_memory_failed:
- amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd, *mem, pdd->drm_priv);
+ amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->adev, *mem, pdd->drm_priv);
err_map_mem:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem, pdd->drm_priv,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, *mem, pdd->drm_priv,
NULL);
err_alloc_mem:
*mem = NULL;
@@ -816,7 +819,7 @@ struct kfd_process *kfd_create_process(struct file *filep)
mutex_lock(&kfd_processes_mutex);
/* A prior open of /dev/kfd could have already created the process. */
- process = find_process(thread);
+ process = find_process(thread, false);
if (process) {
pr_debug("Process already found\n");
} else {
@@ -884,7 +887,7 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
if (thread->group_leader->mm != thread->mm)
return ERR_PTR(-EINVAL);
- process = find_process(thread);
+ process = find_process(thread, false);
if (!process)
return ERR_PTR(-EINVAL);
@@ -903,13 +906,16 @@ static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
return NULL;
}
-static struct kfd_process *find_process(const struct task_struct *thread)
+static struct kfd_process *find_process(const struct task_struct *thread,
+ bool ref)
{
struct kfd_process *p;
int idx;
idx = srcu_read_lock(&kfd_processes_srcu);
p = find_process_by_mm(thread->mm);
+ if (p && ref)
+ kref_get(&p->ref);
srcu_read_unlock(&kfd_processes_srcu, idx);
return p;
@@ -920,6 +926,26 @@ void kfd_unref_process(struct kfd_process *p)
kref_put(&p->ref, kfd_process_ref_release);
}
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid)
+{
+ struct task_struct *task = NULL;
+ struct kfd_process *p = NULL;
+
+ if (!pid) {
+ task = current;
+ get_task_struct(task);
+ } else {
+ task = get_pid_task(pid, PIDTYPE_PID);
+ }
+
+ if (task) {
+ p = find_process(task, true);
+ put_task_struct(task);
+ }
+
+ return p;
+}
static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
@@ -940,10 +966,10 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
if (!peer_pdd->drm_priv)
continue;
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- peer_pdd->dev->kgd, mem, peer_pdd->drm_priv);
+ peer_pdd->dev->adev, mem, peer_pdd->drm_priv);
}
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem,
pdd->drm_priv, NULL);
kfd_process_device_remove_obj_handle(pdd, id);
}
@@ -974,7 +1000,7 @@ static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
if (!mem)
goto out;
- amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd, mem);
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
out:
mutex_unlock(&p->mutex);
@@ -1003,7 +1029,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
if (pdd->drm_file) {
amdgpu_amdkfd_gpuvm_release_process_vm(
- pdd->dev->kgd, pdd->drm_priv);
+ pdd->dev->adev, pdd->drm_priv);
fput(pdd->drm_file);
}
@@ -1011,11 +1037,14 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
get_order(KFD_CWSR_TBA_TMA_SIZE));
- kfree(pdd->qpd.doorbell_bitmap);
+ bitmap_free(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
+ if (pdd->dev->shared_resources.enable_mes)
+ amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
+ pdd->proc_ctx_bo);
/*
* before destroying pdd, make sure to report availability
* for auto suspend
@@ -1086,6 +1115,15 @@ static void kfd_process_wq_release(struct work_struct *work)
struct kfd_process *p = container_of(work, struct kfd_process,
release_work);
+ kfd_process_dequeue_from_all_devices(p);
+ pqm_uninit(&p->pqm);
+
+ /* Signal the eviction fence after user mode queues are
+ * destroyed. This allows any BOs to be freed without
+ * triggering pointless evictions or waiting for fences.
+ */
+ dma_fence_signal(p->ef);
+
kfd_process_remove_sysfs(p);
kfd_iommu_unbind_process(p);
@@ -1133,7 +1171,6 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
struct kfd_process *p;
- int i;
/*
* The kfd_process structure can not be free because the
@@ -1150,39 +1187,9 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
- cancel_delayed_work_sync(&p->svms.restore_work);
-
- mutex_lock(&p->mutex);
-
- /* Iterate over all process device data structures and if the
- * pdd is in debug mode, we should first force unregistration,
- * then we will be able to destroy the queues
- */
- for (i = 0; i < p->n_pdds; i++) {
- struct kfd_dev *dev = p->pdds[i]->dev;
-
- mutex_lock(kfd_get_dbgmgr_mutex());
- if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
- if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
- kfd_dbgmgr_destroy(dev->dbgmgr);
- dev->dbgmgr = NULL;
- }
- }
- mutex_unlock(kfd_get_dbgmgr_mutex());
- }
-
- kfd_process_dequeue_from_all_devices(p);
- pqm_uninit(&p->pqm);
/* Indicate to other users that MM is no longer valid */
p->mm = NULL;
- /* Signal the eviction fence after user mode queues are
- * destroyed. This allows any BOs to be freed without
- * triggering pointless evictions or waiting for fences.
- */
- dma_fence_signal(p->ef);
-
- mutex_unlock(&p->mutex);
mmu_notifier_put(&p->mmu_notifier);
}
@@ -1317,14 +1324,13 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
* support the SVM APIs and don't need to be considered
* for the XNACK mode selection.
*/
- if (dev->device_info->asic_family < CHIP_VEGA10)
+ if (!KFD_IS_SOC15(dev))
continue;
/* Aldebaran can always support XNACK because it can support
* per-process XNACK mode selection. But let the dev->noretry
* setting still influence the default XNACK mode.
*/
- if (supported &&
- dev->device_info->asic_family == CHIP_ALDEBARAN)
+ if (supported && KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2))
continue;
/* GFXv10 and later GPUs do not support shader preemption
@@ -1332,7 +1338,7 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
* management and memory-manager-related preemptions or
* even deadlocks.
*/
- if (dev->device_info->asic_family >= CHIP_NAVI10)
+ if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
return false;
if (dev->noretry)
@@ -1361,15 +1367,20 @@ static struct kfd_process *create_process(const struct task_struct *thread)
process->mm = thread->mm;
process->lead_thread = thread->group_leader;
process->n_pdds = 0;
+ process->queues_paused = false;
INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
process->last_restore_timestamp = get_jiffies_64();
- kfd_event_init_process(process);
+ err = kfd_event_init_process(process);
+ if (err)
+ goto err_event_init;
process->is_32bit_user_mode = in_compat_syscall();
process->pasid = kfd_pasid_alloc();
- if (process->pasid == 0)
+ if (process->pasid == 0) {
+ err = -ENOSPC;
goto err_alloc_pasid;
+ }
err = pqm_init(&process->pqm, process);
if (err != 0)
@@ -1391,6 +1402,11 @@ static struct kfd_process *create_process(const struct task_struct *thread)
hash_add_rcu(kfd_processes_table, &process->kfd_processes,
(uintptr_t)process->mm);
+ /* Prevent free_notifier from starting kfd_process_wq_release if
+ * mmu_notifier_get fails because of a pending signal.
+ */
+ kref_get(&process->ref);
+
/* MMU notifier registration must be the last call that can fail
* because after this point we cannot unwind the process creation.
* After this point, mmu_notifier_put will trigger the cleanup by
@@ -1403,6 +1419,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
}
BUG_ON(mn != &process->mmu_notifier);
+ kfd_unref_process(process);
get_task_struct(process->lead_thread);
return process;
@@ -1418,6 +1435,8 @@ err_init_apertures:
err_process_pqm_init:
kfd_pasid_free(process->pasid);
err_alloc_pasid:
+ kfd_event_free_process(process);
+err_event_init:
mutex_destroy(&process->mutex);
kfree(process);
err_alloc_process:
@@ -1431,12 +1450,11 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
int range_start = dev->shared_resources.non_cp_doorbells_start;
int range_end = dev->shared_resources.non_cp_doorbells_end;
- if (!KFD_IS_SOC15(dev->device_info->asic_family))
+ if (!KFD_IS_SOC15(dev))
return 0;
- qpd->doorbell_bitmap =
- kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
- BITS_PER_BYTE), GFP_KERNEL);
+ qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ GFP_KERNEL);
if (!qpd->doorbell_bitmap)
return -ENOMEM;
@@ -1448,9 +1466,9 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
if (i >= range_start && i <= range_end) {
- set_bit(i, qpd->doorbell_bitmap);
- set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
- qpd->doorbell_bitmap);
+ __set_bit(i, qpd->doorbell_bitmap);
+ __set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+ qpd->doorbell_bitmap);
}
}
@@ -1473,6 +1491,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p)
{
struct kfd_process_device *pdd = NULL;
+ int retval = 0;
if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
return NULL;
@@ -1480,11 +1499,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
if (!pdd)
return NULL;
- if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
- pr_err("Failed to alloc doorbell for pdd\n");
- goto err_free_pdd;
- }
-
if (init_doorbell_bitmap(&pdd->qpd, dev)) {
pr_err("Failed to init doorbell for process\n");
goto err_free_pdd;
@@ -1503,7 +1517,23 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
pdd->runtime_inuse = false;
pdd->vram_usage = 0;
pdd->sdma_past_activity_counter = 0;
+ pdd->user_gpu_id = dev->id;
atomic64_set(&pdd->evict_duration_counter, 0);
+
+ if (dev->shared_resources.enable_mes) {
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (retval) {
+ pr_err("failed to allocate process context bo\n");
+ goto err_free_pdd;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
p->pdds[p->n_pdds++] = pdd;
/* Init idr used for memory handle translation */
@@ -1547,13 +1577,14 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
dev = pdd->dev;
ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
- dev->kgd, drm_file, p->pasid,
+ dev->adev, drm_file, p->pasid,
&p->kgd_process_info, &p->ef);
if (ret) {
pr_err("Failed to create process VM object\n");
return ret;
}
pdd->drm_priv = drm_file->private_data;
+ atomic64_set(&pdd->tlb_seq, 0);
ret = kfd_process_device_reserve_ib_mem(pdd);
if (ret)
@@ -1704,7 +1735,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
* Eviction is reference-counted per process-device. This means multiple
* evictions from different sources can be nested safely.
*/
-int kfd_process_evict_queues(struct kfd_process *p)
+int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
{
int r = 0;
int i;
@@ -1713,6 +1744,9 @@ int kfd_process_evict_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
+ kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
+ trigger);
+
r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
&pdd->qpd);
/* evict return -EIO if HWS is hang or asic is resetting, in this case
@@ -1737,6 +1771,9 @@ fail:
if (n_evicted == 0)
break;
+
+ kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+
if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd))
pr_err("Failed to restore queues\n");
@@ -1756,6 +1793,8 @@ int kfd_process_restore_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
+ kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+
r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd);
if (r) {
@@ -1773,21 +1812,20 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
int i;
for (i = 0; i < p->n_pdds; i++)
- if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
+ if (p->pdds[i] && gpu_id == p->pdds[i]->user_gpu_id)
return i;
return -EINVAL;
}
int
-kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
+kfd_process_gpuid_from_adev(struct kfd_process *p, struct amdgpu_device *adev,
uint32_t *gpuid, uint32_t *gpuidx)
{
- struct kgd_dev *kgd = (struct kgd_dev *)adev;
int i;
for (i = 0; i < p->n_pdds; i++)
- if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
- *gpuid = p->pdds[i]->dev->id;
+ if (p->pdds[i] && p->pdds[i]->dev->adev == adev) {
+ *gpuid = p->pdds[i]->user_gpu_id;
*gpuidx = i;
return 0;
}
@@ -1818,7 +1856,7 @@ static void evict_process_worker(struct work_struct *work)
flush_delayed_work(&p->restore_work);
pr_debug("Started evicting pasid 0x%x\n", p->pasid);
- ret = kfd_process_evict_queues(p);
+ ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
if (!ret) {
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
@@ -1885,7 +1923,7 @@ void kfd_suspend_all_processes(void)
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
- if (kfd_process_evict_queues(p))
+ if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
pr_err("Failed to suspend process 0x%x\n", p->pasid);
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
@@ -1944,21 +1982,62 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
{
+ struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+ uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
struct kfd_dev *dev = pdd->dev;
+ /*
+ * It can be that we race and lose here, but that is extremely unlikely
+ * and the worst that can happen is that we flush the changes into the
+ * TLB once more, which is harmless.
+ */
+ if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
+ return;
+
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
/* Nothing to flush until a VMID is assigned, which
* only happens when the first queue is created.
*/
if (pdd->qpd.vmid)
- amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
+ amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
pdd->qpd.vmid);
} else {
- amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
+ amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev,
pdd->process->pasid, type);
}
}
+struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
+{
+ int i;
+
+ if (gpu_id) {
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ if (pdd->user_gpu_id == gpu_id)
+ return pdd;
+ }
+ }
+ return NULL;
+}
+
+int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id)
+{
+ int i;
+
+ if (!actual_gpu_id)
+ return 0;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ if (pdd->dev->id == actual_gpu_id)
+ return pdd->user_gpu_id;
+ }
+ return -EINVAL;
+}
+
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 3627e7ac161b..5137476ec18e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -42,6 +43,20 @@ static inline struct process_queue_node *get_queue_by_qid(
return NULL;
}
+static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
+ unsigned int qid)
+{
+ if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+ return -EINVAL;
+
+ if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
+ pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
static int find_available_queue_slot(struct process_queue_manager *pqm,
unsigned int *qid)
{
@@ -118,7 +133,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
return ret;
pqn->q->gws = mem;
- pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0;
+ pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
pqn->q, NULL);
@@ -135,9 +150,8 @@ void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
{
INIT_LIST_HEAD(&pqm->queues);
- pqm->queue_slot_bitmap =
- kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
- BITS_PER_BYTE), GFP_KERNEL);
+ pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ GFP_KERNEL);
if (!pqm->queue_slot_bitmap)
return -ENOMEM;
pqm->process = p;
@@ -159,14 +173,15 @@ void pqm_uninit(struct process_queue_manager *pqm)
kfree(pqn);
}
- kfree(pqm->queue_slot_bitmap);
+ bitmap_free(pqm->queue_slot_bitmap);
pqm->queue_slot_bitmap = NULL;
}
static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev, struct queue **q,
struct queue_properties *q_properties,
- struct file *f, unsigned int qid)
+ struct file *f, struct amdgpu_bo *wptr_bo,
+ unsigned int qid)
{
int retval;
@@ -184,8 +199,27 @@ static int init_user_queue(struct process_queue_manager *pqm,
(*q)->device = dev;
(*q)->process = pqm->process;
+ if (dev->shared_resources.enable_mes) {
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+ AMDGPU_MES_GANG_CTX_SIZE,
+ &(*q)->gang_ctx_bo,
+ &(*q)->gang_ctx_gpu_addr,
+ &(*q)->gang_ctx_cpu_ptr,
+ false);
+ if (retval) {
+ pr_err("failed to allocate gang context bo\n");
+ goto cleanup;
+ }
+ memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+ (*q)->wptr_bo = wptr_bo;
+ }
+
pr_debug("PQM After init queue");
+ return 0;
+cleanup:
+ if (dev->shared_resources.enable_mes)
+ uninit_queue(*q);
return retval;
}
@@ -194,6 +228,10 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+ struct amdgpu_bo *wptr_bo,
+ const struct kfd_criu_queue_priv_data *q_data,
+ const void *restore_mqd,
+ const void *restore_ctl_stack,
uint32_t *p_doorbell_offset_in_process)
{
int retval;
@@ -220,12 +258,17 @@ int pqm_create_queue(struct process_queue_manager *pqm,
* Hence we also check the type as well
*/
if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
- max_queues = dev->device_info->max_no_of_hqd/2;
+ max_queues = dev->device_info.max_no_of_hqd/2;
if (pdd->qpd.queue_count >= max_queues)
return -ENOSPC;
- retval = find_available_queue_slot(pqm, qid);
+ if (q_data) {
+ retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
+ *qid = q_data->q_id;
+ } else
+ retval = find_available_queue_slot(pqm, qid);
+
if (retval != 0)
return retval;
@@ -248,12 +291,13 @@ int pqm_create_queue(struct process_queue_manager *pqm,
* allocate_sdma_queue() in create_queue() has the
* corresponding check logic.
*/
- retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
- retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
+ retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
+ restore_mqd, restore_ctl_stack);
print_queue(q);
break;
@@ -268,12 +312,13 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
- retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
+ retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
+ restore_mqd, restore_ctl_stack);
print_queue(q);
break;
case KFD_QUEUE_TYPE_DIQ:
@@ -394,6 +439,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
pdd->qpd.num_gws = 0;
}
+ if (dev->shared_resources.enable_mes) {
+ amdgpu_amdkfd_free_gtt_mem(dev->adev,
+ pqn->q->gang_ctx_bo);
+ if (pqn->q->wptr_bo)
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
+
+ }
uninit_queue(pqn->q);
}
@@ -446,6 +498,21 @@ int pqm_update_mqd(struct process_queue_manager *pqm,
return -EFAULT;
}
+ /* ASICs that have WGPs must enforce pairwise enabled mask checks. */
+ if (minfo && minfo->update_flag == UPDATE_FLAG_CU_MASK && minfo->cu_mask.ptr &&
+ KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
+ int i;
+
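+ /* bits i and i+1 select one WGP: both CUs must be enabled or both disabled */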
+ for (i = 0; i < minfo->cu_mask.count; i += 2) {
+ uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
+
+ if (cu_pair && cu_pair != 0x3) {
+ pr_debug("CUs must be adjacent pairwise enabled.\n");
+ return -EINVAL;
+ }
+ }
+ }
+
retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
pqn->q, minfo);
if (retval != 0)
@@ -498,6 +565,359 @@ int pqm_get_wave_state(struct process_queue_manager *pqm,
save_area_used_size);
}
+static int get_queue_data_sizes(struct kfd_process_device *pdd,
+ struct queue *q,
+ uint32_t *mqd_size,
+ uint32_t *ctl_stack_size)
+{
+ int ret;
+
+ ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
+ q->properties.queue_id,
+ mqd_size,
+ ctl_stack_size);
+ if (ret)
+ pr_err("Failed to get queue dump info (%d)\n", ret);
+
+ return ret;
+}
+
+int kfd_process_get_queue_info(struct kfd_process *p,
+ uint32_t *num_queues,
+ uint64_t *priv_data_sizes)
+{
+ uint32_t extra_data_sizes = 0;
+ struct queue *q;
+ int i;
+ int ret;
+
+ *num_queues = 0;
+
+ /* Run over all PDDs of the process */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ list_for_each_entry(q, &pdd->qpd.queues_list, list) {
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ uint32_t mqd_size, ctl_stack_size;
+
+ *num_queues = *num_queues + 1;
+
+ ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
+ if (ret)
+ return ret;
+
+ extra_data_sizes += mqd_size + ctl_stack_size;
+ } else {
+ pr_err("Unsupported queue type (%d)\n", q->properties.type);
+ return -EOPNOTSUPP;
+ }
+ }
+ }
+ *priv_data_sizes = extra_data_sizes +
+ (*num_queues * sizeof(struct kfd_criu_queue_priv_data));
+
+ return 0;
+}
+
+static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
+ unsigned int qid,
+ void *mqd,
+ void *ctl_stack)
+{
+ struct process_queue_node *pqn;
+
+ pqn = get_queue_by_qid(pqm, qid);
+ if (!pqn) {
+ pr_debug("amdkfd: No queue %d exists for operation\n", qid);
+ return -EFAULT;
+ }
+
+ if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
+ pr_err("amdkfd: queue dumping not supported on this device\n");
+ return -EOPNOTSUPP;
+ }
+
+ return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
+ pqn->q, mqd, ctl_stack);
+}
+
+static int criu_checkpoint_queue(struct kfd_process_device *pdd,
+ struct queue *q,
+ struct kfd_criu_queue_priv_data *q_data)
+{
+ uint8_t *mqd, *ctl_stack;
+ int ret;
+
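+ /* checkpoint blob layout: q_data header first, then MQD, then control stack */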
+ mqd = (void *)(q_data + 1);
+ ctl_stack = mqd + q_data->mqd_size;
+
+ q_data->gpu_id = pdd->user_gpu_id;
+ q_data->type = q->properties.type;
+ q_data->format = q->properties.format;
+ q_data->q_id = q->properties.queue_id;
+ q_data->q_address = q->properties.queue_address;
+ q_data->q_size = q->properties.queue_size;
+ q_data->priority = q->properties.priority;
+ q_data->q_percent = q->properties.queue_percent;
+ q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
+ q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
+ q_data->doorbell_id = q->doorbell_id;
+
+ q_data->sdma_id = q->sdma_id;
+
+ q_data->eop_ring_buffer_address =
+ q->properties.eop_ring_buffer_address;
+
+ q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;
+
+ q_data->ctx_save_restore_area_address =
+ q->properties.ctx_save_restore_area_address;
+
+ q_data->ctx_save_restore_area_size =
+ q->properties.ctx_save_restore_area_size;
+
+ q_data->gws = !!q->gws;
+
+ ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
+ if (ret) {
+ pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
+ return ret;
+ }
+
+ pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
+ return ret;
+}
+
+static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
+ uint8_t __user *user_priv,
+ unsigned int *q_index,
+ uint64_t *queues_priv_data_offset)
+{
+ unsigned int q_private_data_size = 0;
+ uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
+ struct queue *q;
+ int ret = 0;
+
+ list_for_each_entry(q, &pdd->qpd.queues_list, list) {
+ struct kfd_criu_queue_priv_data *q_data;
+ uint64_t q_data_size;
+ uint32_t mqd_size;
+ uint32_t ctl_stack_size;
+
+ if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
+ q->properties.type != KFD_QUEUE_TYPE_SDMA &&
+ q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {
+
+ pr_err("Unsupported queue type (%d)\n", q->properties.type);
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
+ if (ret)
+ break;
+
+ q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;
+
+ /* Increase local buffer space if needed */
+ if (q_private_data_size < q_data_size) {
+ kfree(q_private_data);
+
+ q_private_data = kzalloc(q_data_size, GFP_KERNEL);
+ if (!q_private_data) {
+ ret = -ENOMEM;
+ break;
+ }
+ q_private_data_size = q_data_size;
+ }
+
+ q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
+
+ /* data stored in this order: priv_data, mqd, ctl_stack */
+ q_data->mqd_size = mqd_size;
+ q_data->ctl_stack_size = ctl_stack_size;
+
+ ret = criu_checkpoint_queue(pdd, q, q_data);
+ if (ret)
+ break;
+
+ q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;
+
+ ret = copy_to_user(user_priv + *queues_priv_data_offset,
+ q_data, q_data_size);
+ if (ret) {
+ ret = -EFAULT;
+ break;
+ }
+ *queues_priv_data_offset += q_data_size;
+ *q_index = *q_index + 1;
+ }
+
+ kfree(q_private_data);
+
+ return ret;
+}
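
To make the on-wire layout noted in the "priv_data, mqd, ctl_stack" comment above concrete, here is an illustrative walker over one serialized queue blob (not part of the patch; it assumes struct kfd_criu_queue_priv_data as declared in kfd_priv.h):

static const uint8_t *next_queue_blob(const uint8_t *buf)
{
	const struct kfd_criu_queue_priv_data *q_data =
		(const struct kfd_criu_queue_priv_data *)buf;
	const uint8_t *mqd = buf + sizeof(*q_data);		/* priv_data comes first */
	const uint8_t *ctl_stack = mqd + q_data->mqd_size;	/* then the MQD */

	return ctl_stack + q_data->ctl_stack_size;		/* then the ctl stack; next blob follows */
}
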
+
+int kfd_criu_checkpoint_queues(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset)
+{
+ int ret = 0, pdd_index, q_index = 0;
+
+ for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
+ struct kfd_process_device *pdd = p->pdds[pdd_index];
+
+ /*
+ * criu_checkpoint_queues_device will copy data to user and update q_index and
+ * queues_priv_data_offset
+ */
+ ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
+ priv_data_offset);
+
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static void set_queue_properties_from_criu(struct queue_properties *qp,
+ struct kfd_criu_queue_priv_data *q_data)
+{
+ qp->is_interop = false;
+ qp->queue_percent = q_data->q_percent;
+ qp->priority = q_data->priority;
+ qp->queue_address = q_data->q_address;
+ qp->queue_size = q_data->q_size;
+ qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
+ qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
+ qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
+ qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
+ qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
+ qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
+ qp->ctl_stack_size = q_data->ctl_stack_size;
+ qp->type = q_data->type;
+ qp->format = q_data->format;
+}
+
+int kfd_criu_restore_queue(struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size)
+{
+ uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
+ struct kfd_criu_queue_priv_data *q_data;
+ struct kfd_process_device *pdd;
+ uint64_t q_extra_data_size;
+ struct queue_properties qp;
+ unsigned int queue_id;
+ int ret = 0;
+
+ if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
+ return -EINVAL;
+
+ q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
+ if (!q_data)
+ return -ENOMEM;
+
+ ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
+ if (ret) {
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ *priv_data_offset += sizeof(*q_data);
+ q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;
+
+ if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
+ if (!q_extra_data) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
+ if (ret) {
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ *priv_data_offset += q_extra_data_size;
+
+ pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
+ if (!pdd) {
+ pr_err("Failed to get pdd\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (!pdd->doorbell_index &&
+ kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ /* data stored in this order: mqd, ctl_stack */
+ mqd = q_extra_data;
+ ctl_stack = mqd + q_data->mqd_size;
+
+ memset(&qp, 0, sizeof(qp));
+ set_queue_properties_from_criu(&qp, q_data);
+
+ print_queue_properties(&qp);
+
+ ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
+ NULL);
+ if (ret) {
+ pr_err("Failed to create new queue err:%d\n", ret);
+ goto exit;
+ }
+
+ if (q_data->gws)
+ ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);
+
+exit:
+ if (ret)
+ pr_err("Failed to restore queue (%d)\n", ret);
+ else
+ pr_debug("Queue id %d was restored successfully\n", queue_id);
+
+ kfree(q_data);
+
+ return ret;
+}
+
+int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
+ unsigned int qid,
+ uint32_t *mqd_size,
+ uint32_t *ctl_stack_size)
+{
+ struct process_queue_node *pqn;
+
+ pqn = get_queue_by_qid(pqm, qid);
+ if (!pqn) {
+ pr_debug("amdkfd: No queue %d exists for operation\n", qid);
+ return -EFAULT;
+ }
+
+ if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
+ pr_err("amdkfd: queue dumping not supported on this device\n");
+ return -EOPNOTSUPP;
+ }
+
+ pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
+ pqn->q, mqd_size,
+ ctl_stack_size);
+ return 0;
+}
+
#if defined(CONFIG_DEBUG_FS)
int pqm_debugfs_mqds(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index 6dcd621e5b71..0f6992b1895c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index ed4bc5f844ce..0472b56de245 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2020 Advanced Micro Devices, Inc.
+ * Copyright 2020-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -37,6 +38,9 @@ struct kfd_smi_client {
uint64_t events;
struct kfd_dev *dev;
spinlock_t lock;
+ struct rcu_head rcu;
+ pid_t pid;
+ bool suser;
};
#define MAX_KFIFO_SIZE 1024
@@ -81,7 +85,8 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
struct kfd_smi_client *client = filep->private_data;
unsigned char *buf;
- buf = kmalloc_array(MAX_KFIFO_SIZE, sizeof(*buf), GFP_KERNEL);
+ size = min_t(size_t, size, MAX_KFIFO_SIZE);
+ buf = kmalloc(size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -95,7 +100,7 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
ret = -EAGAIN;
goto ret_err;
}
- to_copy = min3(size, sizeof(buf), to_copy);
+ to_copy = min(size, to_copy);
ret = kfifo_out(&client->fifo, buf, to_copy);
spin_unlock(&client->lock);
if (ret <= 0) {
@@ -133,6 +138,14 @@ static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
return sizeof(events);
}
+static void kfd_smi_ev_client_free(struct rcu_head *p)
+{
+ struct kfd_smi_client *ev = container_of(p, struct kfd_smi_client, rcu);
+
+ kfifo_free(&ev->fifo);
+ kfree(ev);
+}
+
static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
{
struct kfd_smi_client *client = filep->private_data;
@@ -142,23 +155,31 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
list_del_rcu(&client->list);
spin_unlock(&dev->smi_lock);
- synchronize_rcu();
- kfifo_free(&client->fifo);
- kfree(client);
-
+ call_rcu(&client->rcu, kfd_smi_ev_client_free);
return 0;
}
-static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
- char *event_msg, int len)
+static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client,
+ unsigned int event)
+{
+ uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS);
+ uint64_t events = READ_ONCE(client->events);
+
+ if (pid && client->pid != pid && !(client->suser && (events & all)))
+ return false;
+
+ return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event);
+}
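
A hedged usage sketch (not part of the patch): a monitoring tool with CAP_SYS_ADMIN can opt into per-process events from every process by including the ALL_PROCESS bit in the mask it writes to the SMI fd. The enum names are assumed to come from the matching kfd_ioctl.h uapi update, and smi_fd is assumed to be the fd returned by the AMDKFD_IOC_SMI_EVENTS ioctl.

#include <stdint.h>
#include <unistd.h>
#include <linux/kfd_ioctl.h>

static void enable_all_process_faults(int smi_fd)
{
	uint64_t mask = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_PAGE_FAULT_START) |
			KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS);

	/* kfd_smi_ev_write() installs this mask as client->events */
	write(smi_fd, &mask, sizeof(mask));
}
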
+
+static void add_event_to_kfifo(pid_t pid, struct kfd_dev *dev,
+ unsigned int smi_event, char *event_msg, int len)
{
struct kfd_smi_client *client;
rcu_read_lock();
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
- if (!(READ_ONCE(client->events) &
- KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
+ if (!kfd_smi_ev_enabled(pid, client, smi_event))
continue;
spin_lock(&client->lock);
if (kfifo_avail(&client->fifo) >= len) {
@@ -174,22 +195,29 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
rcu_read_unlock();
}
-void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
+__printf(4, 5)
+static void kfd_smi_event_add(pid_t pid, struct kfd_dev *dev,
+ unsigned int event, char *fmt, ...)
{
- /*
- * GpuReset msg = Reset seq number (incremented for
- * every reset message sent before GPU reset).
- * 1 byte event + 1 byte space + 8 bytes seq num +
- * 1 byte \n + 1 byte \0 = 12
- */
- char fifo_in[12];
+ char fifo_in[KFD_SMI_EVENT_MSG_SIZE];
int len;
- unsigned int event;
+ va_list args;
if (list_empty(&dev->smi_clients))
return;
- memset(fifo_in, 0x0, sizeof(fifo_in));
+ len = snprintf(fifo_in, sizeof(fifo_in), "%x ", event);
+
+ va_start(args, fmt);
+ len += vsnprintf(fifo_in + len, sizeof(fifo_in) - len, fmt, args);
+ va_end(args);
+
+ add_event_to_kfifo(pid, dev, event, fifo_in, len);
+}
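
A hedged userspace sketch (not part of the patch) of the consumer side of kfd_smi_event_add(): every event is queued as one text line of the form "<event id in hex> <payload>\n", and a single read() may return several concatenated lines.

#include <stdio.h>
#include <unistd.h>

static void drain_smi_events(int smi_fd)
{
	char buf[1025];		/* MAX_KFIFO_SIZE plus a terminator */
	ssize_t len;

	while ((len = read(smi_fd, buf, sizeof(buf) - 1)) > 0) {
		buf[len] = '\0';
		fputs(buf, stdout);	/* one or more "<id> <payload>\n" lines */
	}
}
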
+
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
+{
+ unsigned int event;
if (post_reset) {
event = KFD_SMI_EVENT_GPU_POST_RESET;
@@ -197,61 +225,112 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
event = KFD_SMI_EVENT_GPU_PRE_RESET;
++(dev->reset_seq_num);
}
-
- len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
- dev->reset_seq_num);
-
- add_event_to_kfifo(dev, event, fifo_in, len);
+ kfd_smi_event_add(0, dev, event, "%x\n", dev->reset_seq_num);
}
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint64_t throttle_bitmask)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
- /*
- * ThermalThrottle msg = throttle_bitmask(8):
- * thermal_interrupt_count(16):
- * 1 byte event + 1 byte space + 16 byte throttle_bitmask +
- * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
- * 1 byte \0 = 37
- */
- char fifo_in[37];
- int len;
-
- if (list_empty(&dev->smi_clients))
- return;
-
- len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
- KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
- atomic64_read(&adev->smu.throttle_int_counter));
-
- add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
+ throttle_bitmask,
+ amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
}
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
struct amdgpu_task_info task_info;
- /* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
- /* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
- * 1 byte \0 = 29
- */
- char fifo_in[29];
- int len;
-
- if (list_empty(&dev->smi_clients))
- return;
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, pasid, &task_info);
+ amdgpu_vm_get_task_info(dev->adev, pasid, &task_info);
/* Report VM faults from user applications, not retry from kernel */
if (!task_info.pid)
return;
- len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
- task_info.pid, task_info.task_name);
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
+ task_info.pid, task_info.task_name);
+}
+
+void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, bool write_fault,
+ ktime_t ts)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_START,
+ "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
+ address, dev->id, write_fault ? 'W' : 'R');
+}
+
+void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, bool migration)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_END,
+ "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
+ pid, address, dev->id, migration ? 'M' : 'U');
+}
+
+void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
+ unsigned long start, unsigned long end,
+ uint32_t from, uint32_t to,
+ uint32_t prefetch_loc, uint32_t preferred_loc,
+ uint32_t trigger)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_START,
+ "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
+ ktime_get_boottime_ns(), pid, start, end - start,
+ from, to, prefetch_loc, preferred_loc, trigger);
+}
+
+void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
+ unsigned long start, unsigned long end,
+ uint32_t from, uint32_t to, uint32_t trigger)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_END,
+ "%lld -%d @%lx(%lx) %x->%x %d\n",
+ ktime_get_boottime_ns(), pid, start, end - start,
+ from, to, trigger);
+}
+
+void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid,
+ uint32_t trigger)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_EVICTION,
+ "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
+ dev->id, trigger);
+}
+
+void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_RESTORE,
+ "%lld -%d %x\n", ktime_get_boottime_ns(), pid,
+ dev->id);
+}
+
+void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
+{
+ struct kfd_process *p;
+ int i;
+
+ p = kfd_lookup_process_by_mm(mm);
+ if (!p)
+ return;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
- add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
+ kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
+ KFD_SMI_EVENT_QUEUE_RESTORE,
+ "%lld -%d %x %c\n", ktime_get_boottime_ns(),
+ p->lead_thread->pid, pdd->dev->id, 'R');
+ }
+ kfd_unref_process(p);
+}
+
+void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, unsigned long last,
+ uint32_t trigger)
+{
+ kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_UNMAP_FROM_GPU,
+ "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
+ pid, address, last - address + 1, dev->id, trigger);
}
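
For reference (inferred from the format strings above, not normative): all of the new per-process events share the prefix "<event id> <boottime ns> -<pid>". Page-fault, migrate and unmap events then carry "@<address>(<size or gpu id>)" plus event-specific fields such as R/W, M/U, "<from>-><to>" and the numeric trigger; for example, a page-fault-start line looks like "<id> <ns> -<pid> @<addr>(<gpu id>) W".
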
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
@@ -270,23 +349,31 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
return ret;
}
- ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
- O_RDWR);
- if (ret < 0) {
- kfifo_free(&client->fifo);
- kfree(client);
- return ret;
- }
- *fd = ret;
-
init_waitqueue_head(&client->wait_queue);
spin_lock_init(&client->lock);
client->events = 0;
client->dev = dev;
+ client->pid = current->tgid;
+ client->suser = capable(CAP_SYS_ADMIN);
spin_lock(&dev->smi_lock);
list_add_rcu(&client->list, &dev->smi_clients);
spin_unlock(&dev->smi_lock);
+ ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
+ O_RDWR);
+ if (ret < 0) {
+ spin_lock(&dev->smi_lock);
+ list_del_rcu(&client->list);
+ spin_unlock(&dev->smi_lock);
+
+ synchronize_rcu();
+
+ kfifo_free(&client->fifo);
+ kfree(client);
+ return ret;
+ }
+ *fd = ret;
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index bffd0c32b060..76fe4e0ec2d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2020 Advanced Micro Devices, Inc.
+ * Copyright 2020-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -28,5 +29,24 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint64_t throttle_bitmask);
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
-
+void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, bool write_fault,
+ ktime_t ts);
+void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, bool migration);
+void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
+ unsigned long start, unsigned long end,
+ uint32_t from, uint32_t to,
+ uint32_t prefetch_loc, uint32_t preferred_loc,
+ uint32_t trigger);
+void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
+ unsigned long start, unsigned long end,
+ uint32_t from, uint32_t to, uint32_t trigger);
+void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid,
+ uint32_t trigger);
+void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid);
+void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm);
+void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid,
+ unsigned long address, unsigned long last,
+ uint32_t trigger);
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3cb4681c5f53..64fdf63093a0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -32,6 +32,7 @@
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"
+#include "kfd_smi_events.h"
#ifdef dev_fmt
#undef dev_fmt
@@ -43,7 +44,18 @@
/* Long enough to ensure no retry fault comes after svm range is restored and
* page table is updated.
*/
-#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000
+#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (2UL * NSEC_PER_MSEC)
+
+/* Giant svm ranges are split into smaller ranges based on this value. It is
+ * the minimum over all dGPUs/APUs of 1/32 of VRAM size, clamped between 2MB
+ * and 1GB and rounded down to a power of two.
+ */
+static uint64_t max_svm_range_pages;
+
+struct criu_svm_metadata {
+ struct list_head list;
+ struct kfd_criu_svm_range_priv_data data;
+};
static void svm_range_evict_svm_bo_worker(struct work_struct *work);
static bool
@@ -107,7 +119,7 @@ static void svm_range_add_to_svms(struct svm_range *prange)
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
prange, prange->start, prange->last);
- list_add_tail(&prange->list, &prange->svms->list);
+ list_move_tail(&prange->list, &prange->svms->list);
prange->it_node.start = prange->start;
prange->it_node.last = prange->last;
interval_tree_insert(&prange->it_node, &prange->svms->objects);
@@ -144,8 +156,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
int i, r;
if (!addr) {
- addr = kvmalloc_array(prange->npages, sizeof(*addr),
- GFP_KERNEL | __GFP_ZERO);
+ addr = kvcalloc(prange->npages, sizeof(*addr), GFP_KERNEL);
if (!addr)
return -ENOMEM;
prange->dma_addr[gpuidx] = addr;
@@ -193,7 +204,6 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
struct kfd_process_device *pdd;
- struct amdgpu_device *adev;
pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
@@ -201,9 +211,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
pr_debug("failed to find device idx %d\n", gpuidx);
return -EINVAL;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- r = svm_range_dma_map_dev(adev, prange, offset, npages,
+ r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,
hmm_pfns, gpuidx);
if (r)
break;
@@ -257,13 +266,22 @@ void svm_range_free_dma_mappings(struct svm_range *prange)
}
}
-static void svm_range_free(struct svm_range *prange)
+static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
{
+ uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
+ struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
prange->start, prange->last);
svm_range_vram_node_free(prange);
svm_range_free_dma_mappings(prange);
+
+ if (update_mem_usage && !p->xnack_enabled) {
+ pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
+ amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ }
mutex_destroy(&prange->lock);
mutex_destroy(&prange->migrate_mutex);
kfree(prange);
@@ -282,7 +300,7 @@ svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
static struct
svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
- uint64_t last)
+ uint64_t last, bool update_mem_usage)
{
uint64_t size = last - start + 1;
struct svm_range *prange;
@@ -291,14 +309,21 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
prange = kzalloc(sizeof(*prange), GFP_KERNEL);
if (!prange)
return NULL;
+
+ p = container_of(svms, struct kfd_process, svms);
+ if (!p->xnack_enabled && update_mem_usage &&
+ amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
+ pr_info("SVM mapping failed, exceeds resident system memory limit\n");
+ kfree(prange);
+ return NULL;
+ }
prange->npages = size;
prange->svms = svms;
prange->start = start;
prange->last = last;
INIT_LIST_HEAD(&prange->list);
INIT_LIST_HEAD(&prange->update_list);
- INIT_LIST_HEAD(&prange->remove_list);
- INIT_LIST_HEAD(&prange->insert_list);
INIT_LIST_HEAD(&prange->svm_bo_list);
INIT_LIST_HEAD(&prange->deferred_list);
INIT_LIST_HEAD(&prange->child_list);
@@ -307,7 +332,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
mutex_init(&prange->migrate_mutex);
mutex_init(&prange->lock);
- p = container_of(svms, struct kfd_process, svms);
if (p->xnack_enabled)
bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
MAX_GPU_INSTANCE);
@@ -334,6 +358,8 @@ static void svm_range_bo_release(struct kref *kref)
struct svm_range_bo *svm_bo;
svm_bo = container_of(kref, struct svm_range_bo, kref);
+ pr_debug("svm_bo 0x%p\n", svm_bo);
+
spin_lock(&svm_bo->list_lock);
while (!list_empty(&svm_bo->range_list)) {
struct svm_range *prange =
@@ -367,12 +393,33 @@ static void svm_range_bo_release(struct kref *kref)
kfree(svm_bo);
}
-void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+static void svm_range_bo_wq_release(struct work_struct *work)
{
- if (!svm_bo)
- return;
+ struct svm_range_bo *svm_bo;
- kref_put(&svm_bo->kref, svm_range_bo_release);
+ svm_bo = container_of(work, struct svm_range_bo, release_work);
+ svm_range_bo_release(&svm_bo->kref);
+}
+
+static void svm_range_bo_release_async(struct kref *kref)
+{
+ struct svm_range_bo *svm_bo;
+
+ svm_bo = container_of(kref, struct svm_range_bo, kref);
+ pr_debug("svm_bo 0x%p\n", svm_bo);
+ INIT_WORK(&svm_bo->release_work, svm_range_bo_wq_release);
+ schedule_work(&svm_bo->release_work);
+}
+
+void svm_range_bo_unref_async(struct svm_range_bo *svm_bo)
+{
+ kref_put(&svm_bo->kref, svm_range_bo_release_async);
+}
+
+static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+{
+ if (svm_bo)
+ kref_put(&svm_bo->kref, svm_range_bo_release);
}
static bool
@@ -494,7 +541,6 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
kfree(svm_bo);
return -ESRCH;
}
- svm_bo->svms = prange->svms;
svm_bo->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
mm,
@@ -508,7 +554,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0;
- bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO;
+ bp.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
bp.type = ttm_bo_type_device;
bp.resv = NULL;
@@ -524,7 +570,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
goto reserve_bo_failed;
}
- r = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
+ r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
if (r) {
pr_debug("failed %d to reserve bo\n", r);
amdgpu_bo_unreserve(bo);
@@ -581,7 +627,7 @@ svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id)
return NULL;
}
- return (struct amdgpu_device *)pdd->dev->kgd;
+ return pdd->dev->adev;
}
struct kfd_process_device *
@@ -593,7 +639,7 @@ svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev)
p = container_of(prange->svms, struct kfd_process, svms);
- r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpu_idx);
+ r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpu_idx);
if (r) {
pr_debug("failed to get device id by adev %p\n", adev);
return NULL;
@@ -662,7 +708,8 @@ svm_range_check_attr(struct kfd_process *p,
static void
svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+ bool *update_mapping)
{
uint32_t i;
int gpuidx;
@@ -678,6 +725,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
case KFD_IOCTL_SVM_ATTR_ACCESS:
case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+ *update_mapping = true;
gpuidx = kfd_process_gpuidx_from_gpuid(p,
attrs[i].value);
if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
@@ -692,9 +740,11 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
}
break;
case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ *update_mapping = true;
prange->flags |= attrs[i].value;
break;
case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+ *update_mapping = true;
prange->flags &= ~attrs[i].value;
break;
case KFD_IOCTL_SVM_ATTR_GRANULARITY:
@@ -706,6 +756,61 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
}
}
+static bool
+svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+ uint32_t i;
+ int gpuidx;
+
+ for (i = 0; i < nattr; i++) {
+ switch (attrs[i].type) {
+ case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+ if (prange->preferred_loc != attrs[i].value)
+ return false;
+ break;
+ case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+ /* Prefetch should always trigger a migration even
+ * if the value of the attribute didn't change.
+ */
+ return false;
+ case KFD_IOCTL_SVM_ATTR_ACCESS:
+ case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+ case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+ gpuidx = kfd_process_gpuidx_from_gpuid(p,
+ attrs[i].value);
+ if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
+ if (test_bit(gpuidx, prange->bitmap_access) ||
+ test_bit(gpuidx, prange->bitmap_aip))
+ return false;
+ } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) {
+ if (!test_bit(gpuidx, prange->bitmap_access))
+ return false;
+ } else {
+ if (!test_bit(gpuidx, prange->bitmap_aip))
+ return false;
+ }
+ break;
+ case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ if ((prange->flags & attrs[i].value) != attrs[i].value)
+ return false;
+ break;
+ case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+ if ((prange->flags & attrs[i].value) != 0)
+ return false;
+ break;
+ case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+ if (prange->granularity != attrs[i].value)
+ return false;
+ break;
+ default:
+ WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
+ }
+ }
+
+ return true;
+}
+
/**
* svm_range_debug_dump - print all range information from svms
* @svms: svm range list header
@@ -743,14 +848,6 @@ static void svm_range_debug_dump(struct svm_range_list *svms)
}
}
-static bool
-svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new)
-{
- return (old->prefetch_loc == new->prefetch_loc &&
- old->flags == new->flags &&
- old->granularity == new->granularity);
-}
-
static int
svm_range_split_array(void *ppnew, void *ppold, size_t size,
uint64_t old_start, uint64_t old_n,
@@ -880,6 +977,7 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
new->prefetch_loc = old->prefetch_loc;
new->actual_loc = old->actual_loc;
new->granularity = old->granularity;
+ new->mapped_to_gpu = old->mapped_to_gpu;
bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
@@ -925,9 +1023,9 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
svms = prange->svms;
if (old_start == start)
- *new = svm_range_new(svms, last + 1, old_last);
+ *new = svm_range_new(svms, last + 1, old_last, false);
else
- *new = svm_range_new(svms, old_start, start - 1);
+ *new = svm_range_new(svms, old_start, start - 1, false);
if (!*new)
return -ENOMEM;
@@ -935,7 +1033,7 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
if (r) {
pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n",
r, old_start, old_last, start, last);
- svm_range_free(*new);
+ svm_range_free(*new, false);
*new = NULL;
}
@@ -943,26 +1041,26 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
}
static int
-svm_range_split_tail(struct svm_range *prange, struct svm_range *new,
+svm_range_split_tail(struct svm_range *prange,
uint64_t new_last, struct list_head *insert_list)
{
struct svm_range *tail;
int r = svm_range_split(prange, prange->start, new_last, &tail);
if (!r)
- list_add(&tail->insert_list, insert_list);
+ list_add(&tail->list, insert_list);
return r;
}
static int
-svm_range_split_head(struct svm_range *prange, struct svm_range *new,
+svm_range_split_head(struct svm_range *prange,
uint64_t new_start, struct list_head *insert_list)
{
struct svm_range *head;
int r = svm_range_split(prange, new_start, prange->last, &head);
if (!r)
- list_add(&head->insert_list, insert_list);
+ list_add(&head->list, insert_list);
return r;
}
@@ -1053,8 +1151,8 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
if (domain == SVM_RANGE_VRAM_DOMAIN)
bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
- switch (adev->asic_type) {
- case CHIP_ARCTURUS:
+ switch (KFD_GC_VERSION(adev->kfd.dev)) {
+ case IP_VERSION(9, 4, 1):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_adev == adev) {
mapping_flags |= coherent ?
@@ -1070,7 +1168,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
break;
- case CHIP_ALDEBARAN:
+ case IP_VERSION(9, 4, 2):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_adev == adev) {
mapping_flags |= coherent ?
@@ -1117,23 +1215,33 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
pr_debug("[0x%llx 0x%llx]\n", start, last);
- return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
- start, last, init_pte_value, 0,
- NULL, NULL, fence, NULL);
+ return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
+ last, init_pte_value, 0, 0, NULL, NULL,
+ fence);
}
static int
svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
- unsigned long last)
+ unsigned long last, uint32_t trigger)
{
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
struct kfd_process_device *pdd;
struct dma_fence *fence = NULL;
- struct amdgpu_device *adev;
struct kfd_process *p;
uint32_t gpuidx;
int r = 0;
+ if (!prange->mapped_to_gpu) {
+ pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n",
+ prange, prange->start, prange->last);
+ return 0;
+ }
+
+ if (prange->start == start && prange->last == last) {
+ pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange);
+ prange->mapped_to_gpu = false;
+ }
+
bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
MAX_GPU_INSTANCE);
p = container_of(prange->svms, struct kfd_process, svms);
@@ -1145,9 +1253,12 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
pr_debug("failed to find device idx %d\n", gpuidx);
return -EINVAL;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- r = svm_range_unmap_from_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
+ kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
+ start, last, trigger);
+
+ r = svm_range_unmap_from_gpu(pdd->dev->adev,
+ drm_priv_to_vm(pdd->drm_priv),
start, last, &fence);
if (r)
break;
@@ -1159,21 +1270,20 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
if (r)
break;
}
- amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
- p->pasid, TLB_FLUSH_HEAVYWEIGHT);
+ kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
}
return r;
}
static int
-svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- struct svm_range *prange, unsigned long offset,
- unsigned long npages, bool readonly, dma_addr_t *dma_addr,
- struct amdgpu_device *bo_adev, struct dma_fence **fence)
+svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
+ unsigned long offset, unsigned long npages, bool readonly,
+ dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
+ struct dma_fence **fence, bool flush_tlb)
{
- struct amdgpu_bo_va bo_va;
- bool table_freed = false;
+ struct amdgpu_device *adev = pdd->dev->adev;
+ struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
uint64_t pte_flags;
unsigned long last_start;
int last_domain;
@@ -1185,9 +1295,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
last_start, last_start + npages - 1, readonly);
- if (prange->svm_bo && prange->ttm_res)
- bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
-
for (i = offset; i < offset + npages; i++) {
last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
@@ -1211,13 +1318,12 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
(last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
pte_flags);
- r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
- NULL, last_start,
- prange->start + i, pte_flags,
- last_start - prange->start,
- NULL, dma_addr,
- &vm->last_update,
- &table_freed);
+ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
+ last_start, prange->start + i,
+ pte_flags,
+ (last_start - prange->start) << PAGE_SHIFT,
+ bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,
+ NULL, dma_addr, &vm->last_update);
for (j = last_start - prange->start; j <= i; j++)
dma_addr[j] |= last_domain;
@@ -1239,13 +1345,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (fence)
*fence = dma_fence_get(vm->last_update);
- if (table_freed) {
- struct kfd_process *p;
-
- p = container_of(prange->svms, struct kfd_process, svms);
- amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
- p->pasid, TLB_FLUSH_LEGACY);
- }
out:
return r;
}
@@ -1253,11 +1352,10 @@ out:
static int
svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
unsigned long npages, bool readonly,
- unsigned long *bitmap, bool wait)
+ unsigned long *bitmap, bool wait, bool flush_tlb)
{
struct kfd_process_device *pdd;
struct amdgpu_device *bo_adev;
- struct amdgpu_device *adev;
struct kfd_process *p;
struct dma_fence *fence = NULL;
uint32_t gpuidx;
@@ -1276,22 +1374,21 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
pr_debug("failed to find device idx %d\n", gpuidx);
return -EINVAL;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
pdd = kfd_bind_process_to_device(pdd->dev, p);
if (IS_ERR(pdd))
return -EINVAL;
- if (bo_adev && adev != bo_adev &&
- !amdgpu_xgmi_same_hive(adev, bo_adev)) {
+ if (bo_adev && pdd->dev->adev != bo_adev &&
+ !amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) {
pr_debug("cannot map to device idx %d\n", gpuidx);
continue;
}
- r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
- prange, offset, npages, readonly,
+ r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
prange->dma_addr[gpuidx],
- bo_adev, wait ? &fence : NULL);
+ bo_adev, wait ? &fence : NULL,
+ flush_tlb);
if (r)
break;
@@ -1304,6 +1401,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
break;
}
}
+
+ kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
}
return r;
@@ -1313,7 +1412,7 @@ struct svm_validate_context {
struct kfd_process *process;
struct svm_range *prange;
bool intr;
- unsigned long bitmap[MAX_GPU_INSTANCE];
+ DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
struct ttm_validate_buffer tv[MAX_GPU_INSTANCE];
struct list_head validate_list;
struct ww_acquire_ctx ticket;
@@ -1322,7 +1421,6 @@ struct svm_validate_context {
static int svm_range_reserve_bos(struct svm_validate_context *ctx)
{
struct kfd_process_device *pdd;
- struct amdgpu_device *adev;
struct amdgpu_vm *vm;
uint32_t gpuidx;
int r;
@@ -1334,7 +1432,6 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
pr_debug("failed to find device idx %d\n", gpuidx);
return -EINVAL;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
vm = drm_priv_to_vm(pdd->drm_priv);
ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
@@ -1356,9 +1453,9 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
r = -EINVAL;
goto unreserve_out;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- r = amdgpu_vm_validate_pt_bos(adev, drm_priv_to_vm(pdd->drm_priv),
+ r = amdgpu_vm_validate_pt_bos(pdd->dev->adev,
+ drm_priv_to_vm(pdd->drm_priv),
svm_range_bo_validate, NULL);
if (r) {
pr_debug("failed %d validate pt bos\n", r);
@@ -1381,12 +1478,10 @@ static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
{
struct kfd_process_device *pdd;
- struct amdgpu_device *adev;
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- return SVM_ADEV_PGMAP_OWNER(adev);
+ return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev);
}
/*
@@ -1414,8 +1509,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
* 5. Release page table (and SVM BO) reservation
*/
static int svm_range_validate_and_map(struct mm_struct *mm,
- struct svm_range *prange,
- int32_t gpuidx, bool intr, bool wait)
+ struct svm_range *prange, int32_t gpuidx,
+ bool intr, bool wait, bool flush_tlb)
{
struct svm_validate_context ctx;
unsigned long start, end, addr;
@@ -1454,8 +1549,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
prange->bitmap_aip, MAX_GPU_INSTANCE);
}
- if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
- return 0;
+ if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
+ if (!prange->mapped_to_gpu)
+ return 0;
+
+ bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+ }
if (prange->actual_loc && !prange->ttm_res) {
/* This should never happen. actual_loc gets set by
@@ -1527,7 +1626,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
}
r = svm_range_map_to_gpus(prange, offset, npages, readonly,
- ctx.bitmap, wait);
+ ctx.bitmap, wait, flush_tlb);
unlock_out:
svm_range_unlock(prange);
@@ -1535,14 +1634,16 @@ unlock_out:
addr = next;
}
- if (addr == end)
+ if (addr == end) {
prange->validated_once = true;
+ prange->mapped_to_gpu = true;
+ }
unreserve_out:
svm_range_unreserve_bos(&ctx);
if (!r)
- prange->validate_timestamp = ktime_to_us(ktime_get());
+ prange->validate_timestamp = ktime_get_boottime();
return r;
}
@@ -1574,6 +1675,7 @@ retry_flush_work:
static void svm_range_restore_work(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
+ struct amdkfd_process_info *process_info;
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
@@ -1589,14 +1691,17 @@ static void svm_range_restore_work(struct work_struct *work)
pr_debug("restore svm ranges\n");
- /* kfd_process_notifier_release destroys this worker thread. So during
- * the lifetime of this thread, kfd_process and mm will be valid.
- */
p = container_of(svms, struct kfd_process, svms);
- mm = p->mm;
- if (!mm)
+ process_info = p->kgd_process_info;
+
+	/* Keep an mm reference while svm_range_validate_and_map maps the ranges */
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_debug("svms 0x%p process mm gone\n", svms);
return;
+ }
+ mutex_lock(&process_info->lock);
svm_range_list_lock_and_flush_work(svms, mm);
mutex_lock(&svms->lock);
@@ -1617,7 +1722,7 @@ static void svm_range_restore_work(struct work_struct *work)
mutex_lock(&prange->migrate_mutex);
r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
- false, true);
+ false, true, false);
if (r)
pr_debug("failed %d to map 0x%lx to gpus\n", r,
prange->start);
@@ -1649,17 +1754,25 @@ static void svm_range_restore_work(struct work_struct *work)
out_reschedule:
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
+ mutex_unlock(&process_info->lock);
/* If validation failed, reschedule another attempt */
if (evicted_ranges) {
pr_debug("reschedule to restore svm range\n");
schedule_delayed_work(&svms->restore_work,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+
+ kfd_smi_event_queue_restore_rescheduled(mm);
}
+ mmput(mm);
}
/**
* svm_range_evict - evict svm range
+ * @prange: svm range structure
+ * @mm: current process mm_struct
+ * @start: first page of the invalidated range, in pages
+ * @last: last page of the invalidated range, in pages
*
* Stop all queues of the process to ensure GPU doesn't access the memory, then
* return to let CPU evict the buffer and proceed CPU pagetable update.
@@ -1671,7 +1784,8 @@ out_reschedule:
*/
static int
svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
- unsigned long start, unsigned long last)
+ unsigned long start, unsigned long last,
+ enum mmu_notifier_event event)
{
struct svm_range_list *svms = prange->svms;
struct svm_range *pchild;
@@ -1683,10 +1797,15 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
svms, prange->start, prange->last, start, last);
- if (!p->xnack_enabled) {
+ if (!p->xnack_enabled ||
+ (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
int evicted_ranges;
+ bool mapped = prange->mapped_to_gpu;
list_for_each_entry(pchild, &prange->child_list, child_list) {
+ if (!pchild->mapped_to_gpu)
+ continue;
+ mapped = true;
mutex_lock_nested(&pchild->lock, 1);
if (pchild->start <= last && pchild->last >= start) {
pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
@@ -1696,6 +1815,9 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
mutex_unlock(&pchild->lock);
}
+ if (!mapped)
+ return r;
+
if (prange->start <= last && prange->last >= start)
atomic_inc(&prange->invalid);
@@ -1707,7 +1829,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
prange->svms, prange->start, prange->last);
/* First eviction, stop the queues */
- r = kgd2kfd_quiesce_mm(mm);
+ r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
if (r)
pr_debug("failed to quiesce KFD\n");
@@ -1716,6 +1838,12 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
} else {
unsigned long s, l;
+ uint32_t trigger;
+
+ if (event == MMU_NOTIFY_MIGRATE)
+ trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
+ else
+ trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY;
pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
prange->svms, start, last);
@@ -1724,13 +1852,13 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
s = max(start, pchild->start);
l = min(last, pchild->last);
if (l >= s)
- svm_range_unmap_from_gpus(pchild, s, l);
+ svm_range_unmap_from_gpus(pchild, s, l, trigger);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
- svm_range_unmap_from_gpus(prange, s, l);
+ svm_range_unmap_from_gpus(prange, s, l, trigger);
}
return r;
@@ -1740,7 +1868,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
{
struct svm_range *new;
- new = svm_range_new(old->svms, old->start, old->last);
+ new = svm_range_new(old->svms, old->start, old->last, false);
if (!new)
return NULL;
@@ -1757,91 +1885,144 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
new->prefetch_loc = old->prefetch_loc;
new->actual_loc = old->actual_loc;
new->granularity = old->granularity;
+ new->mapped_to_gpu = old->mapped_to_gpu;
bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
return new;
}
+void svm_range_set_max_pages(struct amdgpu_device *adev)
+{
+ uint64_t max_pages;
+ uint64_t pages, _pages;
+
+ /* 1/32 VRAM size in pages */
+ pages = adev->gmc.real_vram_size >> 17;
+ pages = clamp(pages, 1ULL << 9, 1ULL << 18);
+ pages = rounddown_pow_of_two(pages);
+ do {
+ max_pages = READ_ONCE(max_svm_range_pages);
+ _pages = min_not_zero(max_pages, pages);
+ } while (cmpxchg(&max_svm_range_pages, max_pages, _pages) != max_pages);
+}
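
A hedged worked example (not part of the patch) of the limit computed above, for a hypothetical 16 GB dGPU; clamp() and rounddown_pow_of_two() are the regular kernel helpers:

static uint64_t example_max_pages_for_16gb(void)
{
	uint64_t pages = (16ULL << 30) >> 17;		/* 1/32 of VRAM in 4K pages = 0x20000 */

	pages = clamp(pages, 1ULL << 9, 1ULL << 18);	/* keep within the 2MB..1GB window */
	return rounddown_pow_of_two(pages);		/* still 0x20000 pages, i.e. 512MB ranges */
}
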
+
+static int
+svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last,
+ uint64_t max_pages, struct list_head *insert_list,
+ struct list_head *update_list)
+{
+ struct svm_range *prange;
+ uint64_t l;
+
+ pr_debug("max_svm_range_pages 0x%llx adding [0x%llx 0x%llx]\n",
+ max_pages, start, last);
+
+ while (last >= start) {
+ l = min(last, ALIGN_DOWN(start + max_pages, max_pages) - 1);
+
+ prange = svm_range_new(svms, start, l, true);
+ if (!prange)
+ return -ENOMEM;
+ list_add(&prange->list, insert_list);
+ list_add(&prange->update_list, update_list);
+
+ start = l + 1;
+ }
+ return 0;
+}
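
To make the loop above concrete (illustrative values, not from the patch): with max_pages = 0x200, a new range [0x123, 0x523] is emitted as [0x123, 0x1ff], [0x200, 0x3ff] and [0x400, 0x523]; each piece ends just before the next max_pages-aligned boundary, so all later pieces start aligned even when the original request was not.
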
+
/**
- * svm_range_handle_overlap - split overlap ranges
- * @svms: svm range list header
- * @new: range added with this attributes
- * @start: range added start address, in pages
- * @last: range last address, in pages
- * @update_list: output, the ranges attributes are updated. For set_attr, this
- * will do validation and map to GPUs. For unmap, this will be
- * removed and unmap from GPUs
- * @insert_list: output, the ranges will be inserted into svms, attributes are
- * not changes. For set_attr, this will add into svms.
- * @remove_list:output, the ranges will be removed from svms
- * @left: the remaining range after overlap, For set_attr, this will be added
- * as new range.
+ * svm_range_add - add svm range and handle overlap
+ * @p: the process to add this range to
+ * @start: start address of the range, in pages
+ * @size: size of the range, in pages
+ * @nattr: number of attributes
+ * @attrs: array of attributes
+ * @update_list: output, the ranges need validate and update GPU mapping
+ * @insert_list: output, the ranges need insert to svms
+ * @remove_list: output, the ranges are replaced and need remove from svms
*
- * Total have 5 overlap cases.
+ * Check if the virtual address range has overlap with any existing ranges,
+ * split partly overlapping ranges and add new ranges in the gaps. All changes
+ * should be applied to the range_list and interval tree transactionally. If
+ * any range split or allocation fails, the entire update fails. Therefore any
+ * existing overlapping svm_ranges are cloned and the original svm_ranges left
+ * unchanged.
*
- * This function handles overlap of an address interval with existing
- * struct svm_ranges for applying new attributes. This may require
- * splitting existing struct svm_ranges. All changes should be applied to
- * the range_list and interval tree transactionally. If any split operation
- * fails, the entire update fails. Therefore the existing overlapping
- * svm_ranges are cloned and the original svm_ranges left unchanged. If the
- * transaction succeeds, the modified clones are added and the originals
- * freed. Otherwise the clones are removed and the old svm_ranges remain.
+ * If the transaction succeeds, the caller can update and insert clones and
+ * new ranges, then free the originals.
*
- * Context: The caller must hold svms->lock
+ * Otherwise the caller can free the clones and new ranges, while the old
+ * svm_ranges remain unchanged.
+ *
+ * Context: Process context, caller must hold svms->lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
*/
static int
-svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
- unsigned long start, unsigned long last,
- struct list_head *update_list,
- struct list_head *insert_list,
- struct list_head *remove_list,
- unsigned long *left)
+svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+ struct list_head *update_list, struct list_head *insert_list,
+ struct list_head *remove_list)
{
+ unsigned long last = start + size - 1UL;
+ struct svm_range_list *svms = &p->svms;
struct interval_tree_node *node;
struct svm_range *prange;
struct svm_range *tmp;
+ struct list_head new_list;
int r = 0;
+ pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last);
+
INIT_LIST_HEAD(update_list);
INIT_LIST_HEAD(insert_list);
INIT_LIST_HEAD(remove_list);
+ INIT_LIST_HEAD(&new_list);
node = interval_tree_iter_first(&svms->objects, start, last);
while (node) {
struct interval_tree_node *next;
- struct svm_range *old;
unsigned long next_start;
pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start,
node->last);
- old = container_of(node, struct svm_range, it_node);
+ prange = container_of(node, struct svm_range, it_node);
next = interval_tree_iter_next(node, start, last);
next_start = min(node->last, last) + 1;
- if (node->start < start || node->last > last) {
- /* node intersects the updated range, clone+split it */
+ if (svm_range_is_same_attrs(p, prange, nattr, attrs)) {
+ /* nothing to do */
+ } else if (node->start < start || node->last > last) {
+ /* node intersects the update range and its attributes
+ * will change. Clone and split it, apply updates only
+ * to the overlapping part
+ */
+ struct svm_range *old = prange;
+
prange = svm_range_clone(old);
if (!prange) {
r = -ENOMEM;
goto out;
}
- list_add(&old->remove_list, remove_list);
- list_add(&prange->insert_list, insert_list);
+ list_add(&old->update_list, remove_list);
+ list_add(&prange->list, insert_list);
+ list_add(&prange->update_list, update_list);
if (node->start < start) {
pr_debug("change old range start\n");
- r = svm_range_split_head(prange, new, start,
+ r = svm_range_split_head(prange, start,
insert_list);
if (r)
goto out;
}
if (node->last > last) {
pr_debug("change old range last\n");
- r = svm_range_split_tail(prange, new, last,
+ r = svm_range_split_tail(prange, last,
insert_list);
if (r)
goto out;
@@ -1850,36 +2031,37 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
/* The node is contained within start..last,
* just update it
*/
- prange = old;
- }
-
- if (!svm_range_is_same_attrs(prange, new))
list_add(&prange->update_list, update_list);
+ }
/* insert a new node if needed */
if (node->start > start) {
- prange = svm_range_new(prange->svms, start,
- node->start - 1);
- if (!prange) {
- r = -ENOMEM;
+ r = svm_range_split_new(svms, start, node->start - 1,
+ READ_ONCE(max_svm_range_pages),
+ &new_list, update_list);
+ if (r)
goto out;
- }
-
- list_add(&prange->insert_list, insert_list);
- list_add(&prange->update_list, update_list);
}
node = next;
start = next_start;
}
- if (left && start <= last)
- *left = last - start + 1;
+ /* add a final range at the end if needed */
+ if (start <= last)
+ r = svm_range_split_new(svms, start, last,
+ READ_ONCE(max_svm_range_pages),
+ &new_list, update_list);
out:
- if (r)
- list_for_each_entry_safe(prange, tmp, insert_list, insert_list)
- svm_range_free(prange);
+ if (r) {
+ list_for_each_entry_safe(prange, tmp, insert_list, list)
+ svm_range_free(prange, false);
+ list_for_each_entry_safe(prange, tmp, &new_list, list)
+ svm_range_free(prange, true);
+ } else {
+ list_splice(&new_list, insert_list);
+ }
return r;
}
@@ -1913,10 +2095,9 @@ svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
}
static void
-svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
+ struct mm_struct *mm)
{
- struct mm_struct *mm = prange->work_item.mm;
-
switch (prange->work_item.op) {
case SVM_OP_NULL:
pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
@@ -1927,7 +2108,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
svms, prange, prange->start, prange->last);
svm_range_unlink(prange);
svm_range_remove_notifier(prange);
- svm_range_free(prange);
+ svm_range_free(prange, true);
break;
case SVM_OP_UPDATE_RANGE_NOTIFIER:
pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
@@ -1962,7 +2143,6 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
static void svm_range_drain_retry_fault(struct svm_range_list *svms)
{
struct kfd_process_device *pdd;
- struct amdgpu_device *adev;
struct kfd_process *p;
int drain;
uint32_t i;
@@ -1980,9 +2160,9 @@ restart:
continue;
pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
+ amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
+ &pdd->dev->adev->irq.ih1);
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
}
if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
@@ -1994,40 +2174,44 @@ static void svm_range_deferred_list_work(struct work_struct *work)
struct svm_range_list *svms;
struct svm_range *prange;
struct mm_struct *mm;
- struct kfd_process *p;
svms = container_of(work, struct svm_range_list, deferred_list_work);
pr_debug("enter svms 0x%p\n", svms);
- p = container_of(svms, struct kfd_process, svms);
- /* Avoid mm is gone when inserting mmu notifier */
- mm = get_task_mm(p->lead_thread);
- if (!mm) {
- pr_debug("svms 0x%p process mm gone\n", svms);
- return;
- }
-retry:
- mmap_write_lock(mm);
-
- /* Checking for the need to drain retry faults must be inside
- * mmap write lock to serialize with munmap notifiers.
- */
- if (unlikely(atomic_read(&svms->drain_pagefaults))) {
- mmap_write_unlock(mm);
- svm_range_drain_retry_fault(svms);
- goto retry;
- }
-
spin_lock(&svms->deferred_list_lock);
while (!list_empty(&svms->deferred_range_list)) {
prange = list_first_entry(&svms->deferred_range_list,
struct svm_range, deferred_list);
- list_del_init(&prange->deferred_list);
spin_unlock(&svms->deferred_list_lock);
pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
prange->start, prange->last, prange->work_item.op);
+ mm = prange->work_item.mm;
+retry:
+ mmap_write_lock(mm);
+
+ /* Checking for the need to drain retry faults must be inside
+ * mmap write lock to serialize with munmap notifiers.
+ */
+ if (unlikely(atomic_read(&svms->drain_pagefaults))) {
+ mmap_write_unlock(mm);
+ svm_range_drain_retry_fault(svms);
+ goto retry;
+ }
+
+		/* Removal from deferred_list must happen inside the mmap write
+		 * lock, to avoid two races:
+		 * 1. unmap_from_cpu may change work_item.op and add the range
+		 *    to deferred_list again, causing a use-after-free.
+		 * 2. svm_range_list_lock_and_flush_work may take the mmap write
+		 *    lock and continue because deferred_list is empty, while the
+		 *    deferred_list work is actually waiting for the mmap lock.
+		 */
+ spin_lock(&svms->deferred_list_lock);
+ list_del_init(&prange->deferred_list);
+ spin_unlock(&svms->deferred_list_lock);
+
mutex_lock(&svms->lock);
mutex_lock(&prange->migrate_mutex);
while (!list_empty(&prange->child_list)) {
@@ -2038,19 +2222,20 @@ retry:
pr_debug("child prange 0x%p op %d\n", pchild,
pchild->work_item.op);
list_del_init(&pchild->child_list);
- svm_range_handle_list_op(svms, pchild);
+ svm_range_handle_list_op(svms, pchild, mm);
}
mutex_unlock(&prange->migrate_mutex);
- svm_range_handle_list_op(svms, prange);
+ svm_range_handle_list_op(svms, prange, mm);
mutex_unlock(&svms->lock);
+ mmap_write_unlock(mm);
+
+ /* Pairs with mmget in svm_range_add_list_work */
+ mmput(mm);
spin_lock(&svms->deferred_list_lock);
}
spin_unlock(&svms->deferred_list_lock);
-
- mmap_write_unlock(mm);
- mmput(mm);
pr_debug("exit svms 0x%p\n", svms);
}
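As a side note on the reordering above: the range is now unlinked from deferred_list only after the mmap write lock is held, and the mm is kept alive by the mmget()/mmput() pair added around each iteration. The following compilable userspace sketch (pthread mutexes standing in for the mmap lock and the list spinlock; all names are invented for illustration, this is not kernel code) shows the same ordering, so a producer that re-queues an in-flight item cannot race with the worker unlinking it:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct work_item {
	struct work_item *next;
	int op;
	bool queued;
};

static struct work_item *head;
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;	/* ~deferred_list_lock */
static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;	/* ~mmap write lock */

/* Producer: may update the op of an already-queued item; holds big_lock. */
static void add_work(struct work_item *w, int op)
{
	pthread_mutex_lock(&big_lock);
	pthread_mutex_lock(&list_lock);
	w->op = op;
	if (!w->queued) {
		w->next = head;
		head = w;
		w->queued = true;
	}
	pthread_mutex_unlock(&list_lock);
	pthread_mutex_unlock(&big_lock);
}

/* Worker: peek first, then take big_lock, and only then unlink the item. */
static void drain_work(void)
{
	pthread_mutex_lock(&list_lock);
	while (head) {
		struct work_item *w = head;
		struct work_item **pp;

		pthread_mutex_unlock(&list_lock);

		pthread_mutex_lock(&big_lock);	/* serialize with add_work() */
		pthread_mutex_lock(&list_lock);
		for (pp = &head; *pp && *pp != w; pp = &(*pp)->next)
			;
		if (*pp)
			*pp = w->next;		/* unlink only under both locks */
		w->queued = false;
		pthread_mutex_unlock(&list_lock);

		printf("handle op %d\n", w->op);	/* ~svm_range_handle_list_op */
		pthread_mutex_unlock(&big_lock);

		pthread_mutex_lock(&list_lock);
	}
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	struct work_item a = { 0 }, b = { 0 };

	add_work(&a, 1);
	add_work(&b, 2);
	drain_work();
	return 0;
}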
@@ -2068,6 +2253,9 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
prange->work_item.op = op;
} else {
prange->work_item.op = op;
+
+ /* Pairs with mmput in deferred_list_work */
+ mmget(mm);
prange->work_item.mm = mm;
list_add_tail(&prange->deferred_list,
&prange->svms->deferred_range_list);
@@ -2123,6 +2311,7 @@ static void
svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
unsigned long start, unsigned long last)
{
+ uint32_t trigger = KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU;
struct svm_range_list *svms;
struct svm_range *pchild;
struct kfd_process *p;
@@ -2150,14 +2339,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
s = max(start, pchild->start);
l = min(last, pchild->last);
if (l >= s)
- svm_range_unmap_from_gpus(pchild, s, l);
+ svm_range_unmap_from_gpus(pchild, s, l, trigger);
svm_range_unmap_split(mm, prange, pchild, start, last);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
- svm_range_unmap_from_gpus(prange, s, l);
+ svm_range_unmap_from_gpus(prange, s, l, trigger);
svm_range_unmap_split(mm, prange, prange, start, last);
if (unmap_parent)
@@ -2172,6 +2361,9 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
/**
* svm_range_cpu_invalidate_pagetables - interval notifier callback
+ * @mni: mmu_interval_notifier struct
+ * @range: mmu_notifier_range struct
+ * @cur_seq: value to pass to mmu_interval_set_seq()
*
* If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it
* is from migration, or CPU page invalidation callback.
@@ -2198,11 +2390,13 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
if (range->event == MMU_NOTIFY_RELEASE)
return true;
+ if (!mmget_not_zero(mni->mm))
+ return true;
start = mni->interval_tree.start;
last = mni->interval_tree.last;
- start = (start > range->start ? start : range->start) >> PAGE_SHIFT;
- last = (last < (range->end - 1) ? last : range->end - 1) >> PAGE_SHIFT;
+ start = max(start, range->start) >> PAGE_SHIFT;
+ last = min(last, range->end - 1) >> PAGE_SHIFT;
pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n",
start, last, range->start >> PAGE_SHIFT,
(range->end - 1) >> PAGE_SHIFT,
@@ -2219,11 +2413,12 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
svm_range_unmap_from_cpu(mni->mm, prange, start, last);
break;
default:
- svm_range_evict(prange, mni->mm, start, last);
+ svm_range_evict(prange, mni->mm, start, last, range->event);
break;
}
svm_range_unlock(prange);
+ mmput(mni->mm);
return true;
}
@@ -2304,7 +2499,7 @@ svm_range_best_restore_location(struct svm_range *prange,
p = container_of(prange->svms, struct kfd_process, svms);
- r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx);
+ r = kfd_process_gpuid_from_adev(p, adev, &gpuid, gpuidx);
if (r < 0) {
pr_debug("failed to get gpuid from kgd\n");
return -1;
@@ -2476,14 +2671,14 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
last = addr;
}
- prange = svm_range_new(&p->svms, start, last);
+ prange = svm_range_new(&p->svms, start, last, true);
if (!prange) {
pr_debug("Failed to create prange in address [0x%llx]\n", addr);
return NULL;
}
- if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) {
+ if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) {
pr_debug("failed to get gpuid from kgd\n");
- svm_range_free(prange);
+ svm_range_free(prange, true);
return NULL;
}
@@ -2548,7 +2743,7 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
uint32_t gpuid;
int r;
- r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx);
+ r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx);
if (r < 0)
return;
}
@@ -2582,11 +2777,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
- uint64_t timestamp;
+ ktime_t timestamp = ktime_get_boottime();
int32_t best_loc;
int32_t gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
struct vm_area_struct *vma;
+ bool migration = false;
int r = 0;
if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
@@ -2599,11 +2795,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
pr_debug("kfd process not founded pasid 0x%x\n", pasid);
return 0;
}
- if (!p->xnack_enabled) {
- pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
- r = -EFAULT;
- goto out;
- }
svms = &p->svms;
pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
@@ -2614,6 +2805,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
goto out;
}
+ if (!p->xnack_enabled) {
+ pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
+ r = -EFAULT;
+ goto out;
+ }
+
/* p->lead_thread is available as kfd_process_wq_release flush the work
* before releasing task ref.
*/
@@ -2662,9 +2859,9 @@ retry_write_locked:
goto out_unlock_range;
}
- timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
/* skip duplicate vm fault on different pages of same range */
- if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+ if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp,
+ AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) {
pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
svms, prange->start, prange->last);
r = 0;
@@ -2700,9 +2897,14 @@ retry_write_locked:
svms, prange->start, prange->last, best_loc,
prange->actual_loc);
+ kfd_smi_event_page_fault_start(adev->kfd.dev, p->lead_thread->pid, addr,
+ write_fault, timestamp);
+
if (prange->actual_loc != best_loc) {
+ migration = true;
if (best_loc) {
- r = svm_migrate_to_vram(prange, best_loc, mm);
+ r = svm_migrate_to_vram(prange, best_loc, mm,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
if (r) {
pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
r, addr);
@@ -2710,12 +2912,16 @@ retry_write_locked:
* VRAM failed
*/
if (prange->actual_loc)
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, mm,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
+ NULL);
else
r = 0;
}
} else {
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, mm,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
+ NULL);
}
if (r) {
pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
@@ -2724,11 +2930,14 @@ retry_write_locked:
}
}
- r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
+ r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
if (r)
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
r, svms, prange->start, prange->last);
+ kfd_smi_event_page_fault_end(adev->kfd.dev, p->lead_thread->pid, addr,
+ migration);
+
out_unlock_range:
mutex_unlock(&prange->migrate_mutex);
out_unlock_svms:
@@ -2749,6 +2958,64 @@ out:
return r;
}
+int
+svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
+{
+ struct svm_range *prange, *pchild;
+ uint64_t reserved_size = 0;
+ uint64_t size;
+ int r = 0;
+
+ pr_debug("switching xnack from %d to %d\n", p->xnack_enabled, xnack_enabled);
+
+ mutex_lock(&p->svms.lock);
+
+ list_for_each_entry(prange, &p->svms.list, list) {
+ svm_range_lock(prange);
+ list_for_each_entry(pchild, &prange->child_list, child_list) {
+ size = (pchild->last - pchild->start + 1) << PAGE_SHIFT;
+ if (xnack_enabled) {
+ amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ } else {
+ r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ if (r)
+ goto out_unlock;
+ reserved_size += size;
+ }
+ }
+
+ size = (prange->last - prange->start + 1) << PAGE_SHIFT;
+ if (xnack_enabled) {
+ amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ } else {
+ r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ if (r)
+ goto out_unlock;
+ reserved_size += size;
+ }
+out_unlock:
+ svm_range_unlock(prange);
+ if (r)
+ break;
+ }
+
+ if (r)
+ amdgpu_amdkfd_unreserve_mem_limit(NULL, reserved_size,
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+ else
+ /* Changing the xnack mode must be done inside the svms lock, to
+ * avoid racing with svm_range_deferred_list_work unreserving
+ * memory in parallel.
+ */
+ p->xnack_enabled = xnack_enabled;
+
+ mutex_unlock(&p->svms.lock);
+ return r;
+}
+
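The new svm_range_switch_xnack_reserve_mem() above follows a reserve-with-rollback pattern: successful reservations are accumulated in reserved_size so that a single unreserve can undo them if a later one fails. A minimal userspace sketch of that pattern, with a made-up limit standing in for the amdgpu_amdkfd_{reserve,unreserve}_mem_limit() helpers:

#include <stdint.h>
#include <stdio.h>

static uint64_t limit = 1000, used;

static int reserve(uint64_t size)
{
	if (used + size > limit)
		return -1;		/* ~-ENOMEM */
	used += size;
	return 0;
}

static void unreserve(uint64_t size)
{
	used -= size;
}

static int reserve_all(const uint64_t *sizes, int n)
{
	uint64_t reserved = 0;
	int i, r = 0;

	for (i = 0; i < n; i++) {
		r = reserve(sizes[i]);
		if (r)
			break;
		reserved += sizes[i];	/* remember how much to roll back */
	}

	if (r)
		unreserve(reserved);	/* one rollback covers every success so far */
	return r;
}

int main(void)
{
	uint64_t sizes[] = { 300, 400, 500 };	/* third one exceeds the limit */

	printf("reserve_all: %d, used after: %llu\n",
	       reserve_all(sizes, 3), (unsigned long long)used);
	return 0;
}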
void svm_range_list_fini(struct kfd_process *p)
{
struct svm_range *prange;
@@ -2756,6 +3023,8 @@ void svm_range_list_fini(struct kfd_process *p)
pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
+ cancel_delayed_work_sync(&p->svms.restore_work);
+
/* Ensure list work is finished before process is destroyed */
flush_work(&p->svms.deferred_list_work);
@@ -2766,11 +3035,10 @@ void svm_range_list_fini(struct kfd_process *p)
atomic_inc(&p->svms.drain_pagefaults);
svm_range_drain_retry_fault(&p->svms);
-
list_for_each_entry_safe(prange, next, &p->svms.list, list) {
svm_range_unlink(prange);
svm_range_remove_notifier(prange);
- svm_range_free(prange);
+ svm_range_free(prange, true);
}
mutex_destroy(&p->svms.lock);
@@ -2791,6 +3059,7 @@ int svm_range_list_init(struct kfd_process *p)
INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
INIT_LIST_HEAD(&svms->deferred_range_list);
+ INIT_LIST_HEAD(&svms->criu_svm_metadata_list);
spin_lock_init(&svms->deferred_list_lock);
for (i = 0; i < p->n_pdds; i++)
@@ -2895,59 +3164,6 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
}
/**
- * svm_range_add - add svm range and handle overlap
- * @p: the range add to this process svms
- * @start: page size aligned
- * @size: page size aligned
- * @nattr: number of attributes
- * @attrs: array of attributes
- * @update_list: output, the ranges need validate and update GPU mapping
- * @insert_list: output, the ranges need insert to svms
- * @remove_list: output, the ranges are replaced and need remove from svms
- *
- * Check if the virtual address range has overlap with the registered ranges,
- * split the overlapped range, copy and adjust pages address and vram nodes in
- * old and new ranges.
- *
- * Context: Process context, caller must hold svms->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-static int
-svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
- struct list_head *update_list, struct list_head *insert_list,
- struct list_head *remove_list)
-{
- uint64_t last = start + size - 1UL;
- struct svm_range_list *svms;
- struct svm_range new = {0};
- struct svm_range *prange;
- unsigned long left = 0;
- int r = 0;
-
- pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last);
-
- svm_range_apply_attrs(p, &new, nattr, attrs);
-
- svms = &p->svms;
-
- r = svm_range_handle_overlap(svms, &new, start, last, update_list,
- insert_list, remove_list, &left);
- if (r)
- return r;
-
- if (left) {
- prange = svm_range_new(svms, last - left + 1, last);
- list_add(&prange->insert_list, insert_list);
- list_add(&prange->update_list, update_list);
- }
-
- return 0;
-}
-
-/**
* svm_range_best_prefetch_location - decide the best prefetch location
* @prange: svm range structure
*
@@ -2980,7 +3196,6 @@ svm_range_best_prefetch_location(struct svm_range *prange)
uint32_t best_loc = prange->prefetch_loc;
struct kfd_process_device *pdd;
struct amdgpu_device *bo_adev;
- struct amdgpu_device *adev;
struct kfd_process *p;
uint32_t gpuidx;
@@ -3008,12 +3223,11 @@ svm_range_best_prefetch_location(struct svm_range *prange)
pr_debug("failed to get device by idx 0x%x\n", gpuidx);
continue;
}
- adev = (struct amdgpu_device *)pdd->dev->kgd;
- if (adev == bo_adev)
+ if (pdd->dev->adev == bo_adev)
continue;
- if (!amdgpu_xgmi_same_hive(adev, bo_adev)) {
+ if (!amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) {
best_loc = 0;
break;
}
@@ -3027,28 +3241,6 @@ out:
return best_loc;
}
-/* FIXME: This is a workaround for page locking bug when some pages are
- * invalid during migration to VRAM
- */
-void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
- void *owner)
-{
- struct hmm_range *hmm_range;
- int r;
-
- if (prange->validated_once)
- return;
-
- r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
- prange->start << PAGE_SHIFT,
- prange->npages, &hmm_range,
- false, true, owner);
- if (!r) {
- amdgpu_hmm_range_get_pages_done(hmm_range);
- prange->validated_once = true;
- }
-}
-
/* svm_range_trigger_migration - start page migration if prefetch loc changed
* @mm: current process mm_struct
* @prange: svm range structure
@@ -3088,12 +3280,13 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
return 0;
if (!best_loc) {
- r = svm_migrate_vram_to_ram(prange, mm);
+ r = svm_migrate_vram_to_ram(prange, mm,
+ KFD_MIGRATE_TRIGGER_PREFETCH, NULL);
*migrated = !r;
return r;
}
- r = svm_migrate_to_vram(prange, best_loc, mm);
+ r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r;
return r;
@@ -3118,24 +3311,23 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
static void svm_range_evict_svm_bo_worker(struct work_struct *work)
{
struct svm_range_bo *svm_bo;
- struct kfd_process *p;
struct mm_struct *mm;
+ int r = 0;
svm_bo = container_of(work, struct svm_range_bo, eviction_work);
if (!svm_bo_ref_unless_zero(svm_bo))
return; /* svm_bo was freed while eviction was pending */
- /* svm_range_bo_release destroys this worker thread. So during
- * the lifetime of this thread, kfd_process and mm will be valid.
- */
- p = container_of(svm_bo->svms, struct kfd_process, svms);
- mm = p->mm;
- if (!mm)
+ if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+ mm = svm_bo->eviction_fence->mm;
+ } else {
+ svm_range_bo_unref(svm_bo);
return;
+ }
mmap_read_lock(mm);
spin_lock(&svm_bo->list_lock);
- while (!list_empty(&svm_bo->range_list)) {
+ while (!list_empty(&svm_bo->range_list) && !r) {
struct svm_range *prange =
list_first_entry(&svm_bo->range_list,
struct svm_range, svm_bo_list);
@@ -3149,41 +3341,49 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mutex_lock(&prange->migrate_mutex);
do {
- svm_migrate_vram_to_ram(prange,
- svm_bo->eviction_fence->mm);
- } while (prange->actual_loc && --retries);
- WARN(prange->actual_loc, "Migration failed during eviction");
+ r = svm_migrate_vram_to_ram(prange, mm,
+ KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);
+ } while (!r && prange->actual_loc && --retries);
- mutex_lock(&prange->lock);
- prange->svm_bo = NULL;
- mutex_unlock(&prange->lock);
+ if (!r && prange->actual_loc)
+ pr_info_once("Migration failed during eviction");
+ if (!prange->actual_loc) {
+ mutex_lock(&prange->lock);
+ prange->svm_bo = NULL;
+ mutex_unlock(&prange->lock);
+ }
mutex_unlock(&prange->migrate_mutex);
spin_lock(&svm_bo->list_lock);
}
spin_unlock(&svm_bo->list_lock);
mmap_read_unlock(mm);
+ mmput(mm);
dma_fence_signal(&svm_bo->eviction_fence->base);
+
/* This is the last reference to svm_bo, after svm_range_vram_node_free
* has been called in svm_migrate_vram_to_ram
*/
- WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
+ WARN_ONCE(!r && kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
svm_range_bo_unref(svm_bo);
}
static int
-svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
+ uint64_t start, uint64_t size, uint32_t nattr,
+ struct kfd_ioctl_svm_attribute *attrs)
{
- struct mm_struct *mm = current->mm;
+ struct amdkfd_process_info *process_info = p->kgd_process_info;
struct list_head update_list;
struct list_head insert_list;
struct list_head remove_list;
struct svm_range_list *svms;
struct svm_range *prange;
struct svm_range *next;
+ bool update_mapping = false;
+ bool flush_tlb;
int r = 0;
pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
@@ -3195,6 +3395,8 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
svms = &p->svms;
+ mutex_lock(&process_info->lock);
+
svm_range_list_lock_and_flush_work(svms, mm);
r = svm_range_is_valid(p, start, size);
@@ -3215,22 +3417,21 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
goto out;
}
/* Apply changes as a transaction */
- list_for_each_entry_safe(prange, next, &insert_list, insert_list) {
+ list_for_each_entry_safe(prange, next, &insert_list, list) {
svm_range_add_to_svms(prange);
svm_range_add_notifier_locked(mm, prange);
}
list_for_each_entry(prange, &update_list, update_list) {
- svm_range_apply_attrs(p, prange, nattr, attrs);
+ svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
/* TODO: unmap ranges from GPU that lost access */
}
- list_for_each_entry_safe(prange, next, &remove_list,
- remove_list) {
+ list_for_each_entry_safe(prange, next, &remove_list, update_list) {
pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n",
prange->svms, prange, prange->start,
prange->last);
svm_range_unlink(prange);
svm_range_remove_notifier(prange);
- svm_range_free(prange);
+ svm_range_free(prange, false);
}
mmap_write_downgrade(mm);
@@ -3248,14 +3449,23 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
if (r)
goto out_unlock_range;
- if (migrated && !p->xnack_enabled) {
+ if (migrated && (!p->xnack_enabled ||
+ (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) &&
+ prange->mapped_to_gpu) {
pr_debug("restore_work will update mappings of GPUs\n");
mutex_unlock(&prange->migrate_mutex);
continue;
}
+ if (!migrated && !update_mapping) {
+ mutex_unlock(&prange->migrate_mutex);
+ continue;
+ }
+
+ flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
+
r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
- true, true);
+ true, true, flush_tlb);
if (r)
pr_debug("failed %d to map svm range\n", r);
@@ -3270,6 +3480,8 @@ out_unlock_range:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
out:
+ mutex_unlock(&process_info->lock);
+
pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
&p->svms, start, start + size - 1, r);
@@ -3277,8 +3489,9 @@ out:
}
static int
-svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
+ uint64_t start, uint64_t size, uint32_t nattr,
+ struct kfd_ioctl_svm_attribute *attrs)
{
DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
@@ -3288,7 +3501,6 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
bool get_accessible = false;
bool get_flags = false;
uint64_t last = start + size - 1UL;
- struct mm_struct *mm = current->mm;
uint8_t granularity = 0xff;
struct interval_tree_node *node;
struct svm_range_list *svms;
@@ -3453,10 +3665,321 @@ fill_values:
return 0;
}
+int kfd_criu_resume_svm(struct kfd_process *p)
+{
+ struct kfd_ioctl_svm_attribute *set_attr_new, *set_attr = NULL;
+ int nattr_common = 4, nattr_accessibility = 1;
+ struct criu_svm_metadata *criu_svm_md = NULL;
+ struct svm_range_list *svms = &p->svms;
+ struct criu_svm_metadata *next = NULL;
+ uint32_t set_flags = 0xffffffff;
+ int i, j, num_attrs, ret = 0;
+ uint64_t set_attr_size;
+ struct mm_struct *mm;
+
+ if (list_empty(&svms->criu_svm_metadata_list)) {
+ pr_debug("No SVM data from CRIU restore stage 2\n");
+ return ret;
+ }
+
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_err("failed to get mm for the target process\n");
+ return -ESRCH;
+ }
+
+ num_attrs = nattr_common + (nattr_accessibility * p->n_pdds);
+
+ i = j = 0;
+ list_for_each_entry(criu_svm_md, &svms->criu_svm_metadata_list, list) {
+ pr_debug("criu_svm_md[%d]\n\tstart: 0x%llx size: 0x%llx (npages)\n",
+ i, criu_svm_md->data.start_addr, criu_svm_md->data.size);
+
+ for (j = 0; j < num_attrs; j++) {
+ pr_debug("\ncriu_svm_md[%d]->attrs[%d].type : 0x%x\ncriu_svm_md[%d]->attrs[%d].value : 0x%x\n",
+ i, j, criu_svm_md->data.attrs[j].type,
+ i, j, criu_svm_md->data.attrs[j].value);
+ switch (criu_svm_md->data.attrs[j].type) {
+ /* During the checkpoint operation, the query for the
+ * KFD_IOCTL_SVM_ATTR_PREFETCH_LOC attribute might
+ * return KFD_IOCTL_SVM_LOCATION_UNDEFINED if it was
+ * not used by the range which was checkpointed. Care
+ * must be taken not to restore with an invalid value;
+ * otherwise the gpuidx value will be invalid and
+ * set_attr would eventually fail, so just replace those
+ * with another dummy attribute such as
+ * KFD_IOCTL_SVM_ATTR_SET_FLAGS.
+ */
+ case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+ if (criu_svm_md->data.attrs[j].value ==
+ KFD_IOCTL_SVM_LOCATION_UNDEFINED) {
+ criu_svm_md->data.attrs[j].type =
+ KFD_IOCTL_SVM_ATTR_SET_FLAGS;
+ criu_svm_md->data.attrs[j].value = 0;
+ }
+ break;
+ case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ set_flags = criu_svm_md->data.attrs[j].value;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* CLR_FLAGS is not available via get_attr during checkpoint, but it
+ * needs to be inserted before restoring the ranges, so allocate
+ * extra space for it before calling set_attr.
+ */
+ set_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ (num_attrs + 1);
+ set_attr_new = krealloc(set_attr, set_attr_size,
+ GFP_KERNEL);
+ if (!set_attr_new) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+ set_attr = set_attr_new;
+
+ memcpy(set_attr, criu_svm_md->data.attrs, num_attrs *
+ sizeof(struct kfd_ioctl_svm_attribute));
+ set_attr[num_attrs].type = KFD_IOCTL_SVM_ATTR_CLR_FLAGS;
+ set_attr[num_attrs].value = ~set_flags;
+
+ ret = svm_range_set_attr(p, mm, criu_svm_md->data.start_addr,
+ criu_svm_md->data.size, num_attrs + 1,
+ set_attr);
+ if (ret) {
+ pr_err("CRIU: failed to set range attributes\n");
+ goto exit;
+ }
+
+ i++;
+ }
+exit:
+ kfree(set_attr);
+ list_for_each_entry_safe(criu_svm_md, next, &svms->criu_svm_metadata_list, list) {
+ pr_debug("freeing criu_svm_md[]\n\tstart: 0x%llx\n",
+ criu_svm_md->data.start_addr);
+ kfree(criu_svm_md);
+ }
+
+ mmput(mm);
+ return ret;
+
+}
+
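kfd_criu_resume_svm() above restores each range by replaying the num_attrs saved attributes plus one appended KFD_IOCTL_SVM_ATTR_CLR_FLAGS entry set to ~set_flags. A standalone sketch of that array assembly follows; the attribute types, values and sizes are placeholders, not the real UAPI definitions:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct attr { uint32_t type; uint32_t value; };

/* placeholder attribute types, not the real KFD_IOCTL_SVM_ATTR_* values */
enum { ATTR_SET_FLAGS = 1, ATTR_CLR_FLAGS = 2 };

int main(void)
{
	int nattr_common = 4, nattr_accessibility = 1, num_gpus = 2;
	int num_attrs = nattr_common + nattr_accessibility * num_gpus;	/* 6 */
	uint32_t set_flags = 0x5;		/* pretend this was checkpointed */
	struct attr *saved, *set_attr;
	int i;

	/* stand-in for criu_svm_md->data.attrs read back from the image */
	saved = calloc(num_attrs, sizeof(*saved));
	if (!saved)
		return 1;
	saved[2].type = ATTR_SET_FLAGS;
	saved[2].value = set_flags;

	/* one extra slot so CLR_FLAGS can be appended before set_attr */
	set_attr = malloc((num_attrs + 1) * sizeof(*set_attr));
	if (!set_attr)
		return 1;
	memcpy(set_attr, saved, num_attrs * sizeof(*set_attr));
	set_attr[num_attrs].type = ATTR_CLR_FLAGS;
	set_attr[num_attrs].value = ~set_flags;	/* clear everything not set */

	for (i = 0; i < num_attrs + 1; i++)
		printf("attr[%d]: type %u value 0x%x\n",
		       i, set_attr[i].type, set_attr[i].value);

	free(set_attr);
	free(saved);
	return 0;
}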
+int kfd_criu_restore_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size)
+{
+ uint64_t svm_priv_data_size, svm_object_md_size, svm_attrs_size;
+ int nattr_common = 4, nattr_accessibility = 1;
+ struct criu_svm_metadata *criu_svm_md = NULL;
+ struct svm_range_list *svms = &p->svms;
+ uint32_t num_devices;
+ int ret = 0;
+
+ num_devices = p->n_pdds;
+ /* Handle one SVM range object at a time. The number of GPUs is also
+ * assumed to be the same on the restore node; that check must be done
+ * earlier, while evaluating the topology.
+ */
+
+ svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ (nattr_common + nattr_accessibility * num_devices);
+ svm_object_md_size = sizeof(struct criu_svm_metadata) + svm_attrs_size;
+
+ svm_priv_data_size = sizeof(struct kfd_criu_svm_range_priv_data) +
+ svm_attrs_size;
+
+ criu_svm_md = kzalloc(svm_object_md_size, GFP_KERNEL);
+ if (!criu_svm_md) {
+ pr_err("failed to allocate memory to store svm metadata\n");
+ return -ENOMEM;
+ }
+ if (*priv_data_offset + svm_priv_data_size > max_priv_data_size) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ ret = copy_from_user(&criu_svm_md->data, user_priv_ptr + *priv_data_offset,
+ svm_priv_data_size);
+ if (ret) {
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_data_offset += svm_priv_data_size;
+
+ list_add_tail(&criu_svm_md->list, &svms->criu_svm_metadata_list);
+
+ return 0;
+
+
+exit:
+ kfree(criu_svm_md);
+ return ret;
+}
+
+int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size)
+{
+ uint64_t total_size, accessibility_size, common_attr_size;
+ int nattr_common = 4, nattr_accessibility = 1;
+ int num_devices = p->n_pdds;
+ struct svm_range_list *svms;
+ struct svm_range *prange;
+ uint32_t count = 0;
+
+ *svm_priv_data_size = 0;
+
+ svms = &p->svms;
+ if (!svms)
+ return -EINVAL;
+
+ mutex_lock(&svms->lock);
+ list_for_each_entry(prange, &svms->list, list) {
+ pr_debug("prange: 0x%p start: 0x%lx\t npages: 0x%llx\t end: 0x%llx\n",
+ prange, prange->start, prange->npages,
+ prange->start + prange->npages - 1);
+ count++;
+ }
+ mutex_unlock(&svms->lock);
+
+ *num_svm_ranges = count;
+ /* Only the accessibility attributes need to be queried for each GPU
+ * individually; the remaining ones apply to the entire process
+ * regardless of the individual GPU nodes. Of the remaining attributes,
+ * KFD_IOCTL_SVM_ATTR_CLR_FLAGS need not be saved.
+ *
+ * KFD_IOCTL_SVM_ATTR_PREFERRED_LOC
+ * KFD_IOCTL_SVM_ATTR_PREFETCH_LOC
+ * KFD_IOCTL_SVM_ATTR_SET_FLAGS
+ * KFD_IOCTL_SVM_ATTR_GRANULARITY
+ *
+ * ** ACCESSIBILITY ATTRIBUTES **
+ * (Considered as one, type is altered during query, value is gpuid)
+ * KFD_IOCTL_SVM_ATTR_ACCESS
+ * KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE
+ * KFD_IOCTL_SVM_ATTR_NO_ACCESS
+ */
+ if (*num_svm_ranges > 0) {
+ common_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ nattr_common;
+ accessibility_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ nattr_accessibility * num_devices;
+
+ total_size = sizeof(struct kfd_criu_svm_range_priv_data) +
+ common_attr_size + accessibility_size;
+
+ *svm_priv_data_size = *num_svm_ranges * total_size;
+ }
+
+ pr_debug("num_svm_ranges %u total_priv_size %llu\n", *num_svm_ranges,
+ *svm_priv_data_size);
+ return 0;
+}
+
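To make the sizing in svm_range_get_info() above concrete, here is a small standalone check with assumed struct sizes (the real values come from the UAPI headers): per range the private data is the fixed header plus 4 common attributes plus one accessibility attribute per GPU, and the total is that per-range size times the range count.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* assumed sizes; the real ones are sizeof() of the UAPI structs */
	uint64_t sizeof_priv = 24, sizeof_attr = 8;
	int nattr_common = 4, nattr_accessibility = 1;
	int num_devices = 2, num_svm_ranges = 3;

	uint64_t common_attr_size = sizeof_attr * nattr_common;		/* 32 */
	uint64_t accessibility_size = sizeof_attr * nattr_accessibility *
				      num_devices;			/* 16 */
	uint64_t total_size = sizeof_priv + common_attr_size +
			      accessibility_size;			/* 72 */

	printf("per-range %llu bytes, total %llu bytes\n",
	       (unsigned long long)total_size,
	       (unsigned long long)(num_svm_ranges * total_size));
	return 0;
}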
+int kfd_criu_checkpoint_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset)
+{
+ struct kfd_criu_svm_range_priv_data *svm_priv = NULL;
+ struct kfd_ioctl_svm_attribute *query_attr = NULL;
+ uint64_t svm_priv_data_size, query_attr_size = 0;
+ int index, nattr_common = 4, ret = 0;
+ struct svm_range_list *svms;
+ int num_devices = p->n_pdds;
+ struct svm_range *prange;
+ struct mm_struct *mm;
+
+ svms = &p->svms;
+ if (!svms)
+ return -EINVAL;
+
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_err("failed to get mm for the target process\n");
+ return -ESRCH;
+ }
+
+ query_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+ (nattr_common + num_devices);
+
+ query_attr = kzalloc(query_attr_size, GFP_KERNEL);
+ if (!query_attr) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ query_attr[0].type = KFD_IOCTL_SVM_ATTR_PREFERRED_LOC;
+ query_attr[1].type = KFD_IOCTL_SVM_ATTR_PREFETCH_LOC;
+ query_attr[2].type = KFD_IOCTL_SVM_ATTR_SET_FLAGS;
+ query_attr[3].type = KFD_IOCTL_SVM_ATTR_GRANULARITY;
+
+ for (index = 0; index < num_devices; index++) {
+ struct kfd_process_device *pdd = p->pdds[index];
+
+ query_attr[index + nattr_common].type =
+ KFD_IOCTL_SVM_ATTR_ACCESS;
+ query_attr[index + nattr_common].value = pdd->user_gpu_id;
+ }
+
+ svm_priv_data_size = sizeof(*svm_priv) + query_attr_size;
+
+ svm_priv = kzalloc(svm_priv_data_size, GFP_KERNEL);
+ if (!svm_priv) {
+ ret = -ENOMEM;
+ goto exit_query;
+ }
+
+ index = 0;
+ list_for_each_entry(prange, &svms->list, list) {
+
+ svm_priv->object_type = KFD_CRIU_OBJECT_TYPE_SVM_RANGE;
+ svm_priv->start_addr = prange->start;
+ svm_priv->size = prange->npages;
+ memcpy(&svm_priv->attrs, query_attr, query_attr_size);
+ pr_debug("CRIU: prange: 0x%p start: 0x%lx\t npages: 0x%llx end: 0x%llx\t size: 0x%llx\n",
+ prange, prange->start, prange->npages,
+ prange->start + prange->npages - 1,
+ prange->npages * PAGE_SIZE);
+
+ ret = svm_range_get_attr(p, mm, svm_priv->start_addr,
+ svm_priv->size,
+ (nattr_common + num_devices),
+ svm_priv->attrs);
+ if (ret) {
+ pr_err("CRIU: failed to obtain range attributes\n");
+ goto exit_priv;
+ }
+
+ if (copy_to_user(user_priv_data + *priv_data_offset, svm_priv,
+ svm_priv_data_size)) {
+ pr_err("Failed to copy svm priv to user\n");
+ ret = -EFAULT;
+ goto exit_priv;
+ }
+
+ *priv_data_offset += svm_priv_data_size;
+
+ }
+
+
+exit_priv:
+ kfree(svm_priv);
+exit_query:
+ kfree(query_attr);
+exit:
+ mmput(mm);
+ return ret;
+}
+
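kfd_criu_checkpoint_svm() above streams one variable-size record per range into the user buffer and advances *priv_data_offset by the record size each time. A simplified userspace sketch of that append-at-offset scheme; the struct layout, sizes and the bounds check are illustrative only:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* stand-in for the fixed part of kfd_criu_svm_range_priv_data */
struct record_hdr { uint64_t start, size; uint32_t nattrs, pad; };

static int append_record(uint8_t *buf, uint64_t buf_size, uint64_t *offset,
			 uint64_t start, uint64_t size,
			 const uint32_t *attrs, uint32_t nattrs)
{
	uint64_t rec_size = sizeof(struct record_hdr) + nattrs * sizeof(uint32_t);
	struct record_hdr hdr = { .start = start, .size = size, .nattrs = nattrs };

	if (*offset + rec_size > buf_size)
		return -1;				/* would overrun the buffer */

	memcpy(buf + *offset, &hdr, sizeof(hdr));
	memcpy(buf + *offset + sizeof(hdr), attrs, nattrs * sizeof(uint32_t));
	*offset += rec_size;				/* next record starts here */
	return 0;
}

int main(void)
{
	uint8_t buf[256];
	uint64_t off = 0;
	uint32_t attrs[] = { 1, 2, 3 };

	append_record(buf, sizeof(buf), &off, 0x1000, 16, attrs, 3);
	append_record(buf, sizeof(buf), &off, 0x5000, 32, attrs, 3);
	printf("bytes used: %llu\n", (unsigned long long)off);
	return 0;
}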
int
svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
{
+ struct mm_struct *mm = current->mm;
int r;
start >>= PAGE_SHIFT;
@@ -3464,10 +3987,10 @@ svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
switch (op) {
case KFD_IOCTL_SVM_OP_SET_ATTR:
- r = svm_range_set_attr(p, start, size, nattrs, attrs);
+ r = svm_range_set_attr(p, mm, start, size, nattrs, attrs);
break;
case KFD_IOCTL_SVM_OP_GET_ATTR:
- r = svm_range_get_attr(p, start, size, nattrs, attrs);
+ r = svm_range_get_attr(p, mm, start, size, nattrs, attrs);
break;
default:
r = EINVAL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 6dc91c33e80f..7a33b93f9df6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -46,8 +46,8 @@ struct svm_range_bo {
spinlock_t list_lock;
struct amdgpu_amdkfd_fence *eviction_fence;
struct work_struct eviction_work;
- struct svm_range_list *svms;
uint32_t evicting;
+ struct work_struct release_work;
};
enum svm_work_list_ops {
@@ -75,8 +75,6 @@ struct svm_work_list_item {
* aligned, page size is (last - start + 1)
* @list: link list node, used to scan all ranges of svms
* @update_list:link list node used to add to update_list
- * @remove_list:link list node used to add to remove list
- * @insert_list:link list node used to add to insert list
* @mapping: bo_va mapping structure to create and update GPU page table
* @npages: number of pages
* @dma_addr: dma mapping address on each GPU for system memory physical page
@@ -112,8 +110,6 @@ struct svm_range {
struct interval_tree_node it_node;
struct list_head list;
struct list_head update_list;
- struct list_head remove_list;
- struct list_head insert_list;
uint64_t npages;
dma_addr_t *dma_addr[MAX_GPU_INSTANCE];
struct ttm_resource *ttm_res;
@@ -128,7 +124,7 @@ struct svm_range {
uint32_t actual_loc;
uint8_t granularity;
atomic_t invalid;
- uint64_t validate_timestamp;
+ ktime_t validate_timestamp;
struct mmu_interval_notifier notifier;
struct svm_work_list_item work_item;
struct list_head deferred_list;
@@ -136,6 +132,7 @@ struct svm_range {
DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
bool validated_once;
+ bool mapped_to_gpu;
};
static inline void svm_range_lock(struct svm_range *prange)
@@ -184,8 +181,16 @@ void schedule_deferred_list_work(struct svm_range_list *svms);
void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
unsigned long offset, unsigned long npages);
void svm_range_free_dma_mappings(struct svm_range *prange);
-void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
- void *owner);
+int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size);
+int kfd_criu_checkpoint_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset);
+int kfd_criu_restore_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size);
+int kfd_criu_resume_svm(struct kfd_process *p);
struct kfd_process_device *
svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev);
void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm);
@@ -195,7 +200,11 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
*/
#define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0)
-void svm_range_bo_unref(struct svm_range_bo *svm_bo);
+void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
+
+void svm_range_set_max_pages(struct amdgpu_device *adev);
+int svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled);
+
#else
struct kfd_process;
@@ -223,6 +232,35 @@ static inline int svm_range_schedule_evict_svm_bo(
return -EINVAL;
}
+static inline int svm_range_get_info(struct kfd_process *p,
+ uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size)
+{
+ *num_svm_ranges = 0;
+ *svm_priv_data_size = 0;
+ return 0;
+}
+
+static inline int kfd_criu_checkpoint_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_offset)
+{
+ return 0;
+}
+
+static inline int kfd_criu_restore_svm(struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size)
+{
+ return -EINVAL;
+}
+
+static inline int kfd_criu_resume_svm(struct kfd_process *p)
+{
+ return 0;
+}
+
#define KFD_IS_SVM_API_SUPPORTED(dev) false
#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index dd593ad0614a..3f0a4a415907 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -39,28 +40,39 @@
#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
+#include "amdgpu.h"
/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
static struct kfd_system_properties sys_props;
static DECLARE_RWSEM(topology_lock);
-static atomic_t topology_crat_proximity_domain;
+static uint32_t topology_crat_proximity_domain;
-struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
uint32_t proximity_domain)
{
struct kfd_topology_device *top_dev;
struct kfd_topology_device *device = NULL;
- down_read(&topology_lock);
-
list_for_each_entry(top_dev, &topology_device_list, list)
if (top_dev->proximity_domain == proximity_domain) {
device = top_dev;
break;
}
+ return device;
+}
+
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+ uint32_t proximity_domain)
+{
+ struct kfd_topology_device *device = NULL;
+
+ down_read(&topology_lock);
+
+ device = kfd_topology_device_by_proximity_domain_no_lock(
+ proximity_domain);
up_read(&topology_lock);
return device;
@@ -113,7 +125,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
return device;
}
-struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
+struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev)
{
struct kfd_topology_device *top_dev;
struct kfd_dev *device = NULL;
@@ -121,7 +133,7 @@ struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
down_read(&topology_lock);
list_for_each_entry(top_dev, &topology_device_list, list)
- if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
+ if (top_dev->gpu && top_dev->gpu->adev == adev) {
device = top_dev->gpu;
break;
}
@@ -137,6 +149,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
struct kfd_mem_properties *mem;
struct kfd_cache_properties *cache;
struct kfd_iolink_properties *iolink;
+ struct kfd_iolink_properties *p2plink;
struct kfd_perf_properties *perf;
list_del(&dev->list);
@@ -162,6 +175,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
kfree(iolink);
}
+ while (dev->p2p_link_props.next != &dev->p2p_link_props) {
+ p2plink = container_of(dev->p2p_link_props.next,
+ struct kfd_iolink_properties, list);
+ list_del(&p2plink->list);
+ kfree(p2plink);
+ }
+
while (dev->perf_props.next != &dev->perf_props) {
perf = container_of(dev->perf_props.next,
struct kfd_perf_properties, list);
@@ -203,6 +223,7 @@ struct kfd_topology_device *kfd_create_topology_device(
INIT_LIST_HEAD(&dev->mem_props);
INIT_LIST_HEAD(&dev->cache_props);
INIT_LIST_HEAD(&dev->io_link_props);
+ INIT_LIST_HEAD(&dev->p2p_link_props);
INIT_LIST_HEAD(&dev->perf_props);
list_add_tail(&dev->list, device_list);
@@ -454,6 +475,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.caches_count);
sysfs_show_32bit_prop(buffer, offs, "io_links_count",
dev->node_props.io_links_count);
+ sysfs_show_32bit_prop(buffer, offs, "p2p_links_count",
+ dev->node_props.p2p_links_count);
sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
dev->node_props.cpu_core_id_base);
sysfs_show_32bit_prop(buffer, offs, "simd_id_base",
@@ -503,7 +526,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
if (dev->gpu) {
log_max_watch_addr =
- __ilog2_u32(dev->gpu->device_info->num_of_watch_points);
+ __ilog2_u32(dev->gpu->device_info.num_of_watch_points);
if (log_max_watch_addr) {
dev->node_props.capability |=
@@ -515,7 +538,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
}
- if (dev->gpu->device_info->asic_family == CHIP_TONGA)
+ if (dev->gpu->adev->asic_type == CHIP_TONGA)
dev->node_props.capability |=
HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
@@ -531,7 +554,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
dev->gpu->sdma_fw_version);
sysfs_show_64bit_prop(buffer, offs, "unique_id",
- amdgpu_amdkfd_get_unique_id(dev->gpu->kgd));
+ dev->gpu->adev->unique_id);
}
@@ -557,6 +580,7 @@ static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
{
+ struct kfd_iolink_properties *p2plink;
struct kfd_iolink_properties *iolink;
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
@@ -574,6 +598,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
dev->kobj_iolink = NULL;
}
+ if (dev->kobj_p2plink) {
+ list_for_each_entry(p2plink, &dev->p2p_link_props, list)
+ if (p2plink->kobj) {
+ kfd_remove_sysfs_file(p2plink->kobj,
+ &p2plink->attr);
+ p2plink->kobj = NULL;
+ }
+ kobject_del(dev->kobj_p2plink);
+ kobject_put(dev->kobj_p2plink);
+ dev->kobj_p2plink = NULL;
+ }
+
if (dev->kobj_cache) {
list_for_each_entry(cache, &dev->cache_props, list)
if (cache->kobj) {
@@ -620,6 +656,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
uint32_t id)
{
+ struct kfd_iolink_properties *p2plink;
struct kfd_iolink_properties *iolink;
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
@@ -657,6 +694,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
if (!dev->kobj_iolink)
return -ENOMEM;
+ dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node);
+ if (!dev->kobj_p2plink)
+ return -ENOMEM;
+
dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
if (!dev->kobj_perf)
return -ENOMEM;
@@ -746,6 +787,27 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
i++;
}
+ i = 0;
+ list_for_each_entry(p2plink, &dev->p2p_link_props, list) {
+ p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+ if (!p2plink->kobj)
+ return -ENOMEM;
+ ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
+ dev->kobj_p2plink, "%d", i);
+ if (ret < 0) {
+ kobject_put(p2plink->kobj);
+ return ret;
+ }
+
+ p2plink->attr.name = "properties";
+ p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
+ sysfs_attr_init(&p2plink->attr);
+ ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
+ if (ret < 0)
+ return ret;
+ i++;
+ }
+
/* All hardware blocks have the same number of attributes. */
num_attrs = ARRAY_SIZE(perf_attr_iommu);
list_for_each_entry(perf, &dev->perf_props, list) {
@@ -1059,7 +1121,7 @@ int kfd_topology_init(void)
down_write(&topology_lock);
kfd_topology_update_device_list(&temp_topology_device_list,
&topology_device_list);
- atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
+ topology_crat_proximity_domain = sys_props.num_devices-1;
ret = kfd_topology_update_sysfs();
up_write(&topology_lock);
@@ -1101,15 +1163,12 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
uint32_t buf[7];
uint64_t local_mem_size;
int i;
- struct kfd_local_mem_info local_mem_info;
if (!gpu)
return 0;
- amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info);
-
- local_mem_size = local_mem_info.local_mem_size_private +
- local_mem_info.local_mem_size_public;
+ local_mem_size = gpu->local_mem_info.local_mem_size_private +
+ gpu->local_mem_info.local_mem_size_public;
buf[0] = gpu->pdev->devfn;
buf[1] = gpu->pdev->subsystem_vendor |
@@ -1137,6 +1196,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
struct kfd_mem_properties *mem;
struct kfd_cache_properties *cache;
struct kfd_iolink_properties *iolink;
+ struct kfd_iolink_properties *p2plink;
down_write(&topology_lock);
list_for_each_entry(dev, &topology_device_list, list) {
@@ -1157,6 +1217,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
cache->gpu = dev->gpu;
list_for_each_entry(iolink, &dev->io_link_props, list)
iolink->gpu = dev->gpu;
+ list_for_each_entry(p2plink, &dev->p2p_link_props, list)
+ p2plink->gpu = dev->gpu;
break;
}
}
@@ -1189,7 +1251,7 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
* for APUs - If CRAT from ACPI reports more than one bank, then
* all the banks will report the same mem_clk_max information
*/
- amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info);
+ amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info);
list_for_each_entry(mem, &dev->mem_props, list)
mem->mem_clk_max = local_mem_info.mem_clk_max;
@@ -1217,8 +1279,7 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
/* set gpu (dev) flags. */
} else {
if (!dev->gpu->pci_atomic_requested ||
- dev->gpu->device_info->asic_family ==
- CHIP_HAWAII)
+ dev->gpu->adev->asic_type == CHIP_HAWAII)
link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
}
@@ -1239,7 +1300,7 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev,
*/
if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS ||
(inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
- to_dev->gpu->device_info->asic_family == CHIP_VEGA20)) {
+ KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) {
outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
}
@@ -1264,6 +1325,12 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
if (!peer_dev)
continue;
+ /* Include the CPU peer in GPU hive if connected over xGMI. */
+ if (!peer_dev->gpu && !peer_dev->node_props.hive_id &&
+ dev->node_props.hive_id &&
+ dev->gpu->adev->gmc.xgmi.connected_to_cpu)
+ peer_dev->node_props.hive_id = dev->node_props.hive_id;
+
list_for_each_entry(inbound_link, &peer_dev->io_link_props,
list) {
if (inbound_link->node_to != link->node_from)
@@ -1274,6 +1341,256 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
}
}
+
+ /* Create indirect links and apply the flags settings to all of them */
+ list_for_each_entry(link, &dev->p2p_link_props, list) {
+ link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+ kfd_set_iolink_no_atomics(dev, NULL, link);
+ peer_dev = kfd_topology_device_by_proximity_domain(
+ link->node_to);
+
+ if (!peer_dev)
+ continue;
+
+ list_for_each_entry(inbound_link, &peer_dev->p2p_link_props,
+ list) {
+ if (inbound_link->node_to != link->node_from)
+ continue;
+
+ inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+ kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
+ kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
+ }
+ }
+}
+
+static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev,
+ struct kfd_iolink_properties *p2plink)
+{
+ int ret;
+
+ p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+ if (!p2plink->kobj)
+ return -ENOMEM;
+
+ ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
+ dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1);
+ if (ret < 0) {
+ kobject_put(p2plink->kobj);
+ return ret;
+ }
+
+ p2plink->attr.name = "properties";
+ p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
+ sysfs_attr_init(&p2plink->attr);
+ ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node)
+{
+ struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link;
+ struct kfd_iolink_properties *props = NULL, *props2 = NULL;
+ struct kfd_topology_device *cpu_dev;
+ int ret = 0;
+ int i, num_cpu;
+
+ num_cpu = 0;
+ list_for_each_entry(cpu_dev, &topology_device_list, list) {
+ if (cpu_dev->gpu)
+ break;
+ num_cpu++;
+ }
+
+ gpu_link = list_first_entry(&kdev->io_link_props,
+ struct kfd_iolink_properties, list);
+ if (!gpu_link)
+ return -ENOMEM;
+
+ for (i = 0; i < num_cpu; i++) {
+ /* CPU <--> GPU */
+ if (gpu_link->node_to == i)
+ continue;
+
+ /* find CPU <--> CPU links */
+ cpu_link = NULL;
+ cpu_dev = kfd_topology_device_by_proximity_domain(i);
+ if (cpu_dev) {
+ list_for_each_entry(tmp_link,
+ &cpu_dev->io_link_props, list) {
+ if (tmp_link->node_to == gpu_link->node_to) {
+ cpu_link = tmp_link;
+ break;
+ }
+ }
+ }
+
+ if (!cpu_link)
+ return -ENOMEM;
+
+ /* CPU <--> CPU <--> GPU, GPU node*/
+ props = kfd_alloc_struct(props);
+ if (!props)
+ return -ENOMEM;
+
+ memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties));
+ props->weight = gpu_link->weight + cpu_link->weight;
+ props->min_latency = gpu_link->min_latency + cpu_link->min_latency;
+ props->max_latency = gpu_link->max_latency + cpu_link->max_latency;
+ props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth);
+ props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth);
+
+ props->node_from = gpu_node;
+ props->node_to = i;
+ kdev->node_props.p2p_links_count++;
+ list_add_tail(&props->list, &kdev->p2p_link_props);
+ ret = kfd_build_p2p_node_entry(kdev, props);
+ if (ret < 0)
+ return ret;
+
+ /* for small BAR, no CPU --> GPU indirect links */
+ if (kfd_dev_is_large_bar(kdev->gpu)) {
+ /* CPU <--> CPU <--> GPU, CPU node*/
+ props2 = kfd_alloc_struct(props2);
+ if (!props2)
+ return -ENOMEM;
+
+ memcpy(props2, props, sizeof(struct kfd_iolink_properties));
+ props2->node_from = i;
+ props2->node_to = gpu_node;
+ props2->kobj = NULL;
+ cpu_dev->node_props.p2p_links_count++;
+ list_add_tail(&props2->list, &cpu_dev->p2p_link_props);
+ ret = kfd_build_p2p_node_entry(cpu_dev, props2);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return ret;
+}
+
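kfd_create_indirect_link_prop() above derives a GPU <--> CPU <--> CPU link from the two direct hops: weights and latencies add up, while bandwidth is capped by the weaker hop. A compilable sketch of that combination rule, with a stand-in struct instead of kfd_iolink_properties and invented numbers:

#include <stdio.h>

struct link { unsigned weight, min_latency, max_latency, min_bw, max_bw; };

static struct link combine(struct link a, struct link b)
{
	struct link c;

	c.weight      = a.weight + b.weight;
	c.min_latency = a.min_latency + b.min_latency;
	c.max_latency = a.max_latency + b.max_latency;
	c.min_bw      = a.min_bw < b.min_bw ? a.min_bw : b.min_bw;
	c.max_bw      = a.max_bw < b.max_bw ? a.max_bw : b.max_bw;
	return c;
}

int main(void)
{
	struct link gpu_cpu = { 20, 2, 4, 16, 32 };	/* GPU <-> CPU0 */
	struct link cpu_cpu = { 10, 1, 2, 24, 48 };	/* CPU0 <-> CPU1 */
	struct link indirect = combine(gpu_cpu, cpu_cpu);

	printf("indirect: weight %u latency %u-%u bw %u-%u\n",
	       indirect.weight, indirect.min_latency, indirect.max_latency,
	       indirect.min_bw, indirect.max_bw);
	return 0;
}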
+#if defined(CONFIG_HSA_AMD_P2P)
+static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
+ struct kfd_topology_device *peer, int from, int to)
+{
+ struct kfd_iolink_properties *props = NULL;
+ struct kfd_iolink_properties *iolink1, *iolink2, *iolink3;
+ struct kfd_topology_device *cpu_dev;
+ int ret = 0;
+
+ if (!amdgpu_device_is_peer_accessible(
+ kdev->gpu->adev,
+ peer->gpu->adev))
+ return ret;
+
+ iolink1 = list_first_entry(&kdev->io_link_props,
+ struct kfd_iolink_properties, list);
+ if (!iolink1)
+ return -ENOMEM;
+
+ iolink2 = list_first_entry(&peer->io_link_props,
+ struct kfd_iolink_properties, list);
+ if (!iolink2)
+ return -ENOMEM;
+
+ props = kfd_alloc_struct(props);
+ if (!props)
+ return -ENOMEM;
+
+ memcpy(props, iolink1, sizeof(struct kfd_iolink_properties));
+
+ props->weight = iolink1->weight + iolink2->weight;
+ props->min_latency = iolink1->min_latency + iolink2->min_latency;
+ props->max_latency = iolink1->max_latency + iolink2->max_latency;
+ props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth);
+ props->max_bandwidth = min(iolink1->max_bandwidth, iolink2->max_bandwidth);
+
+ if (iolink1->node_to != iolink2->node_to) {
+ /* CPU->CPU link*/
+ cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
+ if (cpu_dev) {
+ list_for_each_entry(iolink3, &cpu_dev->io_link_props, list)
+ if (iolink3->node_to == iolink2->node_to)
+ break;
+
+ props->weight += iolink3->weight;
+ props->min_latency += iolink3->min_latency;
+ props->max_latency += iolink3->max_latency;
+ props->min_bandwidth = min(props->min_bandwidth,
+ iolink3->min_bandwidth);
+ props->max_bandwidth = min(props->max_bandwidth,
+ iolink3->max_bandwidth);
+ } else {
+ WARN(1, "CPU node not found");
+ }
+ }
+
+ props->node_from = from;
+ props->node_to = to;
+ peer->node_props.p2p_links_count++;
+ list_add_tail(&props->list, &peer->p2p_link_props);
+ ret = kfd_build_p2p_node_entry(peer, props);
+
+ return ret;
+}
+#endif
+
+static int kfd_dev_create_p2p_links(void)
+{
+ struct kfd_topology_device *dev;
+ struct kfd_topology_device *new_dev;
+#if defined(CONFIG_HSA_AMD_P2P)
+ uint32_t i;
+#endif
+ uint32_t k;
+ int ret = 0;
+
+ k = 0;
+ list_for_each_entry(dev, &topology_device_list, list)
+ k++;
+ if (k < 2)
+ return 0;
+
+ new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list);
+ if (WARN_ON(!new_dev->gpu))
+ return 0;
+
+ k--;
+
+ /* create indirect links */
+ ret = kfd_create_indirect_link_prop(new_dev, k);
+ if (ret < 0)
+ goto out;
+
+ /* create p2p links */
+#if defined(CONFIG_HSA_AMD_P2P)
+ i = 0;
+ list_for_each_entry(dev, &topology_device_list, list) {
+ if (dev == new_dev)
+ break;
+ if (!dev->gpu || !dev->gpu->adev ||
+ (dev->gpu->hive_id &&
+ dev->gpu->hive_id == new_dev->gpu->hive_id))
+ goto next;
+
+ /* check if node(s) is/are peer accessible in one direction or bi-direction */
+ ret = kfd_add_peer_prop(new_dev, dev, i, k);
+ if (ret < 0)
+ goto out;
+
+ ret = kfd_add_peer_prop(dev, new_dev, k, i);
+ if (ret < 0)
+ goto out;
+next:
+ i++;
+ }
+#endif
+
+out:
+ return ret;
}
int kfd_topology_add_device(struct kfd_dev *gpu)
@@ -1286,34 +1603,14 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
void *crat_image = NULL;
size_t image_size = 0;
int proximity_domain;
- struct amdgpu_device *adev;
+ int i;
+ const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
INIT_LIST_HEAD(&temp_topology_device_list);
gpu_id = kfd_generate_gpu_id(gpu);
-
pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
- proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
-
- adev = (struct amdgpu_device *)(gpu->kgd);
-
- /* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */
- if (gpu->hive_id && adev->gmc.xgmi.connected_to_cpu) {
- struct kfd_topology_device *top_dev;
-
- down_read(&topology_lock);
-
- list_for_each_entry(top_dev, &topology_device_list, list) {
- if (top_dev->gpu)
- break;
-
- top_dev->node_props.hive_id = gpu->hive_id;
- }
-
- up_read(&topology_lock);
- }
-
/* Check to see if this gpu device exists in the topology_device_list.
* If so, assign the gpu to that device,
* else create a Virtual CRAT for this gpu device and then parse that
@@ -1322,12 +1619,16 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
*/
dev = kfd_assign_gpu(gpu);
if (!dev) {
+ down_write(&topology_lock);
+ proximity_domain = ++topology_crat_proximity_domain;
+
res = kfd_create_crat_image_virtual(&crat_image, &image_size,
COMPUTE_UNIT_GPU, gpu,
proximity_domain);
if (res) {
pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
gpu_id);
+ topology_crat_proximity_domain--;
return res;
}
res = kfd_parse_crat_table(crat_image,
@@ -1336,10 +1637,10 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
if (res) {
pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
gpu_id);
+ topology_crat_proximity_domain--;
goto err;
}
- down_write(&topology_lock);
kfd_topology_update_device_list(&temp_topology_device_list,
&topology_device_list);
@@ -1364,6 +1665,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->gpu_id = gpu_id;
gpu->id = gpu_id;
+ kfd_dev_create_p2p_links();
+
/* TODO: Move the following lines to function
* kfd_add_non_crat_information
*/
@@ -1372,45 +1675,49 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
* needed for the topology
*/
- amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info);
+ amdgpu_amdkfd_get_cu_info(dev->gpu->adev, &cu_info);
- strncpy(dev->node_props.name, gpu->device_info->asic_name,
- KFD_TOPOLOGY_PUBLIC_NAME_SIZE);
+ for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) {
+ dev->node_props.name[i] = __tolower(asic_name[i]);
+ if (asic_name[i] == '\0')
+ break;
+ }
+ dev->node_props.name[i] = '\0';
dev->node_props.simd_arrays_per_engine =
cu_info.num_shader_arrays_per_engine;
- dev->node_props.gfx_target_version = gpu->device_info->gfx_target_version;
+ dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version;
dev->node_props.vendor_id = gpu->pdev->vendor;
dev->node_props.device_id = gpu->pdev->device;
dev->node_props.capability |=
- ((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) <<
- HSA_CAP_ASIC_REVISION_SHIFT) &
+ ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) &
HSA_CAP_ASIC_REVISION_MASK);
dev->node_props.location_id = pci_dev_id(gpu->pdev);
dev->node_props.domain = pci_domain_nr(gpu->pdev->bus);
dev->node_props.max_engine_clk_fcompute =
- amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
+ amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev);
dev->node_props.max_engine_clk_ccompute =
cpufreq_quick_get_max(0) / 1000;
dev->node_props.drm_render_minor =
gpu->shared_resources.drm_render_minor;
dev->node_props.hive_id = gpu->hive_id;
- dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
+ dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu);
dev->node_props.num_sdma_xgmi_engines =
- gpu->device_info->num_xgmi_sdma_engines;
+ kfd_get_num_xgmi_sdma_engines(gpu);
dev->node_props.num_sdma_queues_per_engine =
- gpu->device_info->num_sdma_queues_per_engine;
+ gpu->device_info.num_sdma_queues_per_engine -
+ gpu->device_info.num_reserved_sdma_queues_per_engine;
dev->node_props.num_gws = (dev->gpu->gws &&
dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
- amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
+ dev->gpu->adev->gds.gws_size : 0;
dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
kfd_fill_mem_clk_max_info(dev);
kfd_fill_iolink_non_crat_info(dev);
- switch (dev->gpu->device_info->asic_family) {
+ switch (dev->gpu->adev->asic_type) {
case CHIP_KAVERI:
case CHIP_HAWAII:
case CHIP_TONGA:
@@ -1429,36 +1736,20 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- case CHIP_CYAN_SKILLFISH:
- dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
- HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
- HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
- break;
default:
- WARN(1, "Unexpected ASIC family %u",
- dev->gpu->device_info->asic_family);
+ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 0, 1))
+ dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+ else
+ WARN(1, "Unexpected ASIC family %u",
+ dev->gpu->adev->asic_type);
}
/*
- * Overwrite ATS capability according to needs_iommu_device to fix
- * potential missing corresponding bit in CRAT of BIOS.
- */
+ * Overwrite ATS capability according to needs_iommu_device to fix
+ * potential missing corresponding bit in CRAT of BIOS.
+ */
if (dev->gpu->use_iommu_v2)
dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
else
@@ -1469,7 +1760,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
* because it doesn't consider masked out CUs
* max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
*/
- if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
+ if (dev->gpu->adev->asic_type == CHIP_CARRIZO) {
dev->node_props.simd_count =
cu_info.simd_per_cu * cu_info.cu_active_number;
dev->node_props.max_waves_per_simd = 10;
@@ -1477,16 +1768,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
dev->node_props.capability |=
- ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
+ ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
HSA_CAP_SRAM_EDCSUPPORTED : 0;
- dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
+ dev->node_props.capability |=
+ ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
HSA_CAP_MEM_EDCSUPPORTED : 0;
- if (adev->asic_type != CHIP_VEGA10)
- dev->node_props.capability |= (adev->ras_enabled != 0) ?
+ if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1))
+ dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ?
HSA_CAP_RASEVENTNOTIFY : 0;
- if (KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev))
+ if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev->kfd.dev))
dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
kfd_debug_print_topology();
@@ -1498,25 +1790,93 @@ err:
return res;
}
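
As an aside, the doorbell-type capability written in the hunk above is a small bitfield packed into node_props.capability with the HSA_CAP_DOORBELL_TYPE_* shift and mask (their values appear in the kfd_topology.h hunk below, before they move to <linux/kfd_sysfs.h>). A minimal standalone C sketch of that packing, for illustration only and not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    #define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK   0x00003000
    #define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT  12
    #define HSA_CAP_DOORBELL_TYPE_2_0              0x2

    int main(void)
    {
            uint32_t capability = 0;

            /* Pack the doorbell type into bits 12..13 of the capability
             * word, the same way kfd_topology_add_device() does for
             * GFX9 and newer parts.
             */
            capability |= (HSA_CAP_DOORBELL_TYPE_2_0 <<
                           HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
                          HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK;

            printf("capability = 0x%08x\n", capability); /* 0x00002000 */
            return 0;
    }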
+/**
+ * kfd_topology_update_io_links() - Update IO links after device removal.
+ * @proximity_domain: Proximity domain value of the device being removed.
+ *
+ * The topology list is currently arranged in increasing order of
+ * proximity domain.
+ *
+ * Two things need to be done when a device is removed:
+ * 1. All the IO links to this device need to be removed.
+ * 2. All nodes after the removed device node move up one
+ * position in the topology list, so the proximity domain
+ * values of all nodes after the node being deleted
+ * decrease by 1. The proximity domain values recorded in
+ * the remaining IO links must then be updated to match the
+ * new values.
+ *
+ * Context: The caller must hold write topology_lock.
+ */
+static void kfd_topology_update_io_links(int proximity_domain)
+{
+ struct kfd_topology_device *dev;
+ struct kfd_iolink_properties *iolink, *p2plink, *tmp;
+
+ list_for_each_entry(dev, &topology_device_list, list) {
+ if (dev->proximity_domain > proximity_domain)
+ dev->proximity_domain--;
+
+ list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) {
+ /*
+ * If there is an IO link to the device being deleted,
+ * remove that IO link as well.
+ */
+ if (iolink->node_to == proximity_domain) {
+ list_del(&iolink->list);
+ dev->node_props.io_links_count--;
+ } else {
+ if (iolink->node_from > proximity_domain)
+ iolink->node_from--;
+ if (iolink->node_to > proximity_domain)
+ iolink->node_to--;
+ }
+ }
+
+ list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) {
+ /*
+ * If there is a p2p link to the device being deleted,
+ * remove that p2p link as well.
+ */
+ if (p2plink->node_to == proximity_domain) {
+ list_del(&p2plink->list);
+ dev->node_props.p2p_links_count--;
+ } else {
+ if (p2plink->node_from > proximity_domain)
+ p2plink->node_from--;
+ if (p2plink->node_to > proximity_domain)
+ p2plink->node_to--;
+ }
+ }
+ }
+}
+
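The renumbering rule implemented by kfd_topology_update_io_links() is easiest to see on a small example. The following is a standalone sketch (plain C, not kernel code) that applies the same rule to a handful of link destinations when the node with proximity domain 2 is removed:

    #include <stdio.h>

    int main(void)
    {
            int removed = 2;
            int node_to[] = { 0, 1, 2, 3, 4 }; /* iolink destinations before removal */
            int n = sizeof(node_to) / sizeof(node_to[0]);

            for (int i = 0; i < n; i++) {
                    if (node_to[i] == removed) {
                            /* Link points at the deleted node: drop it. */
                            printf("link to %d: dropped\n", node_to[i]);
                    } else {
                            /* Everything behind the hole shifts up by one. */
                            if (node_to[i] > removed)
                                    node_to[i]--;
                            printf("link to %d: kept\n", node_to[i]);
                    }
            }
            return 0;
    }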
int kfd_topology_remove_device(struct kfd_dev *gpu)
{
struct kfd_topology_device *dev, *tmp;
uint32_t gpu_id;
int res = -ENODEV;
+ int i = 0;
down_write(&topology_lock);
- list_for_each_entry_safe(dev, tmp, &topology_device_list, list)
+ list_for_each_entry_safe(dev, tmp, &topology_device_list, list) {
if (dev->gpu == gpu) {
gpu_id = dev->gpu_id;
kfd_remove_sysfs_node_entry(dev);
kfd_release_topology_device(dev);
sys_props.num_devices--;
+ kfd_topology_update_io_links(i);
+ topology_crat_proximity_domain = sys_props.num_devices-1;
+ sys_props.generation_count++;
res = 0;
if (kfd_topology_update_sysfs() < 0)
kfd_topology_release_sysfs();
break;
}
+ i++;
+ }
up_write(&topology_lock);
@@ -1592,7 +1952,7 @@ void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
gpu->use_iommu_v2 = false;
- if (!gpu->device_info->needs_iommu_device)
+ if (!gpu->device_info.needs_iommu_device)
return;
down_read(&topology_lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index a8db017c9b8e..9f6c949186c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,38 +26,11 @@
#include <linux/types.h>
#include <linux/list.h>
+#include <linux/kfd_sysfs.h>
#include "kfd_crat.h"
#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32
-#define HSA_CAP_HOT_PLUGGABLE 0x00000001
-#define HSA_CAP_ATS_PRESENT 0x00000002
-#define HSA_CAP_SHARED_WITH_GRAPHICS 0x00000004
-#define HSA_CAP_QUEUE_SIZE_POW2 0x00000008
-#define HSA_CAP_QUEUE_SIZE_32BIT 0x00000010
-#define HSA_CAP_QUEUE_IDLE_EVENT 0x00000020
-#define HSA_CAP_VA_LIMIT 0x00000040
-#define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080
-#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00
-#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8
-#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000
-#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12
-
-#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0
-#define HSA_CAP_DOORBELL_TYPE_1_0 0x1
-#define HSA_CAP_DOORBELL_TYPE_2_0 0x2
-#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000
-
-#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x00080000 /* Old buggy user mode depends on this being 0 */
-#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000
-#define HSA_CAP_RASEVENTNOTIFY 0x00200000
-#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000
-#define HSA_CAP_ASIC_REVISION_SHIFT 22
-#define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000
-#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000
-#define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000
-#define HSA_CAP_RESERVED 0xe00f8000
-
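These HSA_CAP_* definitions move to the shared header <linux/kfd_sysfs.h>; user space continues to consume them through the per-node properties file in sysfs. A hedged userspace sketch that reads the capability word those bits describe (the path below is the one conventionally exposed by amdkfd; adjust for your system):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* Each line of the properties file is "name value". */
            FILE *f = fopen("/sys/class/kfd/kfd/topology/nodes/0/properties", "r");
            char name[64];
            unsigned long long val;

            if (!f)
                    return 1;
            while (fscanf(f, "%63s %llu", name, &val) == 2) {
                    if (!strcmp(name, "capability"))
                            printf("capability = 0x%llx\n", val);
            }
            fclose(f);
            return 0;
    }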
struct kfd_node_properties {
uint64_t hive_id;
uint32_t cpu_cores_count;
@@ -64,6 +38,7 @@ struct kfd_node_properties {
uint32_t mem_banks_count;
uint32_t caches_count;
uint32_t io_links_count;
+ uint32_t p2p_links_count;
uint32_t cpu_core_id_base;
uint32_t simd_id_base;
uint32_t capability;
@@ -93,17 +68,6 @@ struct kfd_node_properties {
char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
};
-#define HSA_MEM_HEAP_TYPE_SYSTEM 0
-#define HSA_MEM_HEAP_TYPE_FB_PUBLIC 1
-#define HSA_MEM_HEAP_TYPE_FB_PRIVATE 2
-#define HSA_MEM_HEAP_TYPE_GPU_GDS 3
-#define HSA_MEM_HEAP_TYPE_GPU_LDS 4
-#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5
-
-#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001
-#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002
-#define HSA_MEM_FLAGS_RESERVED 0xfffffffc
-
struct kfd_mem_properties {
struct list_head list;
uint32_t heap_type;
@@ -116,12 +80,6 @@ struct kfd_mem_properties {
struct attribute attr;
};
-#define HSA_CACHE_TYPE_DATA 0x00000001
-#define HSA_CACHE_TYPE_INSTRUCTION 0x00000002
-#define HSA_CACHE_TYPE_CPU 0x00000004
-#define HSA_CACHE_TYPE_HSACU 0x00000008
-#define HSA_CACHE_TYPE_RESERVED 0xfffffff0
-
struct kfd_cache_properties {
struct list_head list;
uint32_t processor_id_low;
@@ -172,14 +130,15 @@ struct kfd_topology_device {
struct list_head mem_props;
uint32_t cache_count;
struct list_head cache_props;
- uint32_t io_link_count;
struct list_head io_link_props;
+ struct list_head p2p_link_props;
struct list_head perf_props;
struct kfd_dev *gpu;
struct kobject *kobj_node;
struct kobject *kobj_mem;
struct kobject *kobj_cache;
struct kobject *kobj_iolink;
+ struct kobject *kobj_p2plink;
struct kobject *kobj_perf;
struct attribute attr_gpuid;
struct attribute attr_name;
diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
index daf3c44547d3..e3f3b0b93a59 100644
--- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
@@ -31,7 +31,8 @@
#define SOC15_INTSRC_VMC_FAULT 0
#define SOC15_INTSRC_SDMA_TRAP 224
#define SOC15_INTSRC_SDMA_ECC 220
-
+#define SOC21_INTSRC_SDMA_TRAP 49
+#define SOC21_INTSRC_SDMA_ECC 62
#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)
#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff)
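
The new SOC21 SDMA source IDs are matched against the source-id byte that SOC15_SOURCE_ID_FROM_IH_ENTRY() extracts from the first dword of an IH ring entry. A small standalone sketch of that decode (assuming a little-endian host; not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    #define SOC21_INTSRC_SDMA_TRAP 49

    int main(void)
    {
            /* Fabricated IH dword0: client id 0x1c in bits 0..7,
             * source id in bits 8..15, mirroring the macro layout above.
             */
            uint32_t ih_dword0 = (SOC21_INTSRC_SDMA_TRAP << 8) | 0x1c;
            uint32_t client_id = ih_dword0 & 0xff;
            uint32_t source_id = (ih_dword0 >> 8) & 0xff;

            if (source_id == SOC21_INTSRC_SDMA_TRAP)
                    printf("SDMA trap from client 0x%x\n", client_id);
            return 0;
    }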