aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h877
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm301
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c63
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c64
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pasid.c31
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h80
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c245
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c227
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h29
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c296
23 files changed, 1707 insertions, 546 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 61474627a32c..e1e4115dcf78 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -53,6 +53,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_int_process_v9.o \
$(AMDKFD_PATH)/kfd_dbgdev.o \
$(AMDKFD_PATH)/kfd_dbgmgr.o \
+ $(AMDKFD_PATH)/kfd_smi_events.o \
$(AMDKFD_PATH)/kfd_crat.o
ifneq ($(CONFIG_AMD_IOMMU_V2),)
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 9f59ba93cfe0..24b471734117 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -24,6 +24,7 @@
#include "kfd_events.h"
#include "cik_int.h"
#include "amdgpu_amdkfd.h"
+#include "kfd_smi_events.h"
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
const uint32_t *ih_ring_entry,
@@ -107,6 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
struct kfd_vm_fault_info info;
+ kfd_smi_event_update_vmfault(dev, pasid);
kfd_process_vm_fault(dev->dqm, pasid);
memset(&info, 0, sizeof(info));
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index d3400da6ab64..577d901fdb63 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -679,169 +679,175 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf810000, 0x00000000,
};
-static const uint32_t cwsr_trap_gfx10_hex[] = {
- 0xbf820001, 0xbf8201c1,
+static const uint32_t cwsr_trap_nv1x_hex[] = {
+ 0xbf820001, 0xbf8201cd,
0xb0804004, 0xb978f802,
- 0x8a788678, 0xb971f803,
- 0x876eff71, 0x00000400,
- 0xbf850033, 0x876eff71,
- 0x00000100, 0xbf840002,
- 0x8878ff78, 0x00002000,
- 0x8a77ff77, 0xff000000,
- 0xb96ef807, 0x876fff6e,
- 0x02000000, 0x8f6f866f,
- 0x88776f77, 0x876fff6e,
- 0x003f8000, 0x8f6f896f,
- 0x88776f77, 0x8a6eff6e,
- 0x023f8000, 0xb9eef807,
- 0xb97af812, 0xb97bf813,
- 0x8ffa887a, 0xf4051bbd,
- 0xfa000000, 0xbf8cc07f,
- 0xf4051ebd, 0xfa000008,
- 0xbf8cc07f, 0x87ee6e6e,
- 0xbf840001, 0xbe80206e,
- 0xb971f803, 0x8771ff71,
- 0x000001ff, 0xbf850002,
- 0x806c846c, 0x826d806d,
- 0x876dff6d, 0x0000ffff,
- 0x906e8977, 0x876fff6e,
- 0x003f8000, 0x906e8677,
- 0x876eff6e, 0x02000000,
- 0x886e6f6e, 0xb9eef807,
- 0x87fe7e7e, 0x87ea6a6a,
- 0xb9f8f802, 0xbe80226c,
- 0xb971f803, 0x8771ff71,
- 0x00000100, 0xbf840006,
- 0xbef60380, 0xb9f60203,
- 0x876dff6d, 0x0000ffff,
- 0x80ec886c, 0x82ed806d,
- 0xbef60380, 0xb9f60283,
- 0xb972f816, 0xb9762c07,
- 0x8f769a76, 0x886d766d,
- 0xb97603c7, 0x8f769976,
- 0x886d766d, 0xb9760647,
- 0x8f769876, 0x886d766d,
- 0xb976f807, 0x8776ff76,
- 0x00007fff, 0xb9f6f807,
+ 0x8a788678, 0xb96ef801,
+ 0x876eff6e, 0x00000800,
+ 0xbf840003, 0x876eff78,
+ 0x00002000, 0xbf840009,
+ 0xb97bf803, 0x876eff7b,
+ 0x00000400, 0xbf850033,
+ 0x876eff7b, 0x00000100,
+ 0xbf840002, 0x8878ff78,
+ 0x00002000, 0x8a77ff77,
+ 0xff000000, 0xb96ef807,
+ 0x876fff6e, 0x02000000,
+ 0x8f6f866f, 0x88776f77,
+ 0x876fff6e, 0x003f8000,
+ 0x8f6f896f, 0x88776f77,
+ 0x8a6eff6e, 0x023f8000,
+ 0xb9eef807, 0xb97af812,
+ 0xb97bf813, 0x8ffa887a,
+ 0xf4051bbd, 0xfa000000,
+ 0xbf8cc07f, 0xf4051ebd,
+ 0xfa000008, 0xbf8cc07f,
+ 0x87ee6e6e, 0xbf840001,
+ 0xbe80206e, 0xb97bf803,
+ 0x877bff7b, 0x000001ff,
+ 0xbf850002, 0x806c846c,
+ 0x826d806d, 0x876dff6d,
+ 0x0000ffff, 0x906e8977,
+ 0x876fff6e, 0x003f8000,
+ 0x906e8677, 0x876eff6e,
+ 0x02000000, 0x886e6f6e,
+ 0xb9eef807, 0x87fe7e7e,
+ 0x87ea6a6a, 0xb9f8f802,
+ 0xbe80226c, 0x876dff6d,
+ 0x0000ffff, 0xbefa0380,
+ 0xb9fa0283, 0xb97a2c07,
+ 0x8f7a9a7a, 0x886d7a6d,
+ 0xb97a03c7, 0x8f7a997a,
+ 0x886d7a6d, 0xb97a0647,
+ 0x8f7a987a, 0x886d7a6d,
+ 0xb97af807, 0x877aff7a,
+ 0x00007fff, 0xb9faf807,
0xbeee037e, 0xbeef037f,
0xbefe0480, 0xbf900004,
0xbf8e0002, 0xbf88fffe,
+ 0xb97b02dc, 0x8f7b997b,
+ 0x887b7b7f, 0xb97a2a05,
+ 0x807a817a, 0xbf0d997b,
+ 0xbf850002, 0x8f7a897a,
+ 0xbf820001, 0x8f7a8a7a,
+ 0x877bff7f, 0x0000ffff,
+ 0x807aff7a, 0x00000200,
+ 0x807a7e7a, 0x827b807b,
+ 0xf4491c3d, 0xfa000050,
+ 0xf4491d3d, 0xfa000060,
+ 0xf4411e7d, 0xfa000074,
0xbef4037e, 0x8775ff7f,
0x0000ffff, 0x8875ff75,
0x00040000, 0xbef60380,
0xbef703ff, 0x10807fac,
- 0x8776ff7f, 0x08000000,
- 0x90768376, 0x88777677,
- 0x8776ff7f, 0x70000000,
- 0x90768176, 0x88777677,
- 0xbefb037c, 0xbefa0380,
+ 0x877aff7f, 0x08000000,
+ 0x907a837a, 0x88777a77,
+ 0x877aff7f, 0x70000000,
+ 0x907a817a, 0x88777a77,
+ 0xbef1037c, 0xbef00380,
0xb97302dc, 0x8f739973,
- 0x8873737f, 0xb97a2a05,
- 0x807a817a, 0x907c9973,
- 0x877c817c, 0xbf06817c,
- 0xbf850002, 0x8f7a897a,
- 0xbf820001, 0x8f7a8a7a,
- 0xb9761e06, 0x8f768a76,
- 0x807a767a, 0x807aff7a,
- 0x00000200, 0xbef603ff,
- 0x01000000, 0xbefe037c,
- 0xbefc037a, 0xf4611efa,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xbefe037c,
- 0xbefc037a, 0xf4611b3a,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xbefe037c,
- 0xbefc037a, 0xf4611b7a,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xbefe037c,
- 0xbefc037a, 0xf4611bba,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xbefe037c,
- 0xbefc037a, 0xf4611bfa,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xbefe037c,
- 0xbefc037a, 0xf4611e3a,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xb971f803,
- 0xbefe037c, 0xbefc037a,
- 0xf4611c7a, 0xf8000000,
- 0x807a847a, 0xbefc037e,
- 0xbefe037c, 0xbefc037a,
- 0xf4611cba, 0xf8000000,
- 0x807a847a, 0xbefc037e,
- 0xb97bf801, 0xbefe037c,
- 0xbefc037a, 0xf4611efa,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xb97bf814,
- 0xbefe037c, 0xbefc037a,
- 0xf4611efa, 0xf8000000,
- 0x807a847a, 0xbefc037e,
- 0xb97bf815, 0xbefe037c,
- 0xbefc037a, 0xf4611efa,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0x8776ff7f,
- 0x04000000, 0xbeef0380,
- 0x886f6f76, 0xb97a2a05,
- 0x807a817a, 0x907c9973,
- 0x877c817c, 0xbf06817c,
- 0xbf850002, 0x8f7a897a,
- 0xbf820001, 0x8f7a8a7a,
- 0xb9761e06, 0x8f768a76,
- 0x807a767a, 0xbef603ff,
- 0x01000000, 0xbef20374,
- 0x80747a74, 0x82758075,
- 0xbefc0380, 0xbf800000,
- 0xbe802f00, 0xbe822f02,
- 0xbe842f04, 0xbe862f06,
- 0xbe882f08, 0xbe8a2f0a,
- 0xbe8c2f0c, 0xbe8e2f0e,
- 0xf469003a, 0xfa000000,
- 0xf469013a, 0xfa000010,
- 0xf469023a, 0xfa000020,
- 0xf469033a, 0xfa000030,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0aff7c,
- 0x00000060, 0xbf85ffea,
- 0xbe802f00, 0xbe822f02,
- 0xbe842f04, 0xbe862f06,
- 0xbe882f08, 0xbe8a2f0a,
- 0xf469003a, 0xfa000000,
- 0xf469013a, 0xfa000010,
- 0xf469023a, 0xfa000020,
- 0x8074b074, 0x82758075,
- 0xbef40372, 0xbefa0380,
+ 0x8873737f, 0xb97bf816,
+ 0xba80f816, 0x00000000,
0xbefe03c1, 0x907c9973,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820002, 0xbeff03c1,
0xbf82000b, 0xbef603ff,
0x01000000, 0xe0704000,
- 0x7a5d0000, 0xe0704080,
- 0x7a5d0100, 0xe0704100,
- 0x7a5d0200, 0xe0704180,
- 0x7a5d0300, 0xbf82000a,
+ 0x705d0000, 0xe0704080,
+ 0x705d0100, 0xe0704100,
+ 0x705d0200, 0xe0704180,
+ 0x705d0300, 0xbf82000a,
0xbef603ff, 0x01000000,
- 0xe0704000, 0x7a5d0000,
- 0xe0704100, 0x7a5d0100,
- 0xe0704200, 0x7a5d0200,
- 0xe0704300, 0x7a5d0300,
+ 0xe0704000, 0x705d0000,
+ 0xe0704100, 0x705d0100,
+ 0xe0704200, 0x705d0200,
+ 0xe0704300, 0x705d0300,
+ 0xb9702a05, 0x80708170,
+ 0xbf0d9973, 0xbf850002,
+ 0x8f708970, 0xbf820001,
+ 0x8f708a70, 0xb97a1e06,
+ 0x8f7a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0xbef603ff, 0x01000000,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611c7a, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611b3a, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611b7a, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611bba, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611bfa, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611e3a, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xb97af803, 0xbefe037c,
+ 0xbefc0370, 0xf4611eba,
+ 0xf8000000, 0x80708470,
+ 0xbefc037e, 0xbefe037c,
+ 0xbefc0370, 0xf4611efa,
+ 0xf8000000, 0x80708470,
+ 0xbefc037e, 0xb971f801,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611c7a, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xb971f814, 0xbefe037c,
+ 0xbefc0370, 0xf4611c7a,
+ 0xf8000000, 0x80708470,
+ 0xbefc037e, 0xb971f815,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611c7a, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xb9702a05, 0x80708170,
+ 0xbf0d9973, 0xbf850002,
+ 0x8f708970, 0xbf820001,
+ 0x8f708a70, 0xb97a1e06,
+ 0x8f7a8a7a, 0x80707a70,
+ 0xbef603ff, 0x01000000,
+ 0xbefb0374, 0x80747074,
+ 0x82758075, 0xbefc0380,
+ 0xbf800000, 0xbe802f00,
+ 0xbe822f02, 0xbe842f04,
+ 0xbe862f06, 0xbe882f08,
+ 0xbe8a2f0a, 0xbe8c2f0c,
+ 0xbe8e2f0e, 0xf469003a,
+ 0xfa000000, 0xf469013a,
+ 0xfa000010, 0xf469023a,
+ 0xfa000020, 0xf469033a,
+ 0xfa000030, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0aff7c, 0x00000060,
+ 0xbf85ffea, 0xbe802f00,
+ 0xbe822f02, 0xbe842f04,
+ 0xbe862f06, 0xbe882f08,
+ 0xbe8a2f0a, 0xf469003a,
+ 0xfa000000, 0xf469013a,
+ 0xfa000010, 0xf469023a,
+ 0xfa000020, 0x8074b074,
+ 0x82758075, 0xbef4037b,
0xbefe03c1, 0x907c9973,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
- 0xb9714306, 0x8771c171,
- 0xbf840046, 0xbf8a0000,
- 0x8776ff6f, 0x04000000,
- 0xbf840042, 0x8f718671,
- 0x8f718271, 0xbef60371,
- 0xb97a2a05, 0x807a817a,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850002,
- 0x8f7a897a, 0xbf820001,
- 0x8f7a8a7a, 0xb9761e06,
- 0x8f768a76, 0x807a767a,
- 0x807aff7a, 0x00000200,
- 0x807aff7a, 0x00000080,
+ 0xb97b4306, 0x877bc17b,
+ 0xbf840044, 0xbf8a0000,
+ 0x877aff73, 0x04000000,
+ 0xbf840040, 0x8f7b867b,
+ 0x8f7b827b, 0xbef6037b,
+ 0xb9702a05, 0x80708170,
+ 0xbf0d9973, 0xbf850002,
+ 0x8f708970, 0xbf820001,
+ 0x8f708a70, 0xb97a1e06,
+ 0x8f7a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000080,
0xbef603ff, 0x01000000,
0xd7650000, 0x000100c1,
0xd7660000, 0x000200c1,
@@ -852,87 +858,86 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
0x01000000, 0xbf8c0000,
- 0xe0704000, 0x7a5d0100,
- 0x807c037c, 0x807a037a,
+ 0xe0704000, 0x705d0100,
+ 0x807c037c, 0x80700370,
0xd5250000, 0x0001ff00,
- 0x00000080, 0xbf0a717c,
+ 0x00000080, 0xbf0a7b7c,
0xbf85fff4, 0xbf820011,
0xbe8303ff, 0x00000100,
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
0x01000000, 0xbf8c0000,
- 0xe0704000, 0x7a5d0100,
- 0x807c037c, 0x807a037a,
+ 0xe0704000, 0x705d0100,
+ 0x807c037c, 0x80700370,
0xd5250000, 0x0001ff00,
- 0x00000100, 0xbf0a717c,
+ 0x00000100, 0xbf0a7b7c,
0xbf85fff4, 0xbefe03c1,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850004,
- 0xbefa03ff, 0x00000200,
+ 0xbef003ff, 0x00000200,
0xbeff0380, 0xbf820003,
- 0xbefa03ff, 0x00000400,
- 0xbeff03c1, 0xb9712a05,
- 0x80718171, 0x8f718271,
+ 0xbef003ff, 0x00000400,
+ 0xbeff03c1, 0xb97b2a05,
+ 0x807b817b, 0x8f7b827b,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850017,
0xbef603ff, 0x01000000,
- 0xbefc0384, 0xbf0a717c,
+ 0xbefc0384, 0xbf0a7b7c,
0xbf840037, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xe0704000,
- 0x7a5d0000, 0xe0704080,
- 0x7a5d0100, 0xe0704100,
- 0x7a5d0200, 0xe0704180,
- 0x7a5d0300, 0x807c847c,
- 0x807aff7a, 0x00000200,
- 0xbf0a717c, 0xbf85ffef,
+ 0x705d0000, 0xe0704080,
+ 0x705d0100, 0xe0704100,
+ 0x705d0200, 0xe0704180,
+ 0x705d0300, 0x807c847c,
+ 0x8070ff70, 0x00000200,
+ 0xbf0a7b7c, 0xbf85ffef,
0xbf820025, 0xbef603ff,
0x01000000, 0xbefc0384,
- 0xbf0a717c, 0xbf840020,
+ 0xbf0a7b7c, 0xbf840020,
0x7e008700, 0x7e028701,
0x7e048702, 0x7e068703,
- 0xe0704000, 0x7a5d0000,
- 0xe0704100, 0x7a5d0100,
- 0xe0704200, 0x7a5d0200,
- 0xe0704300, 0x7a5d0300,
- 0x807c847c, 0x807aff7a,
- 0x00000400, 0xbf0a717c,
- 0xbf85ffef, 0xb9711e06,
- 0x8771c171, 0xbf84000c,
- 0x8f718371, 0x80717c71,
+ 0xe0704000, 0x705d0000,
+ 0xe0704100, 0x705d0100,
+ 0xe0704200, 0x705d0200,
+ 0xe0704300, 0x705d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xb97b1e06,
+ 0x877bc17b, 0xbf84000c,
+ 0x8f7b837b, 0x807b7c7b,
0xbefe03c1, 0xbeff0380,
0x7e008700, 0xe0704000,
- 0x7a5d0000, 0x807c817c,
- 0x807aff7a, 0x00000080,
- 0xbf0a717c, 0xbf85fff8,
- 0xbf820142, 0xbef4037e,
+ 0x705d0000, 0x807c817c,
+ 0x8070ff70, 0x00000080,
+ 0xbf0a7b7c, 0xbf85fff8,
+ 0xbf82014f, 0xbef4037e,
0x8775ff7f, 0x0000ffff,
0x8875ff75, 0x00040000,
0xbef60380, 0xbef703ff,
- 0x10807fac, 0x8772ff7f,
- 0x08000000, 0x90728372,
- 0x88777277, 0x8772ff7f,
- 0x70000000, 0x90728172,
- 0x88777277, 0xb97302dc,
- 0x8f739973, 0x8873737f,
- 0x8772ff7f, 0x04000000,
- 0xbf840036, 0xbefe03c1,
- 0x907c9973, 0x877c817c,
+ 0x10807fac, 0x876eff7f,
+ 0x08000000, 0x906e836e,
+ 0x88776e77, 0x876eff7f,
+ 0x70000000, 0x906e816e,
+ 0x88776e77, 0xb97202dc,
+ 0x8f729972, 0x8872727f,
+ 0x876eff7f, 0x04000000,
+ 0xbf840034, 0xbefe03c1,
+ 0x907c9972, 0x877c817c,
0xbf06817c, 0xbf850002,
0xbeff0380, 0xbf820001,
0xbeff03c1, 0xb96f4306,
- 0x876fc16f, 0xbf84002b,
+ 0x876fc16f, 0xbf840029,
0x8f6f866f, 0x8f6f826f,
0xbef6036f, 0xb9782a05,
- 0x80788178, 0x907c9973,
- 0x877c817c, 0xbf06817c,
+ 0x80788178, 0xbf0d9972,
0xbf850002, 0x8f788978,
0xbf820001, 0x8f788a78,
- 0xb9721e06, 0x8f728a72,
- 0x80787278, 0x8078ff78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
0x00000200, 0x8078ff78,
0x00000080, 0xbef603ff,
- 0x01000000, 0x907c9973,
+ 0x01000000, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbefc0380, 0xbf850009,
0xe0310000, 0x781d0000,
@@ -944,15 +949,15 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x00000100, 0x8078ff78,
0x00000100, 0xbf0a6f7c,
0xbf85fff8, 0xbef80380,
- 0xbefe03c1, 0x907c9973,
+ 0xbefe03c1, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb96f2a05, 0x806f816f,
- 0x8f6f826f, 0x907c9973,
+ 0x8f6f826f, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbf850021, 0xbef603ff,
- 0x01000000, 0xbef20378,
+ 0x01000000, 0xbeee0378,
0x8078ff78, 0x00000200,
0xbefc0384, 0xe0304000,
0x785d0000, 0xe0304080,
@@ -964,12 +969,12 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x807c847c, 0x8078ff78,
0x00000200, 0xbf0a6f7c,
0xbf85ffee, 0xe0304000,
- 0x725d0000, 0xe0304080,
- 0x725d0100, 0xe0304100,
- 0x725d0200, 0xe0304180,
- 0x725d0300, 0xbf820032,
+ 0x6e5d0000, 0xe0304080,
+ 0x6e5d0100, 0xe0304100,
+ 0x6e5d0200, 0xe0304180,
+ 0x6e5d0300, 0xbf820032,
0xbef603ff, 0x01000000,
- 0xbef20378, 0x8078ff78,
+ 0xbeee0378, 0x8078ff78,
0x00000400, 0xbefc0384,
0xe0304000, 0x785d0000,
0xe0304100, 0x785d0100,
@@ -989,16 +994,15 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x8078ff78, 0x00000080,
0xbf0a6f7c, 0xbf85fff7,
0xbeff03c1, 0xe0304000,
- 0x725d0000, 0xe0304100,
- 0x725d0100, 0xe0304200,
- 0x725d0200, 0xe0304300,
- 0x725d0300, 0xbf8c3f70,
+ 0x6e5d0000, 0xe0304100,
+ 0x6e5d0100, 0xe0304200,
+ 0x6e5d0200, 0xe0304300,
+ 0x6e5d0300, 0xbf8c3f70,
0xb9782a05, 0x80788178,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850002,
+ 0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
- 0x8f788a78, 0xb9721e06,
- 0x8f728a72, 0x80787278,
+ 0x8f788a78, 0xb96e1e06,
+ 0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
0x80f8ff78, 0x00000050,
0xbef603ff, 0x01000000,
@@ -1021,23 +1025,22 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbe8c310c, 0xbe8e310e,
0xbf06807c, 0xbf84fff0,
0xb9782a05, 0x80788178,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850002,
+ 0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
- 0x8f788a78, 0xb9721e06,
- 0x8f728a72, 0x80787278,
+ 0x8f788a78, 0xb96e1e06,
+ 0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
0xbef603ff, 0x01000000,
0xf4211bfa, 0xf0000000,
0x80788478, 0xf4211b3a,
0xf0000000, 0x80788478,
0xf4211b7a, 0xf0000000,
- 0x80788478, 0xf4211eba,
- 0xf0000000, 0x80788478,
- 0xf4211efa, 0xf0000000,
0x80788478, 0xf4211c3a,
0xf0000000, 0x80788478,
0xf4211c7a, 0xf0000000,
+ 0x80788478, 0xf4211eba,
+ 0xf0000000, 0x80788478,
+ 0xf4211efa, 0xf0000000,
0x80788478, 0xf4211e7a,
0xf0000000, 0x80788478,
0xf4211cfa, 0xf0000000,
@@ -1046,31 +1049,41 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf8cc07f, 0xb9eef814,
0xf4211bba, 0xf0000000,
0x80788478, 0xbf8cc07f,
- 0xb9eef815, 0xbef2036d,
- 0x876dff72, 0x0000ffff,
- 0xbefc036f, 0xbefe037a,
- 0xbeff037b, 0x876f71ff,
- 0x000003ff, 0xb9ef4803,
- 0xb9f9f816, 0x876f71ff,
- 0xfffff800, 0x906f8b6f,
- 0xb9efa2c3, 0xb9f3f801,
- 0x876fff72, 0xfc000000,
- 0x906f9a6f, 0x8f6f906f,
- 0xbef30380, 0x88736f73,
- 0x876fff72, 0x02000000,
- 0x906f996f, 0x8f6f8f6f,
- 0x88736f73, 0x876fff72,
- 0x01000000, 0x906f986f,
- 0x8f6f996f, 0x88736f73,
- 0x876fff70, 0x00800000,
- 0x906f976f, 0xb9f3f807,
- 0x87fe7e7e, 0x87ea6a6a,
- 0xb9f0f802, 0xbf8a0000,
- 0xbe80226c, 0xbf810000,
+ 0xb9eef815, 0xbefc036f,
+ 0xbefe0370, 0xbeff0371,
+ 0x876f7bff, 0x000003ff,
+ 0xb9ef4803, 0xb9f9f816,
+ 0x876f7bff, 0xfffff800,
+ 0x906f8b6f, 0xb9efa2c3,
+ 0xb9f3f801, 0xb96e2a05,
+ 0x806e816e, 0xbf0d9972,
+ 0xbf850002, 0x8f6e896e,
+ 0xbf820001, 0x8f6e8a6e,
+ 0x806eff6e, 0x00000200,
+ 0x806e746e, 0x826f8075,
+ 0x876fff6f, 0x0000ffff,
+ 0xf4091c37, 0xfa000050,
+ 0xf4091d37, 0xfa000060,
+ 0xf4011e77, 0xfa000074,
+ 0xbf8cc07f, 0x876fff6d,
+ 0xfc000000, 0x906f9a6f,
+ 0x8f6f906f, 0xbeee0380,
+ 0x886e6f6e, 0x876fff6d,
+ 0x02000000, 0x906f996f,
+ 0x8f6f8f6f, 0x886e6f6e,
+ 0x876fff6d, 0x01000000,
+ 0x906f986f, 0x8f6f996f,
+ 0x886e6f6e, 0x876fff7a,
+ 0x00800000, 0x906f976f,
+ 0xb9eef807, 0x876dff6d,
+ 0x0000ffff, 0x87fe7e7e,
+ 0x87ea6a6a, 0xb9faf802,
+ 0xbf8a0000, 0xbe80226c,
+ 0xbf810000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
- 0xbf9f0000, 0x00000000,
};
+
static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf820001, 0xbf8202c4,
0xb8f8f802, 0x89788678,
@@ -1560,3 +1573,399 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf8a0000, 0x95806f6c,
0xbf810000, 0x00000000,
};
+
+static const uint32_t cwsr_trap_gfx10_hex[] = {
+ 0xbf820001, 0xbf8201cf,
+ 0xb0804004, 0xb978f802,
+ 0x8a788678, 0xb96ef801,
+ 0x876eff6e, 0x00000800,
+ 0xbf840003, 0x876eff78,
+ 0x00002000, 0xbf840009,
+ 0xb97bf803, 0x876eff7b,
+ 0x00000400, 0xbf85001d,
+ 0x876eff7b, 0x00000100,
+ 0xbf840002, 0x8878ff78,
+ 0x00002000, 0xb97af812,
+ 0xb97bf813, 0x8ffa887a,
+ 0xf4051bbd, 0xfa000000,
+ 0xbf8cc07f, 0xf4051ebd,
+ 0xfa000008, 0xbf8cc07f,
+ 0x87ee6e6e, 0xbf840001,
+ 0xbe80206e, 0xb97bf803,
+ 0x877bff7b, 0x000001ff,
+ 0xbf850002, 0x806c846c,
+ 0x826d806d, 0x876dff6d,
+ 0x0000ffff, 0x87fe7e7e,
+ 0x87ea6a6a, 0xb9f8f802,
+ 0xbe80226c, 0x876dff6d,
+ 0x0000ffff, 0xbefa0380,
+ 0xb9fa0283, 0xbeee037e,
+ 0xbeef037f, 0xbefe0480,
+ 0xbf900004, 0xbf8cc07f,
+ 0xb97b02dc, 0x8f7b997b,
+ 0x887b7b7f, 0xb97a2a05,
+ 0x807a817a, 0xbf0d997b,
+ 0xbf850002, 0x8f7a897a,
+ 0xbf820001, 0x8f7a8a7a,
+ 0x877bff7f, 0x0000ffff,
+ 0x807aff7a, 0x00000200,
+ 0x807a7e7a, 0x827b807b,
+ 0xbef4037e, 0x8775ff7f,
+ 0x0000ffff, 0x8875ff75,
+ 0x00040000, 0xbef60380,
+ 0xbef703ff, 0x10807fac,
+ 0x877aff7f, 0x08000000,
+ 0x907a837a, 0x88777a77,
+ 0x877aff7f, 0x70000000,
+ 0x907a817a, 0x88777a77,
+ 0xbef1037c, 0xbef00380,
+ 0xb97302dc, 0x8f739973,
+ 0x8873737f, 0xbefe03c1,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbf850002,
+ 0xbeff0380, 0xbf820002,
+ 0xbeff03c1, 0xbf82000b,
+ 0xbef603ff, 0x01000000,
+ 0xe0704000, 0x705d0000,
+ 0xe0704080, 0x705d0100,
+ 0xe0704100, 0x705d0200,
+ 0xe0704180, 0x705d0300,
+ 0xbf82000a, 0xbef603ff,
+ 0x01000000, 0xe0704000,
+ 0x705d0000, 0xe0704100,
+ 0x705d0100, 0xe0704200,
+ 0x705d0200, 0xe0704300,
+ 0x705d0300, 0xb9702a05,
+ 0x80708170, 0xbf0d9973,
+ 0xbf850002, 0x8f708970,
+ 0xbf820001, 0x8f708a70,
+ 0xb97a1e06, 0x8f7a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0xbef603ff,
+ 0x01000000, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0xbefc0380, 0xd7610002,
+ 0x0000f871, 0x807c817c,
+ 0xd7610002, 0x0000f86c,
+ 0x807c817c, 0xd7610002,
+ 0x0000f86d, 0x807c817c,
+ 0xd7610002, 0x0000f86e,
+ 0x807c817c, 0xd7610002,
+ 0x0000f86f, 0x807c817c,
+ 0xd7610002, 0x0000f878,
+ 0x807c817c, 0xb97af803,
+ 0xd7610002, 0x0000f87a,
+ 0x807c817c, 0xd7610002,
+ 0x0000f87b, 0x807c817c,
+ 0xb971f801, 0xd7610002,
+ 0x0000f871, 0x807c817c,
+ 0xb971f814, 0xd7610002,
+ 0x0000f871, 0x807c817c,
+ 0xb971f815, 0xd7610002,
+ 0x0000f871, 0x807c817c,
+ 0xbeff0380, 0xe0704000,
+ 0x705d0200, 0xb9702a05,
+ 0x80708170, 0xbf0d9973,
+ 0xbf850002, 0x8f708970,
+ 0xbf820001, 0x8f708a70,
+ 0xb97a1e06, 0x8f7a8a7a,
+ 0x80707a70, 0xbef603ff,
+ 0x01000000, 0xbef90380,
+ 0xbefc0380, 0xbf800000,
+ 0xbe802f00, 0xbe822f02,
+ 0xbe842f04, 0xbe862f06,
+ 0xbe882f08, 0xbe8a2f0a,
+ 0xbe8c2f0c, 0xbe8e2f0e,
+ 0xd7610002, 0x0000f200,
+ 0x80798179, 0xd7610002,
+ 0x0000f201, 0x80798179,
+ 0xd7610002, 0x0000f202,
+ 0x80798179, 0xd7610002,
+ 0x0000f203, 0x80798179,
+ 0xd7610002, 0x0000f204,
+ 0x80798179, 0xd7610002,
+ 0x0000f205, 0x80798179,
+ 0xd7610002, 0x0000f206,
+ 0x80798179, 0xd7610002,
+ 0x0000f207, 0x80798179,
+ 0xd7610002, 0x0000f208,
+ 0x80798179, 0xd7610002,
+ 0x0000f209, 0x80798179,
+ 0xd7610002, 0x0000f20a,
+ 0x80798179, 0xd7610002,
+ 0x0000f20b, 0x80798179,
+ 0xd7610002, 0x0000f20c,
+ 0x80798179, 0xd7610002,
+ 0x0000f20d, 0x80798179,
+ 0xd7610002, 0x0000f20e,
+ 0x80798179, 0xd7610002,
+ 0x0000f20f, 0x80798179,
+ 0xbf06a079, 0xbf840006,
+ 0xe0704000, 0x705d0200,
+ 0x8070ff70, 0x00000080,
+ 0xbef90380, 0x7e040280,
+ 0x807c907c, 0xbf0aff7c,
+ 0x00000060, 0xbf85ffbc,
+ 0xbe802f00, 0xbe822f02,
+ 0xbe842f04, 0xbe862f06,
+ 0xbe882f08, 0xbe8a2f0a,
+ 0xd7610002, 0x0000f200,
+ 0x80798179, 0xd7610002,
+ 0x0000f201, 0x80798179,
+ 0xd7610002, 0x0000f202,
+ 0x80798179, 0xd7610002,
+ 0x0000f203, 0x80798179,
+ 0xd7610002, 0x0000f204,
+ 0x80798179, 0xd7610002,
+ 0x0000f205, 0x80798179,
+ 0xd7610002, 0x0000f206,
+ 0x80798179, 0xd7610002,
+ 0x0000f207, 0x80798179,
+ 0xd7610002, 0x0000f208,
+ 0x80798179, 0xd7610002,
+ 0x0000f209, 0x80798179,
+ 0xd7610002, 0x0000f20a,
+ 0x80798179, 0xd7610002,
+ 0x0000f20b, 0x80798179,
+ 0xe0704000, 0x705d0200,
+ 0xbefe03c1, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850002, 0xbeff0380,
+ 0xbf820001, 0xbeff03c1,
+ 0xb97b4306, 0x877bc17b,
+ 0xbf840044, 0xbf8a0000,
+ 0x877aff73, 0x04000000,
+ 0xbf840040, 0x8f7b867b,
+ 0x8f7b827b, 0xbef6037b,
+ 0xb9702a05, 0x80708170,
+ 0xbf0d9973, 0xbf850002,
+ 0x8f708970, 0xbf820001,
+ 0x8f708a70, 0xb97a1e06,
+ 0x8f7a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000080,
+ 0xbef603ff, 0x01000000,
+ 0xd7650000, 0x000100c1,
+ 0xd7660000, 0x000200c1,
+ 0x16000084, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbefc0380, 0xbf850012,
+ 0xbe8303ff, 0x00000080,
+ 0xbf800000, 0xbf800000,
+ 0xbf800000, 0xd8d80000,
+ 0x01000000, 0xbf8c0000,
+ 0xe0704000, 0x705d0100,
+ 0x807c037c, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000080, 0xbf0a7b7c,
+ 0xbf85fff4, 0xbf820011,
+ 0xbe8303ff, 0x00000100,
+ 0xbf800000, 0xbf800000,
+ 0xbf800000, 0xd8d80000,
+ 0x01000000, 0xbf8c0000,
+ 0xe0704000, 0x705d0100,
+ 0x807c037c, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000100, 0xbf0a7b7c,
+ 0xbf85fff4, 0xbefe03c1,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbf850004,
+ 0xbef003ff, 0x00000200,
+ 0xbeff0380, 0xbf820003,
+ 0xbef003ff, 0x00000400,
+ 0xbeff03c1, 0xb97b2a05,
+ 0x807b817b, 0x8f7b827b,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbf850017,
+ 0xbef603ff, 0x01000000,
+ 0xbefc0384, 0xbf0a7b7c,
+ 0xbf840037, 0x7e008700,
+ 0x7e028701, 0x7e048702,
+ 0x7e068703, 0xe0704000,
+ 0x705d0000, 0xe0704080,
+ 0x705d0100, 0xe0704100,
+ 0x705d0200, 0xe0704180,
+ 0x705d0300, 0x807c847c,
+ 0x8070ff70, 0x00000200,
+ 0xbf0a7b7c, 0xbf85ffef,
+ 0xbf820025, 0xbef603ff,
+ 0x01000000, 0xbefc0384,
+ 0xbf0a7b7c, 0xbf840020,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xe0704000, 0x705d0000,
+ 0xe0704100, 0x705d0100,
+ 0xe0704200, 0x705d0200,
+ 0xe0704300, 0x705d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xb97b1e06,
+ 0x877bc17b, 0xbf84000c,
+ 0x8f7b837b, 0x807b7c7b,
+ 0xbefe03c1, 0xbeff0380,
+ 0x7e008700, 0xe0704000,
+ 0x705d0000, 0x807c817c,
+ 0x8070ff70, 0x00000080,
+ 0xbf0a7b7c, 0xbf85fff8,
+ 0xbf82013a, 0xbef4037e,
+ 0x8775ff7f, 0x0000ffff,
+ 0x8875ff75, 0x00040000,
+ 0xbef60380, 0xbef703ff,
+ 0x10807fac, 0x876eff7f,
+ 0x08000000, 0x906e836e,
+ 0x88776e77, 0x876eff7f,
+ 0x70000000, 0x906e816e,
+ 0x88776e77, 0xb97202dc,
+ 0x8f729972, 0x8872727f,
+ 0x876eff7f, 0x04000000,
+ 0xbf840034, 0xbefe03c1,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbf850002,
+ 0xbeff0380, 0xbf820001,
+ 0xbeff03c1, 0xb96f4306,
+ 0x876fc16f, 0xbf840029,
+ 0x8f6f866f, 0x8f6f826f,
+ 0xbef6036f, 0xb9782a05,
+ 0x80788178, 0xbf0d9972,
+ 0xbf850002, 0x8f788978,
+ 0xbf820001, 0x8f788a78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x8078ff78,
+ 0x00000080, 0xbef603ff,
+ 0x01000000, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbefc0380, 0xbf850009,
+ 0xe0310000, 0x781d0000,
+ 0x807cff7c, 0x00000080,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7c, 0xbf85fff8,
+ 0xbf820008, 0xe0310000,
+ 0x781d0000, 0x807cff7c,
+ 0x00000100, 0x8078ff78,
+ 0x00000100, 0xbf0a6f7c,
+ 0xbf85fff8, 0xbef80380,
+ 0xbefe03c1, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850002, 0xbeff0380,
+ 0xbf820001, 0xbeff03c1,
+ 0xb96f2a05, 0x806f816f,
+ 0x8f6f826f, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850021, 0xbef603ff,
+ 0x01000000, 0xbeee0378,
+ 0x8078ff78, 0x00000200,
+ 0xbefc0384, 0xe0304000,
+ 0x785d0000, 0xe0304080,
+ 0x785d0100, 0xe0304100,
+ 0x785d0200, 0xe0304180,
+ 0x785d0300, 0xbf8c3f70,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807c847c, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85ffee, 0xe0304000,
+ 0x6e5d0000, 0xe0304080,
+ 0x6e5d0100, 0xe0304100,
+ 0x6e5d0200, 0xe0304180,
+ 0x6e5d0300, 0xbf820032,
+ 0xbef603ff, 0x01000000,
+ 0xbeee0378, 0x8078ff78,
+ 0x00000400, 0xbefc0384,
+ 0xe0304000, 0x785d0000,
+ 0xe0304100, 0x785d0100,
+ 0xe0304200, 0x785d0200,
+ 0xe0304300, 0x785d0300,
+ 0xbf8c3f70, 0x7e008500,
+ 0x7e028501, 0x7e048502,
+ 0x7e068503, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffee,
+ 0xb96f1e06, 0x876fc16f,
+ 0xbf84000e, 0x8f6f836f,
+ 0x806f7c6f, 0xbefe03c1,
+ 0xbeff0380, 0xe0304000,
+ 0x785d0000, 0xbf8c3f70,
+ 0x7e008500, 0x807c817c,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7c, 0xbf85fff7,
+ 0xbeff03c1, 0xe0304000,
+ 0x6e5d0000, 0xe0304100,
+ 0x6e5d0100, 0xe0304200,
+ 0x6e5d0200, 0xe0304300,
+ 0x6e5d0300, 0xbf8c3f70,
+ 0xb9782a05, 0x80788178,
+ 0xbf0d9972, 0xbf850002,
+ 0x8f788978, 0xbf820001,
+ 0x8f788a78, 0xb96e1e06,
+ 0x8f6e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0x80f8ff78, 0x00000050,
+ 0xbef603ff, 0x01000000,
+ 0xbefc03ff, 0x0000006c,
+ 0x80f89078, 0xf429003a,
+ 0xf0000000, 0xbf8cc07f,
+ 0x80fc847c, 0xbf800000,
+ 0xbe803100, 0xbe823102,
+ 0x80f8a078, 0xf42d003a,
+ 0xf0000000, 0xbf8cc07f,
+ 0x80fc887c, 0xbf800000,
+ 0xbe803100, 0xbe823102,
+ 0xbe843104, 0xbe863106,
+ 0x80f8c078, 0xf431003a,
+ 0xf0000000, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe803100, 0xbe823102,
+ 0xbe843104, 0xbe863106,
+ 0xbe883108, 0xbe8a310a,
+ 0xbe8c310c, 0xbe8e310e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb9782a05, 0x80788178,
+ 0xbf0d9972, 0xbf850002,
+ 0x8f788978, 0xbf820001,
+ 0x8f788a78, 0xb96e1e06,
+ 0x8f6e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0xbef603ff, 0x01000000,
+ 0xf4211bfa, 0xf0000000,
+ 0x80788478, 0xf4211b3a,
+ 0xf0000000, 0x80788478,
+ 0xf4211b7a, 0xf0000000,
+ 0x80788478, 0xf4211c3a,
+ 0xf0000000, 0x80788478,
+ 0xf4211c7a, 0xf0000000,
+ 0x80788478, 0xf4211eba,
+ 0xf0000000, 0x80788478,
+ 0xf4211efa, 0xf0000000,
+ 0x80788478, 0xf4211e7a,
+ 0xf0000000, 0x80788478,
+ 0xf4211cfa, 0xf0000000,
+ 0x80788478, 0xf4211bba,
+ 0xf0000000, 0x80788478,
+ 0xbf8cc07f, 0xb9eef814,
+ 0xf4211bba, 0xf0000000,
+ 0x80788478, 0xbf8cc07f,
+ 0xb9eef815, 0xbefc036f,
+ 0xbefe0370, 0xbeff0371,
+ 0x876f7bff, 0x000003ff,
+ 0xb9ef4803, 0x876f7bff,
+ 0xfffff800, 0x906f8b6f,
+ 0xb9efa2c3, 0xb9f3f801,
+ 0xb96e2a05, 0x806e816e,
+ 0xbf0d9972, 0xbf850002,
+ 0x8f6e896e, 0xbf820001,
+ 0x8f6e8a6e, 0x806eff6e,
+ 0x00000200, 0x806e746e,
+ 0x826f8075, 0x876fff6f,
+ 0x0000ffff, 0xf4091c37,
+ 0xfa000050, 0xf4091d37,
+ 0xfa000060, 0xf4011e77,
+ 0xfa000074, 0xbf8cc07f,
+ 0x876dff6d, 0x0000ffff,
+ 0x87fe7e7e, 0x87ea6a6a,
+ 0xb9faf802, 0xbf8a0000,
+ 0xbe80226c, 0xbf810000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0x00000000,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 4433bda2ce25..5b220f2a7501 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -20,6 +20,21 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
+/* To compile this assembly code:
+ *
+ * Navi1x:
+ * cpp -DASIC_TARGET_NAVI1X=1 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
+ * sp3-nv1x nv1x.sp3 -hex nv1x.hex
+ *
+ * Others:
+ * cpp -DASIC_TARGET_NAVI1X=0 cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
+ * sp3-gfx10 gfx10.sp3 -hex gfx10.hex
+ */
+
+#define NO_SQC_STORE !ASIC_TARGET_NAVI1X
+
+var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+
var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
@@ -59,6 +74,8 @@ var SQ_WAVE_IB_STS_RCNT_SIZE = 6
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF
+var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
+
var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
@@ -96,17 +113,19 @@ var s_save_pc_hi = ttmp1
var s_save_exec_lo = ttmp2
var s_save_exec_hi = ttmp3
var s_save_status = ttmp12
-var s_save_trapsts = ttmp5
-var s_save_xnack_mask = ttmp6
+var s_save_trapsts = ttmp15
+var s_save_xnack_mask = s_save_trapsts
var s_wave_size = ttmp7
var s_save_buf_rsrc0 = ttmp8
var s_save_buf_rsrc1 = ttmp9
var s_save_buf_rsrc2 = ttmp10
var s_save_buf_rsrc3 = ttmp11
-var s_save_mem_offset = ttmp14
+var s_save_mem_offset = ttmp4
var s_save_alloc_size = s_save_trapsts
-var s_save_tmp = s_save_buf_rsrc2
-var s_save_m0 = ttmp15
+var s_save_tmp = ttmp14
+var s_save_m0 = ttmp5
+var s_save_ttmps_lo = s_save_tmp
+var s_save_ttmps_hi = s_save_trapsts
var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
@@ -128,23 +147,25 @@ var s_restore_spi_init_lo = exec_lo
var s_restore_spi_init_hi = exec_hi
var s_restore_mem_offset = ttmp12
var s_restore_alloc_size = ttmp3
-var s_restore_tmp = ttmp6
+var s_restore_tmp = ttmp2
var s_restore_mem_offset_save = s_restore_tmp
var s_restore_m0 = s_restore_alloc_size
var s_restore_mode = ttmp7
-var s_restore_flat_scratch = ttmp2
+var s_restore_flat_scratch = s_restore_tmp
var s_restore_pc_lo = ttmp0
var s_restore_pc_hi = ttmp1
-var s_restore_exec_lo = ttmp14
-var s_restore_exec_hi = ttmp15
-var s_restore_status = ttmp4
-var s_restore_trapsts = ttmp5
+var s_restore_exec_lo = ttmp4
+var s_restore_exec_hi = ttmp5
+var s_restore_status = ttmp14
+var s_restore_trapsts = ttmp15
var s_restore_xnack_mask = ttmp13
var s_restore_buf_rsrc0 = ttmp8
var s_restore_buf_rsrc1 = ttmp9
var s_restore_buf_rsrc2 = ttmp10
var s_restore_buf_rsrc3 = ttmp11
-var s_restore_size = ttmp7
+var s_restore_size = ttmp6
+var s_restore_ttmps_lo = s_restore_tmp
+var s_restore_ttmps_hi = s_restore_alloc_size
shader main
asic(DEFAULT)
@@ -159,6 +180,24 @@ L_JUMP_TO_RESTORE:
L_SKIP_RESTORE:
s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK
+
+if SINGLE_STEP_MISSED_WORKAROUND
+ // No single step exceptions if MODE.DEBUG_EN=0.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
+ s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND
+
+ // Second-level trap already handled exception if STATUS.HALT=1.
+ s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+ // Prioritize single step exception over context save.
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+ s_cbranch_scc0 L_FETCH_2ND_TRAP
+
+L_NO_SINGLE_STEP_WORKAROUND:
+end
+
+
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
s_cbranch_scc1 L_SAVE
@@ -170,6 +209,8 @@ L_SKIP_RESTORE:
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
L_FETCH_2ND_TRAP:
+
+#if ASIC_TARGET_NAVI1X
// Preserve and clear scalar XNACK state before issuing scalar loads.
// Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
// unused space ttmp11[31:24].
@@ -183,6 +224,7 @@ L_FETCH_2ND_TRAP:
s_or_b32 ttmp11, ttmp11, ttmp3
s_andn2_b32 ttmp2, ttmp2, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+#endif
// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
@@ -207,6 +249,7 @@ L_NO_NEXT_TRAP:
L_EXCP_CASE:
s_and_b32 ttmp1, ttmp1, 0xFFFF
+#if ASIC_TARGET_NAVI1X
// Restore SQ_WAVE_IB_STS.
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
@@ -214,6 +257,7 @@ L_EXCP_CASE:
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
s_or_b32 ttmp2, ttmp2, ttmp3
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+#endif
// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@@ -223,23 +267,11 @@ L_EXCP_CASE:
s_rfe_b64 [ttmp0, ttmp1]
L_SAVE:
- //check whether there is mem_viol
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
- s_cbranch_scc0 L_NO_PC_REWIND
-
- //if so, need rewind PC assuming GDS operation gets NACKed
- s_mov_b32 s_save_tmp, 0
- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
- s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8
- s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0
-
-L_NO_PC_REWIND:
s_mov_b32 s_save_tmp, 0
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
- s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
+#if ASIC_TARGET_NAVI1X
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
@@ -253,6 +285,7 @@ L_NO_PC_REWIND:
s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
+#endif
/* inform SPI the readiness and wait for SPI's go signal */
s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
@@ -261,12 +294,31 @@ L_NO_PC_REWIND:
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
+#if ASIC_TARGET_NAVI1X
L_SLEEP:
// sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause
// SQ hang, since the 7,8th wave could not get arbit to exec inst, while
// other waves are stuck into the sleep-loop and waiting for wrexec!=0
s_sleep 0x2
s_cbranch_execz L_SLEEP
+#else
+ s_waitcnt lgkmcnt(0)
+#endif
+
+ // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ get_wave_size(s_save_ttmps_hi)
+ get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
+ s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+ s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
+
+#if ASIC_TARGET_NAVI1X
+ s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
+ s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
+ s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
+#endif
/* setup Resource Contants */
s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
@@ -285,9 +337,45 @@ L_SLEEP:
/* global mem offset */
s_mov_b32 s_save_mem_offset, 0x0
- s_getreg_b32 s_wave_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
- s_lshl_b32 s_wave_size, s_wave_size, S_WAVE_SIZE
- s_or_b32 s_wave_size, s_save_spi_init_hi, s_wave_size //share s_wave_size with exec_hi, it's at bit25
+ get_wave_size(s_wave_size)
+
+#if ASIC_TARGET_NAVI1X
+ // Save and clear vector XNACK state late to free up SGPRs.
+ s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
+ s_setreg_imm32_b32 hwreg(HW_REG_SHADER_XNACK_MASK), 0x0
+#endif
+
+ /* save first 4 VGPRs, needed for SGPR save */
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_4VGPR_WAVE32
+L_ENABLE_SAVE_4VGPR_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ s_branch L_SAVE_4VGPR_WAVE64
+L_SAVE_4VGPR_WAVE32:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
+ s_branch L_SAVE_HWREG
+
+L_SAVE_4VGPR_WAVE64:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
/* save HW registers */
@@ -300,6 +388,13 @@ L_SAVE_HWREG:
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+#if NO_SQC_STORE
+ v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource
+ v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource
+ v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store
+ s_mov_b32 m0, 0x0 //Next lane of v2 to write to
+#endif
+
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
@@ -307,8 +402,10 @@ L_SAVE_HWREG:
write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset)
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS)
+ write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
+
+ // Not used on Sienna_Cichlid but keep layout same for debugger.
write_hwreg_to_mem(s_save_xnack_mask, s_save_buf_rsrc0, s_save_mem_offset)
s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE)
@@ -320,10 +417,11 @@ L_SAVE_HWREG:
s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI)
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
- /* the first wave in the threadgroup */
- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
- s_mov_b32 s_save_exec_hi, 0x0
- s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26]
+#if NO_SQC_STORE
+ // Write HWREG/SGPRs with 32 VGPR lanes, wave32 is common case.
+ s_mov_b32 exec_hi, 0x0
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+#endif
/* save SGPRs */
// Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
@@ -334,10 +432,14 @@ L_SAVE_HWREG:
s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+#if NO_SQC_STORE
+ s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into
+#else
// backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
s_mov_b32 s_save_xnack_mask, s_save_buf_rsrc0
s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+#endif
s_mov_b32 m0, 0x0 //SGPR initial index value =0
s_nop 0x0 //Manually inserted wait states
@@ -353,6 +455,18 @@ L_SAVE_SGPR_LOOP:
s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
+
+#if NO_SQC_STORE
+ s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled?
+ s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
+ s_mov_b32 ttmp13, 0x0
+ v_mov_b32 v2, 0x0
+L_SAVE_SGPR_SKIP_TCP_STORE:
+#endif
+
s_add_u32 m0, m0, 16 //next sgpr index
s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0
s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete?
@@ -366,43 +480,12 @@ L_SAVE_SGPR_LOOP:
s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
write_12sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
+#if NO_SQC_STORE
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+#else
// restore s_save_buf_rsrc0,1
s_mov_b32 s_save_buf_rsrc0, s_save_xnack_mask
-
- /* save first 4 VGPR, then LDS save could use */
- // each wave will alloc 4 vgprs at least...
-
- s_mov_b32 s_save_mem_offset, 0
- s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
- s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
- s_and_b32 m0, m0, 1
- s_cmp_eq_u32 m0, 1
- s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI
- s_mov_b32 exec_hi, 0x00000000
- s_branch L_SAVE_4VGPR_WAVE32
-L_ENABLE_SAVE_4VGPR_EXEC_HI:
- s_mov_b32 exec_hi, 0xFFFFFFFF
- s_branch L_SAVE_4VGPR_WAVE64
-L_SAVE_4VGPR_WAVE32:
- s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
-
- // VGPR Allocated in 4-GPR granularity
-
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
- s_branch L_SAVE_LDS
-
-L_SAVE_4VGPR_WAVE64:
- s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
-
- // VGPR Allocated in 4-GPR granularity
-
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+#endif
/* save LDS */
@@ -423,7 +506,7 @@ L_SAVE_LDS_NORMAL:
s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
s_barrier //LDS is used? wait for other waves in the same TG
- s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+ s_and_b32 s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
s_cbranch_scc0 L_SAVE_LDS_DONE
// first wave do LDS save;
@@ -598,9 +681,7 @@ L_RESTORE:
s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
//determine it is wave32 or wave64
- s_getreg_b32 s_restore_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
- s_lshl_b32 s_restore_size, s_restore_size, S_WAVE_SIZE
- s_or_b32 s_restore_size, s_restore_spi_init_hi, s_restore_size
+ get_wave_size(s_restore_size)
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
s_cbranch_scc0 L_RESTORE_VGPR
@@ -634,7 +715,7 @@ L_RESTORE_LDS_NORMAL:
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
- s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
s_and_b32 m0, m0, 1
s_cmp_eq_u32 m0, 1
s_mov_b32 m0, 0x0
@@ -842,38 +923,55 @@ L_RESTORE_HWREG:
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch
- s_mov_b32 s_restore_tmp, s_restore_pc_hi
- s_and_b32 s_restore_pc_hi, s_restore_tmp, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
-
s_mov_b32 m0, s_restore_m0
s_mov_b32 exec_lo, s_restore_exec_lo
s_mov_b32 exec_hi, s_restore_exec_hi
s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
+
+#if ASIC_TARGET_NAVI1X
s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
+#endif
+
s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
- s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_RCNT_MASK
+
+ // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
+ s_waitcnt lgkmcnt(0)
+
+#if ASIC_TARGET_NAVI1X
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
- s_mov_b32 s_restore_mode, 0x0
- s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
- s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_FIRST_REPLAY_MASK
+ s_mov_b32 s_restore_tmp, 0x0
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
- s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
- s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_REPLAY_W64H_MASK
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_REPLAY_W64H_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT
- s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
- s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_mode
+ s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+#endif
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
@@ -887,32 +985,53 @@ L_END_PGM:
end
function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+#if NO_SQC_STORE
+ // Copy into VGPR for later TCP store.
+ v_writelane_b32 v2, s, m0
+ s_add_u32 m0, m0, 0x1
+#else
s_mov_b32 exec_lo, m0
s_mov_b32 m0, s_mem_offset
s_buffer_store_dword s, s_rsrc, m0 glc:1
s_add_u32 s_mem_offset, s_mem_offset, 4
s_mov_b32 m0, exec_lo
+#endif
end
function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+#if NO_SQC_STORE
+ // Copy into VGPR for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], ttmp13
+ s_add_u32 ttmp13, ttmp13, 0x1
+ end
+#else
s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
+#endif
end
function write_12sgpr_to_mem(s, s_rsrc, s_mem_offset)
+#if NO_SQC_STORE
+ // Copy into VGPR for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], ttmp13
+ s_add_u32 ttmp13, ttmp13, 0x1
+ end
+#else
s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
s_add_u32 s_rsrc[0], s_rsrc[0], 4*12
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
+#endif
end
-
function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
s_add_u32 s_mem_offset, s_mem_offset, 4
@@ -942,9 +1061,7 @@ end
function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
- s_lshr_b32 m0, s_size, S_WAVE_SIZE
- s_and_b32 m0, m0, 1
- s_cmp_eq_u32 m0, 1
+ s_bitcmp1_b32 s_size, S_WAVE_SIZE
s_cbranch_scc1 L_ENABLE_SHIFT_W64
s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //Number of VGPRs = (vgpr_size + 1) * 4 * 32 * 4 (non-zero value)
s_branch L_SHIFT_DONE
@@ -965,3 +1082,9 @@ end
function get_hwreg_size_bytes
return 128
end
+
+function get_wave_size(s_reg)
+ s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
+ s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
+ s_or_b32 s_reg, s_save_spi_init_hi, s_reg //share with exec_hi, it's at bit25
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index cf0017f4d9d5..e9b96ad3d9a5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -39,6 +39,7 @@
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "amdgpu_amdkfd.h"
+#include "kfd_smi_events.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -1740,6 +1741,20 @@ err_unlock:
return r;
}
+/* Handle requests for watching SMI events */
+static int kfd_ioctl_smi_events(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_smi_events_args *args = data;
+ struct kfd_dev *dev;
+
+ dev = kfd_device_by_id(args->gpuid);
+ if (!dev)
+ return -EINVAL;
+
+ return kfd_smi_event_open(dev, &args->anon_fd);
+}
+
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -1835,6 +1850,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
kfd_ioctl_alloc_queue_gws, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
+ kfd_ioctl_smi_events, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 1009a3b8dcc2..6a250f8fcfb8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -678,6 +678,8 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_NAVY_FLOUNDER:
pcache_info = navi10_cache_info;
num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 0491ab2b4a9b..4bfedaab183f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -46,6 +46,7 @@ extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
extern const struct kfd2kgd_calls arcturus_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
static const struct kfd2kgd_calls *kfd2kgd_funcs[] = {
#ifdef KFD_SUPPORT_IOMMU_V2
@@ -72,6 +73,8 @@ static const struct kfd2kgd_calls *kfd2kgd_funcs[] = {
[CHIP_NAVI10] = &gfx_v10_kfd2kgd,
[CHIP_NAVI12] = &gfx_v10_kfd2kgd,
[CHIP_NAVI14] = &gfx_v10_kfd2kgd,
+ [CHIP_SIENNA_CICHLID] = &gfx_v10_3_kfd2kgd,
+ [CHIP_NAVY_FLOUNDER] = &gfx_v10_3_kfd2kgd,
};
#ifdef KFD_SUPPORT_IOMMU_V2
@@ -458,6 +461,42 @@ static const struct kfd_device_info navi14_device_info = {
.num_sdma_queues_per_engine = 8,
};
+static const struct kfd_device_info sienna_cichlid_device_info = {
+ .asic_family = CHIP_SIENNA_CICHLID,
+ .asic_name = "sienna_cichlid",
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .needs_iommu_device = false,
+ .supports_cwsr = true,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 4,
+ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 8,
+};
+
+static const struct kfd_device_info navy_flounder_device_info = {
+ .asic_family = CHIP_NAVY_FLOUNDER,
+ .asic_name = "navy_flounder",
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .needs_iommu_device = false,
+ .supports_cwsr = true,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 8,
+};
+
/* For each entry, [0] is regular and [1] is virtualisation device. */
static const struct kfd_device_info *kfd_supported_devices[][2] = {
#ifdef KFD_SUPPORT_IOMMU_V2
@@ -480,6 +519,8 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
[CHIP_NAVI10] = {&navi10_device_info, NULL},
[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
[CHIP_NAVI14] = {&navi14_device_info, NULL},
+ [CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_device_info},
+ [CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, &navy_flounder_device_info},
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -559,6 +600,10 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx9_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
+ } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_nv1x_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
} else {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx10_hex;
@@ -577,15 +622,24 @@ static int kfd_gws_init(struct kfd_dev *kfd)
return 0;
if (hws_gws_support
- || (kfd->device_info->asic_family >= CHIP_VEGA10
+ || (kfd->device_info->asic_family == CHIP_VEGA10
+ && kfd->mec2_fw_version >= 0x81b3)
+ || (kfd->device_info->asic_family >= CHIP_VEGA12
&& kfd->device_info->asic_family <= CHIP_RAVEN
- && kfd->mec2_fw_version >= 0x1b3))
+ && kfd->mec2_fw_version >= 0x1b3)
+ || (kfd->device_info->asic_family == CHIP_ARCTURUS
+ && kfd->mec2_fw_version >= 0x30))
ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
return ret;
}
+static void kfd_smi_init(struct kfd_dev *dev) {
+ INIT_LIST_HEAD(&dev->smi_clients);
+ spin_lock_init(&dev->smi_lock);
+}
+
bool kgd2kfd_device_init(struct kfd_dev *kfd,
struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources)
@@ -700,6 +754,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_topology_add_device_error;
}
+ kfd_smi_init(kfd);
+
kfd->init_complete = true;
dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
kfd->pdev->device);
@@ -910,6 +966,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
if (!p)
return -ESRCH;
+ WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
r = kfd_process_evict_queues(p);
kfd_unref_process(p);
@@ -977,6 +1034,8 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
/* During process initialization eviction_work.dwork is initialized
* to kfd_evict_bo_worker
*/
+ WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
+ p->lead_thread->pid, delay_jiffies);
schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
kfd_unref_process(p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e9c4867abeff..e0e60b0d0669 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -137,7 +137,7 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
qpd->sh_mem_bases);
}
-void increment_queue_count(struct device_queue_manager *dqm,
+static void increment_queue_count(struct device_queue_manager *dqm,
enum kfd_queue_type type)
{
dqm->active_queue_count++;
@@ -145,7 +145,7 @@ void increment_queue_count(struct device_queue_manager *dqm,
dqm->active_cp_queue_count++;
}
-void decrement_queue_count(struct device_queue_manager *dqm,
+static void decrement_queue_count(struct device_queue_manager *dqm,
enum kfd_queue_type type)
{
dqm->active_queue_count--;
@@ -153,6 +153,30 @@ void decrement_queue_count(struct device_queue_manager *dqm,
dqm->active_cp_queue_count--;
}
+int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val)
+{
+ int ret;
+ uint64_t tmp = 0;
+
+ if (!val)
+ return -EINVAL;
+ /*
+ * SDMA activity counter is stored at queue's RPTR + 0x8 location.
+ */
+ if (!access_ok((const void __user *)(q_rptr +
+ sizeof(uint64_t)), sizeof(uint64_t))) {
+ pr_err("Can't access sdma queue activity counter\n");
+ return -EFAULT;
+ }
+
+ ret = get_user(tmp, (uint64_t *)(q_rptr + sizeof(uint64_t)));
+ if (!ret) {
+ *val = tmp;
+ }
+
+ return ret;
+}
+
static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
struct kfd_dev *dev = qpd->dqm->dev;
@@ -487,6 +511,7 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
if (retval == -ETIME)
qpd->reset_wavefronts = true;
+
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
list_del(&q->list);
@@ -521,9 +546,23 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q)
{
int retval;
+ uint64_t sdma_val = 0;
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+
+ /* Get the SDMA queue stats */
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+ (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+ retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
+ &sdma_val);
+ if (retval)
+ pr_err("Failed to read SDMA queue counter for queue: %d\n",
+ q->properties.queue_id);
+ }
dqm_lock(dqm);
retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
+ if (!retval)
+ pdd->sdma_past_activity_counter += sdma_val;
dqm_unlock(dqm);
return retval;
@@ -1428,6 +1467,18 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
{
int retval;
struct mqd_manager *mqd_mgr;
+ uint64_t sdma_val = 0;
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+
+ /* Get the SDMA queue stats */
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+ (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+ retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
+ &sdma_val);
+ if (retval)
+ pr_err("Failed to read SDMA queue counter for queue: %d\n",
+ q->properties.queue_id);
+ }
retval = 0;
@@ -1449,10 +1500,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
deallocate_doorbell(qpd, q);
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
- deallocate_sdma_queue(dqm, q);
- else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+ (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
deallocate_sdma_queue(dqm, q);
+ pdd->sdma_past_activity_counter += sdma_val;
+ }
list_del(&q->list);
qpd->queue_count--;
@@ -1886,6 +1938,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_NAVY_FLOUNDER:
device_queue_manager_init_v10_navi10(&dqm->asic_ops);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 4afa015c69b1..49d8e324c636 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -251,4 +251,5 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
mutex_unlock(&dqm->lock_hidden);
}
+int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val);
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 78714f9a8b11..c1166c40ac15 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -415,6 +415,8 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_NAVY_FLOUNDER:
kfd_init_apertures_v9(pdd, id);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index fce6ccabe38b..241bd6ff79f4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -24,6 +24,7 @@
#include "kfd_events.h"
#include "soc15_int.h"
#include "kfd_device_queue_manager.h"
+#include "kfd_smi_events.h"
static bool event_interrupt_isr_v9(struct kfd_dev *dev,
const uint32_t *ih_ring_entry,
@@ -117,6 +118,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
info.prot_read = ring_id & 0x10;
info.prot_write = ring_id & 0x20;
+ kfd_smi_event_update_vmfault(dev, pasid);
kfd_process_vm_fault(dev->dqm, pasid);
kfd_signal_vm_fault_event(dev, pasid, &info);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index d1d68a51bfb8..18e08d82d978 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -113,7 +113,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
- 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+ 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
if (q->format == KFD_QUEUE_FORMAT_AQL) {
m->cp_hqd_aql_control =
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 48cda3073b70..3b6f5963180d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -160,7 +160,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
- 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+ 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
if (q->format == KFD_QUEUE_FORMAT_AQL) {
m->cp_hqd_aql_control =
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index a5e8ff1e5945..31799e5f3b3c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -117,7 +117,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
- 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+ 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
set_priority(m, q);
m->cp_hqd_eop_rptr = 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 685ca82d42fe..47ee40fbbd86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -245,6 +245,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_NAVY_FLOUNDER:
pm->pmf = &kfd_v9_pm_funcs;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index bdca9dc5f118..dfaf771a42e6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -39,7 +39,7 @@ static int pm_map_process_v9(struct packet_manager *pm,
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
- packet->bitfields2.process_quantum = 1;
+ packet->bitfields2.process_quantum = 10;
packet->bitfields2.pasid = qpd->pqm->process->pasid;
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index bed4d0ccb6b1..a852e0d7d804 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -50,7 +50,7 @@ static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
- packet->bitfields2.process_quantum = 1;
+ packet->bitfields2.process_quantum = 10;
packet->bitfields2.pasid = qpd->pqm->process->pasid;
packet->bitfields3.page_table_base = qpd->page_table_base;
packet->bitfields10.gds_size = qpd->gds_size;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
index 33b08ff00b50..2a07c4f2cd0d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -25,7 +25,7 @@
#include "amdgpu_ids.h"
static unsigned int pasid_bits = 16;
-static const struct kfd2kgd_calls *kfd2kgd;
+static bool pasids_allocated; /* = false */
bool kfd_set_pasid_limit(unsigned int new_limit)
{
@@ -33,7 +33,7 @@ bool kfd_set_pasid_limit(unsigned int new_limit)
return false;
if (new_limit < (1U << pasid_bits)) {
- if (kfd2kgd)
+ if (pasids_allocated)
/* We've already allocated user PASIDs, too late to
* change the limit
*/
@@ -53,32 +53,17 @@ unsigned int kfd_get_pasid_limit(void)
unsigned int kfd_pasid_alloc(void)
{
- int r;
+ int r = amdgpu_pasid_alloc(pasid_bits);
- /* Find the first best KFD device for calling KGD */
- if (!kfd2kgd) {
- struct kfd_dev *dev = NULL;
- unsigned int i = 0;
-
- while ((kfd_topology_enum_kfd_devices(i, &dev)) == 0) {
- if (dev && dev->kfd2kgd) {
- kfd2kgd = dev->kfd2kgd;
- break;
- }
- i++;
- }
-
- if (!kfd2kgd)
- return false;
+ if (r > 0) {
+ pasids_allocated = true;
+ return r;
}
- r = amdgpu_pasid_alloc(pasid_bits);
-
- return r > 0 ? r : 0;
+ return 0;
}
void kfd_pasid_free(unsigned int pasid)
{
- if (kfd2kgd)
- amdgpu_pasid_free(pasid);
+ amdgpu_pasid_free(pasid);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index fee60921fccf..6727e9de5b8b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -97,7 +97,7 @@
* Size of the per-process TBA+TMA buffer: 2 pages
*
* The first page is the TBA used for the CWSR ISA code. The second
- * page is used as TMA for daisy changing a user-mode trap handler.
+ * page is used as TMA for user-mode trap handler setup in daisy-chain mode.
*/
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
@@ -157,26 +157,21 @@ extern int debug_largebar;
*/
extern int ignore_crat;
-/*
- * Set sh_mem_config.retry_disable on Vega10
- */
+/* Set sh_mem_config.retry_disable on GFX v9 */
extern int amdgpu_noretry;
-/*
- * Halt if HWS hang is detected
- */
+/* Halt if HWS hang is detected */
extern int halt_if_hws_hang;
-/*
- * Whether MEC FW support GWS barriers
- */
+/* Whether MEC FW support GWS barriers */
extern bool hws_gws_support;
-/*
- * Queue preemption timeout in ms
- */
+/* Queue preemption timeout in ms */
extern int queue_preemption_timeout_ms;
+/* Enable eviction debug messages */
+extern bool debug_evictions;
+
enum cache_policy {
cache_policy_coherent,
cache_policy_noncoherent
@@ -296,7 +291,7 @@ struct kfd_dev {
/* xGMI */
uint64_t hive_id;
-
+
/* UUID */
uint64_t unique_id;
@@ -308,8 +303,12 @@ struct kfd_dev {
/* Compute Profile ref. count */
atomic_t compute_profile;
- /* Global GWS resource shared b/t processes*/
+ /* Global GWS resource shared between processes */
void *gws;
+
+ /* Clients watching SMI events */
+ struct list_head smi_clients;
+ spinlock_t smi_lock;
};
enum kfd_mempool {
@@ -324,7 +323,7 @@ void kfd_chardev_exit(void);
struct device *kfd_chardev(void);
/**
- * enum kfd_unmap_queues_filter
+ * enum kfd_unmap_queues_filter - Enum for queue filters.
*
* @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue.
*
@@ -343,15 +342,17 @@ enum kfd_unmap_queues_filter {
};
/**
- * enum kfd_queue_type
+ * enum kfd_queue_type - Enum for various queue types.
*
* @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type.
*
- * @KFD_QUEUE_TYPE_SDMA: Sdma user mode queue type.
+ * @KFD_QUEUE_TYPE_SDMA: SDMA user mode queue type.
*
* @KFD_QUEUE_TYPE_HIQ: HIQ queue type.
*
* @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
+ *
+ * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface.
*/
enum kfd_queue_type {
KFD_QUEUE_TYPE_COMPUTE,
@@ -397,9 +398,9 @@ enum KFD_QUEUE_PRIORITY {
*
* @write_ptr: Defines the number of dwords written to the ring buffer.
*
- * @doorbell_ptr: This field aim is to notify the H/W of new packet written to
- * the queue ring buffer. This field should be similar to write_ptr and the
- * user should update this field after he updated the write_ptr.
+ * @doorbell_ptr: Notifies the H/W of new packet written to the queue ring
+ * buffer. This field should be similar to write_ptr and the user should
+ * update this field after updating the write_ptr.
*
* @doorbell_off: The doorbell offset in the doorbell pci-bar.
*
@@ -468,7 +469,7 @@ struct queue_properties {
*
* @list: Queue linked list.
*
- * @mqd: The queue MQD.
+ * @mqd: The queue MQD (memory queue descriptor).
*
* @mqd_mem_obj: The MQD local gpu memory object.
*
@@ -477,7 +478,7 @@ struct queue_properties {
* @properties: The queue properties.
*
* @mec: Used only in no cp scheduling mode and identifies to micro engine id
- * that the queue should be execute on.
+ * that the queue should be executed on.
*
* @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
* id.
@@ -518,9 +519,6 @@ struct queue {
struct kobject kobj;
};
-/*
- * Please read the kfd_mqd_manager.h description.
- */
enum KFD_MQD_TYPE {
KFD_MQD_TYPE_HIQ = 0, /* for hiq */
KFD_MQD_TYPE_CP, /* for cp queues and diq */
@@ -578,9 +576,7 @@ struct qcm_process_device {
*/
bool mapped_gws_queue;
- /*
- * All the memory management data should be here too
- */
+ /* All the memory management data should be here too */
uint64_t gds_context_area;
/* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */
uint64_t page_table_base;
@@ -630,7 +626,14 @@ enum kfd_pdd_bound {
PDD_BOUND_SUSPENDED,
};
-#define MAX_VRAM_FILENAME_LEN 11
+#define MAX_SYSFS_FILENAME_LEN 11
+
+/*
+ * SDMA counter runs at 100MHz frequency.
+ * We display SDMA activity in microsecond granularity in sysfs.
+ * As a result, the divisor is 100.
+ */
+#define SDMA_ACTIVITY_DIVISOR 100
/* Data that is per-process-per device. */
struct kfd_process_device {
@@ -678,7 +681,12 @@ struct kfd_process_device {
/* VRAM usage */
uint64_t vram_usage;
struct attribute attr_vram;
- char vram_filename[MAX_VRAM_FILENAME_LEN];
+ char vram_filename[MAX_SYSFS_FILENAME_LEN];
+
+ /* SDMA activity tracking */
+ uint64_t sdma_past_activity_counter;
+ struct attribute attr_sdma;
+ char sdma_filename[MAX_SYSFS_FILENAME_LEN];
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -768,11 +776,13 @@ extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
extern struct srcu_struct kfd_processes_srcu;
/**
- * Ioctl function type.
+ * typedef amdkfd_ioctl_t - typedef for ioctl function pointer.
+ *
+ * @filep: pointer to file structure.
+ * @p: amdkfd process pointer.
+ * @data: pointer to arg that was copied from user.
*
- * \param filep pointer to file structure.
- * \param p amdkfd process pointer.
- * \param data pointer to arg that was copied from user.
+ * Return: returns ioctl completion code.
*/
typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p,
void *data);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 0e0c42e9f6a3..40695d52e9a8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -25,6 +25,7 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
+#include <linux/mmu_context.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
@@ -76,6 +77,178 @@ struct kfd_procfs_tree {
static struct kfd_procfs_tree procfs;
+/*
+ * Structure for SDMA activity tracking
+ */
+struct kfd_sdma_activity_handler_workarea {
+ struct work_struct sdma_activity_work;
+ struct kfd_process_device *pdd;
+ uint64_t sdma_activity_counter;
+};
+
+struct temp_sdma_queue_list {
+ uint64_t rptr;
+ uint64_t sdma_val;
+ unsigned int queue_id;
+ struct list_head list;
+};
+
+static void kfd_sdma_activity_worker(struct work_struct *work)
+{
+ struct kfd_sdma_activity_handler_workarea *workarea;
+ struct kfd_process_device *pdd;
+ uint64_t val;
+ struct mm_struct *mm;
+ struct queue *q;
+ struct qcm_process_device *qpd;
+ struct device_queue_manager *dqm;
+ int ret = 0;
+ struct temp_sdma_queue_list sdma_q_list;
+ struct temp_sdma_queue_list *sdma_q, *next;
+
+ workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
+ sdma_activity_work);
+ if (!workarea)
+ return;
+
+ pdd = workarea->pdd;
+ if (!pdd)
+ return;
+ dqm = pdd->dev->dqm;
+ qpd = &pdd->qpd;
+ if (!dqm || !qpd)
+ return;
+ /*
+ * Total SDMA activity is current SDMA activity + past SDMA activity
+ * Past SDMA count is stored in pdd.
+ * To get the current activity counters for all active SDMA queues,
+ * we loop over all SDMA queues and get their counts from user-space.
+ *
+ * We cannot call get_user() with dqm_lock held as it can cause
+ * a circular lock dependency situation. To read the SDMA stats,
+ * we need to do the following:
+ *
+ * 1. Create a temporary list of SDMA queue nodes from the qpd->queues_list,
+ * with dqm_lock/dqm_unlock().
+ * 2. Call get_user() for each node in temporary list without dqm_lock.
+ * Save the SDMA count for each node and also add the count to the total
+ * SDMA count counter.
+ * Its possible, during this step, a few SDMA queue nodes got deleted
+ * from the qpd->queues_list.
+ * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted.
+ * If any node got deleted, its SDMA count would be captured in the sdma
+ * past activity counter. So subtract the SDMA counter stored in step 2
+ * for this node from the total SDMA count.
+ */
+ INIT_LIST_HEAD(&sdma_q_list.list);
+
+ /*
+ * Create the temp list of all SDMA queues
+ */
+ dqm_lock(dqm);
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
+ (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
+ continue;
+
+ sdma_q = kzalloc(sizeof(struct temp_sdma_queue_list), GFP_KERNEL);
+ if (!sdma_q) {
+ dqm_unlock(dqm);
+ goto cleanup;
+ }
+
+ INIT_LIST_HEAD(&sdma_q->list);
+ sdma_q->rptr = (uint64_t)q->properties.read_ptr;
+ sdma_q->queue_id = q->properties.queue_id;
+ list_add_tail(&sdma_q->list, &sdma_q_list.list);
+ }
+
+ /*
+ * If the temp list is empty, then no SDMA queues nodes were found in
+ * qpd->queues_list. Return the past activity count as the total sdma
+ * count
+ */
+ if (list_empty(&sdma_q_list.list)) {
+ workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;
+ dqm_unlock(dqm);
+ return;
+ }
+
+ dqm_unlock(dqm);
+
+ /*
+ * Get the usage count for each SDMA queue in temp_list.
+ */
+ mm = get_task_mm(pdd->process->lead_thread);
+ if (!mm)
+ goto cleanup;
+
+ kthread_use_mm(mm);
+
+ list_for_each_entry(sdma_q, &sdma_q_list.list, list) {
+ val = 0;
+ ret = read_sdma_queue_counter(sdma_q->rptr, &val);
+ if (ret) {
+ pr_debug("Failed to read SDMA queue active counter for queue id: %d",
+ sdma_q->queue_id);
+ } else {
+ sdma_q->sdma_val = val;
+ workarea->sdma_activity_counter += val;
+ }
+ }
+
+ kthread_unuse_mm(mm);
+ mmput(mm);
+
+ /*
+ * Do a second iteration over qpd_queues_list to check if any SDMA
+ * nodes got deleted while fetching SDMA counter.
+ */
+ dqm_lock(dqm);
+
+ workarea->sdma_activity_counter += pdd->sdma_past_activity_counter;
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (list_empty(&sdma_q_list.list))
+ break;
+
+ if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
+ (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
+ continue;
+
+ list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
+ if (((uint64_t)q->properties.read_ptr == sdma_q->rptr) &&
+ (sdma_q->queue_id == q->properties.queue_id)) {
+ list_del(&sdma_q->list);
+ kfree(sdma_q);
+ break;
+ }
+ }
+ }
+
+ dqm_unlock(dqm);
+
+ /*
+ * If temp list is not empty, it implies some queues got deleted
+ * from qpd->queues_list during SDMA usage read. Subtract the SDMA
+ * count for each node from the total SDMA count.
+ */
+ list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
+ workarea->sdma_activity_counter -= sdma_q->sdma_val;
+ list_del(&sdma_q->list);
+ kfree(sdma_q);
+ }
+
+ return;
+
+cleanup:
+ list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
+ list_del(&sdma_q->list);
+ kfree(sdma_q);
+ }
+}
+
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
@@ -87,8 +260,24 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
} else if (strncmp(attr->name, "vram_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_vram);
- if (pdd)
- return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+ return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+ } else if (strncmp(attr->name, "sdma_", 5) == 0) {
+ struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
+ attr_sdma);
+ struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
+
+ INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
+ kfd_sdma_activity_worker);
+
+ sdma_activity_work_handler.pdd = pdd;
+
+ schedule_work(&sdma_activity_work_handler.sdma_activity_work);
+
+ flush_work(&sdma_activity_work_handler.sdma_activity_work);
+
+ return snprintf(buffer, PAGE_SIZE, "%llu\n",
+ (sdma_activity_work_handler.sdma_activity_counter)/
+ SDMA_ACTIVITY_DIVISOR);
} else {
pr_err("Invalid attribute");
return -EINVAL;
@@ -210,7 +399,24 @@ int kfd_procfs_add_queue(struct queue *q)
return 0;
}
-int kfd_procfs_add_vram_usage(struct kfd_process *p)
+static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
+ char *name)
+{
+ int ret = 0;
+
+ if (!p || !attr || !name)
+ return -EINVAL;
+
+ attr->name = name;
+ attr->mode = KFD_SYSFS_FILE_MODE;
+ sysfs_attr_init(attr);
+
+ ret = sysfs_create_file(p->kobj, attr);
+
+ return ret;
+}
+
+static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
{
int ret = 0;
struct kfd_process_device *pdd;
@@ -221,17 +427,25 @@ int kfd_procfs_add_vram_usage(struct kfd_process *p)
if (!p->kobj)
return -EFAULT;
- /* Create proc/<pid>/vram_<gpuid> file for each GPU */
+ /*
+ * Create sysfs files for each GPU:
+ * - proc/<pid>/vram_<gpuid>
+ * - proc/<pid>/sdma_<gpuid>
+ */
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
- snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
+ snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
pdd->dev->id);
- pdd->attr_vram.name = pdd->vram_filename;
- pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
- sysfs_attr_init(&pdd->attr_vram);
- ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
+ ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
if (ret)
pr_warn("Creating vram usage for gpu id %d failed",
(int)pdd->dev->id);
+
+ snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
+ pdd->dev->id);
+ ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
+ if (ret)
+ pr_warn("Creating sdma usage for gpu id %d failed",
+ (int)pdd->dev->id);
}
return ret;
@@ -445,9 +659,9 @@ struct kfd_process *kfd_create_process(struct file *filep)
if (!process->kobj_queues)
pr_warn("Creating KFD proc/queues folder failed");
- ret = kfd_procfs_add_vram_usage(process);
+ ret = kfd_procfs_add_sysfs_files(process);
if (ret)
- pr_warn("Creating vram usage file for pid %d failed",
+ pr_warn("Creating sysfs usage file for pid %d failed",
(int)process->lead_thread->pid);
}
out:
@@ -598,8 +812,10 @@ static void kfd_process_wq_release(struct work_struct *work)
kobject_put(p->kobj_queues);
p->kobj_queues = NULL;
- list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
sysfs_remove_file(p->kobj, &pdd->attr_vram);
+ sysfs_remove_file(p->kobj, &pdd->attr_sdma);
+ }
kobject_del(p->kobj);
kobject_put(p->kobj);
@@ -907,6 +1123,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
pdd->already_dequeued = false;
pdd->runtime_inuse = false;
pdd->vram_usage = 0;
+ pdd->sdma_past_activity_counter = 0;
list_add(&pdd->per_device_list, &p->per_device_data);
/* Init idr used for memory handle translation */
@@ -1003,8 +1220,10 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
*/
if (!pdd->runtime_inuse) {
err = pm_runtime_get_sync(dev->ddev->dev);
- if (err < 0)
+ if (err < 0) {
+ pm_runtime_put_autosuspend(dev->ddev->dev);
return ERR_PTR(err);
+ }
}
err = kfd_iommu_bind_process_to_device(pdd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
new file mode 100644
index 000000000000..7b348bf9df21
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/poll.h>
+#include <linux/wait.h>
+#include <linux/anon_inodes.h>
+#include <uapi/linux/kfd_ioctl.h>
+#include "amdgpu_vm.h"
+#include "kfd_priv.h"
+#include "kfd_smi_events.h"
+
+struct kfd_smi_client {
+ struct list_head list;
+ struct kfifo fifo;
+ wait_queue_head_t wait_queue;
+ /* events enabled */
+ uint64_t events;
+ struct kfd_dev *dev;
+ spinlock_t lock;
+};
+
+#define MAX_KFIFO_SIZE 1024
+
+static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *);
+static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *);
+static ssize_t kfd_smi_ev_write(struct file *, const char __user *, size_t,
+ loff_t *);
+static int kfd_smi_ev_release(struct inode *, struct file *);
+
+static const char kfd_smi_name[] = "kfd_smi_ev";
+
+static const struct file_operations kfd_smi_ev_fops = {
+ .owner = THIS_MODULE,
+ .poll = kfd_smi_ev_poll,
+ .read = kfd_smi_ev_read,
+ .write = kfd_smi_ev_write,
+ .release = kfd_smi_ev_release
+};
+
+static __poll_t kfd_smi_ev_poll(struct file *filep,
+ struct poll_table_struct *wait)
+{
+ struct kfd_smi_client *client = filep->private_data;
+ __poll_t mask = 0;
+
+ poll_wait(filep, &client->wait_queue, wait);
+
+ spin_lock(&client->lock);
+ if (!kfifo_is_empty(&client->fifo))
+ mask = EPOLLIN | EPOLLRDNORM;
+ spin_unlock(&client->lock);
+
+ return mask;
+}
+
+static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
+ size_t size, loff_t *offset)
+{
+ int ret;
+ size_t to_copy;
+ struct kfd_smi_client *client = filep->private_data;
+ unsigned char *buf;
+
+ buf = kmalloc(MAX_KFIFO_SIZE * sizeof(*buf), GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* kfifo_to_user can sleep so we can't use spinlock protection around
+ * it. Instead, we kfifo out as spinlocked then copy them to the user.
+ */
+ spin_lock(&client->lock);
+ to_copy = kfifo_len(&client->fifo);
+ if (!to_copy) {
+ spin_unlock(&client->lock);
+ ret = -EAGAIN;
+ goto ret_err;
+ }
+ to_copy = min3(size, sizeof(buf), to_copy);
+ ret = kfifo_out(&client->fifo, buf, to_copy);
+ spin_unlock(&client->lock);
+ if (ret <= 0) {
+ ret = -EAGAIN;
+ goto ret_err;
+ }
+
+ ret = copy_to_user(user, buf, to_copy);
+ if (ret) {
+ ret = -EFAULT;
+ goto ret_err;
+ }
+
+ kfree(buf);
+ return to_copy;
+
+ret_err:
+ kfree(buf);
+ return ret;
+}
+
+static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
+ size_t size, loff_t *offset)
+{
+ struct kfd_smi_client *client = filep->private_data;
+ uint64_t events;
+
+ if (!access_ok(user, size) || size < sizeof(events))
+ return -EFAULT;
+ if (copy_from_user(&events, user, sizeof(events)))
+ return -EFAULT;
+
+ WRITE_ONCE(client->events, events);
+
+ return sizeof(events);
+}
+
+static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
+{
+ struct kfd_smi_client *client = filep->private_data;
+ struct kfd_dev *dev = client->dev;
+
+ spin_lock(&dev->smi_lock);
+ list_del_rcu(&client->list);
+ spin_unlock(&dev->smi_lock);
+
+ synchronize_rcu();
+ kfifo_free(&client->fifo);
+ kfree(client);
+
+ return 0;
+}
+
+void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
+ struct amdgpu_task_info task_info;
+ /* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
+ /* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43
+ */
+ char fifo_in[43];
+ struct kfd_smi_client *client;
+ int len;
+
+ if (list_empty(&dev->smi_clients))
+ return;
+
+ memset(&task_info, 0, sizeof(struct amdgpu_task_info));
+ amdgpu_vm_get_task_info(adev, pasid, &task_info);
+ /* Report VM faults from user applications, not retry from kernel */
+ if (!task_info.pid)
+ return;
+
+ len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
+ task_info.pid, task_info.task_name);
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(client, &dev->smi_clients, list) {
+ if (!(READ_ONCE(client->events) & KFD_SMI_EVENT_VMFAULT))
+ continue;
+ spin_lock(&client->lock);
+ if (kfifo_avail(&client->fifo) >= len) {
+ kfifo_in(&client->fifo, fifo_in, len);
+ wake_up_all(&client->wait_queue);
+ }
+ else
+ pr_debug("smi_event(vmfault): no space left\n");
+ spin_unlock(&client->lock);
+ }
+
+ rcu_read_unlock();
+}
+
+int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
+{
+ struct kfd_smi_client *client;
+ int ret;
+
+ client = kzalloc(sizeof(struct kfd_smi_client), GFP_KERNEL);
+ if (!client)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&client->list);
+
+ ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL);
+ if (ret) {
+ kfree(client);
+ return ret;
+ }
+
+ ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
+ O_RDWR);
+ if (ret < 0) {
+ kfifo_free(&client->fifo);
+ kfree(client);
+ return ret;
+ }
+ *fd = ret;
+
+ init_waitqueue_head(&client->wait_queue);
+ spin_lock_init(&client->lock);
+ client->events = 0;
+ client->dev = dev;
+
+ spin_lock(&dev->smi_lock);
+ list_add_rcu(&client->list, &dev->smi_clients);
+ spin_unlock(&dev->smi_lock);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
new file mode 100644
index 000000000000..a9cb218fef96
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_SMI_EVENTS_H_INCLUDED
+#define KFD_SMI_EVENTS_H_INCLUDED
+
+int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
+void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index bb77f7af2b6d..f185f6cbc05c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -210,39 +210,41 @@ struct kfd_topology_device *kfd_create_topology_device(
}
-#define sysfs_show_gen_prop(buffer, fmt, ...) \
- snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__)
-#define sysfs_show_32bit_prop(buffer, name, value) \
- sysfs_show_gen_prop(buffer, "%s %u\n", name, value)
-#define sysfs_show_64bit_prop(buffer, name, value) \
- sysfs_show_gen_prop(buffer, "%s %llu\n", name, value)
-#define sysfs_show_32bit_val(buffer, value) \
- sysfs_show_gen_prop(buffer, "%u\n", value)
-#define sysfs_show_str_val(buffer, value) \
- sysfs_show_gen_prop(buffer, "%s\n", value)
+#define sysfs_show_gen_prop(buffer, offs, fmt, ...) \
+ (offs += snprintf(buffer+offs, PAGE_SIZE-offs, \
+ fmt, __VA_ARGS__))
+#define sysfs_show_32bit_prop(buffer, offs, name, value) \
+ sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
+#define sysfs_show_64bit_prop(buffer, offs, name, value) \
+ sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
+#define sysfs_show_32bit_val(buffer, offs, value) \
+ sysfs_show_gen_prop(buffer, offs, "%u\n", value)
+#define sysfs_show_str_val(buffer, offs, value) \
+ sysfs_show_gen_prop(buffer, offs, "%s\n", value)
static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- ssize_t ret;
+ int offs = 0;
/* Making sure that the buffer is an empty string */
buffer[0] = 0;
if (attr == &sys_props.attr_genid) {
- ret = sysfs_show_32bit_val(buffer, sys_props.generation_count);
+ sysfs_show_32bit_val(buffer, offs,
+ sys_props.generation_count);
} else if (attr == &sys_props.attr_props) {
- sysfs_show_64bit_prop(buffer, "platform_oem",
- sys_props.platform_oem);
- sysfs_show_64bit_prop(buffer, "platform_id",
- sys_props.platform_id);
- ret = sysfs_show_64bit_prop(buffer, "platform_rev",
- sys_props.platform_rev);
+ sysfs_show_64bit_prop(buffer, offs, "platform_oem",
+ sys_props.platform_oem);
+ sysfs_show_64bit_prop(buffer, offs, "platform_id",
+ sys_props.platform_id);
+ sysfs_show_64bit_prop(buffer, offs, "platform_rev",
+ sys_props.platform_rev);
} else {
- ret = -EINVAL;
+ offs = -EINVAL;
}
- return ret;
+ return offs;
}
static void kfd_topology_kobj_release(struct kobject *kobj)
@@ -262,7 +264,7 @@ static struct kobj_type sysprops_type = {
static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- ssize_t ret;
+ int offs = 0;
struct kfd_iolink_properties *iolink;
/* Making sure that the buffer is an empty string */
@@ -271,21 +273,23 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
iolink = container_of(attr, struct kfd_iolink_properties, attr);
if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu))
return -EPERM;
- sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type);
- sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj);
- sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min);
- sysfs_show_32bit_prop(buffer, "node_from", iolink->node_from);
- sysfs_show_32bit_prop(buffer, "node_to", iolink->node_to);
- sysfs_show_32bit_prop(buffer, "weight", iolink->weight);
- sysfs_show_32bit_prop(buffer, "min_latency", iolink->min_latency);
- sysfs_show_32bit_prop(buffer, "max_latency", iolink->max_latency);
- sysfs_show_32bit_prop(buffer, "min_bandwidth", iolink->min_bandwidth);
- sysfs_show_32bit_prop(buffer, "max_bandwidth", iolink->max_bandwidth);
- sysfs_show_32bit_prop(buffer, "recommended_transfer_size",
- iolink->rec_transfer_size);
- ret = sysfs_show_32bit_prop(buffer, "flags", iolink->flags);
-
- return ret;
+ sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type);
+ sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj);
+ sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min);
+ sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from);
+ sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to);
+ sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight);
+ sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency);
+ sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency);
+ sysfs_show_32bit_prop(buffer, offs, "min_bandwidth",
+ iolink->min_bandwidth);
+ sysfs_show_32bit_prop(buffer, offs, "max_bandwidth",
+ iolink->max_bandwidth);
+ sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size",
+ iolink->rec_transfer_size);
+ sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags);
+
+ return offs;
}
static const struct sysfs_ops iolink_ops = {
@@ -300,7 +304,7 @@ static struct kobj_type iolink_type = {
static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- ssize_t ret;
+ int offs = 0;
struct kfd_mem_properties *mem;
/* Making sure that the buffer is an empty string */
@@ -309,13 +313,15 @@ static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
mem = container_of(attr, struct kfd_mem_properties, attr);
if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu))
return -EPERM;
- sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type);
- sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes);
- sysfs_show_32bit_prop(buffer, "flags", mem->flags);
- sysfs_show_32bit_prop(buffer, "width", mem->width);
- ret = sysfs_show_32bit_prop(buffer, "mem_clk_max", mem->mem_clk_max);
-
- return ret;
+ sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type);
+ sysfs_show_64bit_prop(buffer, offs, "size_in_bytes",
+ mem->size_in_bytes);
+ sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags);
+ sysfs_show_32bit_prop(buffer, offs, "width", mem->width);
+ sysfs_show_32bit_prop(buffer, offs, "mem_clk_max",
+ mem->mem_clk_max);
+
+ return offs;
}
static const struct sysfs_ops mem_ops = {
@@ -330,7 +336,7 @@ static struct kobj_type mem_type = {
static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- ssize_t ret;
+ int offs = 0;
uint32_t i, j;
struct kfd_cache_properties *cache;
@@ -340,30 +346,27 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
cache = container_of(attr, struct kfd_cache_properties, attr);
if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
return -EPERM;
- sysfs_show_32bit_prop(buffer, "processor_id_low",
+ sysfs_show_32bit_prop(buffer, offs, "processor_id_low",
cache->processor_id_low);
- sysfs_show_32bit_prop(buffer, "level", cache->cache_level);
- sysfs_show_32bit_prop(buffer, "size", cache->cache_size);
- sysfs_show_32bit_prop(buffer, "cache_line_size", cache->cacheline_size);
- sysfs_show_32bit_prop(buffer, "cache_lines_per_tag",
- cache->cachelines_per_tag);
- sysfs_show_32bit_prop(buffer, "association", cache->cache_assoc);
- sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
- sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
- snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
+ sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level);
+ sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size);
+ sysfs_show_32bit_prop(buffer, offs, "cache_line_size",
+ cache->cacheline_size);
+ sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag",
+ cache->cachelines_per_tag);
+ sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
+ sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
+ sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
+ offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
- for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) {
+ for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
/* Check each bit */
- if (cache->sibling_map[i] & (1 << j))
- ret = snprintf(buffer, PAGE_SIZE,
- "%s%d%s", buffer, 1, ",");
- else
- ret = snprintf(buffer, PAGE_SIZE,
- "%s%d%s", buffer, 0, ",");
- }
+ offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
+ (cache->sibling_map[i] >> j) & 1);
+
/* Replace the last "," with end of line */
- *(buffer + strlen(buffer) - 1) = 0xA;
- return ret;
+ buffer[offs-1] = '\n';
+ return offs;
}
static const struct sysfs_ops cache_ops = {
@@ -385,6 +388,7 @@ struct kfd_perf_attr {
static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
char *buf)
{
+ int offs = 0;
struct kfd_perf_attr *attr;
buf[0] = 0;
@@ -392,7 +396,7 @@ static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
if (!attr->data) /* invalid data for PMC */
return 0;
else
- return sysfs_show_32bit_val(buf, attr->data);
+ return sysfs_show_32bit_val(buf, offs, attr->data);
}
#define KFD_PERF_DESC(_name, _data) \
@@ -411,6 +415,7 @@ static struct kfd_perf_attr perf_attr_iommu[] = {
static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
+ int offs = 0;
struct kfd_topology_device *dev;
uint32_t log_max_watch_addr;
@@ -422,7 +427,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
attr_gpuid);
if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
return -EPERM;
- return sysfs_show_32bit_val(buffer, dev->gpu_id);
+ return sysfs_show_32bit_val(buffer, offs, dev->gpu_id);
}
if (strcmp(attr->name, "name") == 0) {
@@ -431,69 +436,69 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
return -EPERM;
- return sysfs_show_str_val(buffer, dev->node_props.name);
+ return sysfs_show_str_val(buffer, offs, dev->node_props.name);
}
dev = container_of(attr, struct kfd_topology_device,
attr_props);
if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
return -EPERM;
- sysfs_show_32bit_prop(buffer, "cpu_cores_count",
- dev->node_props.cpu_cores_count);
- sysfs_show_32bit_prop(buffer, "simd_count",
- dev->node_props.simd_count);
- sysfs_show_32bit_prop(buffer, "mem_banks_count",
- dev->node_props.mem_banks_count);
- sysfs_show_32bit_prop(buffer, "caches_count",
- dev->node_props.caches_count);
- sysfs_show_32bit_prop(buffer, "io_links_count",
- dev->node_props.io_links_count);
- sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
- dev->node_props.cpu_core_id_base);
- sysfs_show_32bit_prop(buffer, "simd_id_base",
- dev->node_props.simd_id_base);
- sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
- dev->node_props.max_waves_per_simd);
- sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
- dev->node_props.lds_size_in_kb);
- sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
- dev->node_props.gds_size_in_kb);
- sysfs_show_32bit_prop(buffer, "num_gws",
- dev->node_props.num_gws);
- sysfs_show_32bit_prop(buffer, "wave_front_size",
- dev->node_props.wave_front_size);
- sysfs_show_32bit_prop(buffer, "array_count",
- dev->node_props.array_count);
- sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
- dev->node_props.simd_arrays_per_engine);
- sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
- dev->node_props.cu_per_simd_array);
- sysfs_show_32bit_prop(buffer, "simd_per_cu",
- dev->node_props.simd_per_cu);
- sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
- dev->node_props.max_slots_scratch_cu);
- sysfs_show_32bit_prop(buffer, "vendor_id",
- dev->node_props.vendor_id);
- sysfs_show_32bit_prop(buffer, "device_id",
- dev->node_props.device_id);
- sysfs_show_32bit_prop(buffer, "location_id",
- dev->node_props.location_id);
- sysfs_show_32bit_prop(buffer, "domain",
- dev->node_props.domain);
- sysfs_show_32bit_prop(buffer, "drm_render_minor",
- dev->node_props.drm_render_minor);
- sysfs_show_64bit_prop(buffer, "hive_id",
- dev->node_props.hive_id);
- sysfs_show_32bit_prop(buffer, "num_sdma_engines",
- dev->node_props.num_sdma_engines);
- sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines",
- dev->node_props.num_sdma_xgmi_engines);
- sysfs_show_32bit_prop(buffer, "num_sdma_queues_per_engine",
- dev->node_props.num_sdma_queues_per_engine);
- sysfs_show_32bit_prop(buffer, "num_cp_queues",
- dev->node_props.num_cp_queues);
- sysfs_show_64bit_prop(buffer, "unique_id",
- dev->node_props.unique_id);
+ sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
+ dev->node_props.cpu_cores_count);
+ sysfs_show_32bit_prop(buffer, offs, "simd_count",
+ dev->node_props.simd_count);
+ sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
+ dev->node_props.mem_banks_count);
+ sysfs_show_32bit_prop(buffer, offs, "caches_count",
+ dev->node_props.caches_count);
+ sysfs_show_32bit_prop(buffer, offs, "io_links_count",
+ dev->node_props.io_links_count);
+ sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
+ dev->node_props.cpu_core_id_base);
+ sysfs_show_32bit_prop(buffer, offs, "simd_id_base",
+ dev->node_props.simd_id_base);
+ sysfs_show_32bit_prop(buffer, offs, "max_waves_per_simd",
+ dev->node_props.max_waves_per_simd);
+ sysfs_show_32bit_prop(buffer, offs, "lds_size_in_kb",
+ dev->node_props.lds_size_in_kb);
+ sysfs_show_32bit_prop(buffer, offs, "gds_size_in_kb",
+ dev->node_props.gds_size_in_kb);
+ sysfs_show_32bit_prop(buffer, offs, "num_gws",
+ dev->node_props.num_gws);
+ sysfs_show_32bit_prop(buffer, offs, "wave_front_size",
+ dev->node_props.wave_front_size);
+ sysfs_show_32bit_prop(buffer, offs, "array_count",
+ dev->node_props.array_count);
+ sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine",
+ dev->node_props.simd_arrays_per_engine);
+ sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array",
+ dev->node_props.cu_per_simd_array);
+ sysfs_show_32bit_prop(buffer, offs, "simd_per_cu",
+ dev->node_props.simd_per_cu);
+ sysfs_show_32bit_prop(buffer, offs, "max_slots_scratch_cu",
+ dev->node_props.max_slots_scratch_cu);
+ sysfs_show_32bit_prop(buffer, offs, "vendor_id",
+ dev->node_props.vendor_id);
+ sysfs_show_32bit_prop(buffer, offs, "device_id",
+ dev->node_props.device_id);
+ sysfs_show_32bit_prop(buffer, offs, "location_id",
+ dev->node_props.location_id);
+ sysfs_show_32bit_prop(buffer, offs, "domain",
+ dev->node_props.domain);
+ sysfs_show_32bit_prop(buffer, offs, "drm_render_minor",
+ dev->node_props.drm_render_minor);
+ sysfs_show_64bit_prop(buffer, offs, "hive_id",
+ dev->node_props.hive_id);
+ sysfs_show_32bit_prop(buffer, offs, "num_sdma_engines",
+ dev->node_props.num_sdma_engines);
+ sysfs_show_32bit_prop(buffer, offs, "num_sdma_xgmi_engines",
+ dev->node_props.num_sdma_xgmi_engines);
+ sysfs_show_32bit_prop(buffer, offs, "num_sdma_queues_per_engine",
+ dev->node_props.num_sdma_queues_per_engine);
+ sysfs_show_32bit_prop(buffer, offs, "num_cp_queues",
+ dev->node_props.num_cp_queues);
+ sysfs_show_64bit_prop(buffer, offs, "unique_id",
+ dev->node_props.unique_id);
if (dev->gpu) {
log_max_watch_addr =
@@ -513,22 +518,21 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.capability |=
HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
- sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
+ sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
dev->node_props.max_engine_clk_fcompute);
- sysfs_show_64bit_prop(buffer, "local_mem_size",
- (unsigned long long int) 0);
+ sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL);
- sysfs_show_32bit_prop(buffer, "fw_version",
- dev->gpu->mec_fw_version);
- sysfs_show_32bit_prop(buffer, "capability",
- dev->node_props.capability);
- sysfs_show_32bit_prop(buffer, "sdma_fw_version",
- dev->gpu->sdma_fw_version);
+ sysfs_show_32bit_prop(buffer, offs, "fw_version",
+ dev->gpu->mec_fw_version);
+ sysfs_show_32bit_prop(buffer, offs, "capability",
+ dev->node_props.capability);
+ sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
+ dev->gpu->sdma_fw_version);
}
- return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
- cpufreq_quick_get_max(0)/1000);
+ return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute",
+ cpufreq_quick_get_max(0)/1000);
}
static const struct sysfs_ops node_ops = {
@@ -632,8 +636,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
ret = kobject_init_and_add(dev->kobj_node, &node_type,
sys_props.kobj_nodes, "%d", id);
- if (ret < 0)
+ if (ret < 0) {
+ kobject_put(dev->kobj_node);
return ret;
+ }
dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node);
if (!dev->kobj_mem)
@@ -680,8 +686,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
return -ENOMEM;
ret = kobject_init_and_add(mem->kobj, &mem_type,
dev->kobj_mem, "%d", i);
- if (ret < 0)
+ if (ret < 0) {
+ kobject_put(mem->kobj);
return ret;
+ }
mem->attr.name = "properties";
mem->attr.mode = KFD_SYSFS_FILE_MODE;
@@ -699,8 +707,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
return -ENOMEM;
ret = kobject_init_and_add(cache->kobj, &cache_type,
dev->kobj_cache, "%d", i);
- if (ret < 0)
+ if (ret < 0) {
+ kobject_put(cache->kobj);
return ret;
+ }
cache->attr.name = "properties";
cache->attr.mode = KFD_SYSFS_FILE_MODE;
@@ -718,8 +728,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
return -ENOMEM;
ret = kobject_init_and_add(iolink->kobj, &iolink_type,
dev->kobj_iolink, "%d", i);
- if (ret < 0)
+ if (ret < 0) {
+ kobject_put(iolink->kobj);
return ret;
+ }
iolink->attr.name = "properties";
iolink->attr.mode = KFD_SYSFS_FILE_MODE;
@@ -798,8 +810,10 @@ static int kfd_topology_update_sysfs(void)
ret = kobject_init_and_add(sys_props.kobj_topology,
&sysprops_type, &kfd_device->kobj,
"topology");
- if (ret < 0)
+ if (ret < 0) {
+ kobject_put(sys_props.kobj_topology);
return ret;
+ }
sys_props.kobj_nodes = kobject_create_and_add("nodes",
sys_props.kobj_topology);
@@ -1359,6 +1373,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_NAVY_FLOUNDER:
dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);