aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c191
1 files changed, 62 insertions, 129 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 47c853ef1051..8562afe5b761 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -40,7 +40,6 @@
#include "soc15d.h"
#include "mmhub_v1_0.h"
#include "gfxhub_v1_0.h"
-#include "gmc_v9_0.h"
enum hqd_dequeue_request_type {
@@ -104,13 +103,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
lock_srbm(kgd, mec, pipe, queue_id, 0);
}
-static uint32_t get_queue_mask(struct amdgpu_device *adev,
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id)
{
- unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
- queue_id) & 31;
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
- return ((uint32_t)1) << bit;
+ return 1ull << bit;
}
static void release_queue(struct kgd_dev *kgd)
@@ -259,21 +258,6 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
acquire_queue(kgd, pipe_id, queue_id);
- /* HIQ is set during driver init period with vmid set to 0*/
- if (m->cp_hqd_vmid == 0) {
- uint32_t value, mec, pipe;
-
- mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
- pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
- pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
- mec, pipe, queue_id);
- value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
- value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
- ((mec << 5) | (pipe << 3) | queue_id | 0x80));
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
- }
-
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
@@ -324,7 +308,7 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
- get_queue_mask(adev, pipe_id, queue_id));
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
@@ -340,6 +324,59 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
return 0;
}
+int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct v9_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ release_queue(kgd);
+
+ return r;
+}
+
int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
@@ -618,100 +655,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
- uint32_t flush_type)
-{
- signed long r;
- uint32_t seq;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
- spin_lock(&adev->gfx.kiq.ring_lock);
- amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
- amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
- amdgpu_ring_write(ring,
- PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
- PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
- PACKET3_INVALIDATE_TLBS_PASID(pasid) |
- PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
- amdgpu_fence_emit_polling(ring, &seq);
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
-
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
- if (r < 1) {
- DRM_ERROR("wait for kiq fence error: %ld.\n", r);
- return -ETIME;
- }
-
- return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid, i;
- uint16_t queried_pasid;
- bool ret;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- uint32_t flush_type = 0;
-
- if (adev->in_gpu_reset)
- return -EIO;
- if (adev->gmc.xgmi.num_physical_nodes &&
- adev->asic_type == CHIP_VEGA20)
- flush_type = 2;
-
- if (ring->sched.ready)
- return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
-
- ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid,
- &queried_pasid);
- if (ret && queried_pasid == pasid) {
- for (i = 0; i < adev->num_vmhubs; i++)
- amdgpu_gmc_flush_gpu_tlb(adev, vmid,
- i, flush_type);
- break;
- }
- }
-
- return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int i;
-
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid %d\n", vmid);
- return 0;
- }
-
- /* Use legacy mode tlb invalidation.
- *
- * Currently on Raven the code below is broken for anything but
- * legacy mode due to a MMHUB power gating problem. A workaround
- * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
- * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
- * bit.
- *
- * TODO 1: agree on the right set of invalidation registers for
- * KFD use. Use the last one for now. Invalidate both GC and
- * MMHUB.
- *
- * TODO 2: support range-based invalidation, requires kfg2kgd
- * interface change
- */
- for (i = 0; i < adev->num_vmhubs; i++)
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-
- return 0;
-}
-
int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
{
return 0;
@@ -758,8 +701,8 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
return 0;
}
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base)
+static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
+ uint32_t vmid, uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -769,16 +712,7 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmi
return;
}
- /* TODO: take advantage of per-process address space size. For
- * now, all processes share the same address space size, like
- * on GFX8 and older.
- */
- if (adev->asic_type == CHIP_ARCTURUS) {
- /* Two MMHUBs */
- mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base);
- mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base);
- } else
- mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+ mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}
@@ -788,6 +722,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
.init_interrupts = kgd_gfx_v9_init_interrupts,
.hqd_load = kgd_gfx_v9_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
.hqd_dump = kgd_gfx_v9_hqd_dump,
.hqd_sdma_dump = kgd_hqd_sdma_dump,
@@ -803,7 +738,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.get_tile_config = kgd_gfx_v9_get_tile_config,
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
- .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
- .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
};