diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 413 |
1 files changed, 259 insertions, 154 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 97105a5bb246..46ab46757b25 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -131,18 +131,6 @@ MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 -struct ras_gfx_subblock_reg { - const char *name; - uint32_t hwip; - uint32_t inst; - uint32_t seg; - uint32_t reg_offset; - uint32_t sec_count_mask; - uint32_t sec_count_shift; - uint32_t ded_count_mask; - uint32_t ded_count_shift; -}; - enum ta_ras_gfx_subblock { /*CPC*/ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, @@ -751,6 +739,134 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if); +static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, + PACKET3_SET_RESOURCES_VMID_MASK(0) | + /* vmid_mask:0* queue_type:0 (KIQ) */ + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); + amdgpu_ring_write(kiq_ring, + lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, + upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ +} + +static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | + /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | + /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + /* num_queues: must be 1 */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + +static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { + .kiq_set_resources = gfx_v9_0_kiq_set_resources, + .kiq_map_queues = gfx_v9_0_kiq_map_queues, + .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, + .kiq_query_status = gfx_v9_0_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, + .invalidate_tlbs_size = 12, +}; + +static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs; +} + static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { @@ -991,8 +1107,7 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) (adev->gfx.mec_feature_version < 46) || (adev->gfx.pfp_fw_version < 0x000000b7) || (adev->gfx.pfp_feature_version < 46)) - DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ - GRBM requires 1-cycle delay in cp firmware\n"); + DRM_WARN_ONCE("CP firmware version too old, please update!"); switch (adev->asic_type) { case CHIP_VEGA10: @@ -3119,74 +3234,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } -static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) -{ - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - uint64_t queue_mask = 0; - int r, i; - - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { - if (!test_bit(i, adev->gfx.mec.queue_bitmap)) - continue; - - /* This situation may be hit in the future if a new HW - * generation exposes more than 64 queues. If so, the - * definition of queue_mask needs updating */ - if (WARN_ON(i >= (sizeof(queue_mask)*8))) { - DRM_ERROR("Invalid KCQ enabled: %d\n", i); - break; - } - - queue_mask |= (1ull << i); - } - - r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - return r; - } - - /* set resources */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | - PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ - amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* oac mask */ - amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ - PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ - PACKET3_MAP_QUEUES_QUEUE(ring->queue) | - PACKET3_MAP_QUEUES_PIPE(ring->pipe) | - PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | - PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ - PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ - PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ - amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); - } - - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ enable failed\n"); - - return r; -} - static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -3323,8 +3370,11 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; - /* activate the queue */ - mqd->cp_hqd_active = 1; + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; return 0; } @@ -3593,7 +3643,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; } - r = gfx_v9_0_kiq_kcq_enable(adev); + r = amdgpu_gfx_enable_kcq(adev); done: return r; } @@ -3650,6 +3700,23 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) return 0; } +static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) +{ + u32 tmp; + + if (adev->asic_type != CHIP_ARCTURUS) + return; + + tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, + adev->df.hash_status.hash_64k); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, + adev->df.hash_status.hash_2m); + tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, + adev->df.hash_status.hash_1g); + WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); +} + static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) { if (adev->asic_type != CHIP_ARCTURUS) @@ -3667,6 +3734,8 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_constants_init(adev); + gfx_v9_0_init_tcp_config(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -3678,36 +3747,6 @@ static int gfx_v9_0_hw_init(void *handle) return r; } -static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) -{ - int r, i; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - - r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); - if (r) - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ - PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | - PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | - PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); - amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - } - r = amdgpu_ring_test_helper(kiq_ring); - if (r) - DRM_ERROR("KCQ disable failed\n"); - - return r; -} - static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3719,7 +3758,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) /* disable KCQ to avoid CPC touch memory not valid anymore */ - gfx_v9_0_kcq_disable(adev); + amdgpu_gfx_disable_kcq(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); @@ -3936,30 +3975,58 @@ static const u32 sgpr_init_compute_shader[] = 0xbe800080, 0xbf810000, }; +/* When below register arrays changed, please update gpr_reg_size, + and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, + to cover all gfx9 ASICs */ static const struct soc15_reg_entry vgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, +}; + +static const struct soc15_reg_entry sgpr1_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, }; -static const struct soc15_reg_entry sgpr_init_regs[] = { - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, +static const struct soc15_reg_entry sgpr2_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, }; static const struct soc15_reg_entry sec_ded_counter_registers[] = { @@ -3996,6 +4063,7 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, + { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, }; static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) @@ -4054,6 +4122,12 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) unsigned total_size, vgpr_offset, sgpr_offset; u64 gpu_addr; + int compute_dim_x = adev->gfx.config.max_shader_engines * + adev->gfx.config.max_cu_per_sh * + adev->gfx.config.max_sh_per_se; + int sgpr_work_group_size = 5; + int gpr_reg_size = compute_dim_x / 16 + 6; + /* only support when RAS is enabled */ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return 0; @@ -4063,9 +4137,11 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) return 0; total_size = - ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ total_size += - ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ + total_size += + (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ total_size = ALIGN(total_size, 256); vgpr_offset = total_size; total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); @@ -4092,7 +4168,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* VGPR */ /* write the register state for the compute dispatch */ - for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { + for (i = 0; i < gpr_reg_size; i++) { ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) - PACKET3_SET_SH_REG_START; @@ -4108,7 +4184,35 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = compute_dim_x; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* SGPR1 */ + /* write the register state for the compute dispatch */ + for (i = 0; i < gpr_reg_size; i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = @@ -4118,13 +4222,13 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); - /* SGPR */ + /* SGPR2 */ /* write the register state for the compute dispatch */ - for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { + for (i = 0; i < gpr_reg_size; i++) { ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); - ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) - PACKET3_SET_SH_REG_START; - ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; + ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; } /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; @@ -4136,7 +4240,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ ib.ptr[ib.length_dw++] = 1; /* y */ ib.ptr[ib.length_dw++] = 1; /* z */ ib.ptr[ib.length_dw++] = @@ -4189,6 +4293,7 @@ static int gfx_v9_0_early_init(void *handle) else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + gfx_v9_0_set_kiq_pm4_funcs(adev); gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); gfx_v9_0_set_gds_init(adev); @@ -4202,10 +4307,6 @@ static int gfx_v9_0_ecc_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = amdgpu_gfx_ras_late_init(adev); - if (r) - return r; - r = gfx_v9_0_do_edc_gds_workarounds(adev); if (r) return r; @@ -4215,6 +4316,10 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (r) return r; + r = amdgpu_gfx_ras_late_init(adev); + if (r) + return r; + return 0; } @@ -5440,7 +5545,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, } -static const struct ras_gfx_subblock_reg ras_subblock_regs[] = { +static const struct soc15_ras_field_entry gc_ras_fields_vg20[] = { { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) @@ -6099,29 +6204,29 @@ static int __get_ras_error_count(const struct soc15_reg_entry *reg, uint32_t i; uint32_t sec_cnt, ded_cnt; - for (i = 0; i < ARRAY_SIZE(ras_subblock_regs); i++) { - if(ras_subblock_regs[i].reg_offset != reg->reg_offset || - ras_subblock_regs[i].seg != reg->seg || - ras_subblock_regs[i].inst != reg->inst) + for (i = 0; i < ARRAY_SIZE(gc_ras_fields_vg20); i++) { + if(gc_ras_fields_vg20[i].reg_offset != reg->reg_offset || + gc_ras_fields_vg20[i].seg != reg->seg || + gc_ras_fields_vg20[i].inst != reg->inst) continue; sec_cnt = (value & - ras_subblock_regs[i].sec_count_mask) >> - ras_subblock_regs[i].sec_count_shift; + gc_ras_fields_vg20[i].sec_count_mask) >> + gc_ras_fields_vg20[i].sec_count_shift; if (sec_cnt) { DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", - ras_subblock_regs[i].name, + gc_ras_fields_vg20[i].name, se_id, inst_id, sec_cnt); *sec_count += sec_cnt; } ded_cnt = (value & - ras_subblock_regs[i].ded_count_mask) >> - ras_subblock_regs[i].ded_count_shift; + gc_ras_fields_vg20[i].ded_count_mask) >> + gc_ras_fields_vg20[i].ded_count_shift; if (ded_cnt) { DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n", - ras_subblock_regs[i].name, + gc_ras_fields_vg20[i].name, se_id, inst_id, ded_cnt); *ded_count += ded_cnt; |