aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c211
1 files changed, 117 insertions, 94 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index ba9e53a1abc3..02702597ddeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,6 +40,7 @@
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
@@ -50,11 +51,8 @@
* Navi10 has two graphic rings to share each graphic pipe.
* 1. Primary ring
* 2. Async ring
- *
- * In bring-up phase, it just used primary ring so set gfx ring number as 1 at
- * first.
*/
-#define GFX10_NUM_GFX_RINGS 2
+#define GFX10_NUM_GFX_RINGS_NV1X 1
#define GFX10_MEC_HPD_SIZE 2048
#define F32_CE_PROGRAM_RAM_SIZE 65536
@@ -123,7 +121,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
@@ -171,7 +169,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070105),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
@@ -348,15 +346,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
+static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx10_kiq_set_resources,
.kiq_map_queues = gfx10_kiq_map_queues,
.kiq_unmap_queues = gfx10_kiq_unmap_queues,
.kiq_query_status = gfx10_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
.query_status_size = 7,
+ .invalidate_tlbs_size = 2,
};
static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
@@ -474,18 +486,10 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
else
udelay(1);
}
- if (i < adev->usec_timeout) {
- if (amdgpu_emu_mode == 1)
- DRM_INFO("ring test on %d succeeded in %d msecs\n",
- ring->idx, i);
- else
- DRM_INFO("ring test on %d succeeded in %d usecs\n",
- ring->idx, i);
- } else {
- DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
- ring->idx, scratch, tmp);
- r = -EINVAL;
- }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
amdgpu_gfx_scratch_free(adev, scratch);
return r;
@@ -535,14 +539,10 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
}
tmp = RREG32(scratch);
- if (tmp == 0xDEADBEEF) {
- DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+ if (tmp == 0xDEADBEEF)
r = 0;
- } else {
- DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
- scratch, tmp);
+ else
r = -EINVAL;
- }
err2:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
@@ -591,8 +591,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
}
if (adev->gfx.cp_fw_write_wait == false)
- DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
- GRBM requires 1-cycle delay in cp firmware\n");
+ DRM_WARN_ONCE("CP firmware version too old, please update!");
}
@@ -617,11 +616,29 @@ static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
+static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
+{
+ bool ret = false;
+
+ switch (adev->pdev->revision) {
+ case 0xc2:
+ case 0xc3:
+ ret = true;
+ break;
+ default:
+ ret = false;
+ break;
+ }
+
+ return ret ;
+}
+
static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_NAVI10:
- adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
break;
default:
break;
@@ -805,10 +822,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
info->fw = adev->gfx.rlc_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
+ if (info->fw) {
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
if (adev->gfx.rlc.is_rlc_v2_1 &&
adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
@@ -1286,7 +1304,7 @@ static int gfx_v10_0_sw_init(void *handle)
case CHIP_NAVI14:
case CHIP_NAVI12:
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 2;
+ adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
@@ -1948,7 +1966,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev)
rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4;
rlc_toc++;
- };
+ }
return 0;
}
@@ -2692,18 +2710,20 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
amdgpu_ring_commit(ring);
/* submit cs packet to copy state 0 to next available state */
- ring = &adev->gfx.gfx_ring[1];
- r = amdgpu_ring_alloc(ring, 2);
- if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
- return r;
- }
-
- amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
- amdgpu_ring_write(ring, 0);
+ if (adev->gfx.num_gfx_rings > 1) {
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ r = amdgpu_ring_alloc(ring, 2);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
- amdgpu_ring_commit(ring);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_commit(ring);
+ }
return 0;
}
@@ -2800,39 +2820,41 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
/* Init gfx ring 1 for pipe 1 */
- mutex_lock(&adev->srbm_mutex);
- gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
- ring = &adev->gfx.gfx_ring[1];
- rb_bufsz = order_base_2(ring->ring_size / 8);
- tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
- tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
- WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
- /* Initialize the ring buffer's write pointers */
- ring->wptr = 0;
- WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
- /* Set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
- WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
- WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
- CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
- upper_32_bits(wptr_gpu_addr));
-
- mdelay(1);
- WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
-
- rb_addr = ring->gpu_addr >> 8;
- WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr);
- WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr));
- WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
-
- gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
- mutex_unlock(&adev->srbm_mutex);
-
+ if (adev->gfx.num_gfx_rings > 1) {
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
+ /* Set the wb address wether it's enabled or not */
+ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
+
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+ }
/* Switch to pipe 0 */
mutex_lock(&adev->srbm_mutex);
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
@@ -3319,8 +3341,11 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
mqd->cp_hqd_ib_control = tmp;
- /* activate the queue */
- mqd->cp_hqd_active = 1;
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
return 0;
}
@@ -3492,6 +3517,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
/* reset ring buffer */
ring->wptr = 0;
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
amdgpu_ring_clear_ring(ring);
} else {
amdgpu_ring_clear_ring(ring);
@@ -3591,23 +3617,16 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
- DRM_INFO("gfx %d ring me %d pipe %d q %d\n",
- i, ring->me, ring->pipe, ring->queue);
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
return r;
- }
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
- ring->sched.ready = true;
- DRM_INFO("compute ring %d mec %d pipe %d q %d\n",
- i, ring->me, ring->pipe, ring->queue);
- r = amdgpu_ring_test_ring(ring);
+ r = amdgpu_ring_test_helper(ring);
if (r)
- ring->sched.ready = false;
+ return r;
}
return 0;
@@ -3909,11 +3928,13 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock;
+ amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->gfx.gpu_clock_mutex);
WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
return clock;
}
@@ -3950,7 +3971,8 @@ static int gfx_v10_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS;
+ adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
+
adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v10_0_set_kiq_pm4_funcs(adev);
@@ -4215,7 +4237,7 @@ static int gfx_v10_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_PG_STATE_GATE);
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
@@ -4241,7 +4263,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
case CHIP_NAVI14:
case CHIP_NAVI12:
gfx_v10_0_update_gfx_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -4723,6 +4745,7 @@ static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
@@ -4731,9 +4754,9 @@ static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
amdgpu_ring_write(ring, reg);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
}
static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,