aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2017-06-16 09:54:02 +1000
committerDave Airlie <airlied@redhat.com>2017-06-16 09:56:53 +1000
commit04d4fb5fa63876d8e7cf67f2788aecfafc6a28a7 (patch)
tree92aec67d7b5a1359baff1a508d381234f046743e /drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
parentMerge tag 'drm-misc-next-2017-06-15' of git://anongit.freedesktop.org/git/drm-misc into drm-next (diff)
parentdrm/amdgpu: Fix compiler warnings (diff)
downloadlinux-dev-04d4fb5fa63876d8e7cf67f2788aecfafc6a28a7.tar.xz
linux-dev-04d4fb5fa63876d8e7cf67f2788aecfafc6a28a7.zip
Merge branch 'drm-next-4.13' of git://people.freedesktop.org/~agd5f/linux into drm-next
New radeon and amdgpu features for 4.13: - Lots of Vega10 bug fixes - Preliminary Raven support - KIQ support for compute rings - MEC queue management rework from Andres - Audio support for DCE6 - SR-IOV improvements - Improved module parameters for controlling radeon vs amdgpu support for SI and CIK - Bug fixes - General code cleanups [airlied: dropped drmP.h header from one file was needed and build broke] * 'drm-next-4.13' of git://people.freedesktop.org/~agd5f/linux: (362 commits) drm/amdgpu: Fix compiler warnings drm/amdgpu: vm_update_ptes remove code duplication drm/amd/amdgpu: Port VCN over to new SOC15 macros drm/amd/amdgpu: Port PSP v10.0 over to new SOC15 macros drm/amd/amdgpu: Port PSP v3.1 over to new SOC15 macros drm/amd/amdgpu: Port NBIO v7.0 driver over to new SOC15 macros drm/amd/amdgpu: Port NBIO v6.1 driver over to new SOC15 macros drm/amd/amdgpu: Port UVD 7.0 over to new SOC15 macros drm/amd/amdgpu: Port MMHUB over to new SOC15 macros drm/amd/amdgpu: Cleanup gfxhub read-modify-write patterns drm/amd/amdgpu: Port GFXHUB over to new SOC15 macros drm/amd/amdgpu: Add offset variant to SOC15 macros drm/amd/powerplay: add avfs control for Vega10 drm/amdgpu: add virtual display support for raven drm/amdgpu/gfx9: fix compute ring doorbell index drm/amd/amdgpu: Rename KIQ ring to avoid spaces drm/amd/amdgpu: gfx9 tidy ups (v2) drm/amdgpu: add contiguous flag in ucode bo create drm/amdgpu: fix missed gpu info firmware when cache firmware during S3 drm/amdgpu: export test ib debugfs interface ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c650
1 files changed, 357 insertions, 293 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index f7414cabd4ff..ec754288f146 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -27,6 +27,7 @@
#include "amdgpu_gfx.h"
#include "cikd.h"
#include "cik.h"
+#include "cik_structs.h"
#include "atom.h"
#include "amdgpu_ucode.h"
#include "clearstate_ci.h"
@@ -48,7 +49,7 @@
#include "oss/oss_2_0_sh_mask.h"
#define GFX7_NUM_GFX_RINGS 1
-#define GFX7_NUM_COMPUTE_RINGS 8
+#define GFX7_MEC_HPD_SIZE 2048
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -1607,19 +1608,6 @@ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
}
/**
- * gfx_v7_0_create_bitmask - create a bitmask
- *
- * @bit_width: length of the mask
- *
- * create a variable length bit mask (CIK).
- * Returns the bitmask.
- */
-static u32 gfx_v7_0_create_bitmask(u32 bit_width)
-{
- return (u32)((1ULL << bit_width) - 1);
-}
-
-/**
* gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs
*
* @adev: amdgpu_device pointer
@@ -1637,8 +1625,8 @@ static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
- mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
- adev->gfx.config.max_sh_per_se);
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se);
return (~data) & mask;
}
@@ -1837,7 +1825,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
/**
* gmc_v7_0_init_compute_vmid - gart enable
*
- * @rdev: amdgpu_device pointer
+ * @adev: amdgpu_device pointer
*
* Initialize compute vmid sh_mem registers
*
@@ -2821,26 +2809,23 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
}
}
-#define MEC_HPD_SIZE 2048
-
static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
{
int r;
u32 *hpd;
+ size_t mec_hpd_size;
- /*
- * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
- * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
- * Nonetheless, we assign only 1 pipe because all other pipes will
- * be handled by KFD
- */
- adev->gfx.mec.num_mec = 1;
- adev->gfx.mec.num_pipe = 1;
- adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+ bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ /* allocate space for ALL pipes (even the ones we don't own) */
+ mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
+ * GFX7_MEC_HPD_SIZE * 2;
if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
- adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+ mec_hpd_size,
PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
&adev->gfx.mec.hpd_eop_obj);
@@ -2870,7 +2855,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
}
/* clear memory. Not sure if this is required or not */
- memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
+ memset(hpd, 0, mec_hpd_size);
amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -2917,275 +2902,296 @@ struct hqd_registers
u32 cp_mqd_control;
};
-struct bonaire_mqd
+static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
+ int mec, int pipe)
{
- u32 header;
- u32 dispatch_initiator;
- u32 dimensions[3];
- u32 start_idx[3];
- u32 num_threads[3];
- u32 pipeline_stat_enable;
- u32 perf_counter_enable;
- u32 pgm[2];
- u32 tba[2];
- u32 tma[2];
- u32 pgm_rsrc[2];
- u32 vmid;
- u32 resource_limits;
- u32 static_thread_mgmt01[2];
- u32 tmp_ring_size;
- u32 static_thread_mgmt23[2];
- u32 restart[3];
- u32 thread_trace_enable;
- u32 reserved1;
- u32 user_data[16];
- u32 vgtcs_invoke_count[2];
- struct hqd_registers queue_state;
- u32 dequeue_cntr;
- u32 interrupt_queue[64];
-};
-
-/**
- * gfx_v7_0_cp_compute_resume - setup the compute queue registers
- *
- * @adev: amdgpu_device pointer
- *
- * Program the compute queues and test them to make sure they
- * are working.
- * Returns 0 for success, error for failure.
- */
-static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
-{
- int r, i, j;
- u32 tmp;
- bool use_doorbell = true;
- u64 hqd_gpu_addr;
- u64 mqd_gpu_addr;
u64 eop_gpu_addr;
- u64 wb_gpu_addr;
- u32 *buf;
- struct bonaire_mqd *mqd;
- struct amdgpu_ring *ring;
-
- /* fix up chicken bits */
- tmp = RREG32(mmCP_CPF_DEBUG);
- tmp |= (1 << 23);
- WREG32(mmCP_CPF_DEBUG, tmp);
+ u32 tmp;
+ size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
+ * GFX7_MEC_HPD_SIZE * 2;
- /* init the pipes */
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
- int me = (i < 4) ? 1 : 2;
- int pipe = (i < 4) ? i : (i - 4);
+ eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
- eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
+ cik_srbm_select(adev, mec + 1, pipe, 0, 0);
- cik_srbm_select(adev, me, pipe, 0, 0);
+ /* write the EOP addr */
+ WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+ WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
- /* write the EOP addr */
- WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
- WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+ /* set the VMID assigned */
+ WREG32(mmCP_HPD_EOP_VMID, 0);
- /* set the VMID assigned */
- WREG32(mmCP_HPD_EOP_VMID, 0);
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32(mmCP_HPD_EOP_CONTROL);
+ tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
+ tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
+ WREG32(mmCP_HPD_EOP_CONTROL, tmp);
- /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- tmp = RREG32(mmCP_HPD_EOP_CONTROL);
- tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
- tmp |= order_base_2(MEC_HPD_SIZE / 8);
- WREG32(mmCP_HPD_EOP_CONTROL, tmp);
- }
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+}
- /* init the queues. Just two for now. */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
+static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
+{
+ int i;
- if (ring->mqd_obj == NULL) {
- r = amdgpu_bo_create(adev,
- sizeof(struct bonaire_mqd),
- PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
- &ring->mqd_obj);
- if (r) {
- dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
- return r;
- }
+ /* disable the queue if it's active */
+ if (RREG32(mmCP_HQD_ACTIVE) & 1) {
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
}
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0)) {
- gfx_v7_0_cp_compute_fini(adev);
- return r;
- }
- r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
- &mqd_gpu_addr);
- if (r) {
- dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
- gfx_v7_0_cp_compute_fini(adev);
- return r;
- }
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
- if (r) {
- dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
- gfx_v7_0_cp_compute_fini(adev);
- return r;
- }
+ if (i == adev->usec_timeout)
+ return -ETIMEDOUT;
- /* init the mqd struct */
- memset(buf, 0, sizeof(struct bonaire_mqd));
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+ WREG32(mmCP_HQD_PQ_RPTR, 0);
+ WREG32(mmCP_HQD_PQ_WPTR, 0);
+ }
- mqd = (struct bonaire_mqd *)buf;
- mqd->header = 0xC0310800;
- mqd->static_thread_mgmt01[0] = 0xffffffff;
- mqd->static_thread_mgmt01[1] = 0xffffffff;
- mqd->static_thread_mgmt23[0] = 0xffffffff;
- mqd->static_thread_mgmt23[1] = 0xffffffff;
+ return 0;
+}
- mutex_lock(&adev->srbm_mutex);
- cik_srbm_select(adev, ring->me,
- ring->pipe,
- ring->queue, 0);
+static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
+ struct cik_mqd *mqd,
+ uint64_t mqd_gpu_addr,
+ struct amdgpu_ring *ring)
+{
+ u64 hqd_gpu_addr;
+ u64 wb_gpu_addr;
- /* disable wptr polling */
- tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
- tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK;
- WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+ /* init the mqd struct */
+ memset(mqd, 0, sizeof(struct cik_mqd));
- /* enable doorbell? */
- mqd->queue_state.cp_hqd_pq_doorbell_control =
- RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
- if (use_doorbell)
- mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
- else
- mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
- WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
- mqd->queue_state.cp_hqd_pq_doorbell_control);
-
- /* disable the queue if it's active */
- mqd->queue_state.cp_hqd_dequeue_request = 0;
- mqd->queue_state.cp_hqd_pq_rptr = 0;
- mqd->queue_state.cp_hqd_pq_wptr= 0;
- if (RREG32(mmCP_HQD_ACTIVE) & 1) {
- WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
- for (j = 0; j < adev->usec_timeout; j++) {
- if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
- break;
- udelay(1);
- }
- WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
- WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
- WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
- }
+ mqd->header = 0xC0310800;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
- /* set the pointer to the MQD */
- mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
- mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
- WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
- WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
- /* set MQD vmid to 0 */
- mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
- mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
- WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
-
- /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
- hqd_gpu_addr = ring->gpu_addr >> 8;
- mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
- mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
- WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
- WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
-
- /* set up the HQD, this is similar to CP_RB0_CNTL */
- mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
- mqd->queue_state.cp_hqd_pq_control &=
- ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
- CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
-
- mqd->queue_state.cp_hqd_pq_control |=
- order_base_2(ring->ring_size / 8);
- mqd->queue_state.cp_hqd_pq_control |=
- (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
+ /* enable doorbell? */
+ mqd->cp_hqd_pq_doorbell_control =
+ RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+ if (ring->use_doorbell)
+ mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+ else
+ mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
+ mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
+ mqd->cp_hqd_pq_control &=
+ ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
+ CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
+
+ mqd->cp_hqd_pq_control |=
+ order_base_2(ring->ring_size / 8);
+ mqd->cp_hqd_pq_control |=
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
- mqd->queue_state.cp_hqd_pq_control |=
- 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
+ mqd->cp_hqd_pq_control |=
+ 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
#endif
- mqd->queue_state.cp_hqd_pq_control &=
- ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
+ mqd->cp_hqd_pq_control &=
+ ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
- mqd->queue_state.cp_hqd_pq_control |=
- CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
- CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
- WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
-
- /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
- mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
- WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
- WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
- mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
-
- /* set the wb address wether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
- mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
- mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
- upper_32_bits(wb_gpu_addr) & 0xffff;
- WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
- mqd->queue_state.cp_hqd_pq_rptr_report_addr);
- WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
- mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
-
- /* enable the doorbell if requested */
- if (use_doorbell) {
- mqd->queue_state.cp_hqd_pq_doorbell_control =
- RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
- mqd->queue_state.cp_hqd_pq_doorbell_control &=
- ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
- mqd->queue_state.cp_hqd_pq_doorbell_control |=
- (ring->doorbell_index <<
- CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
- mqd->queue_state.cp_hqd_pq_doorbell_control |=
- CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
- mqd->queue_state.cp_hqd_pq_doorbell_control &=
- ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
- CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
+ mqd->cp_hqd_pq_control |=
+ CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
+ CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
- } else {
- mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set the wb address wether it's enabled or not */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ mqd->cp_hqd_pq_doorbell_control =
+ RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+ mqd->cp_hqd_pq_doorbell_control &=
+ ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
+ mqd->cp_hqd_pq_doorbell_control |=
+ (ring->doorbell_index <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
+ mqd->cp_hqd_pq_doorbell_control |=
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+ mqd->cp_hqd_pq_doorbell_control &=
+ ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
+
+ } else {
+ mqd->cp_hqd_pq_doorbell_control = 0;
+ }
+
+ /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
+ mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ /* defaults */
+ mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
+ mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
+ mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
+ mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
+ mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
+ mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
+ mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
+ mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
+ mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
+ mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
+ mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
+ mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+ mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+ mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
+ mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
+ mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
+
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+}
+
+int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
+{
+ uint32_t tmp;
+ uint32_t mqd_reg;
+ uint32_t *mqd_data;
+
+ /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
+ mqd_data = &mqd->cp_mqd_base_addr_lo;
+
+ /* disable wptr polling */
+ tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+ WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+
+ /* program all HQD registers */
+ for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
+ WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+ /* activate the HQD */
+ for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
+ WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+ return 0;
+}
+
+static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
+{
+ int r;
+ u64 mqd_gpu_addr;
+ struct cik_mqd *mqd;
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+ if (ring->mqd_obj == NULL) {
+ r = amdgpu_bo_create(adev,
+ sizeof(struct cik_mqd),
+ PAGE_SIZE, true,
+ AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
+ &ring->mqd_obj);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
+ return r;
}
- WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
- mqd->queue_state.cp_hqd_pq_doorbell_control);
+ }
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto out;
- /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- ring->wptr = 0;
- mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
- WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
- mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+ r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
+ &mqd_gpu_addr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
+ goto out_unreserve;
+ }
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
+ goto out_unreserve;
+ }
- /* set the vmid for the queue */
- mqd->queue_state.cp_hqd_vmid = 0;
- WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
+ mutex_lock(&adev->srbm_mutex);
+ cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- /* activate the queue */
- mqd->queue_state.cp_hqd_active = 1;
- WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
+ gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
+ gfx_v7_0_mqd_deactivate(adev);
+ gfx_v7_0_mqd_commit(adev, mqd);
- cik_srbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
+ cik_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
- amdgpu_bo_kunmap(ring->mqd_obj);
- amdgpu_bo_unreserve(ring->mqd_obj);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+out_unreserve:
+ amdgpu_bo_unreserve(ring->mqd_obj);
+out:
+ return 0;
+}
- ring->ready = true;
+/**
+ * gfx_v7_0_cp_compute_resume - setup the compute queue registers
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Program the compute queues and test them to make sure they
+ * are working.
+ * Returns 0 for success, error for failure.
+ */
+static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+{
+ int r, i, j;
+ u32 tmp;
+ struct amdgpu_ring *ring;
+
+ /* fix up chicken bits */
+ tmp = RREG32(mmCP_CPF_DEBUG);
+ tmp |= (1 << 23);
+ WREG32(mmCP_CPF_DEBUG, tmp);
+
+ /* init all pipes (even the ones we don't own) */
+ for (i = 0; i < adev->gfx.mec.num_mec; i++)
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
+ gfx_v7_0_compute_pipe_init(adev, i, j);
+
+ /* init the queues */
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ r = gfx_v7_0_compute_queue_init(adev, i);
+ if (r) {
+ gfx_v7_0_cp_compute_fini(adev);
+ return r;
+ }
}
gfx_v7_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
-
+ ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
@@ -3797,6 +3803,9 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
gfx_v7_0_update_rlc(adev, tmp);
data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ if (orig != data)
+ WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+
} else {
gfx_v7_0_enable_gui_idle_interrupt(adev, false);
@@ -3806,11 +3815,11 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
RREG32(mmCB_CGTT_SCLK_CTRL);
data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
- }
-
- if (orig != data)
- WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+ if (orig != data)
+ WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+ gfx_v7_0_enable_gui_idle_interrupt(adev, true);
+ }
}
static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
@@ -4089,7 +4098,7 @@ static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
- mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
return (~data) & mask;
}
@@ -4470,7 +4479,7 @@ static int gfx_v7_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
gfx_v7_0_set_ring_funcs(adev);
@@ -4662,11 +4671,57 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.gb_addr_config = gb_addr_config;
}
+static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+ int r;
+ unsigned irq_type;
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->gfx.eop_irq, irq_type);
+ if (r)
+ return r;
+
+
+ return 0;
+}
+
static int gfx_v7_0_sw_init(void *handle)
{
struct amdgpu_ring *ring;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i, r;
+ int i, j, k, r, ring_id;
+
+ switch (adev->asic_type) {
+ case CHIP_KAVERI:
+ adev->gfx.mec.num_mec = 2;
+ break;
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ default:
+ adev->gfx.mec.num_mec = 1;
+ break;
+ }
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
/* EOP Event */
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -4716,29 +4771,23 @@ static int gfx_v7_0_sw_init(void *handle)
return r;
}
- /* set up the compute queues */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- unsigned irq_type;
-
- /* max 32 queues per MEC */
- if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
- DRM_ERROR("Too many (%d) compute rings!\n", i);
- break;
+ /* set up the compute queues - allocate horizontally across pipes */
+ ring_id = 0;
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v7_0_compute_ring_init(adev,
+ ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
}
- ring = &adev->gfx.compute_ring[i];
- ring->ring_obj = NULL;
- ring->use_doorbell = true;
- ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
- ring->me = 1; /* first MEC */
- ring->pipe = i / 8;
- ring->queue = i % 8;
- sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
- irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
- /* type-2 packets are deprecated on MEC, use type-3 instead */
- r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type);
- if (r)
- return r;
}
/* reserve GDS, GWS and OA resource for gfx */
@@ -4969,8 +5018,8 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
u32 mec_int_cntl, mec_int_cntl_reg;
/*
- * amdgpu controls only pipe 0 of MEC1. That's why this function only
- * handles the setting of interrupts for this specific pipe. All other
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
* pipes' interrupts are set by amdkfd.
*/
@@ -4979,6 +5028,15 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
case 0:
mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
break;
+ case 1:
+ mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
+ break;
+ case 2:
+ mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
+ break;
+ case 3:
+ mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
+ break;
default:
DRM_DEBUG("invalid pipe %d\n", pipe);
return;
@@ -5336,6 +5394,12 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
unsigned disable_masks[4 * 2];
+ u32 ao_cu_num;
+
+ if (adev->flags & AMD_IS_APU)
+ ao_cu_num = 2;
+ else
+ ao_cu_num = adev->gfx.config.max_cu_per_sh;
memset(cu_info, 0, sizeof(*cu_info));
@@ -5354,9 +5418,9 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap;
- for (k = 0; k < 16; k ++) {
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {
- if (counter < 2)
+ if (counter < ao_cu_num)
ao_bitmap |= mask;
counter ++;
}