aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c107
1 files changed, 82 insertions, 25 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a41272fbcba2..2badbc0355f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -56,19 +56,23 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
- unsigned long bo_size;
+ unsigned long bo_size, fw_shared_bo_size;
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned char fw_check;
int i, r;
INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
+ mutex_init(&adev->vcn.vcn_pg_lock);
+ atomic_set(&adev->vcn.total_submission_cnt, 0);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
switch (adev->asic_type) {
case CHIP_RAVEN:
- if (adev->rev_id >= 8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
fw_name = FIRMWARE_RAVEN2;
- else if (adev->pdev->device == 0x15d8)
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
fw_name = FIRMWARE_PICASSO;
else
fw_name = FIRMWARE_RAVEN;
@@ -178,6 +182,17 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
return r;
}
}
+
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)),
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].fw_shared_bo,
+ &adev->vcn.inst[i].fw_shared_gpu_addr, &adev->vcn.inst[i].fw_shared_cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "VCN %d (%d) failed to allocate firmware shared bo\n", i, r);
+ return r;
+ }
+
+ fw_shared_bo_size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo);
+ adev->vcn.inst[i].saved_shm_bo = kvmalloc(fw_shared_bo_size, GFP_KERNEL);
}
return 0;
@@ -192,6 +207,12 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
if (adev->vcn.harvest_config & (1 << j))
continue;
+
+ kvfree(adev->vcn.inst[j].saved_shm_bo);
+ amdgpu_bo_free_kernel(&adev->vcn.inst[j].fw_shared_bo,
+ &adev->vcn.inst[j].fw_shared_gpu_addr,
+ (void **)&adev->vcn.inst[j].fw_shared_cpu_addr);
+
if (adev->vcn.indirect_sram) {
amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
&adev->vcn.inst[j].dpg_sram_gpu_addr,
@@ -210,6 +231,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
}
release_firmware(adev->vcn.fw);
+ mutex_destroy(&adev->vcn.vcn_pg_lock);
return 0;
}
@@ -236,6 +258,17 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
return -ENOMEM;
memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
+
+ if (adev->vcn.inst[i].fw_shared_bo == NULL)
+ return 0;
+
+ if (!adev->vcn.inst[i].saved_shm_bo)
+ return -ENOMEM;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo);
+ ptr = adev->vcn.inst[i].fw_shared_cpu_addr;
+
+ memcpy_fromio(adev->vcn.inst[i].saved_shm_bo, ptr, size);
}
return 0;
}
@@ -273,6 +306,17 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
}
memset_io(ptr, 0, size);
}
+
+ if (adev->vcn.inst[i].fw_shared_bo == NULL)
+ return -EINVAL;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo);
+ ptr = adev->vcn.inst[i].fw_shared_cpu_addr;
+
+ if (adev->vcn.inst[i].saved_shm_bo != NULL)
+ memcpy_toio(ptr, adev->vcn.inst[i].saved_shm_bo, size);
+ else
+ memset_io(ptr, 0, size);
}
return 0;
}
@@ -295,7 +339,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
struct dpg_pause_state new_state;
- if (fence[j])
+ if (fence[j] ||
+ unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
@@ -307,8 +352,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
fences += fence[j];
}
- if (fences == 0) {
- amdgpu_gfx_off_ctrl(adev, true);
+ if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
} else {
@@ -319,36 +363,46 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
- if (set_clocks) {
- amdgpu_gfx_off_ctrl(adev, false);
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_UNGATE);
- }
+ atomic_inc(&adev->vcn.total_submission_cnt);
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ mutex_lock(&adev->vcn.vcn_pg_lock);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
struct dpg_pause_state new_state;
- unsigned int fences = 0;
- unsigned int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
- }
- if (fences)
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
+ atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
new_state.fw_based = VCN_DPG_STATE__PAUSE;
- else
- new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ } else {
+ unsigned int fences = 0;
+ unsigned int i;
- if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
- new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
+
+ if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ }
adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
}
+ mutex_unlock(&adev->vcn.vcn_pg_lock);
}
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
+ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+
+ atomic_dec(&ring->adev->vcn.total_submission_cnt);
+
schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
@@ -390,7 +444,8 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+ r = amdgpu_job_alloc_with_ib(adev, 64,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
goto err;
@@ -557,7 +612,8 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
@@ -610,7 +666,8 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;