author    Alex Deucher <alexander.deucher@amd.com>  2024-11-13 14:43:15 -0500
committer Alex Deucher <alexander.deucher@amd.com>  2025-02-27 15:52:29 -0500
commit    bee48570cf8e312faec9b8f465762052555158d0 (patch)
tree      45ff50535a58b4e6305e41cc7c7cd88242e48e83 /drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
parent    drm/amdgpu/vcn5.0.1: split code along instances (diff)
drm/amdgpu/vcn: switch work handler to be per instance
Have a separate work handler for each VCN instance. This paves the
way for per instance VCN power gating at runtime.

v2: index instances directly on vcn1.0 and 2.0 to make it clear that
    they only support a single instance (Lijo)

Reviewed-by: Boyuan Zhang <Boyuan.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
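The change hinges on a common kernel idiom: embed the delayed_work in the
per-instance struct and recover the instance in the handler with
container_of(), after stashing any needed back-pointers at init time. A
minimal compilable sketch of that idiom, using simplified stand-in types
(my_inst, MY_IDLE_TIMEOUT and the helpers are hypothetical, not the
driver's real names):

    #include <linux/atomic.h>
    #include <linux/container_of.h>
    #include <linux/jiffies.h>
    #include <linux/workqueue.h>

    #define MY_IDLE_TIMEOUT msecs_to_jiffies(1000)

    struct my_inst {
            unsigned int inst;              /* instance index, set at init */
            atomic_t total_submission_cnt;  /* per-instance submission count */
            struct delayed_work idle_work;  /* one work item per instance */
    };

    static void my_idle_work_handler(struct work_struct *work)
    {
            /* delayed_work embeds work_struct; my_inst embeds the delayed_work */
            struct my_inst *ip = container_of(work, struct my_inst,
                                              idle_work.work);

            /* only this instance is checked; no loop over all instances */
            if (atomic_read(&ip->total_submission_cnt))
                    schedule_delayed_work(&ip->idle_work, MY_IDLE_TIMEOUT);
            /* else: this instance alone could be power gated here */
    }

    static void my_inst_init(struct my_inst *ip, unsigned int i)
    {
            ip->inst = i;
            atomic_set(&ip->total_submission_cnt, 0);
            INIT_DELAYED_WORK(&ip->idle_work, my_idle_work_handler);
    }

This is why the patch adds adev and inst fields to each amdgpu_vcn_inst in
early_init: container_of() only recovers the instance struct, so the handler
needs a stored back-pointer to reach the device.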
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 84
1 file changed, 45 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 83faf6e6788a..0c3081a198d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -100,6 +100,9 @@ int amdgpu_vcn_early_init(struct amdgpu_device *adev)
amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ adev->vcn.inst[i].adev = adev;
+ adev->vcn.inst[i].inst = i;
+
if (i == 1 && amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6))
r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
AMDGPU_UCODE_REQUIRED,
@@ -124,12 +127,13 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
unsigned int fw_shared_size, log_offset;
int i, r;
- INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
- mutex_init(&adev->vcn.vcn_pg_lock);
mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
- atomic_set(&adev->vcn.total_submission_cnt, 0);
- for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
+ atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
+ INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
+ }
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
@@ -277,10 +281,10 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
amdgpu_ucode_release(&adev->vcn.inst[j].fw);
+ mutex_destroy(&adev->vcn.inst[j].vcn_pg_lock);
}
mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
- mutex_destroy(&adev->vcn.vcn_pg_lock);
return 0;
}
@@ -331,8 +335,10 @@ int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev)
int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
bool in_ras_intr = amdgpu_ras_intr_triggered();
+ int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
/* err_event_athub will corrupt VCPU buffer, so we need to
* restore fw data and clear buffer in amdgpu_vcn_resume() */
@@ -388,46 +394,45 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
- struct amdgpu_device *adev =
- container_of(work, struct amdgpu_device, vcn.idle_work.work);
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
- unsigned int i, j;
+ unsigned int i = vcn_inst->inst, j;
int r = 0;
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
-
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
- /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
- !adev->vcn.using_unified_queue) {
- struct dpg_pause_state new_state;
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j)
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]);
- if (fence[j] ||
- unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
- new_state.fw_based = VCN_DPG_STATE__PAUSE;
- else
- new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !adev->vcn.using_unified_queue) {
+ struct dpg_pause_state new_state;
- adev->vcn.pause_dpg_mode(adev, j, &new_state);
- }
+ if (fence[i] ||
+ unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt)))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
- fences += fence[j];
+ adev->vcn.pause_dpg_mode(adev, i, &new_state);
}
- if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec);
+ fences += fence[i];
+
+ if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_GATE);
+ AMD_PG_STATE_GATE);
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
- false);
+ false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
} else {
- schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
}
}
@@ -436,18 +441,18 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
int r = 0;
- atomic_inc(&adev->vcn.total_submission_cnt);
+ atomic_inc(&adev->vcn.inst[ring->me].total_submission_cnt);
- if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
+ if (!cancel_delayed_work_sync(&adev->vcn.inst[ring->me].idle_work)) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
true);
if (r)
dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
}
- mutex_lock(&adev->vcn.vcn_pg_lock);
+ mutex_lock(&adev->vcn.inst[ring->me].vcn_pg_lock);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_UNGATE);
+ AMD_PG_STATE_UNGATE);
/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
@@ -472,7 +477,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
}
- mutex_unlock(&adev->vcn.vcn_pg_lock);
+ mutex_unlock(&adev->vcn.inst[ring->me].vcn_pg_lock);
}
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
@@ -485,9 +490,10 @@ void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
!adev->vcn.using_unified_queue)
atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
- atomic_dec(&ring->adev->vcn.total_submission_cnt);
+ atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt);
- schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work,
+ VCN_IDLE_TIMEOUT);
}
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
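A closing note on the begin_use/end_use hunks above: cancel_delayed_work_sync()
returns false when no work was pending, meaning the idle handler either already
ran or was never armed, so the power state may have been dropped and begin_use
must raise it again. Continuing the hypothetical sketch from above (the power
helper is a stand-in, not the driver's real call chain):

    static void my_inst_power_ungate(struct my_inst *ip)
    {
            /* stand-in for the profile switch + ungate done in begin_use */
    }

    static void my_ring_begin_use(struct my_inst *ip)
    {
            atomic_inc(&ip->total_submission_cnt);

            /*
             * A false return means no idle work was pending, so the idle
             * handler may already have powered this instance down.
             */
            if (!cancel_delayed_work_sync(&ip->idle_work))
                    my_inst_power_ungate(ip);
    }

    static void my_ring_end_use(struct my_inst *ip)
    {
            atomic_dec(&ip->total_submission_cnt);
            /* re-arm only this instance's idle timer */
            schedule_delayed_work(&ip->idle_work, MY_IDLE_TIMEOUT);
    }

Because every field here is per instance, an idle instance can be gated on its
own timer while a busy sibling keeps running, which is exactly the runtime
power-gating groundwork the commit message describes.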