aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c343
1 files changed, 312 insertions, 31 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index fb2d74f30448..897a5ce9c9da 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -30,6 +30,7 @@
#include "soc15d.h"
#include "soc15_hw_ip.h"
#include "vcn_v2_0.h"
+#include "mmsch_v4_0.h"
#include "vcn/vcn_4_0_0_offset.h"
#include "vcn/vcn_4_0_0_sh_mask.h"
@@ -45,6 +46,8 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
+#define VCN_HARVEST_MMSCH 0
+
#define RDECODE_MSG_CREATE 0x00000000
#define RDECODE_MESSAGE_CREATE 0x00000001
@@ -53,12 +56,14 @@ static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN1
};
+static int vcn_v4_0_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v4_0_set_powergating_state(void *handle,
enum amd_powergating_state state);
static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
+static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring);
/**
* vcn_v4_0_early_init - set function pointers
@@ -71,6 +76,9 @@ static int vcn_v4_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (amdgpu_sriov_vf(adev))
+ adev->vcn.harvest_config = VCN_HARVEST_MMSCH;
+
/* re-use enc ring as unified ring */
adev->vcn.num_enc_rings = 1;
@@ -92,6 +100,7 @@ static int vcn_v4_0_sw_init(void *handle)
struct amdgpu_ring *ring;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i, r;
+ int vcn_doorbell_index = 0;
r = amdgpu_vcn_sw_init(adev);
if (r)
@@ -103,6 +112,12 @@ static int vcn_v4_0_sw_init(void *handle)
if (r)
return r;
+ if (amdgpu_sriov_vf(adev)) {
+ vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 - MMSCH_DOORBELL_OFFSET;
+ /* get DWORD offset */
+ vcn_doorbell_index = vcn_doorbell_index << 1;
+ }
+
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
@@ -119,7 +134,10 @@ static int vcn_v4_0_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_enc[0];
ring->use_doorbell = true;
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
+ if (amdgpu_sriov_vf(adev))
+ ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1;
+ else
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
sprintf(ring->name, "vcn_unified_%d", i);
@@ -132,10 +150,23 @@ static int vcn_v4_0_sw_init(void *handle)
fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
fw_shared->sq.is_enabled = 1;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
+ fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
+ AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
+
+ if (amdgpu_sriov_vf(adev))
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
}
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+ }
+
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
@@ -169,6 +200,9 @@ static int vcn_v4_0_sw_fini(void *handle)
drm_dev_exit(idx);
}
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
r = amdgpu_vcn_suspend(adev);
if (r)
return r;
@@ -191,18 +225,42 @@ static int vcn_v4_0_hw_init(void *handle)
struct amdgpu_ring *ring;
int i, r;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (amdgpu_sriov_vf(adev)) {
+ r = vcn_v4_0_start_sriov(adev);
+ if (r)
+ goto done;
- ring = &adev->vcn.inst[i].ring_enc[0];
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
- adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
- ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
+ ring->sched.ready = false;
+ ring->no_scheduler = true;
+ dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
+ } else {
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ vcn_v4_0_unified_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+ }
+ } else {
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
- r = amdgpu_ring_test_helper(ring);
- if (r)
- goto done;
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+ }
}
done:
@@ -230,12 +288,14 @@ static int vcn_v4_0_hw_fini(void *handle)
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
-
- if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+ if (!amdgpu_sriov_vf(adev)) {
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, i, regUVD_STATUS))) {
vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ }
}
+
}
return 0;
@@ -1107,6 +1167,214 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
return 0;
}
+static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
+{
+ int i;
+ struct amdgpu_ring *ring_enc;
+ uint64_t cache_addr;
+ uint64_t rb_enc_addr;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t offset, cache_size;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw;
+ uint32_t init_status;
+ uint32_t enabled_vcn;
+
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_direct_read_modify_write
+ direct_rd_mod_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_init_header header;
+
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2;
+ for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) {
+ header.inst[i].init_status = 0;
+ header.inst[i].table_offset = 0;
+ header.inst[i].table_size = 0;
+ }
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ table_size = 0;
+
+ MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+ offset = 0;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_OFFSET0),
+ 0);
+ } else {
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = cache_size;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_SIZE0),
+ cache_size);
+
+ cache_addr = adev->vcn.inst[i].gpu_addr + offset;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_OFFSET1),
+ 0);
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_SIZE1),
+ AMDGPU_VCN_STACK_SIZE);
+
+ cache_addr = adev->vcn.inst[i].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_OFFSET2),
+ 0);
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_SIZE2),
+ AMDGPU_VCN_CONTEXT_SIZE);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ rb_setup = &fw_shared->rb_setup;
+
+ ring_enc = &adev->vcn.inst[i].ring_enc[0];
+ ring_enc->wptr = 0;
+ rb_enc_addr = ring_enc->gpu_addr;
+
+ rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+ rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+ rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+ rb_setup->rb_size = ring_enc->ring_size / 4;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+
+ /* add end packet */
+ MMSCH_V4_0_INSERT_END();
+
+ /* refine header */
+ header.inst[i].init_status = 0;
+ header.inst[i].table_offset = header.total_size;
+ header.inst[i].table_size = table_size;
+ header.total_size += table_size;
+ }
+
+ /* Update init table header in memory */
+ size = sizeof(struct mmsch_v4_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ /* message MMSCH (in VCN[0]) to initialize this client
+ * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
+ * of memory descriptor location
+ */
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ /* 2, update vmid of descriptor */
+ tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ /* use domain0 for MM scheduler */
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ size = header.total_size;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ /* 5, kick off the initialization and wait until
+ * MMSCH_VF_MAILBOX_RESP becomes non-zero
+ */
+ param = 0x00000001;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP);
+ if (resp != 0)
+ break;
+
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+ init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->inst[enabled_vcn].init_status;
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+ "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+
+ return 0;
+}
+
/**
* vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode
*
@@ -1327,21 +1595,23 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
}
-static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p)
+static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job)
{
struct drm_gpu_scheduler **scheds;
/* The create msg must be in the first IB submitted */
- if (atomic_read(&p->entity->fence_seq))
+ if (atomic_read(&job->base.entity->fence_seq))
return -EINVAL;
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
[AMDGPU_RING_PRIO_0].sched;
- drm_sched_entity_modify_sched(p->entity, scheds, 1);
+ drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
return 0;
}
-static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
+static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+ uint64_t addr)
{
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *map;
@@ -1412,7 +1682,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
continue;
- r = vcn_v4_0_limit_sched(p);
+ r = vcn_v4_0_limit_sched(p, job);
if (r)
goto out;
}
@@ -1425,32 +1695,34 @@ out:
#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib)
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
- struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
+ struct amdgpu_vcn_decode_buffer *decode_buffer;
+ uint64_t addr;
uint32_t val;
- int r = 0;
/* The first instance can decode anything */
if (!ring->me)
- return r;
+ return 0;
/* unified queue ib header has 8 double words. */
if (ib->length_dw < 8)
- return r;
+ return 0;
val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE
+ if (val != RADEON_VCN_ENGINE_TYPE_DECODE)
+ return 0;
- if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
- decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
+ decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
- if (decode_buffer->valid_buf_flag & 0x1)
- r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
- decode_buffer->msg_buffer_address_lo);
- }
- return r;
+ if (!(decode_buffer->valid_buf_flag & 0x1))
+ return 0;
+
+ addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
+ decode_buffer->msg_buffer_address_lo;
+ return vcn_v4_0_dec_msg(p, job, addr);
}
static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
@@ -1596,6 +1868,15 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ /* for SRIOV, guest should not control VCN Power-gating
+ * MMSCH FW should control Power-gating and clock-gating
+ * guest should avoid touching CGC and PG
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
if(state == adev->vcn.cur_state)
return 0;