diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 293 |
1 files changed, 285 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 9a19a6a57b23..0b52af415b28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -27,6 +27,7 @@ #include <linux/firmware.h> #include <linux/module.h> #include <linux/pci.h> +#include <linux/debugfs.h> #include <drm/drm_drv.h> #include "amdgpu.h" @@ -51,6 +52,10 @@ #define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin" #define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin" #define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin" +#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin" +#define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin" +#define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin" +#define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -68,6 +73,10 @@ MODULE_FIRMWARE(FIRMWARE_VANGOGH); MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH); MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY); MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP); +MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2); +MODULE_FIRMWARE(FIRMWARE_VCN4_0_0); +MODULE_FIRMWARE(FIRMWARE_VCN4_0_2); +MODULE_FIRMWARE(FIRMWARE_VCN4_0_4); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -77,6 +86,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) const char *fw_name; const struct common_firmware_header *hdr; unsigned char fw_check; + unsigned int fw_shared_size, log_offset; int i, r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); @@ -165,6 +175,30 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) adev->vcn.indirect_sram = true; break; + case IP_VERSION(3, 1, 2): + fw_name = FIRMWARE_VCN_3_1_2; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; + case IP_VERSION(4, 0, 0): + fw_name = FIRMWARE_VCN4_0_0; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; + case IP_VERSION(4, 0, 2): + fw_name = FIRMWARE_VCN4_0_2; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; + case IP_VERSION(4, 0, 4): + fw_name = FIRMWARE_VCN4_0_4; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; default: return -EINVAL; } @@ -218,7 +252,19 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + + if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)){ + fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)); + log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log); + } else { + fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + log_offset = offsetof(struct amdgpu_fw_shared, fw_log); + } + + bo_size += fw_shared_size; + + if (amdgpu_vcnfw_log) + bo_size += AMDGPU_VCNFW_LOG_SIZE; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) @@ -232,10 +278,18 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) return r; } - adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr + - bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); - adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr + - bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr + + bo_size - fw_shared_size; + adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr + + bo_size - fw_shared_size; + + adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size; + + if (amdgpu_vcnfw_log) { + adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE; + adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE; + adev->vcn.inst[i].fw_shared.log_offset = log_offset; + } if (adev->vcn.indirect_sram) { r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, @@ -283,6 +337,18 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) return 0; } +/* from vcn4 and above, only unified queue is used */ +static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + bool ret = false; + + if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) + ret = true; + + return ret; +} + bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; @@ -672,19 +738,55 @@ error: return r; } +static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib, + uint32_t ib_pack_in_dw, bool enc) +{ + uint32_t *ib_checksum; + + ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */ + ib->ptr[ib->length_dw++] = 0x30000002; + ib_checksum = &ib->ptr[ib->length_dw++]; + ib->ptr[ib->length_dw++] = ib_pack_in_dw; + + ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */ + ib->ptr[ib->length_dw++] = 0x30000001; + ib->ptr[ib->length_dw++] = enc ? 0x2 : 0x3; + ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t); + + return ib_checksum; +} + +static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum, + uint32_t ib_pack_in_dw) +{ + uint32_t i; + uint32_t checksum = 0; + + for (i = 0; i < ib_pack_in_dw; i++) + checksum += *(*ib_checksum + 2 + i); + + **ib_checksum = checksum; +} + static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, struct amdgpu_ib *ib_msg, struct dma_fence **fence) { struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; - const unsigned int ib_size_dw = 64; + unsigned int ib_size_dw = 64; struct amdgpu_device *adev = ring->adev; struct dma_fence *f = NULL; struct amdgpu_job *job; struct amdgpu_ib *ib; uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); + bool sq = amdgpu_vcn_using_unified_queue(ring); + uint32_t *ib_checksum; + uint32_t ib_pack_in_dw; int i, r; + if (sq) + ib_size_dw += 8; + r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job); if (r) @@ -693,6 +795,13 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, ib = &job->ibs[0]; ib->length_dw = 0; + /* single queue headers */ + if (sq) { + ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) + + 4 + 2; /* engine info + decoding ib in dw */ + ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); + } + ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8; ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER); decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]); @@ -706,6 +815,9 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; + if (sq) + amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err_free; @@ -792,13 +904,18 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand struct amdgpu_ib *ib_msg, struct dma_fence **fence) { - const unsigned ib_size_dw = 16; + unsigned int ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; + uint32_t *ib_checksum = NULL; uint64_t addr; + bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; + if (sq) + ib_size_dw += 8; + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job); if (r) @@ -808,6 +925,10 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); ib->length_dw = 0; + + if (sq) + ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); + ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; @@ -827,6 +948,9 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; + if (sq) + amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; @@ -846,13 +970,18 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han struct amdgpu_ib *ib_msg, struct dma_fence **fence) { - const unsigned ib_size_dw = 16; + unsigned int ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; + uint32_t *ib_checksum = NULL; uint64_t addr; + bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; + if (sq) + ib_size_dw += 8; + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job); if (r) @@ -862,6 +991,10 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); ib->length_dw = 0; + + if (sq) + ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); + ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; ib->ptr[ib->length_dw++] = handle; @@ -881,6 +1014,9 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; + if (sq) + amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; @@ -931,6 +1067,20 @@ error: return r; } +int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + long r; + + r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); + if (r) + goto error; + + r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout); + +error: + return r; +} + enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring) { switch(ring) { @@ -971,3 +1121,130 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } } + +/* + * debugfs for mapping vcn firmware log buffer. + */ +#if defined(CONFIG_DEBUG_FS) +static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_vcn_inst *vcn; + void *log_buf; + volatile struct amdgpu_vcn_fwlog *plog; + unsigned int read_pos, write_pos, available, i, read_bytes = 0; + unsigned int read_num[2] = {0}; + + vcn = file_inode(f)->i_private; + if (!vcn) + return -ENODEV; + + if (!vcn->fw_shared.cpu_addr || !amdgpu_vcnfw_log) + return -EFAULT; + + log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size; + + plog = (volatile struct amdgpu_vcn_fwlog *)log_buf; + read_pos = plog->rptr; + write_pos = plog->wptr; + + if (read_pos > AMDGPU_VCNFW_LOG_SIZE || write_pos > AMDGPU_VCNFW_LOG_SIZE) + return -EFAULT; + + if (!size || (read_pos == write_pos)) + return 0; + + if (write_pos > read_pos) { + available = write_pos - read_pos; + read_num[0] = min(size, (size_t)available); + } else { + read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos; + available = read_num[0] + write_pos - plog->header_size; + if (size > available) + read_num[1] = write_pos - plog->header_size; + else if (size > read_num[0]) + read_num[1] = size - read_num[0]; + else + read_num[0] = size; + } + + for (i = 0; i < 2; i++) { + if (read_num[i]) { + if (read_pos == AMDGPU_VCNFW_LOG_SIZE) + read_pos = plog->header_size; + if (read_num[i] == copy_to_user((buf + read_bytes), + (log_buf + read_pos), read_num[i])) + return -EFAULT; + + read_bytes += read_num[i]; + read_pos += read_num[i]; + } + } + + plog->rptr = read_pos; + *pos += read_bytes; + return read_bytes; +} + +static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_vcn_fwlog_read, + .llseek = default_llseek +}; +#endif + +void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i, + struct amdgpu_vcn_inst *vcn) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + sprintf(name, "amdgpu_vcn_%d_fwlog", i); + debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, vcn, + &amdgpu_debugfs_vcnfwlog_fops, + AMDGPU_VCNFW_LOG_SIZE); +#endif +} + +void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn) +{ +#if defined(CONFIG_DEBUG_FS) + volatile uint32_t *flag = vcn->fw_shared.cpu_addr; + void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size; + uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size; + volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr; + volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr + + vcn->fw_shared.log_offset; + *flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG); + fw_log->is_enabled = 1; + fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF); + fw_log->addr_hi = cpu_to_le32(fw_log_gpu_addr >> 32); + fw_log->size = cpu_to_le32(AMDGPU_VCNFW_LOG_SIZE); + + log_buf->header_size = sizeof(struct amdgpu_vcn_fwlog); + log_buf->buffer_size = AMDGPU_VCNFW_LOG_SIZE; + log_buf->rptr = log_buf->header_size; + log_buf->wptr = log_buf->header_size; + log_buf->wrapped = 0; +#endif +} + +int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->vcn.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + + return 0; +} |