diff options
Diffstat (limited to 'drivers/misc/habanalabs/goya/goya.c')
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 167 |
1 files changed, 110 insertions, 57 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index a0e181714891..6fba14b81f90 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -9,6 +9,7 @@ #include "include/hw_ip/mmu/mmu_general.h" #include "include/hw_ip/mmu/mmu_v1_0.h" #include "include/goya/asic_reg/goya_masks.h" +#include "include/goya/goya_reg_map.h" #include <linux/pci.h> #include <linux/genalloc.h> @@ -41,8 +42,8 @@ * PQ, CQ and CP are not secured. * PQ, CB and the data are on the SRAM/DRAM. * - * Since QMAN DMA is secured, KMD is parsing the DMA CB: - * - KMD checks DMA pointer + * Since QMAN DMA is secured, the driver is parsing the DMA CB: + * - checks DMA pointer * - WREG, MSG_PROT are not allowed. * - MSG_LONG/SHORT are allowed. * @@ -55,15 +56,15 @@ * QMAN DMA: PQ, CQ and CP are secured. * MMU is set to bypass on the Secure props register of the QMAN. * The reasons we don't enable MMU for PQ, CQ and CP are: - * - PQ entry is in kernel address space and KMD doesn't map it. + * - PQ entry is in kernel address space and the driver doesn't map it. * - CP writes to MSIX register and to kernel address space (completion * queue). * - * DMA is not secured but because CP is secured, KMD still needs to parse the - * CB, but doesn't need to check the DMA addresses. + * DMA is not secured but because CP is secured, the driver still needs to parse + * the CB, but doesn't need to check the DMA addresses. * - * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD - * doesn't map memory in MMU. + * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA and + * the driver doesn't map memory in MMU. * * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode) * @@ -335,18 +336,18 @@ void goya_get_fixed_properties(struct hl_device *hdev) for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; - prop->hw_queues_props[i].kmd_only = 0; + prop->hw_queues_props[i].driver_only = 0; } for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; - prop->hw_queues_props[i].kmd_only = 1; + prop->hw_queues_props[i].driver_only = 1; } for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES + NUMBER_OF_INT_HW_QUEUES; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_INT; - prop->hw_queues_props[i].kmd_only = 0; + prop->hw_queues_props[i].driver_only = 0; } for (; i < HL_MAX_QUEUES; i++) @@ -1006,36 +1007,34 @@ int goya_init_cpu_queues(struct hl_device *hdev) eq = &hdev->event_queue; - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0, - lower_32_bits(cpu_pq->bus_address)); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1, - upper_32_bits(cpu_pq->bus_address)); + WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); + WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_2, lower_32_bits(eq->bus_address)); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(eq->bus_address)); + WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); + WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8, + WREG32(mmCPU_CQ_BASE_ADDR_LOW, lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR)); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9, + WREG32(mmCPU_CQ_BASE_ADDR_HIGH, upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR)); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, HL_CPU_ACCESSIBLE_MEM_SIZE); + WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); + WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); + WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); /* Used for EQ CI */ - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0); + WREG32(mmCPU_EQ_CI, 0); WREG32(mmCPU_IF_PF_PQ_PI, 0); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, PQ_INIT_STATUS_READY_FOR_CP); + WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP); WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GOYA_ASYNC_EVENT_ID_PI_UPDATE); err = hl_poll_timeout( hdev, - mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, + mmCPU_PQ_INIT_STATUS, status, (status == PQ_INIT_STATUS_READY_FOR_HOST), 1000, @@ -2063,6 +2062,25 @@ static void goya_disable_msix(struct hl_device *hdev) goya->hw_cap_initialized &= ~HW_CAP_MSIX; } +static void goya_enable_timestamp(struct hl_device *hdev) +{ + /* Disable the timestamp counter */ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); + + /* Zero the lower/upper parts of the 64-bit counter */ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); + + /* Enable the counter */ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); +} + +static void goya_disable_timestamp(struct hl_device *hdev) +{ + /* Disable the timestamp counter */ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); +} + static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) { u32 wait_timeout_ms, cpu_timeout_ms; @@ -2103,6 +2121,8 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) goya_disable_external_queues(hdev); goya_disable_internal_queues(hdev); + goya_disable_timestamp(hdev); + if (hard_reset) { goya_disable_msix(hdev); goya_mmu_remove_device_cpu_mappings(hdev); @@ -2205,12 +2225,12 @@ static void goya_read_device_fw_version(struct hl_device *hdev, switch (fwc) { case FW_COMP_UBOOT: - ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_29); + ver_off = RREG32(mmUBOOT_VER_OFFSET); dest = hdev->asic_prop.uboot_ver; name = "U-Boot"; break; case FW_COMP_PREBOOT: - ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_28); + ver_off = RREG32(mmPREBOOT_VER_OFFSET); dest = hdev->asic_prop.preboot_ver; name = "Preboot"; break; @@ -2469,7 +2489,7 @@ static int goya_hw_init(struct hl_device *hdev) * we need to reset the chip before doing H/W init. This register is * cleared by the H/W upon H/W reset */ - WREG32(mmPSOC_GLOBAL_CONF_APP_STATUS, HL_DEVICE_HW_STATE_DIRTY); + WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); rc = goya_init_cpu(hdev, GOYA_CPU_TIMEOUT_USEC); if (rc) { @@ -2505,6 +2525,8 @@ static int goya_hw_init(struct hl_device *hdev) goya_init_tpc_qmans(hdev); + goya_enable_timestamp(hdev); + /* MSI-X must be enabled before CPU queues are initialized */ rc = goya_enable_msix(hdev); if (rc) @@ -2729,9 +2751,10 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) GOYA_ASYNC_EVENT_ID_PI_UPDATE); } -void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val) +void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd) { - /* Not needed in Goya */ + /* The QMANs are on the SRAM so need to copy to IO space */ + memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd)); } static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size, @@ -2830,7 +2853,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) { dev_err_ratelimited(hdev->dev, - "Can't send KMD job on QMAN0 because the device is not idle\n"); + "Can't send driver job on QMAN0 because the device is not idle\n"); return -EBUSY; } @@ -3313,9 +3336,11 @@ static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev, int rc; dev_dbg(hdev->dev, "DMA packet details:\n"); - dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr); - dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr); - dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize); + dev_dbg(hdev->dev, "source == 0x%llx\n", + le64_to_cpu(user_dma_pkt->src_addr)); + dev_dbg(hdev->dev, "destination == 0x%llx\n", + le64_to_cpu(user_dma_pkt->dst_addr)); + dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); ctl = le32_to_cpu(user_dma_pkt->ctl); user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >> @@ -3344,9 +3369,11 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev, struct packet_lin_dma *user_dma_pkt) { dev_dbg(hdev->dev, "DMA packet details:\n"); - dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr); - dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr); - dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize); + dev_dbg(hdev->dev, "source == 0x%llx\n", + le64_to_cpu(user_dma_pkt->src_addr)); + dev_dbg(hdev->dev, "destination == 0x%llx\n", + le64_to_cpu(user_dma_pkt->dst_addr)); + dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); /* * WA for HW-23. @@ -3386,7 +3413,8 @@ static int goya_validate_wreg32(struct hl_device *hdev, dev_dbg(hdev->dev, "WREG32 packet details:\n"); dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset); - dev_dbg(hdev->dev, "value == 0x%x\n", wreg_pkt->value); + dev_dbg(hdev->dev, "value == 0x%x\n", + le32_to_cpu(wreg_pkt->value)); if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) { dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n", @@ -3428,12 +3456,13 @@ static int goya_validate_cb(struct hl_device *hdev, while (cb_parsed_length < parser->user_cb_size) { enum packet_id pkt_id; u16 pkt_size; - void *user_pkt; + struct goya_packet *user_pkt; - user_pkt = (void *) (uintptr_t) + user_pkt = (struct goya_packet *) (uintptr_t) (parser->user_cb->kernel_address + cb_parsed_length); - pkt_id = (enum packet_id) (((*(u64 *) user_pkt) & + pkt_id = (enum packet_id) ( + (le64_to_cpu(user_pkt->header) & PACKET_HEADER_PACKET_ID_MASK) >> PACKET_HEADER_PACKET_ID_SHIFT); @@ -3453,7 +3482,8 @@ static int goya_validate_cb(struct hl_device *hdev, * need to validate here as well because patch_cb() is * not called in MMU path while this function is called */ - rc = goya_validate_wreg32(hdev, parser, user_pkt); + rc = goya_validate_wreg32(hdev, + parser, (struct packet_wreg32 *) user_pkt); break; case PACKET_WREG_BULK: @@ -3481,10 +3511,10 @@ static int goya_validate_cb(struct hl_device *hdev, case PACKET_LIN_DMA: if (is_mmu) rc = goya_validate_dma_pkt_mmu(hdev, parser, - user_pkt); + (struct packet_lin_dma *) user_pkt); else rc = goya_validate_dma_pkt_no_mmu(hdev, parser, - user_pkt); + (struct packet_lin_dma *) user_pkt); break; case PACKET_MSG_LONG: @@ -3657,15 +3687,16 @@ static int goya_patch_cb(struct hl_device *hdev, enum packet_id pkt_id; u16 pkt_size; u32 new_pkt_size = 0; - void *user_pkt, *kernel_pkt; + struct goya_packet *user_pkt, *kernel_pkt; - user_pkt = (void *) (uintptr_t) + user_pkt = (struct goya_packet *) (uintptr_t) (parser->user_cb->kernel_address + cb_parsed_length); - kernel_pkt = (void *) (uintptr_t) + kernel_pkt = (struct goya_packet *) (uintptr_t) (parser->patched_cb->kernel_address + cb_patched_cur_length); - pkt_id = (enum packet_id) (((*(u64 *) user_pkt) & + pkt_id = (enum packet_id) ( + (le64_to_cpu(user_pkt->header) & PACKET_HEADER_PACKET_ID_MASK) >> PACKET_HEADER_PACKET_ID_SHIFT); @@ -3680,15 +3711,18 @@ static int goya_patch_cb(struct hl_device *hdev, switch (pkt_id) { case PACKET_LIN_DMA: - rc = goya_patch_dma_packet(hdev, parser, user_pkt, - kernel_pkt, &new_pkt_size); + rc = goya_patch_dma_packet(hdev, parser, + (struct packet_lin_dma *) user_pkt, + (struct packet_lin_dma *) kernel_pkt, + &new_pkt_size); cb_patched_cur_length += new_pkt_size; break; case PACKET_WREG_32: memcpy(kernel_pkt, user_pkt, pkt_size); cb_patched_cur_length += pkt_size; - rc = goya_validate_wreg32(hdev, parser, kernel_pkt); + rc = goya_validate_wreg32(hdev, parser, + (struct packet_wreg32 *) kernel_pkt); break; case PACKET_WREG_BULK: @@ -3937,7 +3971,7 @@ void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address, void goya_update_eq_ci(struct hl_device *hdev, u32 val) { - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val); + WREG32(mmCPU_EQ_CI, val); } void goya_restore_phase_topology(struct hl_device *hdev) @@ -4352,6 +4386,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, size_t total_pkt_size; long result; int rc; + int irq_num_entries, irq_arr_index; + __le32 *goya_irq_arr; total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) + irq_arr_size; @@ -4369,8 +4405,16 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, if (!pkt) return -ENOMEM; - pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0])); - memcpy(&pkt->irqs, irq_arr, irq_arr_size); + irq_num_entries = irq_arr_size / sizeof(irq_arr[0]); + pkt->length = cpu_to_le32(irq_num_entries); + + /* We must perform any necessary endianness conversation on the irq + * array being passed to the goya hardware + */ + for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs; + irq_arr_index < irq_num_entries ; irq_arr_index++) + goya_irq_arr[irq_arr_index] = + cpu_to_le32(irq_arr[irq_arr_index]); pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << ARMCP_PKT_CTL_OPCODE_SHIFT); @@ -4425,6 +4469,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) struct goya_device *goya = hdev->asic_specific; goya->events_stat[event_type]++; + goya->events_stat_aggregate[event_type]++; switch (event_type) { case GOYA_ASYNC_EVENT_ID_PCIE_IF: @@ -4506,12 +4551,16 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) } } -void *goya_get_events_stat(struct hl_device *hdev, u32 *size) +void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) { struct goya_device *goya = hdev->asic_specific; - *size = (u32) sizeof(goya->events_stat); + if (aggregate) { + *size = (u32) sizeof(goya->events_stat_aggregate); + return goya->events_stat_aggregate; + } + *size = (u32) sizeof(goya->events_stat); return goya->events_stat; } @@ -4912,6 +4961,10 @@ int goya_armcp_info_get(struct hl_device *hdev) prop->dram_end_address = prop->dram_base_address + dram_size; } + if (!strlen(prop->armcp_info.card_name)) + strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME, + CARD_NAME_MAX_LEN); + return 0; } @@ -5025,7 +5078,7 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data, static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev) { - return RREG32(mmPSOC_GLOBAL_CONF_APP_STATUS); + return RREG32(mmHW_STATE); } static const struct hl_asic_funcs goya_funcs = { @@ -5042,7 +5095,7 @@ static const struct hl_asic_funcs goya_funcs = { .resume = goya_resume, .cb_mmap = goya_cb_mmap, .ring_doorbell = goya_ring_doorbell, - .flush_pq_write = goya_flush_pq_write, + .pqe_write = goya_pqe_write, .asic_dma_alloc_coherent = goya_dma_alloc_coherent, .asic_dma_free_coherent = goya_dma_free_coherent, .get_int_queue_base = goya_get_int_queue_base, |