aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/goya/goya.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/goya/goya.c')
-rw-r--r--drivers/misc/habanalabs/goya/goya.c167
1 files changed, 110 insertions, 57 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index a0e181714891..6fba14b81f90 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -9,6 +9,7 @@
#include "include/hw_ip/mmu/mmu_general.h"
#include "include/hw_ip/mmu/mmu_v1_0.h"
#include "include/goya/asic_reg/goya_masks.h"
+#include "include/goya/goya_reg_map.h"
#include <linux/pci.h>
#include <linux/genalloc.h>
@@ -41,8 +42,8 @@
* PQ, CQ and CP are not secured.
* PQ, CB and the data are on the SRAM/DRAM.
*
- * Since QMAN DMA is secured, KMD is parsing the DMA CB:
- * - KMD checks DMA pointer
+ * Since QMAN DMA is secured, the driver is parsing the DMA CB:
+ * - checks DMA pointer
* - WREG, MSG_PROT are not allowed.
* - MSG_LONG/SHORT are allowed.
*
@@ -55,15 +56,15 @@
* QMAN DMA: PQ, CQ and CP are secured.
* MMU is set to bypass on the Secure props register of the QMAN.
* The reasons we don't enable MMU for PQ, CQ and CP are:
- * - PQ entry is in kernel address space and KMD doesn't map it.
+ * - PQ entry is in kernel address space and the driver doesn't map it.
* - CP writes to MSIX register and to kernel address space (completion
* queue).
*
- * DMA is not secured but because CP is secured, KMD still needs to parse the
- * CB, but doesn't need to check the DMA addresses.
+ * DMA is not secured but because CP is secured, the driver still needs to parse
+ * the CB, but doesn't need to check the DMA addresses.
*
- * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD
- * doesn't map memory in MMU.
+ * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA and
+ * the driver doesn't map memory in MMU.
*
* QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
*
@@ -335,18 +336,18 @@ void goya_get_fixed_properties(struct hl_device *hdev)
for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
- prop->hw_queues_props[i].kmd_only = 0;
+ prop->hw_queues_props[i].driver_only = 0;
}
for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
- prop->hw_queues_props[i].kmd_only = 1;
+ prop->hw_queues_props[i].driver_only = 1;
}
for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
NUMBER_OF_INT_HW_QUEUES; i++) {
prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
- prop->hw_queues_props[i].kmd_only = 0;
+ prop->hw_queues_props[i].driver_only = 0;
}
for (; i < HL_MAX_QUEUES; i++)
@@ -1006,36 +1007,34 @@ int goya_init_cpu_queues(struct hl_device *hdev)
eq = &hdev->event_queue;
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0,
- lower_32_bits(cpu_pq->bus_address));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1,
- upper_32_bits(cpu_pq->bus_address));
+ WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
+ WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_2, lower_32_bits(eq->bus_address));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(eq->bus_address));
+ WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
+ WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8,
+ WREG32(mmCPU_CQ_BASE_ADDR_LOW,
lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9,
+ WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES);
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES);
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, HL_CPU_ACCESSIBLE_MEM_SIZE);
+ WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
+ WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
+ WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
/* Used for EQ CI */
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0);
+ WREG32(mmCPU_EQ_CI, 0);
WREG32(mmCPU_IF_PF_PQ_PI, 0);
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, PQ_INIT_STATUS_READY_FOR_CP);
+ WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_PI_UPDATE);
err = hl_poll_timeout(
hdev,
- mmPSOC_GLOBAL_CONF_SCRATCHPAD_7,
+ mmCPU_PQ_INIT_STATUS,
status,
(status == PQ_INIT_STATUS_READY_FOR_HOST),
1000,
@@ -2063,6 +2062,25 @@ static void goya_disable_msix(struct hl_device *hdev)
goya->hw_cap_initialized &= ~HW_CAP_MSIX;
}
+static void goya_enable_timestamp(struct hl_device *hdev)
+{
+ /* Disable the timestamp counter */
+ WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
+
+ /* Zero the lower/upper parts of the 64-bit counter */
+ WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
+ WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
+
+ /* Enable the counter */
+ WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
+}
+
+static void goya_disable_timestamp(struct hl_device *hdev)
+{
+ /* Disable the timestamp counter */
+ WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
+}
+
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
{
u32 wait_timeout_ms, cpu_timeout_ms;
@@ -2103,6 +2121,8 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
goya_disable_external_queues(hdev);
goya_disable_internal_queues(hdev);
+ goya_disable_timestamp(hdev);
+
if (hard_reset) {
goya_disable_msix(hdev);
goya_mmu_remove_device_cpu_mappings(hdev);
@@ -2205,12 +2225,12 @@ static void goya_read_device_fw_version(struct hl_device *hdev,
switch (fwc) {
case FW_COMP_UBOOT:
- ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_29);
+ ver_off = RREG32(mmUBOOT_VER_OFFSET);
dest = hdev->asic_prop.uboot_ver;
name = "U-Boot";
break;
case FW_COMP_PREBOOT:
- ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_28);
+ ver_off = RREG32(mmPREBOOT_VER_OFFSET);
dest = hdev->asic_prop.preboot_ver;
name = "Preboot";
break;
@@ -2469,7 +2489,7 @@ static int goya_hw_init(struct hl_device *hdev)
* we need to reset the chip before doing H/W init. This register is
* cleared by the H/W upon H/W reset
*/
- WREG32(mmPSOC_GLOBAL_CONF_APP_STATUS, HL_DEVICE_HW_STATE_DIRTY);
+ WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
rc = goya_init_cpu(hdev, GOYA_CPU_TIMEOUT_USEC);
if (rc) {
@@ -2505,6 +2525,8 @@ static int goya_hw_init(struct hl_device *hdev)
goya_init_tpc_qmans(hdev);
+ goya_enable_timestamp(hdev);
+
/* MSI-X must be enabled before CPU queues are initialized */
rc = goya_enable_msix(hdev);
if (rc)
@@ -2729,9 +2751,10 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
GOYA_ASYNC_EVENT_ID_PI_UPDATE);
}
-void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val)
+void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
{
- /* Not needed in Goya */
+ /* The QMANs are on the SRAM so need to copy to IO space */
+ memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
}
static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
@@ -2830,7 +2853,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
dev_err_ratelimited(hdev->dev,
- "Can't send KMD job on QMAN0 because the device is not idle\n");
+ "Can't send driver job on QMAN0 because the device is not idle\n");
return -EBUSY;
}
@@ -3313,9 +3336,11 @@ static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
int rc;
dev_dbg(hdev->dev, "DMA packet details:\n");
- dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
- dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
- dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
+ dev_dbg(hdev->dev, "source == 0x%llx\n",
+ le64_to_cpu(user_dma_pkt->src_addr));
+ dev_dbg(hdev->dev, "destination == 0x%llx\n",
+ le64_to_cpu(user_dma_pkt->dst_addr));
+ dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
ctl = le32_to_cpu(user_dma_pkt->ctl);
user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
@@ -3344,9 +3369,11 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
struct packet_lin_dma *user_dma_pkt)
{
dev_dbg(hdev->dev, "DMA packet details:\n");
- dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
- dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
- dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
+ dev_dbg(hdev->dev, "source == 0x%llx\n",
+ le64_to_cpu(user_dma_pkt->src_addr));
+ dev_dbg(hdev->dev, "destination == 0x%llx\n",
+ le64_to_cpu(user_dma_pkt->dst_addr));
+ dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
/*
* WA for HW-23.
@@ -3386,7 +3413,8 @@ static int goya_validate_wreg32(struct hl_device *hdev,
dev_dbg(hdev->dev, "WREG32 packet details:\n");
dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
- dev_dbg(hdev->dev, "value == 0x%x\n", wreg_pkt->value);
+ dev_dbg(hdev->dev, "value == 0x%x\n",
+ le32_to_cpu(wreg_pkt->value));
if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
@@ -3428,12 +3456,13 @@ static int goya_validate_cb(struct hl_device *hdev,
while (cb_parsed_length < parser->user_cb_size) {
enum packet_id pkt_id;
u16 pkt_size;
- void *user_pkt;
+ struct goya_packet *user_pkt;
- user_pkt = (void *) (uintptr_t)
+ user_pkt = (struct goya_packet *) (uintptr_t)
(parser->user_cb->kernel_address + cb_parsed_length);
- pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
+ pkt_id = (enum packet_id) (
+ (le64_to_cpu(user_pkt->header) &
PACKET_HEADER_PACKET_ID_MASK) >>
PACKET_HEADER_PACKET_ID_SHIFT);
@@ -3453,7 +3482,8 @@ static int goya_validate_cb(struct hl_device *hdev,
* need to validate here as well because patch_cb() is
* not called in MMU path while this function is called
*/
- rc = goya_validate_wreg32(hdev, parser, user_pkt);
+ rc = goya_validate_wreg32(hdev,
+ parser, (struct packet_wreg32 *) user_pkt);
break;
case PACKET_WREG_BULK:
@@ -3481,10 +3511,10 @@ static int goya_validate_cb(struct hl_device *hdev,
case PACKET_LIN_DMA:
if (is_mmu)
rc = goya_validate_dma_pkt_mmu(hdev, parser,
- user_pkt);
+ (struct packet_lin_dma *) user_pkt);
else
rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
- user_pkt);
+ (struct packet_lin_dma *) user_pkt);
break;
case PACKET_MSG_LONG:
@@ -3657,15 +3687,16 @@ static int goya_patch_cb(struct hl_device *hdev,
enum packet_id pkt_id;
u16 pkt_size;
u32 new_pkt_size = 0;
- void *user_pkt, *kernel_pkt;
+ struct goya_packet *user_pkt, *kernel_pkt;
- user_pkt = (void *) (uintptr_t)
+ user_pkt = (struct goya_packet *) (uintptr_t)
(parser->user_cb->kernel_address + cb_parsed_length);
- kernel_pkt = (void *) (uintptr_t)
+ kernel_pkt = (struct goya_packet *) (uintptr_t)
(parser->patched_cb->kernel_address +
cb_patched_cur_length);
- pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
+ pkt_id = (enum packet_id) (
+ (le64_to_cpu(user_pkt->header) &
PACKET_HEADER_PACKET_ID_MASK) >>
PACKET_HEADER_PACKET_ID_SHIFT);
@@ -3680,15 +3711,18 @@ static int goya_patch_cb(struct hl_device *hdev,
switch (pkt_id) {
case PACKET_LIN_DMA:
- rc = goya_patch_dma_packet(hdev, parser, user_pkt,
- kernel_pkt, &new_pkt_size);
+ rc = goya_patch_dma_packet(hdev, parser,
+ (struct packet_lin_dma *) user_pkt,
+ (struct packet_lin_dma *) kernel_pkt,
+ &new_pkt_size);
cb_patched_cur_length += new_pkt_size;
break;
case PACKET_WREG_32:
memcpy(kernel_pkt, user_pkt, pkt_size);
cb_patched_cur_length += pkt_size;
- rc = goya_validate_wreg32(hdev, parser, kernel_pkt);
+ rc = goya_validate_wreg32(hdev, parser,
+ (struct packet_wreg32 *) kernel_pkt);
break;
case PACKET_WREG_BULK:
@@ -3937,7 +3971,7 @@ void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val);
+ WREG32(mmCPU_EQ_CI, val);
}
void goya_restore_phase_topology(struct hl_device *hdev)
@@ -4352,6 +4386,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
size_t total_pkt_size;
long result;
int rc;
+ int irq_num_entries, irq_arr_index;
+ __le32 *goya_irq_arr;
total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
irq_arr_size;
@@ -4369,8 +4405,16 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
if (!pkt)
return -ENOMEM;
- pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
- memcpy(&pkt->irqs, irq_arr, irq_arr_size);
+ irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
+ pkt->length = cpu_to_le32(irq_num_entries);
+
+ /* We must perform any necessary endianness conversation on the irq
+ * array being passed to the goya hardware
+ */
+ for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
+ irq_arr_index < irq_num_entries ; irq_arr_index++)
+ goya_irq_arr[irq_arr_index] =
+ cpu_to_le32(irq_arr[irq_arr_index]);
pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
@@ -4425,6 +4469,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
struct goya_device *goya = hdev->asic_specific;
goya->events_stat[event_type]++;
+ goya->events_stat_aggregate[event_type]++;
switch (event_type) {
case GOYA_ASYNC_EVENT_ID_PCIE_IF:
@@ -4506,12 +4551,16 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
}
}
-void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
+void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
struct goya_device *goya = hdev->asic_specific;
- *size = (u32) sizeof(goya->events_stat);
+ if (aggregate) {
+ *size = (u32) sizeof(goya->events_stat_aggregate);
+ return goya->events_stat_aggregate;
+ }
+ *size = (u32) sizeof(goya->events_stat);
return goya->events_stat;
}
@@ -4912,6 +4961,10 @@ int goya_armcp_info_get(struct hl_device *hdev)
prop->dram_end_address = prop->dram_base_address + dram_size;
}
+ if (!strlen(prop->armcp_info.card_name))
+ strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
+ CARD_NAME_MAX_LEN);
+
return 0;
}
@@ -5025,7 +5078,7 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
- return RREG32(mmPSOC_GLOBAL_CONF_APP_STATUS);
+ return RREG32(mmHW_STATE);
}
static const struct hl_asic_funcs goya_funcs = {
@@ -5042,7 +5095,7 @@ static const struct hl_asic_funcs goya_funcs = {
.resume = goya_resume,
.cb_mmap = goya_cb_mmap,
.ring_doorbell = goya_ring_doorbell,
- .flush_pq_write = goya_flush_pq_write,
+ .pqe_write = goya_pqe_write,
.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
.asic_dma_free_coherent = goya_dma_free_coherent,
.get_int_queue_base = goya_get_int_queue_base,