aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/gaudi/gaudi.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/gaudi/gaudi.c')
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c340
1 files changed, 212 insertions, 128 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 00a0a7238d81..5f65a1691551 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -21,7 +21,6 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>
-#include <linux/bitfield.h>
/*
* Gaudi security scheme:
@@ -154,6 +153,29 @@ static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
[PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
};
+static inline bool validate_packet_id(enum packet_id id)
+{
+ switch (id) {
+ case PACKET_WREG_32:
+ case PACKET_WREG_BULK:
+ case PACKET_MSG_LONG:
+ case PACKET_MSG_SHORT:
+ case PACKET_CP_DMA:
+ case PACKET_REPEAT:
+ case PACKET_MSG_PROT:
+ case PACKET_FENCE:
+ case PACKET_LIN_DMA:
+ case PACKET_NOP:
+ case PACKET_STOP:
+ case PACKET_ARB_POINT:
+ case PACKET_WAIT:
+ case PACKET_LOAD_AND_EXE:
+ return true;
+ default:
+ return false;
+ }
+}
+
static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
"tpc_address_exceed_slm",
@@ -337,13 +359,14 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
-static int gaudi_armcp_info_get(struct hl_device *hdev);
+static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static int gaudi_get_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u32 num_sync_stream_queues = 0;
int i;
prop->max_queues = GAUDI_QUEUE_ID_SIZE;
@@ -360,6 +383,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
prop->hw_queues_props[i].driver_only = 0;
prop->hw_queues_props[i].requires_kernel_cb = 1;
prop->hw_queues_props[i].supports_sync_stream = 1;
+ num_sync_stream_queues++;
} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
prop->hw_queues_props[i].driver_only = 1;
@@ -417,6 +441,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
prop->pmmu.end_addr =
(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
prop->pmmu.page_size = PAGE_SIZE_4KB;
+ prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
@@ -433,7 +458,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
prop->num_of_events = GAUDI_EVENT_SIZE;
prop->tpc_enabled_mask = TPC_ENABLED_MASK;
- prop->max_power_default = MAX_POWER_DEFAULT;
+ prop->max_power_default = MAX_POWER_DEFAULT_PCI;
prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
@@ -441,11 +466,16 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
- strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
+ strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
+ prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
+ num_sync_stream_queues * HL_RSVD_SOBS;
+ prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
+ num_sync_stream_queues * HL_RSVD_MONS;
+
return 0;
}
@@ -569,10 +599,15 @@ static int gaudi_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
- rc = hl_pci_init(hdev);
+ rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
+ mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
if (rc)
goto free_queue_props;
+ /* GAUDI Firmware does not yet support security */
+ prop->fw_security_disabled = true;
+ dev_info(hdev->dev, "firmware-level security is disabled\n");
+
return 0;
free_queue_props:
@@ -652,10 +687,10 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
- ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
- (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
- (1 << GAUDI_PKT_CTL_RB_SHIFT) |
- (1 << GAUDI_PKT_CTL_MB_SHIFT));
+ ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
+ ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
@@ -757,13 +792,13 @@ static int gaudi_late_init(struct hl_device *hdev)
struct gaudi_device *gaudi = hdev->asic_specific;
int rc;
- rc = gaudi->armcp_info_get(hdev);
+ rc = gaudi->cpucp_info_get(hdev);
if (rc) {
- dev_err(hdev->dev, "Failed to get armcp info\n");
+ dev_err(hdev->dev, "Failed to get cpucp info\n");
return rc;
}
- rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
+ rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
if (rc) {
dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
return rc;
@@ -788,7 +823,7 @@ static int gaudi_late_init(struct hl_device *hdev)
return 0;
disable_pci_access:
- hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+ hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
return rc;
}
@@ -958,7 +993,7 @@ static int gaudi_sw_init(struct hl_device *hdev)
}
}
- gaudi->armcp_info_get = gaudi_armcp_info_get;
+ gaudi->cpucp_info_get = gaudi_cpucp_info_get;
gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
@@ -1830,9 +1865,11 @@ static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
- WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
- WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
- WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
+ WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
+ WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
+ QMAN_LDMA_SRC_OFFSET);
+ WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
+ QMAN_LDMA_DST_OFFSET);
WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
@@ -1888,6 +1925,9 @@ static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
+ /* WA for H/W bug H3-2116 */
+ WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
+
/* STOP_ON bit implies no completion to operation in case of RAZWI */
if (hdev->stop_on_err)
dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
@@ -1987,13 +2027,19 @@ static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
- WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
- WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
- WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
+ WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
+ QMAN_CPDMA_SIZE_OFFSET);
+ WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
+ QMAN_CPDMA_SRC_OFFSET);
+ WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
+ QMAN_CPDMA_DST_OFFSET);
} else {
- WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
- WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
- WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
+ WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
+ QMAN_LDMA_SIZE_OFFSET);
+ WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
+ QMAN_LDMA_SRC_OFFSET);
+ WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
+ QMAN_LDMA_DST_OFFSET);
/* Configure RAZWI IRQ */
dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
@@ -2097,13 +2143,19 @@ static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
- WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
- WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
- WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
+ WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
+ QMAN_CPDMA_SIZE_OFFSET);
+ WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
+ QMAN_CPDMA_SRC_OFFSET);
+ WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
+ QMAN_CPDMA_DST_OFFSET);
} else {
- WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
- WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
- WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
+ WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
+ QMAN_LDMA_SIZE_OFFSET);
+ WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
+ QMAN_LDMA_SRC_OFFSET);
+ WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
+ QMAN_LDMA_DST_OFFSET);
/* Configure RAZWI IRQ */
mme_id = mme_offset /
@@ -2211,13 +2263,19 @@ static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
- WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
- WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
- WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
+ WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
+ QMAN_CPDMA_SIZE_OFFSET);
+ WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
+ QMAN_CPDMA_SRC_OFFSET);
+ WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
+ QMAN_CPDMA_DST_OFFSET);
} else {
- WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
- WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
- WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
+ WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
+ QMAN_LDMA_SIZE_OFFSET);
+ WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
+ QMAN_LDMA_SRC_OFFSET);
+ WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
+ QMAN_LDMA_DST_OFFSET);
/* Configure RAZWI IRQ */
tpc_id = tpc_offset /
@@ -2298,7 +2356,8 @@ static void gaudi_init_tpc_qmans(struct hl_device *hdev)
tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
- gaudi->hw_cap_initialized |= 1 << (HW_CAP_TPC_SHIFT + tpc_id);
+ gaudi->hw_cap_initialized |=
+ FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
}
}
@@ -2485,6 +2544,7 @@ static void gaudi_set_clock_gating(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 qman_offset;
+ bool enable;
int i;
/* In case we are during debug session, don't enable the clock gate
@@ -2494,46 +2554,43 @@ static void gaudi_set_clock_gating(struct hl_device *hdev)
return;
for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
- if (!(hdev->clock_gating_mask &
- (BIT_ULL(gaudi_dma_assignment[i]))))
- continue;
+ enable = !!(hdev->clock_gating_mask &
+ (BIT_ULL(gaudi_dma_assignment[i])));
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
- WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
+ WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
+ enable ? QMAN_CGM1_PWR_GATE_EN : 0);
WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
- QMAN_UPPER_CP_CGM_PWR_GATE_EN);
+ enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
}
for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
- if (!(hdev->clock_gating_mask &
- (BIT_ULL(gaudi_dma_assignment[i]))))
- continue;
+ enable = !!(hdev->clock_gating_mask &
+ (BIT_ULL(gaudi_dma_assignment[i])));
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
- WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
+ WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
+ enable ? QMAN_CGM1_PWR_GATE_EN : 0);
WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
- QMAN_COMMON_CP_CGM_PWR_GATE_EN);
+ enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
}
- if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))) {
- WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
- WREG32(mmMME0_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN);
- }
+ enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
+ WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
+ WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
- if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))) {
- WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
- WREG32(mmMME2_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN);
- }
+ enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
+ WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
+ WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
- if (!(hdev->clock_gating_mask &
- (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i))))
- continue;
+ enable = !!(hdev->clock_gating_mask &
+ (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
- QMAN_CGM1_PWR_GATE_EN);
+ enable ? QMAN_CGM1_PWR_GATE_EN : 0);
WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
- QMAN_COMMON_CP_CGM_PWR_GATE_EN);
+ enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
qman_offset += TPC_QMAN_OFFSET;
}
@@ -2826,7 +2883,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
if (err) {
dev_err(hdev->dev,
- "Failed to communicate with ARM CPU (ArmCP timeout)\n");
+ "Failed to communicate with Device CPU (CPU-CP timeout)\n");
return -EIO;
}
@@ -2839,6 +2896,18 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
/* Perform read from the device to make sure device is up */
RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+ /* Set the access through PCI bars (Linux driver only) as
+ * secured
+ */
+ WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
+ (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
+ PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
+
+ /* Perform read to flush the waiting writes to ensure
+ * configuration was set in the device
+ */
+ RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
+
/*
* Let's mark in the H/W that we have reached this point. We check
* this value in the reset_before_init function to understand whether
@@ -2847,31 +2916,6 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
*/
WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
- /* Set the access through PCI bars (Linux driver only) as secured */
- WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
- PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
-
- /* Perform read to flush the waiting writes to ensure configuration
- * was set in the device
- */
- RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
-
- if (hdev->axi_drain) {
- WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
- 1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
- WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
- 1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
-
- /* Perform read to flush the DRAIN cfg */
- RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
- } else {
- WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
- WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
-
- /* Perform read to flush the DRAIN cfg */
- RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
- }
-
/* Configure the reset registers. Must be done as early as possible
* in case we fail during H/W initialization
*/
@@ -2879,13 +2923,13 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
(CFG_RST_H_DMA_MASK |
CFG_RST_H_MME_MASK |
CFG_RST_H_SM_MASK |
- CFG_RST_H_TPC_MASK));
+ CFG_RST_H_TPC_7_MASK));
WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
(CFG_RST_H_HBM_MASK |
- CFG_RST_H_TPC_MASK |
+ CFG_RST_H_TPC_7_MASK |
CFG_RST_H_NIC_MASK |
CFG_RST_H_SM_MASK |
CFG_RST_H_DMA_MASK |
@@ -3050,7 +3094,7 @@ static int gaudi_suspend(struct hl_device *hdev)
{
int rc;
- rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+ rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
if (rc)
dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
@@ -3063,17 +3107,16 @@ static int gaudi_resume(struct hl_device *hdev)
}
static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
- u64 kaddress, phys_addr_t paddress, u32 size)
+ void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
int rc;
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
VM_DONTCOPY | VM_NORESERVE;
- rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
- size, vma->vm_page_prot);
+ rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
if (rc)
- dev_err(hdev->dev, "remap_pfn_range error %d", rc);
+ dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
return rc;
}
@@ -3420,7 +3463,8 @@ static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
&fence_dma_addr);
if (!fence_ptr) {
dev_err(hdev->dev,
- "Failed to allocate memory for queue testing\n");
+ "Failed to allocate memory for H/W queue %d testing\n",
+ hw_queue_id);
return -ENOMEM;
}
@@ -3431,14 +3475,16 @@ static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
GFP_KERNEL, &pkt_dma_addr);
if (!fence_pkt) {
dev_err(hdev->dev,
- "Failed to allocate packet for queue testing\n");
+ "Failed to allocate packet for H/W queue %d testing\n",
+ hw_queue_id);
rc = -ENOMEM;
goto free_fence_ptr;
}
- tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
- (1 << GAUDI_PKT_CTL_EB_SHIFT) |
- (1 << GAUDI_PKT_CTL_MB_SHIFT);
+ tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
+ tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
+ tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
+
fence_pkt->ctl = cpu_to_le32(tmp);
fence_pkt->value = cpu_to_le32(fence_val);
fence_pkt->addr = cpu_to_le64(fence_dma_addr);
@@ -3448,7 +3494,8 @@ static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
pkt_dma_addr);
if (rc) {
dev_err(hdev->dev,
- "Failed to send fence packet\n");
+ "Failed to send fence packet to H/W queue %d\n",
+ hw_queue_id);
goto free_pkt;
}
@@ -3772,6 +3819,12 @@ static int gaudi_validate_cb(struct hl_device *hdev,
PACKET_HEADER_PACKET_ID_MASK) >>
PACKET_HEADER_PACKET_ID_SHIFT);
+ if (!validate_packet_id(pkt_id)) {
+ dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
+ rc = -EINVAL;
+ break;
+ }
+
pkt_size = gaudi_packet_sizes[pkt_id];
cb_parsed_length += pkt_size;
if (cb_parsed_length > parser->user_cb_size) {
@@ -3932,8 +3985,6 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
}
}
- new_dma_pkt->ctl = user_dma_pkt->ctl;
-
ctl = le32_to_cpu(user_dma_pkt->ctl);
if (likely(dma_desc_cnt))
ctl &= ~GAUDI_PKT_CTL_EB_MASK;
@@ -3995,6 +4046,12 @@ static int gaudi_patch_cb(struct hl_device *hdev,
PACKET_HEADER_PACKET_ID_MASK) >>
PACKET_HEADER_PACKET_ID_SHIFT);
+ if (!validate_packet_id(pkt_id)) {
+ dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
+ rc = -EINVAL;
+ break;
+ }
+
pkt_size = gaudi_packet_sizes[pkt_id];
cb_parsed_length += pkt_size;
if (cb_parsed_length > parser->user_cb_size) {
@@ -4072,8 +4129,9 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
parser->patched_cb_size = parser->user_cb_size +
sizeof(struct packet_msg_prot) * 2;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
- &patched_cb_handle, HL_KERNEL_ASID_ID, false);
+ rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
+ parser->patched_cb_size, false, false,
+ &patched_cb_handle);
if (rc) {
dev_err(hdev->dev,
@@ -4145,8 +4203,9 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
if (rc)
goto free_userptr;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
- &patched_cb_handle, HL_KERNEL_ASID_ID, false);
+ rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
+ parser->patched_cb_size, false, false,
+ &patched_cb_handle);
if (rc) {
dev_err(hdev->dev,
"Failed to allocate patched CB for DMA CS %d\n", rc);
@@ -4242,11 +4301,11 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
cq_pkt = (struct packet_msg_prot *) (uintptr_t)
(kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
- tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
- (1 << GAUDI_PKT_CTL_MB_SHIFT);
+ tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
+ tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
if (eb)
- tmp |= (1 << GAUDI_PKT_CTL_EB_SHIFT);
+ tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
cq_pkt->ctl = cpu_to_le32(tmp);
cq_pkt->value = cpu_to_le32(cq_val);
@@ -4254,8 +4313,8 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
cq_pkt++;
- tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
- (1 << GAUDI_PKT_CTL_MB_SHIFT);
+ tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
+ tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
cq_pkt->ctl = cpu_to_le32(tmp);
cq_pkt->value = cpu_to_le32(1);
@@ -4287,11 +4346,12 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
cb_size = sizeof(*lin_dma_pkt);
- ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
- (1 << GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
- (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
- (1 << GAUDI_PKT_CTL_RB_SHIFT) |
- (1 << GAUDI_PKT_CTL_MB_SHIFT));
+ ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
+ ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+
lin_dma_pkt->ctl = cpu_to_le32(ctl);
lin_dma_pkt->src_addr = cpu_to_le64(val);
lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
@@ -4897,9 +4957,10 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
job->job_cb_size - sizeof(struct packet_msg_prot));
- tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
- (1 << GAUDI_PKT_CTL_EB_SHIFT) |
- (1 << GAUDI_PKT_CTL_MB_SHIFT);
+ tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
+ tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
+ tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
+
fence_pkt->ctl = cpu_to_le32(tmp);
fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
fence_pkt->addr = cpu_to_le64(fence_dma_addr);
@@ -5215,7 +5276,7 @@ static int gaudi_extract_ecc_info(struct hl_device *hdev,
*memory_wrapper_idx = 0xFF;
/* Iterate through memory wrappers, a single bit must be set */
- for (i = 0 ; i > num_mem_regs ; i++) {
+ for (i = 0 ; i < num_mem_regs ; i++) {
err_addr += i * 4;
err_word = RREG32(err_addr);
if (err_word) {
@@ -5573,7 +5634,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
bool soft_reset_required = false;
/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
- * gating, and thus cannot be done in ArmCP and should be done instead
+ * gating, and thus cannot be done in CPU-CP and should be done instead
* by the driver.
*/
@@ -5620,21 +5681,25 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev,
{
switch (event_type) {
case GAUDI_EVENT_FIX_POWER_ENV_S:
+ hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to power consumption\n");
break;
case GAUDI_EVENT_FIX_POWER_ENV_E:
+ hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
dev_info_ratelimited(hdev->dev,
"Power envelop is safe, back to optimal clock\n");
break;
case GAUDI_EVENT_FIX_THERMAL_ENV_S:
+ hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to overheating\n");
break;
case GAUDI_EVENT_FIX_THERMAL_ENV_E:
+ hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
dev_info_ratelimited(hdev->dev,
"Thermal envelop is safe, back to optimal clock\n");
break;
@@ -6005,7 +6070,7 @@ static int gaudi_send_heartbeat(struct hl_device *hdev)
return hl_fw_send_heartbeat(hdev);
}
-static int gaudi_armcp_info_get(struct hl_device *hdev)
+static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -6014,18 +6079,27 @@ static int gaudi_armcp_info_get(struct hl_device *hdev)
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
return 0;
- rc = hl_fw_armcp_info_get(hdev);
+ rc = hl_fw_cpucp_info_get(hdev);
if (rc)
return rc;
- if (!strlen(prop->armcp_info.card_name))
- strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
+ if (!strlen(prop->cpucp_info.card_name))
+ strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
+ hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
+
+ if (hdev->card_type == cpucp_card_type_pci)
+ prop->max_power_default = MAX_POWER_DEFAULT_PCI;
+ else if (hdev->card_type == cpucp_card_type_pmc)
+ prop->max_power_default = MAX_POWER_DEFAULT_PMC;
+
+ hdev->max_power = prop->max_power_default;
+
return 0;
}
-static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
+static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
struct seq_file *s)
{
struct gaudi_device *gaudi = hdev->asic_specific;
@@ -6057,7 +6131,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
is_idle &= is_eng_idle;
if (mask)
- *mask |= !is_eng_idle <<
+ *mask |= ((u64) !is_eng_idle) <<
(GAUDI_ENGINE_ID_DMA_0 + dma_id);
if (s)
seq_printf(s, fmt, dma_id,
@@ -6080,7 +6154,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
is_idle &= is_eng_idle;
if (mask)
- *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_TPC_0 + i);
+ *mask |= ((u64) !is_eng_idle) <<
+ (GAUDI_ENGINE_ID_TPC_0 + i);
if (s)
seq_printf(s, fmt, i,
is_eng_idle ? "Y" : "N",
@@ -6108,7 +6183,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
is_idle &= is_eng_idle;
if (mask)
- *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_MME_0 + i);
+ *mask |= ((u64) !is_eng_idle) <<
+ (GAUDI_ENGINE_ID_MME_0 + i);
if (s) {
if (!is_slave)
seq_printf(s, fmt, i,
@@ -6246,6 +6322,15 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
1000,
kernel_timeout);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Timeout while waiting for TPC%d vector pipe\n",
+ tpc_id);
+ hdev->asic_funcs->set_clock_gating(hdev);
+ mutex_unlock(&gaudi->clk_gate_mutex);
+ return -EIO;
+ }
+
rc = hl_poll_timeout(
hdev,
mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
@@ -6575,7 +6660,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
.send_cpu_message = gaudi_send_cpu_message,
.get_hw_state = gaudi_get_hw_state,
.pci_bars_map = gaudi_pci_bars_map,
- .set_dram_bar_base = gaudi_set_hbm_bar_base,
.init_iatu = gaudi_init_iatu,
.rreg = hl_rreg,
.wreg = hl_wreg,