about summary refs log tree commit diff stats
path: root/drivers/misc/habanalabs/gaudi/gaudi.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/gaudi/gaudi.c')
-rw-r--r-- drivers/misc/habanalabs/gaudi/gaudi.c 412
1 files changed, 105 insertions, 307 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 21c2b678ff72..fba322241096 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -95,7 +95,7 @@
#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
-#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
+#define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */
#define GAUDI_CLK_GATE_DEBUGFS_MASK (\
BIT(GAUDI_ENGINE_ID_MME_0) |\
@@ -557,6 +557,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
}
prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
+ prop->host_base_address = HOST_PHYS_BASE;
+ prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
prop->collective_first_sob = 0;
prop->collective_first_mon = 0;
@@ -595,18 +597,19 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
+ prop->device_mem_alloc_default_page_size = prop->dram_page_size;
prop->dram_supports_virtual_memory = false;
- prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT;
- prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT;
- prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT;
- prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT;
- prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT;
- prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK;
- prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK;
- prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK;
- prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK;
- prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK;
+ prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
+ prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
+ prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
+ prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
+ prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
+ prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
+ prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
+ prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
+ prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
+ prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
prop->pmmu.start_addr = VA_HOST_SPACE_START;
prop->pmmu.end_addr =
(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
@@ -673,6 +676,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->set_max_power_on_device_init = true;
+ prop->dma_mask = 48;
+
return 0;
}
@@ -754,8 +759,6 @@ static int gaudi_init_iatu(struct hl_device *hdev)
if (rc)
goto done;
- hdev->asic_funcs->set_dma_mask_from_fw(hdev);
-
/* Outbound Region 0 - Point to Host */
outbound_region.addr = HOST_PHYS_BASE;
outbound_region.size = HOST_PHYS_SIZE;
@@ -1008,7 +1011,7 @@ free_job:
release_cb:
hl_cb_put(cb);
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
return rc;
}
@@ -1470,7 +1473,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
job->patched_cb = NULL;
job->job_cb_size = job->user_cb_size;
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
/* increment refcount as for external queues we get completion */
if (hw_queue_prop->type == QUEUE_TYPE_EXT)
@@ -2808,9 +2811,8 @@ static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
QMAN_EXTERNAL_MAKE_TRUSTED);
@@ -2987,9 +2989,8 @@ static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
@@ -3124,9 +3125,8 @@ static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
@@ -3258,9 +3258,8 @@ static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
@@ -3409,9 +3408,8 @@ static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
QM_ARB_ERR_MSG_EN_MASK);
- /* Increase ARB WDT to support streams architecture */
- WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
- GAUDI_ARB_WDT_TIMEOUT);
+ /* Set timeout to maximum */
+ WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
@@ -3792,9 +3790,6 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_
{
u32 wait_timeout_ms;
- dev_info(hdev->dev,
- "Halting compute engines and disabling interrupts\n");
-
if (hdev->pldm)
wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
else
@@ -4212,7 +4207,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset
}
if (fw_reset) {
- dev_info(hdev->dev,
+ dev_dbg(hdev->dev,
"Firmware performs HARD reset, going to wait %dms\n",
reset_timeout_ms);
@@ -4304,11 +4299,11 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
- dev_info(hdev->dev,
+ dev_dbg(hdev->dev,
"Issued HARD reset command, going to wait %dms\n",
reset_timeout_ms);
} else {
- dev_info(hdev->dev,
+ dev_dbg(hdev->dev,
"Firmware performs HARD reset, going to wait %dms\n",
reset_timeout_ms);
}
@@ -4745,12 +4740,11 @@ static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
}
-static int gaudi_hbm_scrubbing(struct hl_device *hdev)
+static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 cur_addr = DRAM_BASE_ADDR_USER;
- u32 val;
- u32 chunk_size;
+ u32 chunk_size, busy;
int rc, dma_id;
while (cur_addr < prop->dram_end_address) {
@@ -4764,8 +4758,10 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev)
"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
cur_addr, cur_addr + chunk_size);
- WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
- WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
+ WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
+ lower_32_bits(val));
+ WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
+ upper_32_bits(val));
WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
lower_32_bits(cur_addr));
WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
@@ -4788,8 +4784,8 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev)
rc = hl_poll_timeout(
hdev,
mmDMA0_CORE_STS0 + dma_offset,
- val,
- ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
+ busy,
+ ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
1000,
HBM_SCRUBBING_TIMEOUT_US);
@@ -4843,7 +4839,7 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
}
/* Scrub HBM using all DMA channels in parallel */
- rc = gaudi_hbm_scrubbing(hdev);
+ rc = gaudi_scrub_device_dram(hdev, 0xdeadbeaf);
if (rc)
dev_err(hdev->dev,
"Failed to clear HBM in mem scrub all\n");
@@ -5038,37 +5034,7 @@ static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
-static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int i;
-
- if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
- return -ENOMEM;
-
- /* Shift to the device's base physical address of host memory */
- for_each_sg(sgl, sg, nents, i)
- sg->dma_address += HOST_PHYS_BASE;
-
- return 0;
-}
-
-static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int i;
-
- /* Cancel the device's base physical address of host memory */
- for_each_sg(sgl, sg, nents, i)
- sg->dma_address -= HOST_PHYS_BASE;
-
- dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
-}
-
-static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
- struct sg_table *sgt)
+static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
struct scatterlist *sg, *sg_next_iter;
u32 count, dma_desc_cnt;
@@ -5077,8 +5043,7 @@ static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
dma_desc_cnt = 0;
- for_each_sg(sgt->sgl, sg, sgt->nents, count) {
-
+ for_each_sgtable_dma_sg(sgt, sg, count) {
len = sg_dma_len(sg);
addr = sg_dma_address(sg);
@@ -5132,8 +5097,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list);
- rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
- userptr->sgt->nents, dir);
+ rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory;
@@ -5408,7 +5372,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
sgt = userptr->sgt;
dma_desc_cnt = 0;
- for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+ for_each_sgtable_dma_sg(sgt, sg, count) {
len = sg_dma_len(sg);
dma_addr = sg_dma_address(sg);
@@ -5562,7 +5526,7 @@ static int gaudi_patch_cb(struct hl_device *hdev,
static int gaudi_parse_cb_mmu(struct hl_device *hdev,
struct hl_cs_parser *parser)
{
- u64 patched_cb_handle;
+ u64 handle;
u32 patched_cb_size;
struct hl_cb *user_cb;
int rc;
@@ -5578,9 +5542,9 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
else
parser->patched_cb_size = parser->user_cb_size;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
+ rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
parser->patched_cb_size, false, false,
- &patched_cb_handle);
+ &handle);
if (rc) {
dev_err(hdev->dev,
@@ -5589,13 +5553,10 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
return rc;
}
- patched_cb_handle >>= PAGE_SHIFT;
- parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
- (u32) patched_cb_handle);
+ parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
/* hl_cb_get should never fail */
if (!parser->patched_cb) {
- dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
- (u32) patched_cb_handle);
+ dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
rc = -EFAULT;
goto out;
}
@@ -5635,8 +5596,7 @@ out:
* cb_put will release it, but here we want to remove it from the
* idr
*/
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
- patched_cb_handle << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
return rc;
}
@@ -5644,7 +5604,7 @@ out:
static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
struct hl_cs_parser *parser)
{
- u64 patched_cb_handle;
+ u64 handle;
int rc;
rc = gaudi_validate_cb(hdev, parser, false);
@@ -5652,22 +5612,19 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
if (rc)
goto free_userptr;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
+ rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
parser->patched_cb_size, false, false,
- &patched_cb_handle);
+ &handle);
if (rc) {
dev_err(hdev->dev,
"Failed to allocate patched CB for DMA CS %d\n", rc);
goto free_userptr;
}
- patched_cb_handle >>= PAGE_SHIFT;
- parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
- (u32) patched_cb_handle);
+ parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
/* hl_cb_get should never fail here */
if (!parser->patched_cb) {
- dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
- (u32) patched_cb_handle);
+ dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
rc = -EFAULT;
goto out;
}
@@ -5684,8 +5641,7 @@ out:
* cb_put will release it, but here we want to remove it from the
* idr
*/
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
- patched_cb_handle << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
free_userptr:
if (rc)
@@ -5798,7 +5754,6 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
struct hl_cs_job *job;
u32 cb_size, ctl, err_cause;
struct hl_cb *cb;
- u64 id;
int rc;
cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
@@ -5865,9 +5820,8 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
}
release_cb:
- id = cb->id;
hl_cb_put(cb);
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
return rc;
}
@@ -5930,7 +5884,7 @@ static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
release_cb:
hl_cb_put(cb);
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+ hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
return rc;
}
@@ -6101,184 +6055,6 @@ static void gaudi_restore_phase_topology(struct hl_device *hdev)
}
-static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
- bool user_address, u32 *val)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 hbm_bar_addr, host_phys_end;
- int rc = 0;
-
- host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
-
- if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
-
- *val = RREG32(addr - CFG_BASE);
-
- } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
-
- *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
-
- } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
-
- u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
-
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
-
- if (hbm_bar_addr != U64_MAX) {
- *val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
- }
-
- if (hbm_bar_addr == U64_MAX)
- rc = -EIO;
-
- } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
- user_address && !iommu_present(&pci_bus_type)) {
-
- *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
-
- } else {
- rc = -EFAULT;
- }
-
- return rc;
-}
-
-static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
- bool user_address, u32 val)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 hbm_bar_addr, host_phys_end;
- int rc = 0;
-
- host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
-
- if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
-
- WREG32(addr - CFG_BASE, val);
-
- } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
-
- writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
-
- } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
-
- u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
-
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
-
- if (hbm_bar_addr != U64_MAX) {
- writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
- }
-
- if (hbm_bar_addr == U64_MAX)
- rc = -EIO;
-
- } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
- user_address && !iommu_present(&pci_bus_type)) {
-
- *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
-
- } else {
- rc = -EFAULT;
- }
-
- return rc;
-}
-
-static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
- bool user_address, u64 *val)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 hbm_bar_addr, host_phys_end;
- int rc = 0;
-
- host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
-
- if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
-
- u32 val_l = RREG32(addr - CFG_BASE);
- u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
-
- *val = (((u64) val_h) << 32) | val_l;
-
- } else if ((addr >= SRAM_BASE_ADDR) &&
- (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
-
- *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
-
- } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
-
- u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
-
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
-
- if (hbm_bar_addr != U64_MAX) {
- *val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
- }
-
- if (hbm_bar_addr == U64_MAX)
- rc = -EIO;
-
- } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
- user_address && !iommu_present(&pci_bus_type)) {
-
- *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
-
- } else {
- rc = -EFAULT;
- }
-
- return rc;
-}
-
-static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
- bool user_address, u64 val)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 hbm_bar_addr, host_phys_end;
- int rc = 0;
-
- host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
-
- if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
-
- WREG32(addr - CFG_BASE, lower_32_bits(val));
- WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
-
- } else if ((addr >= SRAM_BASE_ADDR) &&
- (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
-
- writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
-
- } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
-
- u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
-
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
-
- if (hbm_bar_addr != U64_MAX) {
- writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
- hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
- }
-
- if (hbm_bar_addr == U64_MAX)
- rc = -EIO;
-
- } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
- user_address && !iommu_present(&pci_bus_type)) {
-
- *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
-
- } else {
- rc = -EFAULT;
- }
-
- return rc;
-}
-
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
u32 size_to_dma, dma_addr_t dma_addr)
{
@@ -7628,19 +7404,18 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
/* In case it's the first razwi, save its parameters*/
- rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1);
+ rc = atomic_cmpxchg(&hdev->last_error.razwi.write_disable, 0, 1);
if (!rc) {
- hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
- hdev->last_error.razwi_timestamp = ktime_get();
- hdev->last_error.razwi_addr = razwi_addr;
- hdev->last_error.razwi_engine_id_1 = engine_id_1;
- hdev->last_error.razwi_engine_id_2 = engine_id_2;
+ hdev->last_error.razwi.timestamp = ktime_get();
+ hdev->last_error.razwi.addr = razwi_addr;
+ hdev->last_error.razwi.engine_id_1 = engine_id_1;
+ hdev->last_error.razwi.engine_id_2 = engine_id_2;
/*
* If first engine id holds non valid value the razwi initiator
* does not have engine id
*/
- hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX);
- hdev->last_error.razwi_type = razwi_type;
+ hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX);
+ hdev->last_error.razwi.type = razwi_type;
}
}
@@ -8103,7 +7878,6 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_MMU_PAGE_FAULT:
case GAUDI_EVENT_MMU_WR_PERM:
case GAUDI_EVENT_RAZWI_OR_ADC:
- case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
fallthrough;
@@ -8123,6 +7897,19 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
hl_fw_unmask_irq(hdev, event_type);
break;
+ case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
+ gaudi_print_irq_info(hdev, event_type, true);
+ gaudi_handle_qman_err(hdev, event_type);
+ hl_fw_unmask_irq(hdev, event_type);
+
+ /* In TPC QM event, notify on TPC assertion. While there isn't
+ * a specific event for assertion yet, the FW generates QM event.
+ * The SW upper layer will inspect an internal mapped area to indicate
+ * if the event is a tpc assertion or tpc QM.
+ */
+ hl_notifier_event_send_all(hdev, HL_NOTIFIER_EVENT_TPC_ASSERT);
+ break;
+
case GAUDI_EVENT_RAZWI_OR_ADC_SW:
gaudi_print_irq_info(hdev, event_type, true);
goto reset_device;
@@ -8328,8 +8115,6 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
set_default_power_values(hdev);
- hdev->max_power = prop->max_power_default;
-
return 0;
}
@@ -8501,6 +8286,16 @@ static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
return hl_fw_get_eeprom_data(hdev, data, max_size);
}
+static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
+{
+ struct gaudi_device *gaudi = hdev->asic_specific;
+
+ if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
+ return 0;
+
+ return hl_fw_get_monitor_dump(hdev, data);
+}
+
/*
* this function should be used only during initialization and/or after reset,
* when there are no active users.
@@ -9066,11 +8861,6 @@ static void gaudi_reset_sob(struct hl_device *hdev, void *data)
kref_init(&hw_sob->kref);
}
-static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
-{
- hdev->dma_mask = 48;
-}
-
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
@@ -9132,7 +8922,7 @@ static int gaudi_add_sync_to_engine_map_entry(
*/
if (reg_value == 0 || reg_value == 0xffffffff)
return 0;
- reg_value -= (u32)CFG_BASE;
+ reg_value -= lower_32_bits(CFG_BASE);
/* create a new hash entry */
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
@@ -9377,6 +9167,12 @@ static u32 *gaudi_get_stream_master_qid_arr(void)
return gaudi_stream_master;
}
+static void gaudi_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_sizes *info)
+{
+ /* set 0 since multiple pages are not supported */
+ info->page_order_bitmask = 0;
+}
+
static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
@@ -9418,24 +9214,21 @@ static const struct hl_asic_funcs gaudi_funcs = {
.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
.asic_dma_free_coherent = gaudi_dma_free_coherent,
.scrub_device_mem = gaudi_scrub_device_mem,
+ .scrub_device_dram = gaudi_scrub_device_dram,
.get_int_queue_base = gaudi_get_int_queue_base,
.test_queues = gaudi_test_queues,
.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
.asic_dma_pool_free = gaudi_dma_pool_free,
.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
- .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
+ .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.cs_parser = gaudi_cs_parser,
- .asic_dma_map_sg = gaudi_dma_map_sg,
+ .asic_dma_map_sgtable = hl_dma_map_sgtable,
.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
.update_eq_ci = gaudi_update_eq_ci,
.context_switch = gaudi_context_switch,
.restore_phase_topology = gaudi_restore_phase_topology,
- .debugfs_read32 = gaudi_debugfs_read32,
- .debugfs_write32 = gaudi_debugfs_write32,
- .debugfs_read64 = gaudi_debugfs_read64,
- .debugfs_write64 = gaudi_debugfs_write64,
.debugfs_read_dma = gaudi_debugfs_read_dma,
.add_device_attr = gaudi_add_device_attr,
.handle_eqe = gaudi_handle_eqe,
@@ -9444,6 +9237,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
.write_pte = gaudi_write_pte,
.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
+ .mmu_prefetch_cache_range = NULL,
.send_heartbeat = gaudi_send_heartbeat,
.debug_coresight = gaudi_debug_coresight,
.is_device_idle = gaudi_is_device_idle,
@@ -9452,6 +9246,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
.hw_queues_unlock = gaudi_hw_queues_unlock,
.get_pci_id = gaudi_get_pci_id,
.get_eeprom_data = gaudi_get_eeprom_data,
+ .get_monitor_dump = gaudi_get_monitor_dump,
.send_cpu_message = gaudi_send_cpu_message,
.pci_bars_map = gaudi_pci_bars_map,
.init_iatu = gaudi_init_iatu,
@@ -9469,7 +9264,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
.gen_wait_cb = gaudi_gen_wait_cb,
.reset_sob = gaudi_reset_sob,
.reset_sob_group = gaudi_reset_sob_group,
- .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
.get_device_time = gaudi_get_device_time,
.collective_wait_init_cs = gaudi_collective_wait_init_cs,
.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
@@ -9486,7 +9280,11 @@ static const struct hl_asic_funcs gaudi_funcs = {
.get_sob_addr = gaudi_get_sob_addr,
.set_pci_memory_regions = gaudi_set_pci_memory_regions,
.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
- .is_valid_dram_page_size = NULL
+ .is_valid_dram_page_size = NULL,
+ .mmu_get_real_page_size = hl_mmu_get_real_page_size,
+ .get_valid_dram_page_orders = gaudi_get_valid_dram_page_orders,
+ .access_dev_mem = hl_access_dev_mem,
+ .set_dram_bar_base = gaudi_set_hbm_bar_base,
};
/**