aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--drivers/misc/habanalabs/command_buffer.c6
-rw-r--r--drivers/misc/habanalabs/command_submission.c17
-rw-r--r--drivers/misc/habanalabs/context.c4
-rw-r--r--drivers/misc/habanalabs/device.c16
-rw-r--r--drivers/misc/habanalabs/firmware_if.c7
-rw-r--r--drivers/misc/habanalabs/goya/goya.c347
-rw-r--r--drivers/misc/habanalabs/goya/goyaP.h40
-rw-r--r--drivers/misc/habanalabs/goya/goya_coresight.c16
-rw-r--r--drivers/misc/habanalabs/habanalabs.h116
-rw-r--r--drivers/misc/habanalabs/habanalabs_drv.c2
-rw-r--r--drivers/misc/habanalabs/hw_queue.c46
-rw-r--r--drivers/misc/habanalabs/include/armcp_if.h8
-rw-r--r--drivers/misc/habanalabs/irq.c14
-rw-r--r--drivers/misc/habanalabs/memory.c4
-rw-r--r--drivers/misc/habanalabs/pci.c16
-rw-r--r--include/uapi/misc/habanalabs.h9
16 files changed, 368 insertions, 300 deletions
diff --git a/drivers/misc/habanalabs/command_buffer.c b/drivers/misc/habanalabs/command_buffer.c
index b1ffca47d748..e495f44064fa 100644
--- a/drivers/misc/habanalabs/command_buffer.c
+++ b/drivers/misc/habanalabs/command_buffer.c
@@ -13,7 +13,7 @@
static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
- hdev->asic_funcs->dma_free_coherent(hdev, cb->size,
+ hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
(void *) (uintptr_t) cb->kernel_address,
cb->bus_address);
kfree(cb);
@@ -66,10 +66,10 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
return NULL;
if (ctx_id == HL_KERNEL_ASID_ID)
- p = hdev->asic_funcs->dma_alloc_coherent(hdev, cb_size,
+ p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
&cb->bus_address, GFP_ATOMIC);
else
- p = hdev->asic_funcs->dma_alloc_coherent(hdev, cb_size,
+ p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
&cb->bus_address,
GFP_USER | __GFP_ZERO);
if (!p) {
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 02c48da0b645..6fe785e26859 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -93,7 +93,6 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
parser.user_cb_size = job->user_cb_size;
parser.ext_queue = job->ext_queue;
job->patched_cb = NULL;
- parser.use_virt_addr = hdev->mmu_enable;
rc = hdev->asic_funcs->cs_parser(hdev, &parser);
if (job->ext_queue) {
@@ -601,7 +600,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
void __user *chunks;
u32 num_chunks;
u64 cs_seq = ULONG_MAX;
- int rc, do_restore;
+ int rc, do_ctx_switch;
bool need_soft_reset = false;
if (hl_device_disabled_or_in_reset(hdev)) {
@@ -612,9 +611,9 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
goto out;
}
- do_restore = atomic_cmpxchg(&ctx->thread_restore_token, 1, 0);
+ do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
- if (do_restore || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
+ if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
long ret;
chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
@@ -622,7 +621,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
mutex_lock(&hpriv->restore_phase_mutex);
- if (do_restore) {
+ if (do_ctx_switch) {
rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
if (rc) {
dev_err_ratelimited(hdev->dev,
@@ -678,18 +677,18 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
}
}
- ctx->thread_restore_wait_token = 1;
- } else if (!ctx->thread_restore_wait_token) {
+ ctx->thread_ctx_switch_wait_token = 1;
+ } else if (!ctx->thread_ctx_switch_wait_token) {
u32 tmp;
rc = hl_poll_timeout_memory(hdev,
- (u64) (uintptr_t) &ctx->thread_restore_wait_token,
+ (u64) (uintptr_t) &ctx->thread_ctx_switch_wait_token,
jiffies_to_usecs(hdev->timeout_jiffies),
&tmp);
if (rc || !tmp) {
dev_err(hdev->dev,
- "restore phase hasn't finished in time\n");
+ "context switch phase didn't finish in time\n");
rc = -ETIMEDOUT;
goto out;
}
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
index 619ace1c4ef7..4804cdcf4c48 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/context.c
@@ -106,8 +106,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
ctx->cs_sequence = 1;
spin_lock_init(&ctx->cs_lock);
- atomic_set(&ctx->thread_restore_token, 1);
- ctx->thread_restore_wait_token = 0;
+ atomic_set(&ctx->thread_ctx_switch_token, 1);
+ ctx->thread_ctx_switch_wait_token = 0;
if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 25bfb093ff26..91a9e47a3482 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -5,6 +5,8 @@
* All Rights Reserved.
*/
+#define pr_fmt(fmt) "habanalabs: " fmt
+
#include "habanalabs.h"
#include <linux/pci.h>
@@ -708,10 +710,10 @@ again:
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_reset(hdev, &hdev->completion_queue[i]);
- /* Make sure the setup phase for the user context will run again */
+ /* Make sure the context switch phase will run again */
if (hdev->user_ctx) {
- atomic_set(&hdev->user_ctx->thread_restore_token, 1);
- hdev->user_ctx->thread_restore_wait_token = 0;
+ atomic_set(&hdev->user_ctx->thread_ctx_switch_token, 1);
+ hdev->user_ctx->thread_ctx_switch_wait_token = 0;
}
/* Finished tear-down, starting to re-initialize */
@@ -1145,7 +1147,13 @@ int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr,
* either by the direct access of the device or by another core
*/
u32 *paddr = (u32 *) (uintptr_t) addr;
- ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
+ ktime_t timeout;
+
+ /* timeout should be longer when working with simulator */
+ if (!hdev->pdev)
+ timeout_us *= 10;
+
+ timeout = ktime_add_us(ktime_get(), timeout_us);
might_sleep();
diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c
index 1acf82650b20..eda5d7fcb79f 100644
--- a/drivers/misc/habanalabs/firmware_if.c
+++ b/drivers/misc/habanalabs/firmware_if.c
@@ -249,8 +249,7 @@ int hl_fw_armcp_info_get(struct hl_device *hdev)
pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.addr = cpu_to_le64(armcp_info_dma_addr +
- prop->host_phys_base_address);
+ pkt.addr = cpu_to_le64(armcp_info_dma_addr);
pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -281,7 +280,6 @@ out:
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
struct armcp_packet pkt = {};
void *eeprom_info_cpu_addr;
dma_addr_t eeprom_info_dma_addr;
@@ -301,8 +299,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.addr = cpu_to_le64(eeprom_info_dma_addr +
- prop->host_phys_base_address);
+ pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
pkt.data_max_size = cpu_to_le32(max_size);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index bde11fc2c251..a582e29c1ee4 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -297,7 +297,7 @@ static u32 goya_all_events[] = {
GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
};
-static void goya_get_fixed_properties(struct hl_device *hdev)
+void goya_get_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
int i;
@@ -345,7 +345,6 @@ static void goya_get_fixed_properties(struct hl_device *hdev)
prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
- prop->host_phys_base_address = HOST_PHYS_BASE;
prop->va_space_host_start_address = VA_HOST_SPACE_START;
prop->va_space_host_end_address = VA_HOST_SPACE_END;
prop->va_space_dram_start_address = VA_DDR_SPACE_START;
@@ -389,33 +388,26 @@ static int goya_pci_bars_map(struct hl_device *hdev)
return 0;
}
-/*
- * goya_set_ddr_bar_base - set DDR bar to map specific device address
- *
- * @hdev: pointer to hl_device structure
- * @addr: address in DDR. Must be aligned to DDR bar size
- *
- * This function configures the iATU so that the DDR bar will start at the
- * specified addr.
- *
- */
-static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
+static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
struct goya_device *goya = hdev->asic_specific;
+ u64 old_addr = addr;
int rc;
if ((goya) && (goya->ddr_bar_cur_addr == addr))
- return 0;
+ return old_addr;
/* Inbound Region 1 - Bar 4 - Point to DDR */
rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr);
if (rc)
- return rc;
+ return U64_MAX;
- if (goya)
+ if (goya) {
+ old_addr = goya->ddr_bar_cur_addr;
goya->ddr_bar_cur_addr = addr;
+ }
- return 0;
+ return old_addr;
}
/*
@@ -429,7 +421,7 @@ static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
static int goya_init_iatu(struct hl_device *hdev)
{
return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
- HOST_PHYS_SIZE);
+ HOST_PHYS_BASE, HOST_PHYS_SIZE);
}
/*
@@ -542,14 +534,7 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
}
-/*
- * goya_late_init - GOYA late initialization code
- *
- * @hdev: pointer to hl_device structure
- *
- * Get ArmCP info and send message to CPU to enable PCI access
- */
-static int goya_late_init(struct hl_device *hdev)
+int goya_late_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
@@ -648,9 +633,6 @@ static int goya_sw_init(struct hl_device *hdev)
goya->tpc_clk = GOYA_PLL_FREQ_LOW;
goya->ic_clk = GOYA_PLL_FREQ_LOW;
- goya->mmu_prepare_reg = goya_mmu_prepare_reg;
- goya->qman0_set_security = goya_qman0_set_security;
-
hdev->asic_specific = goya;
/* Create DMA pool for small allocations */
@@ -663,7 +645,7 @@ static int goya_sw_init(struct hl_device *hdev)
}
hdev->cpu_accessible_dma_mem =
- hdev->asic_funcs->dma_alloc_coherent(hdev,
+ hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
HL_CPU_ACCESSIBLE_MEM_SIZE,
&hdev->cpu_accessible_dma_address,
GFP_KERNEL | __GFP_ZERO);
@@ -678,7 +660,7 @@ static int goya_sw_init(struct hl_device *hdev)
dev_err(hdev->dev,
"Failed to create CPU accessible DMA pool\n");
rc = -ENOMEM;
- goto free_cpu_pq_dma_mem;
+ goto free_cpu_dma_mem;
}
rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
@@ -688,17 +670,18 @@ static int goya_sw_init(struct hl_device *hdev)
dev_err(hdev->dev,
"Failed to add memory to CPU accessible DMA pool\n");
rc = -EFAULT;
- goto free_cpu_pq_pool;
+ goto free_cpu_accessible_dma_pool;
}
spin_lock_init(&goya->hw_queues_lock);
return 0;
-free_cpu_pq_pool:
+free_cpu_accessible_dma_pool:
gen_pool_destroy(hdev->cpu_accessible_dma_pool);
-free_cpu_pq_dma_mem:
- hdev->asic_funcs->dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
+free_cpu_dma_mem:
+ hdev->asic_funcs->asic_dma_free_coherent(hdev,
+ HL_CPU_ACCESSIBLE_MEM_SIZE,
hdev->cpu_accessible_dma_mem,
hdev->cpu_accessible_dma_address);
free_dma_pool:
@@ -721,7 +704,8 @@ static int goya_sw_fini(struct hl_device *hdev)
gen_pool_destroy(hdev->cpu_accessible_dma_pool);
- hdev->asic_funcs->dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
+ hdev->asic_funcs->asic_dma_free_coherent(hdev,
+ HL_CPU_ACCESSIBLE_MEM_SIZE,
hdev->cpu_accessible_dma_mem,
hdev->cpu_accessible_dma_address);
@@ -815,11 +799,10 @@ static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
* Initialize the H/W registers of the QMAN DMA channels
*
*/
-static void goya_init_dma_qmans(struct hl_device *hdev)
+void goya_init_dma_qmans(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
struct hl_hw_queue *q;
- dma_addr_t bus_address;
int i;
if (goya->hw_cap_initialized & HW_CAP_DMA)
@@ -828,10 +811,7 @@ static void goya_init_dma_qmans(struct hl_device *hdev)
q = &hdev->kernel_queues[0];
for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
- bus_address = q->bus_address +
- hdev->asic_prop.host_phys_base_address;
-
- goya_init_dma_qman(hdev, i, bus_address);
+ goya_init_dma_qman(hdev, i, q->bus_address);
goya_init_dma_ch(hdev, i);
}
@@ -968,11 +948,10 @@ static int goya_stop_external_queues(struct hl_device *hdev)
* Returns 0 on success
*
*/
-static int goya_init_cpu_queues(struct hl_device *hdev)
+int goya_init_cpu_queues(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
struct hl_eq *eq;
- dma_addr_t bus_address;
u32 status;
struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
int err;
@@ -985,19 +964,18 @@ static int goya_init_cpu_queues(struct hl_device *hdev)
eq = &hdev->event_queue;
- bus_address = cpu_pq->bus_address +
- hdev->asic_prop.host_phys_base_address;
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0, lower_32_bits(bus_address));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1, upper_32_bits(bus_address));
+ WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0,
+ lower_32_bits(cpu_pq->bus_address));
+ WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1,
+ upper_32_bits(cpu_pq->bus_address));
- bus_address = eq->bus_address + hdev->asic_prop.host_phys_base_address;
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_2, lower_32_bits(bus_address));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(bus_address));
+ WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_2, lower_32_bits(eq->bus_address));
+ WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(eq->bus_address));
- bus_address = hdev->cpu_accessible_dma_address +
- hdev->asic_prop.host_phys_base_address;
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8, lower_32_bits(bus_address));
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9, upper_32_bits(bus_address));
+ WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8,
+ lower_32_bits(hdev->cpu_accessible_dma_address));
+ WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9,
+ upper_32_bits(hdev->cpu_accessible_dma_address));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES);
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES);
@@ -1549,7 +1527,7 @@ static void goya_init_mme_cmdq(struct hl_device *hdev)
WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
}
-static void goya_init_mme_qmans(struct hl_device *hdev)
+void goya_init_mme_qmans(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
u32 so_base_lo, so_base_hi;
@@ -1656,7 +1634,7 @@ static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
}
-static void goya_init_tpc_qmans(struct hl_device *hdev)
+void goya_init_tpc_qmans(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
u32 so_base_lo, so_base_hi;
@@ -2225,11 +2203,10 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
* Before pushing u-boot/linux to device, need to set the ddr bar to
* base address of dram
*/
- rc = goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
- if (rc) {
+ if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
dev_err(hdev->dev,
"failed to map DDR bar to DRAM base address\n");
- return rc;
+ return -EIO;
}
if (hdev->pldm) {
@@ -2373,7 +2350,7 @@ static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
return 0;
}
-static int goya_mmu_init(struct hl_device *hdev)
+int goya_mmu_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct goya_device *goya = hdev->asic_specific;
@@ -2464,12 +2441,12 @@ static int goya_hw_init(struct hl_device *hdev)
* After CPU initialization is finished, change DDR bar mapping inside
* iATU to point to the start address of the MMU page tables
*/
- rc = goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
- (MMU_PAGE_TABLES_ADDR & ~(prop->dram_pci_bar_size - 0x1ull)));
- if (rc) {
+ if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
+ (MMU_PAGE_TABLES_ADDR &
+ ~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
dev_err(hdev->dev,
"failed to map DDR bar to MMU page tables\n");
- return rc;
+ return -EIO;
}
rc = goya_mmu_init(hdev);
@@ -2649,7 +2626,7 @@ static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
return rc;
}
-static void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
+void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
u32 db_reg_offset, db_value;
bool invalid_queue = false;
@@ -2747,13 +2724,23 @@ void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val)
static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags)
{
- return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
+ void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
+ dma_handle, flags);
+
+ /* Shift to the device's base physical address of host memory */
+ if (kernel_addr)
+ *dma_handle += HOST_PHYS_BASE;
+
+ return kernel_addr;
}
static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle)
{
- dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
+ /* Cancel the device's base physical address of host memory */
+ dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
+
+ dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
}
void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
@@ -2816,7 +2803,6 @@ void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
{
- struct goya_device *goya = hdev->asic_specific;
struct packet_msg_prot *fence_pkt;
u32 *fence_ptr;
dma_addr_t fence_dma_addr;
@@ -2837,7 +2823,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
return -EBUSY;
}
- fence_ptr = hdev->asic_funcs->dma_pool_zalloc(hdev, 4, GFP_KERNEL,
+ fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
&fence_dma_addr);
if (!fence_ptr) {
dev_err(hdev->dev,
@@ -2847,7 +2833,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
*fence_ptr = 0;
- goya->qman0_set_security(hdev, true);
+ goya_qman0_set_security(hdev, true);
/*
* goya cs parser saves space for 2xpacket_msg_prot at end of CB. For
@@ -2865,8 +2851,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
(1 << GOYA_PKT_CTL_MB_SHIFT);
fence_pkt->ctl = cpu_to_le32(tmp);
fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
- fence_pkt->addr = cpu_to_le64(fence_dma_addr +
- hdev->asic_prop.host_phys_base_address);
+ fence_pkt->addr = cpu_to_le64(fence_dma_addr);
rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
job->job_cb_size, cb->bus_address);
@@ -2886,10 +2871,10 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
}
free_fence_ptr:
- hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_ptr,
+ hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
fence_dma_addr);
- goya->qman0_set_security(hdev, false);
+ goya_qman0_set_security(hdev, false);
return rc;
}
@@ -2920,7 +2905,7 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
fence_val = GOYA_QMAN0_FENCE_VAL;
- fence_ptr = hdev->asic_funcs->dma_pool_zalloc(hdev, 4, GFP_KERNEL,
+ fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
&fence_dma_addr);
if (!fence_ptr) {
dev_err(hdev->dev,
@@ -2930,7 +2915,7 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
*fence_ptr = 0;
- fence_pkt = hdev->asic_funcs->dma_pool_zalloc(hdev,
+ fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
sizeof(struct packet_msg_prot),
GFP_KERNEL, &pkt_dma_addr);
if (!fence_pkt) {
@@ -2945,8 +2930,7 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
(1 << GOYA_PKT_CTL_MB_SHIFT);
fence_pkt->ctl = cpu_to_le32(tmp);
fence_pkt->value = cpu_to_le32(fence_val);
- fence_pkt->addr = cpu_to_le64(fence_dma_addr +
- hdev->asic_prop.host_phys_base_address);
+ fence_pkt->addr = cpu_to_le64(fence_dma_addr);
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
sizeof(struct packet_msg_prot),
@@ -2974,10 +2958,10 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
}
free_pkt:
- hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_pkt,
+ hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
pkt_dma_addr);
free_fence_ptr:
- hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_ptr,
+ hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
fence_dma_addr);
return rc;
}
@@ -3018,16 +3002,27 @@ int goya_test_queues(struct hl_device *hdev)
static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
gfp_t mem_flags, dma_addr_t *dma_handle)
{
+ void *kernel_addr;
+
if (size > GOYA_DMA_POOL_BLK_SIZE)
return NULL;
- return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
+ kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
+
+ /* Shift to the device's base physical address of host memory */
+ if (kernel_addr)
+ *dma_handle += HOST_PHYS_BASE;
+
+ return kernel_addr;
}
static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
dma_addr_t dma_addr)
{
- dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
+ /* Cancel the device's base physical address of host memory */
+ dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
+
+ dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
}
void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
@@ -3042,19 +3037,33 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
-static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sg,
+static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir)
{
- if (!dma_map_sg(&hdev->pdev->dev, sg, nents, dir))
+ struct scatterlist *sg;
+ int i;
+
+ if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
return -ENOMEM;
+ /* Shift to the device's base physical address of host memory */
+ for_each_sg(sgl, sg, nents, i)
+ sg->dma_address += HOST_PHYS_BASE;
+
return 0;
}
-static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sg,
+static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir)
{
- dma_unmap_sg(&hdev->pdev->dev, sg, nents, dir);
+ struct scatterlist *sg;
+ int i;
+
+ /* Cancel the device's base physical address of host memory */
+ for_each_sg(sgl, sg, nents, i)
+ sg->dma_address -= HOST_PHYS_BASE;
+
+ dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
}
u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
@@ -3204,31 +3213,29 @@ static int goya_validate_dma_pkt_host(struct hl_device *hdev,
return -EFAULT;
}
- if (parser->ctx_id != HL_KERNEL_ASID_ID) {
- if (sram_addr) {
- if (!hl_mem_area_inside_range(device_memory_addr,
- le32_to_cpu(user_dma_pkt->tsize),
- hdev->asic_prop.sram_user_base_address,
- hdev->asic_prop.sram_end_address)) {
+ if (sram_addr) {
+ if (!hl_mem_area_inside_range(device_memory_addr,
+ le32_to_cpu(user_dma_pkt->tsize),
+ hdev->asic_prop.sram_user_base_address,
+ hdev->asic_prop.sram_end_address)) {
- dev_err(hdev->dev,
- "SRAM address 0x%llx + 0x%x is invalid\n",
- device_memory_addr,
- user_dma_pkt->tsize);
- return -EFAULT;
- }
- } else {
- if (!hl_mem_area_inside_range(device_memory_addr,
- le32_to_cpu(user_dma_pkt->tsize),
- hdev->asic_prop.dram_user_base_address,
- hdev->asic_prop.dram_end_address)) {
-
- dev_err(hdev->dev,
- "DRAM address 0x%llx + 0x%x is invalid\n",
- device_memory_addr,
- user_dma_pkt->tsize);
- return -EFAULT;
- }
+ dev_err(hdev->dev,
+ "SRAM address 0x%llx + 0x%x is invalid\n",
+ device_memory_addr,
+ user_dma_pkt->tsize);
+ return -EFAULT;
+ }
+ } else {
+ if (!hl_mem_area_inside_range(device_memory_addr,
+ le32_to_cpu(user_dma_pkt->tsize),
+ hdev->asic_prop.dram_user_base_address,
+ hdev->asic_prop.dram_end_address)) {
+
+ dev_err(hdev->dev,
+ "DRAM address 0x%llx + 0x%x is invalid\n",
+ device_memory_addr,
+ user_dma_pkt->tsize);
+ return -EFAULT;
}
}
@@ -3606,8 +3613,6 @@ static int goya_patch_dma_packet(struct hl_device *hdev,
new_dma_pkt->ctl = cpu_to_le32(ctl);
new_dma_pkt->tsize = cpu_to_le32((u32) len);
- dma_addr += hdev->asic_prop.host_phys_base_address;
-
if (dir == DMA_TO_DEVICE) {
new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
@@ -3858,36 +3863,35 @@ free_userptr:
return rc;
}
-static int goya_parse_cb_no_ext_quque(struct hl_device *hdev,
+static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
struct hl_cs_parser *parser)
{
struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
struct goya_device *goya = hdev->asic_specific;
- if (!(goya->hw_cap_initialized & HW_CAP_MMU)) {
- /* For internal queue jobs, just check if cb address is valid */
- if (hl_mem_area_inside_range(
- (u64) (uintptr_t) parser->user_cb,
- parser->user_cb_size,
- asic_prop->sram_user_base_address,
- asic_prop->sram_end_address))
- return 0;
+ if (goya->hw_cap_initialized & HW_CAP_MMU)
+ return 0;
- if (hl_mem_area_inside_range(
- (u64) (uintptr_t) parser->user_cb,
- parser->user_cb_size,
- asic_prop->dram_user_base_address,
- asic_prop->dram_end_address))
- return 0;
+ /* For internal queue jobs, just check if CB address is valid */
+ if (hl_mem_area_inside_range(
+ (u64) (uintptr_t) parser->user_cb,
+ parser->user_cb_size,
+ asic_prop->sram_user_base_address,
+ asic_prop->sram_end_address))
+ return 0;
- dev_err(hdev->dev,
- "Internal CB address %px + 0x%x is not in SRAM nor in DRAM\n",
- parser->user_cb, parser->user_cb_size);
+ if (hl_mem_area_inside_range(
+ (u64) (uintptr_t) parser->user_cb,
+ parser->user_cb_size,
+ asic_prop->dram_user_base_address,
+ asic_prop->dram_end_address))
+ return 0;
- return -EFAULT;
- }
+ dev_err(hdev->dev,
+ "Internal CB address %px + 0x%x is not in SRAM nor in DRAM\n",
+ parser->user_cb, parser->user_cb_size);
- return 0;
+ return -EFAULT;
}
int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
@@ -3895,9 +3899,9 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
struct goya_device *goya = hdev->asic_specific;
if (!parser->ext_queue)
- return goya_parse_cb_no_ext_quque(hdev, parser);
+ return goya_parse_cb_no_ext_queue(hdev, parser);
- if ((goya->hw_cap_initialized & HW_CAP_MMU) && parser->use_virt_addr)
+ if (goya->hw_cap_initialized & HW_CAP_MMU)
return goya_parse_cb_mmu(hdev, parser);
else
return goya_parse_cb_no_mmu(hdev, parser);
@@ -3928,12 +3932,12 @@ void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr,
cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
}
-static void goya_update_eq_ci(struct hl_device *hdev, u32 val)
+void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val);
}
-static void goya_restore_phase_topology(struct hl_device *hdev)
+void goya_restore_phase_topology(struct hl_device *hdev)
{
int i, num_of_sob_in_longs, num_of_mon_in_longs;
@@ -3970,6 +3974,7 @@ static void goya_restore_phase_topology(struct hl_device *hdev)
static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 ddr_bar_addr;
int rc = 0;
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
@@ -3987,15 +3992,16 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
- rc = goya_set_ddr_bar_base(hdev, bar_base_addr);
- if (!rc) {
+ ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
+ if (ddr_bar_addr != U64_MAX) {
*val = readl(hdev->pcie_bar[DDR_BAR_ID] +
(addr - bar_base_addr));
- rc = goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
- (MMU_PAGE_TABLES_ADDR &
- ~(prop->dram_pci_bar_size - 0x1ull)));
+ ddr_bar_addr = goya_set_ddr_bar_base(hdev,
+ ddr_bar_addr);
}
+ if (ddr_bar_addr == U64_MAX)
+ rc = -EIO;
} else {
rc = -EFAULT;
}
@@ -4020,6 +4026,7 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 ddr_bar_addr;
int rc = 0;
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
@@ -4037,15 +4044,16 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
- rc = goya_set_ddr_bar_base(hdev, bar_base_addr);
- if (!rc) {
+ ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
+ if (ddr_bar_addr != U64_MAX) {
writel(val, hdev->pcie_bar[DDR_BAR_ID] +
(addr - bar_base_addr));
- rc = goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
- (MMU_PAGE_TABLES_ADDR &
- ~(prop->dram_pci_bar_size - 0x1ull)));
+ ddr_bar_addr = goya_set_ddr_bar_base(hdev,
+ ddr_bar_addr);
}
+ if (ddr_bar_addr == U64_MAX)
+ rc = -EIO;
} else {
rc = -EFAULT;
}
@@ -4414,7 +4422,6 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
u64 val, bool is_dram)
{
struct packet_lin_dma *lin_dma_pkt;
- struct hl_cs_parser parser;
struct hl_cs_job *job;
u32 cb_size, ctl;
struct hl_cb *cb;
@@ -4454,36 +4461,16 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
job->user_cb->cs_cnt++;
job->user_cb_size = cb_size;
job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
+ job->patched_cb = job->user_cb;
+ job->job_cb_size = job->user_cb_size +
+ sizeof(struct packet_msg_prot) * 2;
hl_debugfs_add_job(hdev, job);
- parser.ctx_id = HL_KERNEL_ASID_ID;
- parser.cs_sequence = 0;
- parser.job_id = job->id;
- parser.hw_queue_id = job->hw_queue_id;
- parser.job_userptr_list = &job->userptr_list;
- parser.user_cb = job->user_cb;
- parser.user_cb_size = job->user_cb_size;
- parser.ext_queue = job->ext_queue;
- parser.use_virt_addr = hdev->mmu_enable;
-
- rc = hdev->asic_funcs->cs_parser(hdev, &parser);
- if (rc) {
- dev_err(hdev->dev, "Failed to parse kernel CB\n");
- goto free_job;
- }
-
- job->patched_cb = parser.patched_cb;
- job->job_cb_size = parser.patched_cb_size;
- job->patched_cb->cs_cnt++;
-
rc = goya_send_job_on_qman0(hdev, job);
- job->patched_cb->cs_cnt--;
hl_cb_put(job->patched_cb);
-free_job:
- hl_userptr_delete_list(hdev, &job->userptr_list);
hl_debugfs_remove_job(hdev, job);
kfree(job);
cb->cs_cnt--;
@@ -4495,7 +4482,7 @@ release_cb:
return rc;
}
-static int goya_context_switch(struct hl_device *hdev, u32 asid)
+int goya_context_switch(struct hl_device *hdev, u32 asid)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 addr = prop->sram_base_address;
@@ -4557,7 +4544,7 @@ void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
/* zero the MMBP and ASID bits and then set the ASID */
for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
- goya->mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
+ goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
}
static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard)
@@ -4792,12 +4779,12 @@ static const struct hl_asic_funcs goya_funcs = {
.cb_mmap = goya_cb_mmap,
.ring_doorbell = goya_ring_doorbell,
.flush_pq_write = goya_flush_pq_write,
- .dma_alloc_coherent = goya_dma_alloc_coherent,
- .dma_free_coherent = goya_dma_free_coherent,
+ .asic_dma_alloc_coherent = goya_dma_alloc_coherent,
+ .asic_dma_free_coherent = goya_dma_free_coherent,
.get_int_queue_base = goya_get_int_queue_base,
.test_queues = goya_test_queues,
- .dma_pool_zalloc = goya_dma_pool_zalloc,
- .dma_pool_free = goya_dma_pool_free,
+ .asic_dma_pool_zalloc = goya_dma_pool_zalloc,
+ .asic_dma_pool_free = goya_dma_pool_free,
.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
.hl_dma_unmap_sg = goya_dma_unmap_sg,
@@ -4830,7 +4817,9 @@ static const struct hl_asic_funcs goya_funcs = {
.get_hw_state = goya_get_hw_state,
.pci_bars_map = goya_pci_bars_map,
.set_dram_bar_base = goya_set_ddr_bar_base,
- .init_iatu = goya_init_iatu
+ .init_iatu = goya_init_iatu,
+ .rreg = hl_rreg,
+ .wreg = hl_wreg
};
/*
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index b572e0263ac5..14e216cb3668 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -147,9 +147,6 @@ enum goya_fw_component {
};
struct goya_device {
- void (*mmu_prepare_reg)(struct hl_device *hdev, u64 reg, u32 asid);
- void (*qman0_set_security)(struct hl_device *hdev, bool secure);
-
/* TODO: remove hw_queues_lock after moving to scheduler code */
spinlock_t hw_queues_lock;
@@ -162,13 +159,34 @@ struct goya_device {
u32 hw_cap_initialized;
};
+void goya_get_fixed_properties(struct hl_device *hdev);
+int goya_mmu_init(struct hl_device *hdev);
+void goya_init_dma_qmans(struct hl_device *hdev);
+void goya_init_mme_qmans(struct hl_device *hdev);
+void goya_init_tpc_qmans(struct hl_device *hdev);
+int goya_init_cpu_queues(struct hl_device *hdev);
+void goya_init_security(struct hl_device *hdev);
+int goya_late_init(struct hl_device *hdev);
+void goya_late_fini(struct hl_device *hdev);
+
+void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
+void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val);
+void goya_update_eq_ci(struct hl_device *hdev, u32 val);
+void goya_restore_phase_topology(struct hl_device *hdev);
+int goya_context_switch(struct hl_device *hdev, u32 asid);
+
int goya_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus,
u8 i2c_addr, u8 i2c_reg, u32 *val);
int goya_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus,
u8 i2c_addr, u8 i2c_reg, u32 val);
+void goya_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state);
+
+int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id);
+int goya_test_queues(struct hl_device *hdev);
int goya_test_cpu_queue(struct hl_device *hdev);
int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
u32 timeout, long *result);
+
long goya_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
long goya_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr);
long goya_get_current(struct hl_device *hdev, int sensor_index, u32 attr);
@@ -176,33 +194,31 @@ long goya_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr);
long goya_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr);
void goya_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
long value);
-void goya_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state);
+u64 goya_get_max_power(struct hl_device *hdev);
+void goya_set_max_power(struct hl_device *hdev, u64 value);
+
void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
void goya_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
int goya_armcp_info_get(struct hl_device *hdev);
-void goya_init_security(struct hl_device *hdev);
int goya_debug_coresight(struct hl_device *hdev, void *data);
-u64 goya_get_max_power(struct hl_device *hdev);
-void goya_set_max_power(struct hl_device *hdev, u64 value);
-int goya_test_queues(struct hl_device *hdev);
+
void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
int goya_mmu_clear_pgt_range(struct hl_device *hdev);
int goya_mmu_set_dram_default_page(struct hl_device *hdev);
-void goya_late_fini(struct hl_device *hdev);
int goya_suspend(struct hl_device *hdev);
int goya_resume(struct hl_device *hdev);
-void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val);
+
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
void *goya_get_events_stat(struct hl_device *hdev, u32 *size);
+
void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr,
u32 cq_val, u32 msix_vec);
int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser);
void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
- dma_addr_t *dma_handle, u16 *queue_len);
+ dma_addr_t *dma_handle, u16 *queue_len);
u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt);
-int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id);
int goya_send_heartbeat(struct hl_device *hdev);
void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle);
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
index 68726fb4c56a..1ac951f52d1e 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -459,10 +459,14 @@ static int goya_config_bmon(struct hl_device *hdev,
if (!input)
return -EINVAL;
- WREG32(base_reg + 0x208, lower_32_bits(input->addr_range0));
- WREG32(base_reg + 0x20C, upper_32_bits(input->addr_range0));
- WREG32(base_reg + 0x248, lower_32_bits(input->addr_range1));
- WREG32(base_reg + 0x24C, upper_32_bits(input->addr_range1));
+ WREG32(base_reg + 0x200, lower_32_bits(input->start_addr0));
+ WREG32(base_reg + 0x204, upper_32_bits(input->start_addr0));
+ WREG32(base_reg + 0x208, lower_32_bits(input->addr_mask0));
+ WREG32(base_reg + 0x20C, upper_32_bits(input->addr_mask0));
+ WREG32(base_reg + 0x240, lower_32_bits(input->start_addr1));
+ WREG32(base_reg + 0x244, upper_32_bits(input->start_addr1));
+ WREG32(base_reg + 0x248, lower_32_bits(input->addr_mask1));
+ WREG32(base_reg + 0x24C, upper_32_bits(input->addr_mask1));
WREG32(base_reg + 0x224, 0);
WREG32(base_reg + 0x234, 0);
WREG32(base_reg + 0x30C, input->bw_win);
@@ -482,8 +486,12 @@ static int goya_config_bmon(struct hl_device *hdev,
WREG32(base_reg + 0x100, 0x11);
WREG32(base_reg + 0x304, 0x1);
} else {
+ WREG32(base_reg + 0x200, 0);
+ WREG32(base_reg + 0x204, 0);
WREG32(base_reg + 0x208, 0xFFFFFFFF);
WREG32(base_reg + 0x20C, 0xFFFFFFFF);
+ WREG32(base_reg + 0x240, 0);
+ WREG32(base_reg + 0x244, 0);
WREG32(base_reg + 0x248, 0xFFFFFFFF);
WREG32(base_reg + 0x24C, 0xFFFFFFFF);
WREG32(base_reg + 0x224, 0xFFFFFFFF);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 2f02bb55f66a..71243b319920 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -11,8 +11,6 @@
#include "include/armcp_if.h"
#include "include/qman_if.h"
-#define pr_fmt(fmt) "habanalabs: " fmt
-
#include <linux/cdev.h>
#include <linux/iopoll.h>
#include <linux/irqreturn.h>
@@ -137,8 +135,6 @@ enum hl_device_hw_state {
* @dram_user_base_address: DRAM physical start address for user access.
* @dram_size: DRAM total size.
* @dram_pci_bar_size: size of PCI bar towards DRAM.
- * @host_phys_base_address: base physical address of host memory for
- * transactions that the device generates.
* @max_power_default: max power of the device after reset
* @va_space_host_start_address: base address of virtual memory range for
* mapping host memory.
@@ -186,7 +182,6 @@ struct asic_fixed_properties {
u64 dram_user_base_address;
u64 dram_size;
u64 dram_pci_bar_size;
- u64 host_phys_base_address;
u64 max_power_default;
u64 va_space_host_start_address;
u64 va_space_host_end_address;
@@ -323,6 +318,18 @@ struct hl_cs_job;
#define HL_EQ_LENGTH 64
#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
+#define HL_CPU_PKT_SHIFT 5
+#define HL_CPU_PKT_SIZE (1 << HL_CPU_PKT_SHIFT)
+#define HL_CPU_PKT_MASK (~((1 << HL_CPU_PKT_SHIFT) - 1))
+#define HL_CPU_MAX_PKTS_IN_CB 32
+#define HL_CPU_CB_SIZE (HL_CPU_PKT_SIZE * \
+ HL_CPU_MAX_PKTS_IN_CB)
+#define HL_CPU_CB_QUEUE_SIZE (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE)
+
+/* KMD <-> ArmCP shared memory size (EQ + PQ + CPU CB queue) */
+#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_EQ_SIZE_IN_BYTES + \
+ HL_QUEUE_SIZE_IN_BYTES + \
+ HL_CPU_CB_QUEUE_SIZE)
/**
* struct hl_hw_queue - describes a H/W transport queue.
@@ -443,19 +450,19 @@ enum hl_pll_frequency {
* @cb_mmap: maps a CB.
* @ring_doorbell: increment PI on a given QMAN.
* @flush_pq_write: flush PQ entry write if necessary, WARN if flushing failed.
- * @dma_alloc_coherent: Allocate coherent DMA memory by calling
- * dma_alloc_coherent(). This is ASIC function because its
- * implementation is not trivial when the driver is loaded
- * in simulation mode (not upstreamed).
- * @dma_free_coherent: Free coherent DMA memory by calling dma_free_coherent().
- * This is ASIC function because its implementation is not
- * trivial when the driver is loaded in simulation mode
- * (not upstreamed).
+ * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling
+ * dma_alloc_coherent(). This is ASIC function because
+ * its implementation is not trivial when the driver
+ * is loaded in simulation mode (not upstreamed).
+ * @asic_dma_free_coherent: Free coherent DMA memory by calling
+ * dma_free_coherent(). This is ASIC function because
+ * its implementation is not trivial when the driver
+ * is loaded in simulation mode (not upstreamed).
* @get_int_queue_base: get the internal queue base address.
* @test_queues: run simple test on all queues for sanity check.
- * @dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
- * size of allocation is HL_DMA_POOL_BLK_SIZE.
- * @dma_pool_free: free small DMA allocation from pool.
+ * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
+ * size of allocation is HL_DMA_POOL_BLK_SIZE.
+ * @asic_dma_pool_free: free small DMA allocation from pool.
* @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
* @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
* @hl_dma_unmap_sg: DMA unmap scatter-gather list.
@@ -489,8 +496,11 @@ enum hl_pll_frequency {
* @send_cpu_message: send buffer to ArmCP.
* @get_hw_state: retrieve the H/W state
* @pci_bars_map: Map PCI BARs.
- * @set_dram_bar_base: Set DRAM BAR to map specific device address.
+ * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns
+ * old address the bar pointed to or U64_MAX for failure
* @init_iatu: Initialize the iATU unit inside the PCI controller.
+ * @rreg: Read a register. Needed for simulator support.
+ * @wreg: Write a register. Needed for simulator support.
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -508,27 +518,27 @@ struct hl_asic_funcs {
u64 kaddress, phys_addr_t paddress, u32 size);
void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
void (*flush_pq_write)(struct hl_device *hdev, u64 *pq, u64 exp_val);
- void* (*dma_alloc_coherent)(struct hl_device *hdev, size_t size,
+ void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle, gfp_t flag);
- void (*dma_free_coherent)(struct hl_device *hdev, size_t size,
+ void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle);
void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
dma_addr_t *dma_handle, u16 *queue_len);
int (*test_queues)(struct hl_device *hdev);
- void* (*dma_pool_zalloc)(struct hl_device *hdev, size_t size,
+ void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size,
gfp_t mem_flags, dma_addr_t *dma_handle);
- void (*dma_pool_free)(struct hl_device *hdev, void *vaddr,
+ void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr,
dma_addr_t dma_addr);
void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
size_t size, dma_addr_t *dma_handle);
void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
size_t size, void *vaddr);
void (*hl_dma_unmap_sg)(struct hl_device *hdev,
- struct scatterlist *sg, int nents,
+ struct scatterlist *sgl, int nents,
enum dma_data_direction dir);
int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
int (*asic_dma_map_sg)(struct hl_device *hdev,
- struct scatterlist *sg, int nents,
+ struct scatterlist *sgl, int nents,
enum dma_data_direction dir);
u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
struct sg_table *sgt);
@@ -564,8 +574,10 @@ struct hl_asic_funcs {
u16 len, u32 timeout, long *result);
enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
int (*pci_bars_map)(struct hl_device *hdev);
- int (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
+ u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
int (*init_iatu)(struct hl_device *hdev);
+ u32 (*rreg)(struct hl_device *hdev, u32 reg);
+ void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
};
@@ -613,12 +625,13 @@ struct hl_va_range {
* DRAM mapping.
* @cs_lock: spinlock to protect cs_sequence.
* @dram_phys_mem: amount of used physical DRAM memory by this context.
- * @thread_restore_token: token to prevent multiple threads of the same context
- * from running the restore phase. Only one thread
- * should run it.
- * @thread_restore_wait_token: token to prevent the threads that didn't run
- * the restore phase from moving to their execution
- * phase before the restore phase has finished.
+ * @thread_ctx_switch_token: token to prevent multiple threads of the same
+ * context from running the context switch phase.
+ * Only a single thread should run it.
+ * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
+ * the context switch phase from moving to their
+ * execution phase before the context switch phase
+ * has finished.
* @asid: context's unique address space ID in the device's MMU.
*/
struct hl_ctx {
@@ -638,8 +651,8 @@ struct hl_ctx {
u64 *dram_default_hops;
spinlock_t cs_lock;
atomic64_t dram_phys_mem;
- atomic_t thread_restore_token;
- u32 thread_restore_wait_token;
+ atomic_t thread_ctx_switch_token;
+ u32 thread_ctx_switch_wait_token;
u32 asid;
};
@@ -766,8 +779,6 @@ struct hl_cs_job {
* @patched_cb_size: the size of the CB after parsing.
* @ext_queue: whether the job is for external queue or internal queue.
* @job_id: the id of the related job inside the related CS.
- * @use_virt_addr: whether to treat the addresses in the CB as virtual during
- * parsing.
*/
struct hl_cs_parser {
struct hl_cb *user_cb;
@@ -780,7 +791,6 @@ struct hl_cs_parser {
u32 patched_cb_size;
u8 ext_queue;
u8 job_id;
- u8 use_virt_addr;
};
@@ -1009,13 +1019,10 @@ struct hl_dbg_device_entry {
u32 hl_rreg(struct hl_device *hdev, u32 reg);
void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
-#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
- readl_poll_timeout(hdev->rmmio + addr, val, cond, sleep_us, timeout_us)
-
-#define RREG32(reg) hl_rreg(hdev, (reg))
-#define WREG32(reg, v) hl_wreg(hdev, (reg), (v))
+#define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg))
+#define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v))
#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n", \
- hl_rreg(hdev, (reg)))
+ hdev->asic_funcs->rreg(hdev, (reg)))
#define WREG32_P(reg, val, mask) \
do { \
@@ -1033,6 +1040,30 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \
(val) << REG_FIELD_SHIFT(reg, field))
+#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+({ \
+ ktime_t __timeout; \
+ /* timeout should be longer when working with simulator */ \
+ if (hdev->pdev) \
+ __timeout = ktime_add_us(ktime_get(), timeout_us); \
+ else \
+ __timeout = ktime_add_us(ktime_get(), (timeout_us * 10)); \
+ might_sleep_if(sleep_us); \
+ for (;;) { \
+ (val) = RREG32(addr); \
+ if (cond) \
+ break; \
+ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+ (val) = RREG32(addr); \
+ break; \
+ } \
+ if (sleep_us) \
+ usleep_range((sleep_us >> 2) + 1, sleep_us); \
+ } \
+ (cond) ? 0 : -ETIMEDOUT; \
+})
+
+
#define HL_ENG_BUSY(buf, size, fmt, ...) ({ \
if (buf) \
snprintf(buf, size, fmt, ##__VA_ARGS__); \
@@ -1418,7 +1449,8 @@ int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
u64 addr);
int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
- u64 dram_base_address, u64 host_phys_size);
+ u64 dram_base_address, u64 host_phys_base_address,
+ u64 host_phys_size);
int hl_pci_init(struct hl_device *hdev, u8 dma_mask);
void hl_pci_fini(struct hl_device *hdev);
int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask);
diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c
index 1667df7ca64c..5f4d155be767 100644
--- a/drivers/misc/habanalabs/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/habanalabs_drv.c
@@ -6,6 +6,8 @@
*
*/
+#define pr_fmt(fmt) "habanalabs: " fmt
+
#include "habanalabs.h"
#include <linux/pci.h>
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index ef3bb6951360..2894d8975933 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -82,7 +82,7 @@ static void ext_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
bd += hl_pi_2_offset(q->pi);
bd->ctl = __cpu_to_le32(ctl);
bd->len = __cpu_to_le32(len);
- bd->ptr = __cpu_to_le64(ptr + hdev->asic_prop.host_phys_base_address);
+ bd->ptr = __cpu_to_le64(ptr);
q->pi = hl_queue_inc_ptr(q->pi);
hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
@@ -263,9 +263,7 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
* checked in hl_queue_sanity_checks
*/
cq = &hdev->completion_queue[q->hw_queue_id];
- cq_addr = cq->bus_address +
- hdev->asic_prop.host_phys_base_address;
- cq_addr += cq->pi * sizeof(struct hl_cq_entry);
+ cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
hdev->asic_funcs->add_end_of_cb_packets(cb->kernel_address, len,
cq_addr,
@@ -415,14 +413,20 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
}
static int ext_and_cpu_hw_queue_init(struct hl_device *hdev,
- struct hl_hw_queue *q)
+ struct hl_hw_queue *q, bool is_cpu_queue)
{
void *p;
int rc;
- p = hdev->asic_funcs->dma_alloc_coherent(hdev,
- HL_QUEUE_SIZE_IN_BYTES,
- &q->bus_address, GFP_KERNEL | __GFP_ZERO);
+ if (is_cpu_queue)
+ p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ &q->bus_address);
+ else
+ p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ &q->bus_address,
+ GFP_KERNEL | __GFP_ZERO);
if (!p)
return -ENOMEM;
@@ -446,8 +450,15 @@ static int ext_and_cpu_hw_queue_init(struct hl_device *hdev,
return 0;
free_queue:
- hdev->asic_funcs->dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES,
- (void *) (uintptr_t) q->kernel_address, q->bus_address);
+ if (is_cpu_queue)
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ (void *) (uintptr_t) q->kernel_address);
+ else
+ hdev->asic_funcs->asic_dma_free_coherent(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ (void *) (uintptr_t) q->kernel_address,
+ q->bus_address);
return rc;
}
@@ -474,12 +485,12 @@ static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
static int cpu_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
- return ext_and_cpu_hw_queue_init(hdev, q);
+ return ext_and_cpu_hw_queue_init(hdev, q, true);
}
static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
- return ext_and_cpu_hw_queue_init(hdev, q);
+ return ext_and_cpu_hw_queue_init(hdev, q, false);
}
/*
@@ -569,8 +580,15 @@ static void hw_queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
kfree(q->shadow_queue);
- hdev->asic_funcs->dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES,
- (void *) (uintptr_t) q->kernel_address, q->bus_address);
+ if (q->queue_type == QUEUE_TYPE_CPU)
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ (void *) (uintptr_t) q->kernel_address);
+ else
+ hdev->asic_funcs->asic_dma_free_coherent(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ (void *) (uintptr_t) q->kernel_address,
+ q->bus_address);
}
int hl_hw_queues_create(struct hl_device *hdev)
diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h
index c8f28cadc335..1f1e35e86d84 100644
--- a/drivers/misc/habanalabs/include/armcp_if.h
+++ b/drivers/misc/habanalabs/include/armcp_if.h
@@ -300,14 +300,6 @@ enum armcp_pwm_attributes {
armcp_pwm_enable
};
-#define HL_CPU_PKT_SHIFT 5
-#define HL_CPU_PKT_SIZE (1 << HL_CPU_PKT_SHIFT)
-#define HL_CPU_PKT_MASK (~((1 << HL_CPU_PKT_SHIFT) - 1))
-#define HL_CPU_MAX_PKTS_IN_CB 32
-#define HL_CPU_CB_SIZE (HL_CPU_PKT_SIZE * \
- HL_CPU_MAX_PKTS_IN_CB)
-#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE)
-
/* Event Queue Packets */
struct eq_generic_event {
diff --git a/drivers/misc/habanalabs/irq.c b/drivers/misc/habanalabs/irq.c
index e69a09c10e3f..ea9f72ff456c 100644
--- a/drivers/misc/habanalabs/irq.c
+++ b/drivers/misc/habanalabs/irq.c
@@ -222,7 +222,7 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
BUILD_BUG_ON(HL_CQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
- p = hdev->asic_funcs->dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
+ p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
&q->bus_address, GFP_KERNEL | __GFP_ZERO);
if (!p)
return -ENOMEM;
@@ -248,7 +248,7 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
*/
void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
{
- hdev->asic_funcs->dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
+ hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
(void *) (uintptr_t) q->kernel_address, q->bus_address);
}
@@ -284,8 +284,9 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
BUILD_BUG_ON(HL_EQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
- p = hdev->asic_funcs->dma_alloc_coherent(hdev, HL_EQ_SIZE_IN_BYTES,
- &q->bus_address, GFP_KERNEL | __GFP_ZERO);
+ p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+ HL_EQ_SIZE_IN_BYTES,
+ &q->bus_address);
if (!p)
return -ENOMEM;
@@ -308,8 +309,9 @@ void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
{
flush_workqueue(hdev->eq_wq);
- hdev->asic_funcs->dma_free_coherent(hdev, HL_EQ_SIZE_IN_BYTES,
- (void *) (uintptr_t) q->kernel_address, q->bus_address);
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+ HL_EQ_SIZE_IN_BYTES,
+ (void *) (uintptr_t) q->kernel_address);
}
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index 43ef3ad8438a..d67d24c13efd 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -759,10 +759,6 @@ static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr,
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
paddr = phys_pg_pack->pages[i];
- /* For accessing the host we need to turn on bit 39 */
- if (phys_pg_pack->created_from_userptr)
- paddr += hdev->asic_prop.host_phys_base_address;
-
rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
if (rc) {
dev_err(hdev->dev,
diff --git a/drivers/misc/habanalabs/pci.c b/drivers/misc/habanalabs/pci.c
index d472d02a8e6e..0e78a04d63f4 100644
--- a/drivers/misc/habanalabs/pci.c
+++ b/drivers/misc/habanalabs/pci.c
@@ -236,6 +236,8 @@ int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
* @hdev: Pointer to hl_device structure.
* @sram_base_address: SRAM base address.
* @dram_base_address: DRAM base address.
+ * @host_phys_base_address: Base physical address of host memory for device
+ * transactions.
* @host_phys_size: Size of host memory for device transactions.
*
* This is needed in case the firmware doesn't initialize the iATU.
@@ -243,7 +245,8 @@ int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
* Return: 0 on success, negative value for failure.
*/
int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
- u64 dram_base_address, u64 host_phys_size)
+ u64 dram_base_address, u64 host_phys_base_address,
+ u64 host_phys_size)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 host_phys_end_addr;
@@ -259,14 +262,17 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
/* Point to DRAM */
if (!hdev->asic_funcs->set_dram_bar_base)
return -EINVAL;
- rc |= hdev->asic_funcs->set_dram_bar_base(hdev, dram_base_address);
+ if (hdev->asic_funcs->set_dram_bar_base(hdev, dram_base_address) ==
+ U64_MAX)
+ return -EIO;
+
/* Outbound Region 0 - Point to Host */
- host_phys_end_addr = prop->host_phys_base_address + host_phys_size - 1;
+ host_phys_end_addr = host_phys_base_address + host_phys_size - 1;
rc |= hl_pci_iatu_write(hdev, 0x008,
- lower_32_bits(prop->host_phys_base_address));
+ lower_32_bits(host_phys_base_address));
rc |= hl_pci_iatu_write(hdev, 0x00C,
- upper_32_bits(prop->host_phys_base_address));
+ upper_32_bits(host_phys_base_address));
rc |= hl_pci_iatu_write(hdev, 0x010, lower_32_bits(host_phys_end_addr));
rc |= hl_pci_iatu_write(hdev, 0x014, 0);
rc |= hl_pci_iatu_write(hdev, 0x018, 0);
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 613d431da783..8ac292cf4d00 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -374,9 +374,12 @@ struct hl_debug_params_stm {
};
struct hl_debug_params_bmon {
- /* Transaction address filter */
- __u64 addr_range0;
- __u64 addr_range1;
+ /* Two address ranges that the user can request to filter */
+ __u64 start_addr0;
+ __u64 addr_mask0;
+
+ __u64 start_addr1;
+ __u64 addr_mask1;
/* Capture window configuration */
__u32 bw_win;