From 3110c60fdc7a5a7626b7cd401c4918751d7c19db Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 4 Mar 2019 10:22:09 +0200 Subject: habanalabs: Move device CPU code into common file This patch moves the code that is responsible of the communication vs. the F/W to a dedicated file. This will allow us to share the code between different ASICs. Signed-off-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/include/armcp_if.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/misc/habanalabs/include/armcp_if.h') diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h index 9dddb917e72c..ccb82390241e 100644 --- a/drivers/misc/habanalabs/include/armcp_if.h +++ b/drivers/misc/habanalabs/include/armcp_if.h @@ -302,6 +302,14 @@ enum armcp_pwm_attributes { armcp_pwm_enable }; +#define HL_CPU_PKT_SHIFT 5 +#define HL_CPU_PKT_SIZE (1 << HL_CPU_PKT_SHIFT) +#define HL_CPU_PKT_MASK (~((1 << HL_CPU_PKT_SHIFT) - 1)) +#define HL_CPU_MAX_PKTS_IN_CB 32 +#define HL_CPU_CB_SIZE (HL_CPU_PKT_SIZE * \ + HL_CPU_MAX_PKTS_IN_CB) +#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE) + /* Event Queue Packets */ struct eq_generic_event { -- cgit v1.2.3-59-g8ed1b From c535bfdd0f86aeed165d4f5aba187570f42d785f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 4 Mar 2019 15:51:30 +0200 Subject: habanalabs: use EQ MSI/X ID per chip The Event Queue MSI/X ID is different per ASIC. This patch renames the current define to have the GOYA_ prefix to mark it only for Goya. It also moves it from the common armcp_if.h file to the ASIC specific goya_fw_if.h file. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 8 ++++---- drivers/misc/habanalabs/include/armcp_if.h | 2 -- drivers/misc/habanalabs/include/goya/goya_fw_if.h | 2 ++ 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/misc/habanalabs/include/armcp_if.h') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 30f83521de90..78eacd14c9ac 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2204,10 +2204,10 @@ static int goya_enable_msix(struct hl_device *hdev) } } - irq = pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX); + irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX); rc = request_irq(irq, hl_irq_handler_eq, 0, - goya_irq_name[EVENT_QUEUE_MSIX_IDX], + goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX], &hdev->event_queue); if (rc) { dev_err(hdev->dev, "Failed to request IRQ %d", irq); @@ -2238,7 +2238,7 @@ static void goya_sync_irqs(struct hl_device *hdev) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) synchronize_irq(pci_irq_vector(hdev->pdev, i)); - synchronize_irq(pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX)); + synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX)); } static void goya_disable_msix(struct hl_device *hdev) @@ -2251,7 +2251,7 @@ static void goya_disable_msix(struct hl_device *hdev) goya_sync_irqs(hdev); - irq = pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX); + irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX); free_irq(irq, &hdev->event_queue); for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) { diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h index ccb82390241e..c8f28cadc335 100644 --- a/drivers/misc/habanalabs/include/armcp_if.h +++ b/drivers/misc/habanalabs/include/armcp_if.h @@ -32,8 +32,6 @@ struct hl_eq_entry { #define EQ_CTL_EVENT_TYPE_SHIFT 16 #define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000 -#define EVENT_QUEUE_MSIX_IDX 5 - enum pq_init_status { PQ_INIT_STATUS_NA = 0, PQ_INIT_STATUS_READY_FOR_CP, diff --git a/drivers/misc/habanalabs/include/goya/goya_fw_if.h b/drivers/misc/habanalabs/include/goya/goya_fw_if.h index a9920cb4a07b..0fa80fe9f6cc 100644 --- a/drivers/misc/habanalabs/include/goya/goya_fw_if.h +++ b/drivers/misc/habanalabs/include/goya/goya_fw_if.h @@ -8,6 +8,8 @@ #ifndef GOYA_FW_IF_H #define GOYA_FW_IF_H +#define GOYA_EVENT_QUEUE_MSIX_IDX 5 + #define CPU_BOOT_ADDR 0x7FF8040000ull #define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */ -- cgit v1.2.3-59-g8ed1b From 03d5f641dc711eb93145ded91ed68b4be729be4d Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Sun, 28 Apr 2019 19:17:38 +0300 Subject: habanalabs: Use single pool for CPU accessible host memory The device's CPU accessible memory on host is managed in a dedicated pool, except for 2 regions - Primary Queue (PQ) and Event Queue (EQ) - which are allocated from generic DMA pools. Due to address length limitations of the CPU, the addresses of all these memory regions must have the same MSBs starting at bit 40. This patch modifies the allocation of the PQ and EQ to be also from the dedicated pool, to ensure compliance with the limitation. Signed-off-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/habanalabs.h | 12 +++++++++ drivers/misc/habanalabs/hw_queue.c | 40 ++++++++++++++++++++++-------- drivers/misc/habanalabs/include/armcp_if.h | 8 ------ drivers/misc/habanalabs/irq.c | 10 +++++--- 4 files changed, 48 insertions(+), 22 deletions(-) (limited to 'drivers/misc/habanalabs/include/armcp_if.h') diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 65717e4055da..687651db614c 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -321,6 +321,18 @@ struct hl_cs_job; #define HL_EQ_LENGTH 64 #define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE) +#define HL_CPU_PKT_SHIFT 5 +#define HL_CPU_PKT_SIZE (1 << HL_CPU_PKT_SHIFT) +#define HL_CPU_PKT_MASK (~((1 << HL_CPU_PKT_SHIFT) - 1)) +#define HL_CPU_MAX_PKTS_IN_CB 32 +#define HL_CPU_CB_SIZE (HL_CPU_PKT_SIZE * \ + HL_CPU_MAX_PKTS_IN_CB) +#define HL_CPU_CB_QUEUE_SIZE (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE) + +/* KMD <-> ArmCP shared memory size (EQ + PQ + CPU CB queue) */ +#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_EQ_SIZE_IN_BYTES + \ + HL_QUEUE_SIZE_IN_BYTES + \ + HL_CPU_CB_QUEUE_SIZE) /** * struct hl_hw_queue - describes a H/W transport queue. diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c index ef3bb6951360..a1ee52cfd505 100644 --- a/drivers/misc/habanalabs/hw_queue.c +++ b/drivers/misc/habanalabs/hw_queue.c @@ -415,14 +415,20 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id) } static int ext_and_cpu_hw_queue_init(struct hl_device *hdev, - struct hl_hw_queue *q) + struct hl_hw_queue *q, bool is_cpu_queue) { void *p; int rc; - p = hdev->asic_funcs->dma_alloc_coherent(hdev, - HL_QUEUE_SIZE_IN_BYTES, - &q->bus_address, GFP_KERNEL | __GFP_ZERO); + if (is_cpu_queue) + p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + HL_QUEUE_SIZE_IN_BYTES, + &q->bus_address); + else + p = hdev->asic_funcs->dma_alloc_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + &q->bus_address, + GFP_KERNEL | __GFP_ZERO); if (!p) return -ENOMEM; @@ -446,8 +452,15 @@ static int ext_and_cpu_hw_queue_init(struct hl_device *hdev, return 0; free_queue: - hdev->asic_funcs->dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address, q->bus_address); + if (is_cpu_queue) + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + HL_QUEUE_SIZE_IN_BYTES, + (void *) (uintptr_t) q->kernel_address); + else + hdev->asic_funcs->dma_free_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + (void *) (uintptr_t) q->kernel_address, + q->bus_address); return rc; } @@ -474,12 +487,12 @@ static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) static int cpu_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) { - return ext_and_cpu_hw_queue_init(hdev, q); + return ext_and_cpu_hw_queue_init(hdev, q, true); } static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) { - return ext_and_cpu_hw_queue_init(hdev, q); + return ext_and_cpu_hw_queue_init(hdev, q, false); } /* @@ -569,8 +582,15 @@ static void hw_queue_fini(struct hl_device *hdev, struct hl_hw_queue *q) kfree(q->shadow_queue); - hdev->asic_funcs->dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address, q->bus_address); + if (q->queue_type == QUEUE_TYPE_CPU) + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + HL_QUEUE_SIZE_IN_BYTES, + (void *) (uintptr_t) q->kernel_address); + else + hdev->asic_funcs->dma_free_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + (void *) (uintptr_t) q->kernel_address, + q->bus_address); } int hl_hw_queues_create(struct hl_device *hdev) diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h index c8f28cadc335..1f1e35e86d84 100644 --- a/drivers/misc/habanalabs/include/armcp_if.h +++ b/drivers/misc/habanalabs/include/armcp_if.h @@ -300,14 +300,6 @@ enum armcp_pwm_attributes { armcp_pwm_enable }; -#define HL_CPU_PKT_SHIFT 5 -#define HL_CPU_PKT_SIZE (1 << HL_CPU_PKT_SHIFT) -#define HL_CPU_PKT_MASK (~((1 << HL_CPU_PKT_SHIFT) - 1)) -#define HL_CPU_MAX_PKTS_IN_CB 32 -#define HL_CPU_CB_SIZE (HL_CPU_PKT_SIZE * \ - HL_CPU_MAX_PKTS_IN_CB) -#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE) - /* Event Queue Packets */ struct eq_generic_event { diff --git a/drivers/misc/habanalabs/irq.c b/drivers/misc/habanalabs/irq.c index e69a09c10e3f..86a8ad57f1ca 100644 --- a/drivers/misc/habanalabs/irq.c +++ b/drivers/misc/habanalabs/irq.c @@ -284,8 +284,9 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q) BUILD_BUG_ON(HL_EQ_SIZE_IN_BYTES > HL_PAGE_SIZE); - p = hdev->asic_funcs->dma_alloc_coherent(hdev, HL_EQ_SIZE_IN_BYTES, - &q->bus_address, GFP_KERNEL | __GFP_ZERO); + p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + HL_EQ_SIZE_IN_BYTES, + &q->bus_address); if (!p) return -ENOMEM; @@ -308,8 +309,9 @@ void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q) { flush_workqueue(hdev->eq_wq); - hdev->asic_funcs->dma_free_coherent(hdev, HL_EQ_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address, q->bus_address); + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + HL_EQ_SIZE_IN_BYTES, + (void *) (uintptr_t) q->kernel_address); } void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) -- cgit v1.2.3-59-g8ed1b