From 70b2f993ea4a79c298aac4ec1c58089020536ba5 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 13 Jul 2020 12:21:04 +0300 Subject: habanalabs: create common folder For internal needs of our CI we need to move all the common code into a common folder instead of putting them in the root folder of the driver. Same applies to the common header files under include/ Signed-off-by: Oded Gabbay Reviewed-by: Omer Shpigelman --- drivers/misc/habanalabs/Makefile | 11 +- drivers/misc/habanalabs/asid.c | 57 - drivers/misc/habanalabs/command_buffer.c | 463 ----- drivers/misc/habanalabs/command_submission.c | 1232 ------------- drivers/misc/habanalabs/common/Makefile | 9 + drivers/misc/habanalabs/common/asid.c | 57 + drivers/misc/habanalabs/common/command_buffer.c | 463 +++++ .../misc/habanalabs/common/command_submission.c | 1232 +++++++++++++ drivers/misc/habanalabs/common/context.c | 237 +++ drivers/misc/habanalabs/common/debugfs.c | 1411 ++++++++++++++ drivers/misc/habanalabs/common/device.c | 1506 +++++++++++++++ drivers/misc/habanalabs/common/firmware_if.c | 589 ++++++ drivers/misc/habanalabs/common/habanalabs.h | 1948 ++++++++++++++++++++ drivers/misc/habanalabs/common/habanalabs_drv.c | 529 ++++++ drivers/misc/habanalabs/common/habanalabs_ioctl.c | 546 ++++++ drivers/misc/habanalabs/common/hw_queue.c | 918 +++++++++ drivers/misc/habanalabs/common/hwmon.c | 579 ++++++ drivers/misc/habanalabs/common/irq.c | 320 ++++ drivers/misc/habanalabs/common/memory.c | 1843 ++++++++++++++++++ drivers/misc/habanalabs/common/mmu.c | 1037 +++++++++++ drivers/misc/habanalabs/common/pci.c | 400 ++++ drivers/misc/habanalabs/common/sysfs.c | 442 +++++ drivers/misc/habanalabs/context.c | 237 --- drivers/misc/habanalabs/debugfs.c | 1411 -------------- drivers/misc/habanalabs/device.c | 1506 --------------- drivers/misc/habanalabs/firmware_if.c | 589 ------ drivers/misc/habanalabs/gaudi/Makefile | 2 +- drivers/misc/habanalabs/gaudi/gaudiP.h | 2 +- drivers/misc/habanalabs/goya/goyaP.h | 2 +- drivers/misc/habanalabs/habanalabs.h | 1948 -------------------- drivers/misc/habanalabs/habanalabs_drv.c | 529 ------ drivers/misc/habanalabs/habanalabs_ioctl.c | 546 ------ drivers/misc/habanalabs/hw_queue.c | 918 --------- drivers/misc/habanalabs/hwmon.c | 579 ------ drivers/misc/habanalabs/include/armcp_if.h | 407 ---- drivers/misc/habanalabs/include/common/armcp_if.h | 407 ++++ .../misc/habanalabs/include/common/hl_boot_if.h | 98 + drivers/misc/habanalabs/include/common/qman_if.h | 68 + drivers/misc/habanalabs/include/hl_boot_if.h | 98 - drivers/misc/habanalabs/include/qman_if.h | 68 - drivers/misc/habanalabs/irq.c | 320 ---- drivers/misc/habanalabs/memory.c | 1843 ------------------ drivers/misc/habanalabs/mmu.c | 1037 ----------- drivers/misc/habanalabs/pci.c | 400 ---- drivers/misc/habanalabs/sysfs.c | 442 ----- 45 files changed, 14647 insertions(+), 14639 deletions(-) delete mode 100644 drivers/misc/habanalabs/asid.c delete mode 100644 drivers/misc/habanalabs/command_buffer.c delete mode 100644 drivers/misc/habanalabs/command_submission.c create mode 100644 drivers/misc/habanalabs/common/Makefile create mode 100644 drivers/misc/habanalabs/common/asid.c create mode 100644 drivers/misc/habanalabs/common/command_buffer.c create mode 100644 drivers/misc/habanalabs/common/command_submission.c create mode 100644 drivers/misc/habanalabs/common/context.c create mode 100644 drivers/misc/habanalabs/common/debugfs.c create mode 100644 drivers/misc/habanalabs/common/device.c create mode 100644 
drivers/misc/habanalabs/common/firmware_if.c create mode 100644 drivers/misc/habanalabs/common/habanalabs.h create mode 100644 drivers/misc/habanalabs/common/habanalabs_drv.c create mode 100644 drivers/misc/habanalabs/common/habanalabs_ioctl.c create mode 100644 drivers/misc/habanalabs/common/hw_queue.c create mode 100644 drivers/misc/habanalabs/common/hwmon.c create mode 100644 drivers/misc/habanalabs/common/irq.c create mode 100644 drivers/misc/habanalabs/common/memory.c create mode 100644 drivers/misc/habanalabs/common/mmu.c create mode 100644 drivers/misc/habanalabs/common/pci.c create mode 100644 drivers/misc/habanalabs/common/sysfs.c delete mode 100644 drivers/misc/habanalabs/context.c delete mode 100644 drivers/misc/habanalabs/debugfs.c delete mode 100644 drivers/misc/habanalabs/device.c delete mode 100644 drivers/misc/habanalabs/firmware_if.c delete mode 100644 drivers/misc/habanalabs/habanalabs.h delete mode 100644 drivers/misc/habanalabs/habanalabs_drv.c delete mode 100644 drivers/misc/habanalabs/habanalabs_ioctl.c delete mode 100644 drivers/misc/habanalabs/hw_queue.c delete mode 100644 drivers/misc/habanalabs/hwmon.c delete mode 100644 drivers/misc/habanalabs/include/armcp_if.h create mode 100644 drivers/misc/habanalabs/include/common/armcp_if.h create mode 100644 drivers/misc/habanalabs/include/common/hl_boot_if.h create mode 100644 drivers/misc/habanalabs/include/common/qman_if.h delete mode 100644 drivers/misc/habanalabs/include/hl_boot_if.h delete mode 100644 drivers/misc/habanalabs/include/qman_if.h delete mode 100644 drivers/misc/habanalabs/irq.c delete mode 100644 drivers/misc/habanalabs/memory.c delete mode 100644 drivers/misc/habanalabs/mmu.c delete mode 100644 drivers/misc/habanalabs/pci.c delete mode 100644 drivers/misc/habanalabs/sysfs.c (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile index 421ebd903069..a786c0a7de9a 100644 --- a/drivers/misc/habanalabs/Makefile +++ b/drivers/misc/habanalabs/Makefile @@ -3,16 +3,15 @@ # Makefile for HabanaLabs AI accelerators driver # -obj-m := habanalabs.o +obj-$(CONFIG_HABANA_AI) := habanalabs.o -habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \ - command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \ - command_submission.o mmu.o firmware_if.o pci.o - -habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o +include $(src)/common/Makefile +habanalabs-y += $(HL_COMMON_FILES) include $(src)/goya/Makefile habanalabs-y += $(HL_GOYA_FILES) include $(src)/gaudi/Makefile habanalabs-y += $(HL_GAUDI_FILES) + +habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o diff --git a/drivers/misc/habanalabs/asid.c b/drivers/misc/habanalabs/asid.c deleted file mode 100644 index a2fdf31cf27c..000000000000 --- a/drivers/misc/habanalabs/asid.c +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. 
- */ - -#include "habanalabs.h" - -#include - -int hl_asid_init(struct hl_device *hdev) -{ - hdev->asid_bitmap = kcalloc(BITS_TO_LONGS(hdev->asic_prop.max_asid), - sizeof(*hdev->asid_bitmap), GFP_KERNEL); - if (!hdev->asid_bitmap) - return -ENOMEM; - - mutex_init(&hdev->asid_mutex); - - /* ASID 0 is reserved for the kernel driver and device CPU */ - set_bit(0, hdev->asid_bitmap); - - return 0; -} - -void hl_asid_fini(struct hl_device *hdev) -{ - mutex_destroy(&hdev->asid_mutex); - kfree(hdev->asid_bitmap); -} - -unsigned long hl_asid_alloc(struct hl_device *hdev) -{ - unsigned long found; - - mutex_lock(&hdev->asid_mutex); - - found = find_first_zero_bit(hdev->asid_bitmap, - hdev->asic_prop.max_asid); - if (found == hdev->asic_prop.max_asid) - found = 0; - else - set_bit(found, hdev->asid_bitmap); - - mutex_unlock(&hdev->asid_mutex); - - return found; -} - -void hl_asid_free(struct hl_device *hdev, unsigned long asid) -{ - if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid), - "Invalid ASID %lu", asid)) - return; - clear_bit(asid, hdev->asid_bitmap); -} diff --git a/drivers/misc/habanalabs/command_buffer.c b/drivers/misc/habanalabs/command_buffer.c deleted file mode 100644 index 02d13f71b1df..000000000000 --- a/drivers/misc/habanalabs/command_buffer.c +++ /dev/null @@ -1,463 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. - */ - -#include -#include "habanalabs.h" - -#include -#include - -static void cb_fini(struct hl_device *hdev, struct hl_cb *cb) -{ - hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size, - (void *) (uintptr_t) cb->kernel_address, - cb->bus_address); - kfree(cb); -} - -static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb) -{ - if (cb->is_pool) { - spin_lock(&hdev->cb_pool_lock); - list_add(&cb->pool_list, &hdev->cb_pool); - spin_unlock(&hdev->cb_pool_lock); - } else { - cb_fini(hdev, cb); - } -} - -static void cb_release(struct kref *ref) -{ - struct hl_device *hdev; - struct hl_cb *cb; - - cb = container_of(ref, struct hl_cb, refcount); - hdev = cb->hdev; - - hl_debugfs_remove_cb(cb); - - cb_do_release(hdev, cb); -} - -static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, - int ctx_id) -{ - struct hl_cb *cb; - void *p; - - /* - * We use of GFP_ATOMIC here because this function can be called from - * the latency-sensitive code path for command submission. Due to H/W - * limitations in some of the ASICs, the kernel must copy the user CB - * that is designated for an external queue and actually enqueue - * the kernel's copy. Hence, we must never sleep in this code section - * and must use GFP_ATOMIC for all memory allocations. 
- */ - if (ctx_id == HL_KERNEL_ASID_ID) - cb = kzalloc(sizeof(*cb), GFP_ATOMIC); - else - cb = kzalloc(sizeof(*cb), GFP_KERNEL); - - if (!cb) - return NULL; - - if (ctx_id == HL_KERNEL_ASID_ID) - p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, - &cb->bus_address, GFP_ATOMIC); - else - p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, - &cb->bus_address, - GFP_USER | __GFP_ZERO); - if (!p) { - dev_err(hdev->dev, - "failed to allocate %d of dma memory for CB\n", - cb_size); - kfree(cb); - return NULL; - } - - cb->kernel_address = (u64) (uintptr_t) p; - cb->size = cb_size; - - return cb; -} - -int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u32 cb_size, u64 *handle, int ctx_id) -{ - struct hl_cb *cb; - bool alloc_new_cb = true; - int rc; - - /* - * Can't use generic function to check this because of special case - * where we create a CB as part of the reset process - */ - if ((hdev->disabled) || ((atomic_read(&hdev->in_reset)) && - (ctx_id != HL_KERNEL_ASID_ID))) { - dev_warn_ratelimited(hdev->dev, - "Device is disabled or in reset. Can't create new CBs\n"); - rc = -EBUSY; - goto out_err; - } - - if (cb_size > SZ_2M) { - dev_err(hdev->dev, "CB size %d must be less than %d\n", - cb_size, SZ_2M); - rc = -EINVAL; - goto out_err; - } - - /* Minimum allocation must be PAGE SIZE */ - if (cb_size < PAGE_SIZE) - cb_size = PAGE_SIZE; - - if (ctx_id == HL_KERNEL_ASID_ID && - cb_size <= hdev->asic_prop.cb_pool_cb_size) { - - spin_lock(&hdev->cb_pool_lock); - if (!list_empty(&hdev->cb_pool)) { - cb = list_first_entry(&hdev->cb_pool, typeof(*cb), - pool_list); - list_del(&cb->pool_list); - spin_unlock(&hdev->cb_pool_lock); - alloc_new_cb = false; - } else { - spin_unlock(&hdev->cb_pool_lock); - dev_dbg(hdev->dev, "CB pool is empty\n"); - } - } - - if (alloc_new_cb) { - cb = hl_cb_alloc(hdev, cb_size, ctx_id); - if (!cb) { - rc = -ENOMEM; - goto out_err; - } - } - - cb->hdev = hdev; - cb->ctx_id = ctx_id; - - spin_lock(&mgr->cb_lock); - rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); - spin_unlock(&mgr->cb_lock); - - if (rc < 0) { - dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n"); - goto release_cb; - } - - cb->id = rc; - - kref_init(&cb->refcount); - spin_lock_init(&cb->lock); - - /* - * idr is 32-bit so we can safely OR it with a mask that is above - * 32 bit - */ - *handle = cb->id | HL_MMAP_CB_MASK; - *handle <<= PAGE_SHIFT; - - hl_debugfs_add_cb(cb); - - return 0; - -release_cb: - cb_do_release(hdev, cb); -out_err: - *handle = 0; - - return rc; -} - -int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle) -{ - struct hl_cb *cb; - u32 handle; - int rc = 0; - - /* - * handle was given to user to do mmap, I need to shift it back to - * how the idr module gave it to me - */ - cb_handle >>= PAGE_SHIFT; - handle = (u32) cb_handle; - - spin_lock(&mgr->cb_lock); - - cb = idr_find(&mgr->cb_handles, handle); - if (cb) { - idr_remove(&mgr->cb_handles, handle); - spin_unlock(&mgr->cb_lock); - kref_put(&cb->refcount, cb_release); - } else { - spin_unlock(&mgr->cb_lock); - dev_err(hdev->dev, - "CB destroy failed, no match to handle 0x%x\n", handle); - rc = -EINVAL; - } - - return rc; -} - -int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) -{ - union hl_cb_args *args = data; - struct hl_device *hdev = hpriv->hdev; - u64 handle = 0; - int rc; - - if (hl_device_disabled_or_in_reset(hdev)) { - dev_warn_ratelimited(hdev->dev, - "Device is %s. Can't execute CB IOCTL\n", - atomic_read(&hdev->in_reset) ? 
"in_reset" : "disabled"); - return -EBUSY; - } - - switch (args->in.op) { - case HL_CB_OP_CREATE: - if (args->in.cb_size > HL_MAX_CB_SIZE) { - dev_err(hdev->dev, - "User requested CB size %d must be less than %d\n", - args->in.cb_size, HL_MAX_CB_SIZE); - rc = -EINVAL; - } else { - rc = hl_cb_create(hdev, &hpriv->cb_mgr, - args->in.cb_size, &handle, - hpriv->ctx->asid); - } - - memset(args, 0, sizeof(*args)); - args->out.cb_handle = handle; - break; - - case HL_CB_OP_DESTROY: - rc = hl_cb_destroy(hdev, &hpriv->cb_mgr, - args->in.cb_handle); - break; - - default: - rc = -ENOTTY; - break; - } - - return rc; -} - -static void cb_vm_close(struct vm_area_struct *vma) -{ - struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data; - long new_mmap_size; - - new_mmap_size = cb->mmap_size - (vma->vm_end - vma->vm_start); - - if (new_mmap_size > 0) { - cb->mmap_size = new_mmap_size; - return; - } - - spin_lock(&cb->lock); - cb->mmap = false; - spin_unlock(&cb->lock); - - hl_cb_put(cb); - vma->vm_private_data = NULL; -} - -static const struct vm_operations_struct cb_vm_ops = { - .close = cb_vm_close -}; - -int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) -{ - struct hl_device *hdev = hpriv->hdev; - struct hl_cb *cb; - phys_addr_t address; - u32 handle; - int rc; - - handle = vma->vm_pgoff; - - /* reference was taken here */ - cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle); - if (!cb) { - dev_err(hdev->dev, - "CB mmap failed, no match to handle 0x%x\n", handle); - return -EINVAL; - } - - /* Validation check */ - if ((vma->vm_end - vma->vm_start) != ALIGN(cb->size, PAGE_SIZE)) { - dev_err(hdev->dev, - "CB mmap failed, mmap size 0x%lx != 0x%x cb size\n", - vma->vm_end - vma->vm_start, cb->size); - rc = -EINVAL; - goto put_cb; - } - - spin_lock(&cb->lock); - - if (cb->mmap) { - dev_err(hdev->dev, - "CB mmap failed, CB already mmaped to user\n"); - rc = -EINVAL; - goto release_lock; - } - - cb->mmap = true; - - spin_unlock(&cb->lock); - - vma->vm_ops = &cb_vm_ops; - - /* - * Note: We're transferring the cb reference to - * vma->vm_private_data here. 
- */ - - vma->vm_private_data = cb; - - /* Calculate address for CB */ - address = virt_to_phys((void *) (uintptr_t) cb->kernel_address); - - rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address, - address, cb->size); - - if (rc) { - spin_lock(&cb->lock); - cb->mmap = false; - goto release_lock; - } - - cb->mmap_size = cb->size; - - return 0; - -release_lock: - spin_unlock(&cb->lock); -put_cb: - hl_cb_put(cb); - return rc; -} - -struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u32 handle) -{ - struct hl_cb *cb; - - spin_lock(&mgr->cb_lock); - cb = idr_find(&mgr->cb_handles, handle); - - if (!cb) { - spin_unlock(&mgr->cb_lock); - dev_warn(hdev->dev, - "CB get failed, no match to handle 0x%x\n", handle); - return NULL; - } - - kref_get(&cb->refcount); - - spin_unlock(&mgr->cb_lock); - - return cb; - -} - -void hl_cb_put(struct hl_cb *cb) -{ - kref_put(&cb->refcount, cb_release); -} - -void hl_cb_mgr_init(struct hl_cb_mgr *mgr) -{ - spin_lock_init(&mgr->cb_lock); - idr_init(&mgr->cb_handles); -} - -void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr) -{ - struct hl_cb *cb; - struct idr *idp; - u32 id; - - idp = &mgr->cb_handles; - - idr_for_each_entry(idp, cb, id) { - if (kref_put(&cb->refcount, cb_release) != 1) - dev_err(hdev->dev, - "CB %d for CTX ID %d is still alive\n", - id, cb->ctx_id); - } - - idr_destroy(&mgr->cb_handles); -} - -struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size) -{ - u64 cb_handle; - struct hl_cb *cb; - int rc; - - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle, - HL_KERNEL_ASID_ID); - if (rc) { - dev_err(hdev->dev, - "Failed to allocate CB for the kernel driver %d\n", rc); - return NULL; - } - - cb_handle >>= PAGE_SHIFT; - cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle); - /* hl_cb_get should never fail here so use kernel WARN */ - WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle); - if (!cb) - goto destroy_cb; - - return cb; - -destroy_cb: - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT); - - return NULL; -} - -int hl_cb_pool_init(struct hl_device *hdev) -{ - struct hl_cb *cb; - int i; - - INIT_LIST_HEAD(&hdev->cb_pool); - spin_lock_init(&hdev->cb_pool_lock); - - for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) { - cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size, - HL_KERNEL_ASID_ID); - if (cb) { - cb->is_pool = true; - list_add(&cb->pool_list, &hdev->cb_pool); - } else { - hl_cb_pool_fini(hdev); - return -ENOMEM; - } - } - - return 0; -} - -int hl_cb_pool_fini(struct hl_device *hdev) -{ - struct hl_cb *cb, *tmp; - - list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) { - list_del(&cb->pool_list); - cb_fini(hdev, cb); - } - - return 0; -} diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c deleted file mode 100644 index c605be89f764..000000000000 --- a/drivers/misc/habanalabs/command_submission.c +++ /dev/null @@ -1,1232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. 
- */ - -#include -#include "habanalabs.h" - -#include -#include - -#define HL_CS_FLAGS_SIG_WAIT (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT) - -static void job_wq_completion(struct work_struct *work); -static long _hl_cs_wait_ioctl(struct hl_device *hdev, - struct hl_ctx *ctx, u64 timeout_us, u64 seq); -static void cs_do_release(struct kref *ref); - -static void hl_sob_reset(struct kref *ref) -{ - struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, - kref); - struct hl_device *hdev = hw_sob->hdev; - - hdev->asic_funcs->reset_sob(hdev, hw_sob); -} - -void hl_sob_reset_error(struct kref *ref) -{ - struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, - kref); - struct hl_device *hdev = hw_sob->hdev; - - dev_crit(hdev->dev, - "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n", - hw_sob->q_idx, hw_sob->sob_id); -} - -static const char *hl_fence_get_driver_name(struct dma_fence *fence) -{ - return "HabanaLabs"; -} - -static const char *hl_fence_get_timeline_name(struct dma_fence *fence) -{ - struct hl_cs_compl *hl_cs_compl = - container_of(fence, struct hl_cs_compl, base_fence); - - return dev_name(hl_cs_compl->hdev->dev); -} - -static bool hl_fence_enable_signaling(struct dma_fence *fence) -{ - return true; -} - -static void hl_fence_release(struct dma_fence *fence) -{ - struct hl_cs_compl *hl_cs_cmpl = - container_of(fence, struct hl_cs_compl, base_fence); - struct hl_device *hdev = hl_cs_cmpl->hdev; - - /* EBUSY means the CS was never submitted and hence we don't have - * an attached hw_sob object that we should handle here - */ - if (fence->error == -EBUSY) - goto free; - - if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || - (hl_cs_cmpl->type == CS_TYPE_WAIT)) { - - dev_dbg(hdev->dev, - "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n", - hl_cs_cmpl->cs_seq, - hl_cs_cmpl->type, - hl_cs_cmpl->hw_sob->sob_id, - hl_cs_cmpl->sob_val); - - /* - * A signal CS can get completion while the corresponding wait - * for signal CS is on its way to the PQ. The wait for signal CS - * will get stuck if the signal CS incremented the SOB to its - * max value and there are no pending (submitted) waits on this - * SOB. - * We do the following to void this situation: - * 1. The wait for signal CS must get a ref for the signal CS as - * soon as possible in cs_ioctl_signal_wait() and put it - * before being submitted to the PQ but after it incremented - * the SOB refcnt in init_signal_wait_cs(). - * 2. Signal/Wait for signal CS will decrement the SOB refcnt - * here. - * These two measures guarantee that the wait for signal CS will - * reset the SOB upon completion rather than the signal CS and - * hence the above scenario is avoided. - */ - kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); - } - -free: - kfree_rcu(hl_cs_cmpl, base_fence.rcu); -} - -static const struct dma_fence_ops hl_fence_ops = { - .get_driver_name = hl_fence_get_driver_name, - .get_timeline_name = hl_fence_get_timeline_name, - .enable_signaling = hl_fence_enable_signaling, - .release = hl_fence_release -}; - -static void cs_get(struct hl_cs *cs) -{ - kref_get(&cs->refcount); -} - -static int cs_get_unless_zero(struct hl_cs *cs) -{ - return kref_get_unless_zero(&cs->refcount); -} - -static void cs_put(struct hl_cs *cs) -{ - kref_put(&cs->refcount, cs_do_release); -} - -static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) -{ - /* - * Patched CB is created for external queues jobs, and for H/W queues - * jobs if the user CB was allocated by driver and MMU is disabled. 
- */ - return (job->queue_type == QUEUE_TYPE_EXT || - (job->queue_type == QUEUE_TYPE_HW && - job->is_kernel_allocated_cb && - !hdev->mmu_enable)); -} - -/* - * cs_parser - parse the user command submission - * - * @hpriv : pointer to the private data of the fd - * @job : pointer to the job that holds the command submission info - * - * The function parses the command submission of the user. It calls the - * ASIC specific parser, which returns a list of memory blocks to send - * to the device as different command buffers - * - */ -static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) -{ - struct hl_device *hdev = hpriv->hdev; - struct hl_cs_parser parser; - int rc; - - parser.ctx_id = job->cs->ctx->asid; - parser.cs_sequence = job->cs->sequence; - parser.job_id = job->id; - - parser.hw_queue_id = job->hw_queue_id; - parser.job_userptr_list = &job->userptr_list; - parser.patched_cb = NULL; - parser.user_cb = job->user_cb; - parser.user_cb_size = job->user_cb_size; - parser.queue_type = job->queue_type; - parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; - job->patched_cb = NULL; - - rc = hdev->asic_funcs->cs_parser(hdev, &parser); - - if (is_cb_patched(hdev, job)) { - if (!rc) { - job->patched_cb = parser.patched_cb; - job->job_cb_size = parser.patched_cb_size; - job->contains_dma_pkt = parser.contains_dma_pkt; - - spin_lock(&job->patched_cb->lock); - job->patched_cb->cs_cnt++; - spin_unlock(&job->patched_cb->lock); - } - - /* - * Whether the parsing worked or not, we don't need the - * original CB anymore because it was already parsed and - * won't be accessed again for this CS - */ - spin_lock(&job->user_cb->lock); - job->user_cb->cs_cnt--; - spin_unlock(&job->user_cb->lock); - hl_cb_put(job->user_cb); - job->user_cb = NULL; - } else if (!rc) { - job->job_cb_size = job->user_cb_size; - } - - return rc; -} - -static void free_job(struct hl_device *hdev, struct hl_cs_job *job) -{ - struct hl_cs *cs = job->cs; - - if (is_cb_patched(hdev, job)) { - hl_userptr_delete_list(hdev, &job->userptr_list); - - /* - * We might arrive here from rollback and patched CB wasn't - * created, so we need to check it's not NULL - */ - if (job->patched_cb) { - spin_lock(&job->patched_cb->lock); - job->patched_cb->cs_cnt--; - spin_unlock(&job->patched_cb->lock); - - hl_cb_put(job->patched_cb); - } - } - - /* For H/W queue jobs, if a user CB was allocated by driver and MMU is - * enabled, the user CB isn't released in cs_parser() and thus should be - * released here. 
- */ - if (job->queue_type == QUEUE_TYPE_HW && - job->is_kernel_allocated_cb && hdev->mmu_enable) { - spin_lock(&job->user_cb->lock); - job->user_cb->cs_cnt--; - spin_unlock(&job->user_cb->lock); - - hl_cb_put(job->user_cb); - } - - /* - * This is the only place where there can be multiple threads - * modifying the list at the same time - */ - spin_lock(&cs->job_lock); - list_del(&job->cs_node); - spin_unlock(&cs->job_lock); - - hl_debugfs_remove_job(hdev, job); - - if (job->queue_type == QUEUE_TYPE_EXT || - job->queue_type == QUEUE_TYPE_HW) - cs_put(cs); - - kfree(job); -} - -static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx) -{ - hdev->aggregated_cs_counters.device_in_reset_drop_cnt += - ctx->cs_counters.device_in_reset_drop_cnt; - hdev->aggregated_cs_counters.out_of_mem_drop_cnt += - ctx->cs_counters.out_of_mem_drop_cnt; - hdev->aggregated_cs_counters.parsing_drop_cnt += - ctx->cs_counters.parsing_drop_cnt; - hdev->aggregated_cs_counters.queue_full_drop_cnt += - ctx->cs_counters.queue_full_drop_cnt; -} - -static void cs_do_release(struct kref *ref) -{ - struct hl_cs *cs = container_of(ref, struct hl_cs, - refcount); - struct hl_device *hdev = cs->ctx->hdev; - struct hl_cs_job *job, *tmp; - - cs->completed = true; - - /* - * Although if we reached here it means that all external jobs have - * finished, because each one of them took refcnt to CS, we still - * need to go over the internal jobs and free them. Otherwise, we - * will have leaked memory and what's worse, the CS object (and - * potentially the CTX object) could be released, while the JOB - * still holds a pointer to them (but no reference). - */ - list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) - free_job(hdev, job); - - /* We also need to update CI for internal queues */ - if (cs->submitted) { - hdev->asic_funcs->hw_queues_lock(hdev); - - hdev->cs_active_cnt--; - if (!hdev->cs_active_cnt) { - struct hl_device_idle_busy_ts *ts; - - ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++]; - ts->busy_to_idle_ts = ktime_get(); - - if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE) - hdev->idle_busy_ts_idx = 0; - } else if (hdev->cs_active_cnt < 0) { - dev_crit(hdev->dev, "CS active cnt %d is negative\n", - hdev->cs_active_cnt); - } - - hdev->asic_funcs->hw_queues_unlock(hdev); - - hl_int_hw_queue_update_ci(cs); - - spin_lock(&hdev->hw_queues_mirror_lock); - /* remove CS from hw_queues mirror list */ - list_del_init(&cs->mirror_node); - spin_unlock(&hdev->hw_queues_mirror_lock); - - /* - * Don't cancel TDR in case this CS was timedout because we - * might be running from the TDR context - */ - if ((!cs->timedout) && - (hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) { - struct hl_cs *next; - - if (cs->tdr_active) - cancel_delayed_work_sync(&cs->work_tdr); - - spin_lock(&hdev->hw_queues_mirror_lock); - - /* queue TDR for next CS */ - next = list_first_entry_or_null( - &hdev->hw_queues_mirror_list, - struct hl_cs, mirror_node); - - if ((next) && (!next->tdr_active)) { - next->tdr_active = true; - schedule_delayed_work(&next->work_tdr, - hdev->timeout_jiffies); - } - - spin_unlock(&hdev->hw_queues_mirror_lock); - } - } else if (cs->type == CS_TYPE_WAIT) { - /* - * In case the wait for signal CS was submitted, the put occurs - * in init_signal_wait_cs() right before hanging on the PQ. 
- */ - dma_fence_put(cs->signal_fence); - } - - /* - * Must be called before hl_ctx_put because inside we use ctx to get - * the device - */ - hl_debugfs_remove_cs(cs); - - hl_ctx_put(cs->ctx); - - /* We need to mark an error for not submitted because in that case - * the dma fence release flow is different. Mainly, we don't need - * to handle hw_sob for signal/wait - */ - if (cs->timedout) - dma_fence_set_error(cs->fence, -ETIMEDOUT); - else if (cs->aborted) - dma_fence_set_error(cs->fence, -EIO); - else if (!cs->submitted) - dma_fence_set_error(cs->fence, -EBUSY); - - dma_fence_signal(cs->fence); - dma_fence_put(cs->fence); - - cs_counters_aggregate(hdev, cs->ctx); - - kfree(cs->jobs_in_queue_cnt); - kfree(cs); -} - -static void cs_timedout(struct work_struct *work) -{ - struct hl_device *hdev; - int ctx_asid, rc; - struct hl_cs *cs = container_of(work, struct hl_cs, - work_tdr.work); - rc = cs_get_unless_zero(cs); - if (!rc) - return; - - if ((!cs->submitted) || (cs->completed)) { - cs_put(cs); - return; - } - - /* Mark the CS is timed out so we won't try to cancel its TDR */ - cs->timedout = true; - - hdev = cs->ctx->hdev; - ctx_asid = cs->ctx->asid; - - dev_err(hdev->dev, - "Command submission %llu has not finished in time!\n", - cs->sequence); - - cs_put(cs); - - if (hdev->reset_on_lockup) - hl_device_reset(hdev, false, false); -} - -static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, - enum hl_cs_type cs_type, struct hl_cs **cs_new) -{ - struct hl_cs_compl *cs_cmpl; - struct dma_fence *other = NULL; - struct hl_cs *cs; - int rc; - - cs = kzalloc(sizeof(*cs), GFP_ATOMIC); - if (!cs) - return -ENOMEM; - - cs->ctx = ctx; - cs->submitted = false; - cs->completed = false; - cs->type = cs_type; - INIT_LIST_HEAD(&cs->job_list); - INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); - kref_init(&cs->refcount); - spin_lock_init(&cs->job_lock); - - cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC); - if (!cs_cmpl) { - rc = -ENOMEM; - goto free_cs; - } - - cs_cmpl->hdev = hdev; - cs_cmpl->type = cs->type; - spin_lock_init(&cs_cmpl->lock); - cs->fence = &cs_cmpl->base_fence; - - spin_lock(&ctx->cs_lock); - - cs_cmpl->cs_seq = ctx->cs_sequence; - other = ctx->cs_pending[cs_cmpl->cs_seq & - (hdev->asic_prop.max_pending_cs - 1)]; - if ((other) && (!dma_fence_is_signaled(other))) { - dev_dbg(hdev->dev, - "Rejecting CS because of too many in-flights CS\n"); - rc = -EAGAIN; - goto free_fence; - } - - cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, - sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); - if (!cs->jobs_in_queue_cnt) { - rc = -ENOMEM; - goto free_fence; - } - - dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock, - ctx->asid, ctx->cs_sequence); - - cs->sequence = cs_cmpl->cs_seq; - - ctx->cs_pending[cs_cmpl->cs_seq & - (hdev->asic_prop.max_pending_cs - 1)] = - &cs_cmpl->base_fence; - ctx->cs_sequence++; - - dma_fence_get(&cs_cmpl->base_fence); - - dma_fence_put(other); - - spin_unlock(&ctx->cs_lock); - - *cs_new = cs; - - return 0; - -free_fence: - spin_unlock(&ctx->cs_lock); - kfree(cs_cmpl); -free_cs: - kfree(cs); - return rc; -} - -static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) -{ - struct hl_cs_job *job, *tmp; - - list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) - free_job(hdev, job); -} - -void hl_cs_rollback_all(struct hl_device *hdev) -{ - int i; - struct hl_cs *cs, *tmp; - - /* flush all completions */ - for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) - flush_workqueue(hdev->cq_wq[i]); - - /* Make sure we 
don't have leftovers in the H/W queues mirror list */ - list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list, - mirror_node) { - cs_get(cs); - cs->aborted = true; - dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", - cs->ctx->asid, cs->sequence); - cs_rollback(hdev, cs); - cs_put(cs); - } -} - -static void job_wq_completion(struct work_struct *work) -{ - struct hl_cs_job *job = container_of(work, struct hl_cs_job, - finish_work); - struct hl_cs *cs = job->cs; - struct hl_device *hdev = cs->ctx->hdev; - - /* job is no longer needed */ - free_job(hdev, job); -} - -static int validate_queue_index(struct hl_device *hdev, - struct hl_cs_chunk *chunk, - enum hl_queue_type *queue_type, - bool *is_kernel_allocated_cb) -{ - struct asic_fixed_properties *asic = &hdev->asic_prop; - struct hw_queue_properties *hw_queue_prop; - - /* This must be checked here to prevent out-of-bounds access to - * hw_queues_props array - */ - if (chunk->queue_index >= asic->max_queues) { - dev_err(hdev->dev, "Queue index %d is invalid\n", - chunk->queue_index); - return -EINVAL; - } - - hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; - - if (hw_queue_prop->type == QUEUE_TYPE_NA) { - dev_err(hdev->dev, "Queue index %d is invalid\n", - chunk->queue_index); - return -EINVAL; - } - - if (hw_queue_prop->driver_only) { - dev_err(hdev->dev, - "Queue index %d is restricted for the kernel driver\n", - chunk->queue_index); - return -EINVAL; - } - - *queue_type = hw_queue_prop->type; - *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb; - - return 0; -} - -static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, - struct hl_cb_mgr *cb_mgr, - struct hl_cs_chunk *chunk) -{ - struct hl_cb *cb; - u32 cb_handle; - - cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT); - - cb = hl_cb_get(hdev, cb_mgr, cb_handle); - if (!cb) { - dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle); - return NULL; - } - - if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) { - dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size); - goto release_cb; - } - - spin_lock(&cb->lock); - cb->cs_cnt++; - spin_unlock(&cb->lock); - - return cb; - -release_cb: - hl_cb_put(cb); - return NULL; -} - -struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, - enum hl_queue_type queue_type, bool is_kernel_allocated_cb) -{ - struct hl_cs_job *job; - - job = kzalloc(sizeof(*job), GFP_ATOMIC); - if (!job) - return NULL; - - job->queue_type = queue_type; - job->is_kernel_allocated_cb = is_kernel_allocated_cb; - - if (is_cb_patched(hdev, job)) - INIT_LIST_HEAD(&job->userptr_list); - - if (job->queue_type == QUEUE_TYPE_EXT) - INIT_WORK(&job->finish_work, job_wq_completion); - - return job; -} - -static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, - u32 num_chunks, u64 *cs_seq) -{ - struct hl_device *hdev = hpriv->hdev; - struct hl_cs_chunk *cs_chunk_array; - struct hl_cs_job *job; - struct hl_cs *cs; - struct hl_cb *cb; - bool int_queues_only = true; - u32 size_to_copy; - int rc, i; - - *cs_seq = ULLONG_MAX; - - if (num_chunks > HL_MAX_JOBS_PER_CS) { - dev_err(hdev->dev, - "Number of chunks can NOT be larger than %d\n", - HL_MAX_JOBS_PER_CS); - rc = -EINVAL; - goto out; - } - - cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array), - GFP_ATOMIC); - if (!cs_chunk_array) { - rc = -ENOMEM; - goto out; - } - - size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); - if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) { - dev_err(hdev->dev, "Failed to copy cs chunk array from 
user\n"); - rc = -EFAULT; - goto free_cs_chunk_array; - } - - /* increment refcnt for context */ - hl_ctx_get(hdev, hpriv->ctx); - - rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs); - if (rc) { - hl_ctx_put(hpriv->ctx); - goto free_cs_chunk_array; - } - - *cs_seq = cs->sequence; - - hl_debugfs_add_cs(cs); - - /* Validate ALL the CS chunks before submitting the CS */ - for (i = 0 ; i < num_chunks ; i++) { - struct hl_cs_chunk *chunk = &cs_chunk_array[i]; - enum hl_queue_type queue_type; - bool is_kernel_allocated_cb; - - rc = validate_queue_index(hdev, chunk, &queue_type, - &is_kernel_allocated_cb); - if (rc) { - hpriv->ctx->cs_counters.parsing_drop_cnt++; - goto free_cs_object; - } - - if (is_kernel_allocated_cb) { - cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); - if (!cb) { - hpriv->ctx->cs_counters.parsing_drop_cnt++; - rc = -EINVAL; - goto free_cs_object; - } - } else { - cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; - } - - if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW) - int_queues_only = false; - - job = hl_cs_allocate_job(hdev, queue_type, - is_kernel_allocated_cb); - if (!job) { - hpriv->ctx->cs_counters.out_of_mem_drop_cnt++; - dev_err(hdev->dev, "Failed to allocate a new job\n"); - rc = -ENOMEM; - if (is_kernel_allocated_cb) - goto release_cb; - else - goto free_cs_object; - } - - job->id = i + 1; - job->cs = cs; - job->user_cb = cb; - job->user_cb_size = chunk->cb_size; - job->hw_queue_id = chunk->queue_index; - - cs->jobs_in_queue_cnt[job->hw_queue_id]++; - - list_add_tail(&job->cs_node, &cs->job_list); - - /* - * Increment CS reference. When CS reference is 0, CS is - * done and can be signaled to user and free all its resources - * Only increment for JOB on external or H/W queues, because - * only for those JOBs we get completion - */ - if (job->queue_type == QUEUE_TYPE_EXT || - job->queue_type == QUEUE_TYPE_HW) - cs_get(cs); - - hl_debugfs_add_job(hdev, job); - - rc = cs_parser(hpriv, job); - if (rc) { - hpriv->ctx->cs_counters.parsing_drop_cnt++; - dev_err(hdev->dev, - "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", - cs->ctx->asid, cs->sequence, job->id, rc); - goto free_cs_object; - } - } - - if (int_queues_only) { - hpriv->ctx->cs_counters.parsing_drop_cnt++; - dev_err(hdev->dev, - "Reject CS %d.%llu because only internal queues jobs are present\n", - cs->ctx->asid, cs->sequence); - rc = -EINVAL; - goto free_cs_object; - } - - rc = hl_hw_queue_schedule_cs(cs); - if (rc) { - if (rc != -EAGAIN) - dev_err(hdev->dev, - "Failed to submit CS %d.%llu to H/W queues, error %d\n", - cs->ctx->asid, cs->sequence, rc); - goto free_cs_object; - } - - rc = HL_CS_STATUS_SUCCESS; - goto put_cs; - -release_cb: - spin_lock(&cb->lock); - cb->cs_cnt--; - spin_unlock(&cb->lock); - hl_cb_put(cb); -free_cs_object: - cs_rollback(hdev, cs); - *cs_seq = ULLONG_MAX; - /* The path below is both for good and erroneous exits */ -put_cs: - /* We finished with the CS in this function, so put the ref */ - cs_put(cs); -free_cs_chunk_array: - kfree(cs_chunk_array); -out: - return rc; -} - -static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, - void __user *chunks, u32 num_chunks, - u64 *cs_seq) -{ - struct hl_device *hdev = hpriv->hdev; - struct hl_ctx *ctx = hpriv->ctx; - struct hl_cs_chunk *cs_chunk_array, *chunk; - struct hw_queue_properties *hw_queue_prop; - struct dma_fence *sig_fence = NULL; - struct hl_cs_job *job; - struct hl_cs *cs; - struct hl_cb *cb; - enum hl_queue_type q_type; - u64 *signal_seq_arr = NULL, 
signal_seq; - u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size; - int rc; - - *cs_seq = ULLONG_MAX; - - if (num_chunks > HL_MAX_JOBS_PER_CS) { - dev_err(hdev->dev, - "Number of chunks can NOT be larger than %d\n", - HL_MAX_JOBS_PER_CS); - rc = -EINVAL; - goto out; - } - - cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array), - GFP_ATOMIC); - if (!cs_chunk_array) { - rc = -ENOMEM; - goto out; - } - - size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); - if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) { - dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); - rc = -EFAULT; - goto free_cs_chunk_array; - } - - /* currently it is guaranteed to have only one chunk */ - chunk = &cs_chunk_array[0]; - q_idx = chunk->queue_index; - hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; - q_type = hw_queue_prop->type; - - if ((q_idx >= hdev->asic_prop.max_queues) || - (!hw_queue_prop->supports_sync_stream)) { - dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx); - rc = -EINVAL; - goto free_cs_chunk_array; - } - - if (cs_type == CS_TYPE_WAIT) { - struct hl_cs_compl *sig_waitcs_cmpl; - - signal_seq_arr_len = chunk->num_signal_seq_arr; - - /* currently only one signal seq is supported */ - if (signal_seq_arr_len != 1) { - dev_err(hdev->dev, - "Wait for signal CS supports only one signal CS seq\n"); - rc = -EINVAL; - goto free_cs_chunk_array; - } - - signal_seq_arr = kmalloc_array(signal_seq_arr_len, - sizeof(*signal_seq_arr), - GFP_ATOMIC); - if (!signal_seq_arr) { - rc = -ENOMEM; - goto free_cs_chunk_array; - } - - size_to_copy = chunk->num_signal_seq_arr * - sizeof(*signal_seq_arr); - if (copy_from_user(signal_seq_arr, - u64_to_user_ptr(chunk->signal_seq_arr), - size_to_copy)) { - dev_err(hdev->dev, - "Failed to copy signal seq array from user\n"); - rc = -EFAULT; - goto free_signal_seq_array; - } - - /* currently it is guaranteed to have only one signal seq */ - signal_seq = signal_seq_arr[0]; - sig_fence = hl_ctx_get_fence(ctx, signal_seq); - if (IS_ERR(sig_fence)) { - dev_err(hdev->dev, - "Failed to get signal CS with seq 0x%llx\n", - signal_seq); - rc = PTR_ERR(sig_fence); - goto free_signal_seq_array; - } - - if (!sig_fence) { - /* signal CS already finished */ - rc = 0; - goto free_signal_seq_array; - } - - sig_waitcs_cmpl = - container_of(sig_fence, struct hl_cs_compl, base_fence); - - if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) { - dev_err(hdev->dev, - "CS seq 0x%llx is not of a signal CS\n", - signal_seq); - dma_fence_put(sig_fence); - rc = -EINVAL; - goto free_signal_seq_array; - } - - if (dma_fence_is_signaled(sig_fence)) { - /* signal CS already finished */ - dma_fence_put(sig_fence); - rc = 0; - goto free_signal_seq_array; - } - } - - /* increment refcnt for context */ - hl_ctx_get(hdev, ctx); - - rc = allocate_cs(hdev, ctx, cs_type, &cs); - if (rc) { - if (cs_type == CS_TYPE_WAIT) - dma_fence_put(sig_fence); - hl_ctx_put(ctx); - goto free_signal_seq_array; - } - - /* - * Save the signal CS fence for later initialization right before - * hanging the wait CS on the queue. 
- */ - if (cs->type == CS_TYPE_WAIT) - cs->signal_fence = sig_fence; - - hl_debugfs_add_cs(cs); - - *cs_seq = cs->sequence; - - job = hl_cs_allocate_job(hdev, q_type, true); - if (!job) { - ctx->cs_counters.out_of_mem_drop_cnt++; - dev_err(hdev->dev, "Failed to allocate a new job\n"); - rc = -ENOMEM; - goto put_cs; - } - - cb = hl_cb_kernel_create(hdev, PAGE_SIZE); - if (!cb) { - ctx->cs_counters.out_of_mem_drop_cnt++; - kfree(job); - rc = -EFAULT; - goto put_cs; - } - - if (cs->type == CS_TYPE_WAIT) - cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); - else - cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); - - job->id = 0; - job->cs = cs; - job->user_cb = cb; - job->user_cb->cs_cnt++; - job->user_cb_size = cb_size; - job->hw_queue_id = q_idx; - - /* - * No need in parsing, user CB is the patched CB. - * We call hl_cb_destroy() out of two reasons - we don't need the CB in - * the CB idr anymore and to decrement its refcount as it was - * incremented inside hl_cb_kernel_create(). - */ - job->patched_cb = job->user_cb; - job->job_cb_size = job->user_cb_size; - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); - - cs->jobs_in_queue_cnt[job->hw_queue_id]++; - - list_add_tail(&job->cs_node, &cs->job_list); - - /* increment refcount as for external queues we get completion */ - cs_get(cs); - - hl_debugfs_add_job(hdev, job); - - rc = hl_hw_queue_schedule_cs(cs); - if (rc) { - if (rc != -EAGAIN) - dev_err(hdev->dev, - "Failed to submit CS %d.%llu to H/W queues, error %d\n", - ctx->asid, cs->sequence, rc); - goto free_cs_object; - } - - rc = HL_CS_STATUS_SUCCESS; - goto put_cs; - -free_cs_object: - cs_rollback(hdev, cs); - *cs_seq = ULLONG_MAX; - /* The path below is both for good and erroneous exits */ -put_cs: - /* We finished with the CS in this function, so put the ref */ - cs_put(cs); -free_signal_seq_array: - if (cs_type == CS_TYPE_WAIT) - kfree(signal_seq_arr); -free_cs_chunk_array: - kfree(cs_chunk_array); -out: - return rc; -} - -int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) -{ - struct hl_device *hdev = hpriv->hdev; - union hl_cs_args *args = data; - struct hl_ctx *ctx = hpriv->ctx; - void __user *chunks_execute, *chunks_restore; - enum hl_cs_type cs_type; - u32 num_chunks_execute, num_chunks_restore, sig_wait_flags; - u64 cs_seq = ULONG_MAX; - int rc, do_ctx_switch; - bool need_soft_reset = false; - - if (hl_device_disabled_or_in_reset(hdev)) { - dev_warn_ratelimited(hdev->dev, - "Device is %s. Can't submit new CS\n", - atomic_read(&hdev->in_reset) ? 
"in_reset" : "disabled"); - rc = -EBUSY; - goto out; - } - - sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT; - - if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) { - dev_err(hdev->dev, - "Signal and wait CS flags are mutually exclusive, context %d\n", - ctx->asid); - rc = -EINVAL; - goto out; - } - - if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) && - (!hdev->supports_sync_stream))) { - dev_err(hdev->dev, "Sync stream CS is not supported\n"); - rc = -EINVAL; - goto out; - } - - if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL) - cs_type = CS_TYPE_SIGNAL; - else if (args->in.cs_flags & HL_CS_FLAGS_WAIT) - cs_type = CS_TYPE_WAIT; - else - cs_type = CS_TYPE_DEFAULT; - - chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute; - num_chunks_execute = args->in.num_chunks_execute; - - if (cs_type == CS_TYPE_DEFAULT) { - if (!num_chunks_execute) { - dev_err(hdev->dev, - "Got execute CS with 0 chunks, context %d\n", - ctx->asid); - rc = -EINVAL; - goto out; - } - } else if (num_chunks_execute != 1) { - dev_err(hdev->dev, - "Sync stream CS mandates one chunk only, context %d\n", - ctx->asid); - rc = -EINVAL; - goto out; - } - - do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); - - if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { - long ret; - - chunks_restore = - (void __user *) (uintptr_t) args->in.chunks_restore; - num_chunks_restore = args->in.num_chunks_restore; - - mutex_lock(&hpriv->restore_phase_mutex); - - if (do_ctx_switch) { - rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); - if (rc) { - dev_err_ratelimited(hdev->dev, - "Failed to switch to context %d, rejecting CS! %d\n", - ctx->asid, rc); - /* - * If we timedout, or if the device is not IDLE - * while we want to do context-switch (-EBUSY), - * we need to soft-reset because QMAN is - * probably stuck. 
However, we can't call to - * reset here directly because of deadlock, so - * need to do it at the very end of this - * function - */ - if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) - need_soft_reset = true; - mutex_unlock(&hpriv->restore_phase_mutex); - goto out; - } - } - - hdev->asic_funcs->restore_phase_topology(hdev); - - if (!num_chunks_restore) { - dev_dbg(hdev->dev, - "Need to run restore phase but restore CS is empty\n"); - rc = 0; - } else { - rc = cs_ioctl_default(hpriv, chunks_restore, - num_chunks_restore, &cs_seq); - } - - mutex_unlock(&hpriv->restore_phase_mutex); - - if (rc) { - dev_err(hdev->dev, - "Failed to submit restore CS for context %d (%d)\n", - ctx->asid, rc); - goto out; - } - - /* Need to wait for restore completion before execution phase */ - if (num_chunks_restore) { - ret = _hl_cs_wait_ioctl(hdev, ctx, - jiffies_to_usecs(hdev->timeout_jiffies), - cs_seq); - if (ret <= 0) { - dev_err(hdev->dev, - "Restore CS for context %d failed to complete %ld\n", - ctx->asid, ret); - rc = -ENOEXEC; - goto out; - } - } - - ctx->thread_ctx_switch_wait_token = 1; - } else if (!ctx->thread_ctx_switch_wait_token) { - u32 tmp; - - rc = hl_poll_timeout_memory(hdev, - &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), - 100, jiffies_to_usecs(hdev->timeout_jiffies), false); - - if (rc == -ETIMEDOUT) { - dev_err(hdev->dev, - "context switch phase timeout (%d)\n", tmp); - goto out; - } - } - - if (cs_type == CS_TYPE_DEFAULT) - rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute, - &cs_seq); - else - rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute, - num_chunks_execute, &cs_seq); - -out: - if (rc != -EAGAIN) { - memset(args, 0, sizeof(*args)); - args->out.status = rc; - args->out.seq = cs_seq; - } - - if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset)) - hl_device_reset(hdev, false, false); - - return rc; -} - -static long _hl_cs_wait_ioctl(struct hl_device *hdev, - struct hl_ctx *ctx, u64 timeout_us, u64 seq) -{ - struct dma_fence *fence; - unsigned long timeout; - long rc; - - if (timeout_us == MAX_SCHEDULE_TIMEOUT) - timeout = timeout_us; - else - timeout = usecs_to_jiffies(timeout_us); - - hl_ctx_get(hdev, ctx); - - fence = hl_ctx_get_fence(ctx, seq); - if (IS_ERR(fence)) { - rc = PTR_ERR(fence); - if (rc == -EINVAL) - dev_notice_ratelimited(hdev->dev, - "Can't wait on CS %llu because current CS is at seq %llu\n", - seq, ctx->cs_sequence); - } else if (fence) { - rc = dma_fence_wait_timeout(fence, true, timeout); - if (fence->error == -ETIMEDOUT) - rc = -ETIMEDOUT; - else if (fence->error == -EIO) - rc = -EIO; - dma_fence_put(fence); - } else { - dev_dbg(hdev->dev, - "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", - seq, ctx->cs_sequence); - rc = 1; - } - - hl_ctx_put(ctx); - - return rc; -} - -int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) -{ - struct hl_device *hdev = hpriv->hdev; - union hl_wait_cs_args *args = data; - u64 seq = args->in.seq; - long rc; - - rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq); - - memset(args, 0, sizeof(*args)); - - if (rc < 0) { - if (rc == -ERESTARTSYS) { - dev_err_ratelimited(hdev->dev, - "user process got signal while waiting for CS handle %llu\n", - seq); - args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED; - rc = -EINTR; - } else if (rc == -ETIMEDOUT) { - dev_err_ratelimited(hdev->dev, - "CS %llu has timed-out while user process is waiting for it\n", - seq); - args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; - } else if (rc == -EIO) { - 
dev_err_ratelimited(hdev->dev, - "CS %llu has been aborted while user process is waiting for it\n", - seq); - args->out.status = HL_WAIT_CS_STATUS_ABORTED; - } - return rc; - } - - if (rc == 0) - args->out.status = HL_WAIT_CS_STATUS_BUSY; - else - args->out.status = HL_WAIT_CS_STATUS_COMPLETED; - - return 0; -} diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile new file mode 100644 index 000000000000..97d03b5c8683 --- /dev/null +++ b/drivers/misc/habanalabs/common/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src)/common + +HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \ + common/asid.o common/habanalabs_ioctl.o \ + common/command_buffer.o common/hw_queue.o common/irq.o \ + common/sysfs.o common/hwmon.o common/memory.o \ + common/command_submission.o common/mmu.o common/firmware_if.o \ + common/pci.o diff --git a/drivers/misc/habanalabs/common/asid.c b/drivers/misc/habanalabs/common/asid.c new file mode 100644 index 000000000000..a2fdf31cf27c --- /dev/null +++ b/drivers/misc/habanalabs/common/asid.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +#include + +int hl_asid_init(struct hl_device *hdev) +{ + hdev->asid_bitmap = kcalloc(BITS_TO_LONGS(hdev->asic_prop.max_asid), + sizeof(*hdev->asid_bitmap), GFP_KERNEL); + if (!hdev->asid_bitmap) + return -ENOMEM; + + mutex_init(&hdev->asid_mutex); + + /* ASID 0 is reserved for the kernel driver and device CPU */ + set_bit(0, hdev->asid_bitmap); + + return 0; +} + +void hl_asid_fini(struct hl_device *hdev) +{ + mutex_destroy(&hdev->asid_mutex); + kfree(hdev->asid_bitmap); +} + +unsigned long hl_asid_alloc(struct hl_device *hdev) +{ + unsigned long found; + + mutex_lock(&hdev->asid_mutex); + + found = find_first_zero_bit(hdev->asid_bitmap, + hdev->asic_prop.max_asid); + if (found == hdev->asic_prop.max_asid) + found = 0; + else + set_bit(found, hdev->asid_bitmap); + + mutex_unlock(&hdev->asid_mutex); + + return found; +} + +void hl_asid_free(struct hl_device *hdev, unsigned long asid) +{ + if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid), + "Invalid ASID %lu", asid)) + return; + clear_bit(asid, hdev->asid_bitmap); +} diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c new file mode 100644 index 000000000000..02d13f71b1df --- /dev/null +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. 
+ */ + +#include +#include "habanalabs.h" + +#include +#include + +static void cb_fini(struct hl_device *hdev, struct hl_cb *cb) +{ + hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size, + (void *) (uintptr_t) cb->kernel_address, + cb->bus_address); + kfree(cb); +} + +static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb) +{ + if (cb->is_pool) { + spin_lock(&hdev->cb_pool_lock); + list_add(&cb->pool_list, &hdev->cb_pool); + spin_unlock(&hdev->cb_pool_lock); + } else { + cb_fini(hdev, cb); + } +} + +static void cb_release(struct kref *ref) +{ + struct hl_device *hdev; + struct hl_cb *cb; + + cb = container_of(ref, struct hl_cb, refcount); + hdev = cb->hdev; + + hl_debugfs_remove_cb(cb); + + cb_do_release(hdev, cb); +} + +static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, + int ctx_id) +{ + struct hl_cb *cb; + void *p; + + /* + * We use of GFP_ATOMIC here because this function can be called from + * the latency-sensitive code path for command submission. Due to H/W + * limitations in some of the ASICs, the kernel must copy the user CB + * that is designated for an external queue and actually enqueue + * the kernel's copy. Hence, we must never sleep in this code section + * and must use GFP_ATOMIC for all memory allocations. + */ + if (ctx_id == HL_KERNEL_ASID_ID) + cb = kzalloc(sizeof(*cb), GFP_ATOMIC); + else + cb = kzalloc(sizeof(*cb), GFP_KERNEL); + + if (!cb) + return NULL; + + if (ctx_id == HL_KERNEL_ASID_ID) + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, + &cb->bus_address, GFP_ATOMIC); + else + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, + &cb->bus_address, + GFP_USER | __GFP_ZERO); + if (!p) { + dev_err(hdev->dev, + "failed to allocate %d of dma memory for CB\n", + cb_size); + kfree(cb); + return NULL; + } + + cb->kernel_address = (u64) (uintptr_t) p; + cb->size = cb_size; + + return cb; +} + +int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, + u32 cb_size, u64 *handle, int ctx_id) +{ + struct hl_cb *cb; + bool alloc_new_cb = true; + int rc; + + /* + * Can't use generic function to check this because of special case + * where we create a CB as part of the reset process + */ + if ((hdev->disabled) || ((atomic_read(&hdev->in_reset)) && + (ctx_id != HL_KERNEL_ASID_ID))) { + dev_warn_ratelimited(hdev->dev, + "Device is disabled or in reset. 
Can't create new CBs\n"); + rc = -EBUSY; + goto out_err; + } + + if (cb_size > SZ_2M) { + dev_err(hdev->dev, "CB size %d must be less than %d\n", + cb_size, SZ_2M); + rc = -EINVAL; + goto out_err; + } + + /* Minimum allocation must be PAGE SIZE */ + if (cb_size < PAGE_SIZE) + cb_size = PAGE_SIZE; + + if (ctx_id == HL_KERNEL_ASID_ID && + cb_size <= hdev->asic_prop.cb_pool_cb_size) { + + spin_lock(&hdev->cb_pool_lock); + if (!list_empty(&hdev->cb_pool)) { + cb = list_first_entry(&hdev->cb_pool, typeof(*cb), + pool_list); + list_del(&cb->pool_list); + spin_unlock(&hdev->cb_pool_lock); + alloc_new_cb = false; + } else { + spin_unlock(&hdev->cb_pool_lock); + dev_dbg(hdev->dev, "CB pool is empty\n"); + } + } + + if (alloc_new_cb) { + cb = hl_cb_alloc(hdev, cb_size, ctx_id); + if (!cb) { + rc = -ENOMEM; + goto out_err; + } + } + + cb->hdev = hdev; + cb->ctx_id = ctx_id; + + spin_lock(&mgr->cb_lock); + rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); + spin_unlock(&mgr->cb_lock); + + if (rc < 0) { + dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n"); + goto release_cb; + } + + cb->id = rc; + + kref_init(&cb->refcount); + spin_lock_init(&cb->lock); + + /* + * idr is 32-bit so we can safely OR it with a mask that is above + * 32 bit + */ + *handle = cb->id | HL_MMAP_CB_MASK; + *handle <<= PAGE_SHIFT; + + hl_debugfs_add_cb(cb); + + return 0; + +release_cb: + cb_do_release(hdev, cb); +out_err: + *handle = 0; + + return rc; +} + +int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle) +{ + struct hl_cb *cb; + u32 handle; + int rc = 0; + + /* + * handle was given to user to do mmap, I need to shift it back to + * how the idr module gave it to me + */ + cb_handle >>= PAGE_SHIFT; + handle = (u32) cb_handle; + + spin_lock(&mgr->cb_lock); + + cb = idr_find(&mgr->cb_handles, handle); + if (cb) { + idr_remove(&mgr->cb_handles, handle); + spin_unlock(&mgr->cb_lock); + kref_put(&cb->refcount, cb_release); + } else { + spin_unlock(&mgr->cb_lock); + dev_err(hdev->dev, + "CB destroy failed, no match to handle 0x%x\n", handle); + rc = -EINVAL; + } + + return rc; +} + +int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) +{ + union hl_cb_args *args = data; + struct hl_device *hdev = hpriv->hdev; + u64 handle = 0; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_warn_ratelimited(hdev->dev, + "Device is %s. Can't execute CB IOCTL\n", + atomic_read(&hdev->in_reset) ? 
"in_reset" : "disabled"); + return -EBUSY; + } + + switch (args->in.op) { + case HL_CB_OP_CREATE: + if (args->in.cb_size > HL_MAX_CB_SIZE) { + dev_err(hdev->dev, + "User requested CB size %d must be less than %d\n", + args->in.cb_size, HL_MAX_CB_SIZE); + rc = -EINVAL; + } else { + rc = hl_cb_create(hdev, &hpriv->cb_mgr, + args->in.cb_size, &handle, + hpriv->ctx->asid); + } + + memset(args, 0, sizeof(*args)); + args->out.cb_handle = handle; + break; + + case HL_CB_OP_DESTROY: + rc = hl_cb_destroy(hdev, &hpriv->cb_mgr, + args->in.cb_handle); + break; + + default: + rc = -ENOTTY; + break; + } + + return rc; +} + +static void cb_vm_close(struct vm_area_struct *vma) +{ + struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data; + long new_mmap_size; + + new_mmap_size = cb->mmap_size - (vma->vm_end - vma->vm_start); + + if (new_mmap_size > 0) { + cb->mmap_size = new_mmap_size; + return; + } + + spin_lock(&cb->lock); + cb->mmap = false; + spin_unlock(&cb->lock); + + hl_cb_put(cb); + vma->vm_private_data = NULL; +} + +static const struct vm_operations_struct cb_vm_ops = { + .close = cb_vm_close +}; + +int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_cb *cb; + phys_addr_t address; + u32 handle; + int rc; + + handle = vma->vm_pgoff; + + /* reference was taken here */ + cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle); + if (!cb) { + dev_err(hdev->dev, + "CB mmap failed, no match to handle 0x%x\n", handle); + return -EINVAL; + } + + /* Validation check */ + if ((vma->vm_end - vma->vm_start) != ALIGN(cb->size, PAGE_SIZE)) { + dev_err(hdev->dev, + "CB mmap failed, mmap size 0x%lx != 0x%x cb size\n", + vma->vm_end - vma->vm_start, cb->size); + rc = -EINVAL; + goto put_cb; + } + + spin_lock(&cb->lock); + + if (cb->mmap) { + dev_err(hdev->dev, + "CB mmap failed, CB already mmaped to user\n"); + rc = -EINVAL; + goto release_lock; + } + + cb->mmap = true; + + spin_unlock(&cb->lock); + + vma->vm_ops = &cb_vm_ops; + + /* + * Note: We're transferring the cb reference to + * vma->vm_private_data here. 
+ */ + + vma->vm_private_data = cb; + + /* Calculate address for CB */ + address = virt_to_phys((void *) (uintptr_t) cb->kernel_address); + + rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address, + address, cb->size); + + if (rc) { + spin_lock(&cb->lock); + cb->mmap = false; + goto release_lock; + } + + cb->mmap_size = cb->size; + + return 0; + +release_lock: + spin_unlock(&cb->lock); +put_cb: + hl_cb_put(cb); + return rc; +} + +struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, + u32 handle) +{ + struct hl_cb *cb; + + spin_lock(&mgr->cb_lock); + cb = idr_find(&mgr->cb_handles, handle); + + if (!cb) { + spin_unlock(&mgr->cb_lock); + dev_warn(hdev->dev, + "CB get failed, no match to handle 0x%x\n", handle); + return NULL; + } + + kref_get(&cb->refcount); + + spin_unlock(&mgr->cb_lock); + + return cb; + +} + +void hl_cb_put(struct hl_cb *cb) +{ + kref_put(&cb->refcount, cb_release); +} + +void hl_cb_mgr_init(struct hl_cb_mgr *mgr) +{ + spin_lock_init(&mgr->cb_lock); + idr_init(&mgr->cb_handles); +} + +void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr) +{ + struct hl_cb *cb; + struct idr *idp; + u32 id; + + idp = &mgr->cb_handles; + + idr_for_each_entry(idp, cb, id) { + if (kref_put(&cb->refcount, cb_release) != 1) + dev_err(hdev->dev, + "CB %d for CTX ID %d is still alive\n", + id, cb->ctx_id); + } + + idr_destroy(&mgr->cb_handles); +} + +struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size) +{ + u64 cb_handle; + struct hl_cb *cb; + int rc; + + rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle, + HL_KERNEL_ASID_ID); + if (rc) { + dev_err(hdev->dev, + "Failed to allocate CB for the kernel driver %d\n", rc); + return NULL; + } + + cb_handle >>= PAGE_SHIFT; + cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle); + /* hl_cb_get should never fail here so use kernel WARN */ + WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle); + if (!cb) + goto destroy_cb; + + return cb; + +destroy_cb: + hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT); + + return NULL; +} + +int hl_cb_pool_init(struct hl_device *hdev) +{ + struct hl_cb *cb; + int i; + + INIT_LIST_HEAD(&hdev->cb_pool); + spin_lock_init(&hdev->cb_pool_lock); + + for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) { + cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size, + HL_KERNEL_ASID_ID); + if (cb) { + cb->is_pool = true; + list_add(&cb->pool_list, &hdev->cb_pool); + } else { + hl_cb_pool_fini(hdev); + return -ENOMEM; + } + } + + return 0; +} + +int hl_cb_pool_fini(struct hl_device *hdev) +{ + struct hl_cb *cb, *tmp; + + list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) { + list_del(&cb->pool_list); + cb_fini(hdev, cb); + } + + return 0; +} diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c new file mode 100644 index 000000000000..c605be89f764 --- /dev/null +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -0,0 +1,1232 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. 
+ */ + +#include +#include "habanalabs.h" + +#include +#include + +#define HL_CS_FLAGS_SIG_WAIT (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT) + +static void job_wq_completion(struct work_struct *work); +static long _hl_cs_wait_ioctl(struct hl_device *hdev, + struct hl_ctx *ctx, u64 timeout_us, u64 seq); +static void cs_do_release(struct kref *ref); + +static void hl_sob_reset(struct kref *ref) +{ + struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, + kref); + struct hl_device *hdev = hw_sob->hdev; + + hdev->asic_funcs->reset_sob(hdev, hw_sob); +} + +void hl_sob_reset_error(struct kref *ref) +{ + struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, + kref); + struct hl_device *hdev = hw_sob->hdev; + + dev_crit(hdev->dev, + "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n", + hw_sob->q_idx, hw_sob->sob_id); +} + +static const char *hl_fence_get_driver_name(struct dma_fence *fence) +{ + return "HabanaLabs"; +} + +static const char *hl_fence_get_timeline_name(struct dma_fence *fence) +{ + struct hl_cs_compl *hl_cs_compl = + container_of(fence, struct hl_cs_compl, base_fence); + + return dev_name(hl_cs_compl->hdev->dev); +} + +static bool hl_fence_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static void hl_fence_release(struct dma_fence *fence) +{ + struct hl_cs_compl *hl_cs_cmpl = + container_of(fence, struct hl_cs_compl, base_fence); + struct hl_device *hdev = hl_cs_cmpl->hdev; + + /* EBUSY means the CS was never submitted and hence we don't have + * an attached hw_sob object that we should handle here + */ + if (fence->error == -EBUSY) + goto free; + + if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || + (hl_cs_cmpl->type == CS_TYPE_WAIT)) { + + dev_dbg(hdev->dev, + "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n", + hl_cs_cmpl->cs_seq, + hl_cs_cmpl->type, + hl_cs_cmpl->hw_sob->sob_id, + hl_cs_cmpl->sob_val); + + /* + * A signal CS can get completion while the corresponding wait + * for signal CS is on its way to the PQ. The wait for signal CS + * will get stuck if the signal CS incremented the SOB to its + * max value and there are no pending (submitted) waits on this + * SOB. + * We do the following to void this situation: + * 1. The wait for signal CS must get a ref for the signal CS as + * soon as possible in cs_ioctl_signal_wait() and put it + * before being submitted to the PQ but after it incremented + * the SOB refcnt in init_signal_wait_cs(). + * 2. Signal/Wait for signal CS will decrement the SOB refcnt + * here. + * These two measures guarantee that the wait for signal CS will + * reset the SOB upon completion rather than the signal CS and + * hence the above scenario is avoided. + */ + kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); + } + +free: + kfree_rcu(hl_cs_cmpl, base_fence.rcu); +} + +static const struct dma_fence_ops hl_fence_ops = { + .get_driver_name = hl_fence_get_driver_name, + .get_timeline_name = hl_fence_get_timeline_name, + .enable_signaling = hl_fence_enable_signaling, + .release = hl_fence_release +}; + +static void cs_get(struct hl_cs *cs) +{ + kref_get(&cs->refcount); +} + +static int cs_get_unless_zero(struct hl_cs *cs) +{ + return kref_get_unless_zero(&cs->refcount); +} + +static void cs_put(struct hl_cs *cs) +{ + kref_put(&cs->refcount, cs_do_release); +} + +static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) +{ + /* + * Patched CB is created for external queues jobs, and for H/W queues + * jobs if the user CB was allocated by driver and MMU is disabled. 
+ */ + return (job->queue_type == QUEUE_TYPE_EXT || + (job->queue_type == QUEUE_TYPE_HW && + job->is_kernel_allocated_cb && + !hdev->mmu_enable)); +} + +/* + * cs_parser - parse the user command submission + * + * @hpriv : pointer to the private data of the fd + * @job : pointer to the job that holds the command submission info + * + * The function parses the command submission of the user. It calls the + * ASIC specific parser, which returns a list of memory blocks to send + * to the device as different command buffers + * + */ +static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_cs_parser parser; + int rc; + + parser.ctx_id = job->cs->ctx->asid; + parser.cs_sequence = job->cs->sequence; + parser.job_id = job->id; + + parser.hw_queue_id = job->hw_queue_id; + parser.job_userptr_list = &job->userptr_list; + parser.patched_cb = NULL; + parser.user_cb = job->user_cb; + parser.user_cb_size = job->user_cb_size; + parser.queue_type = job->queue_type; + parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; + job->patched_cb = NULL; + + rc = hdev->asic_funcs->cs_parser(hdev, &parser); + + if (is_cb_patched(hdev, job)) { + if (!rc) { + job->patched_cb = parser.patched_cb; + job->job_cb_size = parser.patched_cb_size; + job->contains_dma_pkt = parser.contains_dma_pkt; + + spin_lock(&job->patched_cb->lock); + job->patched_cb->cs_cnt++; + spin_unlock(&job->patched_cb->lock); + } + + /* + * Whether the parsing worked or not, we don't need the + * original CB anymore because it was already parsed and + * won't be accessed again for this CS + */ + spin_lock(&job->user_cb->lock); + job->user_cb->cs_cnt--; + spin_unlock(&job->user_cb->lock); + hl_cb_put(job->user_cb); + job->user_cb = NULL; + } else if (!rc) { + job->job_cb_size = job->user_cb_size; + } + + return rc; +} + +static void free_job(struct hl_device *hdev, struct hl_cs_job *job) +{ + struct hl_cs *cs = job->cs; + + if (is_cb_patched(hdev, job)) { + hl_userptr_delete_list(hdev, &job->userptr_list); + + /* + * We might arrive here from rollback and patched CB wasn't + * created, so we need to check it's not NULL + */ + if (job->patched_cb) { + spin_lock(&job->patched_cb->lock); + job->patched_cb->cs_cnt--; + spin_unlock(&job->patched_cb->lock); + + hl_cb_put(job->patched_cb); + } + } + + /* For H/W queue jobs, if a user CB was allocated by driver and MMU is + * enabled, the user CB isn't released in cs_parser() and thus should be + * released here. 
+ */ + if (job->queue_type == QUEUE_TYPE_HW && + job->is_kernel_allocated_cb && hdev->mmu_enable) { + spin_lock(&job->user_cb->lock); + job->user_cb->cs_cnt--; + spin_unlock(&job->user_cb->lock); + + hl_cb_put(job->user_cb); + } + + /* + * This is the only place where there can be multiple threads + * modifying the list at the same time + */ + spin_lock(&cs->job_lock); + list_del(&job->cs_node); + spin_unlock(&cs->job_lock); + + hl_debugfs_remove_job(hdev, job); + + if (job->queue_type == QUEUE_TYPE_EXT || + job->queue_type == QUEUE_TYPE_HW) + cs_put(cs); + + kfree(job); +} + +static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx) +{ + hdev->aggregated_cs_counters.device_in_reset_drop_cnt += + ctx->cs_counters.device_in_reset_drop_cnt; + hdev->aggregated_cs_counters.out_of_mem_drop_cnt += + ctx->cs_counters.out_of_mem_drop_cnt; + hdev->aggregated_cs_counters.parsing_drop_cnt += + ctx->cs_counters.parsing_drop_cnt; + hdev->aggregated_cs_counters.queue_full_drop_cnt += + ctx->cs_counters.queue_full_drop_cnt; +} + +static void cs_do_release(struct kref *ref) +{ + struct hl_cs *cs = container_of(ref, struct hl_cs, + refcount); + struct hl_device *hdev = cs->ctx->hdev; + struct hl_cs_job *job, *tmp; + + cs->completed = true; + + /* + * Although if we reached here it means that all external jobs have + * finished, because each one of them took refcnt to CS, we still + * need to go over the internal jobs and free them. Otherwise, we + * will have leaked memory and what's worse, the CS object (and + * potentially the CTX object) could be released, while the JOB + * still holds a pointer to them (but no reference). + */ + list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) + free_job(hdev, job); + + /* We also need to update CI for internal queues */ + if (cs->submitted) { + hdev->asic_funcs->hw_queues_lock(hdev); + + hdev->cs_active_cnt--; + if (!hdev->cs_active_cnt) { + struct hl_device_idle_busy_ts *ts; + + ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++]; + ts->busy_to_idle_ts = ktime_get(); + + if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE) + hdev->idle_busy_ts_idx = 0; + } else if (hdev->cs_active_cnt < 0) { + dev_crit(hdev->dev, "CS active cnt %d is negative\n", + hdev->cs_active_cnt); + } + + hdev->asic_funcs->hw_queues_unlock(hdev); + + hl_int_hw_queue_update_ci(cs); + + spin_lock(&hdev->hw_queues_mirror_lock); + /* remove CS from hw_queues mirror list */ + list_del_init(&cs->mirror_node); + spin_unlock(&hdev->hw_queues_mirror_lock); + + /* + * Don't cancel TDR in case this CS was timedout because we + * might be running from the TDR context + */ + if ((!cs->timedout) && + (hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) { + struct hl_cs *next; + + if (cs->tdr_active) + cancel_delayed_work_sync(&cs->work_tdr); + + spin_lock(&hdev->hw_queues_mirror_lock); + + /* queue TDR for next CS */ + next = list_first_entry_or_null( + &hdev->hw_queues_mirror_list, + struct hl_cs, mirror_node); + + if ((next) && (!next->tdr_active)) { + next->tdr_active = true; + schedule_delayed_work(&next->work_tdr, + hdev->timeout_jiffies); + } + + spin_unlock(&hdev->hw_queues_mirror_lock); + } + } else if (cs->type == CS_TYPE_WAIT) { + /* + * In case the wait for signal CS was submitted, the put occurs + * in init_signal_wait_cs() right before hanging on the PQ. 
+ */ + dma_fence_put(cs->signal_fence); + } + + /* + * Must be called before hl_ctx_put because inside we use ctx to get + * the device + */ + hl_debugfs_remove_cs(cs); + + hl_ctx_put(cs->ctx); + + /* We need to mark an error for not submitted because in that case + * the dma fence release flow is different. Mainly, we don't need + * to handle hw_sob for signal/wait + */ + if (cs->timedout) + dma_fence_set_error(cs->fence, -ETIMEDOUT); + else if (cs->aborted) + dma_fence_set_error(cs->fence, -EIO); + else if (!cs->submitted) + dma_fence_set_error(cs->fence, -EBUSY); + + dma_fence_signal(cs->fence); + dma_fence_put(cs->fence); + + cs_counters_aggregate(hdev, cs->ctx); + + kfree(cs->jobs_in_queue_cnt); + kfree(cs); +} + +static void cs_timedout(struct work_struct *work) +{ + struct hl_device *hdev; + int ctx_asid, rc; + struct hl_cs *cs = container_of(work, struct hl_cs, + work_tdr.work); + rc = cs_get_unless_zero(cs); + if (!rc) + return; + + if ((!cs->submitted) || (cs->completed)) { + cs_put(cs); + return; + } + + /* Mark the CS is timed out so we won't try to cancel its TDR */ + cs->timedout = true; + + hdev = cs->ctx->hdev; + ctx_asid = cs->ctx->asid; + + dev_err(hdev->dev, + "Command submission %llu has not finished in time!\n", + cs->sequence); + + cs_put(cs); + + if (hdev->reset_on_lockup) + hl_device_reset(hdev, false, false); +} + +static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, + enum hl_cs_type cs_type, struct hl_cs **cs_new) +{ + struct hl_cs_compl *cs_cmpl; + struct dma_fence *other = NULL; + struct hl_cs *cs; + int rc; + + cs = kzalloc(sizeof(*cs), GFP_ATOMIC); + if (!cs) + return -ENOMEM; + + cs->ctx = ctx; + cs->submitted = false; + cs->completed = false; + cs->type = cs_type; + INIT_LIST_HEAD(&cs->job_list); + INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); + kref_init(&cs->refcount); + spin_lock_init(&cs->job_lock); + + cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC); + if (!cs_cmpl) { + rc = -ENOMEM; + goto free_cs; + } + + cs_cmpl->hdev = hdev; + cs_cmpl->type = cs->type; + spin_lock_init(&cs_cmpl->lock); + cs->fence = &cs_cmpl->base_fence; + + spin_lock(&ctx->cs_lock); + + cs_cmpl->cs_seq = ctx->cs_sequence; + other = ctx->cs_pending[cs_cmpl->cs_seq & + (hdev->asic_prop.max_pending_cs - 1)]; + if ((other) && (!dma_fence_is_signaled(other))) { + dev_dbg(hdev->dev, + "Rejecting CS because of too many in-flights CS\n"); + rc = -EAGAIN; + goto free_fence; + } + + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); + if (!cs->jobs_in_queue_cnt) { + rc = -ENOMEM; + goto free_fence; + } + + dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock, + ctx->asid, ctx->cs_sequence); + + cs->sequence = cs_cmpl->cs_seq; + + ctx->cs_pending[cs_cmpl->cs_seq & + (hdev->asic_prop.max_pending_cs - 1)] = + &cs_cmpl->base_fence; + ctx->cs_sequence++; + + dma_fence_get(&cs_cmpl->base_fence); + + dma_fence_put(other); + + spin_unlock(&ctx->cs_lock); + + *cs_new = cs; + + return 0; + +free_fence: + spin_unlock(&ctx->cs_lock); + kfree(cs_cmpl); +free_cs: + kfree(cs); + return rc; +} + +static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) +{ + struct hl_cs_job *job, *tmp; + + list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) + free_job(hdev, job); +} + +void hl_cs_rollback_all(struct hl_device *hdev) +{ + int i; + struct hl_cs *cs, *tmp; + + /* flush all completions */ + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + flush_workqueue(hdev->cq_wq[i]); + + /* Make sure we 
don't have leftovers in the H/W queues mirror list */ + list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list, + mirror_node) { + cs_get(cs); + cs->aborted = true; + dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", + cs->ctx->asid, cs->sequence); + cs_rollback(hdev, cs); + cs_put(cs); + } +} + +static void job_wq_completion(struct work_struct *work) +{ + struct hl_cs_job *job = container_of(work, struct hl_cs_job, + finish_work); + struct hl_cs *cs = job->cs; + struct hl_device *hdev = cs->ctx->hdev; + + /* job is no longer needed */ + free_job(hdev, job); +} + +static int validate_queue_index(struct hl_device *hdev, + struct hl_cs_chunk *chunk, + enum hl_queue_type *queue_type, + bool *is_kernel_allocated_cb) +{ + struct asic_fixed_properties *asic = &hdev->asic_prop; + struct hw_queue_properties *hw_queue_prop; + + /* This must be checked here to prevent out-of-bounds access to + * hw_queues_props array + */ + if (chunk->queue_index >= asic->max_queues) { + dev_err(hdev->dev, "Queue index %d is invalid\n", + chunk->queue_index); + return -EINVAL; + } + + hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; + + if (hw_queue_prop->type == QUEUE_TYPE_NA) { + dev_err(hdev->dev, "Queue index %d is invalid\n", + chunk->queue_index); + return -EINVAL; + } + + if (hw_queue_prop->driver_only) { + dev_err(hdev->dev, + "Queue index %d is restricted for the kernel driver\n", + chunk->queue_index); + return -EINVAL; + } + + *queue_type = hw_queue_prop->type; + *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb; + + return 0; +} + +static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, + struct hl_cb_mgr *cb_mgr, + struct hl_cs_chunk *chunk) +{ + struct hl_cb *cb; + u32 cb_handle; + + cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT); + + cb = hl_cb_get(hdev, cb_mgr, cb_handle); + if (!cb) { + dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle); + return NULL; + } + + if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) { + dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size); + goto release_cb; + } + + spin_lock(&cb->lock); + cb->cs_cnt++; + spin_unlock(&cb->lock); + + return cb; + +release_cb: + hl_cb_put(cb); + return NULL; +} + +struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, + enum hl_queue_type queue_type, bool is_kernel_allocated_cb) +{ + struct hl_cs_job *job; + + job = kzalloc(sizeof(*job), GFP_ATOMIC); + if (!job) + return NULL; + + job->queue_type = queue_type; + job->is_kernel_allocated_cb = is_kernel_allocated_cb; + + if (is_cb_patched(hdev, job)) + INIT_LIST_HEAD(&job->userptr_list); + + if (job->queue_type == QUEUE_TYPE_EXT) + INIT_WORK(&job->finish_work, job_wq_completion); + + return job; +} + +static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, + u32 num_chunks, u64 *cs_seq) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_cs_chunk *cs_chunk_array; + struct hl_cs_job *job; + struct hl_cs *cs; + struct hl_cb *cb; + bool int_queues_only = true; + u32 size_to_copy; + int rc, i; + + *cs_seq = ULLONG_MAX; + + if (num_chunks > HL_MAX_JOBS_PER_CS) { + dev_err(hdev->dev, + "Number of chunks can NOT be larger than %d\n", + HL_MAX_JOBS_PER_CS); + rc = -EINVAL; + goto out; + } + + cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array), + GFP_ATOMIC); + if (!cs_chunk_array) { + rc = -ENOMEM; + goto out; + } + + size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); + if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) { + dev_err(hdev->dev, "Failed to copy cs chunk array from 
user\n"); + rc = -EFAULT; + goto free_cs_chunk_array; + } + + /* increment refcnt for context */ + hl_ctx_get(hdev, hpriv->ctx); + + rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs); + if (rc) { + hl_ctx_put(hpriv->ctx); + goto free_cs_chunk_array; + } + + *cs_seq = cs->sequence; + + hl_debugfs_add_cs(cs); + + /* Validate ALL the CS chunks before submitting the CS */ + for (i = 0 ; i < num_chunks ; i++) { + struct hl_cs_chunk *chunk = &cs_chunk_array[i]; + enum hl_queue_type queue_type; + bool is_kernel_allocated_cb; + + rc = validate_queue_index(hdev, chunk, &queue_type, + &is_kernel_allocated_cb); + if (rc) { + hpriv->ctx->cs_counters.parsing_drop_cnt++; + goto free_cs_object; + } + + if (is_kernel_allocated_cb) { + cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); + if (!cb) { + hpriv->ctx->cs_counters.parsing_drop_cnt++; + rc = -EINVAL; + goto free_cs_object; + } + } else { + cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; + } + + if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW) + int_queues_only = false; + + job = hl_cs_allocate_job(hdev, queue_type, + is_kernel_allocated_cb); + if (!job) { + hpriv->ctx->cs_counters.out_of_mem_drop_cnt++; + dev_err(hdev->dev, "Failed to allocate a new job\n"); + rc = -ENOMEM; + if (is_kernel_allocated_cb) + goto release_cb; + else + goto free_cs_object; + } + + job->id = i + 1; + job->cs = cs; + job->user_cb = cb; + job->user_cb_size = chunk->cb_size; + job->hw_queue_id = chunk->queue_index; + + cs->jobs_in_queue_cnt[job->hw_queue_id]++; + + list_add_tail(&job->cs_node, &cs->job_list); + + /* + * Increment CS reference. When CS reference is 0, CS is + * done and can be signaled to user and free all its resources + * Only increment for JOB on external or H/W queues, because + * only for those JOBs we get completion + */ + if (job->queue_type == QUEUE_TYPE_EXT || + job->queue_type == QUEUE_TYPE_HW) + cs_get(cs); + + hl_debugfs_add_job(hdev, job); + + rc = cs_parser(hpriv, job); + if (rc) { + hpriv->ctx->cs_counters.parsing_drop_cnt++; + dev_err(hdev->dev, + "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", + cs->ctx->asid, cs->sequence, job->id, rc); + goto free_cs_object; + } + } + + if (int_queues_only) { + hpriv->ctx->cs_counters.parsing_drop_cnt++; + dev_err(hdev->dev, + "Reject CS %d.%llu because only internal queues jobs are present\n", + cs->ctx->asid, cs->sequence); + rc = -EINVAL; + goto free_cs_object; + } + + rc = hl_hw_queue_schedule_cs(cs); + if (rc) { + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to submit CS %d.%llu to H/W queues, error %d\n", + cs->ctx->asid, cs->sequence, rc); + goto free_cs_object; + } + + rc = HL_CS_STATUS_SUCCESS; + goto put_cs; + +release_cb: + spin_lock(&cb->lock); + cb->cs_cnt--; + spin_unlock(&cb->lock); + hl_cb_put(cb); +free_cs_object: + cs_rollback(hdev, cs); + *cs_seq = ULLONG_MAX; + /* The path below is both for good and erroneous exits */ +put_cs: + /* We finished with the CS in this function, so put the ref */ + cs_put(cs); +free_cs_chunk_array: + kfree(cs_chunk_array); +out: + return rc; +} + +static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, + void __user *chunks, u32 num_chunks, + u64 *cs_seq) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_ctx *ctx = hpriv->ctx; + struct hl_cs_chunk *cs_chunk_array, *chunk; + struct hw_queue_properties *hw_queue_prop; + struct dma_fence *sig_fence = NULL; + struct hl_cs_job *job; + struct hl_cs *cs; + struct hl_cb *cb; + enum hl_queue_type q_type; + u64 *signal_seq_arr = NULL, 
signal_seq; + u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size; + int rc; + + *cs_seq = ULLONG_MAX; + + if (num_chunks > HL_MAX_JOBS_PER_CS) { + dev_err(hdev->dev, + "Number of chunks can NOT be larger than %d\n", + HL_MAX_JOBS_PER_CS); + rc = -EINVAL; + goto out; + } + + cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array), + GFP_ATOMIC); + if (!cs_chunk_array) { + rc = -ENOMEM; + goto out; + } + + size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); + if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) { + dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); + rc = -EFAULT; + goto free_cs_chunk_array; + } + + /* currently it is guaranteed to have only one chunk */ + chunk = &cs_chunk_array[0]; + q_idx = chunk->queue_index; + hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; + q_type = hw_queue_prop->type; + + if ((q_idx >= hdev->asic_prop.max_queues) || + (!hw_queue_prop->supports_sync_stream)) { + dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx); + rc = -EINVAL; + goto free_cs_chunk_array; + } + + if (cs_type == CS_TYPE_WAIT) { + struct hl_cs_compl *sig_waitcs_cmpl; + + signal_seq_arr_len = chunk->num_signal_seq_arr; + + /* currently only one signal seq is supported */ + if (signal_seq_arr_len != 1) { + dev_err(hdev->dev, + "Wait for signal CS supports only one signal CS seq\n"); + rc = -EINVAL; + goto free_cs_chunk_array; + } + + signal_seq_arr = kmalloc_array(signal_seq_arr_len, + sizeof(*signal_seq_arr), + GFP_ATOMIC); + if (!signal_seq_arr) { + rc = -ENOMEM; + goto free_cs_chunk_array; + } + + size_to_copy = chunk->num_signal_seq_arr * + sizeof(*signal_seq_arr); + if (copy_from_user(signal_seq_arr, + u64_to_user_ptr(chunk->signal_seq_arr), + size_to_copy)) { + dev_err(hdev->dev, + "Failed to copy signal seq array from user\n"); + rc = -EFAULT; + goto free_signal_seq_array; + } + + /* currently it is guaranteed to have only one signal seq */ + signal_seq = signal_seq_arr[0]; + sig_fence = hl_ctx_get_fence(ctx, signal_seq); + if (IS_ERR(sig_fence)) { + dev_err(hdev->dev, + "Failed to get signal CS with seq 0x%llx\n", + signal_seq); + rc = PTR_ERR(sig_fence); + goto free_signal_seq_array; + } + + if (!sig_fence) { + /* signal CS already finished */ + rc = 0; + goto free_signal_seq_array; + } + + sig_waitcs_cmpl = + container_of(sig_fence, struct hl_cs_compl, base_fence); + + if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) { + dev_err(hdev->dev, + "CS seq 0x%llx is not of a signal CS\n", + signal_seq); + dma_fence_put(sig_fence); + rc = -EINVAL; + goto free_signal_seq_array; + } + + if (dma_fence_is_signaled(sig_fence)) { + /* signal CS already finished */ + dma_fence_put(sig_fence); + rc = 0; + goto free_signal_seq_array; + } + } + + /* increment refcnt for context */ + hl_ctx_get(hdev, ctx); + + rc = allocate_cs(hdev, ctx, cs_type, &cs); + if (rc) { + if (cs_type == CS_TYPE_WAIT) + dma_fence_put(sig_fence); + hl_ctx_put(ctx); + goto free_signal_seq_array; + } + + /* + * Save the signal CS fence for later initialization right before + * hanging the wait CS on the queue. 
+ */ + if (cs->type == CS_TYPE_WAIT) + cs->signal_fence = sig_fence; + + hl_debugfs_add_cs(cs); + + *cs_seq = cs->sequence; + + job = hl_cs_allocate_job(hdev, q_type, true); + if (!job) { + ctx->cs_counters.out_of_mem_drop_cnt++; + dev_err(hdev->dev, "Failed to allocate a new job\n"); + rc = -ENOMEM; + goto put_cs; + } + + cb = hl_cb_kernel_create(hdev, PAGE_SIZE); + if (!cb) { + ctx->cs_counters.out_of_mem_drop_cnt++; + kfree(job); + rc = -EFAULT; + goto put_cs; + } + + if (cs->type == CS_TYPE_WAIT) + cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); + else + cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); + + job->id = 0; + job->cs = cs; + job->user_cb = cb; + job->user_cb->cs_cnt++; + job->user_cb_size = cb_size; + job->hw_queue_id = q_idx; + + /* + * No need in parsing, user CB is the patched CB. + * We call hl_cb_destroy() out of two reasons - we don't need the CB in + * the CB idr anymore and to decrement its refcount as it was + * incremented inside hl_cb_kernel_create(). + */ + job->patched_cb = job->user_cb; + job->job_cb_size = job->user_cb_size; + hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + + cs->jobs_in_queue_cnt[job->hw_queue_id]++; + + list_add_tail(&job->cs_node, &cs->job_list); + + /* increment refcount as for external queues we get completion */ + cs_get(cs); + + hl_debugfs_add_job(hdev, job); + + rc = hl_hw_queue_schedule_cs(cs); + if (rc) { + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to submit CS %d.%llu to H/W queues, error %d\n", + ctx->asid, cs->sequence, rc); + goto free_cs_object; + } + + rc = HL_CS_STATUS_SUCCESS; + goto put_cs; + +free_cs_object: + cs_rollback(hdev, cs); + *cs_seq = ULLONG_MAX; + /* The path below is both for good and erroneous exits */ +put_cs: + /* We finished with the CS in this function, so put the ref */ + cs_put(cs); +free_signal_seq_array: + if (cs_type == CS_TYPE_WAIT) + kfree(signal_seq_arr); +free_cs_chunk_array: + kfree(cs_chunk_array); +out: + return rc; +} + +int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) +{ + struct hl_device *hdev = hpriv->hdev; + union hl_cs_args *args = data; + struct hl_ctx *ctx = hpriv->ctx; + void __user *chunks_execute, *chunks_restore; + enum hl_cs_type cs_type; + u32 num_chunks_execute, num_chunks_restore, sig_wait_flags; + u64 cs_seq = ULONG_MAX; + int rc, do_ctx_switch; + bool need_soft_reset = false; + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_warn_ratelimited(hdev->dev, + "Device is %s. Can't submit new CS\n", + atomic_read(&hdev->in_reset) ? 
"in_reset" : "disabled"); + rc = -EBUSY; + goto out; + } + + sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT; + + if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) { + dev_err(hdev->dev, + "Signal and wait CS flags are mutually exclusive, context %d\n", + ctx->asid); + rc = -EINVAL; + goto out; + } + + if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) && + (!hdev->supports_sync_stream))) { + dev_err(hdev->dev, "Sync stream CS is not supported\n"); + rc = -EINVAL; + goto out; + } + + if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL) + cs_type = CS_TYPE_SIGNAL; + else if (args->in.cs_flags & HL_CS_FLAGS_WAIT) + cs_type = CS_TYPE_WAIT; + else + cs_type = CS_TYPE_DEFAULT; + + chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute; + num_chunks_execute = args->in.num_chunks_execute; + + if (cs_type == CS_TYPE_DEFAULT) { + if (!num_chunks_execute) { + dev_err(hdev->dev, + "Got execute CS with 0 chunks, context %d\n", + ctx->asid); + rc = -EINVAL; + goto out; + } + } else if (num_chunks_execute != 1) { + dev_err(hdev->dev, + "Sync stream CS mandates one chunk only, context %d\n", + ctx->asid); + rc = -EINVAL; + goto out; + } + + do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); + + if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { + long ret; + + chunks_restore = + (void __user *) (uintptr_t) args->in.chunks_restore; + num_chunks_restore = args->in.num_chunks_restore; + + mutex_lock(&hpriv->restore_phase_mutex); + + if (do_ctx_switch) { + rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); + if (rc) { + dev_err_ratelimited(hdev->dev, + "Failed to switch to context %d, rejecting CS! %d\n", + ctx->asid, rc); + /* + * If we timedout, or if the device is not IDLE + * while we want to do context-switch (-EBUSY), + * we need to soft-reset because QMAN is + * probably stuck. 
However, we can't call to + * reset here directly because of deadlock, so + * need to do it at the very end of this + * function + */ + if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) + need_soft_reset = true; + mutex_unlock(&hpriv->restore_phase_mutex); + goto out; + } + } + + hdev->asic_funcs->restore_phase_topology(hdev); + + if (!num_chunks_restore) { + dev_dbg(hdev->dev, + "Need to run restore phase but restore CS is empty\n"); + rc = 0; + } else { + rc = cs_ioctl_default(hpriv, chunks_restore, + num_chunks_restore, &cs_seq); + } + + mutex_unlock(&hpriv->restore_phase_mutex); + + if (rc) { + dev_err(hdev->dev, + "Failed to submit restore CS for context %d (%d)\n", + ctx->asid, rc); + goto out; + } + + /* Need to wait for restore completion before execution phase */ + if (num_chunks_restore) { + ret = _hl_cs_wait_ioctl(hdev, ctx, + jiffies_to_usecs(hdev->timeout_jiffies), + cs_seq); + if (ret <= 0) { + dev_err(hdev->dev, + "Restore CS for context %d failed to complete %ld\n", + ctx->asid, ret); + rc = -ENOEXEC; + goto out; + } + } + + ctx->thread_ctx_switch_wait_token = 1; + } else if (!ctx->thread_ctx_switch_wait_token) { + u32 tmp; + + rc = hl_poll_timeout_memory(hdev, + &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), + 100, jiffies_to_usecs(hdev->timeout_jiffies), false); + + if (rc == -ETIMEDOUT) { + dev_err(hdev->dev, + "context switch phase timeout (%d)\n", tmp); + goto out; + } + } + + if (cs_type == CS_TYPE_DEFAULT) + rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute, + &cs_seq); + else + rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute, + num_chunks_execute, &cs_seq); + +out: + if (rc != -EAGAIN) { + memset(args, 0, sizeof(*args)); + args->out.status = rc; + args->out.seq = cs_seq; + } + + if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset)) + hl_device_reset(hdev, false, false); + + return rc; +} + +static long _hl_cs_wait_ioctl(struct hl_device *hdev, + struct hl_ctx *ctx, u64 timeout_us, u64 seq) +{ + struct dma_fence *fence; + unsigned long timeout; + long rc; + + if (timeout_us == MAX_SCHEDULE_TIMEOUT) + timeout = timeout_us; + else + timeout = usecs_to_jiffies(timeout_us); + + hl_ctx_get(hdev, ctx); + + fence = hl_ctx_get_fence(ctx, seq); + if (IS_ERR(fence)) { + rc = PTR_ERR(fence); + if (rc == -EINVAL) + dev_notice_ratelimited(hdev->dev, + "Can't wait on CS %llu because current CS is at seq %llu\n", + seq, ctx->cs_sequence); + } else if (fence) { + rc = dma_fence_wait_timeout(fence, true, timeout); + if (fence->error == -ETIMEDOUT) + rc = -ETIMEDOUT; + else if (fence->error == -EIO) + rc = -EIO; + dma_fence_put(fence); + } else { + dev_dbg(hdev->dev, + "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", + seq, ctx->cs_sequence); + rc = 1; + } + + hl_ctx_put(ctx); + + return rc; +} + +int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) +{ + struct hl_device *hdev = hpriv->hdev; + union hl_wait_cs_args *args = data; + u64 seq = args->in.seq; + long rc; + + rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq); + + memset(args, 0, sizeof(*args)); + + if (rc < 0) { + if (rc == -ERESTARTSYS) { + dev_err_ratelimited(hdev->dev, + "user process got signal while waiting for CS handle %llu\n", + seq); + args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED; + rc = -EINTR; + } else if (rc == -ETIMEDOUT) { + dev_err_ratelimited(hdev->dev, + "CS %llu has timed-out while user process is waiting for it\n", + seq); + args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; + } else if (rc == -EIO) { + 
dev_err_ratelimited(hdev->dev, + "CS %llu has been aborted while user process is waiting for it\n", + seq); + args->out.status = HL_WAIT_CS_STATUS_ABORTED; + } + return rc; + } + + if (rc == 0) + args->out.status = HL_WAIT_CS_STATUS_BUSY; + else + args->out.status = HL_WAIT_CS_STATUS_COMPLETED; + + return 0; +} diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c new file mode 100644 index 000000000000..1e3e5b19ecd9 --- /dev/null +++ b/drivers/misc/habanalabs/common/context.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +#include + +static void hl_ctx_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + int i; + + /* + * If we arrived here, there are no jobs waiting for this context + * on its queues so we can safely remove it. + * This is because for each CS, we increment the ref count and for + * every CS that was finished we decrement it and we won't arrive + * to this function unless the ref count is 0 + */ + + for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++) + dma_fence_put(ctx->cs_pending[i]); + + kfree(ctx->cs_pending); + + if (ctx->asid != HL_KERNEL_ASID_ID) { + /* The engines are stopped as there is no executing CS, but the + * Coresight might be still working by accessing addresses + * related to the stopped engines. Hence stop it explicitly. + * Stop only if this is the compute context, as there can be + * only one compute context + */ + if ((hdev->in_debug) && (hdev->compute_ctx == ctx)) + hl_device_set_debug_mode(hdev, false); + + hl_vm_ctx_fini(ctx); + hl_asid_free(hdev, ctx->asid); + } else { + hl_mmu_ctx_fini(ctx); + } +} + +void hl_ctx_do_release(struct kref *ref) +{ + struct hl_ctx *ctx; + + ctx = container_of(ref, struct hl_ctx, refcount); + + hl_ctx_fini(ctx); + + if (ctx->hpriv) + hl_hpriv_put(ctx->hpriv); + + kfree(ctx); +} + +int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv) +{ + struct hl_ctx_mgr *mgr = &hpriv->ctx_mgr; + struct hl_ctx *ctx; + int rc; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + rc = -ENOMEM; + goto out_err; + } + + mutex_lock(&mgr->ctx_lock); + rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); + mutex_unlock(&mgr->ctx_lock); + + if (rc < 0) { + dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n"); + goto free_ctx; + } + + ctx->handle = rc; + + rc = hl_ctx_init(hdev, ctx, false); + if (rc) + goto remove_from_idr; + + hl_hpriv_get(hpriv); + ctx->hpriv = hpriv; + + /* TODO: remove for multiple contexts per process */ + hpriv->ctx = ctx; + + /* TODO: remove the following line for multiple process support */ + hdev->compute_ctx = ctx; + + return 0; + +remove_from_idr: + mutex_lock(&mgr->ctx_lock); + idr_remove(&mgr->ctx_handles, ctx->handle); + mutex_unlock(&mgr->ctx_lock); +free_ctx: + kfree(ctx); +out_err: + return rc; +} + +void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx) +{ + if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1) + return; + + dev_warn(hdev->dev, + "user process released device but its command submissions are still executing\n"); +} + +int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) +{ + int rc = 0; + + ctx->hdev = hdev; + + kref_init(&ctx->refcount); + + ctx->cs_sequence = 1; + spin_lock_init(&ctx->cs_lock); + atomic_set(&ctx->thread_ctx_switch_token, 1); + ctx->thread_ctx_switch_wait_token = 0; + ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs, + 
sizeof(struct dma_fence *), + GFP_KERNEL); + if (!ctx->cs_pending) + return -ENOMEM; + + if (is_kernel_ctx) { + ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */ + rc = hl_mmu_ctx_init(ctx); + if (rc) { + dev_err(hdev->dev, "Failed to init mmu ctx module\n"); + goto mem_ctx_err; + } + } else { + ctx->asid = hl_asid_alloc(hdev); + if (!ctx->asid) { + dev_err(hdev->dev, "No free ASID, failed to create context\n"); + return -ENOMEM; + } + + rc = hl_vm_ctx_init(ctx); + if (rc) { + dev_err(hdev->dev, "Failed to init mem ctx module\n"); + rc = -ENOMEM; + goto mem_ctx_err; + } + } + + return 0; + +mem_ctx_err: + if (ctx->asid != HL_KERNEL_ASID_ID) + hl_asid_free(hdev, ctx->asid); + + return rc; +} + +void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx) +{ + kref_get(&ctx->refcount); +} + +int hl_ctx_put(struct hl_ctx *ctx) +{ + return kref_put(&ctx->refcount, hl_ctx_do_release); +} + +struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) +{ + struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop; + struct dma_fence *fence; + + spin_lock(&ctx->cs_lock); + + if (seq >= ctx->cs_sequence) { + spin_unlock(&ctx->cs_lock); + return ERR_PTR(-EINVAL); + } + + if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) { + spin_unlock(&ctx->cs_lock); + return NULL; + } + + fence = dma_fence_get( + ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]); + spin_unlock(&ctx->cs_lock); + + return fence; +} + +/* + * hl_ctx_mgr_init - initialize the context manager + * + * @mgr: pointer to context manager structure + * + * This manager is an object inside the hpriv object of the user process. + * The function is called when a user process opens the FD. + */ +void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr) +{ + mutex_init(&mgr->ctx_lock); + idr_init(&mgr->ctx_handles); +} + +/* + * hl_ctx_mgr_fini - finalize the context manager + * + * @hdev: pointer to device structure + * @mgr: pointer to context manager structure + * + * This function goes over all the contexts in the manager and frees them. + * It is called when a process closes the FD. + */ +void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr) +{ + struct hl_ctx *ctx; + struct idr *idp; + u32 id; + + idp = &mgr->ctx_handles; + + idr_for_each_entry(idp, ctx, id) + hl_ctx_free(hdev, ctx); + + idr_destroy(&mgr->ctx_handles); + mutex_destroy(&mgr->ctx_lock); +} diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c new file mode 100644 index 000000000000..fc4372c18ce2 --- /dev/null +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -0,0 +1,1411 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. 
+ */ + +#include "habanalabs.h" +#include "include/hw_ip/mmu/mmu_general.h" + +#include +#include +#include + +#define MMU_ADDR_BUF_SIZE 40 +#define MMU_ASID_BUF_SIZE 10 +#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE) + +static struct dentry *hl_debug_root; + +static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, + u8 i2c_reg, u32 *val) +{ + struct armcp_packet pkt; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) + return -EBUSY; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.i2c_bus = i2c_bus; + pkt.i2c_addr = i2c_addr; + pkt.i2c_reg = i2c_reg; + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_DEVICE_TIMEOUT_USEC, (long *) val); + + if (rc) + dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); + + return rc; +} + +static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, + u8 i2c_reg, u32 val) +{ + struct armcp_packet pkt; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) + return -EBUSY; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.i2c_bus = i2c_bus; + pkt.i2c_addr = i2c_addr; + pkt.i2c_reg = i2c_reg; + pkt.value = cpu_to_le64(val); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_DEVICE_TIMEOUT_USEC, NULL); + + if (rc) + dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc); + + return rc; +} + +static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) +{ + struct armcp_packet pkt; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) + return; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.led_index = cpu_to_le32(led); + pkt.value = cpu_to_le64(state); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_DEVICE_TIMEOUT_USEC, NULL); + + if (rc) + dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc); +} + +static int command_buffers_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_cb *cb; + bool first = true; + + spin_lock(&dev_entry->cb_spinlock); + + list_for_each_entry(cb, &dev_entry->cb_list, debugfs_list) { + if (first) { + first = false; + seq_puts(s, "\n"); + seq_puts(s, " CB ID CTX ID CB size CB RefCnt mmap? 
CS counter\n"); + seq_puts(s, "---------------------------------------------------------------\n"); + } + seq_printf(s, + " %03d %d 0x%08x %d %d %d\n", + cb->id, cb->ctx_id, cb->size, + kref_read(&cb->refcount), + cb->mmap, cb->cs_cnt); + } + + spin_unlock(&dev_entry->cb_spinlock); + + if (!first) + seq_puts(s, "\n"); + + return 0; +} + +static int command_submission_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_cs *cs; + bool first = true; + + spin_lock(&dev_entry->cs_spinlock); + + list_for_each_entry(cs, &dev_entry->cs_list, debugfs_list) { + if (first) { + first = false; + seq_puts(s, "\n"); + seq_puts(s, " CS ID CTX ASID CS RefCnt Submitted Completed\n"); + seq_puts(s, "------------------------------------------------------\n"); + } + seq_printf(s, + " %llu %d %d %d %d\n", + cs->sequence, cs->ctx->asid, + kref_read(&cs->refcount), + cs->submitted, cs->completed); + } + + spin_unlock(&dev_entry->cs_spinlock); + + if (!first) + seq_puts(s, "\n"); + + return 0; +} + +static int command_submission_jobs_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_cs_job *job; + bool first = true; + + spin_lock(&dev_entry->cs_job_spinlock); + + list_for_each_entry(job, &dev_entry->cs_job_list, debugfs_list) { + if (first) { + first = false; + seq_puts(s, "\n"); + seq_puts(s, " JOB ID CS ID CTX ASID H/W Queue\n"); + seq_puts(s, "---------------------------------------\n"); + } + if (job->cs) + seq_printf(s, + " %02d %llu %d %d\n", + job->id, job->cs->sequence, job->cs->ctx->asid, + job->hw_queue_id); + else + seq_printf(s, + " %02d 0 %d %d\n", + job->id, HL_KERNEL_ASID_ID, job->hw_queue_id); + } + + spin_unlock(&dev_entry->cs_job_spinlock); + + if (!first) + seq_puts(s, "\n"); + + return 0; +} + +static int userptr_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_userptr *userptr; + char dma_dir[4][30] = {"DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", + "DMA_FROM_DEVICE", "DMA_NONE"}; + bool first = true; + + spin_lock(&dev_entry->userptr_spinlock); + + list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) { + if (first) { + first = false; + seq_puts(s, "\n"); + seq_puts(s, " user virtual address size dma dir\n"); + seq_puts(s, "----------------------------------------------------------\n"); + } + seq_printf(s, + " 0x%-14llx %-10u %-30s\n", + userptr->addr, userptr->size, dma_dir[userptr->dir]); + } + + spin_unlock(&dev_entry->userptr_spinlock); + + if (!first) + seq_puts(s, "\n"); + + return 0; +} + +static int vm_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_ctx *ctx; + struct hl_vm *vm; + struct hl_vm_hash_node *hnode; + struct hl_userptr *userptr; + struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; + enum vm_type_t *vm_type; + bool once = true; + u64 j; + int i; + + if (!dev_entry->hdev->mmu_enable) + return 0; + + spin_lock(&dev_entry->ctx_mem_hash_spinlock); + + list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) { + once = false; + seq_puts(s, "\n\n----------------------------------------------------"); + seq_puts(s, "\n----------------------------------------------------\n\n"); + seq_printf(s, "ctx asid: %u\n", ctx->asid); + + 
seq_puts(s, "\nmappings:\n\n"); + seq_puts(s, " virtual address size handle\n"); + seq_puts(s, "----------------------------------------------------\n"); + mutex_lock(&ctx->mem_hash_lock); + hash_for_each(ctx->mem_hash, i, hnode, node) { + vm_type = hnode->ptr; + + if (*vm_type == VM_TYPE_USERPTR) { + userptr = hnode->ptr; + seq_printf(s, + " 0x%-14llx %-10u\n", + hnode->vaddr, userptr->size); + } else { + phys_pg_pack = hnode->ptr; + seq_printf(s, + " 0x%-14llx %-10llu %-4u\n", + hnode->vaddr, phys_pg_pack->total_size, + phys_pg_pack->handle); + } + } + mutex_unlock(&ctx->mem_hash_lock); + + vm = &ctx->hdev->vm; + spin_lock(&vm->idr_lock); + + if (!idr_is_empty(&vm->phys_pg_pack_handles)) + seq_puts(s, "\n\nallocations:\n"); + + idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_pack, i) { + if (phys_pg_pack->asid != ctx->asid) + continue; + + seq_printf(s, "\nhandle: %u\n", phys_pg_pack->handle); + seq_printf(s, "page size: %u\n\n", + phys_pg_pack->page_size); + seq_puts(s, " physical address\n"); + seq_puts(s, "---------------------\n"); + for (j = 0 ; j < phys_pg_pack->npages ; j++) { + seq_printf(s, " 0x%-14llx\n", + phys_pg_pack->pages[j]); + } + } + spin_unlock(&vm->idr_lock); + + } + + spin_unlock(&dev_entry->ctx_mem_hash_spinlock); + + if (!once) + seq_puts(s, "\n"); + + return 0; +} + +/* these inline functions are copied from mmu.c */ +static inline u64 get_hop0_addr(struct hl_ctx *ctx) +{ + return ctx->hdev->asic_prop.mmu_pgt_addr + + (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); +} + +static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, + u64 virt_addr, u64 mask, u64 shift) +{ + return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * + ((virt_addr & mask) >> shift); +} + +static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask, + mmu_specs->hop0_shift); +} + +static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask, + mmu_specs->hop1_shift); +} + +static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask, + mmu_specs->hop2_shift); +} + +static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask, + mmu_specs->hop3_shift); +} + +static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask, + mmu_specs->hop4_shift); +} + +static inline u64 get_next_hop_addr(u64 curr_pte) +{ + if (curr_pte & PAGE_PRESENT_MASK) + return curr_pte & HOP_PHYS_ADDR_MASK; + else + return ULLONG_MAX; +} + +static int mmu_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + struct hl_ctx *ctx; + bool is_dram_addr; + + u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0, + hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0, + hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0, + 
hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0, + hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0, + virt_addr = dev_entry->mmu_addr; + + if (!hdev->mmu_enable) + return 0; + + if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID) + ctx = hdev->kernel_ctx; + else + ctx = hdev->compute_ctx; + + if (!ctx) { + dev_err(hdev->dev, "no ctx available\n"); + return 0; + } + + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); + + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + + mutex_lock(&ctx->mmu_lock); + + /* the following lookup is copied from unmap() in mmu.c */ + + hop0_addr = get_hop0_addr(ctx); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); + hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr); + hop1_addr = get_next_hop_addr(hop0_pte); + + if (hop1_addr == ULLONG_MAX) + goto not_mapped; + + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); + hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr); + hop2_addr = get_next_hop_addr(hop1_pte); + + if (hop2_addr == ULLONG_MAX) + goto not_mapped; + + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); + hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr); + hop3_addr = get_next_hop_addr(hop2_pte); + + if (hop3_addr == ULLONG_MAX) + goto not_mapped; + + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); + hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr); + + if (!(hop3_pte & LAST_MASK)) { + hop4_addr = get_next_hop_addr(hop3_pte); + + if (hop4_addr == ULLONG_MAX) + goto not_mapped; + + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); + hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr); + if (!(hop4_pte & PAGE_PRESENT_MASK)) + goto not_mapped; + } else { + if (!(hop3_pte & PAGE_PRESENT_MASK)) + goto not_mapped; + } + + seq_printf(s, "asid: %u, virt_addr: 0x%llx\n", + dev_entry->mmu_asid, dev_entry->mmu_addr); + + seq_printf(s, "hop0_addr: 0x%llx\n", hop0_addr); + seq_printf(s, "hop0_pte_addr: 0x%llx\n", hop0_pte_addr); + seq_printf(s, "hop0_pte: 0x%llx\n", hop0_pte); + + seq_printf(s, "hop1_addr: 0x%llx\n", hop1_addr); + seq_printf(s, "hop1_pte_addr: 0x%llx\n", hop1_pte_addr); + seq_printf(s, "hop1_pte: 0x%llx\n", hop1_pte); + + seq_printf(s, "hop2_addr: 0x%llx\n", hop2_addr); + seq_printf(s, "hop2_pte_addr: 0x%llx\n", hop2_pte_addr); + seq_printf(s, "hop2_pte: 0x%llx\n", hop2_pte); + + seq_printf(s, "hop3_addr: 0x%llx\n", hop3_addr); + seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr); + seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte); + + if (!(hop3_pte & LAST_MASK)) { + seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr); + seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr); + seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte); + } + + goto out; + +not_mapped: + dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); +out: + mutex_unlock(&ctx->mmu_lock); + + return 0; +} + +static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct seq_file *s = file->private_data; + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + char kbuf[MMU_KBUF_SIZE]; + char *c; + ssize_t rc; + + if (!hdev->mmu_enable) + return count; + + if (count > sizeof(kbuf) - 1) + goto err; + if 
(copy_from_user(kbuf, buf, count)) + goto err; + kbuf[count] = 0; + + c = strchr(kbuf, ' '); + if (!c) + goto err; + *c = '\0'; + + rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid); + if (rc) + goto err; + + if (strncmp(c+1, "0x", 2)) + goto err; + rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr); + if (rc) + goto err; + + return count; + +err: + dev_err(hdev->dev, "usage: echo <0xaddr> > mmu\n"); + + return -EINVAL; +} + +static int engines_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, + "Can't check device idle during reset\n"); + return 0; + } + + hdev->asic_funcs->is_device_idle(hdev, NULL, s); + + return 0; +} + +static bool hl_is_device_va(struct hl_device *hdev, u64 addr) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + if (!hdev->mmu_enable) + goto out; + + if (hdev->dram_supports_virtual_memory && + (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr)) + return true; + + if (addr >= prop->pmmu.start_addr && + addr < prop->pmmu.end_addr) + return true; + + if (addr >= prop->pmmu_huge.start_addr && + addr < prop->pmmu_huge.end_addr) + return true; +out: + return false; +} + +static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, + u64 *phys_addr) +{ + struct hl_ctx *ctx = hdev->compute_ctx; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + u64 hop_addr, hop_pte_addr, hop_pte; + u64 offset_mask = HOP4_MASK | FLAGS_MASK; + int rc = 0; + bool is_dram_addr; + + if (!ctx) { + dev_err(hdev->dev, "no ctx available\n"); + return -EINVAL; + } + + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); + + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ + mmu_prop = is_dram_addr ? 
&prop->dmmu : &prop->pmmu; + + mutex_lock(&ctx->mmu_lock); + + /* hop 0 */ + hop_addr = get_hop0_addr(ctx); + hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + /* hop 1 */ + hop_addr = get_next_hop_addr(hop_pte); + if (hop_addr == ULLONG_MAX) + goto not_mapped; + hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + /* hop 2 */ + hop_addr = get_next_hop_addr(hop_pte); + if (hop_addr == ULLONG_MAX) + goto not_mapped; + hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + /* hop 3 */ + hop_addr = get_next_hop_addr(hop_pte); + if (hop_addr == ULLONG_MAX) + goto not_mapped; + hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + if (!(hop_pte & LAST_MASK)) { + /* hop 4 */ + hop_addr = get_next_hop_addr(hop_pte); + if (hop_addr == ULLONG_MAX) + goto not_mapped; + hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr, + virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + offset_mask = FLAGS_MASK; + } + + if (!(hop_pte & PAGE_PRESENT_MASK)) + goto not_mapped; + + *phys_addr = (hop_pte & ~offset_mask) | (virt_addr & offset_mask); + + goto out; + +not_mapped: + dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); + rc = -EINVAL; +out: + mutex_unlock(&ctx->mmu_lock); + return rc; +} + +static ssize_t hl_data_read32(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[32]; + u64 addr = entry->addr; + u32 val; + ssize_t rc; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); + return 0; + } + + if (*ppos) + return 0; + + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val); + if (rc) { + dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); + return rc; + } + + sprintf(tmp_buf, "0x%08x\n", val); + return simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); +} + +static ssize_t hl_data_write32(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u64 addr = entry->addr; + u32 value; + ssize_t rc; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't write during reset\n"); + return 0; + } + + rc = kstrtouint_from_user(buf, count, 16, &value); + if (rc) + return rc; + + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value); + if (rc) { + dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n", + value, addr); + return rc; + } + + return count; +} + +static ssize_t hl_data_read64(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[32]; + u64 addr = entry->addr; + u64 val; + ssize_t rc; + + if (*ppos) + return 0; + + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); + if (rc) + return rc; + } + 
+ rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val); + if (rc) { + dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); + return rc; + } + + sprintf(tmp_buf, "0x%016llx\n", val); + return simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); +} + +static ssize_t hl_data_write64(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u64 addr = entry->addr; + u64 value; + ssize_t rc; + + rc = kstrtoull_from_user(buf, count, 16, &value); + if (rc) + return rc; + + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value); + if (rc) { + dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n", + value, addr); + return rc; + } + + return count; +} + +static ssize_t hl_get_power_state(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[200]; + int i; + + if (*ppos) + return 0; + + if (hdev->pdev->current_state == PCI_D0) + i = 1; + else if (hdev->pdev->current_state == PCI_D3hot) + i = 2; + else + i = 3; + + sprintf(tmp_buf, + "current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i); + return simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); +} + +static ssize_t hl_set_power_state(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + rc = kstrtouint_from_user(buf, count, 10, &value); + if (rc) + return rc; + + if (value == 1) { + pci_set_power_state(hdev->pdev, PCI_D0); + pci_restore_state(hdev->pdev); + rc = pci_enable_device(hdev->pdev); + } else if (value == 2) { + pci_save_state(hdev->pdev); + pci_disable_device(hdev->pdev); + pci_set_power_state(hdev->pdev, PCI_D3hot); + } else { + dev_dbg(hdev->dev, "invalid power state value %u\n", value); + return -EINVAL; + } + + return count; +} + +static ssize_t hl_i2c_data_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[32]; + u32 val; + ssize_t rc; + + if (*ppos) + return 0; + + rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr, + entry->i2c_reg, &val); + if (rc) { + dev_err(hdev->dev, + "Failed to read from I2C bus %d, addr %d, reg %d\n", + entry->i2c_bus, entry->i2c_addr, entry->i2c_reg); + return rc; + } + + sprintf(tmp_buf, "0x%02x\n", val); + rc = simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); + + return rc; +} + +static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + rc = kstrtouint_from_user(buf, count, 16, &value); + if (rc) + return rc; + + rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr, + entry->i2c_reg, value); + if (rc) { + dev_err(hdev->dev, + "Failed to write 0x%02x to I2C bus %d, addr %d, reg %d\n", + value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg); + return rc; + } + + return count; +} + +static ssize_t hl_led0_write(struct file *f, const char __user *buf, + size_t count, 
loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + rc = kstrtouint_from_user(buf, count, 10, &value); + if (rc) + return rc; + + value = value ? 1 : 0; + + hl_debugfs_led_set(hdev, 0, value); + + return count; +} + +static ssize_t hl_led1_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + rc = kstrtouint_from_user(buf, count, 10, &value); + if (rc) + return rc; + + value = value ? 1 : 0; + + hl_debugfs_led_set(hdev, 1, value); + + return count; +} + +static ssize_t hl_led2_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + rc = kstrtouint_from_user(buf, count, 10, &value); + if (rc) + return rc; + + value = value ? 1 : 0; + + hl_debugfs_led_set(hdev, 2, value); + + return count; +} + +static ssize_t hl_device_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + static const char *help = + "Valid values: disable, enable, suspend, resume, cpu_timeout\n"; + return simple_read_from_buffer(buf, count, ppos, help, strlen(help)); +} + +static ssize_t hl_device_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char data[30] = {0}; + + /* don't allow partial writes */ + if (*ppos != 0) + return 0; + + simple_write_to_buffer(data, 29, ppos, buf, count); + + if (strncmp("disable", data, strlen("disable")) == 0) { + hdev->disabled = true; + } else if (strncmp("enable", data, strlen("enable")) == 0) { + hdev->disabled = false; + } else if (strncmp("suspend", data, strlen("suspend")) == 0) { + hdev->asic_funcs->suspend(hdev); + } else if (strncmp("resume", data, strlen("resume")) == 0) { + hdev->asic_funcs->resume(hdev); + } else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) { + hdev->device_cpu_disabled = true; + } else { + dev_err(hdev->dev, + "Valid values: disable, enable, suspend, resume, cpu_timeout\n"); + count = -EINVAL; + } + + return count; +} + +static ssize_t hl_clk_gate_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[200]; + ssize_t rc; + + if (*ppos) + return 0; + + sprintf(tmp_buf, "%d\n", hdev->clock_gating); + rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, + strlen(tmp_buf) + 1); + + return rc; +} + +static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, + "Can't change clock gating during reset\n"); + return 0; + } + + rc = kstrtouint_from_user(buf, count, 10, &value); + if (rc) + return rc; + + if (value) { + hdev->clock_gating = 1; + if (hdev->asic_funcs->enable_clock_gating) + hdev->asic_funcs->enable_clock_gating(hdev); + } else { + if (hdev->asic_funcs->disable_clock_gating) + hdev->asic_funcs->disable_clock_gating(hdev); + hdev->clock_gating = 0; + } + + return count; +} + 
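For reviewers, a minimal userspace sketch of how these nodes are driven (assuming the default debugfs mount point, an "hl0" device directory and an arbitrary example address; none of these names is part of the patch itself):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *s)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, s, strlen(s)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* Assumed paths - adjust to the actual debugfs mount and device */
	const char *addr_node = "/sys/kernel/debug/habanalabs/hl0/addr";
	const char *data_node = "/sys/kernel/debug/habanalabs/hl0/data32";
	char val[32] = {0};
	int fd;

	/* Latch the target address into "addr" (hex, as accepted by the x64 attribute) */
	if (write_str(addr_node, "0x20000000") < 0) {
		perror("addr");
		return 1;
	}

	/* "data32" returns the 32-bit word at that address as "0x%08x\n";
	 * the driver translates device virtual addresses first.
	 */
	fd = open(data_node, O_RDONLY);
	if (fd < 0) {
		perror("data32");
		return 1;
	}
	if (read(fd, val, sizeof(val) - 1) > 0)
		printf("data32 = %s", val);
	close(fd);

	return 0;
}

The other nodes added above (data64, clk_gate, stop_on_err) follow the same open/write/read pattern and differ only in the value format they parse.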
+static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[200]; + ssize_t rc; + + if (*ppos) + return 0; + + sprintf(tmp_buf, "%d\n", hdev->stop_on_err); + rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, + strlen(tmp_buf) + 1); + + return rc; +} + +static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, + "Can't change stop on error during reset\n"); + return 0; + } + + rc = kstrtouint_from_user(buf, count, 10, &value); + if (rc) + return rc; + + hdev->stop_on_err = value ? 1 : 0; + + hl_device_reset(hdev, false, false); + + return count; +} + +static const struct file_operations hl_data32b_fops = { + .owner = THIS_MODULE, + .read = hl_data_read32, + .write = hl_data_write32 +}; + +static const struct file_operations hl_data64b_fops = { + .owner = THIS_MODULE, + .read = hl_data_read64, + .write = hl_data_write64 +}; + +static const struct file_operations hl_i2c_data_fops = { + .owner = THIS_MODULE, + .read = hl_i2c_data_read, + .write = hl_i2c_data_write +}; + +static const struct file_operations hl_power_fops = { + .owner = THIS_MODULE, + .read = hl_get_power_state, + .write = hl_set_power_state +}; + +static const struct file_operations hl_led0_fops = { + .owner = THIS_MODULE, + .write = hl_led0_write +}; + +static const struct file_operations hl_led1_fops = { + .owner = THIS_MODULE, + .write = hl_led1_write +}; + +static const struct file_operations hl_led2_fops = { + .owner = THIS_MODULE, + .write = hl_led2_write +}; + +static const struct file_operations hl_device_fops = { + .owner = THIS_MODULE, + .read = hl_device_read, + .write = hl_device_write +}; + +static const struct file_operations hl_clk_gate_fops = { + .owner = THIS_MODULE, + .read = hl_clk_gate_read, + .write = hl_clk_gate_write +}; + +static const struct file_operations hl_stop_on_err_fops = { + .owner = THIS_MODULE, + .read = hl_stop_on_err_read, + .write = hl_stop_on_err_write +}; + +static const struct hl_info_list hl_debugfs_list[] = { + {"command_buffers", command_buffers_show, NULL}, + {"command_submission", command_submission_show, NULL}, + {"command_submission_jobs", command_submission_jobs_show, NULL}, + {"userptr", userptr_show, NULL}, + {"vm", vm_show, NULL}, + {"mmu", mmu_show, mmu_asid_va_write}, + {"engines", engines_show, NULL} +}; + +static int hl_debugfs_open(struct inode *inode, struct file *file) +{ + struct hl_debugfs_entry *node = inode->i_private; + + return single_open(file, node->info_ent->show, node); +} + +static ssize_t hl_debugfs_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct hl_debugfs_entry *node = file->f_inode->i_private; + + if (node->info_ent->write) + return node->info_ent->write(file, buf, count, f_pos); + else + return -EINVAL; + +} + +static const struct file_operations hl_debugfs_fops = { + .owner = THIS_MODULE, + .open = hl_debugfs_open, + .read = seq_read, + .write = hl_debugfs_write, + .llseek = seq_lseek, + .release = single_release, +}; + +void hl_debugfs_add_device(struct hl_device *hdev) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + int count = ARRAY_SIZE(hl_debugfs_list); + 
struct hl_debugfs_entry *entry; + struct dentry *ent; + int i; + + dev_entry->hdev = hdev; + dev_entry->entry_arr = kmalloc_array(count, + sizeof(struct hl_debugfs_entry), + GFP_KERNEL); + if (!dev_entry->entry_arr) + return; + + INIT_LIST_HEAD(&dev_entry->file_list); + INIT_LIST_HEAD(&dev_entry->cb_list); + INIT_LIST_HEAD(&dev_entry->cs_list); + INIT_LIST_HEAD(&dev_entry->cs_job_list); + INIT_LIST_HEAD(&dev_entry->userptr_list); + INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list); + mutex_init(&dev_entry->file_mutex); + spin_lock_init(&dev_entry->cb_spinlock); + spin_lock_init(&dev_entry->cs_spinlock); + spin_lock_init(&dev_entry->cs_job_spinlock); + spin_lock_init(&dev_entry->userptr_spinlock); + spin_lock_init(&dev_entry->ctx_mem_hash_spinlock); + + dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), + hl_debug_root); + + debugfs_create_x64("addr", + 0644, + dev_entry->root, + &dev_entry->addr); + + debugfs_create_file("data32", + 0644, + dev_entry->root, + dev_entry, + &hl_data32b_fops); + + debugfs_create_file("data64", + 0644, + dev_entry->root, + dev_entry, + &hl_data64b_fops); + + debugfs_create_file("set_power_state", + 0200, + dev_entry->root, + dev_entry, + &hl_power_fops); + + debugfs_create_u8("i2c_bus", + 0644, + dev_entry->root, + &dev_entry->i2c_bus); + + debugfs_create_u8("i2c_addr", + 0644, + dev_entry->root, + &dev_entry->i2c_addr); + + debugfs_create_u8("i2c_reg", + 0644, + dev_entry->root, + &dev_entry->i2c_reg); + + debugfs_create_file("i2c_data", + 0644, + dev_entry->root, + dev_entry, + &hl_i2c_data_fops); + + debugfs_create_file("led0", + 0200, + dev_entry->root, + dev_entry, + &hl_led0_fops); + + debugfs_create_file("led1", + 0200, + dev_entry->root, + dev_entry, + &hl_led1_fops); + + debugfs_create_file("led2", + 0200, + dev_entry->root, + dev_entry, + &hl_led2_fops); + + debugfs_create_file("device", + 0200, + dev_entry->root, + dev_entry, + &hl_device_fops); + + debugfs_create_file("clk_gate", + 0200, + dev_entry->root, + dev_entry, + &hl_clk_gate_fops); + + debugfs_create_file("stop_on_err", + 0644, + dev_entry->root, + dev_entry, + &hl_stop_on_err_fops); + + for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { + + ent = debugfs_create_file(hl_debugfs_list[i].name, + 0444, + dev_entry->root, + entry, + &hl_debugfs_fops); + entry->dent = ent; + entry->info_ent = &hl_debugfs_list[i]; + entry->dev_entry = dev_entry; + } +} + +void hl_debugfs_remove_device(struct hl_device *hdev) +{ + struct hl_dbg_device_entry *entry = &hdev->hl_debugfs; + + debugfs_remove_recursive(entry->root); + + mutex_destroy(&entry->file_mutex); + kfree(entry->entry_arr); +} + +void hl_debugfs_add_file(struct hl_fpriv *hpriv) +{ + struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs; + + mutex_lock(&dev_entry->file_mutex); + list_add(&hpriv->debugfs_list, &dev_entry->file_list); + mutex_unlock(&dev_entry->file_mutex); +} + +void hl_debugfs_remove_file(struct hl_fpriv *hpriv) +{ + struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs; + + mutex_lock(&dev_entry->file_mutex); + list_del(&hpriv->debugfs_list); + mutex_unlock(&dev_entry->file_mutex); +} + +void hl_debugfs_add_cb(struct hl_cb *cb) +{ + struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs; + + spin_lock(&dev_entry->cb_spinlock); + list_add(&cb->debugfs_list, &dev_entry->cb_list); + spin_unlock(&dev_entry->cb_spinlock); +} + +void hl_debugfs_remove_cb(struct hl_cb *cb) +{ + struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs; + + spin_lock(&dev_entry->cb_spinlock); + 
list_del(&cb->debugfs_list); + spin_unlock(&dev_entry->cb_spinlock); +} + +void hl_debugfs_add_cs(struct hl_cs *cs) +{ + struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs; + + spin_lock(&dev_entry->cs_spinlock); + list_add(&cs->debugfs_list, &dev_entry->cs_list); + spin_unlock(&dev_entry->cs_spinlock); +} + +void hl_debugfs_remove_cs(struct hl_cs *cs) +{ + struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs; + + spin_lock(&dev_entry->cs_spinlock); + list_del(&cs->debugfs_list); + spin_unlock(&dev_entry->cs_spinlock); +} + +void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + spin_lock(&dev_entry->cs_job_spinlock); + list_add(&job->debugfs_list, &dev_entry->cs_job_list); + spin_unlock(&dev_entry->cs_job_spinlock); +} + +void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + spin_lock(&dev_entry->cs_job_spinlock); + list_del(&job->debugfs_list); + spin_unlock(&dev_entry->cs_job_spinlock); +} + +void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + spin_lock(&dev_entry->userptr_spinlock); + list_add(&userptr->debugfs_list, &dev_entry->userptr_list); + spin_unlock(&dev_entry->userptr_spinlock); +} + +void hl_debugfs_remove_userptr(struct hl_device *hdev, + struct hl_userptr *userptr) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + spin_lock(&dev_entry->userptr_spinlock); + list_del(&userptr->debugfs_list); + spin_unlock(&dev_entry->userptr_spinlock); +} + +void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + spin_lock(&dev_entry->ctx_mem_hash_spinlock); + list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list); + spin_unlock(&dev_entry->ctx_mem_hash_spinlock); +} + +void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + spin_lock(&dev_entry->ctx_mem_hash_spinlock); + list_del(&ctx->debugfs_list); + spin_unlock(&dev_entry->ctx_mem_hash_spinlock); +} + +void __init hl_debugfs_init(void) +{ + hl_debug_root = debugfs_create_dir("habanalabs", NULL); +} + +void hl_debugfs_fini(void) +{ + debugfs_remove_recursive(hl_debug_root); +} diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c new file mode 100644 index 000000000000..84800efec10d --- /dev/null +++ b/drivers/misc/habanalabs/common/device.c @@ -0,0 +1,1506 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. 
+ */ + +#define pr_fmt(fmt) "habanalabs: " fmt + +#include "habanalabs.h" + +#include +#include +#include +#include + +#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10) + +bool hl_device_disabled_or_in_reset(struct hl_device *hdev) +{ + if ((hdev->disabled) || (atomic_read(&hdev->in_reset))) + return true; + else + return false; +} + +enum hl_device_status hl_device_status(struct hl_device *hdev) +{ + enum hl_device_status status; + + if (hdev->disabled) + status = HL_DEVICE_STATUS_MALFUNCTION; + else if (atomic_read(&hdev->in_reset)) + status = HL_DEVICE_STATUS_IN_RESET; + else + status = HL_DEVICE_STATUS_OPERATIONAL; + + return status; +} + +static void hpriv_release(struct kref *ref) +{ + struct hl_fpriv *hpriv; + struct hl_device *hdev; + + hpriv = container_of(ref, struct hl_fpriv, refcount); + + hdev = hpriv->hdev; + + put_pid(hpriv->taskpid); + + hl_debugfs_remove_file(hpriv); + + mutex_destroy(&hpriv->restore_phase_mutex); + + mutex_lock(&hdev->fpriv_list_lock); + list_del(&hpriv->dev_node); + hdev->compute_ctx = NULL; + mutex_unlock(&hdev->fpriv_list_lock); + + kfree(hpriv); +} + +void hl_hpriv_get(struct hl_fpriv *hpriv) +{ + kref_get(&hpriv->refcount); +} + +void hl_hpriv_put(struct hl_fpriv *hpriv) +{ + kref_put(&hpriv->refcount, hpriv_release); +} + +/* + * hl_device_release - release function for habanalabs device + * + * @inode: pointer to inode structure + * @filp: pointer to file structure + * + * Called when process closes an habanalabs device + */ +static int hl_device_release(struct inode *inode, struct file *filp) +{ + struct hl_fpriv *hpriv = filp->private_data; + + hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); + hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); + + filp->private_data = NULL; + + hl_hpriv_put(hpriv); + + return 0; +} + +static int hl_device_release_ctrl(struct inode *inode, struct file *filp) +{ + struct hl_fpriv *hpriv = filp->private_data; + struct hl_device *hdev; + + filp->private_data = NULL; + + hdev = hpriv->hdev; + + mutex_lock(&hdev->fpriv_list_lock); + list_del(&hpriv->dev_node); + mutex_unlock(&hdev->fpriv_list_lock); + + kfree(hpriv); + + return 0; +} + +/* + * hl_mmap - mmap function for habanalabs device + * + * @*filp: pointer to file structure + * @*vma: pointer to vm_area_struct of the process + * + * Called when process does an mmap on habanalabs device. Call the device's mmap + * function at the end of the common code. 
+ */ +static int hl_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct hl_fpriv *hpriv = filp->private_data; + + if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) { + vma->vm_pgoff ^= HL_MMAP_CB_MASK; + return hl_cb_mmap(hpriv, vma); + } + + return -EINVAL; +} + +static const struct file_operations hl_ops = { + .owner = THIS_MODULE, + .open = hl_device_open, + .release = hl_device_release, + .mmap = hl_mmap, + .unlocked_ioctl = hl_ioctl, + .compat_ioctl = hl_ioctl +}; + +static const struct file_operations hl_ctrl_ops = { + .owner = THIS_MODULE, + .open = hl_device_open_ctrl, + .release = hl_device_release_ctrl, + .unlocked_ioctl = hl_ioctl_control, + .compat_ioctl = hl_ioctl_control +}; + +static void device_release_func(struct device *dev) +{ + kfree(dev); +} + +/* + * device_init_cdev - Initialize cdev and device for habanalabs device + * + * @hdev: pointer to habanalabs device structure + * @hclass: pointer to the class object of the device + * @minor: minor number of the specific device + * @fpos: file operations to install for this device + * @name: name of the device as it will appear in the filesystem + * @cdev: pointer to the char device object that will be initialized + * @dev: pointer to the device object that will be initialized + * + * Initialize a cdev and a Linux device for habanalabs's device. + */ +static int device_init_cdev(struct hl_device *hdev, struct class *hclass, + int minor, const struct file_operations *fops, + char *name, struct cdev *cdev, + struct device **dev) +{ + cdev_init(cdev, fops); + cdev->owner = THIS_MODULE; + + *dev = kzalloc(sizeof(**dev), GFP_KERNEL); + if (!*dev) + return -ENOMEM; + + device_initialize(*dev); + (*dev)->devt = MKDEV(hdev->major, minor); + (*dev)->class = hclass; + (*dev)->release = device_release_func; + dev_set_drvdata(*dev, hdev); + dev_set_name(*dev, "%s", name); + + return 0; +} + +static int device_cdev_sysfs_add(struct hl_device *hdev) +{ + int rc; + + rc = cdev_device_add(&hdev->cdev, hdev->dev); + if (rc) { + dev_err(hdev->dev, + "failed to add a char device to the system\n"); + return rc; + } + + rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl); + if (rc) { + dev_err(hdev->dev, + "failed to add a control char device to the system\n"); + goto delete_cdev_device; + } + + /* hl_sysfs_init() must be done after adding the device to the system */ + rc = hl_sysfs_init(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize sysfs\n"); + goto delete_ctrl_cdev_device; + } + + hdev->cdev_sysfs_created = true; + + return 0; + +delete_ctrl_cdev_device: + cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); +delete_cdev_device: + cdev_device_del(&hdev->cdev, hdev->dev); + return rc; +} + +static void device_cdev_sysfs_del(struct hl_device *hdev) +{ + /* device_release() won't be called so must free devices explicitly */ + if (!hdev->cdev_sysfs_created) { + kfree(hdev->dev_ctrl); + kfree(hdev->dev); + return; + } + + hl_sysfs_fini(hdev); + cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); + cdev_device_del(&hdev->cdev, hdev->dev); +} + +/* + * device_early_init - do some early initialization for the habanalabs device + * + * @hdev: pointer to habanalabs device structure + * + * Install the relevant function pointers and call the early_init function, + * if such a function exists + */ +static int device_early_init(struct hl_device *hdev) +{ + int i, rc; + char workq_name[32]; + + switch (hdev->asic_type) { + case ASIC_GOYA: + goya_set_asic_funcs(hdev); + strlcpy(hdev->asic_name, "GOYA", 
sizeof(hdev->asic_name)); + break; + case ASIC_GAUDI: + gaudi_set_asic_funcs(hdev); + sprintf(hdev->asic_name, "GAUDI"); + break; + default: + dev_err(hdev->dev, "Unrecognized ASIC type %d\n", + hdev->asic_type); + return -EINVAL; + } + + rc = hdev->asic_funcs->early_init(hdev); + if (rc) + return rc; + + rc = hl_asid_init(hdev); + if (rc) + goto early_fini; + + if (hdev->asic_prop.completion_queues_count) { + hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, + sizeof(*hdev->cq_wq), + GFP_ATOMIC); + if (!hdev->cq_wq) { + rc = -ENOMEM; + goto asid_fini; + } + } + + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) { + snprintf(workq_name, 32, "hl-free-jobs-%u", i); + hdev->cq_wq[i] = create_singlethread_workqueue(workq_name); + if (hdev->cq_wq == NULL) { + dev_err(hdev->dev, "Failed to allocate CQ workqueue\n"); + rc = -ENOMEM; + goto free_cq_wq; + } + } + + hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0); + if (hdev->eq_wq == NULL) { + dev_err(hdev->dev, "Failed to allocate EQ workqueue\n"); + rc = -ENOMEM; + goto free_cq_wq; + } + + hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), + GFP_KERNEL); + if (!hdev->hl_chip_info) { + rc = -ENOMEM; + goto free_eq_wq; + } + + hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE, + sizeof(struct hl_device_idle_busy_ts), + (GFP_KERNEL | __GFP_ZERO)); + if (!hdev->idle_busy_ts_arr) { + rc = -ENOMEM; + goto free_chip_info; + } + + hl_cb_mgr_init(&hdev->kernel_cb_mgr); + + mutex_init(&hdev->send_cpu_message_lock); + mutex_init(&hdev->debug_lock); + mutex_init(&hdev->mmu_cache_lock); + INIT_LIST_HEAD(&hdev->hw_queues_mirror_list); + spin_lock_init(&hdev->hw_queues_mirror_lock); + INIT_LIST_HEAD(&hdev->fpriv_list); + mutex_init(&hdev->fpriv_list_lock); + atomic_set(&hdev->in_reset, 0); + + return 0; + +free_chip_info: + kfree(hdev->hl_chip_info); +free_eq_wq: + destroy_workqueue(hdev->eq_wq); +free_cq_wq: + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + if (hdev->cq_wq[i]) + destroy_workqueue(hdev->cq_wq[i]); + kfree(hdev->cq_wq); +asid_fini: + hl_asid_fini(hdev); +early_fini: + if (hdev->asic_funcs->early_fini) + hdev->asic_funcs->early_fini(hdev); + + return rc; +} + +/* + * device_early_fini - finalize all that was done in device_early_init + * + * @hdev: pointer to habanalabs device structure + * + */ +static void device_early_fini(struct hl_device *hdev) +{ + int i; + + mutex_destroy(&hdev->mmu_cache_lock); + mutex_destroy(&hdev->debug_lock); + mutex_destroy(&hdev->send_cpu_message_lock); + + mutex_destroy(&hdev->fpriv_list_lock); + + hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); + + kfree(hdev->idle_busy_ts_arr); + kfree(hdev->hl_chip_info); + + destroy_workqueue(hdev->eq_wq); + + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + destroy_workqueue(hdev->cq_wq[i]); + kfree(hdev->cq_wq); + + hl_asid_fini(hdev); + + if (hdev->asic_funcs->early_fini) + hdev->asic_funcs->early_fini(hdev); +} + +static void set_freq_to_low_job(struct work_struct *work) +{ + struct hl_device *hdev = container_of(work, struct hl_device, + work_freq.work); + + mutex_lock(&hdev->fpriv_list_lock); + + if (!hdev->compute_ctx) + hl_device_set_frequency(hdev, PLL_LOW); + + mutex_unlock(&hdev->fpriv_list_lock); + + schedule_delayed_work(&hdev->work_freq, + usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); +} + +static void hl_device_heartbeat(struct work_struct *work) +{ + struct hl_device *hdev = container_of(work, struct hl_device, + work_heartbeat.work); + + if 
(hl_device_disabled_or_in_reset(hdev)) + goto reschedule; + + if (!hdev->asic_funcs->send_heartbeat(hdev)) + goto reschedule; + + dev_err(hdev->dev, "Device heartbeat failed!\n"); + hl_device_reset(hdev, true, false); + + return; + +reschedule: + schedule_delayed_work(&hdev->work_heartbeat, + usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); +} + +/* + * device_late_init - do late stuff initialization for the habanalabs device + * + * @hdev: pointer to habanalabs device structure + * + * Do stuff that either needs the device H/W queues to be active or needs + * to happen after all the rest of the initialization is finished + */ +static int device_late_init(struct hl_device *hdev) +{ + int rc; + + if (hdev->asic_funcs->late_init) { + rc = hdev->asic_funcs->late_init(hdev); + if (rc) { + dev_err(hdev->dev, + "failed late initialization for the H/W\n"); + return rc; + } + } + + hdev->high_pll = hdev->asic_prop.high_pll; + + /* force setting to low frequency */ + hdev->curr_pll_profile = PLL_LOW; + + if (hdev->pm_mng_profile == PM_AUTO) + hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW); + else + hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST); + + INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job); + schedule_delayed_work(&hdev->work_freq, + usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); + + if (hdev->heartbeat) { + INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); + schedule_delayed_work(&hdev->work_heartbeat, + usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); + } + + hdev->late_init_done = true; + + return 0; +} + +/* + * device_late_fini - finalize all that was done in device_late_init + * + * @hdev: pointer to habanalabs device structure + * + */ +static void device_late_fini(struct hl_device *hdev) +{ + if (!hdev->late_init_done) + return; + + cancel_delayed_work_sync(&hdev->work_freq); + if (hdev->heartbeat) + cancel_delayed_work_sync(&hdev->work_heartbeat); + + if (hdev->asic_funcs->late_fini) + hdev->asic_funcs->late_fini(hdev); + + hdev->late_init_done = false; +} + +uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms) +{ + struct hl_device_idle_busy_ts *ts; + ktime_t zero_ktime, curr = ktime_get(); + u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx; + s64 period_us, last_start_us, last_end_us, last_busy_time_us, + total_busy_time_us = 0, total_busy_time_ms; + + zero_ktime = ktime_set(0, 0); + period_us = period_ms * USEC_PER_MSEC; + ts = &hdev->idle_busy_ts_arr[last_index]; + + /* check case that device is currently in idle */ + if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) && + !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) { + + last_index--; + /* Handle case idle_busy_ts_idx was 0 */ + if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) + last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; + + ts = &hdev->idle_busy_ts_arr[last_index]; + } + + while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) { + /* Check if we are in last sample case. i.e. if the sample + * begun before the sampling period. This could be a real + * sample or 0 so need to handle both cases + */ + last_start_us = ktime_to_us( + ktime_sub(curr, ts->idle_to_busy_ts)); + + if (last_start_us > period_us) { + + /* First check two cases: + * 1. If the device is currently busy + * 2. If the device was idle during the whole sampling + * period + */ + + if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) { + /* Check if the device is currently busy */ + if (ktime_compare(ts->idle_to_busy_ts, + zero_ktime)) + return 100; + + /* We either didn't have any activity or we + * reached an entry which is 0. 
Either way, + * exit and return what was accumulated so far + */ + break; + } + + /* If sample has finished, check it is relevant */ + last_end_us = ktime_to_us( + ktime_sub(curr, ts->busy_to_idle_ts)); + + if (last_end_us > period_us) + break; + + /* It is relevant so add it but with adjustment */ + last_busy_time_us = ktime_to_us( + ktime_sub(ts->busy_to_idle_ts, + ts->idle_to_busy_ts)); + total_busy_time_us += last_busy_time_us - + (last_start_us - period_us); + break; + } + + /* Check if the sample is finished or still open */ + if (ktime_compare(ts->busy_to_idle_ts, zero_ktime)) + last_busy_time_us = ktime_to_us( + ktime_sub(ts->busy_to_idle_ts, + ts->idle_to_busy_ts)); + else + last_busy_time_us = ktime_to_us( + ktime_sub(curr, ts->idle_to_busy_ts)); + + total_busy_time_us += last_busy_time_us; + + last_index--; + /* Handle case idle_busy_ts_idx was 0 */ + if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) + last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; + + ts = &hdev->idle_busy_ts_arr[last_index]; + + overlap_cnt++; + } + + total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us, + USEC_PER_MSEC); + + return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms); +} + +/* + * hl_device_set_frequency - set the frequency of the device + * + * @hdev: pointer to habanalabs device structure + * @freq: the new frequency value + * + * Change the frequency if needed. This function has no protection against + * concurrency, therefore it is assumed that the calling function has protected + * itself against the case of calling this function from multiple threads with + * different values + * + * Returns 0 if no change was done, otherwise returns 1 + */ +int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq) +{ + if ((hdev->pm_mng_profile == PM_MANUAL) || + (hdev->curr_pll_profile == freq)) + return 0; + + dev_dbg(hdev->dev, "Changing device frequency to %s\n", + freq == PLL_HIGH ? "high" : "low"); + + hdev->asic_funcs->set_pll_profile(hdev, freq); + + hdev->curr_pll_profile = freq; + + return 1; +} + +int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) +{ + int rc = 0; + + mutex_lock(&hdev->debug_lock); + + if (!enable) { + if (!hdev->in_debug) { + dev_err(hdev->dev, + "Failed to disable debug mode because device was not in debug mode\n"); + rc = -EFAULT; + goto out; + } + + if (!hdev->hard_reset_pending) + hdev->asic_funcs->halt_coresight(hdev); + + hdev->in_debug = 0; + + if (!hdev->hard_reset_pending) + hdev->asic_funcs->enable_clock_gating(hdev); + + goto out; + } + + if (hdev->in_debug) { + dev_err(hdev->dev, + "Failed to enable debug mode because device is already in debug mode\n"); + rc = -EFAULT; + goto out; + } + + hdev->asic_funcs->disable_clock_gating(hdev); + hdev->in_debug = 1; + +out: + mutex_unlock(&hdev->debug_lock); + + return rc; +} + +/* + * hl_device_suspend - initiate device suspend + * + * @hdev: pointer to habanalabs device structure + * + * Puts the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. 
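+ * Note: the PCI state saved here is restored by hl_device_resume().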
+ */ +int hl_device_suspend(struct hl_device *hdev) +{ + int rc; + + pci_save_state(hdev->pdev); + + /* Block future CS/VM/JOB completion operations */ + rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); + if (rc) { + dev_err(hdev->dev, "Can't suspend while in reset\n"); + return -EIO; + } + + /* This blocks all other stuff that is not blocked by in_reset */ + hdev->disabled = true; + + /* + * Flush anyone that is inside the critical section of enqueue + * jobs to the H/W + */ + hdev->asic_funcs->hw_queues_lock(hdev); + hdev->asic_funcs->hw_queues_unlock(hdev); + + /* Flush processes that are sending message to CPU */ + mutex_lock(&hdev->send_cpu_message_lock); + mutex_unlock(&hdev->send_cpu_message_lock); + + rc = hdev->asic_funcs->suspend(hdev); + if (rc) + dev_err(hdev->dev, + "Failed to disable PCI access of device CPU\n"); + + /* Shut down the device */ + pci_disable_device(hdev->pdev); + pci_set_power_state(hdev->pdev, PCI_D3hot); + + return 0; +} + +/* + * hl_device_resume - initiate device resume + * + * @hdev: pointer to habanalabs device structure + * + * Bring the hw back to operating state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver resume. + */ +int hl_device_resume(struct hl_device *hdev) +{ + int rc; + + pci_set_power_state(hdev->pdev, PCI_D0); + pci_restore_state(hdev->pdev); + rc = pci_enable_device_mem(hdev->pdev); + if (rc) { + dev_err(hdev->dev, + "Failed to enable PCI device in resume\n"); + return rc; + } + + pci_set_master(hdev->pdev); + + rc = hdev->asic_funcs->resume(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to resume device after suspend\n"); + goto disable_device; + } + + + hdev->disabled = false; + atomic_set(&hdev->in_reset, 0); + + rc = hl_device_reset(hdev, true, false); + if (rc) { + dev_err(hdev->dev, "Failed to reset device during resume\n"); + goto disable_device; + } + + return 0; + +disable_device: + pci_clear_master(hdev->pdev); + pci_disable_device(hdev->pdev); + + return rc; +} + +static int device_kill_open_processes(struct hl_device *hdev) +{ + u16 pending_total, pending_cnt; + struct hl_fpriv *hpriv; + struct task_struct *task = NULL; + + if (hdev->pldm) + pending_total = HL_PLDM_PENDING_RESET_PER_SEC; + else + pending_total = HL_PENDING_RESET_PER_SEC; + + /* Giving time for user to close FD, and for processes that are inside + * hl_device_open to finish + */ + if (!list_empty(&hdev->fpriv_list)) + ssleep(1); + + mutex_lock(&hdev->fpriv_list_lock); + + /* This section must be protected because we are dereferencing + * pointers that are freed if the process exits + */ + list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) { + task = get_pid_task(hpriv->taskpid, PIDTYPE_PID); + if (task) { + dev_info(hdev->dev, "Killing user process pid=%d\n", + task_pid_nr(task)); + send_sig(SIGKILL, task, 1); + usleep_range(1000, 10000); + + put_task_struct(task); + } + } + + mutex_unlock(&hdev->fpriv_list_lock); + + /* We killed the open users, but because the driver cleans up after the + * user contexts are closed (e.g. mmu mappings), we need to wait again + * to make sure the cleaning phase is finished before continuing with + * the reset + */ + + pending_cnt = pending_total; + + while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) { + dev_info(hdev->dev, + "Waiting for all unmap operations to finish before hard reset\n"); + + pending_cnt--; + + ssleep(1); + } + + return list_empty(&hdev->fpriv_list) ? 
0 : -EBUSY; +} + +static void device_hard_reset_pending(struct work_struct *work) +{ + struct hl_device_reset_work *device_reset_work = + container_of(work, struct hl_device_reset_work, reset_work); + struct hl_device *hdev = device_reset_work->hdev; + + hl_device_reset(hdev, true, true); + + kfree(device_reset_work); +} + +/* + * hl_device_reset - reset the device + * + * @hdev: pointer to habanalabs device structure + * @hard_reset: should we do hard reset to all engines or just reset the + * compute/dma engines + * @from_hard_reset_thread: is the caller the hard-reset thread + * + * Block future CS and wait for pending CS to be enqueued + * Call ASIC H/W fini + * Flush all completions + * Re-initialize all internal data structures + * Call ASIC H/W init, late_init + * Test queues + * Enable device + * + * Returns 0 for success or an error on failure. + */ +int hl_device_reset(struct hl_device *hdev, bool hard_reset, + bool from_hard_reset_thread) +{ + int i, rc; + + if (!hdev->init_done) { + dev_err(hdev->dev, + "Can't reset before initialization is done\n"); + return 0; + } + + if ((!hard_reset) && (!hdev->supports_soft_reset)) { + dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n"); + hard_reset = true; + } + + /* + * Prevent concurrency in this function - only one reset should be + * done at any given time. Only need to perform this if we didn't + * get from the dedicated hard reset thread + */ + if (!from_hard_reset_thread) { + /* Block future CS/VM/JOB completion operations */ + rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); + if (rc) + return 0; + + if (hard_reset) { + /* Disable PCI access from device F/W so he won't send + * us additional interrupts. We disable MSI/MSI-X at + * the halt_engines function and we can't have the F/W + * sending us interrupts after that. We need to disable + * the access here because if the device is marked + * disable, the message won't be send. 
Also, in case + * of heartbeat, the device CPU is marked as disable + * so this message won't be sent + */ + if (hl_fw_send_pci_access_msg(hdev, + ARMCP_PACKET_DISABLE_PCI_ACCESS)) + dev_warn(hdev->dev, + "Failed to disable PCI access by F/W\n"); + } + + /* This also blocks future CS/VM/JOB completion operations */ + hdev->disabled = true; + + /* Flush anyone that is inside the critical section of enqueue + * jobs to the H/W + */ + hdev->asic_funcs->hw_queues_lock(hdev); + hdev->asic_funcs->hw_queues_unlock(hdev); + + /* Flush anyone that is inside device open */ + mutex_lock(&hdev->fpriv_list_lock); + mutex_unlock(&hdev->fpriv_list_lock); + + dev_err(hdev->dev, "Going to RESET device!\n"); + } + +again: + if ((hard_reset) && (!from_hard_reset_thread)) { + struct hl_device_reset_work *device_reset_work; + + hdev->hard_reset_pending = true; + + device_reset_work = kzalloc(sizeof(*device_reset_work), + GFP_ATOMIC); + if (!device_reset_work) { + rc = -ENOMEM; + goto out_err; + } + + /* + * Because the reset function can't run from interrupt or + * from heartbeat work, we need to call the reset function + * from a dedicated work + */ + INIT_WORK(&device_reset_work->reset_work, + device_hard_reset_pending); + device_reset_work->hdev = hdev; + schedule_work(&device_reset_work->reset_work); + + return 0; + } + + if (hard_reset) { + device_late_fini(hdev); + + /* + * Now that the heartbeat thread is closed, flush processes + * which are sending messages to CPU + */ + mutex_lock(&hdev->send_cpu_message_lock); + mutex_unlock(&hdev->send_cpu_message_lock); + } + + /* + * Halt the engines and disable interrupts so we won't get any more + * completions from H/W and we won't have any accesses from the + * H/W to the host machine + */ + hdev->asic_funcs->halt_engines(hdev, hard_reset); + + /* Go over all the queues, release all CS and their jobs */ + hl_cs_rollback_all(hdev); + + if (hard_reset) { + /* Kill processes here after CS rollback. This is because the + * process can't really exit until all its CSs are done, which + * is what we do in cs rollback + */ + rc = device_kill_open_processes(hdev); + if (rc) { + dev_crit(hdev->dev, + "Failed to kill all open processes, stopping hard reset\n"); + goto out_err; + } + + /* Flush the Event queue workers to make sure no other thread is + * reading or writing to registers during the reset + */ + flush_workqueue(hdev->eq_wq); + } + + /* Release kernel context */ + if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1)) + hdev->kernel_ctx = NULL; + + /* Reset the H/W. 
It will be in idle state after this returns */ + hdev->asic_funcs->hw_fini(hdev, hard_reset); + + if (hard_reset) { + hl_vm_fini(hdev); + hl_mmu_fini(hdev); + hl_eq_reset(hdev, &hdev->event_queue); + } + + /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */ + hl_hw_queue_reset(hdev, hard_reset); + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + hl_cq_reset(hdev, &hdev->completion_queue[i]); + + hdev->idle_busy_ts_idx = 0; + hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0); + hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0); + + if (hdev->cs_active_cnt) + dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n", + hdev->cs_active_cnt); + + mutex_lock(&hdev->fpriv_list_lock); + + /* Make sure the context switch phase will run again */ + if (hdev->compute_ctx) { + atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1); + hdev->compute_ctx->thread_ctx_switch_wait_token = 0; + } + + mutex_unlock(&hdev->fpriv_list_lock); + + /* Finished tear-down, starting to re-initialize */ + + if (hard_reset) { + hdev->device_cpu_disabled = false; + hdev->hard_reset_pending = false; + + if (hdev->kernel_ctx) { + dev_crit(hdev->dev, + "kernel ctx was alive during hard reset, something is terribly wrong\n"); + rc = -EBUSY; + goto out_err; + } + + rc = hl_mmu_init(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to initialize MMU S/W after hard reset\n"); + goto out_err; + } + + /* Allocate the kernel context */ + hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), + GFP_KERNEL); + if (!hdev->kernel_ctx) { + rc = -ENOMEM; + goto out_err; + } + + hdev->compute_ctx = NULL; + + rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); + if (rc) { + dev_err(hdev->dev, + "failed to init kernel ctx in hard reset\n"); + kfree(hdev->kernel_ctx); + hdev->kernel_ctx = NULL; + goto out_err; + } + } + + rc = hdev->asic_funcs->hw_init(hdev); + if (rc) { + dev_err(hdev->dev, + "failed to initialize the H/W after reset\n"); + goto out_err; + } + + hdev->disabled = false; + + /* Check that the communication with the device is working */ + rc = hdev->asic_funcs->test_queues(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to detect if device is alive after reset\n"); + goto out_err; + } + + if (hard_reset) { + rc = device_late_init(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed late init after hard reset\n"); + goto out_err; + } + + rc = hl_vm_init(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to init memory module after hard reset\n"); + goto out_err; + } + + hl_set_max_power(hdev, hdev->max_power); + } else { + rc = hdev->asic_funcs->soft_reset_late_init(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed late init after soft reset\n"); + goto out_err; + } + } + + atomic_set(&hdev->in_reset, 0); + + if (hard_reset) + hdev->hard_reset_cnt++; + else + hdev->soft_reset_cnt++; + + dev_warn(hdev->dev, "Successfully finished resetting the device\n"); + + return 0; + +out_err: + hdev->disabled = true; + + if (hard_reset) { + dev_err(hdev->dev, + "Failed to reset! Device is NOT usable\n"); + hdev->hard_reset_cnt++; + } else { + dev_err(hdev->dev, + "Failed to do soft-reset, trying hard reset\n"); + hdev->soft_reset_cnt++; + hard_reset = true; + goto again; + } + + atomic_set(&hdev->in_reset, 0); + + return rc; +} + +/* + * hl_device_init - main initialization function for habanalabs device + * + * @hdev: pointer to habanalabs device structure + * + * Allocate an id for the device, do early initialization and then call the + * ASIC specific initialization functions. 
Finally, create the cdev and the + * Linux device to expose it to the user + */ +int hl_device_init(struct hl_device *hdev, struct class *hclass) +{ + int i, rc, cq_cnt, cq_ready_cnt; + char *name; + bool add_cdev_sysfs_on_err = false; + + name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2); + if (!name) { + rc = -ENOMEM; + goto out_disabled; + } + + /* Initialize cdev and device structures */ + rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name, + &hdev->cdev, &hdev->dev); + + kfree(name); + + if (rc) + goto out_disabled; + + name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2); + if (!name) { + rc = -ENOMEM; + goto free_dev; + } + + /* Initialize cdev and device structures for control device */ + rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops, + name, &hdev->cdev_ctrl, &hdev->dev_ctrl); + + kfree(name); + + if (rc) + goto free_dev; + + /* Initialize ASIC function pointers and perform early init */ + rc = device_early_init(hdev); + if (rc) + goto free_dev_ctrl; + + /* + * Start calling ASIC initialization. First S/W then H/W and finally + * late init + */ + rc = hdev->asic_funcs->sw_init(hdev); + if (rc) + goto early_fini; + + /* + * Initialize the H/W queues. Must be done before hw_init, because + * there the addresses of the kernel queue are being written to the + * registers of the device + */ + rc = hl_hw_queues_create(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize kernel queues\n"); + goto sw_fini; + } + + cq_cnt = hdev->asic_prop.completion_queues_count; + + /* + * Initialize the completion queues. Must be done before hw_init, + * because there the addresses of the completion queues are being + * passed as arguments to request_irq + */ + if (cq_cnt) { + hdev->completion_queue = kcalloc(cq_cnt, + sizeof(*hdev->completion_queue), + GFP_KERNEL); + + if (!hdev->completion_queue) { + dev_err(hdev->dev, + "failed to allocate completion queues\n"); + rc = -ENOMEM; + goto hw_queues_destroy; + } + } + + for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) { + rc = hl_cq_init(hdev, &hdev->completion_queue[i], + hdev->asic_funcs->get_queue_id_for_cq(hdev, i)); + if (rc) { + dev_err(hdev->dev, + "failed to initialize completion queue\n"); + goto cq_fini; + } + hdev->completion_queue[i].cq_idx = i; + } + + /* + * Initialize the event queue. 
Must be done before hw_init, + * because there the address of the event queue is being + * passed as argument to request_irq + */ + rc = hl_eq_init(hdev, &hdev->event_queue); + if (rc) { + dev_err(hdev->dev, "failed to initialize event queue\n"); + goto cq_fini; + } + + /* MMU S/W must be initialized before kernel context is created */ + rc = hl_mmu_init(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n"); + goto eq_fini; + } + + /* Allocate the kernel context */ + hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); + if (!hdev->kernel_ctx) { + rc = -ENOMEM; + goto mmu_fini; + } + + hdev->compute_ctx = NULL; + + rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); + if (rc) { + dev_err(hdev->dev, "failed to initialize kernel context\n"); + kfree(hdev->kernel_ctx); + goto mmu_fini; + } + + rc = hl_cb_pool_init(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize CB pool\n"); + goto release_ctx; + } + + hl_debugfs_add_device(hdev); + + if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { + dev_info(hdev->dev, + "H/W state is dirty, must reset before initializing\n"); + hdev->asic_funcs->halt_engines(hdev, true); + hdev->asic_funcs->hw_fini(hdev, true); + } + + /* + * From this point, in case of an error, add char devices and create + * sysfs nodes as part of the error flow, to allow debugging. + */ + add_cdev_sysfs_on_err = true; + + rc = hdev->asic_funcs->hw_init(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize the H/W\n"); + rc = 0; + goto out_disabled; + } + + hdev->disabled = false; + + /* Check that the communication with the device is working */ + rc = hdev->asic_funcs->test_queues(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to detect if device is alive\n"); + rc = 0; + goto out_disabled; + } + + rc = device_late_init(hdev); + if (rc) { + dev_err(hdev->dev, "Failed late initialization\n"); + rc = 0; + goto out_disabled; + } + + dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n", + hdev->asic_name, + hdev->asic_prop.dram_size / 1024 / 1024 / 1024); + + rc = hl_vm_init(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize memory module\n"); + rc = 0; + goto out_disabled; + } + + /* + * Expose devices and sysfs nodes to user. + * From here there is no need to add char devices and create sysfs nodes + * in case of an error. + */ + add_cdev_sysfs_on_err = false; + rc = device_cdev_sysfs_add(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to add char devices and sysfs nodes\n"); + rc = 0; + goto out_disabled; + } + + /* + * hl_hwmon_init() must be called after device_late_init(), because only + * there we get the information from the device about which + * hwmon-related sensors the device supports. + * Furthermore, it must be done after adding the device to the system. 
+ */ + rc = hl_hwmon_init(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize hwmon\n"); + rc = 0; + goto out_disabled; + } + + dev_notice(hdev->dev, + "Successfully added device to habanalabs driver\n"); + + hdev->init_done = true; + + return 0; + +release_ctx: + if (hl_ctx_put(hdev->kernel_ctx) != 1) + dev_err(hdev->dev, + "kernel ctx is still alive on initialization failure\n"); +mmu_fini: + hl_mmu_fini(hdev); +eq_fini: + hl_eq_fini(hdev, &hdev->event_queue); +cq_fini: + for (i = 0 ; i < cq_ready_cnt ; i++) + hl_cq_fini(hdev, &hdev->completion_queue[i]); + kfree(hdev->completion_queue); +hw_queues_destroy: + hl_hw_queues_destroy(hdev); +sw_fini: + hdev->asic_funcs->sw_fini(hdev); +early_fini: + device_early_fini(hdev); +free_dev_ctrl: + kfree(hdev->dev_ctrl); +free_dev: + kfree(hdev->dev); +out_disabled: + hdev->disabled = true; + if (add_cdev_sysfs_on_err) + device_cdev_sysfs_add(hdev); + if (hdev->pdev) + dev_err(&hdev->pdev->dev, + "Failed to initialize hl%d. Device is NOT usable !\n", + hdev->id / 2); + else + pr_err("Failed to initialize hl%d. Device is NOT usable !\n", + hdev->id / 2); + + return rc; +} + +/* + * hl_device_fini - main tear-down function for habanalabs device + * + * @hdev: pointer to habanalabs device structure + * + * Destroy the device, call ASIC fini functions and release the id + */ +void hl_device_fini(struct hl_device *hdev) +{ + int i, rc; + ktime_t timeout; + + dev_info(hdev->dev, "Removing device\n"); + + /* + * This function is competing with the reset function, so try to + * take the reset atomic and if we are already in middle of reset, + * wait until reset function is finished. Reset function is designed + * to always finish. However, in Gaudi, because of all the network + * ports, the hard reset could take between 10-30 seconds + */ + + timeout = ktime_add_us(ktime_get(), + HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000); + rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); + while (rc) { + usleep_range(50, 200); + rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); + if (ktime_compare(ktime_get(), timeout) > 0) { + WARN(1, "Failed to remove device because reset function did not finish\n"); + return; + } + } + + /* Mark device as disabled */ + hdev->disabled = true; + + /* Flush anyone that is inside the critical section of enqueue + * jobs to the H/W + */ + hdev->asic_funcs->hw_queues_lock(hdev); + hdev->asic_funcs->hw_queues_unlock(hdev); + + /* Flush anyone that is inside device open */ + mutex_lock(&hdev->fpriv_list_lock); + mutex_unlock(&hdev->fpriv_list_lock); + + hdev->hard_reset_pending = true; + + hl_hwmon_fini(hdev); + + device_late_fini(hdev); + + hl_debugfs_remove_device(hdev); + + /* + * Halt the engines and disable interrupts so we won't get any more + * completions from H/W and we won't have any accesses from the + * H/W to the host machine + */ + hdev->asic_funcs->halt_engines(hdev, true); + + /* Go over all the queues, release all CS and their jobs */ + hl_cs_rollback_all(hdev); + + /* Kill processes here after CS rollback. This is because the process + * can't really exit until all its CSs are done, which is what we + * do in cs rollback + */ + rc = device_kill_open_processes(hdev); + if (rc) + dev_crit(hdev->dev, "Failed to kill all open processes\n"); + + hl_cb_pool_fini(hdev); + + /* Release kernel context */ + if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1)) + dev_err(hdev->dev, "kernel ctx is still alive\n"); + + /* Reset the H/W. 
It will be in idle state after this returns */ + hdev->asic_funcs->hw_fini(hdev, true); + + hl_vm_fini(hdev); + + hl_mmu_fini(hdev); + + hl_eq_fini(hdev, &hdev->event_queue); + + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + hl_cq_fini(hdev, &hdev->completion_queue[i]); + kfree(hdev->completion_queue); + + hl_hw_queues_destroy(hdev); + + /* Call ASIC S/W finalize function */ + hdev->asic_funcs->sw_fini(hdev); + + device_early_fini(hdev); + + /* Hide devices and sysfs nodes from user */ + device_cdev_sysfs_del(hdev); + + pr_info("removed device successfully\n"); +} + +/* + * MMIO register access helper functions. + */ + +/* + * hl_rreg - Read an MMIO register + * + * @hdev: pointer to habanalabs device structure + * @reg: MMIO register offset (in bytes) + * + * Returns the value of the MMIO register we are asked to read + * + */ +inline u32 hl_rreg(struct hl_device *hdev, u32 reg) +{ + return readl(hdev->rmmio + reg); +} + +/* + * hl_wreg - Write to an MMIO register + * + * @hdev: pointer to habanalabs device structure + * @reg: MMIO register offset (in bytes) + * @val: 32-bit value + * + * Writes the 32-bit value into the MMIO register + * + */ +inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val) +{ + writel(val, hdev->rmmio + reg); +} diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c new file mode 100644 index 000000000000..b2b84510b932 --- /dev/null +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -0,0 +1,589 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" +#include "include/common/hl_boot_if.h" + +#include +#include +#include +#include + +/** + * hl_fw_load_fw_to_device() - Load F/W code to device's memory. + * + * @hdev: pointer to hl_device structure. + * @fw_name: the firmware image name + * @dst: IO memory mapped address space to copy firmware to + * + * Copy fw code from firmware file to device memory. + * + * Return: 0 on success, non-zero for failure. 
+ */ +int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, + void __iomem *dst) +{ + const struct firmware *fw; + const u64 *fw_data; + size_t fw_size; + int rc; + + rc = request_firmware(&fw, fw_name, hdev->dev); + if (rc) { + dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name); + goto out; + } + + fw_size = fw->size; + if ((fw_size % 4) != 0) { + dev_err(hdev->dev, "Illegal %s firmware size %zu\n", + fw_name, fw_size); + rc = -EINVAL; + goto out; + } + + dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size); + + fw_data = (const u64 *) fw->data; + + memcpy_toio(dst, fw_data, fw_size); + +out: + release_firmware(fw); + return rc; +} + +int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) +{ + struct armcp_packet pkt = {}; + + pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT); + + return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, + sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); +} + +int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, + u16 len, u32 timeout, long *result) +{ + struct armcp_packet *pkt; + dma_addr_t pkt_dma_addr; + u32 tmp; + int rc = 0; + + pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, + &pkt_dma_addr); + if (!pkt) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for packet to CPU\n"); + return -ENOMEM; + } + + memcpy(pkt, msg, len); + + mutex_lock(&hdev->send_cpu_message_lock); + + if (hdev->disabled) + goto out; + + if (hdev->device_cpu_disabled) { + rc = -EIO; + goto out; + } + + rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr); + if (rc) { + dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc); + goto out; + } + + rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, + (tmp == ARMCP_PACKET_FENCE_VAL), 1000, + timeout, true); + + hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); + + if (rc == -ETIMEDOUT) { + dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp); + hdev->device_cpu_disabled = true; + goto out; + } + + tmp = le32_to_cpu(pkt->ctl); + + rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT; + if (rc) { + dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n", + rc, + (tmp & ARMCP_PKT_CTL_OPCODE_MASK) + >> ARMCP_PKT_CTL_OPCODE_SHIFT); + rc = -EIO; + } else if (result) { + *result = (long) le64_to_cpu(pkt->result); + } + +out: + mutex_unlock(&hdev->send_cpu_message_lock); + + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt); + + return rc; +} + +int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) +{ + struct armcp_packet pkt; + long result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.value = cpu_to_le64(event_type); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_DEVICE_TIMEOUT_USEC, &result); + + if (rc) + dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); + + return rc; +} + +int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, + size_t irq_arr_size) +{ + struct armcp_unmask_irq_arr_packet *pkt; + size_t total_pkt_size; + long result; + int rc; + + total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) + + irq_arr_size; + + /* data should be aligned to 8 bytes in order to ArmCP to copy it */ + total_pkt_size = (total_pkt_size + 0x7) & ~0x7; + + /* total_pkt_size is casted to u16 later on */ + if (total_pkt_size > USHRT_MAX) { + dev_err(hdev->dev, "too many elements in IRQ array\n"); + return -EINVAL; + } + + 
pkt = kzalloc(total_pkt_size, GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0])); + memcpy(&pkt->irqs, irq_arr, irq_arr_size); + + pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << + ARMCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, + total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result); + + if (rc) + dev_err(hdev->dev, "failed to unmask IRQ array\n"); + + kfree(pkt); + + return rc; +} + +int hl_fw_test_cpu_queue(struct hl_device *hdev) +{ + struct armcp_packet test_pkt = {}; + long result; + int rc; + + test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << + ARMCP_PKT_CTL_OPCODE_SHIFT); + test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, + sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + + if (!rc) { + if (result != ARMCP_PACKET_FENCE_VAL) + dev_err(hdev->dev, + "CPU queue test failed (0x%08lX)\n", result); + } else { + dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc); + } + + return rc; +} + +void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle) +{ + u64 kernel_addr; + + kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size); + + *dma_handle = hdev->cpu_accessible_dma_address + + (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem); + + return (void *) (uintptr_t) kernel_addr; +} + +void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, + void *vaddr) +{ + gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr, + size); +} + +int hl_fw_send_heartbeat(struct hl_device *hdev) +{ + struct armcp_packet hb_pkt = {}; + long result; + int rc; + + hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << + ARMCP_PKT_CTL_OPCODE_SHIFT); + hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, + sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + + if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) + rc = -EIO; + + return rc; +} + +int hl_fw_armcp_info_get(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct armcp_packet pkt = {}; + void *armcp_info_cpu_addr; + dma_addr_t armcp_info_dma_addr; + long result; + int rc; + + armcp_info_cpu_addr = + hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + sizeof(struct armcp_info), + &armcp_info_dma_addr); + if (!armcp_info_cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for ArmCP info packet\n"); + return -ENOMEM; + } + + memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(armcp_info_dma_addr); + pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info)); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_ARMCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to handle ArmCP info pkt, error %d\n", rc); + goto out; + } + + memcpy(&prop->armcp_info, armcp_info_cpu_addr, + sizeof(prop->armcp_info)); + + rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors); + if (rc) { + dev_err(hdev->dev, + "Failed to build hwmon channel info, error %d\n", rc); + rc = -EFAULT; + goto out; + } + +out: + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + sizeof(struct armcp_info), armcp_info_cpu_addr); + + return rc; +} + +int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, 
size_t max_size) +{ + struct armcp_packet pkt = {}; + void *eeprom_info_cpu_addr; + dma_addr_t eeprom_info_dma_addr; + long result; + int rc; + + eeprom_info_cpu_addr = + hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + max_size, &eeprom_info_dma_addr); + if (!eeprom_info_cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for ArmCP EEPROM packet\n"); + return -ENOMEM; + } + + memset(eeprom_info_cpu_addr, 0, max_size); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(eeprom_info_dma_addr); + pkt.data_max_size = cpu_to_le32(max_size); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_ARMCP_EEPROM_TIMEOUT_USEC, &result); + + if (rc) { + dev_err(hdev->dev, + "Failed to handle ArmCP EEPROM packet, error %d\n", rc); + goto out; + } + + /* result contains the actual size */ + memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size)); + +out: + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size, + eeprom_info_cpu_addr); + + return rc; +} + +static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg) +{ + u32 err_val; + + /* Some of the firmware status codes are deprecated in newer f/w + * versions. In those versions, the errors are reported + * in different registers. Therefore, we need to check those + * registers and print the exact errors. Moreover, there + * may be multiple errors, so we need to report on each error + * separately. Some of the error codes might indicate a state + * that is not an error per-se, but it is an error in production + * environment + */ + err_val = RREG32(boot_err0_reg); + if (!(err_val & CPU_BOOT_ERR0_ENABLED)) + return; + + if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) + dev_err(hdev->dev, + "Device boot error - DRAM initialization failed\n"); + if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) + dev_err(hdev->dev, "Device boot error - FIT image corrupted\n"); + if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) + dev_err(hdev->dev, + "Device boot error - Thermal Sensor initialization failed\n"); + if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) + dev_warn(hdev->dev, + "Device boot warning - Skipped DRAM initialization\n"); + if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) + dev_warn(hdev->dev, + "Device boot error - Skipped waiting for BMC\n"); + if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) + dev_err(hdev->dev, + "Device boot error - Serdes data from BMC not available\n"); + if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) + dev_err(hdev->dev, + "Device boot error - NIC F/W initialization failed\n"); +} + +static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status) +{ + switch (status) { + case CPU_BOOT_STATUS_NA: + dev_err(hdev->dev, + "Device boot error - BTL did NOT run\n"); + break; + case CPU_BOOT_STATUS_IN_WFE: + dev_err(hdev->dev, + "Device boot error - Stuck inside WFE loop\n"); + break; + case CPU_BOOT_STATUS_IN_BTL: + dev_err(hdev->dev, + "Device boot error - Stuck in BTL\n"); + break; + case CPU_BOOT_STATUS_IN_PREBOOT: + dev_err(hdev->dev, + "Device boot error - Stuck in Preboot\n"); + break; + case CPU_BOOT_STATUS_IN_SPL: + dev_err(hdev->dev, + "Device boot error - Stuck in SPL\n"); + break; + case CPU_BOOT_STATUS_IN_UBOOT: + dev_err(hdev->dev, + "Device boot error - Stuck in u-boot\n"); + break; + case CPU_BOOT_STATUS_DRAM_INIT_FAIL: + dev_err(hdev->dev, + "Device boot error - DRAM initialization failed\n"); + break; + case CPU_BOOT_STATUS_UBOOT_NOT_READY: + dev_err(hdev->dev, + "Device boot error - u-boot stopped by 
user\n"); + break; + case CPU_BOOT_STATUS_TS_INIT_FAIL: + dev_err(hdev->dev, + "Device boot error - Thermal Sensor initialization failed\n"); + break; + default: + dev_err(hdev->dev, + "Device boot error - Invalid status code %d\n", + status); + break; + } +} + +int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, + u32 boot_err0_reg, bool skip_bmc, + u32 cpu_timeout, u32 boot_fit_timeout) +{ + u32 status; + int rc; + + dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n", + cpu_timeout / USEC_PER_SEC); + + /* Wait for boot FIT request */ + rc = hl_poll_timeout( + hdev, + cpu_boot_status_reg, + status, + status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT, + 10000, + boot_fit_timeout); + + if (rc) { + dev_dbg(hdev->dev, + "No boot fit request received, resuming boot\n"); + } else { + rc = hdev->asic_funcs->load_boot_fit_to_device(hdev); + if (rc) + goto out; + + /* Clear device CPU message status */ + WREG32(cpu_msg_status_reg, CPU_MSG_CLR); + + /* Signal device CPU that boot loader is ready */ + WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY); + + /* Poll for CPU device ack */ + rc = hl_poll_timeout( + hdev, + cpu_msg_status_reg, + status, + status == CPU_MSG_OK, + 10000, + boot_fit_timeout); + + if (rc) { + dev_err(hdev->dev, + "Timeout waiting for boot fit load ack\n"); + goto out; + } + + /* Clear message */ + WREG32(msg_to_cpu_reg, KMD_MSG_NA); + } + + /* Make sure CPU boot-loader is running */ + rc = hl_poll_timeout( + hdev, + cpu_boot_status_reg, + status, + (status == CPU_BOOT_STATUS_DRAM_RDY) || + (status == CPU_BOOT_STATUS_NIC_FW_RDY) || + (status == CPU_BOOT_STATUS_READY_TO_BOOT) || + (status == CPU_BOOT_STATUS_SRAM_AVAIL), + 10000, + cpu_timeout); + + /* Read U-Boot, preboot versions now in case we will later fail */ + hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT); + hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT); + + /* Some of the status codes below are deprecated in newer f/w + * versions but we keep them here for backward compatibility + */ + if (rc) { + hl_detect_cpu_boot_status(hdev, status); + rc = -EIO; + goto out; + } + + if (!hdev->fw_loading) { + dev_info(hdev->dev, "Skip loading FW\n"); + goto out; + } + + if (status == CPU_BOOT_STATUS_SRAM_AVAIL) + goto out; + + dev_info(hdev->dev, + "Loading firmware to device, may take some time...\n"); + + rc = hdev->asic_funcs->load_firmware_to_device(hdev); + if (rc) + goto out; + + if (skip_bmc) { + WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC); + + rc = hl_poll_timeout( + hdev, + cpu_boot_status_reg, + status, + (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED), + 10000, + cpu_timeout); + + if (rc) { + dev_err(hdev->dev, + "Failed to get ACK on skipping BMC, %d\n", + status); + WREG32(msg_to_cpu_reg, KMD_MSG_NA); + rc = -EIO; + goto out; + } + } + + WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY); + + rc = hl_poll_timeout( + hdev, + cpu_boot_status_reg, + status, + (status == CPU_BOOT_STATUS_SRAM_AVAIL), + 10000, + cpu_timeout); + + /* Clear message */ + WREG32(msg_to_cpu_reg, KMD_MSG_NA); + + if (rc) { + if (status == CPU_BOOT_STATUS_FIT_CORRUPTED) + dev_err(hdev->dev, + "Device reports FIT image is corrupted\n"); + else + dev_err(hdev->dev, + "Failed to load firmware to device, %d\n", + status); + + rc = -EIO; + goto out; + } + + dev_info(hdev->dev, "Successfully loaded firmware to device\n"); + +out: + fw_read_errors(hdev, boot_err0_reg); + + return rc; +} diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h new file mode 100644 index 000000000000..82532f1f94cb --- /dev/null +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -0,0 +1,1948 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef HABANALABSP_H_ +#define HABANALABSP_H_ + +#include "include/common/armcp_if.h" +#include "include/common/qman_if.h" +#include + +#include +#include +#include +#include +#include +#include +#include + +#define HL_NAME "habanalabs" + +#define HL_MMAP_CB_MASK (0x8000000000000000ull >> PAGE_SHIFT) + +#define HL_PENDING_RESET_PER_SEC 30 + +#define HL_HARD_RESET_MAX_TIMEOUT 120 + +#define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */ + +#define HL_HEARTBEAT_PER_USEC 5000000 /* 5 s */ + +#define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */ + +#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ + +#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ + +#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */ + +#define HL_IDLE_BUSY_TS_ARR_SIZE 4096 + +/* Memory */ +#define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ + +/* MMU */ +#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ + +/* + * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream + * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream + */ +#define HL_RSVD_SOBS 4 +#define HL_RSVD_MONS 2 + +#define HL_RSVD_SOBS_IN_USE 2 +#define HL_RSVD_MONS_IN_USE 1 + +#define HL_MAX_SOB_VAL (1 << 15) + +#define IS_POWER_OF_2(n) (n != 0 && ((n & (n - 1)) == 0)) +#define IS_MAX_PENDING_CS_VALID(n) (IS_POWER_OF_2(n) && (n > 1)) + +#define HL_PCI_NUM_BARS 6 + +/** + * struct pgt_info - MMU hop page info. + * @node: hash linked-list node for the pgts shadow hash of pgts. + * @phys_addr: physical address of the pgt. + * @shadow_addr: shadow hop in the host. + * @ctx: pointer to the owner ctx. + * @num_of_ptes: indicates how many ptes are used in the pgt. + * + * The MMU page tables hierarchy is placed on the DRAM. When a new level (hop) + * is needed during mapping, a new page is allocated and this structure holds + * its essential information. During unmapping, if no valid PTEs remained in the + * page, it is freed with its pgt_info structure. + */ +struct pgt_info { + struct hlist_node node; + u64 phys_addr; + u64 shadow_addr; + struct hl_ctx *ctx; + int num_of_ptes; +}; + +struct hl_device; +struct hl_fpriv; + +/** + * enum hl_pci_match_mode - pci match mode per region + * @PCI_ADDRESS_MATCH_MODE: address match mode + * @PCI_BAR_MATCH_MODE: bar match mode + */ +enum hl_pci_match_mode { + PCI_ADDRESS_MATCH_MODE, + PCI_BAR_MATCH_MODE +}; + +/** + * enum hl_fw_component - F/W components to read version through registers. + * @FW_COMP_UBOOT: u-boot. + * @FW_COMP_PREBOOT: preboot. + */ +enum hl_fw_component { + FW_COMP_UBOOT, + FW_COMP_PREBOOT +}; + +/** + * enum hl_queue_type - Supported QUEUE types. + * @QUEUE_TYPE_NA: queue is not available. + * @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the + * host. + * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's + * memories and/or operates the compute engines. + * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU. + * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion + * notifications are sent by H/W. 
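+ *
+ * For example, a DMA engine that copies between host and device memory is
+ * exposed as QUEUE_TYPE_EXT, compute engines fed from device memory use
+ * QUEUE_TYPE_INT, and the queue used for talking to the device CPU is
+ * QUEUE_TYPE_CPU (illustrative mapping; per-queue details live in
+ * struct hw_queue_properties below).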
+ */ +enum hl_queue_type { + QUEUE_TYPE_NA, + QUEUE_TYPE_EXT, + QUEUE_TYPE_INT, + QUEUE_TYPE_CPU, + QUEUE_TYPE_HW +}; + +enum hl_cs_type { + CS_TYPE_DEFAULT, + CS_TYPE_SIGNAL, + CS_TYPE_WAIT +}; + +/* + * struct hl_inbound_pci_region - inbound region descriptor + * @mode: pci match mode for this region + * @addr: region target address + * @size: region size in bytes + * @offset_in_bar: offset within bar (address match mode) + * @bar: bar id + */ +struct hl_inbound_pci_region { + enum hl_pci_match_mode mode; + u64 addr; + u64 size; + u64 offset_in_bar; + u8 bar; +}; + +/* + * struct hl_outbound_pci_region - outbound region descriptor + * @addr: region target address + * @size: region size in bytes + */ +struct hl_outbound_pci_region { + u64 addr; + u64 size; +}; + +/* + * struct hl_hw_sob - H/W SOB info. + * @hdev: habanalabs device structure. + * @kref: refcount of this SOB. The SOB will reset once the refcount is zero. + * @sob_id: id of this SOB. + * @q_idx: the H/W queue that uses this SOB. + */ +struct hl_hw_sob { + struct hl_device *hdev; + struct kref kref; + u32 sob_id; + u32 q_idx; +}; + +/** + * struct hw_queue_properties - queue information. + * @type: queue type. + * @driver_only: true if only the driver is allowed to send a job to this queue, + * false otherwise. + * @requires_kernel_cb: true if a CB handle must be provided for jobs on this + * queue, false otherwise (a CB address must be provided). + * @supports_sync_stream: True if queue supports sync stream + */ +struct hw_queue_properties { + enum hl_queue_type type; + u8 driver_only; + u8 requires_kernel_cb; + u8 supports_sync_stream; +}; + +/** + * enum vm_type_t - virtual memory mapping request information. + * @VM_TYPE_USERPTR: mapping of user memory to device virtual address. + * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address. + */ +enum vm_type_t { + VM_TYPE_USERPTR = 0x1, + VM_TYPE_PHYS_PACK = 0x2 +}; + +/** + * enum hl_device_hw_state - H/W device state. use this to understand whether + * to do reset before hw_init or not + * @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean. i.e. after hard reset + * @HL_DEVICE_HW_STATE_DIRTY: H/W state is dirty. i.e. we started to execute + * hw_init + */ +enum hl_device_hw_state { + HL_DEVICE_HW_STATE_CLEAN = 0, + HL_DEVICE_HW_STATE_DIRTY +}; + +/** + * struct hl_mmu_properties - ASIC specific MMU address translation properties. + * @start_addr: virtual start address of the memory region. + * @end_addr: virtual end address of the memory region. + * @hop0_shift: shift of hop 0 mask. + * @hop1_shift: shift of hop 1 mask. + * @hop2_shift: shift of hop 2 mask. + * @hop3_shift: shift of hop 3 mask. + * @hop4_shift: shift of hop 4 mask. + * @hop0_mask: mask to get the PTE address in hop 0. + * @hop1_mask: mask to get the PTE address in hop 1. + * @hop2_mask: mask to get the PTE address in hop 2. + * @hop3_mask: mask to get the PTE address in hop 3. + * @hop4_mask: mask to get the PTE address in hop 4. + * @page_size: default page size used to allocate memory. + */ +struct hl_mmu_properties { + u64 start_addr; + u64 end_addr; + u64 hop0_shift; + u64 hop1_shift; + u64 hop2_shift; + u64 hop3_shift; + u64 hop4_shift; + u64 hop0_mask; + u64 hop1_mask; + u64 hop2_mask; + u64 hop3_mask; + u64 hop4_mask; + u32 page_size; +}; + +/** + * struct asic_fixed_properties - ASIC specific immutable properties. + * @hw_queues_props: H/W queues properties. + * @armcp_info: received various information from ArmCP regarding the H/W, e.g. + * available sensors. 
+ * @uboot_ver: F/W U-boot version. + * @preboot_ver: F/W Preboot version. + * @dmmu: DRAM MMU address translation properties. + * @pmmu: PCI (host) MMU address translation properties. + * @pmmu_huge: PCI (host) MMU address translation properties for memory + * allocated with huge pages. + * @sram_base_address: SRAM physical start address. + * @sram_end_address: SRAM physical end address. + * @sram_user_base_address - SRAM physical start address for user access. + * @dram_base_address: DRAM physical start address. + * @dram_end_address: DRAM physical end address. + * @dram_user_base_address: DRAM physical start address for user access. + * @dram_size: DRAM total size. + * @dram_pci_bar_size: size of PCI bar towards DRAM. + * @max_power_default: max power of the device after reset + * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page + * fault. + * @pcie_dbi_base_address: Base address of the PCIE_DBI block. + * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register. + * @mmu_pgt_addr: base physical address in DRAM of MMU page tables. + * @mmu_dram_default_page_addr: DRAM default page physical address. + * @mmu_pgt_size: MMU page tables total size. + * @mmu_pte_size: PTE size in MMU page tables. + * @mmu_hop_table_size: MMU hop table size. + * @mmu_hop0_tables_total_size: total size of MMU hop0 tables. + * @dram_page_size: page size for MMU DRAM allocation. + * @cfg_size: configuration space size on SRAM. + * @sram_size: total size of SRAM. + * @max_asid: maximum number of open contexts (ASIDs). + * @num_of_events: number of possible internal H/W IRQs. + * @psoc_pci_pll_nr: PCI PLL NR value. + * @psoc_pci_pll_nf: PCI PLL NF value. + * @psoc_pci_pll_od: PCI PLL OD value. + * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value. + * @psoc_timestamp_frequency: frequency of the psoc timestamp clock. + * @high_pll: high PLL frequency used by the device. + * @cb_pool_cb_cnt: number of CBs in the CB pool. + * @cb_pool_cb_size: size of each CB in the CB pool. + * @max_pending_cs: maximum of concurrent pending command submissions + * @max_queues: maximum amount of queues in the system + * @sync_stream_first_sob: first sync object available for sync stream use + * @sync_stream_first_mon: first monitor available for sync stream use + * @tpc_enabled_mask: which TPCs are enabled. + * @completion_queues_count: number of completion queues. 
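+ *
+ * These properties are set once by the ASIC-specific code and treated as
+ * read-only by the common code; a purely hypothetical initialization snippet
+ * (field values are illustrative) would look like:
+ *
+ *	prop->max_queues = 128;
+ *	prop->max_asid = 1024;
+ *	prop->completion_queues_count = 5;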
+ */ +struct asic_fixed_properties { + struct hw_queue_properties *hw_queues_props; + struct armcp_info armcp_info; + char uboot_ver[VERSION_MAX_LEN]; + char preboot_ver[VERSION_MAX_LEN]; + struct hl_mmu_properties dmmu; + struct hl_mmu_properties pmmu; + struct hl_mmu_properties pmmu_huge; + u64 sram_base_address; + u64 sram_end_address; + u64 sram_user_base_address; + u64 dram_base_address; + u64 dram_end_address; + u64 dram_user_base_address; + u64 dram_size; + u64 dram_pci_bar_size; + u64 max_power_default; + u64 dram_size_for_default_page_mapping; + u64 pcie_dbi_base_address; + u64 pcie_aux_dbi_reg_addr; + u64 mmu_pgt_addr; + u64 mmu_dram_default_page_addr; + u32 mmu_pgt_size; + u32 mmu_pte_size; + u32 mmu_hop_table_size; + u32 mmu_hop0_tables_total_size; + u32 dram_page_size; + u32 cfg_size; + u32 sram_size; + u32 max_asid; + u32 num_of_events; + u32 psoc_pci_pll_nr; + u32 psoc_pci_pll_nf; + u32 psoc_pci_pll_od; + u32 psoc_pci_pll_div_factor; + u32 psoc_timestamp_frequency; + u32 high_pll; + u32 cb_pool_cb_cnt; + u32 cb_pool_cb_size; + u32 max_pending_cs; + u32 max_queues; + u16 sync_stream_first_sob; + u16 sync_stream_first_mon; + u8 tpc_enabled_mask; + u8 completion_queues_count; +}; + +/** + * struct hl_cs_compl - command submission completion object. + * @base_fence: kernel fence object. + * @lock: spinlock to protect fence. + * @hdev: habanalabs device structure. + * @hw_sob: the H/W SOB used in this signal/wait CS. + * @cs_seq: command submission sequence number. + * @type: type of the CS - signal/wait. + * @sob_val: the SOB value that is used in this signal/wait CS. + */ +struct hl_cs_compl { + struct dma_fence base_fence; + spinlock_t lock; + struct hl_device *hdev; + struct hl_hw_sob *hw_sob; + u64 cs_seq; + enum hl_cs_type type; + u16 sob_val; +}; + +/* + * Command Buffers + */ + +/** + * struct hl_cb_mgr - describes a Command Buffer Manager. + * @cb_lock: protects cb_handles. + * @cb_handles: an idr to hold all command buffer handles. + */ +struct hl_cb_mgr { + spinlock_t cb_lock; + struct idr cb_handles; /* protected by cb_lock */ +}; + +/** + * struct hl_cb - describes a Command Buffer. + * @refcount: reference counter for usage of the CB. + * @hdev: pointer to device this CB belongs to. + * @lock: spinlock to protect mmap/cs flows. + * @debugfs_list: node in debugfs list of command buffers. + * @pool_list: node in pool list of command buffers. + * @kernel_address: Holds the CB's kernel virtual address. + * @bus_address: Holds the CB's DMA address. + * @mmap_size: Holds the CB's size that was mmaped. + * @size: holds the CB's size. + * @id: the CB's ID. + * @cs_cnt: holds number of CS that this CB participates in. + * @ctx_id: holds the ID of the owner's context. + * @mmap: true if the CB is currently mmaped to user. + * @is_pool: true if CB was acquired from the pool, false otherwise. 
+ */ +struct hl_cb { + struct kref refcount; + struct hl_device *hdev; + spinlock_t lock; + struct list_head debugfs_list; + struct list_head pool_list; + u64 kernel_address; + dma_addr_t bus_address; + u32 mmap_size; + u32 size; + u32 id; + u32 cs_cnt; + u32 ctx_id; + u8 mmap; + u8 is_pool; +}; + + +/* + * QUEUES + */ + +struct hl_cs_job; + +/* Queue length of external and HW queues */ +#define HL_QUEUE_LENGTH 4096 +#define HL_QUEUE_SIZE_IN_BYTES (HL_QUEUE_LENGTH * HL_BD_SIZE) + +#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH) +#error "HL_QUEUE_LENGTH must be greater than HL_MAX_JOBS_PER_CS" +#endif + +/* HL_CQ_LENGTH is in units of struct hl_cq_entry */ +#define HL_CQ_LENGTH HL_QUEUE_LENGTH +#define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE) + +/* Must be power of 2 */ +#define HL_EQ_LENGTH 64 +#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE) + +/* Host <-> ArmCP shared memory size */ +#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M + +/** + * struct hl_hw_queue - describes a H/W transport queue. + * @hw_sob: array of the used H/W SOBs by this H/W queue. + * @shadow_queue: pointer to a shadow queue that holds pointers to jobs. + * @queue_type: type of queue. + * @kernel_address: holds the queue's kernel virtual address. + * @bus_address: holds the queue's DMA address. + * @pi: holds the queue's pi value. + * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci). + * @hw_queue_id: the id of the H/W queue. + * @cq_id: the id for the corresponding CQ for this H/W queue. + * @msi_vec: the IRQ number of the H/W queue. + * @int_queue_len: length of internal queue (number of entries). + * @next_sob_val: the next value to use for the currently used SOB. + * @base_sob_id: the base SOB id of the SOBs used by this queue. + * @base_mon_id: the base MON id of the MONs used by this queue. + * @valid: is the queue valid (we have array of 32 queues, not all of them + * exist). + * @curr_sob_offset: the id offset to the currently used SOB from the + * HL_RSVD_SOBS that are being used by this queue. + * @supports_sync_stream: True if queue supports sync stream + */ +struct hl_hw_queue { + struct hl_hw_sob hw_sob[HL_RSVD_SOBS]; + struct hl_cs_job **shadow_queue; + enum hl_queue_type queue_type; + u64 kernel_address; + dma_addr_t bus_address; + u32 pi; + atomic_t ci; + u32 hw_queue_id; + u32 cq_id; + u32 msi_vec; + u16 int_queue_len; + u16 next_sob_val; + u16 base_sob_id; + u16 base_mon_id; + u8 valid; + u8 curr_sob_offset; + u8 supports_sync_stream; +}; + +/** + * struct hl_cq - describes a completion queue + * @hdev: pointer to the device structure + * @kernel_address: holds the queue's kernel virtual address + * @bus_address: holds the queue's DMA address + * @cq_idx: completion queue index in array + * @hw_queue_id: the id of the matching H/W queue + * @ci: ci inside the queue + * @pi: pi inside the queue + * @free_slots_cnt: counter of free slots in queue + */ +struct hl_cq { + struct hl_device *hdev; + u64 kernel_address; + dma_addr_t bus_address; + u32 cq_idx; + u32 hw_queue_id; + u32 ci; + u32 pi; + atomic_t free_slots_cnt; +}; + +/** + * struct hl_eq - describes the event queue (single one per device) + * @hdev: pointer to the device structure + * @kernel_address: holds the queue's kernel virtual address + * @bus_address: holds the queue's DMA address + * @ci: ci inside the queue + */ +struct hl_eq { + struct hl_device *hdev; + u64 kernel_address; + dma_addr_t bus_address; + u32 ci; +}; + + +/* + * ASICs + */ + +/** + * enum hl_asic_type - supported ASIC types. 
+ * @ASIC_INVALID: Invalid ASIC type. + * @ASIC_GOYA: Goya device. + * @ASIC_GAUDI: Gaudi device. + */ +enum hl_asic_type { + ASIC_INVALID, + ASIC_GOYA, + ASIC_GAUDI +}; + +struct hl_cs_parser; + +/** + * enum hl_pm_mng_profile - power management profile. + * @PM_AUTO: internal clock is set by the Linux driver. + * @PM_MANUAL: internal clock is set by the user. + * @PM_LAST: last power management type. + */ +enum hl_pm_mng_profile { + PM_AUTO = 1, + PM_MANUAL, + PM_LAST +}; + +/** + * enum hl_pll_frequency - PLL frequency. + * @PLL_HIGH: high frequency. + * @PLL_LOW: low frequency. + * @PLL_LAST: last frequency values that were configured by the user. + */ +enum hl_pll_frequency { + PLL_HIGH = 1, + PLL_LOW, + PLL_LAST +}; + +#define PLL_REF_CLK 50 + +enum div_select_defs { + DIV_SEL_REF_CLK = 0, + DIV_SEL_PLL_CLK = 1, + DIV_SEL_DIVIDED_REF = 2, + DIV_SEL_DIVIDED_PLL = 3, +}; + +/** + * struct hl_asic_funcs - ASIC specific functions that are can be called from + * common code. + * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W. + * @early_fini: tears down what was done in early_init. + * @late_init: sets up late driver/hw state (post hw_init) - Optional. + * @late_fini: tears down what was done in late_init (pre hw_fini) - Optional. + * @sw_init: sets up driver state, does not configure H/W. + * @sw_fini: tears down driver state, does not configure H/W. + * @hw_init: sets up the H/W state. + * @hw_fini: tears down the H/W state. + * @halt_engines: halt engines, needed for reset sequence. This also disables + * interrupts from the device. Should be called before + * hw_fini and before CS rollback. + * @suspend: handles IP specific H/W or SW changes for suspend. + * @resume: handles IP specific H/W or SW changes for resume. + * @cb_mmap: maps a CB. + * @ring_doorbell: increment PI on a given QMAN. + * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific + * function because the PQs are located in different memory areas + * per ASIC (SRAM, DRAM, Host memory) and therefore, the method of + * writing the PQE must match the destination memory area + * properties. + * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling + * dma_alloc_coherent(). This is ASIC function because + * its implementation is not trivial when the driver + * is loaded in simulation mode (not upstreamed). + * @asic_dma_free_coherent: Free coherent DMA memory by calling + * dma_free_coherent(). This is ASIC function because + * its implementation is not trivial when the driver + * is loaded in simulation mode (not upstreamed). + * @get_int_queue_base: get the internal queue base address. + * @test_queues: run simple test on all queues for sanity check. + * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool. + * size of allocation is HL_DMA_POOL_BLK_SIZE. + * @asic_dma_pool_free: free small DMA allocation from pool. + * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool. + * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool. + * @hl_dma_unmap_sg: DMA unmap scatter-gather list. + * @cs_parser: parse Command Submission. + * @asic_dma_map_sg: DMA map scatter-gather list. + * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB. + * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it. + * @update_eq_ci: update event queue CI. + * @context_switch: called upon ASID context switch. + * @restore_phase_topology: clear all SOBs amd MONs. 
+ * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM. + * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM. + * @add_device_attr: add ASIC specific device attributes. + * @handle_eqe: handle event queue entry (IRQ) from ArmCP. + * @set_pll_profile: change PLL profile (manual/automatic). + * @get_events_stat: retrieve event queue entries histogram. + * @read_pte: read MMU page table entry from DRAM. + * @write_pte: write MMU page table entry to DRAM. + * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft + * (L1 only) or hard (L0 & L1) flush. + * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with + * ASID-VA-size mask. + * @send_heartbeat: send is-alive packet to ArmCP and verify response. + * @enable_clock_gating: enable clock gating for reducing power consumption. + * @disable_clock_gating: disable clock for accessing registers on HBW. + * @debug_coresight: perform certain actions on Coresight for debugging. + * @is_device_idle: return true if device is idle, false otherwise. + * @soft_reset_late_init: perform certain actions needed after soft reset. + * @hw_queues_lock: acquire H/W queues lock. + * @hw_queues_unlock: release H/W queues lock. + * @get_pci_id: retrieve PCI ID. + * @get_eeprom_data: retrieve EEPROM data from F/W. + * @send_cpu_message: send buffer to ArmCP. + * @get_hw_state: retrieve the H/W state + * @pci_bars_map: Map PCI BARs. + * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns + * old address the bar pointed to or U64_MAX for failure + * @init_iatu: Initialize the iATU unit inside the PCI controller. + * @rreg: Read a register. Needed for simulator support. + * @wreg: Write a register. Needed for simulator support. + * @halt_coresight: stop the ETF and ETR traces. + * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz + * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index. + * @read_device_fw_version: read the device's firmware versions that are + * contained in registers + * @load_firmware_to_device: load the firmware to the device's memory + * @load_boot_fit_to_device: load boot fit to device's memory + * @get_signal_cb_size: Get signal CB size. + * @get_wait_cb_size: Get wait CB size. + * @gen_signal_cb: Generate a signal CB. + * @gen_wait_cb: Generate a wait CB. + * @reset_sob: Reset a SOB. + * @set_dma_mask_from_fw: set the DMA mask in the driver according to the + * firmware configuration + * @get_device_time: Get the device time. 
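+ *
+ * Common code is meant to reach the H/W only through this table; for
+ * example, the teardown flow earlier in this patch dispatches:
+ *
+ *	hdev->asic_funcs->hw_fini(hdev, true);
+ *	...
+ *	hdev->asic_funcs->sw_fini(hdev);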
+ */ +struct hl_asic_funcs { + int (*early_init)(struct hl_device *hdev); + int (*early_fini)(struct hl_device *hdev); + int (*late_init)(struct hl_device *hdev); + void (*late_fini)(struct hl_device *hdev); + int (*sw_init)(struct hl_device *hdev); + int (*sw_fini)(struct hl_device *hdev); + int (*hw_init)(struct hl_device *hdev); + void (*hw_fini)(struct hl_device *hdev, bool hard_reset); + void (*halt_engines)(struct hl_device *hdev, bool hard_reset); + int (*suspend)(struct hl_device *hdev); + int (*resume)(struct hl_device *hdev); + int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, + u64 kaddress, phys_addr_t paddress, u32 size); + void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi); + void (*pqe_write)(struct hl_device *hdev, __le64 *pqe, + struct hl_bd *bd); + void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle, gfp_t flag); + void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle); + void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id, + dma_addr_t *dma_handle, u16 *queue_len); + int (*test_queues)(struct hl_device *hdev); + void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size, + gfp_t mem_flags, dma_addr_t *dma_handle); + void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr, + dma_addr_t dma_addr); + void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev, + size_t size, dma_addr_t *dma_handle); + void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev, + size_t size, void *vaddr); + void (*hl_dma_unmap_sg)(struct hl_device *hdev, + struct scatterlist *sgl, int nents, + enum dma_data_direction dir); + int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser); + int (*asic_dma_map_sg)(struct hl_device *hdev, + struct scatterlist *sgl, int nents, + enum dma_data_direction dir); + u32 (*get_dma_desc_list_size)(struct hl_device *hdev, + struct sg_table *sgt); + void (*add_end_of_cb_packets)(struct hl_device *hdev, + u64 kernel_address, u32 len, + u64 cq_addr, u32 cq_val, u32 msix_num, + bool eb); + void (*update_eq_ci)(struct hl_device *hdev, u32 val); + int (*context_switch)(struct hl_device *hdev, u32 asid); + void (*restore_phase_topology)(struct hl_device *hdev); + int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val); + int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val); + int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val); + int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val); + void (*add_device_attr)(struct hl_device *hdev, + struct attribute_group *dev_attr_grp); + void (*handle_eqe)(struct hl_device *hdev, + struct hl_eq_entry *eq_entry); + void (*set_pll_profile)(struct hl_device *hdev, + enum hl_pll_frequency freq); + void* (*get_events_stat)(struct hl_device *hdev, bool aggregate, + u32 *size); + u64 (*read_pte)(struct hl_device *hdev, u64 addr); + void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val); + int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard, + u32 flags); + int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, + u32 asid, u64 va, u64 size); + int (*send_heartbeat)(struct hl_device *hdev); + void (*enable_clock_gating)(struct hl_device *hdev); + void (*disable_clock_gating)(struct hl_device *hdev); + int (*debug_coresight)(struct hl_device *hdev, void *data); + bool (*is_device_idle)(struct hl_device *hdev, u32 *mask, + struct seq_file *s); + int (*soft_reset_late_init)(struct 
hl_device *hdev); + void (*hw_queues_lock)(struct hl_device *hdev); + void (*hw_queues_unlock)(struct hl_device *hdev); + u32 (*get_pci_id)(struct hl_device *hdev); + int (*get_eeprom_data)(struct hl_device *hdev, void *data, + size_t max_size); + int (*send_cpu_message)(struct hl_device *hdev, u32 *msg, + u16 len, u32 timeout, long *result); + enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev); + int (*pci_bars_map)(struct hl_device *hdev); + u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); + int (*init_iatu)(struct hl_device *hdev); + u32 (*rreg)(struct hl_device *hdev, u32 reg); + void (*wreg)(struct hl_device *hdev, u32 reg, u32 val); + void (*halt_coresight)(struct hl_device *hdev); + int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); + u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx); + void (*read_device_fw_version)(struct hl_device *hdev, + enum hl_fw_component fwc); + int (*load_firmware_to_device)(struct hl_device *hdev); + int (*load_boot_fit_to_device)(struct hl_device *hdev); + u32 (*get_signal_cb_size)(struct hl_device *hdev); + u32 (*get_wait_cb_size)(struct hl_device *hdev); + void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id); + void (*gen_wait_cb)(struct hl_device *hdev, void *data, u16 sob_id, + u16 sob_val, u16 mon_id, u32 q_idx); + void (*reset_sob)(struct hl_device *hdev, void *data); + void (*set_dma_mask_from_fw)(struct hl_device *hdev); + u64 (*get_device_time)(struct hl_device *hdev); +}; + + +/* + * CONTEXTS + */ + +#define HL_KERNEL_ASID_ID 0 + +/** + * struct hl_va_range - virtual addresses range. + * @lock: protects the virtual addresses list. + * @list: list of virtual addresses blocks available for mappings. + * @start_addr: range start address. + * @end_addr: range end address. + */ +struct hl_va_range { + struct mutex lock; + struct list_head list; + u64 start_addr; + u64 end_addr; +}; + +/** + * struct hl_ctx - user/kernel context. + * @mem_hash: holds mapping from virtual address to virtual memory area + * descriptor (hl_vm_phys_pg_list or hl_userptr). + * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure. + * @hpriv: pointer to the private (Kernel Driver) data of the process (fd). + * @hdev: pointer to the device structure. + * @refcount: reference counter for the context. Context is released only when + * this hits 0l. It is incremented on CS and CS_WAIT. + * @cs_pending: array of DMA fence objects representing pending CS. + * @host_va_range: holds available virtual addresses for host mappings. + * @host_huge_va_range: holds available virtual addresses for host mappings + * with huge pages. + * @dram_va_range: holds available virtual addresses for DRAM mappings. + * @mem_hash_lock: protects the mem_hash. + * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the + * MMU hash or walking the PGT requires talking this lock. + * @debugfs_list: node in debugfs list of contexts. + * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed + * to user so user could inquire about CS. It is used as + * index to cs_pending array. + * @dram_default_hops: array that holds all hops addresses needed for default + * DRAM mapping. + * @cs_lock: spinlock to protect cs_sequence. + * @dram_phys_mem: amount of used physical DRAM memory by this context. + * @thread_ctx_switch_token: token to prevent multiple threads of the same + * context from running the context switch phase. + * Only a single thread should run it. 
+ * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run + * the context switch phase from moving to their + * execution phase before the context switch phase + * has finished. + * @asid: context's unique address space ID in the device's MMU. + * @handle: context's opaque handle for user + */ +struct hl_ctx { + DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS); + DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS); + struct hl_fpriv *hpriv; + struct hl_device *hdev; + struct kref refcount; + struct dma_fence **cs_pending; + struct hl_va_range *host_va_range; + struct hl_va_range *host_huge_va_range; + struct hl_va_range *dram_va_range; + struct mutex mem_hash_lock; + struct mutex mmu_lock; + struct list_head debugfs_list; + struct hl_cs_counters cs_counters; + u64 cs_sequence; + u64 *dram_default_hops; + spinlock_t cs_lock; + atomic64_t dram_phys_mem; + atomic_t thread_ctx_switch_token; + u32 thread_ctx_switch_wait_token; + u32 asid; + u32 handle; +}; + +/** + * struct hl_ctx_mgr - for handling multiple contexts. + * @ctx_lock: protects ctx_handles. + * @ctx_handles: idr to hold all ctx handles. + */ +struct hl_ctx_mgr { + struct mutex ctx_lock; + struct idr ctx_handles; +}; + + + +/* + * COMMAND SUBMISSIONS + */ + +/** + * struct hl_userptr - memory mapping chunk information + * @vm_type: type of the VM. + * @job_node: linked-list node for hanging the object on the Job's list. + * @vec: pointer to the frame vector. + * @sgt: pointer to the scatter-gather table that holds the pages. + * @dir: for DMA unmapping, the direction must be supplied, so save it. + * @debugfs_list: node in debugfs list of command submissions. + * @addr: user-space virtual address of the start of the memory area. + * @size: size of the memory area to pin & map. + * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise. + */ +struct hl_userptr { + enum vm_type_t vm_type; /* must be first */ + struct list_head job_node; + struct frame_vector *vec; + struct sg_table *sgt; + enum dma_data_direction dir; + struct list_head debugfs_list; + u64 addr; + u32 size; + u8 dma_mapped; +}; + +/** + * struct hl_cs - command submission. + * @jobs_in_queue_cnt: per each queue, maintain counter of submitted jobs. + * @ctx: the context this CS belongs to. + * @job_list: list of the CS's jobs in the various queues. + * @job_lock: spinlock for the CS's jobs list. Needed for free_job. + * @refcount: reference counter for usage of the CS. + * @fence: pointer to the fence object of this CS. + * @signal_fence: pointer to the fence object of the signal CS (used by wait + * CS only). + * @finish_work: workqueue object to run when CS is completed by H/W. + * @work_tdr: delayed work node for TDR. + * @mirror_node : node in device mirror list of command submissions. + * @debugfs_list: node in debugfs list of command submissions. + * @sequence: the sequence number of this CS. + * @type: CS_TYPE_*. + * @submitted: true if CS was submitted to H/W. + * @completed: true if CS was completed by device. + * @timedout : true if CS was timedout. + * @tdr_active: true if TDR was activated for this CS (to prevent + * double TDR activation). + * @aborted: true if CS was aborted due to some device error. 
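+ *
+ * Roughly: @work_tdr is armed when the CS is submitted to the H/W queues;
+ * if it fires before the CS completes, @timedout is set and @tdr_active
+ * prevents a second activation for the same CS.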
+ */ +struct hl_cs { + u16 *jobs_in_queue_cnt; + struct hl_ctx *ctx; + struct list_head job_list; + spinlock_t job_lock; + struct kref refcount; + struct dma_fence *fence; + struct dma_fence *signal_fence; + struct work_struct finish_work; + struct delayed_work work_tdr; + struct list_head mirror_node; + struct list_head debugfs_list; + u64 sequence; + enum hl_cs_type type; + u8 submitted; + u8 completed; + u8 timedout; + u8 tdr_active; + u8 aborted; +}; + +/** + * struct hl_cs_job - command submission job. + * @cs_node: the node to hang on the CS jobs list. + * @cs: the CS this job belongs to. + * @user_cb: the CB we got from the user. + * @patched_cb: in case of patching, this is internal CB which is submitted on + * the queue instead of the CB we got from the IOCTL. + * @finish_work: workqueue object to run when job is completed. + * @userptr_list: linked-list of userptr mappings that belong to this job and + * wait for completion. + * @debugfs_list: node in debugfs list of command submission jobs. + * @queue_type: the type of the H/W queue this job is submitted to. + * @id: the id of this job inside a CS. + * @hw_queue_id: the id of the H/W queue this job is submitted to. + * @user_cb_size: the actual size of the CB we got from the user. + * @job_cb_size: the actual size of the CB that we put on the queue. + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a + * handle to a kernel-allocated CB object, false + * otherwise (SRAM/DRAM/host address). + * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This + * info is needed later, when adding the 2xMSG_PROT at the + * end of the JOB, to know which barriers to put in the + * MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't + * have streams so the engine can't be busy by another + * stream. + */ +struct hl_cs_job { + struct list_head cs_node; + struct hl_cs *cs; + struct hl_cb *user_cb; + struct hl_cb *patched_cb; + struct work_struct finish_work; + struct list_head userptr_list; + struct list_head debugfs_list; + enum hl_queue_type queue_type; + u32 id; + u32 hw_queue_id; + u32 user_cb_size; + u32 job_cb_size; + u8 is_kernel_allocated_cb; + u8 contains_dma_pkt; +}; + +/** + * struct hl_cs_parser - command submission parser properties. + * @user_cb: the CB we got from the user. + * @patched_cb: in case of patching, this is internal CB which is submitted on + * the queue instead of the CB we got from the IOCTL. + * @job_userptr_list: linked-list of userptr mappings that belong to the related + * job and wait for completion. + * @cs_sequence: the sequence number of the related CS. + * @queue_type: the type of the H/W queue this job is submitted to. + * @ctx_id: the ID of the context the related CS belongs to. + * @hw_queue_id: the id of the H/W queue this job is submitted to. + * @user_cb_size: the actual size of the CB we got from the user. + * @patched_cb_size: the size of the CB after parsing. + * @job_id: the id of the related job inside the related CS. + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a + * handle to a kernel-allocated CB object, false + * otherwise (SRAM/DRAM/host address). + * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This + * info is needed later, when adding the 2xMSG_PROT at the + * end of the JOB, to know which barriers to put in the + * MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't + * have streams so the engine can't be busy by another + * stream. 
+ */ +struct hl_cs_parser { + struct hl_cb *user_cb; + struct hl_cb *patched_cb; + struct list_head *job_userptr_list; + u64 cs_sequence; + enum hl_queue_type queue_type; + u32 ctx_id; + u32 hw_queue_id; + u32 user_cb_size; + u32 patched_cb_size; + u8 job_id; + u8 is_kernel_allocated_cb; + u8 contains_dma_pkt; +}; + + +/* + * MEMORY STRUCTURE + */ + +/** + * struct hl_vm_hash_node - hash element from virtual address to virtual + * memory area descriptor (hl_vm_phys_pg_list or + * hl_userptr). + * @node: node to hang on the hash table in context object. + * @vaddr: key virtual address. + * @ptr: value pointer (hl_vm_phys_pg_list or hl_userptr). + */ +struct hl_vm_hash_node { + struct hlist_node node; + u64 vaddr; + void *ptr; +}; + +/** + * struct hl_vm_phys_pg_pack - physical page pack. + * @vm_type: describes the type of the virtual area descriptor. + * @pages: the physical page array. + * @npages: num physical pages in the pack. + * @total_size: total size of all the pages in this list. + * @mapping_cnt: number of shared mappings. + * @asid: the context related to this list. + * @page_size: size of each page in the pack. + * @flags: HL_MEM_* flags related to this list. + * @handle: the provided handle related to this list. + * @offset: offset from the first page. + * @contiguous: is contiguous physical memory. + * @created_from_userptr: is product of host virtual address. + */ +struct hl_vm_phys_pg_pack { + enum vm_type_t vm_type; /* must be first */ + u64 *pages; + u64 npages; + u64 total_size; + atomic_t mapping_cnt; + u32 asid; + u32 page_size; + u32 flags; + u32 handle; + u32 offset; + u8 contiguous; + u8 created_from_userptr; +}; + +/** + * struct hl_vm_va_block - virtual range block information. + * @node: node to hang on the virtual range list in context object. + * @start: virtual range start address. + * @end: virtual range end address. + * @size: virtual range size. + */ +struct hl_vm_va_block { + struct list_head node; + u64 start; + u64 end; + u64 size; +}; + +/** + * struct hl_vm - virtual memory manager for MMU. + * @dram_pg_pool: pool for DRAM physical pages of 2MB. + * @dram_pg_pool_refcount: reference counter for the pool usage. + * @idr_lock: protects the phys_pg_list_handles. + * @phys_pg_pack_handles: idr to hold all device allocations handles. + * @init_done: whether initialization was done. We need this because VM + * initialization might be skipped during device initialization. + */ +struct hl_vm { + struct gen_pool *dram_pg_pool; + struct kref dram_pg_pool_refcount; + spinlock_t idr_lock; + struct idr phys_pg_pack_handles; + u8 init_done; +}; + + +/* + * DEBUG, PROFILING STRUCTURE + */ + +/** + * struct hl_debug_params - Coresight debug parameters. + * @input: pointer to component specific input parameters. + * @output: pointer to component specific output parameters. + * @output_size: size of output buffer. + * @reg_idx: relevant register ID. + * @op: component operation to execute. + * @enable: true if to enable component debugging, false otherwise. + */ +struct hl_debug_params { + void *input; + void *output; + u32 output_size; + u32 reg_idx; + u32 op; + bool enable; +}; + +/* + * FILE PRIVATE STRUCTURE + */ + +/** + * struct hl_fpriv - process information stored in FD private data. + * @hdev: habanalabs device structure. + * @filp: pointer to the given file structure. + * @taskpid: current process ID. + * @ctx: current executing context. TODO: remove for multiple ctx per process + * @ctx_mgr: context manager to handle multiple context for this FD. 
+ * @cb_mgr: command buffer manager to handle multiple buffers for this FD. + * @debugfs_list: list of relevant ASIC debugfs. + * @dev_node: node in the device list of file private data + * @refcount: number of related contexts. + * @restore_phase_mutex: lock for context switch and restore phase. + * @is_control: true for control device, false otherwise + */ +struct hl_fpriv { + struct hl_device *hdev; + struct file *filp; + struct pid *taskpid; + struct hl_ctx *ctx; + struct hl_ctx_mgr ctx_mgr; + struct hl_cb_mgr cb_mgr; + struct list_head debugfs_list; + struct list_head dev_node; + struct kref refcount; + struct mutex restore_phase_mutex; + u8 is_control; +}; + + +/* + * DebugFS + */ + +/** + * struct hl_info_list - debugfs file ops. + * @name: file name. + * @show: function to output information. + * @write: function to write to the file. + */ +struct hl_info_list { + const char *name; + int (*show)(struct seq_file *s, void *data); + ssize_t (*write)(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos); +}; + +/** + * struct hl_debugfs_entry - debugfs dentry wrapper. + * @dent: base debugfs entry structure. + * @info_ent: dentry realted ops. + * @dev_entry: ASIC specific debugfs manager. + */ +struct hl_debugfs_entry { + struct dentry *dent; + const struct hl_info_list *info_ent; + struct hl_dbg_device_entry *dev_entry; +}; + +/** + * struct hl_dbg_device_entry - ASIC specific debugfs manager. + * @root: root dentry. + * @hdev: habanalabs device structure. + * @entry_arr: array of available hl_debugfs_entry. + * @file_list: list of available debugfs files. + * @file_mutex: protects file_list. + * @cb_list: list of available CBs. + * @cb_spinlock: protects cb_list. + * @cs_list: list of available CSs. + * @cs_spinlock: protects cs_list. + * @cs_job_list: list of available CB jobs. + * @cs_job_spinlock: protects cs_job_list. + * @userptr_list: list of available userptrs (virtual memory chunk descriptor). + * @userptr_spinlock: protects userptr_list. + * @ctx_mem_hash_list: list of available contexts with MMU mappings. + * @ctx_mem_hash_spinlock: protects cb_list. + * @addr: next address to read/write from/to in read/write32. + * @mmu_addr: next virtual address to translate to physical address in mmu_show. + * @mmu_asid: ASID to use while translating in mmu_show. + * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read. + * @i2c_bus: generic u8 debugfs file for address value to use in i2c_data_read. + * @i2c_bus: generic u8 debugfs file for register value to use in i2c_data_read. + */ +struct hl_dbg_device_entry { + struct dentry *root; + struct hl_device *hdev; + struct hl_debugfs_entry *entry_arr; + struct list_head file_list; + struct mutex file_mutex; + struct list_head cb_list; + spinlock_t cb_spinlock; + struct list_head cs_list; + spinlock_t cs_spinlock; + struct list_head cs_job_list; + spinlock_t cs_job_spinlock; + struct list_head userptr_list; + spinlock_t userptr_spinlock; + struct list_head ctx_mem_hash_list; + spinlock_t ctx_mem_hash_spinlock; + u64 addr; + u64 mmu_addr; + u32 mmu_asid; + u8 i2c_bus; + u8 i2c_addr; + u8 i2c_reg; +}; + + +/* + * DEVICES + */ + +/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe + * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards. + */ +#define HL_MAX_MINORS 256 + +/* + * Registers read & write functions. 
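+ *
+ * A worked example for the field-update macros defined below (register and
+ * values are illustrative): with mask == 0x0000ff00, __ffs(mask) == 8, so
+ * RMWREG32(reg, 0x12, 0x0000ff00) clears bits [15:8] of the current value
+ * and ORs in 0x12 << 8, leaving the other bits untouched.
+ *
+ * The hl_poll_timeout*() macros further below evaluate to 0 once the given
+ * condition holds, or to -ETIMEDOUT, e.g. as used in hl_fw_init_cpu() above:
+ *
+ *	rc = hl_poll_timeout(hdev, cpu_boot_status_reg, status,
+ *			(status == CPU_BOOT_STATUS_SRAM_AVAIL),
+ *			10000, cpu_timeout);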
+ */ + +u32 hl_rreg(struct hl_device *hdev, u32 reg); +void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); + +#define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg)) +#define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v)) +#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n", \ + hdev->asic_funcs->rreg(hdev, (reg))) + +#define WREG32_P(reg, val, mask) \ + do { \ + u32 tmp_ = RREG32(reg); \ + tmp_ &= (mask); \ + tmp_ |= ((val) & ~(mask)); \ + WREG32(reg, tmp_); \ + } while (0) +#define WREG32_AND(reg, and) WREG32_P(reg, 0, and) +#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or)) + +#define RMWREG32(reg, val, mask) \ + do { \ + u32 tmp_ = RREG32(reg); \ + tmp_ &= ~(mask); \ + tmp_ |= ((val) << __ffs(mask)); \ + WREG32(reg, tmp_); \ + } while (0) + +#define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask)) + +#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT +#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK +#define WREG32_FIELD(reg, offset, field, val) \ + WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \ + ~REG_FIELD_MASK(reg, field)) | \ + (val) << REG_FIELD_SHIFT(reg, field)) + +/* Timeout should be longer when working with simulator but cap the + * increased timeout to some maximum + */ +#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \ +({ \ + ktime_t __timeout; \ + if (hdev->pdev) \ + __timeout = ktime_add_us(ktime_get(), timeout_us); \ + else \ + __timeout = ktime_add_us(ktime_get(),\ + min((u64)(timeout_us * 10), \ + (u64) HL_SIM_MAX_TIMEOUT_US)); \ + might_sleep_if(sleep_us); \ + for (;;) { \ + (val) = RREG32(addr); \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ + (val) = RREG32(addr); \ + break; \ + } \ + if (sleep_us) \ + usleep_range((sleep_us >> 2) + 1, sleep_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + +/* + * address in this macro points always to a memory location in the + * host's (server's) memory. That location is updated asynchronously + * either by the direct access of the device or by another core. + * + * To work both in LE and BE architectures, we need to distinguish between the + * two states (device or another core updates the memory location). Therefore, + * if mem_written_by_device is true, the host memory being polled will be + * updated directly by the device. If false, the host memory being polled will + * be updated by host CPU. Required so host knows whether or not the memory + * might need to be byte-swapped before returning value to caller. + */ +#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \ + mem_written_by_device) \ +({ \ + ktime_t __timeout; \ + if (hdev->pdev) \ + __timeout = ktime_add_us(ktime_get(), timeout_us); \ + else \ + __timeout = ktime_add_us(ktime_get(),\ + min((u64)(timeout_us * 10), \ + (u64) HL_SIM_MAX_TIMEOUT_US)); \ + might_sleep_if(sleep_us); \ + for (;;) { \ + /* Verify we read updates done by other cores or by device */ \ + mb(); \ + (val) = *((u32 *) (uintptr_t) (addr)); \ + if (mem_written_by_device) \ + (val) = le32_to_cpu(*(__le32 *) &(val)); \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ + (val) = *((u32 *) (uintptr_t) (addr)); \ + if (mem_written_by_device) \ + (val) = le32_to_cpu(*(__le32 *) &(val)); \ + break; \ + } \ + if (sleep_us) \ + usleep_range((sleep_us >> 2) + 1, sleep_us); \ + } \ + (cond) ? 
0 : -ETIMEDOUT; \ +}) + +#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \ + timeout_us) \ +({ \ + ktime_t __timeout; \ + if (hdev->pdev) \ + __timeout = ktime_add_us(ktime_get(), timeout_us); \ + else \ + __timeout = ktime_add_us(ktime_get(),\ + min((u64)(timeout_us * 10), \ + (u64) HL_SIM_MAX_TIMEOUT_US)); \ + might_sleep_if(sleep_us); \ + for (;;) { \ + (val) = readl(addr); \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ + (val) = readl(addr); \ + break; \ + } \ + if (sleep_us) \ + usleep_range((sleep_us >> 2) + 1, sleep_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + +struct hwmon_chip_info; + +/** + * struct hl_device_reset_work - reset workqueue task wrapper. + * @reset_work: reset work to be done. + * @hdev: habanalabs device structure. + */ +struct hl_device_reset_work { + struct work_struct reset_work; + struct hl_device *hdev; +}; + +/** + * struct hl_device_idle_busy_ts - used for calculating device utilization rate. + * @idle_to_busy_ts: timestamp where device changed from idle to busy. + * @busy_to_idle_ts: timestamp where device changed from busy to idle. + */ +struct hl_device_idle_busy_ts { + ktime_t idle_to_busy_ts; + ktime_t busy_to_idle_ts; +}; + +/** + * struct hl_device - habanalabs device structure. + * @pdev: pointer to PCI device, can be NULL in case of simulator device. + * @pcie_bar_phys: array of available PCIe bars physical addresses. + * (required only for PCI address match mode) + * @pcie_bar: array of available PCIe bars virtual addresses. + * @rmmio: configuration area address on SRAM. + * @cdev: related char device. + * @cdev_ctrl: char device for control operations only (INFO IOCTL) + * @dev: related kernel basic device structure. + * @dev_ctrl: related kernel device structure for the control device + * @work_freq: delayed work to lower device frequency if possible. + * @work_heartbeat: delayed work for ArmCP is-alive check. + * @asic_name: ASIC specific nmae. + * @asic_type: ASIC specific type. + * @completion_queue: array of hl_cq. + * @cq_wq: work queues of completion queues for executing work in process + * context. + * @eq_wq: work queue of event queue for executing work in process context. + * @kernel_ctx: Kernel driver context structure. + * @kernel_queues: array of hl_hw_queue. + * @hw_queues_mirror_list: CS mirror list for TDR. + * @hw_queues_mirror_lock: protects hw_queues_mirror_list. + * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs. + * @event_queue: event queue for IRQ from ArmCP. + * @dma_pool: DMA pool for small allocations. + * @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address. + * @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address. + * @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool. + * @asid_bitmap: holds used/available ASIDs. + * @asid_mutex: protects asid_bitmap. + * @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue. + * @debug_lock: protects critical section of setting debug mode for device + * @asic_prop: ASIC specific immutable properties. + * @asic_funcs: ASIC specific functions. + * @asic_specific: ASIC specific information to use only from ASIC files. + * @mmu_pgt_pool: pool of available MMU hops. + * @vm: virtual memory manager for MMU. + * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context. + * @mmu_shadow_hop0: shadow mapping of the MMU hop 0 zone. + * @hwmon_dev: H/W monitor device. 
+ * @pm_mng_profile: current power management profile. + * @hl_chip_info: ASIC's sensors information. + * @hl_debugfs: device's debugfs manager. + * @cb_pool: list of preallocated CBs. + * @cb_pool_lock: protects the CB pool. + * @fpriv_list: list of file private data structures. Each structure is created + * when a user opens the device + * @fpriv_list_lock: protects the fpriv_list + * @compute_ctx: current compute context executing. + * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy + * and vice-versa + * @aggregated_cs_counters: aggregated cs counters among all contexts + * @dram_used_mem: current DRAM memory consumption. + * @timeout_jiffies: device CS timeout value. + * @max_power: the max power of the device, as configured by the sysadmin. This + * value is saved so in case of hard-reset, the driver will restore + * this value and update the F/W after the re-initialization + * @in_reset: is device in reset flow. + * @curr_pll_profile: current PLL profile. + * @cs_active_cnt: number of active command submissions on this device (active + * means already in H/W queues) + * @major: habanalabs kernel driver major. + * @high_pll: high PLL profile frequency. + * @soft_reset_cnt: number of soft reset since the driver was loaded. + * @hard_reset_cnt: number of hard reset since the driver was loaded. + * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr + * @id: device minor. + * @id_control: minor of the control device + * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit + * addresses. + * @disabled: is device disabled. + * @late_init_done: is late init stage was done during initialization. + * @hwmon_initialized: is H/W monitor sensors was initialized. + * @hard_reset_pending: is there a hard reset work pending. + * @heartbeat: is heartbeat sanity check towards ArmCP enabled. + * @reset_on_lockup: true if a reset should be done in case of stuck CS, false + * otherwise. + * @dram_supports_virtual_memory: is MMU enabled towards DRAM. + * @dram_default_page_mapping: is DRAM default page mapping enabled. + * @pmmu_huge_range: is a different virtual addresses range used for PMMU with + * huge pages. + * @init_done: is the initialization of the device done. + * @mmu_enable: is MMU enabled. + * @mmu_huge_page_opt: is MMU huge pages optimization enabled. + * @clock_gating: is clock gating enabled. + * @device_cpu_disabled: is the device CPU disabled (due to timeouts) + * @dma_mask: the dma mask that was set for this device + * @in_debug: is device under debug. This, together with fpriv_list, enforces + * that only a single user is configuring the debug infrastructure. + * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant + * only to POWER9 machines. + * @cdev_sysfs_created: were char devices and sysfs nodes created. + * @stop_on_err: true if engines should stop on error. + * @supports_sync_stream: is sync stream supported. + * @sync_stream_queue_idx: helper index for sync stream queues initialization. + * @supports_coresight: is CoreSight supported. + * @supports_soft_reset: is soft reset supported. 
+ */ +struct hl_device { + struct pci_dev *pdev; + u64 pcie_bar_phys[HL_PCI_NUM_BARS]; + void __iomem *pcie_bar[HL_PCI_NUM_BARS]; + void __iomem *rmmio; + struct cdev cdev; + struct cdev cdev_ctrl; + struct device *dev; + struct device *dev_ctrl; + struct delayed_work work_freq; + struct delayed_work work_heartbeat; + char asic_name[16]; + enum hl_asic_type asic_type; + struct hl_cq *completion_queue; + struct workqueue_struct **cq_wq; + struct workqueue_struct *eq_wq; + struct hl_ctx *kernel_ctx; + struct hl_hw_queue *kernel_queues; + struct list_head hw_queues_mirror_list; + spinlock_t hw_queues_mirror_lock; + struct hl_cb_mgr kernel_cb_mgr; + struct hl_eq event_queue; + struct dma_pool *dma_pool; + void *cpu_accessible_dma_mem; + dma_addr_t cpu_accessible_dma_address; + struct gen_pool *cpu_accessible_dma_pool; + unsigned long *asid_bitmap; + struct mutex asid_mutex; + struct mutex send_cpu_message_lock; + struct mutex debug_lock; + struct asic_fixed_properties asic_prop; + const struct hl_asic_funcs *asic_funcs; + void *asic_specific; + struct gen_pool *mmu_pgt_pool; + struct hl_vm vm; + struct mutex mmu_cache_lock; + void *mmu_shadow_hop0; + struct device *hwmon_dev; + enum hl_pm_mng_profile pm_mng_profile; + struct hwmon_chip_info *hl_chip_info; + + struct hl_dbg_device_entry hl_debugfs; + + struct list_head cb_pool; + spinlock_t cb_pool_lock; + + struct list_head fpriv_list; + struct mutex fpriv_list_lock; + + struct hl_ctx *compute_ctx; + + struct hl_device_idle_busy_ts *idle_busy_ts_arr; + + struct hl_cs_counters aggregated_cs_counters; + + atomic64_t dram_used_mem; + u64 timeout_jiffies; + u64 max_power; + atomic_t in_reset; + enum hl_pll_frequency curr_pll_profile; + int cs_active_cnt; + u32 major; + u32 high_pll; + u32 soft_reset_cnt; + u32 hard_reset_cnt; + u32 idle_busy_ts_idx; + u16 id; + u16 id_control; + u16 cpu_pci_msb_addr; + u8 disabled; + u8 late_init_done; + u8 hwmon_initialized; + u8 hard_reset_pending; + u8 heartbeat; + u8 reset_on_lockup; + u8 dram_supports_virtual_memory; + u8 dram_default_page_mapping; + u8 pmmu_huge_range; + u8 init_done; + u8 clock_gating; + u8 device_cpu_disabled; + u8 dma_mask; + u8 in_debug; + u8 power9_64bit_dma_enable; + u8 cdev_sysfs_created; + u8 stop_on_err; + u8 supports_sync_stream; + u8 sync_stream_queue_idx; + u8 supports_coresight; + u8 supports_soft_reset; + + /* Parameters for bring-up */ + u8 mmu_enable; + u8 mmu_huge_page_opt; + u8 cpu_enable; + u8 reset_pcilink; + u8 cpu_queues_enable; + u8 fw_loading; + u8 pldm; + u8 axi_drain; + u8 sram_scrambler_enable; + u8 dram_scrambler_enable; + u8 hard_reset_on_fw_events; + u8 bmc_enable; + u8 rl_enable; +}; + + +/* + * IOCTLs + */ + +/** + * typedef hl_ioctl_t - typedef for ioctl function in the driver + * @hpriv: pointer to the FD's private data, which contains state of + * user process + * @data: pointer to the input/output arguments structure of the IOCTL + * + * Return: 0 for success, negative value for error + */ +typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data); + +/** + * struct hl_ioctl_desc - describes an IOCTL entry of the driver. + * @cmd: the IOCTL code as created by the kernel macros. + * @func: pointer to the driver's function that should be called for this IOCTL. + */ +struct hl_ioctl_desc { + unsigned int cmd; + hl_ioctl_t *func; +}; + + +/* + * Kernel module functions that can be accessed by entire module + */ + +/** + * hl_mem_area_inside_range() - Checks whether address+size are inside a range. 
+ * @address: The start address of the area we want to validate. + * @size: The size in bytes of the area we want to validate. + * @range_start_address: The start address of the valid range. + * @range_end_address: The end address of the valid range. + * + * Return: true if the area is inside the valid range, false otherwise. + */ +static inline bool hl_mem_area_inside_range(u64 address, u32 size, + u64 range_start_address, u64 range_end_address) +{ + u64 end_address = address + size; + + if ((address >= range_start_address) && + (end_address <= range_end_address) && + (end_address > address)) + return true; + + return false; +} + +/** + * hl_mem_area_crosses_range() - Checks whether address+size crossing a range. + * @address: The start address of the area we want to validate. + * @size: The size in bytes of the area we want to validate. + * @range_start_address: The start address of the valid range. + * @range_end_address: The end address of the valid range. + * + * Return: true if the area overlaps part or all of the valid range, + * false otherwise. + */ +static inline bool hl_mem_area_crosses_range(u64 address, u32 size, + u64 range_start_address, u64 range_end_address) +{ + u64 end_address = address + size; + + if ((address >= range_start_address) && + (address < range_end_address)) + return true; + + if ((end_address >= range_start_address) && + (end_address < range_end_address)) + return true; + + if ((address < range_start_address) && + (end_address >= range_end_address)) + return true; + + return false; +} + +int hl_device_open(struct inode *inode, struct file *filp); +int hl_device_open_ctrl(struct inode *inode, struct file *filp); +bool hl_device_disabled_or_in_reset(struct hl_device *hdev); +enum hl_device_status hl_device_status(struct hl_device *hdev); +int hl_device_set_debug_mode(struct hl_device *hdev, bool enable); +int create_hdev(struct hl_device **dev, struct pci_dev *pdev, + enum hl_asic_type asic_type, int minor); +void destroy_hdev(struct hl_device *hdev); +int hl_hw_queues_create(struct hl_device *hdev); +void hl_hw_queues_destroy(struct hl_device *hdev); +int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, + u32 cb_size, u64 cb_ptr); +int hl_hw_queue_schedule_cs(struct hl_cs *cs); +u32 hl_hw_queue_add_ptr(u32 ptr, u16 val); +void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id); +void hl_int_hw_queue_update_ci(struct hl_cs *cs); +void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset); + +#define hl_queue_inc_ptr(p) hl_hw_queue_add_ptr(p, 1) +#define hl_pi_2_offset(pi) ((pi) & (HL_QUEUE_LENGTH - 1)) + +int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id); +void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q); +int hl_eq_init(struct hl_device *hdev, struct hl_eq *q); +void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q); +void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q); +void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q); +irqreturn_t hl_irq_handler_cq(int irq, void *arg); +irqreturn_t hl_irq_handler_eq(int irq, void *arg); +u32 hl_cq_inc_ptr(u32 ptr); + +int hl_asid_init(struct hl_device *hdev); +void hl_asid_fini(struct hl_device *hdev); +unsigned long hl_asid_alloc(struct hl_device *hdev); +void hl_asid_free(struct hl_device *hdev, unsigned long asid); + +int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv); +void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx); +int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx); +void 
hl_ctx_do_release(struct kref *ref); +void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx); +int hl_ctx_put(struct hl_ctx *ctx); +struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq); +void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr); +void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr); + +int hl_device_init(struct hl_device *hdev, struct class *hclass); +void hl_device_fini(struct hl_device *hdev); +int hl_device_suspend(struct hl_device *hdev); +int hl_device_resume(struct hl_device *hdev); +int hl_device_reset(struct hl_device *hdev, bool hard_reset, + bool from_hard_reset_thread); +void hl_hpriv_get(struct hl_fpriv *hpriv); +void hl_hpriv_put(struct hl_fpriv *hpriv); +int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq); +uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms); + +int hl_build_hwmon_channel_info(struct hl_device *hdev, + struct armcp_sensor *sensors_arr); + +int hl_sysfs_init(struct hl_device *hdev); +void hl_sysfs_fini(struct hl_device *hdev); + +int hl_hwmon_init(struct hl_device *hdev); +void hl_hwmon_fini(struct hl_device *hdev); + +int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size, + u64 *handle, int ctx_id); +int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle); +int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); +struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, + u32 handle); +void hl_cb_put(struct hl_cb *cb); +void hl_cb_mgr_init(struct hl_cb_mgr *mgr); +void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr); +struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size); +int hl_cb_pool_init(struct hl_device *hdev); +int hl_cb_pool_fini(struct hl_device *hdev); + +void hl_cs_rollback_all(struct hl_device *hdev); +struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, + enum hl_queue_type queue_type, bool is_kernel_allocated_cb); +void hl_sob_reset_error(struct kref *ref); + +void goya_set_asic_funcs(struct hl_device *hdev); +void gaudi_set_asic_funcs(struct hl_device *hdev); + +int hl_vm_ctx_init(struct hl_ctx *ctx); +void hl_vm_ctx_fini(struct hl_ctx *ctx); + +int hl_vm_init(struct hl_device *hdev); +void hl_vm_fini(struct hl_device *hdev); + +int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, + struct hl_userptr *userptr); +void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr); +void hl_userptr_delete_list(struct hl_device *hdev, + struct list_head *userptr_list); +bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size, + struct list_head *userptr_list, + struct hl_userptr **userptr); + +int hl_mmu_init(struct hl_device *hdev); +void hl_mmu_fini(struct hl_device *hdev); +int hl_mmu_ctx_init(struct hl_ctx *ctx); +void hl_mmu_ctx_fini(struct hl_ctx *ctx); +int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, + u32 page_size, bool flush_pte); +int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, + bool flush_pte); +void hl_mmu_swap_out(struct hl_ctx *ctx); +void hl_mmu_swap_in(struct hl_ctx *ctx); + +int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, + void __iomem *dst); +int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode); +int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, + u16 len, u32 timeout, long *result); +int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type); +int hl_fw_unmask_irq_arr(struct hl_device *hdev, 
const u32 *irq_arr, + size_t irq_arr_size); +int hl_fw_test_cpu_queue(struct hl_device *hdev); +void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle); +void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, + void *vaddr); +int hl_fw_send_heartbeat(struct hl_device *hdev); +int hl_fw_armcp_info_get(struct hl_device *hdev); +int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); +int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, + u32 boot_err0_reg, bool skip_bmc, + u32 cpu_timeout, u32 boot_fit_timeout); + +int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], + bool is_wc[3]); +int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data); +int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar, + u64 addr); +int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, + struct hl_inbound_pci_region *pci_region); +int hl_pci_set_outbound_region(struct hl_device *hdev, + struct hl_outbound_pci_region *pci_region); +int hl_pci_init(struct hl_device *hdev); +void hl_pci_fini(struct hl_device *hdev); + +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); +int hl_get_temperature(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_set_temperature(struct hl_device *hdev, + int sensor_index, u32 attr, long value); +int hl_get_voltage(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_get_current(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_get_fan_speed(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_get_pwm_info(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, + long value); +u64 hl_get_max_power(struct hl_device *hdev); +void hl_set_max_power(struct hl_device *hdev, u64 value); +int hl_set_voltage(struct hl_device *hdev, + int sensor_index, u32 attr, long value); +int hl_set_current(struct hl_device *hdev, + int sensor_index, u32 attr, long value); + +#ifdef CONFIG_DEBUG_FS + +void hl_debugfs_init(void); +void hl_debugfs_fini(void); +void hl_debugfs_add_device(struct hl_device *hdev); +void hl_debugfs_remove_device(struct hl_device *hdev); +void hl_debugfs_add_file(struct hl_fpriv *hpriv); +void hl_debugfs_remove_file(struct hl_fpriv *hpriv); +void hl_debugfs_add_cb(struct hl_cb *cb); +void hl_debugfs_remove_cb(struct hl_cb *cb); +void hl_debugfs_add_cs(struct hl_cs *cs); +void hl_debugfs_remove_cs(struct hl_cs *cs); +void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job); +void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job); +void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr); +void hl_debugfs_remove_userptr(struct hl_device *hdev, + struct hl_userptr *userptr); +void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); +void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); + +#else + +static inline void __init hl_debugfs_init(void) +{ +} + +static inline void hl_debugfs_fini(void) +{ +} + +static inline void hl_debugfs_add_device(struct hl_device *hdev) +{ +} + +static inline void hl_debugfs_remove_device(struct hl_device *hdev) +{ +} + +static inline void hl_debugfs_add_file(struct 
hl_fpriv *hpriv) +{ +} + +static inline void hl_debugfs_remove_file(struct hl_fpriv *hpriv) +{ +} + +static inline void hl_debugfs_add_cb(struct hl_cb *cb) +{ +} + +static inline void hl_debugfs_remove_cb(struct hl_cb *cb) +{ +} + +static inline void hl_debugfs_add_cs(struct hl_cs *cs) +{ +} + +static inline void hl_debugfs_remove_cs(struct hl_cs *cs) +{ +} + +static inline void hl_debugfs_add_job(struct hl_device *hdev, + struct hl_cs_job *job) +{ +} + +static inline void hl_debugfs_remove_job(struct hl_device *hdev, + struct hl_cs_job *job) +{ +} + +static inline void hl_debugfs_add_userptr(struct hl_device *hdev, + struct hl_userptr *userptr) +{ +} + +static inline void hl_debugfs_remove_userptr(struct hl_device *hdev, + struct hl_userptr *userptr) +{ +} + +static inline void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, + struct hl_ctx *ctx) +{ +} + +static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, + struct hl_ctx *ctx) +{ +} + +#endif + +/* IOCTLs */ +long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); +long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg); +int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data); +int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data); +int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data); +int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data); + +#endif /* HABANALABSP_H_ */ diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c new file mode 100644 index 000000000000..f38664b03865 --- /dev/null +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -0,0 +1,529 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#define pr_fmt(fmt) "habanalabs: " fmt + +#include "habanalabs.h" + +#include +#include + +#define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team" + +#define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators" + +MODULE_AUTHOR(HL_DRIVER_AUTHOR); +MODULE_DESCRIPTION(HL_DRIVER_DESC); +MODULE_LICENSE("GPL v2"); + +static int hl_major; +static struct class *hl_class; +static DEFINE_IDR(hl_devs_idr); +static DEFINE_MUTEX(hl_devs_idr_lock); + +static int timeout_locked = 5; +static int reset_on_lockup = 1; + +module_param(timeout_locked, int, 0444); +MODULE_PARM_DESC(timeout_locked, + "Device lockup timeout in seconds (0 = disabled, default 5s)"); + +module_param(reset_on_lockup, int, 0444); +MODULE_PARM_DESC(reset_on_lockup, + "Do device reset on lockup (0 = no, 1 = yes, default yes)"); + +#define PCI_VENDOR_ID_HABANALABS 0x1da3 + +#define PCI_IDS_GOYA 0x0001 +#define PCI_IDS_GAUDI 0x1000 + +static const struct pci_device_id ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, + { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ids); + +/* + * get_asic_type - translate device id to asic type + * + * @device: id of the PCI device + * + * Translate device id to asic type. 
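/*
 * An illustrative sketch, not part of the moved code: how the
 * timeout_locked module parameter above is consumed by create_hdev()
 * later in this file. A value of 0 disables the CS lockup timeout.
 */
static u64 example_lockup_timeout_jiffies(int timeout_locked_secs)
{
        if (timeout_locked_secs)
                return msecs_to_jiffies(timeout_locked_secs * 1000);

        return MAX_SCHEDULE_TIMEOUT;
}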
+ * In case of unidentified device, return -1 + */ +static enum hl_asic_type get_asic_type(u16 device) +{ + enum hl_asic_type asic_type; + + switch (device) { + case PCI_IDS_GOYA: + asic_type = ASIC_GOYA; + break; + case PCI_IDS_GAUDI: + asic_type = ASIC_GAUDI; + break; + default: + asic_type = ASIC_INVALID; + break; + } + + return asic_type; +} + +/* + * hl_device_open - open function for habanalabs device + * + * @inode: pointer to inode structure + * @filp: pointer to file structure + * + * Called when process opens an habanalabs device. + */ +int hl_device_open(struct inode *inode, struct file *filp) +{ + struct hl_device *hdev; + struct hl_fpriv *hpriv; + int rc; + + mutex_lock(&hl_devs_idr_lock); + hdev = idr_find(&hl_devs_idr, iminor(inode)); + mutex_unlock(&hl_devs_idr_lock); + + if (!hdev) { + pr_err("Couldn't find device %d:%d\n", + imajor(inode), iminor(inode)); + return -ENXIO; + } + + hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); + if (!hpriv) + return -ENOMEM; + + hpriv->hdev = hdev; + filp->private_data = hpriv; + hpriv->filp = filp; + mutex_init(&hpriv->restore_phase_mutex); + kref_init(&hpriv->refcount); + nonseekable_open(inode, filp); + + hl_cb_mgr_init(&hpriv->cb_mgr); + hl_ctx_mgr_init(&hpriv->ctx_mgr); + + hpriv->taskpid = find_get_pid(current->pid); + + mutex_lock(&hdev->fpriv_list_lock); + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_err_ratelimited(hdev->dev, + "Can't open %s because it is disabled or in reset\n", + dev_name(hdev->dev)); + rc = -EPERM; + goto out_err; + } + + if (hdev->in_debug) { + dev_err_ratelimited(hdev->dev, + "Can't open %s because it is being debugged by another user\n", + dev_name(hdev->dev)); + rc = -EPERM; + goto out_err; + } + + if (hdev->compute_ctx) { + dev_dbg_ratelimited(hdev->dev, + "Can't open %s because another user is working on it\n", + dev_name(hdev->dev)); + rc = -EBUSY; + goto out_err; + } + + rc = hl_ctx_create(hdev, hpriv); + if (rc) { + dev_err(hdev->dev, "Failed to create context %d\n", rc); + goto out_err; + } + + /* Device is IDLE at this point so it is legal to change PLLs. 
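/*
 * An illustrative userspace-side sketch, not part of the moved code: the
 * checks above make the compute node a single-owner device, so open()
 * fails with EBUSY while another process holds the compute context, and
 * with EPERM while the device is disabled, in reset or being debugged.
 * The /dev/hl0 node name is an assumption made for the example.
 */
#include <fcntl.h>
#include <errno.h>

static int example_open_compute_node(void)
{
        int fd = open("/dev/hl0", O_RDWR | O_CLOEXEC);

        if (fd < 0 && (errno == EBUSY || errno == EPERM))
                return -1;      /* owned by someone else or not usable */

        return fd;
}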
+ * There is no need to check anything because if the PLL is + * already HIGH, the set function will return without doing + * anything + */ + hl_device_set_frequency(hdev, PLL_HIGH); + + list_add(&hpriv->dev_node, &hdev->fpriv_list); + mutex_unlock(&hdev->fpriv_list_lock); + + hl_debugfs_add_file(hpriv); + + return 0; + +out_err: + mutex_unlock(&hdev->fpriv_list_lock); + + hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); + hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); + filp->private_data = NULL; + mutex_destroy(&hpriv->restore_phase_mutex); + put_pid(hpriv->taskpid); + + kfree(hpriv); + + return rc; +} + +int hl_device_open_ctrl(struct inode *inode, struct file *filp) +{ + struct hl_device *hdev; + struct hl_fpriv *hpriv; + int rc; + + mutex_lock(&hl_devs_idr_lock); + hdev = idr_find(&hl_devs_idr, iminor(inode)); + mutex_unlock(&hl_devs_idr_lock); + + if (!hdev) { + pr_err("Couldn't find device %d:%d\n", + imajor(inode), iminor(inode)); + return -ENXIO; + } + + hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); + if (!hpriv) + return -ENOMEM; + + mutex_lock(&hdev->fpriv_list_lock); + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_err_ratelimited(hdev->dev_ctrl, + "Can't open %s because it is disabled or in reset\n", + dev_name(hdev->dev_ctrl)); + rc = -EPERM; + goto out_err; + } + + list_add(&hpriv->dev_node, &hdev->fpriv_list); + mutex_unlock(&hdev->fpriv_list_lock); + + hpriv->hdev = hdev; + filp->private_data = hpriv; + hpriv->filp = filp; + hpriv->is_control = true; + nonseekable_open(inode, filp); + + hpriv->taskpid = find_get_pid(current->pid); + + return 0; + +out_err: + mutex_unlock(&hdev->fpriv_list_lock); + kfree(hpriv); + return rc; +} + +static void set_driver_behavior_per_device(struct hl_device *hdev) +{ + hdev->mmu_enable = 1; + hdev->cpu_enable = 1; + hdev->fw_loading = 1; + hdev->cpu_queues_enable = 1; + hdev->heartbeat = 1; + hdev->clock_gating = 1; + + hdev->reset_pcilink = 0; + hdev->axi_drain = 0; + hdev->sram_scrambler_enable = 1; + hdev->dram_scrambler_enable = 1; + hdev->bmc_enable = 1; + hdev->hard_reset_on_fw_events = 1; +} + +/* + * create_hdev - create habanalabs device instance + * + * @dev: will hold the pointer to the new habanalabs device structure + * @pdev: pointer to the pci device + * @asic_type: in case of simulator device, which device is it + * @minor: in case of simulator device, the minor of the device + * + * Allocate memory for habanalabs device and initialize basic fields + * Identify the ASIC type + * Allocate ID (minor) for the device (only for real devices) + */ +int create_hdev(struct hl_device **dev, struct pci_dev *pdev, + enum hl_asic_type asic_type, int minor) +{ + struct hl_device *hdev; + int rc, main_id, ctrl_id = 0; + + *dev = NULL; + + hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); + if (!hdev) + return -ENOMEM; + + /* First, we must find out which ASIC are we handling. 
This is needed + * to configure the behavior of the driver (kernel parameters) + */ + if (pdev) { + hdev->asic_type = get_asic_type(pdev->device); + if (hdev->asic_type == ASIC_INVALID) { + dev_err(&pdev->dev, "Unsupported ASIC\n"); + rc = -ENODEV; + goto free_hdev; + } + } else { + hdev->asic_type = asic_type; + } + + hdev->major = hl_major; + hdev->reset_on_lockup = reset_on_lockup; + hdev->pldm = 0; + + set_driver_behavior_per_device(hdev); + + if (timeout_locked) + hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000); + else + hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; + + hdev->disabled = true; + hdev->pdev = pdev; /* can be NULL in case of simulator device */ + + /* Set default DMA mask to 32 bits */ + hdev->dma_mask = 32; + + mutex_lock(&hl_devs_idr_lock); + + /* Always save 2 numbers, 1 for main device and 1 for control. + * They must be consecutive + */ + main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, + GFP_KERNEL); + + if (main_id >= 0) + ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1, + main_id + 2, GFP_KERNEL); + + mutex_unlock(&hl_devs_idr_lock); + + if ((main_id < 0) || (ctrl_id < 0)) { + if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC)) + pr_err("too many devices in the system\n"); + + if (main_id >= 0) { + mutex_lock(&hl_devs_idr_lock); + idr_remove(&hl_devs_idr, main_id); + mutex_unlock(&hl_devs_idr_lock); + } + + rc = -EBUSY; + goto free_hdev; + } + + hdev->id = main_id; + hdev->id_control = ctrl_id; + + *dev = hdev; + + return 0; + +free_hdev: + kfree(hdev); + return rc; +} + +/* + * destroy_hdev - destroy habanalabs device instance + * + * @dev: pointer to the habanalabs device structure + * + */ +void destroy_hdev(struct hl_device *hdev) +{ + /* Remove device from the device list */ + mutex_lock(&hl_devs_idr_lock); + idr_remove(&hl_devs_idr, hdev->id); + idr_remove(&hl_devs_idr, hdev->id_control); + mutex_unlock(&hl_devs_idr_lock); + + kfree(hdev); +} + +static int hl_pmops_suspend(struct device *dev) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + pr_debug("Going to suspend PCI device\n"); + + if (!hdev) { + pr_err("device pointer is NULL in suspend\n"); + return 0; + } + + return hl_device_suspend(hdev); +} + +static int hl_pmops_resume(struct device *dev) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + pr_debug("Going to resume PCI device\n"); + + if (!hdev) { + pr_err("device pointer is NULL in resume\n"); + return 0; + } + + return hl_device_resume(hdev); +} + +/* + * hl_pci_probe - probe PCI habanalabs devices + * + * @pdev: pointer to pci device + * @id: pointer to pci device id structure + * + * Standard PCI probe function for habanalabs device. 
+ * Create a new habanalabs device and initialize it according to the + * device's type + */ +static int hl_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct hl_device *hdev; + int rc; + + dev_info(&pdev->dev, HL_NAME + " device found [%04x:%04x] (rev %x)\n", + (int)pdev->vendor, (int)pdev->device, (int)pdev->revision); + + rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1); + if (rc) + return rc; + + pci_set_drvdata(pdev, hdev); + + rc = hl_device_init(hdev, hl_class); + if (rc) { + dev_err(&pdev->dev, "Fatal error during habanalabs device init\n"); + rc = -ENODEV; + goto disable_device; + } + + return 0; + +disable_device: + pci_set_drvdata(pdev, NULL); + destroy_hdev(hdev); + + return rc; +} + +/* + * hl_pci_remove - remove PCI habanalabs devices + * + * @pdev: pointer to pci device + * + * Standard PCI remove function for habanalabs device + */ +static void hl_pci_remove(struct pci_dev *pdev) +{ + struct hl_device *hdev; + + hdev = pci_get_drvdata(pdev); + if (!hdev) + return; + + hl_device_fini(hdev); + pci_set_drvdata(pdev, NULL); + + destroy_hdev(hdev); +} + +static const struct dev_pm_ops hl_pm_ops = { + .suspend = hl_pmops_suspend, + .resume = hl_pmops_resume, +}; + +static struct pci_driver hl_pci_driver = { + .name = HL_NAME, + .id_table = ids, + .probe = hl_pci_probe, + .remove = hl_pci_remove, + .driver.pm = &hl_pm_ops, +}; + +/* + * hl_init - Initialize the habanalabs kernel driver + */ +static int __init hl_init(void) +{ + int rc; + dev_t dev; + + pr_info("loading driver\n"); + + rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME); + if (rc < 0) { + pr_err("unable to get major\n"); + return rc; + } + + hl_major = MAJOR(dev); + + hl_class = class_create(THIS_MODULE, HL_NAME); + if (IS_ERR(hl_class)) { + pr_err("failed to allocate class\n"); + rc = PTR_ERR(hl_class); + goto remove_major; + } + + hl_debugfs_init(); + + rc = pci_register_driver(&hl_pci_driver); + if (rc) { + pr_err("failed to register pci device\n"); + goto remove_debugfs; + } + + pr_debug("driver loaded\n"); + + return 0; + +remove_debugfs: + hl_debugfs_fini(); + class_destroy(hl_class); +remove_major: + unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); + return rc; +} + +/* + * hl_exit - Release all resources of the habanalabs kernel driver + */ +static void __exit hl_exit(void) +{ + pci_unregister_driver(&hl_pci_driver); + + /* + * Removing debugfs must be after all devices or simulator devices + * have been removed because otherwise we get a bug in the + * debugfs module for referencing NULL objects + */ + hl_debugfs_fini(); + + class_destroy(hl_class); + unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); + + idr_destroy(&hl_devs_idr); + + pr_debug("driver removed\n"); +} + +module_init(hl_init); +module_exit(hl_exit); diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c new file mode 100644 index 000000000000..5af1c03da473 --- /dev/null +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -0,0 +1,546 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. 
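/*
 * An illustrative userspace-side sketch, not part of the moved code:
 * querying the device through the INFO IOCTL handled below in this file.
 * The uAPI header path and the /dev/hl0 node name are assumptions made
 * for the example.
 */
#include <misc/habanalabs.h>    /* uAPI: HL_IOCTL_INFO, struct hl_info_args */
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>

static int example_query_hw_ip(void)
{
        struct hl_info_hw_ip_info hw_ip = {0};
        struct hl_info_args args = {0};
        int fd, rc;

        fd = open("/dev/hl0", O_RDWR | O_CLOEXEC);
        if (fd < 0)
                return fd;

        args.op = HL_INFO_HW_IP_INFO;
        args.return_pointer = (uint64_t) (uintptr_t) &hw_ip;
        args.return_size = sizeof(hw_ip);

        rc = ioctl(fd, HL_IOCTL_INFO, &args);
        if (!rc)
                printf("device id 0x%x, dram size %llu\n",
                        hw_ip.device_id,
                        (unsigned long long) hw_ip.dram_size);

        close(fd);
        return rc;
}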
+ */ + +#include +#include "habanalabs.h" + +#include +#include +#include + +static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = { + [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr), + [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf), + [HL_DEBUG_OP_STM] = sizeof(struct hl_debug_params_stm), + [HL_DEBUG_OP_FUNNEL] = 0, + [HL_DEBUG_OP_BMON] = sizeof(struct hl_debug_params_bmon), + [HL_DEBUG_OP_SPMU] = sizeof(struct hl_debug_params_spmu), + [HL_DEBUG_OP_TIMESTAMP] = 0 + +}; + +static int device_status_info(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_device_status dev_stat = {0}; + u32 size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!size) || (!out)) + return -EINVAL; + + dev_stat.status = hl_device_status(hdev); + + return copy_to_user(out, &dev_stat, + min((size_t)size, sizeof(dev_stat))) ? -EFAULT : 0; +} + +static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_hw_ip_info hw_ip = {0}; + u32 size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 sram_kmd_size, dram_kmd_size; + + if ((!size) || (!out)) + return -EINVAL; + + sram_kmd_size = (prop->sram_user_base_address - + prop->sram_base_address); + dram_kmd_size = (prop->dram_user_base_address - + prop->dram_base_address); + + hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev); + hw_ip.sram_base_address = prop->sram_user_base_address; + hw_ip.dram_base_address = prop->dram_user_base_address; + hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask; + hw_ip.sram_size = prop->sram_size - sram_kmd_size; + hw_ip.dram_size = prop->dram_size - dram_kmd_size; + if (hw_ip.dram_size > PAGE_SIZE) + hw_ip.dram_enabled = 1; + hw_ip.num_of_events = prop->num_of_events; + + memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version, + min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN)); + + memcpy(hw_ip.card_name, prop->armcp_info.card_name, + min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN)); + + hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version); + hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location); + + hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr; + hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf; + hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od; + hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor; + + return copy_to_user(out, &hw_ip, + min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0; +} + +static int hw_events_info(struct hl_device *hdev, bool aggregate, + struct hl_info_args *args) +{ + u32 size, max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + void *arr; + + if ((!max_size) || (!out)) + return -EINVAL; + + arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size); + + return copy_to_user(out, arr, min(max_size, size)) ? 
-EFAULT : 0; +} + +static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_info_dram_usage dram_usage = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 dram_kmd_size; + + if ((!max_size) || (!out)) + return -EINVAL; + + dram_kmd_size = (prop->dram_user_base_address - + prop->dram_base_address); + dram_usage.dram_free_mem = (prop->dram_size - dram_kmd_size) - + atomic64_read(&hdev->dram_used_mem); + if (hpriv->ctx) + dram_usage.ctx_dram_mem = + atomic64_read(&hpriv->ctx->dram_phys_mem); + + return copy_to_user(out, &dram_usage, + min((size_t) max_size, sizeof(dram_usage))) ? -EFAULT : 0; +} + +static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_hw_idle hw_idle = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, + &hw_idle.busy_engines_mask, NULL); + + return copy_to_user(out, &hw_idle, + min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; +} + +static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args) +{ + struct hl_debug_params *params; + void *input = NULL, *output = NULL; + int rc; + + params = kzalloc(sizeof(*params), GFP_KERNEL); + if (!params) + return -ENOMEM; + + params->reg_idx = args->reg_idx; + params->enable = args->enable; + params->op = args->op; + + if (args->input_ptr && args->input_size) { + input = kzalloc(hl_debug_struct_size[args->op], GFP_KERNEL); + if (!input) { + rc = -ENOMEM; + goto out; + } + + if (copy_from_user(input, u64_to_user_ptr(args->input_ptr), + args->input_size)) { + rc = -EFAULT; + dev_err(hdev->dev, "failed to copy input debug data\n"); + goto out; + } + + params->input = input; + } + + if (args->output_ptr && args->output_size) { + output = kzalloc(args->output_size, GFP_KERNEL); + if (!output) { + rc = -ENOMEM; + goto out; + } + + params->output = output; + params->output_size = args->output_size; + } + + rc = hdev->asic_funcs->debug_coresight(hdev, params); + if (rc) { + dev_err(hdev->dev, + "debug coresight operation failed %d\n", rc); + goto out; + } + + if (output && copy_to_user((void __user *) (uintptr_t) args->output_ptr, + output, args->output_size)) { + dev_err(hdev->dev, "copy to user failed in debug ioctl\n"); + rc = -EFAULT; + goto out; + } + + +out: + kfree(params); + kfree(output); + kfree(input); + + return rc; +} + +static int device_utilization(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_device_utilization device_util = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + if ((args->period_ms < 100) || (args->period_ms > 1000) || + (args->period_ms % 100)) { + dev_err(hdev->dev, + "period %u must be between 100 - 1000 and must be divisible by 100\n", + args->period_ms); + return -EINVAL; + } + + device_util.utilization = hl_device_utilization(hdev, args->period_ms); + + return copy_to_user(out, &device_util, + min((size_t) max_size, sizeof(device_util))) ? 
-EFAULT : 0; +} + +static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_clk_rate clk_rate = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, + &clk_rate.max_clk_rate_mhz); + if (rc) + return rc; + + return copy_to_user(out, &clk_rate, + min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0; +} + +static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_reset_count reset_count = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + reset_count.hard_reset_cnt = hdev->hard_reset_cnt; + reset_count.soft_reset_cnt = hdev->soft_reset_cnt; + + return copy_to_user(out, &reset_count, + min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0; +} + +static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_time_sync time_sync = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + time_sync.device_time = hdev->asic_funcs->get_device_time(hdev); + time_sync.host_time = ktime_get_raw_ns(); + + return copy_to_user(out, &time_sync, + min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0; +} + +static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_info_cs_counters cs_counters = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters, + sizeof(struct hl_cs_counters)); + + if (hpriv->ctx) + memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters, + sizeof(struct hl_cs_counters)); + + return copy_to_user(out, &cs_counters, + min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0; +} + +static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, + struct device *dev) +{ + struct hl_info_args *args = data; + struct hl_device *hdev = hpriv->hdev; + int rc; + + /* + * Information is returned for the following opcodes even if the device + * is disabled or in reset. + */ + switch (args->op) { + case HL_INFO_HW_IP_INFO: + return hw_ip_info(hdev, args); + + case HL_INFO_DEVICE_STATUS: + return device_status_info(hdev, args); + + case HL_INFO_RESET_COUNT: + return get_reset_count(hdev, args); + + default: + break; + } + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_warn_ratelimited(dev, + "Device is %s. Can't execute INFO IOCTL\n", + atomic_read(&hdev->in_reset) ? 
"in_reset" : "disabled"); + return -EBUSY; + } + + switch (args->op) { + case HL_INFO_HW_EVENTS: + rc = hw_events_info(hdev, false, args); + break; + + case HL_INFO_DRAM_USAGE: + rc = dram_usage_info(hpriv, args); + break; + + case HL_INFO_HW_IDLE: + rc = hw_idle(hdev, args); + break; + + case HL_INFO_DEVICE_UTILIZATION: + rc = device_utilization(hdev, args); + break; + + case HL_INFO_HW_EVENTS_AGGREGATE: + rc = hw_events_info(hdev, true, args); + break; + + case HL_INFO_CLK_RATE: + rc = get_clk_rate(hdev, args); + break; + + case HL_INFO_TIME_SYNC: + return time_sync_info(hdev, args); + + case HL_INFO_CS_COUNTERS: + return cs_counters_info(hpriv, args); + + default: + dev_err(dev, "Invalid request %d\n", args->op); + rc = -ENOTTY; + break; + } + + return rc; +} + +static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) +{ + return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev); +} + +static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data) +{ + return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl); +} + +static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) +{ + struct hl_debug_args *args = data; + struct hl_device *hdev = hpriv->hdev; + int rc = 0; + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_warn_ratelimited(hdev->dev, + "Device is %s. Can't execute DEBUG IOCTL\n", + atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); + return -EBUSY; + } + + switch (args->op) { + case HL_DEBUG_OP_ETR: + case HL_DEBUG_OP_ETF: + case HL_DEBUG_OP_STM: + case HL_DEBUG_OP_FUNNEL: + case HL_DEBUG_OP_BMON: + case HL_DEBUG_OP_SPMU: + case HL_DEBUG_OP_TIMESTAMP: + if (!hdev->in_debug) { + dev_err_ratelimited(hdev->dev, + "Rejecting debug configuration request because device not in debug mode\n"); + return -EFAULT; + } + args->input_size = + min(args->input_size, hl_debug_struct_size[args->op]); + rc = debug_coresight(hdev, args); + break; + case HL_DEBUG_OP_SET_MODE: + rc = hl_device_set_debug_mode(hdev, (bool) args->enable); + break; + default: + dev_err(hdev->dev, "Invalid request %d\n", args->op); + rc = -ENOTTY; + break; + } + + return rc; +} + +#define HL_IOCTL_DEF(ioctl, _func) \ + [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func} + +static const struct hl_ioctl_desc hl_ioctls[] = { + HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl), + HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl), + HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl), + HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl), + HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl), + HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl) +}; + +static const struct hl_ioctl_desc hl_ioctls_control[] = { + HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control) +}; + +static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, + const struct hl_ioctl_desc *ioctl, struct device *dev) +{ + struct hl_fpriv *hpriv = filep->private_data; + struct hl_device *hdev = hpriv->hdev; + unsigned int nr = _IOC_NR(cmd); + char stack_kdata[128] = {0}; + char *kdata = NULL; + unsigned int usize, asize; + hl_ioctl_t *func; + u32 hl_size; + int retcode; + + if (hdev->hard_reset_pending) { + dev_crit_ratelimited(hdev->dev_ctrl, + "Device HARD reset pending! 
Please close FD\n"); + return -ENODEV; + } + + /* Do not trust userspace, use our own definition */ + func = ioctl->func; + + if (unlikely(!func)) { + dev_dbg(dev, "no function\n"); + retcode = -ENOTTY; + goto out_err; + } + + hl_size = _IOC_SIZE(ioctl->cmd); + usize = asize = _IOC_SIZE(cmd); + if (hl_size > asize) + asize = hl_size; + + cmd = ioctl->cmd; + + if (cmd & (IOC_IN | IOC_OUT)) { + if (asize <= sizeof(stack_kdata)) { + kdata = stack_kdata; + } else { + kdata = kzalloc(asize, GFP_KERNEL); + if (!kdata) { + retcode = -ENOMEM; + goto out_err; + } + } + } + + if (cmd & IOC_IN) { + if (copy_from_user(kdata, (void __user *)arg, usize)) { + retcode = -EFAULT; + goto out_err; + } + } else if (cmd & IOC_OUT) { + memset(kdata, 0, usize); + } + + retcode = func(hpriv, kdata); + + if ((cmd & IOC_OUT) && copy_to_user((void __user *)arg, kdata, usize)) + retcode = -EFAULT; + +out_err: + if (retcode) + dev_dbg(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", + task_pid_nr(current), cmd, nr); + + if (kdata != stack_kdata) + kfree(kdata); + + return retcode; +} + +long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct hl_fpriv *hpriv = filep->private_data; + struct hl_device *hdev = hpriv->hdev; + const struct hl_ioctl_desc *ioctl = NULL; + unsigned int nr = _IOC_NR(cmd); + + if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) { + ioctl = &hl_ioctls[nr]; + } else { + dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n", + task_pid_nr(current), nr); + return -ENOTTY; + } + + return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev); +} + +long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct hl_fpriv *hpriv = filep->private_data; + struct hl_device *hdev = hpriv->hdev; + const struct hl_ioctl_desc *ioctl = NULL; + unsigned int nr = _IOC_NR(cmd); + + if (nr == _IOC_NR(HL_IOCTL_INFO)) { + ioctl = &hl_ioctls_control[nr]; + } else { + dev_err(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n", + task_pid_nr(current), nr); + return -ENOTTY; + } + + return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl); +} diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c new file mode 100644 index 000000000000..287681646071 --- /dev/null +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -0,0 +1,918 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +#include + +/* + * hl_queue_add_ptr - add to pi or ci and checks if it wraps around + * + * @ptr: the current pi/ci value + * @val: the amount to add + * + * Add val to ptr. It can go until twice the queue length. 
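/*
 * An illustrative sketch, not part of the moved code: hl_hw_queue_add_ptr()
 * below keeps pi/ci in the range [0, 2 * queue length) so that a full
 * queue can be told apart from an empty one. With a hypothetical 4-entry
 * queue, the free-slot math of queue_free_slots() works out as:
 *
 *   pi = 5, ci = 1 -> delta =  4 -> 0 free slots (full)
 *   pi = 1, ci = 1 -> delta =  0 -> 4 free slots (empty)
 *   pi = 1, ci = 6 -> delta = -5 -> 1 free slot  (wrapped)
 */
static int example_free_slots(int pi, int ci, int queue_len)
{
        int delta = pi - ci;

        return (delta >= 0) ? (queue_len - delta) : (abs(delta) - queue_len);
}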
+ */ +inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val) +{ + ptr += val; + ptr &= ((HL_QUEUE_LENGTH << 1) - 1); + return ptr; +} +static inline int queue_ci_get(atomic_t *ci, u32 queue_len) +{ + return atomic_read(ci) & ((queue_len << 1) - 1); +} + +static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len) +{ + int delta = (q->pi - queue_ci_get(&q->ci, queue_len)); + + if (delta >= 0) + return (queue_len - delta); + else + return (abs(delta) - queue_len); +} + +void hl_int_hw_queue_update_ci(struct hl_cs *cs) +{ + struct hl_device *hdev = cs->ctx->hdev; + struct hl_hw_queue *q; + int i; + + if (hdev->disabled) + return; + + q = &hdev->kernel_queues[0]; + for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) { + if (q->queue_type == QUEUE_TYPE_INT) + atomic_add(cs->jobs_in_queue_cnt[i], &q->ci); + } +} + +/* + * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a + * H/W queue. + * @hdev: pointer to habanalabs device structure + * @q: pointer to habanalabs queue structure + * @ctl: BD's control word + * @len: BD's length + * @ptr: BD's pointer + * + * This function assumes there is enough space on the queue to submit a new + * BD to it. It initializes the next BD and calls the device specific + * function to set the pi (and doorbell) + * + * This function must be called when the scheduler mutex is taken + * + */ +static void ext_and_hw_queue_submit_bd(struct hl_device *hdev, + struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr) +{ + struct hl_bd *bd; + + bd = (struct hl_bd *) (uintptr_t) q->kernel_address; + bd += hl_pi_2_offset(q->pi); + bd->ctl = cpu_to_le32(ctl); + bd->len = cpu_to_le32(len); + bd->ptr = cpu_to_le64(ptr); + + q->pi = hl_queue_inc_ptr(q->pi); + hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); +} + +/* + * ext_queue_sanity_checks - perform some sanity checks on external queue + * + * @hdev : pointer to hl_device structure + * @q : pointer to hl_hw_queue structure + * @num_of_entries : how many entries to check for space + * @reserve_cq_entry : whether to reserve an entry in the cq + * + * H/W queues spinlock should be taken before calling this function + * + * Perform the following: + * - Make sure we have enough space in the h/w queue + * - Make sure we have enough space in the completion queue + * - Reserve space in the completion queue (needs to be reversed if there + * is a failure down the road before the actual submission of work). 
Only + * do this action if reserve_cq_entry is true + * + */ +static int ext_queue_sanity_checks(struct hl_device *hdev, + struct hl_hw_queue *q, int num_of_entries, + bool reserve_cq_entry) +{ + atomic_t *free_slots = + &hdev->completion_queue[q->cq_id].free_slots_cnt; + int free_slots_cnt; + + /* Check we have enough space in the queue */ + free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH); + + if (free_slots_cnt < num_of_entries) { + dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", + q->hw_queue_id, num_of_entries); + return -EAGAIN; + } + + if (reserve_cq_entry) { + /* + * Check we have enough space in the completion queue + * Add -1 to counter (decrement) unless counter was already 0 + * In that case, CQ is full so we can't submit a new CB because + * we won't get ack on its completion + * atomic_add_unless will return 0 if counter was already 0 + */ + if (atomic_add_negative(num_of_entries * -1, free_slots)) { + dev_dbg(hdev->dev, "No space for %d on CQ %d\n", + num_of_entries, q->hw_queue_id); + atomic_add(num_of_entries, free_slots); + return -EAGAIN; + } + } + + return 0; +} + +/* + * int_queue_sanity_checks - perform some sanity checks on internal queue + * + * @hdev : pointer to hl_device structure + * @q : pointer to hl_hw_queue structure + * @num_of_entries : how many entries to check for space + * + * H/W queues spinlock should be taken before calling this function + * + * Perform the following: + * - Make sure we have enough space in the h/w queue + * + */ +static int int_queue_sanity_checks(struct hl_device *hdev, + struct hl_hw_queue *q, + int num_of_entries) +{ + int free_slots_cnt; + + if (num_of_entries > q->int_queue_len) { + dev_err(hdev->dev, + "Cannot populate queue %u with %u jobs\n", + q->hw_queue_id, num_of_entries); + return -ENOMEM; + } + + /* Check we have enough space in the queue */ + free_slots_cnt = queue_free_slots(q, q->int_queue_len); + + if (free_slots_cnt < num_of_entries) { + dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", + q->hw_queue_id, num_of_entries); + return -EAGAIN; + } + + return 0; +} + +/* + * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue + * @hdev: Pointer to hl_device structure. + * @q: Pointer to hl_hw_queue structure. + * @num_of_entries: How many entries to check for space. + * + * Notice: We do not reserve queue entries so this function mustn't be called + * more than once per CS for the same queue + * + */ +static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q, + int num_of_entries) +{ + int free_slots_cnt; + + /* Check we have enough space in the queue */ + free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH); + + if (free_slots_cnt < num_of_entries) { + dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", + q->hw_queue_id, num_of_entries); + return -EAGAIN; + } + + return 0; +} + +/* + * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion + * + * @hdev: pointer to hl_device structure + * @hw_queue_id: Queue's type + * @cb_size: size of CB + * @cb_ptr: pointer to CB location + * + * This function sends a single CB, that must NOT generate a completion entry + * + */ +int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, + u32 cb_size, u64 cb_ptr) +{ + struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; + int rc = 0; + + /* + * The CPU queue is a synchronous queue with an effective depth of + * a single entry (although it is allocated with room for multiple + * entries). 
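/*
 * An illustrative sketch, not part of the moved code: the completion-queue
 * reservation done above in ext_queue_sanity_checks() is a "take
 * optimistically, roll back on failure" pattern on an atomic free-slot
 * counter. In generic form:
 */
static int example_reserve_slots(atomic_t *free_slots, int needed)
{
        /* Grab the slots; if the counter went negative there was not
         * enough room, so return them and let the caller retry later.
         */
        if (atomic_add_negative(-needed, free_slots)) {
                atomic_add(needed, free_slots);
                return -EAGAIN;
        }

        return 0;
}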
Therefore, there is a different lock, called + * send_cpu_message_lock, that serializes accesses to the CPU queue. + * As a result, we don't need to lock the access to the entire H/W + * queues module when submitting a JOB to the CPU queue + */ + if (q->queue_type != QUEUE_TYPE_CPU) + hdev->asic_funcs->hw_queues_lock(hdev); + + if (hdev->disabled) { + rc = -EPERM; + goto out; + } + + /* + * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue + * type only on init phase, when the queues are empty and being tested, + * so there is no need for sanity checks. + */ + if (q->queue_type != QUEUE_TYPE_HW) { + rc = ext_queue_sanity_checks(hdev, q, 1, false); + if (rc) + goto out; + } + + ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); + +out: + if (q->queue_type != QUEUE_TYPE_CPU) + hdev->asic_funcs->hw_queues_unlock(hdev); + + return rc; +} + +/* + * ext_queue_schedule_job - submit a JOB to an external queue + * + * @job: pointer to the job that needs to be submitted to the queue + * + * This function must be called when the scheduler mutex is taken + * + */ +static void ext_queue_schedule_job(struct hl_cs_job *job) +{ + struct hl_device *hdev = job->cs->ctx->hdev; + struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; + struct hl_cq_entry cq_pkt; + struct hl_cq *cq; + u64 cq_addr; + struct hl_cb *cb; + u32 ctl; + u32 len; + u64 ptr; + + /* + * Update the JOB ID inside the BD CTL so the device would know what + * to write in the completion queue + */ + ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK); + + cb = job->patched_cb; + len = job->job_cb_size; + ptr = cb->bus_address; + + cq_pkt.data = cpu_to_le32( + ((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT) + & CQ_ENTRY_SHADOW_INDEX_MASK) | + (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) | + (1 << CQ_ENTRY_READY_SHIFT)); + + /* + * No need to protect pi_offset because scheduling to the + * H/W queues is done under the scheduler mutex + * + * No need to check if CQ is full because it was already + * checked in ext_queue_sanity_checks + */ + cq = &hdev->completion_queue[q->cq_id]; + cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry); + + hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len, + cq_addr, + le32_to_cpu(cq_pkt.data), + q->msi_vec, + job->contains_dma_pkt); + + q->shadow_queue[hl_pi_2_offset(q->pi)] = job; + + cq->pi = hl_cq_inc_ptr(cq->pi); + + ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); +} + +/* + * int_queue_schedule_job - submit a JOB to an internal queue + * + * @job: pointer to the job that needs to be submitted to the queue + * + * This function must be called when the scheduler mutex is taken + * + */ +static void int_queue_schedule_job(struct hl_cs_job *job) +{ + struct hl_device *hdev = job->cs->ctx->hdev; + struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; + struct hl_bd bd; + __le64 *pi; + + bd.ctl = 0; + bd.len = cpu_to_le32(job->job_cb_size); + bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb); + + pi = (__le64 *) (uintptr_t) (q->kernel_address + + ((q->pi & (q->int_queue_len - 1)) * sizeof(bd))); + + q->pi++; + q->pi &= ((q->int_queue_len << 1) - 1); + + hdev->asic_funcs->pqe_write(hdev, pi, &bd); + + hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); +} + +/* + * hw_queue_schedule_job - submit a JOB to a H/W queue + * + * @job: pointer to the job that needs to be submitted to the queue + * + * This function must be called when the scheduler mutex is taken + * + */ +static void hw_queue_schedule_job(struct hl_cs_job 
*job) +{ + struct hl_device *hdev = job->cs->ctx->hdev; + struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; + u64 ptr; + u32 offset, ctl, len; + + /* + * Upon PQE completion, COMP_DATA is used as the write data to the + * completion queue (QMAN HBW message), and COMP_OFFSET is used as the + * write address offset in the SM block (QMAN LBW message). + * The write address offset is calculated as "COMP_OFFSET << 2". + */ + offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1); + ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) | + ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK); + + len = job->job_cb_size; + + /* + * A patched CB is created only if a user CB was allocated by driver and + * MMU is disabled. If MMU is enabled, the user CB should be used + * instead. If the user CB wasn't allocated by driver, assume that it + * holds an address. + */ + if (job->patched_cb) + ptr = job->patched_cb->bus_address; + else if (job->is_kernel_allocated_cb) + ptr = job->user_cb->bus_address; + else + ptr = (u64) (uintptr_t) job->user_cb; + + ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); +} + +/* + * init_signal_wait_cs - initialize a signal/wait CS + * @cs: pointer to the signal/wait CS + * + * H/W queues spinlock should be taken before calling this function + */ +static void init_signal_wait_cs(struct hl_cs *cs) +{ + struct hl_ctx *ctx = cs->ctx; + struct hl_device *hdev = ctx->hdev; + struct hl_hw_queue *hw_queue; + struct hl_cs_compl *cs_cmpl = + container_of(cs->fence, struct hl_cs_compl, base_fence); + + struct hl_hw_sob *hw_sob; + struct hl_cs_job *job; + u32 q_idx; + + /* There is only one job in a signal/wait CS */ + job = list_first_entry(&cs->job_list, struct hl_cs_job, + cs_node); + q_idx = job->hw_queue_id; + hw_queue = &hdev->kernel_queues[q_idx]; + + if (cs->type & CS_TYPE_SIGNAL) { + hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset]; + + cs_cmpl->hw_sob = hw_sob; + cs_cmpl->sob_val = hw_queue->next_sob_val++; + + dev_dbg(hdev->dev, + "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n", + cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx); + + hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, + cs_cmpl->hw_sob->sob_id); + + kref_get(&hw_sob->kref); + + /* check for wraparound */ + if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) { + /* + * Decrement as we reached the max value. + * The release function won't be called here as we've + * just incremented the refcount. 
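/*
 * An illustrative sketch, not part of the moved code: a simplified model
 * of the SOB rotation done in init_signal_wait_cs(), where next_sob_val
 * keeps growing and, once it reaches the maximum, restarts from 1 on the
 * next of the reserved SOBs. The limits are passed in as parameters here.
 */
static void example_advance_sob(u32 *next_val, u32 *sob_offset,
                                u32 max_sob_val, u32 rsvd_sobs_in_use)
{
        if (++(*next_val) == max_sob_val) {
                *next_val = 1;
                *sob_offset = (*sob_offset + 1) % rsvd_sobs_in_use;
        }
}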
+ */ + kref_put(&hw_sob->kref, hl_sob_reset_error); + hw_queue->next_sob_val = 1; + /* only two SOBs are currently in use */ + hw_queue->curr_sob_offset = + (hw_queue->curr_sob_offset + 1) % + HL_RSVD_SOBS_IN_USE; + + dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", + hw_queue->curr_sob_offset, q_idx); + } + } else if (cs->type & CS_TYPE_WAIT) { + struct hl_cs_compl *signal_cs_cmpl; + + signal_cs_cmpl = container_of(cs->signal_fence, + struct hl_cs_compl, + base_fence); + + /* copy the the SOB id and value of the signal CS */ + cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; + cs_cmpl->sob_val = signal_cs_cmpl->sob_val; + + dev_dbg(hdev->dev, + "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n", + cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, + hw_queue->base_mon_id, q_idx); + + hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb, + cs_cmpl->hw_sob->sob_id, + cs_cmpl->sob_val, + hw_queue->base_mon_id, + q_idx); + + kref_get(&cs_cmpl->hw_sob->kref); + /* + * Must put the signal fence after the SOB refcnt increment so + * the SOB refcnt won't turn 0 and reset the SOB before the + * wait CS was submitted. + */ + mb(); + dma_fence_put(cs->signal_fence); + cs->signal_fence = NULL; + } +} + +/* + * hl_hw_queue_schedule_cs - schedule a command submission + * @cs: pointer to the CS + */ +int hl_hw_queue_schedule_cs(struct hl_cs *cs) +{ + struct hl_ctx *ctx = cs->ctx; + struct hl_device *hdev = ctx->hdev; + struct hl_cs_job *job, *tmp; + struct hl_hw_queue *q; + u32 max_queues; + int rc = 0, i, cq_cnt; + + hdev->asic_funcs->hw_queues_lock(hdev); + + if (hl_device_disabled_or_in_reset(hdev)) { + ctx->cs_counters.device_in_reset_drop_cnt++; + dev_err(hdev->dev, + "device is disabled or in reset, CS rejected!\n"); + rc = -EPERM; + goto out; + } + + max_queues = hdev->asic_prop.max_queues; + + q = &hdev->kernel_queues[0]; + for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) { + if (cs->jobs_in_queue_cnt[i]) { + switch (q->queue_type) { + case QUEUE_TYPE_EXT: + rc = ext_queue_sanity_checks(hdev, q, + cs->jobs_in_queue_cnt[i], true); + break; + case QUEUE_TYPE_INT: + rc = int_queue_sanity_checks(hdev, q, + cs->jobs_in_queue_cnt[i]); + break; + case QUEUE_TYPE_HW: + rc = hw_queue_sanity_checks(hdev, q, + cs->jobs_in_queue_cnt[i]); + break; + default: + dev_err(hdev->dev, "Queue type %d is invalid\n", + q->queue_type); + rc = -EINVAL; + break; + } + + if (rc) { + ctx->cs_counters.queue_full_drop_cnt++; + goto unroll_cq_resv; + } + + if (q->queue_type == QUEUE_TYPE_EXT) + cq_cnt++; + } + } + + if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) + init_signal_wait_cs(cs); + + spin_lock(&hdev->hw_queues_mirror_lock); + list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list); + + /* Queue TDR if the CS is the first entry and if timeout is wanted */ + if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) && + (list_first_entry(&hdev->hw_queues_mirror_list, + struct hl_cs, mirror_node) == cs)) { + cs->tdr_active = true; + schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies); + spin_unlock(&hdev->hw_queues_mirror_lock); + } else { + spin_unlock(&hdev->hw_queues_mirror_lock); + } + + if (!hdev->cs_active_cnt++) { + struct hl_device_idle_busy_ts *ts; + + ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx]; + ts->busy_to_idle_ts = ktime_set(0, 0); + ts->idle_to_busy_ts = ktime_get(); + } + + list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) + switch (job->queue_type) { + case QUEUE_TYPE_EXT: + ext_queue_schedule_job(job); + break; + case QUEUE_TYPE_INT: + 
int_queue_schedule_job(job); + break; + case QUEUE_TYPE_HW: + hw_queue_schedule_job(job); + break; + default: + break; + } + + cs->submitted = true; + + goto out; + +unroll_cq_resv: + q = &hdev->kernel_queues[0]; + for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) { + if ((q->queue_type == QUEUE_TYPE_EXT) && + (cs->jobs_in_queue_cnt[i])) { + atomic_t *free_slots = + &hdev->completion_queue[i].free_slots_cnt; + atomic_add(cs->jobs_in_queue_cnt[i], free_slots); + cq_cnt--; + } + } + +out: + hdev->asic_funcs->hw_queues_unlock(hdev); + + return rc; +} + +/* + * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue + * + * @hdev: pointer to hl_device structure + * @hw_queue_id: which queue to increment its ci + */ +void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id) +{ + struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; + + atomic_inc(&q->ci); +} + +static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, + bool is_cpu_queue) +{ + void *p; + int rc; + + if (is_cpu_queue) + p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + HL_QUEUE_SIZE_IN_BYTES, + &q->bus_address); + else + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + &q->bus_address, + GFP_KERNEL | __GFP_ZERO); + if (!p) + return -ENOMEM; + + q->kernel_address = (u64) (uintptr_t) p; + + q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, + sizeof(*q->shadow_queue), + GFP_KERNEL); + if (!q->shadow_queue) { + dev_err(hdev->dev, + "Failed to allocate shadow queue for H/W queue %d\n", + q->hw_queue_id); + rc = -ENOMEM; + goto free_queue; + } + + /* Make sure read/write pointers are initialized to start of queue */ + atomic_set(&q->ci, 0); + q->pi = 0; + + return 0; + +free_queue: + if (is_cpu_queue) + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + HL_QUEUE_SIZE_IN_BYTES, + (void *) (uintptr_t) q->kernel_address); + else + hdev->asic_funcs->asic_dma_free_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + (void *) (uintptr_t) q->kernel_address, + q->bus_address); + + return rc; +} + +static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +{ + void *p; + + p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id, + &q->bus_address, &q->int_queue_len); + if (!p) { + dev_err(hdev->dev, + "Failed to get base address for internal queue %d\n", + q->hw_queue_id); + return -EFAULT; + } + + q->kernel_address = (u64) (uintptr_t) p; + q->pi = 0; + atomic_set(&q->ci, 0); + + return 0; +} + +static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +{ + return ext_and_cpu_queue_init(hdev, q, true); +} + +static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +{ + return ext_and_cpu_queue_init(hdev, q, false); +} + +static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +{ + void *p; + + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + &q->bus_address, + GFP_KERNEL | __GFP_ZERO); + if (!p) + return -ENOMEM; + + q->kernel_address = (u64) (uintptr_t) p; + + /* Make sure read/write pointers are initialized to start of queue */ + atomic_set(&q->ci, 0); + q->pi = 0; + + return 0; +} + +static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx) +{ + struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx]; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_hw_sob *hw_sob; + int sob, queue_idx = hdev->sync_stream_queue_idx++; + + hw_queue->base_sob_id = + prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS; + 
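/*
 * Illustrative sketch (not part of the applied diff): sync_stream_queue_init()
 * here carves a fixed window of sync objects (SOBs) and monitors out of the
 * reserved range for every sync-stream capable queue. The counts and first
 * ids below are made-up example values; the driver takes the real ones from
 * asic_prop and the HL_RSVD_SOBS/HL_RSVD_MONS constants.
 */
#include <stdio.h>

#define DEMO_RSVD_SOBS  4       /* assumed per-queue SOB window     */
#define DEMO_RSVD_MONS  2       /* assumed per-queue monitor window */

int main(void)
{
        unsigned int first_sob = 1000, first_mon = 200; /* example properties */
        unsigned int q;

        for (q = 0; q < 3; q++)
                printf("queue %u: base_sob=%u base_mon=%u\n", q,
                       first_sob + q * DEMO_RSVD_SOBS,
                       first_mon + q * DEMO_RSVD_MONS);
        return 0;
}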
hw_queue->base_mon_id = + prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS; + hw_queue->next_sob_val = 1; + hw_queue->curr_sob_offset = 0; + + for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) { + hw_sob = &hw_queue->hw_sob[sob]; + hw_sob->hdev = hdev; + hw_sob->sob_id = hw_queue->base_sob_id + sob; + hw_sob->q_idx = q_idx; + kref_init(&hw_sob->kref); + } +} + +static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx) +{ + struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx]; + + /* + * In case we got here due to a stuck CS, the refcnt might be bigger + * than 1 and therefore we reset it. + */ + kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref); + hw_queue->curr_sob_offset = 0; + hw_queue->next_sob_val = 1; +} + +/* + * queue_init - main initialization function for H/W queue object + * + * @hdev: pointer to hl_device device structure + * @q: pointer to hl_hw_queue queue structure + * @hw_queue_id: The id of the H/W queue + * + * Allocate dma-able memory for the queue and initialize fields + * Returns 0 on success + */ +static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q, + u32 hw_queue_id) +{ + int rc; + + q->hw_queue_id = hw_queue_id; + + switch (q->queue_type) { + case QUEUE_TYPE_EXT: + rc = ext_queue_init(hdev, q); + break; + case QUEUE_TYPE_INT: + rc = int_queue_init(hdev, q); + break; + case QUEUE_TYPE_CPU: + rc = cpu_queue_init(hdev, q); + break; + case QUEUE_TYPE_HW: + rc = hw_queue_init(hdev, q); + break; + case QUEUE_TYPE_NA: + q->valid = 0; + return 0; + default: + dev_crit(hdev->dev, "wrong queue type %d during init\n", + q->queue_type); + rc = -EINVAL; + break; + } + + if (q->supports_sync_stream) + sync_stream_queue_init(hdev, q->hw_queue_id); + + if (rc) + return rc; + + q->valid = 1; + + return 0; +} + +/* + * hw_queue_fini - destroy queue + * + * @hdev: pointer to hl_device device structure + * @q: pointer to hl_hw_queue queue structure + * + * Free the queue memory + */ +static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q) +{ + if (!q->valid) + return; + + /* + * If we arrived here, there are no jobs waiting on this queue + * so we can safely remove it. + * This is because this function can only called when: + * 1. Either a context is deleted, which only can occur if all its + * jobs were finished + * 2. A context wasn't able to be created due to failure or timeout, + * which means there are no jobs on the queue yet + * + * The only exception are the queues of the kernel context, but + * if they are being destroyed, it means that the entire module is + * being removed. If the module is removed, it means there is no open + * user context. It also means that if a job was submitted by + * the kernel driver (e.g. context creation), the job itself was + * released by the kernel driver when a timeout occurred on its + * Completion. Thus, we don't need to release it again. 
+ */ + + if (q->queue_type == QUEUE_TYPE_INT) + return; + + kfree(q->shadow_queue); + + if (q->queue_type == QUEUE_TYPE_CPU) + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + HL_QUEUE_SIZE_IN_BYTES, + (void *) (uintptr_t) q->kernel_address); + else + hdev->asic_funcs->asic_dma_free_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + (void *) (uintptr_t) q->kernel_address, + q->bus_address); +} + +int hl_hw_queues_create(struct hl_device *hdev) +{ + struct asic_fixed_properties *asic = &hdev->asic_prop; + struct hl_hw_queue *q; + int i, rc, q_ready_cnt; + + hdev->kernel_queues = kcalloc(asic->max_queues, + sizeof(*hdev->kernel_queues), GFP_KERNEL); + + if (!hdev->kernel_queues) { + dev_err(hdev->dev, "Not enough memory for H/W queues\n"); + return -ENOMEM; + } + + /* Initialize the H/W queues */ + for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues; + i < asic->max_queues ; i++, q_ready_cnt++, q++) { + + q->queue_type = asic->hw_queues_props[i].type; + q->supports_sync_stream = + asic->hw_queues_props[i].supports_sync_stream; + rc = queue_init(hdev, q, i); + if (rc) { + dev_err(hdev->dev, + "failed to initialize queue %d\n", i); + goto release_queues; + } + } + + return 0; + +release_queues: + for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++) + queue_fini(hdev, q); + + kfree(hdev->kernel_queues); + + return rc; +} + +void hl_hw_queues_destroy(struct hl_device *hdev) +{ + struct hl_hw_queue *q; + u32 max_queues = hdev->asic_prop.max_queues; + int i; + + for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) + queue_fini(hdev, q); + + kfree(hdev->kernel_queues); +} + +void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset) +{ + struct hl_hw_queue *q; + u32 max_queues = hdev->asic_prop.max_queues; + int i; + + for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) { + if ((!q->valid) || + ((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU))) + continue; + q->pi = 0; + atomic_set(&q->ci, 0); + + if (q->supports_sync_stream) + sync_stream_queue_reset(hdev, q->hw_queue_id); + } +} diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c new file mode 100644 index 000000000000..8c6cd77e6af6 --- /dev/null +++ b/drivers/misc/habanalabs/common/hwmon.c @@ -0,0 +1,579 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. 
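/*
 * Illustrative sketch (not part of the applied diff): hl_hw_queues_create()
 * above follows the usual "initialize N objects, unwind only the ones that
 * already succeeded on failure" pattern. The object type and init/fini bodies
 * below are dummies for illustration.
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_queue { int id; };

static int demo_queue_init(struct demo_queue *q, int id)
{
        q->id = id;
        return (id == 3) ? -1 : 0;      /* pretend queue 3 fails to init */
}

static void demo_queue_fini(struct demo_queue *q)
{
        printf("fini queue %d\n", q->id);
}

int main(void)
{
        const int n = 5;
        struct demo_queue *queues = calloc(n, sizeof(*queues));
        int i, ready;

        if (!queues)
                return 1;

        for (i = 0, ready = 0; i < n; i++, ready++)
                if (demo_queue_init(&queues[i], i))
                        goto release;

        free(queues);
        return 0;

release:
        /* Only the entries that were fully initialized are torn down */
        for (i = 0; i < ready; i++)
                demo_queue_fini(&queues[i]);
        free(queues);
        return 1;
}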
+ */ + +#include "habanalabs.h" + +#include +#include + +#define SENSORS_PKT_TIMEOUT 1000000 /* 1s */ +#define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1) + +int hl_build_hwmon_channel_info(struct hl_device *hdev, + struct armcp_sensor *sensors_arr) +{ + u32 counts[HWMON_NR_SENSOR_TYPES] = {0}; + u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL}; + u32 sensors_by_type_next_index[HWMON_NR_SENSOR_TYPES] = {0}; + struct hwmon_channel_info **channels_info; + u32 num_sensors_for_type, num_active_sensor_types = 0, + arr_size = 0, *curr_arr; + enum hwmon_sensor_types type; + int rc, i, j; + + for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) { + type = le32_to_cpu(sensors_arr[i].type); + + if ((type == 0) && (sensors_arr[i].flags == 0)) + break; + + if (type >= HWMON_NR_SENSOR_TYPES) { + dev_err(hdev->dev, + "Got wrong sensor type %d from device\n", type); + return -EINVAL; + } + + counts[type]++; + arr_size++; + } + + for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) { + if (counts[i] == 0) + continue; + + num_sensors_for_type = counts[i] + 1; + curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr), + GFP_KERNEL); + if (!curr_arr) { + rc = -ENOMEM; + goto sensors_type_err; + } + + num_active_sensor_types++; + sensors_by_type[i] = curr_arr; + } + + for (i = 0 ; i < arr_size ; i++) { + type = le32_to_cpu(sensors_arr[i].type); + curr_arr = sensors_by_type[type]; + curr_arr[sensors_by_type_next_index[type]++] = + le32_to_cpu(sensors_arr[i].flags); + } + + channels_info = kcalloc(num_active_sensor_types + 1, + sizeof(*channels_info), GFP_KERNEL); + if (!channels_info) { + rc = -ENOMEM; + goto channels_info_array_err; + } + + for (i = 0 ; i < num_active_sensor_types ; i++) { + channels_info[i] = kzalloc(sizeof(*channels_info[i]), + GFP_KERNEL); + if (!channels_info[i]) { + rc = -ENOMEM; + goto channel_info_err; + } + } + + for (i = 0, j = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) { + if (!sensors_by_type[i]) + continue; + + channels_info[j]->type = i; + channels_info[j]->config = sensors_by_type[i]; + j++; + } + + hdev->hl_chip_info->info = + (const struct hwmon_channel_info **)channels_info; + + return 0; + +channel_info_err: + for (i = 0 ; i < num_active_sensor_types ; i++) + if (channels_info[i]) { + kfree(channels_info[i]->config); + kfree(channels_info[i]); + } + kfree(channels_info); +channels_info_array_err: +sensors_type_err: + for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) + kfree(sensors_by_type[i]); + + return rc; +} + +static int hl_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) + return -ENODEV; + + switch (type) { + case hwmon_temp: + switch (attr) { + case hwmon_temp_input: + case hwmon_temp_max: + case hwmon_temp_crit: + case hwmon_temp_max_hyst: + case hwmon_temp_crit_hyst: + case hwmon_temp_offset: + case hwmon_temp_highest: + break; + default: + return -EINVAL; + } + + rc = hl_get_temperature(hdev, channel, attr, val); + break; + case hwmon_in: + switch (attr) { + case hwmon_in_input: + case hwmon_in_min: + case hwmon_in_max: + case hwmon_in_highest: + break; + default: + return -EINVAL; + } + + rc = hl_get_voltage(hdev, channel, attr, val); + break; + case hwmon_curr: + switch (attr) { + case hwmon_curr_input: + case hwmon_curr_min: + case hwmon_curr_max: + case hwmon_curr_highest: + break; + default: + return -EINVAL; + } + + rc = hl_get_current(hdev, channel, attr, val); + break; + case hwmon_fan: + switch (attr) { + case hwmon_fan_input: + case 
hwmon_fan_min: + case hwmon_fan_max: + break; + default: + return -EINVAL; + } + rc = hl_get_fan_speed(hdev, channel, attr, val); + break; + case hwmon_pwm: + switch (attr) { + case hwmon_pwm_input: + case hwmon_pwm_enable: + break; + default: + return -EINVAL; + } + rc = hl_get_pwm_info(hdev, channel, attr, val); + break; + default: + return -EINVAL; + } + return rc; +} + +static int hl_write(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + if (hl_device_disabled_or_in_reset(hdev)) + return -ENODEV; + + switch (type) { + case hwmon_temp: + switch (attr) { + case hwmon_temp_offset: + case hwmon_temp_reset_history: + break; + default: + return -EINVAL; + } + hl_set_temperature(hdev, channel, attr, val); + break; + case hwmon_pwm: + switch (attr) { + case hwmon_pwm_input: + case hwmon_pwm_enable: + break; + default: + return -EINVAL; + } + hl_set_pwm_info(hdev, channel, attr, val); + break; + case hwmon_in: + switch (attr) { + case hwmon_in_reset_history: + break; + default: + return -EINVAL; + } + hl_set_voltage(hdev, channel, attr, val); + break; + case hwmon_curr: + switch (attr) { + case hwmon_curr_reset_history: + break; + default: + return -EINVAL; + } + hl_set_current(hdev, channel, attr, val); + break; + default: + return -EINVAL; + } + return 0; +} + +static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type, + u32 attr, int channel) +{ + switch (type) { + case hwmon_temp: + switch (attr) { + case hwmon_temp_input: + case hwmon_temp_max: + case hwmon_temp_max_hyst: + case hwmon_temp_crit: + case hwmon_temp_crit_hyst: + case hwmon_temp_highest: + return 0444; + case hwmon_temp_offset: + return 0644; + case hwmon_temp_reset_history: + return 0200; + } + break; + case hwmon_in: + switch (attr) { + case hwmon_in_input: + case hwmon_in_min: + case hwmon_in_max: + case hwmon_in_highest: + return 0444; + case hwmon_in_reset_history: + return 0200; + } + break; + case hwmon_curr: + switch (attr) { + case hwmon_curr_input: + case hwmon_curr_min: + case hwmon_curr_max: + case hwmon_curr_highest: + return 0444; + case hwmon_curr_reset_history: + return 0200; + } + break; + case hwmon_fan: + switch (attr) { + case hwmon_fan_input: + case hwmon_fan_min: + case hwmon_fan_max: + return 0444; + } + break; + case hwmon_pwm: + switch (attr) { + case hwmon_pwm_input: + case hwmon_pwm_enable: + return 0644; + } + break; + default: + break; + } + return 0; +} + +static const struct hwmon_ops hl_hwmon_ops = { + .is_visible = hl_is_visible, + .read = hl_read, + .write = hl_write +}; + +int hl_get_temperature(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) +{ + struct armcp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SENSORS_PKT_TIMEOUT, value); + + if (rc) { + dev_err(hdev->dev, + "Failed to get temperature from sensor %d, error %d\n", + sensor_index, rc); + *value = 0; + } + + return rc; +} + +int hl_set_temperature(struct hl_device *hdev, + int sensor_index, u32 attr, long value) +{ + struct armcp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = 
__cpu_to_le16(attr); + pkt.value = __cpu_to_le64(value); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SENSORS_PKT_TIMEOUT, NULL); + + if (rc) + dev_err(hdev->dev, + "Failed to set temperature of sensor %d, error %d\n", + sensor_index, rc); + + return rc; +} + +int hl_get_voltage(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) +{ + struct armcp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SENSORS_PKT_TIMEOUT, value); + + if (rc) { + dev_err(hdev->dev, + "Failed to get voltage from sensor %d, error %d\n", + sensor_index, rc); + *value = 0; + } + + return rc; +} + +int hl_get_current(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) +{ + struct armcp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SENSORS_PKT_TIMEOUT, value); + + if (rc) { + dev_err(hdev->dev, + "Failed to get current from sensor %d, error %d\n", + sensor_index, rc); + *value = 0; + } + + return rc; +} + +int hl_get_fan_speed(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) +{ + struct armcp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SENSORS_PKT_TIMEOUT, value); + + if (rc) { + dev_err(hdev->dev, + "Failed to get fan speed from sensor %d, error %d\n", + sensor_index, rc); + *value = 0; + } + + return rc; +} + +int hl_get_pwm_info(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) +{ + struct armcp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SENSORS_PKT_TIMEOUT, value); + + if (rc) { + dev_err(hdev->dev, + "Failed to get pwm info from sensor %d, error %d\n", + sensor_index, rc); + *value = 0; + } + + return rc; +} + +void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, + long value) +{ + struct armcp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + pkt.value = cpu_to_le64(value); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SENSORS_PKT_TIMEOUT, NULL); + + if (rc) + dev_err(hdev->dev, + "Failed to set pwm info to sensor %d, error %d\n", + sensor_index, rc); +} + +int hl_set_voltage(struct hl_device *hdev, + int sensor_index, u32 attr, long value) +{ + struct armcp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + 
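/*
 * Illustrative sketch (not part of the applied diff): every sensor accessor
 * above fills the same few armcp_packet fields - opcode shifted into ctl,
 * sensor index, attribute, and (for "set" requests) a value - and hands the
 * packet to send_cpu_message(). The struct layout and opcode value here are
 * simplified stand-ins, not the real armcp_if.h definitions.
 */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define DEMO_PKT_CTL_OPCODE_SHIFT       16      /* assumed field position */
#define DEMO_OPCODE_TEMPERATURE_GET     0x2     /* made-up opcode value   */

struct demo_armcp_packet {
        uint32_t ctl;
        uint16_t sensor_index;
        uint16_t type;
        uint64_t value;
};

static void demo_build_sensor_pkt(struct demo_armcp_packet *pkt,
                                  uint32_t opcode, uint16_t sensor, uint16_t attr)
{
        memset(pkt, 0, sizeof(*pkt));   /* zero unused fields, as the driver does */
        pkt->ctl = opcode << DEMO_PKT_CTL_OPCODE_SHIFT;
        pkt->sensor_index = sensor;
        pkt->type = attr;
        /* a "set" request would also fill pkt->value before sending */
}

int main(void)
{
        struct demo_armcp_packet pkt;

        demo_build_sensor_pkt(&pkt, DEMO_OPCODE_TEMPERATURE_GET, 1, 0);
        printf("ctl=0x%x sensor=%u\n", (unsigned)pkt.ctl,
               (unsigned)pkt.sensor_index);
        return 0;
}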
pkt.value = __cpu_to_le64(value); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SENSORS_PKT_TIMEOUT, NULL); + + if (rc) + dev_err(hdev->dev, + "Failed to set voltage of sensor %d, error %d\n", + sensor_index, rc); + + return rc; +} + +int hl_set_current(struct hl_device *hdev, + int sensor_index, u32 attr, long value) +{ + struct armcp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + pkt.value = __cpu_to_le64(value); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SENSORS_PKT_TIMEOUT, NULL); + + if (rc) + dev_err(hdev->dev, + "Failed to set current of sensor %d, error %d\n", + sensor_index, rc); + + return rc; +} + +int hl_hwmon_init(struct hl_device *hdev) +{ + struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc; + + if ((hdev->hwmon_initialized) || !(hdev->fw_loading)) + return 0; + + if (hdev->hl_chip_info->info) { + hdev->hl_chip_info->ops = &hl_hwmon_ops; + + hdev->hwmon_dev = hwmon_device_register_with_info(dev, + prop->armcp_info.card_name, hdev, + hdev->hl_chip_info, NULL); + if (IS_ERR(hdev->hwmon_dev)) { + rc = PTR_ERR(hdev->hwmon_dev); + dev_err(hdev->dev, + "Unable to register hwmon device: %d\n", rc); + return rc; + } + + dev_info(hdev->dev, "%s: add sensors information\n", + dev_name(hdev->hwmon_dev)); + + hdev->hwmon_initialized = true; + } else { + dev_info(hdev->dev, "no available sensors\n"); + } + + return 0; +} + +void hl_hwmon_fini(struct hl_device *hdev) +{ + if (!hdev->hwmon_initialized) + return; + + hwmon_device_unregister(hdev->hwmon_dev); +} diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c new file mode 100644 index 000000000000..c8db717023f5 --- /dev/null +++ b/drivers/misc/habanalabs/common/irq.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +#include + +/** + * struct hl_eqe_work - This structure is used to schedule work of EQ + * entry and armcp_reset event + * + * @eq_work: workqueue object to run when EQ entry is received + * @hdev: pointer to device structure + * @eq_entry: copy of the EQ entry + */ +struct hl_eqe_work { + struct work_struct eq_work; + struct hl_device *hdev; + struct hl_eq_entry eq_entry; +}; + +/** + * hl_cq_inc_ptr - increment ci or pi of cq + * + * @ptr: the current ci or pi value of the completion queue + * + * Increment ptr by 1. If it reaches the number of completion queue + * entries, set it to 0 + */ +inline u32 hl_cq_inc_ptr(u32 ptr) +{ + ptr++; + if (unlikely(ptr == HL_CQ_LENGTH)) + ptr = 0; + return ptr; +} + +/** + * hl_eq_inc_ptr - increment ci of eq + * + * @ptr: the current ci value of the event queue + * + * Increment ptr by 1. 
If it reaches the number of event queue + * entries, set it to 0 + */ +inline u32 hl_eq_inc_ptr(u32 ptr) +{ + ptr++; + if (unlikely(ptr == HL_EQ_LENGTH)) + ptr = 0; + return ptr; +} + +static void irq_handle_eqe(struct work_struct *work) +{ + struct hl_eqe_work *eqe_work = container_of(work, struct hl_eqe_work, + eq_work); + struct hl_device *hdev = eqe_work->hdev; + + hdev->asic_funcs->handle_eqe(hdev, &eqe_work->eq_entry); + + kfree(eqe_work); +} + +/** + * hl_irq_handler_cq - irq handler for completion queue + * + * @irq: irq number + * @arg: pointer to completion queue structure + * + */ +irqreturn_t hl_irq_handler_cq(int irq, void *arg) +{ + struct hl_cq *cq = arg; + struct hl_device *hdev = cq->hdev; + struct hl_hw_queue *queue; + struct hl_cs_job *job; + bool shadow_index_valid; + u16 shadow_index; + struct hl_cq_entry *cq_entry, *cq_base; + + if (hdev->disabled) { + dev_dbg(hdev->dev, + "Device disabled but received IRQ %d for CQ %d\n", + irq, cq->hw_queue_id); + return IRQ_HANDLED; + } + + cq_base = (struct hl_cq_entry *) (uintptr_t) cq->kernel_address; + + while (1) { + bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) & + CQ_ENTRY_READY_MASK) + >> CQ_ENTRY_READY_SHIFT); + + if (!entry_ready) + break; + + cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci]; + + /* Make sure we read CQ entry contents after we've + * checked the ownership bit. + */ + dma_rmb(); + + shadow_index_valid = ((le32_to_cpu(cq_entry->data) & + CQ_ENTRY_SHADOW_INDEX_VALID_MASK) + >> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT); + + shadow_index = (u16) ((le32_to_cpu(cq_entry->data) & + CQ_ENTRY_SHADOW_INDEX_MASK) + >> CQ_ENTRY_SHADOW_INDEX_SHIFT); + + queue = &hdev->kernel_queues[cq->hw_queue_id]; + + if ((shadow_index_valid) && (!hdev->disabled)) { + job = queue->shadow_queue[hl_pi_2_offset(shadow_index)]; + queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work); + } + + atomic_inc(&queue->ci); + + /* Clear CQ entry ready bit */ + cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) & + ~CQ_ENTRY_READY_MASK); + + cq->ci = hl_cq_inc_ptr(cq->ci); + + /* Increment free slots */ + atomic_inc(&cq->free_slots_cnt); + } + + return IRQ_HANDLED; +} + +/** + * hl_irq_handler_eq - irq handler for event queue + * + * @irq: irq number + * @arg: pointer to event queue structure + * + */ +irqreturn_t hl_irq_handler_eq(int irq, void *arg) +{ + struct hl_eq *eq = arg; + struct hl_device *hdev = eq->hdev; + struct hl_eq_entry *eq_entry; + struct hl_eq_entry *eq_base; + struct hl_eqe_work *handle_eqe_work; + + eq_base = (struct hl_eq_entry *) (uintptr_t) eq->kernel_address; + + while (1) { + bool entry_ready = + ((le32_to_cpu(eq_base[eq->ci].hdr.ctl) & + EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT); + + if (!entry_ready) + break; + + eq_entry = &eq_base[eq->ci]; + + /* + * Make sure we read EQ entry contents after we've + * checked the ownership bit. 
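/*
 * Illustrative sketch (not part of the applied diff): both interrupt handlers
 * above are single-consumer ring loops - test the READY bit, issue a read
 * barrier, consume the entry, clear READY, then advance ci with wraparound
 * (hl_cq_inc_ptr/hl_eq_inc_ptr). demo_rmb() stands in for dma_rmb(), which is
 * only available in kernel code.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_CQ_LENGTH  8
#define DEMO_READY_BIT  (1u << 31)

struct demo_cq_entry { uint32_t data; };

static void demo_rmb(void)
{
        __atomic_thread_fence(__ATOMIC_ACQUIRE);        /* kernel code uses dma_rmb() */
}

static uint32_t demo_cq_inc_ptr(uint32_t ptr)
{
        return (++ptr == DEMO_CQ_LENGTH) ? 0 : ptr;     /* same wrap rule as hl_cq_inc_ptr() */
}

int main(void)
{
        struct demo_cq_entry cq[DEMO_CQ_LENGTH] = { { DEMO_READY_BIT | 7 } };
        uint32_t ci = 0;

        while (cq[ci].data & DEMO_READY_BIT) {
                demo_rmb();                     /* entry contents are only valid after READY is seen */
                printf("completed job %u\n", cq[ci].data & ~DEMO_READY_BIT);
                cq[ci].data &= ~DEMO_READY_BIT; /* give the slot back to the producer */
                ci = demo_cq_inc_ptr(ci);
        }
        return 0;
}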
+ */ + dma_rmb(); + + if (hdev->disabled) { + dev_warn(hdev->dev, + "Device disabled but received IRQ %d for EQ\n", + irq); + goto skip_irq; + } + + handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC); + if (handle_eqe_work) { + INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe); + handle_eqe_work->hdev = hdev; + + memcpy(&handle_eqe_work->eq_entry, eq_entry, + sizeof(*eq_entry)); + + queue_work(hdev->eq_wq, &handle_eqe_work->eq_work); + } +skip_irq: + /* Clear EQ entry ready bit */ + eq_entry->hdr.ctl = + cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) & + ~EQ_CTL_READY_MASK); + + eq->ci = hl_eq_inc_ptr(eq->ci); + + hdev->asic_funcs->update_eq_ci(hdev, eq->ci); + } + + return IRQ_HANDLED; +} + +/** + * hl_cq_init - main initialization function for an cq object + * + * @hdev: pointer to device structure + * @q: pointer to cq structure + * @hw_queue_id: The H/W queue ID this completion queue belongs to + * + * Allocate dma-able memory for the completion queue and initialize fields + * Returns 0 on success + */ +int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id) +{ + void *p; + + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES, + &q->bus_address, GFP_KERNEL | __GFP_ZERO); + if (!p) + return -ENOMEM; + + q->hdev = hdev; + q->kernel_address = (u64) (uintptr_t) p; + q->hw_queue_id = hw_queue_id; + q->ci = 0; + q->pi = 0; + + atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH); + + return 0; +} + +/** + * hl_cq_fini - destroy completion queue + * + * @hdev: pointer to device structure + * @q: pointer to cq structure + * + * Free the completion queue memory + */ +void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q) +{ + hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES, + (void *) (uintptr_t) q->kernel_address, q->bus_address); +} + +void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q) +{ + q->ci = 0; + q->pi = 0; + + atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH); + + /* + * It's not enough to just reset the PI/CI because the H/W may have + * written valid completion entries before it was halted and therefore + * we need to clean the actual queues so we won't process old entries + * when the device is operational again + */ + + memset((void *) (uintptr_t) q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES); +} + +/** + * hl_eq_init - main initialization function for an event queue object + * + * @hdev: pointer to device structure + * @q: pointer to eq structure + * + * Allocate dma-able memory for the event queue and initialize fields + * Returns 0 on success + */ +int hl_eq_init(struct hl_device *hdev, struct hl_eq *q) +{ + void *p; + + p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + HL_EQ_SIZE_IN_BYTES, + &q->bus_address); + if (!p) + return -ENOMEM; + + q->hdev = hdev; + q->kernel_address = (u64) (uintptr_t) p; + q->ci = 0; + + return 0; +} + +/** + * hl_eq_fini - destroy event queue + * + * @hdev: pointer to device structure + * @q: pointer to eq structure + * + * Free the event queue memory + */ +void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q) +{ + flush_workqueue(hdev->eq_wq); + + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + HL_EQ_SIZE_IN_BYTES, + (void *) (uintptr_t) q->kernel_address); +} + +void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) +{ + q->ci = 0; + + /* + * It's not enough to just reset the PI/CI because the H/W may have + * written valid completion entries before it was halted and therefore + * we need to clean the actual queues so we won't process old entries + * when the 
device is operational again + */ + + memset((void *) (uintptr_t) q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES); +} diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c new file mode 100644 index 000000000000..e4e1693e5c6c --- /dev/null +++ b/drivers/misc/habanalabs/common/memory.c @@ -0,0 +1,1843 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include +#include "habanalabs.h" +#include "include/hw_ip/mmu/mmu_general.h" + +#include +#include +#include + +#define HL_MMU_DEBUG 0 + +/* + * The va ranges in context object contain a list with the available chunks of + * device virtual memory. + * There is one range for host allocations and one for DRAM allocations. + * + * On initialization each range contains one chunk of all of its available + * virtual range which is a half of the total device virtual range. + * + * On each mapping of physical pages, a suitable virtual range chunk (with a + * minimum size) is selected from the list. If the chunk size equals the + * requested size, the chunk is returned. Otherwise, the chunk is split into + * two chunks - one to return as result and a remainder to stay in the list. + * + * On each Unmapping of a virtual address, the relevant virtual chunk is + * returned to the list. The chunk is added to the list and if its edges match + * the edges of the adjacent chunks (means a contiguous chunk can be created), + * the chunks are merged. + * + * On finish, the list is checked to have only one chunk of all the relevant + * virtual range (which is a half of the device total virtual range). + * If not (means not all mappings were unmapped), a warning is printed. + */ + +/* + * alloc_device_memory - allocate device memory + * + * @ctx : current context + * @args : host parameters containing the requested size + * @ret_handle : result handle + * + * This function does the following: + * - Allocate the requested size rounded up to 2MB pages + * - Return unique handle + */ +static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, + u32 *ret_handle) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm *vm = &hdev->vm; + struct hl_vm_phys_pg_pack *phys_pg_pack; + u64 paddr = 0, total_size, num_pgs, i; + u32 num_curr_pgs, page_size, page_shift; + int handle, rc; + bool contiguous; + + num_curr_pgs = 0; + page_size = hdev->asic_prop.dram_page_size; + page_shift = __ffs(page_size); + num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift; + total_size = num_pgs << page_shift; + + contiguous = args->flags & HL_MEM_CONTIGUOUS; + + if (contiguous) { + paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size); + if (!paddr) { + dev_err(hdev->dev, + "failed to allocate %llu huge contiguous pages\n", + num_pgs); + return -ENOMEM; + } + } + + phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); + if (!phys_pg_pack) { + rc = -ENOMEM; + goto pages_pack_err; + } + + phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK; + phys_pg_pack->asid = ctx->asid; + phys_pg_pack->npages = num_pgs; + phys_pg_pack->page_size = page_size; + phys_pg_pack->total_size = total_size; + phys_pg_pack->flags = args->flags; + phys_pg_pack->contiguous = contiguous; + + phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL); + if (!phys_pg_pack->pages) { + rc = -ENOMEM; + goto pages_arr_err; + } + + if (phys_pg_pack->contiguous) { + for (i = 0 ; i < num_pgs ; i++) + phys_pg_pack->pages[i] = paddr + i * page_size; + } else { + for (i = 0 ; i < num_pgs 
; i++) { + phys_pg_pack->pages[i] = (u64) gen_pool_alloc( + vm->dram_pg_pool, + page_size); + if (!phys_pg_pack->pages[i]) { + dev_err(hdev->dev, + "Failed to allocate device memory (out of memory)\n"); + rc = -ENOMEM; + goto page_err; + } + + num_curr_pgs++; + } + } + + spin_lock(&vm->idr_lock); + handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0, + GFP_ATOMIC); + spin_unlock(&vm->idr_lock); + + if (handle < 0) { + dev_err(hdev->dev, "Failed to get handle for page\n"); + rc = -EFAULT; + goto idr_err; + } + + for (i = 0 ; i < num_pgs ; i++) + kref_get(&vm->dram_pg_pool_refcount); + + phys_pg_pack->handle = handle; + + atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem); + atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem); + + *ret_handle = handle; + + return 0; + +idr_err: +page_err: + if (!phys_pg_pack->contiguous) + for (i = 0 ; i < num_curr_pgs ; i++) + gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i], + page_size); + + kvfree(phys_pg_pack->pages); +pages_arr_err: + kfree(phys_pg_pack); +pages_pack_err: + if (contiguous) + gen_pool_free(vm->dram_pg_pool, paddr, total_size); + + return rc; +} + +/* + * dma_map_host_va - DMA mapping of the given host virtual address. + * @hdev: habanalabs device structure + * @addr: the host virtual address of the memory area + * @size: the size of the memory area + * @p_userptr: pointer to result userptr structure + * + * This function does the following: + * - Allocate userptr structure + * - Pin the given host memory using the userptr structure + * - Perform DMA mapping to have the DMA addresses of the pages + */ +static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, + struct hl_userptr **p_userptr) +{ + struct hl_userptr *userptr; + int rc; + + userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); + if (!userptr) { + rc = -ENOMEM; + goto userptr_err; + } + + rc = hl_pin_host_memory(hdev, addr, size, userptr); + if (rc) { + dev_err(hdev->dev, "Failed to pin host memory\n"); + goto pin_err; + } + + rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, + userptr->sgt->nents, DMA_BIDIRECTIONAL); + if (rc) { + dev_err(hdev->dev, "failed to map sgt with DMA region\n"); + goto dma_map_err; + } + + userptr->dma_mapped = true; + userptr->dir = DMA_BIDIRECTIONAL; + userptr->vm_type = VM_TYPE_USERPTR; + + *p_userptr = userptr; + + return 0; + +dma_map_err: + hl_unpin_host_memory(hdev, userptr); +pin_err: + kfree(userptr); +userptr_err: + + return rc; +} + +/* + * dma_unmap_host_va - DMA unmapping of the given host virtual address. 
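/*
 * Illustrative sketch (not part of the applied diff): alloc_device_memory()
 * above rounds the requested size up to whole DRAM pages with the usual
 * (size + page_size - 1) >> page_shift trick. The 2MB page size below is
 * just an example; the driver reads the real value from
 * asic_prop.dram_page_size.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t page_size = 2ULL << 20;                        /* assumed 2MB DRAM page  */
        uint32_t page_shift = __builtin_ctzll(page_size);       /* userspace __ffs() stand-in */
        uint64_t req = 5ULL << 20;                              /* user asks for 5MB      */
        uint64_t num_pgs = (req + page_size - 1) >> page_shift;
        uint64_t total = num_pgs << page_shift;

        printf("request %llu -> %llu pages, %llu bytes\n",
               (unsigned long long)req, (unsigned long long)num_pgs,
               (unsigned long long)total);
        return 0;
}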
+ * @hdev: habanalabs device structure + * @userptr: userptr to free + * + * This function does the following: + * - Unpins the physical pages + * - Frees the userptr structure + */ +static void dma_unmap_host_va(struct hl_device *hdev, + struct hl_userptr *userptr) +{ + hl_unpin_host_memory(hdev, userptr); + kfree(userptr); +} + +/* + * dram_pg_pool_do_release - free DRAM pages pool + * + * @ref : pointer to reference object + * + * This function does the following: + * - Frees the idr structure of physical pages handles + * - Frees the generic pool of DRAM physical pages + */ +static void dram_pg_pool_do_release(struct kref *ref) +{ + struct hl_vm *vm = container_of(ref, struct hl_vm, + dram_pg_pool_refcount); + + /* + * free the idr here as only here we know for sure that there are no + * allocated physical pages and hence there are no handles in use + */ + idr_destroy(&vm->phys_pg_pack_handles); + gen_pool_destroy(vm->dram_pg_pool); +} + +/* + * free_phys_pg_pack - free physical page pack + * @hdev: habanalabs device structure + * @phys_pg_pack: physical page pack to free + * + * This function does the following: + * - For DRAM memory only, iterate over the pack and free each physical block + * structure by returning it to the general pool + * - Free the hl_vm_phys_pg_pack structure + */ +static void free_phys_pg_pack(struct hl_device *hdev, + struct hl_vm_phys_pg_pack *phys_pg_pack) +{ + struct hl_vm *vm = &hdev->vm; + u64 i; + + if (!phys_pg_pack->created_from_userptr) { + if (phys_pg_pack->contiguous) { + gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0], + phys_pg_pack->total_size); + + for (i = 0; i < phys_pg_pack->npages ; i++) + kref_put(&vm->dram_pg_pool_refcount, + dram_pg_pool_do_release); + } else { + for (i = 0 ; i < phys_pg_pack->npages ; i++) { + gen_pool_free(vm->dram_pg_pool, + phys_pg_pack->pages[i], + phys_pg_pack->page_size); + kref_put(&vm->dram_pg_pool_refcount, + dram_pg_pool_do_release); + } + } + } + + kvfree(phys_pg_pack->pages); + kfree(phys_pg_pack); +} + +/* + * free_device_memory - free device memory + * + * @ctx : current context + * @handle : handle of the memory chunk to free + * + * This function does the following: + * - Free the device memory related to the given handle + */ +static int free_device_memory(struct hl_ctx *ctx, u32 handle) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm *vm = &hdev->vm; + struct hl_vm_phys_pg_pack *phys_pg_pack; + + spin_lock(&vm->idr_lock); + phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); + if (phys_pg_pack) { + if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) { + dev_err(hdev->dev, "handle %u is mapped, cannot free\n", + handle); + spin_unlock(&vm->idr_lock); + return -EINVAL; + } + + /* + * must remove from idr before the freeing of the physical + * pages as the refcount of the pool is also the trigger of the + * idr destroy + */ + idr_remove(&vm->phys_pg_pack_handles, handle); + spin_unlock(&vm->idr_lock); + + atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem); + atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem); + + free_phys_pg_pack(hdev, phys_pg_pack); + } else { + spin_unlock(&vm->idr_lock); + dev_err(hdev->dev, + "free device memory failed, no match for handle %u\n", + handle); + return -EINVAL; + } + + return 0; +} + +/* + * clear_va_list_locked - free virtual addresses list + * + * @hdev : habanalabs device structure + * @va_list : list of virtual addresses to free + * + * This function does the following: + * - Iterate over the list and free each virtual addresses 
block + * + * This function should be called only when va_list lock is taken + */ +static void clear_va_list_locked(struct hl_device *hdev, + struct list_head *va_list) +{ + struct hl_vm_va_block *va_block, *tmp; + + list_for_each_entry_safe(va_block, tmp, va_list, node) { + list_del(&va_block->node); + kfree(va_block); + } +} + +/* + * print_va_list_locked - print virtual addresses list + * + * @hdev : habanalabs device structure + * @va_list : list of virtual addresses to print + * + * This function does the following: + * - Iterate over the list and print each virtual addresses block + * + * This function should be called only when va_list lock is taken + */ +static void print_va_list_locked(struct hl_device *hdev, + struct list_head *va_list) +{ +#if HL_MMU_DEBUG + struct hl_vm_va_block *va_block; + + dev_dbg(hdev->dev, "print va list:\n"); + + list_for_each_entry(va_block, va_list, node) + dev_dbg(hdev->dev, + "va block, start: 0x%llx, end: 0x%llx, size: %llu\n", + va_block->start, va_block->end, va_block->size); +#endif +} + +/* + * merge_va_blocks_locked - merge a virtual block if possible + * + * @hdev : pointer to the habanalabs device structure + * @va_list : pointer to the virtual addresses block list + * @va_block : virtual block to merge with adjacent blocks + * + * This function does the following: + * - Merge the given blocks with the adjacent blocks if their virtual ranges + * create a contiguous virtual range + * + * This Function should be called only when va_list lock is taken + */ +static void merge_va_blocks_locked(struct hl_device *hdev, + struct list_head *va_list, struct hl_vm_va_block *va_block) +{ + struct hl_vm_va_block *prev, *next; + + prev = list_prev_entry(va_block, node); + if (&prev->node != va_list && prev->end + 1 == va_block->start) { + prev->end = va_block->end; + prev->size = prev->end - prev->start; + list_del(&va_block->node); + kfree(va_block); + va_block = prev; + } + + next = list_next_entry(va_block, node); + if (&next->node != va_list && va_block->end + 1 == next->start) { + next->start = va_block->start; + next->size = next->end - next->start; + list_del(&va_block->node); + kfree(va_block); + } +} + +/* + * add_va_block_locked - add a virtual block to the virtual addresses list + * + * @hdev : pointer to the habanalabs device structure + * @va_list : pointer to the virtual addresses block list + * @start : start virtual address + * @end : end virtual address + * + * This function does the following: + * - Add the given block to the virtual blocks list and merge with other + * blocks if a contiguous virtual block can be created + * + * This Function should be called only when va_list lock is taken + */ +static int add_va_block_locked(struct hl_device *hdev, + struct list_head *va_list, u64 start, u64 end) +{ + struct hl_vm_va_block *va_block, *res = NULL; + u64 size = end - start; + + print_va_list_locked(hdev, va_list); + + list_for_each_entry(va_block, va_list, node) { + /* TODO: remove upon matureness */ + if (hl_mem_area_crosses_range(start, size, va_block->start, + va_block->end)) { + dev_err(hdev->dev, + "block crossing ranges at start 0x%llx, end 0x%llx\n", + va_block->start, va_block->end); + return -EINVAL; + } + + if (va_block->end < start) + res = va_block; + } + + va_block = kmalloc(sizeof(*va_block), GFP_KERNEL); + if (!va_block) + return -ENOMEM; + + va_block->start = start; + va_block->end = end; + va_block->size = size; + + if (!res) + list_add(&va_block->node, va_list); + else + list_add(&va_block->node, &res->node); + + 
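/*
 * Illustrative sketch (not part of the applied diff): merge_va_blocks_locked()
 * above (invoked right below by add_va_block_locked) merges a returned block
 * with its neighbour whenever the two ranges are back-to-back, i.e.
 * prev->end + 1 == next->start. The same rule on two plain ranges:
 */
#include <stdint.h>
#include <stdio.h>

struct demo_range { uint64_t start, end; };

/* Returns 1 and widens 'a' when 'b' starts right after 'a' ends */
static int demo_try_merge(struct demo_range *a, const struct demo_range *b)
{
        if (a->end + 1 != b->start)
                return 0;
        a->end = b->end;
        return 1;
}

int main(void)
{
        struct demo_range freed = { 0x1000, 0x1fff };
        struct demo_range neighbour = { 0x2000, 0x2fff };

        if (demo_try_merge(&freed, &neighbour))
                printf("merged: 0x%llx-0x%llx\n",
                       (unsigned long long)freed.start,
                       (unsigned long long)freed.end);
        return 0;
}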
merge_va_blocks_locked(hdev, va_list, va_block); + + print_va_list_locked(hdev, va_list); + + return 0; +} + +/* + * add_va_block - wrapper for add_va_block_locked + * + * @hdev : pointer to the habanalabs device structure + * @va_list : pointer to the virtual addresses block list + * @start : start virtual address + * @end : end virtual address + * + * This function does the following: + * - Takes the list lock and calls add_va_block_locked + */ +static inline int add_va_block(struct hl_device *hdev, + struct hl_va_range *va_range, u64 start, u64 end) +{ + int rc; + + mutex_lock(&va_range->lock); + rc = add_va_block_locked(hdev, &va_range->list, start, end); + mutex_unlock(&va_range->lock); + + return rc; +} + +/* + * get_va_block - get a virtual block with the requested size + * + * @hdev : pointer to the habanalabs device structure + * @va_range : pointer to the virtual addresses range + * @size : requested block size + * @hint_addr : hint for request address by the user + * @is_userptr : is host or DRAM memory + * + * This function does the following: + * - Iterate on the virtual block list to find a suitable virtual block for the + * requested size + * - Reserve the requested block and update the list + * - Return the start address of the virtual block + */ +static u64 get_va_block(struct hl_device *hdev, + struct hl_va_range *va_range, u64 size, u64 hint_addr, + bool is_userptr) +{ + struct hl_vm_va_block *va_block, *new_va_block = NULL; + u64 valid_start, valid_size, prev_start, prev_end, page_mask, + res_valid_start = 0, res_valid_size = 0; + u32 page_size; + bool add_prev = false; + + if (is_userptr) + /* + * We cannot know if the user allocated memory with huge pages + * or not, hence we continue with the biggest possible + * granularity. 
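/*
 * Illustrative sketch (not part of the applied diff): get_va_block(), which
 * continues right below, rounds each candidate start address up to the next
 * page boundary before checking whether the chunk is still big enough. The
 * page size here is an example value; the driver picks the pmmu/dmmu page
 * size depending on the mapping type.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t page_size = 4096;
        uint64_t page_mask = ~(page_size - 1);
        uint64_t start = 0x12345;               /* unaligned chunk start */
        uint64_t valid_start = start;

        if (valid_start & (page_size - 1)) {    /* same test as in get_va_block() */
                valid_start &= page_mask;
                valid_start += page_size;
        }

        printf("0x%llx -> 0x%llx\n", (unsigned long long)start,
               (unsigned long long)valid_start);
        return 0;
}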
+ */ + page_size = hdev->asic_prop.pmmu_huge.page_size; + else + page_size = hdev->asic_prop.dmmu.page_size; + + page_mask = ~((u64)page_size - 1); + + mutex_lock(&va_range->lock); + + print_va_list_locked(hdev, &va_range->list); + + list_for_each_entry(va_block, &va_range->list, node) { + /* calc the first possible aligned addr */ + valid_start = va_block->start; + + if (valid_start & (page_size - 1)) { + valid_start &= page_mask; + valid_start += page_size; + if (valid_start > va_block->end) + continue; + } + + valid_size = va_block->end - valid_start; + + if (valid_size >= size && + (!new_va_block || valid_size < res_valid_size)) { + new_va_block = va_block; + res_valid_start = valid_start; + res_valid_size = valid_size; + } + + if (hint_addr && hint_addr >= valid_start && + ((hint_addr + size) <= va_block->end)) { + new_va_block = va_block; + res_valid_start = hint_addr; + res_valid_size = valid_size; + break; + } + } + + if (!new_va_block) { + dev_err(hdev->dev, "no available va block for size %llu\n", + size); + goto out; + } + + if (res_valid_start > new_va_block->start) { + prev_start = new_va_block->start; + prev_end = res_valid_start - 1; + + new_va_block->start = res_valid_start; + new_va_block->size = res_valid_size; + + add_prev = true; + } + + if (new_va_block->size > size) { + new_va_block->start += size; + new_va_block->size = new_va_block->end - new_va_block->start; + } else { + list_del(&new_va_block->node); + kfree(new_va_block); + } + + if (add_prev) + add_va_block_locked(hdev, &va_range->list, prev_start, + prev_end); + + print_va_list_locked(hdev, &va_range->list); +out: + mutex_unlock(&va_range->lock); + + return res_valid_start; +} + +/* + * get_sg_info - get number of pages and the DMA address from SG list + * + * @sg : the SG list + * @dma_addr : pointer to DMA address to return + * + * Calculate the number of consecutive pages described by the SG list. Take the + * offset of the address in the first page, add to it the length and round it up + * to the number of needed pages. + */ +static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) +{ + *dma_addr = sg_dma_address(sg); + + return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) + + (PAGE_SIZE - 1)) >> PAGE_SHIFT; +} + +/* + * init_phys_pg_pack_from_userptr - initialize physical page pack from host + * memory + * @ctx: current context + * @userptr: userptr to initialize from + * @pphys_pg_pack: result pointer + * + * This function does the following: + * - Pin the physical pages related to the given virtual block + * - Create a physical page pack from the physical pages related to the given + * virtual block + */ +static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, + struct hl_userptr *userptr, + struct hl_vm_phys_pg_pack **pphys_pg_pack) +{ + struct hl_vm_phys_pg_pack *phys_pg_pack; + struct scatterlist *sg; + dma_addr_t dma_addr; + u64 page_mask, total_npages; + u32 npages, page_size = PAGE_SIZE, + huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size; + bool first = true, is_huge_page_opt = true; + int rc, i, j; + u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); + + phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); + if (!phys_pg_pack) + return -ENOMEM; + + phys_pg_pack->vm_type = userptr->vm_type; + phys_pg_pack->created_from_userptr = true; + phys_pg_pack->asid = ctx->asid; + atomic_set(&phys_pg_pack->mapping_cnt, 1); + + /* Only if all dma_addrs are aligned to 2MB and their + * sizes is at least 2MB, we can use huge page mapping. 
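/*
 * Illustrative sketch (not part of the applied diff): the huge-page
 * optimization described in the comment above (and applied just below) is
 * only taken when every DMA chunk is 2MB-aligned and covers a whole multiple
 * of 2MB. The chunk list here is made up for illustration, and the length
 * check is a simplification of the page-count check in the driver.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t huge = 2ULL << 20;
        struct { uint64_t addr, len; } chunks[] = {
                { 0x40000000, 4ULL << 20 },     /* aligned, 2 huge pages */
                { 0x40600000, 2ULL << 20 },     /* aligned, 1 huge page  */
        };
        int i, huge_ok = 1;

        for (i = 0; i < 2; i++)
                if ((chunks[i].addr & (huge - 1)) || (chunks[i].len % huge))
                        huge_ok = 0;            /* one bad chunk disables the optimization */

        printf("huge page mapping %s\n", huge_ok ? "possible" : "not possible");
        return 0;
}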
+ * We limit the 2MB optimization to this condition, + * since later on we acquire the related VA range as one + * consecutive block. + */ + total_npages = 0; + for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { + npages = get_sg_info(sg, &dma_addr); + + total_npages += npages; + + if ((npages % pgs_in_huge_page) || + (dma_addr & (huge_page_size - 1))) + is_huge_page_opt = false; + } + + if (is_huge_page_opt) { + page_size = huge_page_size; + do_div(total_npages, pgs_in_huge_page); + } + + page_mask = ~(((u64) page_size) - 1); + + phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64), + GFP_KERNEL); + if (!phys_pg_pack->pages) { + rc = -ENOMEM; + goto page_pack_arr_mem_err; + } + + phys_pg_pack->npages = total_npages; + phys_pg_pack->page_size = page_size; + phys_pg_pack->total_size = total_npages * page_size; + + j = 0; + for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { + npages = get_sg_info(sg, &dma_addr); + + /* align down to physical page size and save the offset */ + if (first) { + first = false; + phys_pg_pack->offset = dma_addr & (page_size - 1); + dma_addr &= page_mask; + } + + while (npages) { + phys_pg_pack->pages[j++] = dma_addr; + dma_addr += page_size; + + if (is_huge_page_opt) + npages -= pgs_in_huge_page; + else + npages--; + } + } + + *pphys_pg_pack = phys_pg_pack; + + return 0; + +page_pack_arr_mem_err: + kfree(phys_pg_pack); + + return rc; +} + +/* + * map_phys_pg_pack - maps the physical page pack. + * @ctx: current context + * @vaddr: start address of the virtual area to map from + * @phys_pg_pack: the pack of physical pages to map to + * + * This function does the following: + * - Maps each chunk of virtual memory to matching physical chunk + * - Stores number of successful mappings in the given argument + * - Returns 0 on success, error code otherwise + */ +static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, + struct hl_vm_phys_pg_pack *phys_pg_pack) +{ + struct hl_device *hdev = ctx->hdev; + u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; + u32 page_size = phys_pg_pack->page_size; + int rc = 0; + + for (i = 0 ; i < phys_pg_pack->npages ; i++) { + paddr = phys_pg_pack->pages[i]; + + rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size, + (i + 1) == phys_pg_pack->npages); + if (rc) { + dev_err(hdev->dev, + "map failed for handle %u, npages: %llu, mapped: %llu", + phys_pg_pack->handle, phys_pg_pack->npages, + mapped_pg_cnt); + goto err; + } + + mapped_pg_cnt++; + next_vaddr += page_size; + } + + return 0; + +err: + next_vaddr = vaddr; + for (i = 0 ; i < mapped_pg_cnt ; i++) { + if (hl_mmu_unmap(ctx, next_vaddr, page_size, + (i + 1) == mapped_pg_cnt)) + dev_warn_ratelimited(hdev->dev, + "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n", + phys_pg_pack->handle, next_vaddr, + phys_pg_pack->pages[i], page_size); + + next_vaddr += page_size; + } + + return rc; +} + +/* + * unmap_phys_pg_pack - unmaps the physical page pack + * @ctx: current context + * @vaddr: start address of the virtual area to unmap + * @phys_pg_pack: the pack of physical pages to unmap + */ +static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, + struct hl_vm_phys_pg_pack *phys_pg_pack) +{ + struct hl_device *hdev = ctx->hdev; + u64 next_vaddr, i; + u32 page_size; + + page_size = phys_pg_pack->page_size; + next_vaddr = vaddr; + + for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { + if (hl_mmu_unmap(ctx, next_vaddr, page_size, + (i + 1) == phys_pg_pack->npages)) + dev_warn_ratelimited(hdev->dev, + "unmap failed for 
vaddr: 0x%llx\n", next_vaddr); + + /* + * unmapping on Palladium can be really long, so avoid a CPU + * soft lockup bug by sleeping a little between unmapping pages + */ + if (hdev->pldm) + usleep_range(500, 1000); + } +} + +static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args, + u64 *paddr) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm *vm = &hdev->vm; + struct hl_vm_phys_pg_pack *phys_pg_pack; + u32 handle; + + handle = lower_32_bits(args->map_device.handle); + spin_lock(&vm->idr_lock); + phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); + if (!phys_pg_pack) { + spin_unlock(&vm->idr_lock); + dev_err(hdev->dev, "no match for handle %u\n", handle); + return -EINVAL; + } + + *paddr = phys_pg_pack->pages[0]; + + spin_unlock(&vm->idr_lock); + + return 0; +} + +/* + * map_device_va - map the given memory + * + * @ctx : current context + * @args : host parameters with handle/host virtual address + * @device_addr : pointer to result device virtual address + * + * This function does the following: + * - If given a physical device memory handle, map to a device virtual block + * and return the start address of this block + * - If given a host virtual address and size, find the related physical pages, + * map a device virtual block to this pages and return the start address of + * this block + */ +static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, + u64 *device_addr) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm *vm = &hdev->vm; + struct hl_vm_phys_pg_pack *phys_pg_pack; + struct hl_userptr *userptr = NULL; + struct hl_vm_hash_node *hnode; + struct hl_va_range *va_range; + enum vm_type_t *vm_type; + u64 ret_vaddr, hint_addr; + u32 handle = 0; + int rc; + bool is_userptr = args->flags & HL_MEM_USERPTR; + + /* Assume failure */ + *device_addr = 0; + + if (is_userptr) { + u64 addr = args->map_host.host_virt_addr, + size = args->map_host.mem_size; + + rc = dma_map_host_va(hdev, addr, size, &userptr); + if (rc) { + dev_err(hdev->dev, "failed to get userptr from va\n"); + return rc; + } + + rc = init_phys_pg_pack_from_userptr(ctx, userptr, + &phys_pg_pack); + if (rc) { + dev_err(hdev->dev, + "unable to init page pack for vaddr 0x%llx\n", + addr); + goto init_page_pack_err; + } + + vm_type = (enum vm_type_t *) userptr; + hint_addr = args->map_host.hint_addr; + handle = phys_pg_pack->handle; + } else { + handle = lower_32_bits(args->map_device.handle); + + spin_lock(&vm->idr_lock); + phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); + if (!phys_pg_pack) { + spin_unlock(&vm->idr_lock); + dev_err(hdev->dev, + "no match for handle %u\n", handle); + return -EINVAL; + } + + /* increment now to avoid freeing device memory while mapping */ + atomic_inc(&phys_pg_pack->mapping_cnt); + + spin_unlock(&vm->idr_lock); + + vm_type = (enum vm_type_t *) phys_pg_pack; + + hint_addr = args->map_device.hint_addr; + } + + /* + * relevant for mapping device physical memory only, as host memory is + * implicitly shared + */ + if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) && + phys_pg_pack->asid != ctx->asid) { + dev_err(hdev->dev, + "Failed to map memory, handle %u is not shared\n", + handle); + rc = -EPERM; + goto shared_err; + } + + hnode = kzalloc(sizeof(*hnode), GFP_KERNEL); + if (!hnode) { + rc = -ENOMEM; + goto hnode_err; + } + + if (is_userptr) + if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size) + va_range = ctx->host_va_range; + else + va_range = ctx->host_huge_va_range; + else + va_range = ctx->dram_va_range; + + 
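/*
 * Illustrative sketch (not part of the applied diff): the range selection
 * made just above in map_device_va() boils down to three cases - host range
 * for normal-page userptr mappings, host-huge range for huge-page userptr
 * mappings, and the DRAM range otherwise. The enum and helper are
 * illustrative only; the driver keeps the ranges in the context object.
 */
#include <stdbool.h>
#include <stdio.h>

enum demo_va_range { DEMO_RANGE_HOST, DEMO_RANGE_HOST_HUGE, DEMO_RANGE_DRAM };

static enum demo_va_range demo_pick_range(bool is_userptr, bool huge_pages)
{
        if (!is_userptr)
                return DEMO_RANGE_DRAM;
        return huge_pages ? DEMO_RANGE_HOST_HUGE : DEMO_RANGE_HOST;
}

int main(void)
{
        printf("%d %d %d\n",
               demo_pick_range(true, false),    /* host, normal pages */
               demo_pick_range(true, true),     /* host, huge pages   */
               demo_pick_range(false, false));  /* device DRAM        */
        return 0;
}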
ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size, + hint_addr, is_userptr); + if (!ret_vaddr) { + dev_err(hdev->dev, "no available va block for handle %u\n", + handle); + rc = -ENOMEM; + goto va_block_err; + } + + mutex_lock(&ctx->mmu_lock); + + rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); + if (rc) { + mutex_unlock(&ctx->mmu_lock); + dev_err(hdev->dev, "mapping page pack failed for handle %u\n", + handle); + goto map_err; + } + + rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type); + + mutex_unlock(&ctx->mmu_lock); + + if (rc) { + dev_err(hdev->dev, + "mapping handle %u failed due to MMU cache invalidation\n", + handle); + goto map_err; + } + + ret_vaddr += phys_pg_pack->offset; + + hnode->ptr = vm_type; + hnode->vaddr = ret_vaddr; + + mutex_lock(&ctx->mem_hash_lock); + hash_add(ctx->mem_hash, &hnode->node, ret_vaddr); + mutex_unlock(&ctx->mem_hash_lock); + + *device_addr = ret_vaddr; + + if (is_userptr) + free_phys_pg_pack(hdev, phys_pg_pack); + + return 0; + +map_err: + if (add_va_block(hdev, va_range, ret_vaddr, + ret_vaddr + phys_pg_pack->total_size - 1)) + dev_warn(hdev->dev, + "release va block failed for handle 0x%x, vaddr: 0x%llx\n", + handle, ret_vaddr); + +va_block_err: + kfree(hnode); +hnode_err: +shared_err: + atomic_dec(&phys_pg_pack->mapping_cnt); + if (is_userptr) + free_phys_pg_pack(hdev, phys_pg_pack); +init_page_pack_err: + if (is_userptr) + dma_unmap_host_va(hdev, userptr); + + return rc; +} + +/* + * unmap_device_va - unmap the given device virtual address + * + * @ctx : current context + * @vaddr : device virtual address to unmap + * @ctx_free : true if in context free flow, false otherwise. + * + * This function does the following: + * - Unmap the physical pages related to the given virtual address + * - return the device virtual block to the virtual block list + */ +static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; + struct hl_vm_hash_node *hnode = NULL; + struct hl_userptr *userptr = NULL; + struct hl_va_range *va_range; + enum vm_type_t *vm_type; + bool is_userptr; + int rc = 0; + + /* protect from double entrance */ + mutex_lock(&ctx->mem_hash_lock); + hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr) + if (vaddr == hnode->vaddr) + break; + + if (!hnode) { + mutex_unlock(&ctx->mem_hash_lock); + dev_err(hdev->dev, + "unmap failed, no mem hnode for vaddr 0x%llx\n", + vaddr); + return -EINVAL; + } + + hash_del(&hnode->node); + mutex_unlock(&ctx->mem_hash_lock); + + vm_type = hnode->ptr; + + if (*vm_type == VM_TYPE_USERPTR) { + is_userptr = true; + userptr = hnode->ptr; + rc = init_phys_pg_pack_from_userptr(ctx, userptr, + &phys_pg_pack); + if (rc) { + dev_err(hdev->dev, + "unable to init page pack for vaddr 0x%llx\n", + vaddr); + goto vm_type_err; + } + + if (phys_pg_pack->page_size == + hdev->asic_prop.pmmu.page_size) + va_range = ctx->host_va_range; + else + va_range = ctx->host_huge_va_range; + } else if (*vm_type == VM_TYPE_PHYS_PACK) { + is_userptr = false; + va_range = ctx->dram_va_range; + phys_pg_pack = hnode->ptr; + } else { + dev_warn(hdev->dev, + "unmap failed, unknown vm desc for vaddr 0x%llx\n", + vaddr); + rc = -EFAULT; + goto vm_type_err; + } + + if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) { + dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr); + rc = -EINVAL; + goto mapping_cnt_err; + } + + vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); + + 
mutex_lock(&ctx->mmu_lock); + + unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); + + /* + * During context free this function is called in a loop to clean all + * the context mappings. Hence the cache invalidation can be called once + * at the loop end rather than for each iteration + */ + if (!ctx_free) + rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true, + *vm_type); + + mutex_unlock(&ctx->mmu_lock); + + /* + * If the context is closing we don't need to check for the MMU cache + * invalidation return code and update the VA free list as in this flow + * we invalidate the MMU cache outside of this unmap function and the VA + * free list will be freed anyway. + */ + if (!ctx_free) { + int tmp_rc; + + if (rc) + dev_err(hdev->dev, + "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n", + vaddr); + + tmp_rc = add_va_block(hdev, va_range, vaddr, + vaddr + phys_pg_pack->total_size - 1); + if (tmp_rc) { + dev_warn(hdev->dev, + "add va block failed for vaddr: 0x%llx\n", + vaddr); + if (!rc) + rc = tmp_rc; + } + } + + atomic_dec(&phys_pg_pack->mapping_cnt); + kfree(hnode); + + if (is_userptr) { + free_phys_pg_pack(hdev, phys_pg_pack); + dma_unmap_host_va(hdev, userptr); + } + + return rc; + +mapping_cnt_err: + if (is_userptr) + free_phys_pg_pack(hdev, phys_pg_pack); +vm_type_err: + mutex_lock(&ctx->mem_hash_lock); + hash_add(ctx->mem_hash, &hnode->node, vaddr); + mutex_unlock(&ctx->mem_hash_lock); + + return rc; +} + +static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_ctx *ctx = hpriv->ctx; + u64 device_addr = 0; + u32 handle = 0; + int rc; + + switch (args->in.op) { + case HL_MEM_OP_ALLOC: + if (args->in.alloc.mem_size == 0) { + dev_err(hdev->dev, + "alloc size must be larger than 0\n"); + rc = -EINVAL; + goto out; + } + + /* Force contiguous as there are no real MMU + * translations to overcome physical memory gaps + */ + args->in.flags |= HL_MEM_CONTIGUOUS; + rc = alloc_device_memory(ctx, &args->in, &handle); + + memset(args, 0, sizeof(*args)); + args->out.handle = (__u64) handle; + break; + + case HL_MEM_OP_FREE: + rc = free_device_memory(ctx, args->in.free.handle); + break; + + case HL_MEM_OP_MAP: + if (args->in.flags & HL_MEM_USERPTR) { + device_addr = args->in.map_host.host_virt_addr; + rc = 0; + } else { + rc = get_paddr_from_handle(ctx, &args->in, + &device_addr); + } + + memset(args, 0, sizeof(*args)); + args->out.device_virt_addr = device_addr; + break; + + case HL_MEM_OP_UNMAP: + rc = 0; + break; + + default: + dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); + rc = -ENOTTY; + break; + } + +out: + return rc; +} + +int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) +{ + union hl_mem_args *args = data; + struct hl_device *hdev = hpriv->hdev; + struct hl_ctx *ctx = hpriv->ctx; + u64 device_addr = 0; + u32 handle = 0; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_warn_ratelimited(hdev->dev, + "Device is %s. Can't execute MEMORY IOCTL\n", + atomic_read(&hdev->in_reset) ? 
"in_reset" : "disabled"); + return -EBUSY; + } + + if (!hdev->mmu_enable) + return mem_ioctl_no_mmu(hpriv, args); + + switch (args->in.op) { + case HL_MEM_OP_ALLOC: + if (!hdev->dram_supports_virtual_memory) { + dev_err(hdev->dev, "DRAM alloc is not supported\n"); + rc = -EINVAL; + goto out; + } + + if (args->in.alloc.mem_size == 0) { + dev_err(hdev->dev, + "alloc size must be larger than 0\n"); + rc = -EINVAL; + goto out; + } + rc = alloc_device_memory(ctx, &args->in, &handle); + + memset(args, 0, sizeof(*args)); + args->out.handle = (__u64) handle; + break; + + case HL_MEM_OP_FREE: + rc = free_device_memory(ctx, args->in.free.handle); + break; + + case HL_MEM_OP_MAP: + rc = map_device_va(ctx, &args->in, &device_addr); + + memset(args, 0, sizeof(*args)); + args->out.device_virt_addr = device_addr; + break; + + case HL_MEM_OP_UNMAP: + rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr, + false); + break; + + default: + dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); + rc = -ENOTTY; + break; + } + +out: + return rc; +} + +static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, + u32 npages, u64 start, u32 offset, + struct hl_userptr *userptr) +{ + int rc; + + if (!access_ok((void __user *) (uintptr_t) addr, size)) { + dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); + return -EFAULT; + } + + userptr->vec = frame_vector_create(npages); + if (!userptr->vec) { + dev_err(hdev->dev, "Failed to create frame vector\n"); + return -ENOMEM; + } + + rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, + userptr->vec); + + if (rc != npages) { + dev_err(hdev->dev, + "Failed to map host memory, user ptr probably wrong\n"); + if (rc < 0) + goto destroy_framevec; + rc = -EFAULT; + goto put_framevec; + } + + if (frame_vector_to_pages(userptr->vec) < 0) { + dev_err(hdev->dev, + "Failed to translate frame vector to pages\n"); + rc = -EFAULT; + goto put_framevec; + } + + rc = sg_alloc_table_from_pages(userptr->sgt, + frame_vector_pages(userptr->vec), + npages, offset, size, GFP_ATOMIC); + if (rc < 0) { + dev_err(hdev->dev, "failed to create SG table from pages\n"); + goto put_framevec; + } + + return 0; + +put_framevec: + put_vaddr_frames(userptr->vec); +destroy_framevec: + frame_vector_destroy(userptr->vec); + return rc; +} + +/* + * hl_pin_host_memory - pins a chunk of host memory. + * @hdev: pointer to the habanalabs device structure + * @addr: the host virtual address of the memory area + * @size: the size of the memory area + * @userptr: pointer to hl_userptr structure + * + * This function does the following: + * - Pins the physical pages + * - Create an SG list from those pages + */ +int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, + struct hl_userptr *userptr) +{ + u64 start, end; + u32 npages, offset; + int rc; + + if (!size) { + dev_err(hdev->dev, "size to pin is invalid - %llu\n", size); + return -EINVAL; + } + + /* + * If the combination of the address and size requested for this memory + * region causes an integer overflow, return error. + */ + if (((addr + size) < addr) || + PAGE_ALIGN(addr + size) < (addr + size)) { + dev_err(hdev->dev, + "user pointer 0x%llx + %llu causes integer overflow\n", + addr, size); + return -EINVAL; + } + + /* + * This function can be called also from data path, hence use atomic + * always as it is not a big allocation. 
+ */ + userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); + if (!userptr->sgt) + return -ENOMEM; + + start = addr & PAGE_MASK; + offset = addr & ~PAGE_MASK; + end = PAGE_ALIGN(addr + size); + npages = (end - start) >> PAGE_SHIFT; + + userptr->size = size; + userptr->addr = addr; + userptr->dma_mapped = false; + INIT_LIST_HEAD(&userptr->job_node); + + rc = get_user_memory(hdev, addr, size, npages, start, offset, + userptr); + if (rc) { + dev_err(hdev->dev, + "failed to get user memory for address 0x%llx\n", + addr); + goto free_sgt; + } + + hl_debugfs_add_userptr(hdev, userptr); + + return 0; + +free_sgt: + kfree(userptr->sgt); + return rc; +} + +/* + * hl_unpin_host_memory - unpins a chunk of host memory. + * @hdev: pointer to the habanalabs device structure + * @userptr: pointer to hl_userptr structure + * + * This function does the following: + * - Unpins the physical pages related to the host memory + * - Free the SG list + */ +void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) +{ + struct page **pages; + + hl_debugfs_remove_userptr(hdev, userptr); + + if (userptr->dma_mapped) + hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl, + userptr->sgt->nents, + userptr->dir); + + pages = frame_vector_pages(userptr->vec); + if (!IS_ERR(pages)) { + int i; + + for (i = 0; i < frame_vector_count(userptr->vec); i++) + set_page_dirty_lock(pages[i]); + } + put_vaddr_frames(userptr->vec); + frame_vector_destroy(userptr->vec); + + list_del(&userptr->job_node); + + sg_free_table(userptr->sgt); + kfree(userptr->sgt); +} + +/* + * hl_userptr_delete_list - clear userptr list + * + * @hdev : pointer to the habanalabs device structure + * @userptr_list : pointer to the list to clear + * + * This function does the following: + * - Iterates over the list and unpins the host memory and frees the userptr + * structure. + */ +void hl_userptr_delete_list(struct hl_device *hdev, + struct list_head *userptr_list) +{ + struct hl_userptr *userptr, *tmp; + + list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) { + hl_unpin_host_memory(hdev, userptr); + kfree(userptr); + } + + INIT_LIST_HEAD(userptr_list); +} + +/* + * hl_userptr_is_pinned - returns whether the given userptr is pinned + * + * @hdev : pointer to the habanalabs device structure + * @userptr_list : pointer to the list to clear + * @userptr : pointer to userptr to check + * + * This function does the following: + * - Iterates over the list and checks if the given userptr is in it, means is + * pinned. If so, returns true, otherwise returns false. + */ +bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, + u32 size, struct list_head *userptr_list, + struct hl_userptr **userptr) +{ + list_for_each_entry((*userptr), userptr_list, job_node) { + if ((addr == (*userptr)->addr) && (size == (*userptr)->size)) + return true; + } + + return false; +} + +/* + * va_range_init - initialize virtual addresses range + * @hdev: pointer to the habanalabs device structure + * @va_range: pointer to the range to initialize + * @start: range start address + * @end: range end address + * + * This function does the following: + * - Initializes the virtual addresses list of the given range with the given + * addresses. 
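+ *
+ * Note (illustrative only, assuming a 4KB PAGE_SIZE): an unaligned start such
+ * as 0x10000234 is rounded up to 0x10001000 and an unaligned end such as
+ * 0x20000abc is rounded down to 0x20000000 before the single initial block is
+ * added to the list.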
+ */ +static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range, + u64 start, u64 end) +{ + int rc; + + INIT_LIST_HEAD(&va_range->list); + + /* PAGE_SIZE alignment */ + + if (start & (PAGE_SIZE - 1)) { + start &= PAGE_MASK; + start += PAGE_SIZE; + } + + if (end & (PAGE_SIZE - 1)) + end &= PAGE_MASK; + + if (start >= end) { + dev_err(hdev->dev, "too small vm range for va list\n"); + return -EFAULT; + } + + rc = add_va_block(hdev, va_range, start, end); + + if (rc) { + dev_err(hdev->dev, "Failed to init host va list\n"); + return rc; + } + + va_range->start_addr = start; + va_range->end_addr = end; + + return 0; +} + +/* + * va_range_fini() - clear a virtual addresses range + * @hdev: pointer to the habanalabs structure + * va_range: pointer to virtual addresses range + * + * This function does the following: + * - Frees the virtual addresses block list and its lock + */ +static void va_range_fini(struct hl_device *hdev, + struct hl_va_range *va_range) +{ + mutex_lock(&va_range->lock); + clear_va_list_locked(hdev, &va_range->list); + mutex_unlock(&va_range->lock); + + mutex_destroy(&va_range->lock); + kfree(va_range); +} + +/* + * vm_ctx_init_with_ranges() - initialize virtual memory for context + * @ctx: pointer to the habanalabs context structure + * @host_range_start: host virtual addresses range start. + * @host_range_end: host virtual addresses range end. + * @host_huge_range_start: host virtual addresses range start for memory + * allocated with huge pages. + * @host_huge_range_end: host virtual addresses range end for memory allocated + * with huge pages. + * @dram_range_start: dram virtual addresses range start. + * @dram_range_end: dram virtual addresses range end. + * + * This function initializes the following: + * - MMU for context + * - Virtual address to area descriptor hashtable + * - Virtual block list of available virtual memory + */ +static int vm_ctx_init_with_ranges(struct hl_ctx *ctx, + u64 host_range_start, + u64 host_range_end, + u64 host_huge_range_start, + u64 host_huge_range_end, + u64 dram_range_start, + u64 dram_range_end) +{ + struct hl_device *hdev = ctx->hdev; + int rc; + + ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL); + if (!ctx->host_va_range) + return -ENOMEM; + + ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range), + GFP_KERNEL); + if (!ctx->host_huge_va_range) { + rc = -ENOMEM; + goto host_huge_va_range_err; + } + + ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL); + if (!ctx->dram_va_range) { + rc = -ENOMEM; + goto dram_va_range_err; + } + + rc = hl_mmu_ctx_init(ctx); + if (rc) { + dev_err(hdev->dev, "failed to init context %d\n", ctx->asid); + goto mmu_ctx_err; + } + + mutex_init(&ctx->mem_hash_lock); + hash_init(ctx->mem_hash); + + mutex_init(&ctx->host_va_range->lock); + + rc = va_range_init(hdev, ctx->host_va_range, host_range_start, + host_range_end); + if (rc) { + dev_err(hdev->dev, "failed to init host vm range\n"); + goto host_page_range_err; + } + + if (hdev->pmmu_huge_range) { + mutex_init(&ctx->host_huge_va_range->lock); + + rc = va_range_init(hdev, ctx->host_huge_va_range, + host_huge_range_start, + host_huge_range_end); + if (rc) { + dev_err(hdev->dev, + "failed to init host huge vm range\n"); + goto host_hpage_range_err; + } + } else { + ctx->host_huge_va_range = ctx->host_va_range; + } + + mutex_init(&ctx->dram_va_range->lock); + + rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start, + dram_range_end); + if (rc) { + dev_err(hdev->dev, "failed to init 
dram vm range\n"); + goto dram_vm_err; + } + + hl_debugfs_add_ctx_mem_hash(hdev, ctx); + + return 0; + +dram_vm_err: + mutex_destroy(&ctx->dram_va_range->lock); + + if (hdev->pmmu_huge_range) { + mutex_lock(&ctx->host_huge_va_range->lock); + clear_va_list_locked(hdev, &ctx->host_huge_va_range->list); + mutex_unlock(&ctx->host_huge_va_range->lock); + } +host_hpage_range_err: + if (hdev->pmmu_huge_range) + mutex_destroy(&ctx->host_huge_va_range->lock); + mutex_lock(&ctx->host_va_range->lock); + clear_va_list_locked(hdev, &ctx->host_va_range->list); + mutex_unlock(&ctx->host_va_range->lock); +host_page_range_err: + mutex_destroy(&ctx->host_va_range->lock); + mutex_destroy(&ctx->mem_hash_lock); + hl_mmu_ctx_fini(ctx); +mmu_ctx_err: + kfree(ctx->dram_va_range); +dram_va_range_err: + kfree(ctx->host_huge_va_range); +host_huge_va_range_err: + kfree(ctx->host_va_range); + + return rc; +} + +int hl_vm_ctx_init(struct hl_ctx *ctx) +{ + struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; + u64 host_range_start, host_range_end, host_huge_range_start, + host_huge_range_end, dram_range_start, dram_range_end; + + atomic64_set(&ctx->dram_phys_mem, 0); + + /* + * - If MMU is enabled, init the ranges as usual. + * - If MMU is disabled, in case of host mapping, the returned address + * is the given one. + * In case of DRAM mapping, the returned address is the physical + * address of the memory related to the given handle. + */ + if (ctx->hdev->mmu_enable) { + dram_range_start = prop->dmmu.start_addr; + dram_range_end = prop->dmmu.end_addr; + host_range_start = prop->pmmu.start_addr; + host_range_end = prop->pmmu.end_addr; + host_huge_range_start = prop->pmmu_huge.start_addr; + host_huge_range_end = prop->pmmu_huge.end_addr; + } else { + dram_range_start = prop->dram_user_base_address; + dram_range_end = prop->dram_end_address; + host_range_start = prop->dram_user_base_address; + host_range_end = prop->dram_end_address; + host_huge_range_start = prop->dram_user_base_address; + host_huge_range_end = prop->dram_end_address; + } + + return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, + host_huge_range_start, + host_huge_range_end, + dram_range_start, + dram_range_end); +} + +/* + * hl_vm_ctx_fini - virtual memory teardown of context + * + * @ctx : pointer to the habanalabs context structure + * + * This function perform teardown the following: + * - Virtual block list of available virtual memory + * - Virtual address to area descriptor hashtable + * - MMU for context + * + * In addition this function does the following: + * - Unmaps the existing hashtable nodes if the hashtable is not empty. The + * hashtable should be empty as no valid mappings should exist at this + * point. + * - Frees any existing physical page list from the idr which relates to the + * current context asid. + * - This function checks the virtual block list for correctness. At this point + * the list should contain one element which describes the whole virtual + * memory range of the context. Otherwise, a warning is printed. 
+ */ +void hl_vm_ctx_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm *vm = &hdev->vm; + struct hl_vm_phys_pg_pack *phys_pg_list; + struct hl_vm_hash_node *hnode; + struct hlist_node *tmp_node; + int i; + + hl_debugfs_remove_ctx_mem_hash(hdev, ctx); + + /* + * Clearly something went wrong on hard reset so no point in printing + * another side effect error + */ + if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash)) + dev_notice(hdev->dev, + "user released device without removing its memory mappings\n"); + + hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) { + dev_dbg(hdev->dev, + "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n", + hnode->vaddr, ctx->asid); + unmap_device_va(ctx, hnode->vaddr, true); + } + + /* invalidate the cache once after the unmapping loop */ + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK); + + spin_lock(&vm->idr_lock); + idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i) + if (phys_pg_list->asid == ctx->asid) { + dev_dbg(hdev->dev, + "page list 0x%px of asid %d is still alive\n", + phys_pg_list, ctx->asid); + atomic64_sub(phys_pg_list->total_size, + &hdev->dram_used_mem); + free_phys_pg_pack(hdev, phys_pg_list); + idr_remove(&vm->phys_pg_pack_handles, i); + } + spin_unlock(&vm->idr_lock); + + va_range_fini(hdev, ctx->dram_va_range); + if (hdev->pmmu_huge_range) + va_range_fini(hdev, ctx->host_huge_va_range); + va_range_fini(hdev, ctx->host_va_range); + + mutex_destroy(&ctx->mem_hash_lock); + hl_mmu_ctx_fini(ctx); +} + +/* + * hl_vm_init - initialize virtual memory module + * + * @hdev : pointer to the habanalabs device structure + * + * This function initializes the following: + * - MMU module + * - DRAM physical pages pool of 2MB + * - Idr for device memory allocation handles + */ +int hl_vm_init(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_vm *vm = &hdev->vm; + int rc; + + vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1); + if (!vm->dram_pg_pool) { + dev_err(hdev->dev, "Failed to create dram page pool\n"); + return -ENOMEM; + } + + kref_init(&vm->dram_pg_pool_refcount); + + rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address, + prop->dram_end_address - prop->dram_user_base_address, + -1); + + if (rc) { + dev_err(hdev->dev, + "Failed to add memory to dram page pool %d\n", rc); + goto pool_add_err; + } + + spin_lock_init(&vm->idr_lock); + idr_init(&vm->phys_pg_pack_handles); + + atomic64_set(&hdev->dram_used_mem, 0); + + vm->init_done = true; + + return 0; + +pool_add_err: + gen_pool_destroy(vm->dram_pg_pool); + + return rc; +} + +/* + * hl_vm_fini - virtual memory module teardown + * + * @hdev : pointer to the habanalabs device structure + * + * This function perform teardown to the following: + * - Idr for device memory allocation handles + * - DRAM physical pages pool of 2MB + * - MMU module + */ +void hl_vm_fini(struct hl_device *hdev) +{ + struct hl_vm *vm = &hdev->vm; + + if (!vm->init_done) + return; + + /* + * At this point all the contexts should be freed and hence no DRAM + * memory should be in use. Hence the DRAM pool should be freed here. 
+ */ + if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1) + dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n", + __func__); + + vm->init_done = false; +} diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c new file mode 100644 index 000000000000..04303950e630 --- /dev/null +++ b/drivers/misc/habanalabs/common/mmu.c @@ -0,0 +1,1037 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" +#include "include/hw_ip/mmu/mmu_general.h" + +#include +#include + +static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); + +static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = NULL; + + hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node, + (unsigned long) hop_addr) + if (hop_addr == pgt_info->shadow_addr) + break; + + return pgt_info; +} + +static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info) +{ + struct hl_device *hdev = ctx->hdev; + + gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr, + hdev->asic_prop.mmu_hop_table_size); + hash_del(&pgt_info->node); + kfree((u64 *) (uintptr_t) pgt_info->shadow_addr); + kfree(pgt_info); +} + +static void free_hop(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); + + _free_hop(ctx, pgt_info); +} + +static u64 alloc_hop(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct pgt_info *pgt_info; + u64 phys_addr, shadow_addr; + + pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL); + if (!pgt_info) + return ULLONG_MAX; + + phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool, + prop->mmu_hop_table_size); + if (!phys_addr) { + dev_err(hdev->dev, "failed to allocate page\n"); + goto pool_add_err; + } + + shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size, + GFP_KERNEL); + if (!shadow_addr) + goto shadow_err; + + pgt_info->phys_addr = phys_addr; + pgt_info->shadow_addr = shadow_addr; + pgt_info->ctx = ctx; + pgt_info->num_of_ptes = 0; + hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr); + + return shadow_addr; + +shadow_err: + gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size); +pool_add_err: + kfree(pgt_info); + + return ULLONG_MAX; +} + +static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx) +{ + return ctx->hdev->asic_prop.mmu_pgt_addr + + (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); +} + +static inline u64 get_hop0_addr(struct hl_ctx *ctx) +{ + return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 + + (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); +} + +static inline void flush(struct hl_ctx *ctx) +{ + /* flush all writes from all cores to reach PCI */ + mb(); + ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx)); +} + +/* transform the value to physical address when writing to H/W */ +static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) +{ + /* + * The value to write is actually the address of the next shadow hop + + * flags at the 12 LSBs. + * Hence in order to get the value to write to the physical PTE, we + * clear the 12 LSBs and translate the shadow hop to its associated + * physical hop, and add back the original 12 LSBs. 
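+	 *
+	 * Illustrative example (hypothetical addresses, assuming 4KB hop
+	 * tables): a shadow value of 0xffff888011223003 splits into the next
+	 * shadow hop 0xffff888011223000 plus flags 0x3; if that hop's pgt_info
+	 * holds phys_addr 0x20017000, the value written to the H/W PTE is
+	 * 0x20017003.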
+ */ + u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) | + (val & FLAGS_MASK); + + ctx->hdev->asic_funcs->write_pte(ctx->hdev, + get_phys_addr(ctx, shadow_pte_addr), + phys_val); + + *(u64 *) (uintptr_t) shadow_pte_addr = val; +} + +/* do not transform the value to physical address when writing to H/W */ +static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, + u64 val) +{ + ctx->hdev->asic_funcs->write_pte(ctx->hdev, + get_phys_addr(ctx, shadow_pte_addr), + val); + *(u64 *) (uintptr_t) shadow_pte_addr = val; +} + +/* clear the last and present bits */ +static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr) +{ + /* no need to transform the value to physical address */ + write_final_pte(ctx, pte_addr, 0); +} + +static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr) +{ + get_pgt_info(ctx, hop_addr)->num_of_ptes++; +} + +/* + * put_pte - decrement the num of ptes and free the hop if possible + * + * @ctx: pointer to the context structure + * @hop_addr: addr of the hop + * + * This function returns the number of ptes left on this hop. If the number is + * 0, it means the pte was freed. + */ +static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); + int num_of_ptes_left; + + pgt_info->num_of_ptes--; + + /* + * Need to save the number of ptes left because free_hop might free + * the pgt_info + */ + num_of_ptes_left = pgt_info->num_of_ptes; + if (!num_of_ptes_left) + _free_hop(ctx, pgt_info); + + return num_of_ptes_left; +} + +static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, + u64 virt_addr, u64 mask, u64 shift) +{ + return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * + ((virt_addr & mask) >> shift); +} + +static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask, + mmu_prop->hop0_shift); +} + +static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask, + mmu_prop->hop1_shift); +} + +static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask, + mmu_prop->hop2_shift); +} + +static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask, + mmu_prop->hop3_shift); +} + +static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask, + mmu_prop->hop4_shift); +} + +static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) +{ + if (curr_pte & PAGE_PRESENT_MASK) + return curr_pte & HOP_PHYS_ADDR_MASK; + else + return ULLONG_MAX; +} + +static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, + bool *is_new_hop) +{ + u64 hop_addr = get_next_hop_addr(ctx, curr_pte); + + if (hop_addr == ULLONG_MAX) { + hop_addr = alloc_hop(ctx); + *is_new_hop = (hop_addr != ULLONG_MAX); + } + + return hop_addr; +} + +/* translates shadow address inside hop to a physical address */ +static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr) +{ + u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size 
- 1); + u64 shadow_hop_addr = shadow_addr & ~page_mask; + u64 pte_offset = shadow_addr & page_mask; + u64 phys_hop_addr; + + if (shadow_hop_addr != get_hop0_addr(ctx)) + phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr; + else + phys_hop_addr = get_phys_hop0_addr(ctx); + + return phys_hop_addr + pte_offset; +} + +static bool is_dram_va(struct hl_device *hdev, u64 virt_addr) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); +} + +static int dram_default_mapping_init(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr, + hop2_pte_addr, hop3_pte_addr, pte_val; + int rc, i, j, hop3_allocated = 0; + + if ((!hdev->dram_supports_virtual_memory) || + (!hdev->dram_default_page_mapping) || + (ctx->asid == HL_KERNEL_ASID_ID)) + return 0; + + num_of_hop3 = prop->dram_size_for_default_page_mapping; + do_div(num_of_hop3, prop->dram_page_size); + do_div(num_of_hop3, PTE_ENTRIES_IN_HOP); + + /* add hop1 and hop2 */ + total_hops = num_of_hop3 + 2; + + ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL); + if (!ctx->dram_default_hops) + return -ENOMEM; + + hop0_addr = get_hop0_addr(ctx); + + hop1_addr = alloc_hop(ctx); + if (hop1_addr == ULLONG_MAX) { + dev_err(hdev->dev, "failed to alloc hop 1\n"); + rc = -ENOMEM; + goto hop1_err; + } + + ctx->dram_default_hops[total_hops - 1] = hop1_addr; + + hop2_addr = alloc_hop(ctx); + if (hop2_addr == ULLONG_MAX) { + dev_err(hdev->dev, "failed to alloc hop 2\n"); + rc = -ENOMEM; + goto hop2_err; + } + + ctx->dram_default_hops[total_hops - 2] = hop2_addr; + + for (i = 0 ; i < num_of_hop3 ; i++) { + ctx->dram_default_hops[i] = alloc_hop(ctx); + if (ctx->dram_default_hops[i] == ULLONG_MAX) { + dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i); + rc = -ENOMEM; + goto hop3_err; + } + hop3_allocated++; + } + + /* need only pte 0 in hops 0 and 1 */ + pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + write_pte(ctx, hop0_addr, pte_val); + + pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + write_pte(ctx, hop1_addr, pte_val); + get_pte(ctx, hop1_addr); + + hop2_pte_addr = hop2_addr; + for (i = 0 ; i < num_of_hop3 ; i++) { + pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) | + PAGE_PRESENT_MASK; + write_pte(ctx, hop2_pte_addr, pte_val); + get_pte(ctx, hop2_addr); + hop2_pte_addr += HL_PTE_SIZE; + } + + pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) | + LAST_MASK | PAGE_PRESENT_MASK; + + for (i = 0 ; i < num_of_hop3 ; i++) { + hop3_pte_addr = ctx->dram_default_hops[i]; + for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) { + write_final_pte(ctx, hop3_pte_addr, pte_val); + get_pte(ctx, ctx->dram_default_hops[i]); + hop3_pte_addr += HL_PTE_SIZE; + } + } + + flush(ctx); + + return 0; + +hop3_err: + for (i = 0 ; i < hop3_allocated ; i++) + free_hop(ctx, ctx->dram_default_hops[i]); + + free_hop(ctx, hop2_addr); +hop2_err: + free_hop(ctx, hop1_addr); +hop1_err: + kfree(ctx->dram_default_hops); + + return rc; +} + +static void dram_default_mapping_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr, + hop2_pte_addr, hop3_pte_addr; + int i, j; + + if ((!hdev->dram_supports_virtual_memory) || + 
(!hdev->dram_default_page_mapping) ||
+			(ctx->asid == HL_KERNEL_ASID_ID))
+		return;
+
+	num_of_hop3 = prop->dram_size_for_default_page_mapping;
+	do_div(num_of_hop3, prop->dram_page_size);
+	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
+
+	hop0_addr = get_hop0_addr(ctx);
+	/* add hop1 and hop2 */
+	total_hops = num_of_hop3 + 2;
+	hop1_addr = ctx->dram_default_hops[total_hops - 1];
+	hop2_addr = ctx->dram_default_hops[total_hops - 2];
+
+	for (i = 0 ; i < num_of_hop3 ; i++) {
+		hop3_pte_addr = ctx->dram_default_hops[i];
+		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
+			clear_pte(ctx, hop3_pte_addr);
+			put_pte(ctx, ctx->dram_default_hops[i]);
+			hop3_pte_addr += HL_PTE_SIZE;
+		}
+	}
+
+	hop2_pte_addr = hop2_addr;
+	for (i = 0 ; i < num_of_hop3 ; i++) {
+		clear_pte(ctx, hop2_pte_addr);
+		put_pte(ctx, hop2_addr);
+		hop2_pte_addr += HL_PTE_SIZE;
+	}
+
+	clear_pte(ctx, hop1_addr);
+	put_pte(ctx, hop1_addr);
+	clear_pte(ctx, hop0_addr);
+
+	kfree(ctx->dram_default_hops);
+
+	flush(ctx);
+}
+
+/**
+ * hl_mmu_init() - initialize the MMU module.
+ * @hdev: habanalabs device structure.
+ *
+ * This function does the following:
+ * - Create a pool of pages for pgt_infos.
+ * - Create a shadow table for pgt
+ *
+ * Return: 0 for success, non-zero for failure.
+ */
+int hl_mmu_init(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	int rc;
+
+	if (!hdev->mmu_enable)
+		return 0;
+
+	hdev->mmu_pgt_pool =
+			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
+
+	if (!hdev->mmu_pgt_pool) {
+		dev_err(hdev->dev, "Failed to create page gen pool\n");
+		return -ENOMEM;
+	}
+
+	rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
+			prop->mmu_hop0_tables_total_size,
+			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
+			-1);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
+		goto err_pool_add;
+	}
+
+	hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
+					prop->mmu_hop_table_size,
+					GFP_KERNEL | __GFP_ZERO);
+	if (!hdev->mmu_shadow_hop0) {
+		rc = -ENOMEM;
+		goto err_pool_add;
+	}
+
+	/* MMU H/W init will be done in device hw_init() */
+
+	return 0;
+
+err_pool_add:
+	gen_pool_destroy(hdev->mmu_pgt_pool);
+
+	return rc;
+}
+
+/**
+ * hl_mmu_fini() - release the MMU module.
+ * @hdev: habanalabs device structure.
+ *
+ * This function does the following:
+ * - Disable MMU in H/W.
+ * - Free the pgt_infos pool.
+ *
+ * All contexts should be freed before calling this function.
+ */
+void hl_mmu_fini(struct hl_device *hdev)
+{
+	if (!hdev->mmu_enable)
+		return;
+
+	/* MMU H/W fini was already done in device hw_fini() */
+
+	kvfree(hdev->mmu_shadow_hop0);
+	gen_pool_destroy(hdev->mmu_pgt_pool);
+}
+
+/**
+ * hl_mmu_ctx_init() - initialize a context for using the MMU module.
+ * @ctx: pointer to the context structure to initialize.
+ *
+ * Initialize a mutex to protect the concurrent mapping flow and a hash to hold
+ * all the page table hops related to this context.
+ * Return: 0 on success, non-zero otherwise.
+ */ +int hl_mmu_ctx_init(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + if (!hdev->mmu_enable) + return 0; + + mutex_init(&ctx->mmu_lock); + hash_init(ctx->mmu_shadow_hash); + + return dram_default_mapping_init(ctx); +} + +/* + * hl_mmu_ctx_fini - disable a ctx from using the mmu module + * + * @ctx: pointer to the context structure + * + * This function does the following: + * - Free any pgts which were not freed yet + * - Free the mutex + * - Free DRAM default page mapping hops + */ +void hl_mmu_ctx_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct pgt_info *pgt_info; + struct hlist_node *tmp; + int i; + + if (!hdev->mmu_enable) + return; + + dram_default_mapping_fini(ctx); + + if (!hash_empty(ctx->mmu_shadow_hash)) + dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n", + ctx->asid); + + hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) { + dev_err_ratelimited(hdev->dev, + "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n", + pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); + _free_hop(ctx, pgt_info); + } + + mutex_destroy(&ctx->mmu_lock); +} + +static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + u64 hop0_addr = 0, hop0_pte_addr = 0, + hop1_addr = 0, hop1_pte_addr = 0, + hop2_addr = 0, hop2_pte_addr = 0, + hop3_addr = 0, hop3_pte_addr = 0, + hop4_addr = 0, hop4_pte_addr = 0, + curr_pte; + bool is_huge, clear_hop3 = true; + + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + + hop0_addr = get_hop0_addr(ctx); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); + + curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; + + hop1_addr = get_next_hop_addr(ctx, curr_pte); + + if (hop1_addr == ULLONG_MAX) + goto not_mapped; + + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); + + curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; + + hop2_addr = get_next_hop_addr(ctx, curr_pte); + + if (hop2_addr == ULLONG_MAX) + goto not_mapped; + + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); + + curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; + + hop3_addr = get_next_hop_addr(ctx, curr_pte); + + if (hop3_addr == ULLONG_MAX) + goto not_mapped; + + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); + + curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; + + is_huge = curr_pte & LAST_MASK; + + if (is_dram_addr && !is_huge) { + dev_err(hdev->dev, + "DRAM unmapping should use huge pages only\n"); + return -EFAULT; + } + + if (!is_huge) { + hop4_addr = get_next_hop_addr(ctx, curr_pte); + + if (hop4_addr == ULLONG_MAX) + goto not_mapped; + + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); + + curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; + + clear_hop3 = false; + } + + if (hdev->dram_default_page_mapping && is_dram_addr) { + u64 default_pte = (prop->mmu_dram_default_page_addr & + HOP_PHYS_ADDR_MASK) | LAST_MASK | + PAGE_PRESENT_MASK; + if (curr_pte == default_pte) { + dev_err(hdev->dev, + "DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n", + virt_addr); + goto not_mapped; + } + + if (!(curr_pte & PAGE_PRESENT_MASK)) { + dev_err(hdev->dev, + "DRAM: hop3 PTE is cleared! 
can't unmap, va: 0x%llx\n", + virt_addr); + goto not_mapped; + } + + write_final_pte(ctx, hop3_pte_addr, default_pte); + put_pte(ctx, hop3_addr); + } else { + if (!(curr_pte & PAGE_PRESENT_MASK)) + goto not_mapped; + + if (hop4_addr) + clear_pte(ctx, hop4_pte_addr); + else + clear_pte(ctx, hop3_pte_addr); + + if (hop4_addr && !put_pte(ctx, hop4_addr)) + clear_hop3 = true; + + if (!clear_hop3) + goto mapped; + + clear_pte(ctx, hop3_pte_addr); + + if (put_pte(ctx, hop3_addr)) + goto mapped; + + clear_pte(ctx, hop2_pte_addr); + + if (put_pte(ctx, hop2_addr)) + goto mapped; + + clear_pte(ctx, hop1_pte_addr); + + if (put_pte(ctx, hop1_addr)) + goto mapped; + + clear_pte(ctx, hop0_pte_addr); + } + +mapped: + return 0; + +not_mapped: + dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); + + return -EINVAL; +} + +/* + * hl_mmu_unmap - unmaps a virtual addr + * + * @ctx: pointer to the context structure + * @virt_addr: virt addr to map from + * @page_size: size of the page to unmap + * @flush_pte: whether to do a PCI flush + * + * This function does the following: + * - Check that the virt addr is mapped + * - Unmap the virt addr and frees pgts if possible + * - Returns 0 on success, -EINVAL if the given addr is not mapped + * + * Because this function changes the page tables in the device and because it + * changes the MMU hash, it must be protected by a lock. + * However, because it maps only a single page, the lock should be implemented + * in a higher level in order to protect the entire mapping of the memory area + * + * For optimization reasons PCI flush may be requested once after unmapping of + * large area. + */ +int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, + bool flush_pte) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + u64 real_virt_addr; + u32 real_page_size, npages; + int i, rc = 0; + bool is_dram_addr; + + if (!hdev->mmu_enable) + return 0; + + is_dram_addr = is_dram_va(hdev, virt_addr); + + if (is_dram_addr) + mmu_prop = &prop->dmmu; + else if ((page_size % prop->pmmu_huge.page_size) == 0) + mmu_prop = &prop->pmmu_huge; + else + mmu_prop = &prop->pmmu; + + /* + * The H/W handles mapping of specific page sizes. Hence if the page + * size is bigger, we break it to sub-pages and unmap them separately. + */ + if ((page_size % mmu_prop->page_size) == 0) { + real_page_size = mmu_prop->page_size; + } else { + dev_err(hdev->dev, + "page size of %u is not %uKB aligned, can't unmap\n", + page_size, mmu_prop->page_size >> 10); + + return -EFAULT; + } + + npages = page_size / real_page_size; + real_virt_addr = virt_addr; + + for (i = 0 ; i < npages ; i++) { + rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr); + if (rc) + break; + + real_virt_addr += real_page_size; + } + + if (flush_pte) + flush(ctx); + + return rc; +} + +static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, + u32 page_size, bool is_dram_addr) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + u64 hop0_addr = 0, hop0_pte_addr = 0, + hop1_addr = 0, hop1_pte_addr = 0, + hop2_addr = 0, hop2_pte_addr = 0, + hop3_addr = 0, hop3_pte_addr = 0, + hop4_addr = 0, hop4_pte_addr = 0, + curr_pte = 0; + bool hop1_new = false, hop2_new = false, hop3_new = false, + hop4_new = false, is_huge; + int rc = -ENOMEM; + + /* + * This mapping function can map a page or a huge page. 
For a huge page
+	 * there are only 3 hops rather than 4. Currently the DRAM allocation
+	 * uses huge pages only but user memory could have been allocated with
+	 * one of the two page sizes. Since this is common code for all three
+	 * cases, we need this huge-page check.
+	 */
+	if (is_dram_addr) {
+		mmu_prop = &prop->dmmu;
+		is_huge = true;
+	} else if (page_size == prop->pmmu_huge.page_size) {
+		mmu_prop = &prop->pmmu_huge;
+		is_huge = true;
+	} else {
+		mmu_prop = &prop->pmmu;
+		is_huge = false;
+	}
+
+	hop0_addr = get_hop0_addr(ctx);
+	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
+	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
+
+	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
+	if (hop1_addr == ULLONG_MAX)
+		goto err;
+
+	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
+	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
+
+	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
+	if (hop2_addr == ULLONG_MAX)
+		goto err;
+
+	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
+	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
+
+	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
+	if (hop3_addr == ULLONG_MAX)
+		goto err;
+
+	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
+	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
+
+	if (!is_huge) {
+		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
+		if (hop4_addr == ULLONG_MAX)
+			goto err;
+
+		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+							virt_addr);
+		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
+	}
+
+	if (hdev->dram_default_page_mapping && is_dram_addr) {
+		u64 default_pte = (prop->mmu_dram_default_page_addr &
+					HOP_PHYS_ADDR_MASK) | LAST_MASK |
+						PAGE_PRESENT_MASK;
+
+		if (curr_pte != default_pte) {
+			dev_err(hdev->dev,
+				"DRAM: mapping already exists for virt_addr 0x%llx\n",
+					virt_addr);
+			rc = -EINVAL;
+			goto err;
+		}
+
+		if (hop1_new || hop2_new || hop3_new || hop4_new) {
+			dev_err(hdev->dev,
+				"DRAM mapping should not allocate more hops\n");
+			rc = -EFAULT;
+			goto err;
+		}
+	} else if (curr_pte & PAGE_PRESENT_MASK) {
+		dev_err(hdev->dev,
+			"mapping already exists for virt_addr 0x%llx\n",
+				virt_addr);
+
+		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
+			*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
+		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
+			*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
+		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
+			*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
+		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
+			*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
+
+		if (!is_huge)
+			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
+				*(u64 *) (uintptr_t) hop4_pte_addr,
+				hop4_pte_addr);
+
+		rc = -EINVAL;
+		goto err;
+	}
+
+	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
+			| PAGE_PRESENT_MASK;
+
+	if (is_huge)
+		write_final_pte(ctx, hop3_pte_addr, curr_pte);
+	else
+		write_final_pte(ctx, hop4_pte_addr, curr_pte);
+
+	if (hop1_new) {
+		curr_pte =
+			(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+		write_pte(ctx, hop0_pte_addr, curr_pte);
+	}
+	if (hop2_new) {
+		curr_pte =
+			(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+		write_pte(ctx, hop1_pte_addr, curr_pte);
+		get_pte(ctx, hop1_addr);
+	}
+	if (hop3_new) {
+		curr_pte =
+			(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+		write_pte(ctx, hop2_pte_addr, curr_pte);
+		get_pte(ctx, hop2_addr);
+	}
+
+	if (!is_huge) {
+		if (hop4_new) {
+			curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
+					PAGE_PRESENT_MASK;
+			write_pte(ctx, hop3_pte_addr, curr_pte);
+			get_pte(ctx, hop3_addr);
+		}
+
+		get_pte(ctx, hop4_addr);
+	} else {
+		get_pte(ctx, hop3_addr);
+	}
+
+	return 0;
+
+err:
+	if (hop4_new)
+		free_hop(ctx, hop4_addr);
+	if (hop3_new)
+		free_hop(ctx, hop3_addr);
+	if (hop2_new)
+		free_hop(ctx, hop2_addr);
+	if (hop1_new)
+		free_hop(ctx, hop1_addr);
+
+	return rc;
+}
+
+/*
+ * hl_mmu_map - maps a virtual addr to physical addr
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to map from
+ * @phys_addr: phys addr to map to
+ * @page_size: physical page size
+ * @flush_pte: whether to do a PCI flush
+ *
+ * This function does the following:
+ * - Check that the virt addr is not mapped
+ * - Allocate pgts as necessary in order to map the virt addr to the phys
+ * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
+ *
+ * Because this function changes the page tables in the device and because it
+ * changes the MMU hash, it must be protected by a lock.
+ * However, because it maps only a single page, the lock should be implemented
+ * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after mapping of
+ * large area.
+ */
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
+		bool flush_pte)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_mmu_properties *mmu_prop;
+	u64 real_virt_addr, real_phys_addr;
+	u32 real_page_size, npages;
+	int i, rc, mapped_cnt = 0;
+	bool is_dram_addr;
+
+	if (!hdev->mmu_enable)
+		return 0;
+
+	is_dram_addr = is_dram_va(hdev, virt_addr);
+
+	if (is_dram_addr)
+		mmu_prop = &prop->dmmu;
+	else if ((page_size % prop->pmmu_huge.page_size) == 0)
+		mmu_prop = &prop->pmmu_huge;
+	else
+		mmu_prop = &prop->pmmu;
+
+	/*
+	 * The H/W handles mapping of specific page sizes. Hence if the page
+	 * size is bigger, we break it to sub-pages and map them separately.
+	 */
+	if ((page_size % mmu_prop->page_size) == 0) {
+		real_page_size = mmu_prop->page_size;
+	} else {
+		dev_err(hdev->dev,
+			"page size of %u is not %uKB aligned, can't map\n",
+			page_size, mmu_prop->page_size >> 10);
+
+		return -EFAULT;
+	}
+
+	WARN_ONCE((phys_addr & (real_page_size - 1)),
+		"Mapping 0x%llx with page size of 0x%x is erroneous! 
Address must be divisible by page size", + phys_addr, real_page_size); + + npages = page_size / real_page_size; + real_virt_addr = virt_addr; + real_phys_addr = phys_addr; + + for (i = 0 ; i < npages ; i++) { + rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr, + real_page_size, is_dram_addr); + if (rc) + goto err; + + real_virt_addr += real_page_size; + real_phys_addr += real_page_size; + mapped_cnt++; + } + + if (flush_pte) + flush(ctx); + + return 0; + +err: + real_virt_addr = virt_addr; + for (i = 0 ; i < mapped_cnt ; i++) { + if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr)) + dev_warn_ratelimited(hdev->dev, + "failed to unmap va: 0x%llx\n", real_virt_addr); + + real_virt_addr += real_page_size; + } + + flush(ctx); + + return rc; +} + +/* + * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out + * + * @ctx: pointer to the context structure + * + */ +void hl_mmu_swap_out(struct hl_ctx *ctx) +{ + +} + +/* + * hl_mmu_swap_in - marks all mapping of the given ctx as swapped in + * + * @ctx: pointer to the context structure + * + */ +void hl_mmu_swap_in(struct hl_ctx *ctx) +{ + +} diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c new file mode 100644 index 000000000000..1791f6623c69 --- /dev/null +++ b/drivers/misc/habanalabs/common/pci.c @@ -0,0 +1,400 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" +#include "include/hw_ip/pci/pci_general.h" + +#include +#include + +#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 10) + +#define IATU_REGION_CTRL_REGION_EN_MASK BIT(31) +#define IATU_REGION_CTRL_MATCH_MODE_MASK BIT(30) +#define IATU_REGION_CTRL_NUM_MATCH_EN_MASK BIT(19) +#define IATU_REGION_CTRL_BAR_NUM_MASK GENMASK(10, 8) + +/** + * hl_pci_bars_map() - Map PCI BARs. + * @hdev: Pointer to hl_device structure. + * @name: Array of BAR names. + * @is_wc: Array with flag per BAR whether a write-combined mapping is needed. + * + * Request PCI regions and map them to kernel virtual addresses. + * + * Return: 0 on success, non-zero for failure. + */ +int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], + bool is_wc[3]) +{ + struct pci_dev *pdev = hdev->pdev; + int rc, i, bar; + + rc = pci_request_regions(pdev, HL_NAME); + if (rc) { + dev_err(hdev->dev, "Cannot obtain PCI resources\n"); + return rc; + } + + for (i = 0 ; i < 3 ; i++) { + bar = i * 2; /* 64-bit BARs */ + hdev->pcie_bar[bar] = is_wc[i] ? + pci_ioremap_wc_bar(pdev, bar) : + pci_ioremap_bar(pdev, bar); + if (!hdev->pcie_bar[bar]) { + dev_err(hdev->dev, "pci_ioremap%s_bar failed for %s\n", + is_wc[i] ? "_wc" : "", name[i]); + rc = -ENODEV; + goto err; + } + } + + return 0; + +err: + for (i = 2 ; i >= 0 ; i--) { + bar = i * 2; /* 64-bit BARs */ + if (hdev->pcie_bar[bar]) + iounmap(hdev->pcie_bar[bar]); + } + + pci_release_regions(pdev); + + return rc; +} + +/** + * hl_pci_bars_unmap() - Unmap PCI BARS. + * @hdev: Pointer to hl_device structure. + * + * Release all PCI BARs and unmap their virtual addresses. + */ +static void hl_pci_bars_unmap(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + int i, bar; + + for (i = 2 ; i >= 0 ; i--) { + bar = i * 2; /* 64-bit BARs */ + iounmap(hdev->pcie_bar[bar]); + } + + pci_release_regions(pdev); +} + +/** + * hl_pci_elbi_write() - Write through the ELBI interface. + * @hdev: Pointer to hl_device structure. 
+ * @addr: Address to write to + * @data: Data to write + * + * Return: 0 on success, negative value for failure. + */ +static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data) +{ + struct pci_dev *pdev = hdev->pdev; + ktime_t timeout; + u64 msec; + u32 val; + + if (hdev->pldm) + msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC; + else + msec = HL_PCI_ELBI_TIMEOUT_MSEC; + + /* Clear previous status */ + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0); + + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, + PCI_CONFIG_ELBI_CTRL_WRITE); + + timeout = ktime_add_ms(ktime_get(), msec); + for (;;) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val); + if (val & PCI_CONFIG_ELBI_STS_MASK) + break; + if (ktime_compare(ktime_get(), timeout) > 0) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, + &val); + break; + } + + usleep_range(300, 500); + } + + if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) + return 0; + + if (val & PCI_CONFIG_ELBI_STS_ERR) { + dev_err(hdev->dev, "Error writing to ELBI\n"); + return -EIO; + } + + if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { + dev_err(hdev->dev, "ELBI write didn't finish in time\n"); + return -EIO; + } + + dev_err(hdev->dev, "ELBI write has undefined bits in status\n"); + return -EIO; +} + +/** + * hl_pci_iatu_write() - iatu write routine. + * @hdev: Pointer to hl_device structure. + * @addr: Address to write to + * @data: Data to write + * + * Return: 0 on success, negative value for failure. + */ +int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 dbi_offset; + int rc; + + dbi_offset = addr & 0xFFF; + + rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); + rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, + data); + + if (rc) + return -EIO; + + return 0; +} + +/** + * hl_pci_reset_link_through_bridge() - Reset PCI link. + * @hdev: Pointer to hl_device structure. + */ +static void hl_pci_reset_link_through_bridge(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + struct pci_dev *parent_port; + u16 val; + + parent_port = pdev->bus->self; + pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val); + val |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); + ssleep(1); + + val &= ~(PCI_BRIDGE_CTL_BUS_RESET); + pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); + ssleep(3); +} + +/** + * hl_pci_set_inbound_region() - Configure inbound region + * @hdev: Pointer to hl_device structure. + * @region: Inbound region number. + * @pci_region: Inbound region parameters. + * + * Configure the iATU inbound region. + * + * Return: 0 on success, negative value for failure. 
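+ *
+ * Note (illustrative, derived from the offsets used below): region 1 is
+ * programmed at DBI offset 0x300 (0x200 * region + 0x100), so its lower
+ * target address is written at DBI offset 0x314 and its control register
+ * at 0x304.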
+ */ +int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, + struct hl_inbound_pci_region *pci_region) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 bar_phys_base, region_base, region_end_address; + u32 offset, ctrl_reg_val; + int rc = 0; + + /* region offset */ + offset = (0x200 * region) + 0x100; + + if (pci_region->mode == PCI_ADDRESS_MATCH_MODE) { + bar_phys_base = hdev->pcie_bar_phys[pci_region->bar]; + region_base = bar_phys_base + pci_region->offset_in_bar; + region_end_address = region_base + pci_region->size - 1; + + rc |= hl_pci_iatu_write(hdev, offset + 0x8, + lower_32_bits(region_base)); + rc |= hl_pci_iatu_write(hdev, offset + 0xC, + upper_32_bits(region_base)); + rc |= hl_pci_iatu_write(hdev, offset + 0x10, + lower_32_bits(region_end_address)); + } + + /* Point to the specified address */ + rc = hl_pci_iatu_write(hdev, offset + 0x14, + lower_32_bits(pci_region->addr)); + rc |= hl_pci_iatu_write(hdev, offset + 0x18, + upper_32_bits(pci_region->addr)); + rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0); + + /* Enable + bar/address match + match enable + bar number */ + ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1); + ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK, + pci_region->mode); + ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1); + + if (pci_region->mode == PCI_BAR_MATCH_MODE) + ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK, + pci_region->bar); + + rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val); + + /* Return the DBI window to the default location */ + rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); + rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); + + if (rc) + dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n", + pci_region->bar, pci_region->addr); + + return rc; +} + +/** + * hl_pci_set_outbound_region() - Configure outbound region 0 + * @hdev: Pointer to hl_device structure. + * @pci_region: Outbound region parameters. + * + * Configure the iATU outbound region 0. + * + * Return: 0 on success, negative value for failure. + */ +int hl_pci_set_outbound_region(struct hl_device *hdev, + struct hl_outbound_pci_region *pci_region) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 outbound_region_end_address; + int rc = 0; + + /* Outbound Region 0 */ + outbound_region_end_address = + pci_region->addr + pci_region->size - 1; + rc |= hl_pci_iatu_write(hdev, 0x008, + lower_32_bits(pci_region->addr)); + rc |= hl_pci_iatu_write(hdev, 0x00C, + upper_32_bits(pci_region->addr)); + rc |= hl_pci_iatu_write(hdev, 0x010, + lower_32_bits(outbound_region_end_address)); + rc |= hl_pci_iatu_write(hdev, 0x014, 0); + + if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64)) + rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000); + else + rc |= hl_pci_iatu_write(hdev, 0x018, 0); + + rc |= hl_pci_iatu_write(hdev, 0x020, + upper_32_bits(outbound_region_end_address)); + /* Increase region size */ + rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000); + /* Enable */ + rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000); + + /* Return the DBI window to the default location */ + rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); + rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); + + return rc; +} + +/** + * hl_pci_set_dma_mask() - Set DMA masks for the device. + * @hdev: Pointer to hl_device structure. + * + * This function sets the DMA masks (regular and consistent) for a specified + * value. 
If it doesn't succeed, it tries to set it to a fall-back value + * + * Return: 0 on success, non-zero for failure. + */ +static int hl_pci_set_dma_mask(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + int rc; + + /* set DMA mask */ + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask)); + if (rc) { + dev_err(hdev->dev, + "Failed to set pci dma mask to %d bits, error %d\n", + hdev->dma_mask, rc); + return rc; + } + + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask)); + if (rc) { + dev_err(hdev->dev, + "Failed to set pci consistent dma mask to %d bits, error %d\n", + hdev->dma_mask, rc); + return rc; + } + + return 0; +} + +/** + * hl_pci_init() - PCI initialization code. + * @hdev: Pointer to hl_device structure. + * + * Set DMA masks, initialize the PCI controller and map the PCI BARs. + * + * Return: 0 on success, non-zero for failure. + */ +int hl_pci_init(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + int rc; + + if (hdev->reset_pcilink) + hl_pci_reset_link_through_bridge(hdev); + + rc = pci_enable_device_mem(pdev); + if (rc) { + dev_err(hdev->dev, "can't enable PCI device\n"); + return rc; + } + + pci_set_master(pdev); + + rc = hdev->asic_funcs->pci_bars_map(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize PCI BARs\n"); + goto disable_device; + } + + rc = hdev->asic_funcs->init_iatu(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize iATU\n"); + goto disable_device; + } + + rc = hl_pci_set_dma_mask(hdev); + if (rc) + goto disable_device; + + return 0; + +disable_device: + pci_clear_master(pdev); + pci_disable_device(pdev); + + return rc; +} + +/** + * hl_fw_fini() - PCI finalization code. + * @hdev: Pointer to hl_device structure + * + * Unmap PCI bars and disable PCI device. + */ +void hl_pci_fini(struct hl_device *hdev) +{ + hl_pci_bars_unmap(hdev); + + pci_clear_master(hdev->pdev); + pci_disable_device(hdev->pdev); +} diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c new file mode 100644 index 000000000000..5d78d5e1c782 --- /dev/null +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -0,0 +1,442 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. 
+ */ + +#include "habanalabs.h" + +#include + +#define SET_CLK_PKT_TIMEOUT 1000000 /* 1s */ +#define SET_PWR_PKT_TIMEOUT 1000000 /* 1s */ + +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) +{ + struct armcp_packet pkt; + long result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + if (curr) + pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + else + pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.pll_index = cpu_to_le32(pll_index); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SET_CLK_PKT_TIMEOUT, &result); + + if (rc) { + dev_err(hdev->dev, + "Failed to get frequency of PLL %d, error %d\n", + pll_index, rc); + result = rc; + } + + return result; +} + +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) +{ + struct armcp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.pll_index = cpu_to_le32(pll_index); + pkt.value = cpu_to_le64(freq); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SET_CLK_PKT_TIMEOUT, NULL); + + if (rc) + dev_err(hdev->dev, + "Failed to set frequency to PLL %d, error %d\n", + pll_index, rc); +} + +u64 hl_get_max_power(struct hl_device *hdev) +{ + struct armcp_packet pkt; + long result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SET_PWR_PKT_TIMEOUT, &result); + + if (rc) { + dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); + result = rc; + } + + return result; +} + +void hl_set_max_power(struct hl_device *hdev, u64 value) +{ + struct armcp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.value = cpu_to_le64(value); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + SET_PWR_PKT_TIMEOUT, NULL); + + if (rc) + dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); +} + +static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", hdev->asic_prop.uboot_ver); +} + +static ssize_t armcp_kernel_ver_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version); +} + +static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version); +} + +static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "0x%08x\n", + hdev->asic_prop.armcp_info.cpld_version); +} + +static ssize_t infineon_ver_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "0x%04x\n", + hdev->asic_prop.armcp_info.infineon_version); +} + +static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", 
hdev->asic_prop.armcp_info.fuse_version); +} + +static ssize_t thermal_ver_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version); +} + +static ssize_t preboot_btl_ver_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", hdev->asic_prop.preboot_ver); +} + +static ssize_t soft_reset_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + long value; + int rc; + + rc = kstrtoul(buf, 0, &value); + + if (rc) { + count = -EINVAL; + goto out; + } + + if (!hdev->supports_soft_reset) { + dev_err(hdev->dev, "Device does not support soft-reset\n"); + goto out; + } + + dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n"); + + hl_device_reset(hdev, false, false); + +out: + return count; +} + +static ssize_t hard_reset_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + long value; + int rc; + + rc = kstrtoul(buf, 0, &value); + + if (rc) { + count = -EINVAL; + goto out; + } + + dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n"); + + hl_device_reset(hdev, true, false); + +out: + return count; +} + +static ssize_t device_type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + char *str; + + switch (hdev->asic_type) { + case ASIC_GOYA: + str = "GOYA"; + break; + case ASIC_GAUDI: + str = "GAUDI"; + break; + default: + dev_err(hdev->dev, "Unrecognized ASIC type %d\n", + hdev->asic_type); + return -EINVAL; + } + + return sprintf(buf, "%s\n", str); +} + +static ssize_t pci_addr_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%04x:%02x:%02x.%x\n", + pci_domain_nr(hdev->pdev->bus), + hdev->pdev->bus->number, + PCI_SLOT(hdev->pdev->devfn), + PCI_FUNC(hdev->pdev->devfn)); +} + +static ssize_t status_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + char *str; + + if (atomic_read(&hdev->in_reset)) + str = "In reset"; + else if (hdev->disabled) + str = "Malfunction"; + else + str = "Operational"; + + return sprintf(buf, "%s\n", str); +} + +static ssize_t soft_reset_cnt_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", hdev->soft_reset_cnt); +} + +static ssize_t hard_reset_cnt_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", hdev->hard_reset_cnt); +} + +static ssize_t max_power_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + long val; + + if (hl_device_disabled_or_in_reset(hdev)) + return -ENODEV; + + val = hl_get_max_power(hdev); + + return sprintf(buf, "%lu\n", val); +} + +static ssize_t max_power_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + unsigned long value; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) { + count = -ENODEV; + goto out; + } + + rc = 
kstrtoul(buf, 0, &value); + + if (rc) { + count = -EINVAL; + goto out; + } + + hdev->max_power = value; + hl_set_max_power(hdev, value); + +out: + return count; +} + +static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t offset, + size_t max_size) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct hl_device *hdev = dev_get_drvdata(dev); + char *data; + int rc; + + if (!max_size) + return -EINVAL; + + data = kzalloc(max_size, GFP_KERNEL); + if (!data) + return -ENOMEM; + + rc = hdev->asic_funcs->get_eeprom_data(hdev, data, max_size); + if (rc) + goto out; + + memcpy(buf, data, max_size); + +out: + kfree(data); + + return max_size; +} + +static DEVICE_ATTR_RO(armcp_kernel_ver); +static DEVICE_ATTR_RO(armcp_ver); +static DEVICE_ATTR_RO(cpld_ver); +static DEVICE_ATTR_RO(device_type); +static DEVICE_ATTR_RO(fuse_ver); +static DEVICE_ATTR_WO(hard_reset); +static DEVICE_ATTR_RO(hard_reset_cnt); +static DEVICE_ATTR_RO(infineon_ver); +static DEVICE_ATTR_RW(max_power); +static DEVICE_ATTR_RO(pci_addr); +static DEVICE_ATTR_RO(preboot_btl_ver); +static DEVICE_ATTR_WO(soft_reset); +static DEVICE_ATTR_RO(soft_reset_cnt); +static DEVICE_ATTR_RO(status); +static DEVICE_ATTR_RO(thermal_ver); +static DEVICE_ATTR_RO(uboot_ver); + +static struct bin_attribute bin_attr_eeprom = { + .attr = {.name = "eeprom", .mode = (0444)}, + .size = PAGE_SIZE, + .read = eeprom_read_handler +}; + +static struct attribute *hl_dev_attrs[] = { + &dev_attr_armcp_kernel_ver.attr, + &dev_attr_armcp_ver.attr, + &dev_attr_cpld_ver.attr, + &dev_attr_device_type.attr, + &dev_attr_fuse_ver.attr, + &dev_attr_hard_reset.attr, + &dev_attr_hard_reset_cnt.attr, + &dev_attr_infineon_ver.attr, + &dev_attr_max_power.attr, + &dev_attr_pci_addr.attr, + &dev_attr_preboot_btl_ver.attr, + &dev_attr_soft_reset.attr, + &dev_attr_soft_reset_cnt.attr, + &dev_attr_status.attr, + &dev_attr_thermal_ver.attr, + &dev_attr_uboot_ver.attr, + NULL, +}; + +static struct bin_attribute *hl_dev_bin_attrs[] = { + &bin_attr_eeprom, + NULL +}; + +static struct attribute_group hl_dev_attr_group = { + .attrs = hl_dev_attrs, + .bin_attrs = hl_dev_bin_attrs, +}; + +static struct attribute_group hl_dev_clks_attr_group; + +static const struct attribute_group *hl_dev_attr_groups[] = { + &hl_dev_attr_group, + &hl_dev_clks_attr_group, + NULL, +}; + +int hl_sysfs_init(struct hl_device *hdev) +{ + int rc; + + if (hdev->asic_type == ASIC_GOYA) + hdev->pm_mng_profile = PM_AUTO; + else + hdev->pm_mng_profile = PM_MANUAL; + hdev->max_power = hdev->asic_prop.max_power_default; + + hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group); + + rc = device_add_groups(hdev->dev, hl_dev_attr_groups); + if (rc) { + dev_err(hdev->dev, + "Failed to add groups to device, error %d\n", rc); + return rc; + } + + return 0; +} + +void hl_sysfs_fini(struct hl_device *hdev) +{ + device_remove_groups(hdev->dev, hl_dev_attr_groups); +} diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c deleted file mode 100644 index 1e3e5b19ecd9..000000000000 --- a/drivers/misc/habanalabs/context.c +++ /dev/null @@ -1,237 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. 
- */ - -#include "habanalabs.h" - -#include - -static void hl_ctx_fini(struct hl_ctx *ctx) -{ - struct hl_device *hdev = ctx->hdev; - int i; - - /* - * If we arrived here, there are no jobs waiting for this context - * on its queues so we can safely remove it. - * This is because for each CS, we increment the ref count and for - * every CS that was finished we decrement it and we won't arrive - * to this function unless the ref count is 0 - */ - - for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++) - dma_fence_put(ctx->cs_pending[i]); - - kfree(ctx->cs_pending); - - if (ctx->asid != HL_KERNEL_ASID_ID) { - /* The engines are stopped as there is no executing CS, but the - * Coresight might be still working by accessing addresses - * related to the stopped engines. Hence stop it explicitly. - * Stop only if this is the compute context, as there can be - * only one compute context - */ - if ((hdev->in_debug) && (hdev->compute_ctx == ctx)) - hl_device_set_debug_mode(hdev, false); - - hl_vm_ctx_fini(ctx); - hl_asid_free(hdev, ctx->asid); - } else { - hl_mmu_ctx_fini(ctx); - } -} - -void hl_ctx_do_release(struct kref *ref) -{ - struct hl_ctx *ctx; - - ctx = container_of(ref, struct hl_ctx, refcount); - - hl_ctx_fini(ctx); - - if (ctx->hpriv) - hl_hpriv_put(ctx->hpriv); - - kfree(ctx); -} - -int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv) -{ - struct hl_ctx_mgr *mgr = &hpriv->ctx_mgr; - struct hl_ctx *ctx; - int rc; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) { - rc = -ENOMEM; - goto out_err; - } - - mutex_lock(&mgr->ctx_lock); - rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); - mutex_unlock(&mgr->ctx_lock); - - if (rc < 0) { - dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n"); - goto free_ctx; - } - - ctx->handle = rc; - - rc = hl_ctx_init(hdev, ctx, false); - if (rc) - goto remove_from_idr; - - hl_hpriv_get(hpriv); - ctx->hpriv = hpriv; - - /* TODO: remove for multiple contexts per process */ - hpriv->ctx = ctx; - - /* TODO: remove the following line for multiple process support */ - hdev->compute_ctx = ctx; - - return 0; - -remove_from_idr: - mutex_lock(&mgr->ctx_lock); - idr_remove(&mgr->ctx_handles, ctx->handle); - mutex_unlock(&mgr->ctx_lock); -free_ctx: - kfree(ctx); -out_err: - return rc; -} - -void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx) -{ - if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1) - return; - - dev_warn(hdev->dev, - "user process released device but its command submissions are still executing\n"); -} - -int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) -{ - int rc = 0; - - ctx->hdev = hdev; - - kref_init(&ctx->refcount); - - ctx->cs_sequence = 1; - spin_lock_init(&ctx->cs_lock); - atomic_set(&ctx->thread_ctx_switch_token, 1); - ctx->thread_ctx_switch_wait_token = 0; - ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs, - sizeof(struct dma_fence *), - GFP_KERNEL); - if (!ctx->cs_pending) - return -ENOMEM; - - if (is_kernel_ctx) { - ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */ - rc = hl_mmu_ctx_init(ctx); - if (rc) { - dev_err(hdev->dev, "Failed to init mmu ctx module\n"); - goto mem_ctx_err; - } - } else { - ctx->asid = hl_asid_alloc(hdev); - if (!ctx->asid) { - dev_err(hdev->dev, "No free ASID, failed to create context\n"); - return -ENOMEM; - } - - rc = hl_vm_ctx_init(ctx); - if (rc) { - dev_err(hdev->dev, "Failed to init mem ctx module\n"); - rc = -ENOMEM; - goto mem_ctx_err; - } - } - - return 0; - -mem_ctx_err: - if (ctx->asid != 
HL_KERNEL_ASID_ID) - hl_asid_free(hdev, ctx->asid); - - return rc; -} - -void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx) -{ - kref_get(&ctx->refcount); -} - -int hl_ctx_put(struct hl_ctx *ctx) -{ - return kref_put(&ctx->refcount, hl_ctx_do_release); -} - -struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) -{ - struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop; - struct dma_fence *fence; - - spin_lock(&ctx->cs_lock); - - if (seq >= ctx->cs_sequence) { - spin_unlock(&ctx->cs_lock); - return ERR_PTR(-EINVAL); - } - - if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) { - spin_unlock(&ctx->cs_lock); - return NULL; - } - - fence = dma_fence_get( - ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]); - spin_unlock(&ctx->cs_lock); - - return fence; -} - -/* - * hl_ctx_mgr_init - initialize the context manager - * - * @mgr: pointer to context manager structure - * - * This manager is an object inside the hpriv object of the user process. - * The function is called when a user process opens the FD. - */ -void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr) -{ - mutex_init(&mgr->ctx_lock); - idr_init(&mgr->ctx_handles); -} - -/* - * hl_ctx_mgr_fini - finalize the context manager - * - * @hdev: pointer to device structure - * @mgr: pointer to context manager structure - * - * This function goes over all the contexts in the manager and frees them. - * It is called when a process closes the FD. - */ -void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr) -{ - struct hl_ctx *ctx; - struct idr *idp; - u32 id; - - idp = &mgr->ctx_handles; - - idr_for_each_entry(idp, ctx, id) - hl_ctx_free(hdev, ctx); - - idr_destroy(&mgr->ctx_handles); - mutex_destroy(&mgr->ctx_lock); -} diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c deleted file mode 100644 index fc4372c18ce2..000000000000 --- a/drivers/misc/habanalabs/debugfs.c +++ /dev/null @@ -1,1411 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. 
- */ - -#include "habanalabs.h" -#include "include/hw_ip/mmu/mmu_general.h" - -#include -#include -#include - -#define MMU_ADDR_BUF_SIZE 40 -#define MMU_ASID_BUF_SIZE 10 -#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE) - -static struct dentry *hl_debug_root; - -static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, - u8 i2c_reg, u32 *val) -{ - struct armcp_packet pkt; - int rc; - - if (hl_device_disabled_or_in_reset(hdev)) - return -EBUSY; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.i2c_bus = i2c_bus; - pkt.i2c_addr = i2c_addr; - pkt.i2c_reg = i2c_reg; - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, (long *) val); - - if (rc) - dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); - - return rc; -} - -static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, - u8 i2c_reg, u32 val) -{ - struct armcp_packet pkt; - int rc; - - if (hl_device_disabled_or_in_reset(hdev)) - return -EBUSY; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.i2c_bus = i2c_bus; - pkt.i2c_addr = i2c_addr; - pkt.i2c_reg = i2c_reg; - pkt.value = cpu_to_le64(val); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, NULL); - - if (rc) - dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc); - - return rc; -} - -static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) -{ - struct armcp_packet pkt; - int rc; - - if (hl_device_disabled_or_in_reset(hdev)) - return; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.led_index = cpu_to_le32(led); - pkt.value = cpu_to_le64(state); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, NULL); - - if (rc) - dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc); -} - -static int command_buffers_show(struct seq_file *s, void *data) -{ - struct hl_debugfs_entry *entry = s->private; - struct hl_dbg_device_entry *dev_entry = entry->dev_entry; - struct hl_cb *cb; - bool first = true; - - spin_lock(&dev_entry->cb_spinlock); - - list_for_each_entry(cb, &dev_entry->cb_list, debugfs_list) { - if (first) { - first = false; - seq_puts(s, "\n"); - seq_puts(s, " CB ID CTX ID CB size CB RefCnt mmap? 
CS counter\n"); - seq_puts(s, "---------------------------------------------------------------\n"); - } - seq_printf(s, - " %03d %d 0x%08x %d %d %d\n", - cb->id, cb->ctx_id, cb->size, - kref_read(&cb->refcount), - cb->mmap, cb->cs_cnt); - } - - spin_unlock(&dev_entry->cb_spinlock); - - if (!first) - seq_puts(s, "\n"); - - return 0; -} - -static int command_submission_show(struct seq_file *s, void *data) -{ - struct hl_debugfs_entry *entry = s->private; - struct hl_dbg_device_entry *dev_entry = entry->dev_entry; - struct hl_cs *cs; - bool first = true; - - spin_lock(&dev_entry->cs_spinlock); - - list_for_each_entry(cs, &dev_entry->cs_list, debugfs_list) { - if (first) { - first = false; - seq_puts(s, "\n"); - seq_puts(s, " CS ID CTX ASID CS RefCnt Submitted Completed\n"); - seq_puts(s, "------------------------------------------------------\n"); - } - seq_printf(s, - " %llu %d %d %d %d\n", - cs->sequence, cs->ctx->asid, - kref_read(&cs->refcount), - cs->submitted, cs->completed); - } - - spin_unlock(&dev_entry->cs_spinlock); - - if (!first) - seq_puts(s, "\n"); - - return 0; -} - -static int command_submission_jobs_show(struct seq_file *s, void *data) -{ - struct hl_debugfs_entry *entry = s->private; - struct hl_dbg_device_entry *dev_entry = entry->dev_entry; - struct hl_cs_job *job; - bool first = true; - - spin_lock(&dev_entry->cs_job_spinlock); - - list_for_each_entry(job, &dev_entry->cs_job_list, debugfs_list) { - if (first) { - first = false; - seq_puts(s, "\n"); - seq_puts(s, " JOB ID CS ID CTX ASID H/W Queue\n"); - seq_puts(s, "---------------------------------------\n"); - } - if (job->cs) - seq_printf(s, - " %02d %llu %d %d\n", - job->id, job->cs->sequence, job->cs->ctx->asid, - job->hw_queue_id); - else - seq_printf(s, - " %02d 0 %d %d\n", - job->id, HL_KERNEL_ASID_ID, job->hw_queue_id); - } - - spin_unlock(&dev_entry->cs_job_spinlock); - - if (!first) - seq_puts(s, "\n"); - - return 0; -} - -static int userptr_show(struct seq_file *s, void *data) -{ - struct hl_debugfs_entry *entry = s->private; - struct hl_dbg_device_entry *dev_entry = entry->dev_entry; - struct hl_userptr *userptr; - char dma_dir[4][30] = {"DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", - "DMA_FROM_DEVICE", "DMA_NONE"}; - bool first = true; - - spin_lock(&dev_entry->userptr_spinlock); - - list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) { - if (first) { - first = false; - seq_puts(s, "\n"); - seq_puts(s, " user virtual address size dma dir\n"); - seq_puts(s, "----------------------------------------------------------\n"); - } - seq_printf(s, - " 0x%-14llx %-10u %-30s\n", - userptr->addr, userptr->size, dma_dir[userptr->dir]); - } - - spin_unlock(&dev_entry->userptr_spinlock); - - if (!first) - seq_puts(s, "\n"); - - return 0; -} - -static int vm_show(struct seq_file *s, void *data) -{ - struct hl_debugfs_entry *entry = s->private; - struct hl_dbg_device_entry *dev_entry = entry->dev_entry; - struct hl_ctx *ctx; - struct hl_vm *vm; - struct hl_vm_hash_node *hnode; - struct hl_userptr *userptr; - struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; - enum vm_type_t *vm_type; - bool once = true; - u64 j; - int i; - - if (!dev_entry->hdev->mmu_enable) - return 0; - - spin_lock(&dev_entry->ctx_mem_hash_spinlock); - - list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) { - once = false; - seq_puts(s, "\n\n----------------------------------------------------"); - seq_puts(s, "\n----------------------------------------------------\n\n"); - seq_printf(s, "ctx asid: %u\n", ctx->asid); - - 
seq_puts(s, "\nmappings:\n\n"); - seq_puts(s, " virtual address size handle\n"); - seq_puts(s, "----------------------------------------------------\n"); - mutex_lock(&ctx->mem_hash_lock); - hash_for_each(ctx->mem_hash, i, hnode, node) { - vm_type = hnode->ptr; - - if (*vm_type == VM_TYPE_USERPTR) { - userptr = hnode->ptr; - seq_printf(s, - " 0x%-14llx %-10u\n", - hnode->vaddr, userptr->size); - } else { - phys_pg_pack = hnode->ptr; - seq_printf(s, - " 0x%-14llx %-10llu %-4u\n", - hnode->vaddr, phys_pg_pack->total_size, - phys_pg_pack->handle); - } - } - mutex_unlock(&ctx->mem_hash_lock); - - vm = &ctx->hdev->vm; - spin_lock(&vm->idr_lock); - - if (!idr_is_empty(&vm->phys_pg_pack_handles)) - seq_puts(s, "\n\nallocations:\n"); - - idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_pack, i) { - if (phys_pg_pack->asid != ctx->asid) - continue; - - seq_printf(s, "\nhandle: %u\n", phys_pg_pack->handle); - seq_printf(s, "page size: %u\n\n", - phys_pg_pack->page_size); - seq_puts(s, " physical address\n"); - seq_puts(s, "---------------------\n"); - for (j = 0 ; j < phys_pg_pack->npages ; j++) { - seq_printf(s, " 0x%-14llx\n", - phys_pg_pack->pages[j]); - } - } - spin_unlock(&vm->idr_lock); - - } - - spin_unlock(&dev_entry->ctx_mem_hash_spinlock); - - if (!once) - seq_puts(s, "\n"); - - return 0; -} - -/* these inline functions are copied from mmu.c */ -static inline u64 get_hop0_addr(struct hl_ctx *ctx) -{ - return ctx->hdev->asic_prop.mmu_pgt_addr + - (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); -} - -static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr, u64 mask, u64 shift) -{ - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & mask) >> shift); -} - -static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask, - mmu_specs->hop0_shift); -} - -static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask, - mmu_specs->hop1_shift); -} - -static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask, - mmu_specs->hop2_shift); -} - -static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask, - mmu_specs->hop3_shift); -} - -static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask, - mmu_specs->hop4_shift); -} - -static inline u64 get_next_hop_addr(u64 curr_pte) -{ - if (curr_pte & PAGE_PRESENT_MASK) - return curr_pte & HOP_PHYS_ADDR_MASK; - else - return ULLONG_MAX; -} - -static int mmu_show(struct seq_file *s, void *data) -{ - struct hl_debugfs_entry *entry = s->private; - struct hl_dbg_device_entry *dev_entry = entry->dev_entry; - struct hl_device *hdev = dev_entry->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_mmu_properties *mmu_prop; - struct hl_ctx *ctx; - bool is_dram_addr; - - u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0, - hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0, - hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0, - 
hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0, - hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0, - virt_addr = dev_entry->mmu_addr; - - if (!hdev->mmu_enable) - return 0; - - if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID) - ctx = hdev->kernel_ctx; - else - ctx = hdev->compute_ctx; - - if (!ctx) { - dev_err(hdev->dev, "no ctx available\n"); - return 0; - } - - is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->dmmu.start_addr, - prop->dmmu.end_addr); - - /* shifts and masks are the same in PMMU and HPMMU, use one of them */ - mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; - - mutex_lock(&ctx->mmu_lock); - - /* the following lookup is copied from unmap() in mmu.c */ - - hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); - hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr); - hop1_addr = get_next_hop_addr(hop0_pte); - - if (hop1_addr == ULLONG_MAX) - goto not_mapped; - - hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); - hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr); - hop2_addr = get_next_hop_addr(hop1_pte); - - if (hop2_addr == ULLONG_MAX) - goto not_mapped; - - hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); - hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr); - hop3_addr = get_next_hop_addr(hop2_pte); - - if (hop3_addr == ULLONG_MAX) - goto not_mapped; - - hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); - hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr); - - if (!(hop3_pte & LAST_MASK)) { - hop4_addr = get_next_hop_addr(hop3_pte); - - if (hop4_addr == ULLONG_MAX) - goto not_mapped; - - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, - virt_addr); - hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr); - if (!(hop4_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - } else { - if (!(hop3_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - } - - seq_printf(s, "asid: %u, virt_addr: 0x%llx\n", - dev_entry->mmu_asid, dev_entry->mmu_addr); - - seq_printf(s, "hop0_addr: 0x%llx\n", hop0_addr); - seq_printf(s, "hop0_pte_addr: 0x%llx\n", hop0_pte_addr); - seq_printf(s, "hop0_pte: 0x%llx\n", hop0_pte); - - seq_printf(s, "hop1_addr: 0x%llx\n", hop1_addr); - seq_printf(s, "hop1_pte_addr: 0x%llx\n", hop1_pte_addr); - seq_printf(s, "hop1_pte: 0x%llx\n", hop1_pte); - - seq_printf(s, "hop2_addr: 0x%llx\n", hop2_addr); - seq_printf(s, "hop2_pte_addr: 0x%llx\n", hop2_pte_addr); - seq_printf(s, "hop2_pte: 0x%llx\n", hop2_pte); - - seq_printf(s, "hop3_addr: 0x%llx\n", hop3_addr); - seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr); - seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte); - - if (!(hop3_pte & LAST_MASK)) { - seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr); - seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr); - seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte); - } - - goto out; - -not_mapped: - dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", - virt_addr); -out: - mutex_unlock(&ctx->mmu_lock); - - return 0; -} - -static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf, - size_t count, loff_t *f_pos) -{ - struct seq_file *s = file->private_data; - struct hl_debugfs_entry *entry = s->private; - struct hl_dbg_device_entry *dev_entry = entry->dev_entry; - struct hl_device *hdev = dev_entry->hdev; - char kbuf[MMU_KBUF_SIZE]; - char *c; - ssize_t rc; - - if (!hdev->mmu_enable) - return count; - - if (count > sizeof(kbuf) - 1) - goto err; - if 
(copy_from_user(kbuf, buf, count)) - goto err; - kbuf[count] = 0; - - c = strchr(kbuf, ' '); - if (!c) - goto err; - *c = '\0'; - - rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid); - if (rc) - goto err; - - if (strncmp(c+1, "0x", 2)) - goto err; - rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr); - if (rc) - goto err; - - return count; - -err: - dev_err(hdev->dev, "usage: echo <0xaddr> > mmu\n"); - - return -EINVAL; -} - -static int engines_show(struct seq_file *s, void *data) -{ - struct hl_debugfs_entry *entry = s->private; - struct hl_dbg_device_entry *dev_entry = entry->dev_entry; - struct hl_device *hdev = dev_entry->hdev; - - if (atomic_read(&hdev->in_reset)) { - dev_warn_ratelimited(hdev->dev, - "Can't check device idle during reset\n"); - return 0; - } - - hdev->asic_funcs->is_device_idle(hdev, NULL, s); - - return 0; -} - -static bool hl_is_device_va(struct hl_device *hdev, u64 addr) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - - if (!hdev->mmu_enable) - goto out; - - if (hdev->dram_supports_virtual_memory && - (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr)) - return true; - - if (addr >= prop->pmmu.start_addr && - addr < prop->pmmu.end_addr) - return true; - - if (addr >= prop->pmmu_huge.start_addr && - addr < prop->pmmu_huge.end_addr) - return true; -out: - return false; -} - -static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, - u64 *phys_addr) -{ - struct hl_ctx *ctx = hdev->compute_ctx; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_mmu_properties *mmu_prop; - u64 hop_addr, hop_pte_addr, hop_pte; - u64 offset_mask = HOP4_MASK | FLAGS_MASK; - int rc = 0; - bool is_dram_addr; - - if (!ctx) { - dev_err(hdev->dev, "no ctx available\n"); - return -EINVAL; - } - - is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->dmmu.start_addr, - prop->dmmu.end_addr); - - /* shifts and masks are the same in PMMU and HPMMU, use one of them */ - mmu_prop = is_dram_addr ? 
&prop->dmmu : &prop->pmmu; - - mutex_lock(&ctx->mmu_lock); - - /* hop 0 */ - hop_addr = get_hop0_addr(ctx); - hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - /* hop 1 */ - hop_addr = get_next_hop_addr(hop_pte); - if (hop_addr == ULLONG_MAX) - goto not_mapped; - hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - /* hop 2 */ - hop_addr = get_next_hop_addr(hop_pte); - if (hop_addr == ULLONG_MAX) - goto not_mapped; - hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - /* hop 3 */ - hop_addr = get_next_hop_addr(hop_pte); - if (hop_addr == ULLONG_MAX) - goto not_mapped; - hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - if (!(hop_pte & LAST_MASK)) { - /* hop 4 */ - hop_addr = get_next_hop_addr(hop_pte); - if (hop_addr == ULLONG_MAX) - goto not_mapped; - hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr, - virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - offset_mask = FLAGS_MASK; - } - - if (!(hop_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - - *phys_addr = (hop_pte & ~offset_mask) | (virt_addr & offset_mask); - - goto out; - -not_mapped: - dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", - virt_addr); - rc = -EINVAL; -out: - mutex_unlock(&ctx->mmu_lock); - return rc; -} - -static ssize_t hl_data_read32(struct file *f, char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - char tmp_buf[32]; - u64 addr = entry->addr; - u32 val; - ssize_t rc; - - if (atomic_read(&hdev->in_reset)) { - dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); - return 0; - } - - if (*ppos) - return 0; - - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); - if (rc) - return rc; - } - - rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val); - if (rc) { - dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); - return rc; - } - - sprintf(tmp_buf, "0x%08x\n", val); - return simple_read_from_buffer(buf, count, ppos, tmp_buf, - strlen(tmp_buf)); -} - -static ssize_t hl_data_write32(struct file *f, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - u64 addr = entry->addr; - u32 value; - ssize_t rc; - - if (atomic_read(&hdev->in_reset)) { - dev_warn_ratelimited(hdev->dev, "Can't write during reset\n"); - return 0; - } - - rc = kstrtouint_from_user(buf, count, 16, &value); - if (rc) - return rc; - - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); - if (rc) - return rc; - } - - rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value); - if (rc) { - dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n", - value, addr); - return rc; - } - - return count; -} - -static ssize_t hl_data_read64(struct file *f, char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - char tmp_buf[32]; - u64 addr = entry->addr; - u64 val; - ssize_t rc; - - if (*ppos) - return 0; - - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); - if (rc) - return rc; - } - 
- rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val); - if (rc) { - dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); - return rc; - } - - sprintf(tmp_buf, "0x%016llx\n", val); - return simple_read_from_buffer(buf, count, ppos, tmp_buf, - strlen(tmp_buf)); -} - -static ssize_t hl_data_write64(struct file *f, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - u64 addr = entry->addr; - u64 value; - ssize_t rc; - - rc = kstrtoull_from_user(buf, count, 16, &value); - if (rc) - return rc; - - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); - if (rc) - return rc; - } - - rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value); - if (rc) { - dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n", - value, addr); - return rc; - } - - return count; -} - -static ssize_t hl_get_power_state(struct file *f, char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - char tmp_buf[200]; - int i; - - if (*ppos) - return 0; - - if (hdev->pdev->current_state == PCI_D0) - i = 1; - else if (hdev->pdev->current_state == PCI_D3hot) - i = 2; - else - i = 3; - - sprintf(tmp_buf, - "current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i); - return simple_read_from_buffer(buf, count, ppos, tmp_buf, - strlen(tmp_buf)); -} - -static ssize_t hl_set_power_state(struct file *f, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - u32 value; - ssize_t rc; - - rc = kstrtouint_from_user(buf, count, 10, &value); - if (rc) - return rc; - - if (value == 1) { - pci_set_power_state(hdev->pdev, PCI_D0); - pci_restore_state(hdev->pdev); - rc = pci_enable_device(hdev->pdev); - } else if (value == 2) { - pci_save_state(hdev->pdev); - pci_disable_device(hdev->pdev); - pci_set_power_state(hdev->pdev, PCI_D3hot); - } else { - dev_dbg(hdev->dev, "invalid power state value %u\n", value); - return -EINVAL; - } - - return count; -} - -static ssize_t hl_i2c_data_read(struct file *f, char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - char tmp_buf[32]; - u32 val; - ssize_t rc; - - if (*ppos) - return 0; - - rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr, - entry->i2c_reg, &val); - if (rc) { - dev_err(hdev->dev, - "Failed to read from I2C bus %d, addr %d, reg %d\n", - entry->i2c_bus, entry->i2c_addr, entry->i2c_reg); - return rc; - } - - sprintf(tmp_buf, "0x%02x\n", val); - rc = simple_read_from_buffer(buf, count, ppos, tmp_buf, - strlen(tmp_buf)); - - return rc; -} - -static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - u32 value; - ssize_t rc; - - rc = kstrtouint_from_user(buf, count, 16, &value); - if (rc) - return rc; - - rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr, - entry->i2c_reg, value); - if (rc) { - dev_err(hdev->dev, - "Failed to write 0x%02x to I2C bus %d, addr %d, reg %d\n", - value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg); - return rc; - } - - return count; -} - -static ssize_t hl_led0_write(struct file *f, const char __user *buf, - size_t count, 
loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - u32 value; - ssize_t rc; - - rc = kstrtouint_from_user(buf, count, 10, &value); - if (rc) - return rc; - - value = value ? 1 : 0; - - hl_debugfs_led_set(hdev, 0, value); - - return count; -} - -static ssize_t hl_led1_write(struct file *f, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - u32 value; - ssize_t rc; - - rc = kstrtouint_from_user(buf, count, 10, &value); - if (rc) - return rc; - - value = value ? 1 : 0; - - hl_debugfs_led_set(hdev, 1, value); - - return count; -} - -static ssize_t hl_led2_write(struct file *f, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - u32 value; - ssize_t rc; - - rc = kstrtouint_from_user(buf, count, 10, &value); - if (rc) - return rc; - - value = value ? 1 : 0; - - hl_debugfs_led_set(hdev, 2, value); - - return count; -} - -static ssize_t hl_device_read(struct file *f, char __user *buf, - size_t count, loff_t *ppos) -{ - static const char *help = - "Valid values: disable, enable, suspend, resume, cpu_timeout\n"; - return simple_read_from_buffer(buf, count, ppos, help, strlen(help)); -} - -static ssize_t hl_device_write(struct file *f, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - char data[30] = {0}; - - /* don't allow partial writes */ - if (*ppos != 0) - return 0; - - simple_write_to_buffer(data, 29, ppos, buf, count); - - if (strncmp("disable", data, strlen("disable")) == 0) { - hdev->disabled = true; - } else if (strncmp("enable", data, strlen("enable")) == 0) { - hdev->disabled = false; - } else if (strncmp("suspend", data, strlen("suspend")) == 0) { - hdev->asic_funcs->suspend(hdev); - } else if (strncmp("resume", data, strlen("resume")) == 0) { - hdev->asic_funcs->resume(hdev); - } else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) { - hdev->device_cpu_disabled = true; - } else { - dev_err(hdev->dev, - "Valid values: disable, enable, suspend, resume, cpu_timeout\n"); - count = -EINVAL; - } - - return count; -} - -static ssize_t hl_clk_gate_read(struct file *f, char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - char tmp_buf[200]; - ssize_t rc; - - if (*ppos) - return 0; - - sprintf(tmp_buf, "%d\n", hdev->clock_gating); - rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, - strlen(tmp_buf) + 1); - - return rc; -} - -static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - u32 value; - ssize_t rc; - - if (atomic_read(&hdev->in_reset)) { - dev_warn_ratelimited(hdev->dev, - "Can't change clock gating during reset\n"); - return 0; - } - - rc = kstrtouint_from_user(buf, count, 10, &value); - if (rc) - return rc; - - if (value) { - hdev->clock_gating = 1; - if (hdev->asic_funcs->enable_clock_gating) - hdev->asic_funcs->enable_clock_gating(hdev); - } else { - if (hdev->asic_funcs->disable_clock_gating) - hdev->asic_funcs->disable_clock_gating(hdev); - hdev->clock_gating = 0; - } - - return count; -} - 
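For reference, the debugfs handlers in this file (clk_gate, stop_on_err, i2c_data, the power-state file and the data32/data64 accessors) all share one idiom: the read callback formats the current value into a small stack buffer and hands it back through simple_read_from_buffer(), while the write callback parses the user buffer with a kstrto*_from_user() helper and returns the consumed count. A minimal, self-contained sketch of that idiom follows; the example_val attribute, its backing variable and the parent dentry are hypothetical and only illustrate the pattern, they are not part of this driver or of this patch.

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/uaccess.h>

static u32 example_val;

static ssize_t example_val_read(struct file *f, char __user *buf,
				size_t count, loff_t *ppos)
{
	char tmp_buf[32];

	/* The whole value is returned on the first read; later reads see EOF */
	if (*ppos)
		return 0;

	sprintf(tmp_buf, "%u\n", example_val);
	return simple_read_from_buffer(buf, count, ppos, tmp_buf,
					strlen(tmp_buf));
}

static ssize_t example_val_write(struct file *f, const char __user *buf,
				size_t count, loff_t *ppos)
{
	u32 value;
	ssize_t rc;

	/* Parse a base-10 unsigned integer straight from the user buffer */
	rc = kstrtouint_from_user(buf, count, 10, &value);
	if (rc)
		return rc;

	example_val = value;

	return count;
}

static const struct file_operations example_val_fops = {
	.owner = THIS_MODULE,
	.read = example_val_read,
	.write = example_val_write,
};

Such a file would typically be registered with debugfs_create_file("example_val", 0644, parent, NULL, &example_val_fops), which is the same call hl_debugfs_add_device() uses further down in this file to create the driver's own entries.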
-static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - char tmp_buf[200]; - ssize_t rc; - - if (*ppos) - return 0; - - sprintf(tmp_buf, "%d\n", hdev->stop_on_err); - rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, - strlen(tmp_buf) + 1); - - return rc; -} - -static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - u32 value; - ssize_t rc; - - if (atomic_read(&hdev->in_reset)) { - dev_warn_ratelimited(hdev->dev, - "Can't change stop on error during reset\n"); - return 0; - } - - rc = kstrtouint_from_user(buf, count, 10, &value); - if (rc) - return rc; - - hdev->stop_on_err = value ? 1 : 0; - - hl_device_reset(hdev, false, false); - - return count; -} - -static const struct file_operations hl_data32b_fops = { - .owner = THIS_MODULE, - .read = hl_data_read32, - .write = hl_data_write32 -}; - -static const struct file_operations hl_data64b_fops = { - .owner = THIS_MODULE, - .read = hl_data_read64, - .write = hl_data_write64 -}; - -static const struct file_operations hl_i2c_data_fops = { - .owner = THIS_MODULE, - .read = hl_i2c_data_read, - .write = hl_i2c_data_write -}; - -static const struct file_operations hl_power_fops = { - .owner = THIS_MODULE, - .read = hl_get_power_state, - .write = hl_set_power_state -}; - -static const struct file_operations hl_led0_fops = { - .owner = THIS_MODULE, - .write = hl_led0_write -}; - -static const struct file_operations hl_led1_fops = { - .owner = THIS_MODULE, - .write = hl_led1_write -}; - -static const struct file_operations hl_led2_fops = { - .owner = THIS_MODULE, - .write = hl_led2_write -}; - -static const struct file_operations hl_device_fops = { - .owner = THIS_MODULE, - .read = hl_device_read, - .write = hl_device_write -}; - -static const struct file_operations hl_clk_gate_fops = { - .owner = THIS_MODULE, - .read = hl_clk_gate_read, - .write = hl_clk_gate_write -}; - -static const struct file_operations hl_stop_on_err_fops = { - .owner = THIS_MODULE, - .read = hl_stop_on_err_read, - .write = hl_stop_on_err_write -}; - -static const struct hl_info_list hl_debugfs_list[] = { - {"command_buffers", command_buffers_show, NULL}, - {"command_submission", command_submission_show, NULL}, - {"command_submission_jobs", command_submission_jobs_show, NULL}, - {"userptr", userptr_show, NULL}, - {"vm", vm_show, NULL}, - {"mmu", mmu_show, mmu_asid_va_write}, - {"engines", engines_show, NULL} -}; - -static int hl_debugfs_open(struct inode *inode, struct file *file) -{ - struct hl_debugfs_entry *node = inode->i_private; - - return single_open(file, node->info_ent->show, node); -} - -static ssize_t hl_debugfs_write(struct file *file, const char __user *buf, - size_t count, loff_t *f_pos) -{ - struct hl_debugfs_entry *node = file->f_inode->i_private; - - if (node->info_ent->write) - return node->info_ent->write(file, buf, count, f_pos); - else - return -EINVAL; - -} - -static const struct file_operations hl_debugfs_fops = { - .owner = THIS_MODULE, - .open = hl_debugfs_open, - .read = seq_read, - .write = hl_debugfs_write, - .llseek = seq_lseek, - .release = single_release, -}; - -void hl_debugfs_add_device(struct hl_device *hdev) -{ - struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; - int count = ARRAY_SIZE(hl_debugfs_list); - 
struct hl_debugfs_entry *entry; - struct dentry *ent; - int i; - - dev_entry->hdev = hdev; - dev_entry->entry_arr = kmalloc_array(count, - sizeof(struct hl_debugfs_entry), - GFP_KERNEL); - if (!dev_entry->entry_arr) - return; - - INIT_LIST_HEAD(&dev_entry->file_list); - INIT_LIST_HEAD(&dev_entry->cb_list); - INIT_LIST_HEAD(&dev_entry->cs_list); - INIT_LIST_HEAD(&dev_entry->cs_job_list); - INIT_LIST_HEAD(&dev_entry->userptr_list); - INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list); - mutex_init(&dev_entry->file_mutex); - spin_lock_init(&dev_entry->cb_spinlock); - spin_lock_init(&dev_entry->cs_spinlock); - spin_lock_init(&dev_entry->cs_job_spinlock); - spin_lock_init(&dev_entry->userptr_spinlock); - spin_lock_init(&dev_entry->ctx_mem_hash_spinlock); - - dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), - hl_debug_root); - - debugfs_create_x64("addr", - 0644, - dev_entry->root, - &dev_entry->addr); - - debugfs_create_file("data32", - 0644, - dev_entry->root, - dev_entry, - &hl_data32b_fops); - - debugfs_create_file("data64", - 0644, - dev_entry->root, - dev_entry, - &hl_data64b_fops); - - debugfs_create_file("set_power_state", - 0200, - dev_entry->root, - dev_entry, - &hl_power_fops); - - debugfs_create_u8("i2c_bus", - 0644, - dev_entry->root, - &dev_entry->i2c_bus); - - debugfs_create_u8("i2c_addr", - 0644, - dev_entry->root, - &dev_entry->i2c_addr); - - debugfs_create_u8("i2c_reg", - 0644, - dev_entry->root, - &dev_entry->i2c_reg); - - debugfs_create_file("i2c_data", - 0644, - dev_entry->root, - dev_entry, - &hl_i2c_data_fops); - - debugfs_create_file("led0", - 0200, - dev_entry->root, - dev_entry, - &hl_led0_fops); - - debugfs_create_file("led1", - 0200, - dev_entry->root, - dev_entry, - &hl_led1_fops); - - debugfs_create_file("led2", - 0200, - dev_entry->root, - dev_entry, - &hl_led2_fops); - - debugfs_create_file("device", - 0200, - dev_entry->root, - dev_entry, - &hl_device_fops); - - debugfs_create_file("clk_gate", - 0200, - dev_entry->root, - dev_entry, - &hl_clk_gate_fops); - - debugfs_create_file("stop_on_err", - 0644, - dev_entry->root, - dev_entry, - &hl_stop_on_err_fops); - - for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { - - ent = debugfs_create_file(hl_debugfs_list[i].name, - 0444, - dev_entry->root, - entry, - &hl_debugfs_fops); - entry->dent = ent; - entry->info_ent = &hl_debugfs_list[i]; - entry->dev_entry = dev_entry; - } -} - -void hl_debugfs_remove_device(struct hl_device *hdev) -{ - struct hl_dbg_device_entry *entry = &hdev->hl_debugfs; - - debugfs_remove_recursive(entry->root); - - mutex_destroy(&entry->file_mutex); - kfree(entry->entry_arr); -} - -void hl_debugfs_add_file(struct hl_fpriv *hpriv) -{ - struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs; - - mutex_lock(&dev_entry->file_mutex); - list_add(&hpriv->debugfs_list, &dev_entry->file_list); - mutex_unlock(&dev_entry->file_mutex); -} - -void hl_debugfs_remove_file(struct hl_fpriv *hpriv) -{ - struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs; - - mutex_lock(&dev_entry->file_mutex); - list_del(&hpriv->debugfs_list); - mutex_unlock(&dev_entry->file_mutex); -} - -void hl_debugfs_add_cb(struct hl_cb *cb) -{ - struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs; - - spin_lock(&dev_entry->cb_spinlock); - list_add(&cb->debugfs_list, &dev_entry->cb_list); - spin_unlock(&dev_entry->cb_spinlock); -} - -void hl_debugfs_remove_cb(struct hl_cb *cb) -{ - struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs; - - spin_lock(&dev_entry->cb_spinlock); - 
list_del(&cb->debugfs_list); - spin_unlock(&dev_entry->cb_spinlock); -} - -void hl_debugfs_add_cs(struct hl_cs *cs) -{ - struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs; - - spin_lock(&dev_entry->cs_spinlock); - list_add(&cs->debugfs_list, &dev_entry->cs_list); - spin_unlock(&dev_entry->cs_spinlock); -} - -void hl_debugfs_remove_cs(struct hl_cs *cs) -{ - struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs; - - spin_lock(&dev_entry->cs_spinlock); - list_del(&cs->debugfs_list); - spin_unlock(&dev_entry->cs_spinlock); -} - -void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job) -{ - struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; - - spin_lock(&dev_entry->cs_job_spinlock); - list_add(&job->debugfs_list, &dev_entry->cs_job_list); - spin_unlock(&dev_entry->cs_job_spinlock); -} - -void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job) -{ - struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; - - spin_lock(&dev_entry->cs_job_spinlock); - list_del(&job->debugfs_list); - spin_unlock(&dev_entry->cs_job_spinlock); -} - -void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr) -{ - struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; - - spin_lock(&dev_entry->userptr_spinlock); - list_add(&userptr->debugfs_list, &dev_entry->userptr_list); - spin_unlock(&dev_entry->userptr_spinlock); -} - -void hl_debugfs_remove_userptr(struct hl_device *hdev, - struct hl_userptr *userptr) -{ - struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; - - spin_lock(&dev_entry->userptr_spinlock); - list_del(&userptr->debugfs_list); - spin_unlock(&dev_entry->userptr_spinlock); -} - -void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx) -{ - struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; - - spin_lock(&dev_entry->ctx_mem_hash_spinlock); - list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list); - spin_unlock(&dev_entry->ctx_mem_hash_spinlock); -} - -void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx) -{ - struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; - - spin_lock(&dev_entry->ctx_mem_hash_spinlock); - list_del(&ctx->debugfs_list); - spin_unlock(&dev_entry->ctx_mem_hash_spinlock); -} - -void __init hl_debugfs_init(void) -{ - hl_debug_root = debugfs_create_dir("habanalabs", NULL); -} - -void hl_debugfs_fini(void) -{ - debugfs_remove_recursive(hl_debug_root); -} diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c deleted file mode 100644 index 84800efec10d..000000000000 --- a/drivers/misc/habanalabs/device.c +++ /dev/null @@ -1,1506 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. 
- */ - -#define pr_fmt(fmt) "habanalabs: " fmt - -#include "habanalabs.h" - -#include -#include -#include -#include - -#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10) - -bool hl_device_disabled_or_in_reset(struct hl_device *hdev) -{ - if ((hdev->disabled) || (atomic_read(&hdev->in_reset))) - return true; - else - return false; -} - -enum hl_device_status hl_device_status(struct hl_device *hdev) -{ - enum hl_device_status status; - - if (hdev->disabled) - status = HL_DEVICE_STATUS_MALFUNCTION; - else if (atomic_read(&hdev->in_reset)) - status = HL_DEVICE_STATUS_IN_RESET; - else - status = HL_DEVICE_STATUS_OPERATIONAL; - - return status; -} - -static void hpriv_release(struct kref *ref) -{ - struct hl_fpriv *hpriv; - struct hl_device *hdev; - - hpriv = container_of(ref, struct hl_fpriv, refcount); - - hdev = hpriv->hdev; - - put_pid(hpriv->taskpid); - - hl_debugfs_remove_file(hpriv); - - mutex_destroy(&hpriv->restore_phase_mutex); - - mutex_lock(&hdev->fpriv_list_lock); - list_del(&hpriv->dev_node); - hdev->compute_ctx = NULL; - mutex_unlock(&hdev->fpriv_list_lock); - - kfree(hpriv); -} - -void hl_hpriv_get(struct hl_fpriv *hpriv) -{ - kref_get(&hpriv->refcount); -} - -void hl_hpriv_put(struct hl_fpriv *hpriv) -{ - kref_put(&hpriv->refcount, hpriv_release); -} - -/* - * hl_device_release - release function for habanalabs device - * - * @inode: pointer to inode structure - * @filp: pointer to file structure - * - * Called when process closes an habanalabs device - */ -static int hl_device_release(struct inode *inode, struct file *filp) -{ - struct hl_fpriv *hpriv = filp->private_data; - - hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); - hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); - - filp->private_data = NULL; - - hl_hpriv_put(hpriv); - - return 0; -} - -static int hl_device_release_ctrl(struct inode *inode, struct file *filp) -{ - struct hl_fpriv *hpriv = filp->private_data; - struct hl_device *hdev; - - filp->private_data = NULL; - - hdev = hpriv->hdev; - - mutex_lock(&hdev->fpriv_list_lock); - list_del(&hpriv->dev_node); - mutex_unlock(&hdev->fpriv_list_lock); - - kfree(hpriv); - - return 0; -} - -/* - * hl_mmap - mmap function for habanalabs device - * - * @*filp: pointer to file structure - * @*vma: pointer to vm_area_struct of the process - * - * Called when process does an mmap on habanalabs device. Call the device's mmap - * function at the end of the common code. 
- */ -static int hl_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct hl_fpriv *hpriv = filp->private_data; - - if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) { - vma->vm_pgoff ^= HL_MMAP_CB_MASK; - return hl_cb_mmap(hpriv, vma); - } - - return -EINVAL; -} - -static const struct file_operations hl_ops = { - .owner = THIS_MODULE, - .open = hl_device_open, - .release = hl_device_release, - .mmap = hl_mmap, - .unlocked_ioctl = hl_ioctl, - .compat_ioctl = hl_ioctl -}; - -static const struct file_operations hl_ctrl_ops = { - .owner = THIS_MODULE, - .open = hl_device_open_ctrl, - .release = hl_device_release_ctrl, - .unlocked_ioctl = hl_ioctl_control, - .compat_ioctl = hl_ioctl_control -}; - -static void device_release_func(struct device *dev) -{ - kfree(dev); -} - -/* - * device_init_cdev - Initialize cdev and device for habanalabs device - * - * @hdev: pointer to habanalabs device structure - * @hclass: pointer to the class object of the device - * @minor: minor number of the specific device - * @fpos: file operations to install for this device - * @name: name of the device as it will appear in the filesystem - * @cdev: pointer to the char device object that will be initialized - * @dev: pointer to the device object that will be initialized - * - * Initialize a cdev and a Linux device for habanalabs's device. - */ -static int device_init_cdev(struct hl_device *hdev, struct class *hclass, - int minor, const struct file_operations *fops, - char *name, struct cdev *cdev, - struct device **dev) -{ - cdev_init(cdev, fops); - cdev->owner = THIS_MODULE; - - *dev = kzalloc(sizeof(**dev), GFP_KERNEL); - if (!*dev) - return -ENOMEM; - - device_initialize(*dev); - (*dev)->devt = MKDEV(hdev->major, minor); - (*dev)->class = hclass; - (*dev)->release = device_release_func; - dev_set_drvdata(*dev, hdev); - dev_set_name(*dev, "%s", name); - - return 0; -} - -static int device_cdev_sysfs_add(struct hl_device *hdev) -{ - int rc; - - rc = cdev_device_add(&hdev->cdev, hdev->dev); - if (rc) { - dev_err(hdev->dev, - "failed to add a char device to the system\n"); - return rc; - } - - rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl); - if (rc) { - dev_err(hdev->dev, - "failed to add a control char device to the system\n"); - goto delete_cdev_device; - } - - /* hl_sysfs_init() must be done after adding the device to the system */ - rc = hl_sysfs_init(hdev); - if (rc) { - dev_err(hdev->dev, "failed to initialize sysfs\n"); - goto delete_ctrl_cdev_device; - } - - hdev->cdev_sysfs_created = true; - - return 0; - -delete_ctrl_cdev_device: - cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); -delete_cdev_device: - cdev_device_del(&hdev->cdev, hdev->dev); - return rc; -} - -static void device_cdev_sysfs_del(struct hl_device *hdev) -{ - /* device_release() won't be called so must free devices explicitly */ - if (!hdev->cdev_sysfs_created) { - kfree(hdev->dev_ctrl); - kfree(hdev->dev); - return; - } - - hl_sysfs_fini(hdev); - cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); - cdev_device_del(&hdev->cdev, hdev->dev); -} - -/* - * device_early_init - do some early initialization for the habanalabs device - * - * @hdev: pointer to habanalabs device structure - * - * Install the relevant function pointers and call the early_init function, - * if such a function exists - */ -static int device_early_init(struct hl_device *hdev) -{ - int i, rc; - char workq_name[32]; - - switch (hdev->asic_type) { - case ASIC_GOYA: - goya_set_asic_funcs(hdev); - strlcpy(hdev->asic_name, "GOYA", 
sizeof(hdev->asic_name)); - break; - case ASIC_GAUDI: - gaudi_set_asic_funcs(hdev); - sprintf(hdev->asic_name, "GAUDI"); - break; - default: - dev_err(hdev->dev, "Unrecognized ASIC type %d\n", - hdev->asic_type); - return -EINVAL; - } - - rc = hdev->asic_funcs->early_init(hdev); - if (rc) - return rc; - - rc = hl_asid_init(hdev); - if (rc) - goto early_fini; - - if (hdev->asic_prop.completion_queues_count) { - hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, - sizeof(*hdev->cq_wq), - GFP_ATOMIC); - if (!hdev->cq_wq) { - rc = -ENOMEM; - goto asid_fini; - } - } - - for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) { - snprintf(workq_name, 32, "hl-free-jobs-%u", i); - hdev->cq_wq[i] = create_singlethread_workqueue(workq_name); - if (hdev->cq_wq == NULL) { - dev_err(hdev->dev, "Failed to allocate CQ workqueue\n"); - rc = -ENOMEM; - goto free_cq_wq; - } - } - - hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0); - if (hdev->eq_wq == NULL) { - dev_err(hdev->dev, "Failed to allocate EQ workqueue\n"); - rc = -ENOMEM; - goto free_cq_wq; - } - - hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), - GFP_KERNEL); - if (!hdev->hl_chip_info) { - rc = -ENOMEM; - goto free_eq_wq; - } - - hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE, - sizeof(struct hl_device_idle_busy_ts), - (GFP_KERNEL | __GFP_ZERO)); - if (!hdev->idle_busy_ts_arr) { - rc = -ENOMEM; - goto free_chip_info; - } - - hl_cb_mgr_init(&hdev->kernel_cb_mgr); - - mutex_init(&hdev->send_cpu_message_lock); - mutex_init(&hdev->debug_lock); - mutex_init(&hdev->mmu_cache_lock); - INIT_LIST_HEAD(&hdev->hw_queues_mirror_list); - spin_lock_init(&hdev->hw_queues_mirror_lock); - INIT_LIST_HEAD(&hdev->fpriv_list); - mutex_init(&hdev->fpriv_list_lock); - atomic_set(&hdev->in_reset, 0); - - return 0; - -free_chip_info: - kfree(hdev->hl_chip_info); -free_eq_wq: - destroy_workqueue(hdev->eq_wq); -free_cq_wq: - for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) - if (hdev->cq_wq[i]) - destroy_workqueue(hdev->cq_wq[i]); - kfree(hdev->cq_wq); -asid_fini: - hl_asid_fini(hdev); -early_fini: - if (hdev->asic_funcs->early_fini) - hdev->asic_funcs->early_fini(hdev); - - return rc; -} - -/* - * device_early_fini - finalize all that was done in device_early_init - * - * @hdev: pointer to habanalabs device structure - * - */ -static void device_early_fini(struct hl_device *hdev) -{ - int i; - - mutex_destroy(&hdev->mmu_cache_lock); - mutex_destroy(&hdev->debug_lock); - mutex_destroy(&hdev->send_cpu_message_lock); - - mutex_destroy(&hdev->fpriv_list_lock); - - hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); - - kfree(hdev->idle_busy_ts_arr); - kfree(hdev->hl_chip_info); - - destroy_workqueue(hdev->eq_wq); - - for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) - destroy_workqueue(hdev->cq_wq[i]); - kfree(hdev->cq_wq); - - hl_asid_fini(hdev); - - if (hdev->asic_funcs->early_fini) - hdev->asic_funcs->early_fini(hdev); -} - -static void set_freq_to_low_job(struct work_struct *work) -{ - struct hl_device *hdev = container_of(work, struct hl_device, - work_freq.work); - - mutex_lock(&hdev->fpriv_list_lock); - - if (!hdev->compute_ctx) - hl_device_set_frequency(hdev, PLL_LOW); - - mutex_unlock(&hdev->fpriv_list_lock); - - schedule_delayed_work(&hdev->work_freq, - usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); -} - -static void hl_device_heartbeat(struct work_struct *work) -{ - struct hl_device *hdev = container_of(work, struct hl_device, - work_heartbeat.work); - - if 
(hl_device_disabled_or_in_reset(hdev)) - goto reschedule; - - if (!hdev->asic_funcs->send_heartbeat(hdev)) - goto reschedule; - - dev_err(hdev->dev, "Device heartbeat failed!\n"); - hl_device_reset(hdev, true, false); - - return; - -reschedule: - schedule_delayed_work(&hdev->work_heartbeat, - usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); -} - -/* - * device_late_init - do late stuff initialization for the habanalabs device - * - * @hdev: pointer to habanalabs device structure - * - * Do stuff that either needs the device H/W queues to be active or needs - * to happen after all the rest of the initialization is finished - */ -static int device_late_init(struct hl_device *hdev) -{ - int rc; - - if (hdev->asic_funcs->late_init) { - rc = hdev->asic_funcs->late_init(hdev); - if (rc) { - dev_err(hdev->dev, - "failed late initialization for the H/W\n"); - return rc; - } - } - - hdev->high_pll = hdev->asic_prop.high_pll; - - /* force setting to low frequency */ - hdev->curr_pll_profile = PLL_LOW; - - if (hdev->pm_mng_profile == PM_AUTO) - hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW); - else - hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST); - - INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job); - schedule_delayed_work(&hdev->work_freq, - usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); - - if (hdev->heartbeat) { - INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); - schedule_delayed_work(&hdev->work_heartbeat, - usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); - } - - hdev->late_init_done = true; - - return 0; -} - -/* - * device_late_fini - finalize all that was done in device_late_init - * - * @hdev: pointer to habanalabs device structure - * - */ -static void device_late_fini(struct hl_device *hdev) -{ - if (!hdev->late_init_done) - return; - - cancel_delayed_work_sync(&hdev->work_freq); - if (hdev->heartbeat) - cancel_delayed_work_sync(&hdev->work_heartbeat); - - if (hdev->asic_funcs->late_fini) - hdev->asic_funcs->late_fini(hdev); - - hdev->late_init_done = false; -} - -uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms) -{ - struct hl_device_idle_busy_ts *ts; - ktime_t zero_ktime, curr = ktime_get(); - u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx; - s64 period_us, last_start_us, last_end_us, last_busy_time_us, - total_busy_time_us = 0, total_busy_time_ms; - - zero_ktime = ktime_set(0, 0); - period_us = period_ms * USEC_PER_MSEC; - ts = &hdev->idle_busy_ts_arr[last_index]; - - /* check case that device is currently in idle */ - if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) && - !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) { - - last_index--; - /* Handle case idle_busy_ts_idx was 0 */ - if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) - last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; - - ts = &hdev->idle_busy_ts_arr[last_index]; - } - - while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) { - /* Check if we are in last sample case. i.e. if the sample - * begun before the sampling period. This could be a real - * sample or 0 so need to handle both cases - */ - last_start_us = ktime_to_us( - ktime_sub(curr, ts->idle_to_busy_ts)); - - if (last_start_us > period_us) { - - /* First check two cases: - * 1. If the device is currently busy - * 2. If the device was idle during the whole sampling - * period - */ - - if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) { - /* Check if the device is currently busy */ - if (ktime_compare(ts->idle_to_busy_ts, - zero_ktime)) - return 100; - - /* We either didn't have any activity or we - * reached an entry which is 0. 
Either way, - * exit and return what was accumulated so far - */ - break; - } - - /* If sample has finished, check it is relevant */ - last_end_us = ktime_to_us( - ktime_sub(curr, ts->busy_to_idle_ts)); - - if (last_end_us > period_us) - break; - - /* It is relevant so add it but with adjustment */ - last_busy_time_us = ktime_to_us( - ktime_sub(ts->busy_to_idle_ts, - ts->idle_to_busy_ts)); - total_busy_time_us += last_busy_time_us - - (last_start_us - period_us); - break; - } - - /* Check if the sample is finished or still open */ - if (ktime_compare(ts->busy_to_idle_ts, zero_ktime)) - last_busy_time_us = ktime_to_us( - ktime_sub(ts->busy_to_idle_ts, - ts->idle_to_busy_ts)); - else - last_busy_time_us = ktime_to_us( - ktime_sub(curr, ts->idle_to_busy_ts)); - - total_busy_time_us += last_busy_time_us; - - last_index--; - /* Handle case idle_busy_ts_idx was 0 */ - if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) - last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; - - ts = &hdev->idle_busy_ts_arr[last_index]; - - overlap_cnt++; - } - - total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us, - USEC_PER_MSEC); - - return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms); -} - -/* - * hl_device_set_frequency - set the frequency of the device - * - * @hdev: pointer to habanalabs device structure - * @freq: the new frequency value - * - * Change the frequency if needed. This function has no protection against - * concurrency, therefore it is assumed that the calling function has protected - * itself against the case of calling this function from multiple threads with - * different values - * - * Returns 0 if no change was done, otherwise returns 1 - */ -int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq) -{ - if ((hdev->pm_mng_profile == PM_MANUAL) || - (hdev->curr_pll_profile == freq)) - return 0; - - dev_dbg(hdev->dev, "Changing device frequency to %s\n", - freq == PLL_HIGH ? "high" : "low"); - - hdev->asic_funcs->set_pll_profile(hdev, freq); - - hdev->curr_pll_profile = freq; - - return 1; -} - -int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) -{ - int rc = 0; - - mutex_lock(&hdev->debug_lock); - - if (!enable) { - if (!hdev->in_debug) { - dev_err(hdev->dev, - "Failed to disable debug mode because device was not in debug mode\n"); - rc = -EFAULT; - goto out; - } - - if (!hdev->hard_reset_pending) - hdev->asic_funcs->halt_coresight(hdev); - - hdev->in_debug = 0; - - if (!hdev->hard_reset_pending) - hdev->asic_funcs->enable_clock_gating(hdev); - - goto out; - } - - if (hdev->in_debug) { - dev_err(hdev->dev, - "Failed to enable debug mode because device is already in debug mode\n"); - rc = -EFAULT; - goto out; - } - - hdev->asic_funcs->disable_clock_gating(hdev); - hdev->in_debug = 1; - -out: - mutex_unlock(&hdev->debug_lock); - - return rc; -} - -/* - * hl_device_suspend - initiate device suspend - * - * @hdev: pointer to habanalabs device structure - * - * Puts the hw in the suspend state (all asics). - * Returns 0 for success or an error on failure. - * Called at driver suspend. 
- */ -int hl_device_suspend(struct hl_device *hdev) -{ - int rc; - - pci_save_state(hdev->pdev); - - /* Block future CS/VM/JOB completion operations */ - rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); - if (rc) { - dev_err(hdev->dev, "Can't suspend while in reset\n"); - return -EIO; - } - - /* This blocks all other stuff that is not blocked by in_reset */ - hdev->disabled = true; - - /* - * Flush anyone that is inside the critical section of enqueue - * jobs to the H/W - */ - hdev->asic_funcs->hw_queues_lock(hdev); - hdev->asic_funcs->hw_queues_unlock(hdev); - - /* Flush processes that are sending message to CPU */ - mutex_lock(&hdev->send_cpu_message_lock); - mutex_unlock(&hdev->send_cpu_message_lock); - - rc = hdev->asic_funcs->suspend(hdev); - if (rc) - dev_err(hdev->dev, - "Failed to disable PCI access of device CPU\n"); - - /* Shut down the device */ - pci_disable_device(hdev->pdev); - pci_set_power_state(hdev->pdev, PCI_D3hot); - - return 0; -} - -/* - * hl_device_resume - initiate device resume - * - * @hdev: pointer to habanalabs device structure - * - * Bring the hw back to operating state (all asics). - * Returns 0 for success or an error on failure. - * Called at driver resume. - */ -int hl_device_resume(struct hl_device *hdev) -{ - int rc; - - pci_set_power_state(hdev->pdev, PCI_D0); - pci_restore_state(hdev->pdev); - rc = pci_enable_device_mem(hdev->pdev); - if (rc) { - dev_err(hdev->dev, - "Failed to enable PCI device in resume\n"); - return rc; - } - - pci_set_master(hdev->pdev); - - rc = hdev->asic_funcs->resume(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to resume device after suspend\n"); - goto disable_device; - } - - - hdev->disabled = false; - atomic_set(&hdev->in_reset, 0); - - rc = hl_device_reset(hdev, true, false); - if (rc) { - dev_err(hdev->dev, "Failed to reset device during resume\n"); - goto disable_device; - } - - return 0; - -disable_device: - pci_clear_master(hdev->pdev); - pci_disable_device(hdev->pdev); - - return rc; -} - -static int device_kill_open_processes(struct hl_device *hdev) -{ - u16 pending_total, pending_cnt; - struct hl_fpriv *hpriv; - struct task_struct *task = NULL; - - if (hdev->pldm) - pending_total = HL_PLDM_PENDING_RESET_PER_SEC; - else - pending_total = HL_PENDING_RESET_PER_SEC; - - /* Giving time for user to close FD, and for processes that are inside - * hl_device_open to finish - */ - if (!list_empty(&hdev->fpriv_list)) - ssleep(1); - - mutex_lock(&hdev->fpriv_list_lock); - - /* This section must be protected because we are dereferencing - * pointers that are freed if the process exits - */ - list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) { - task = get_pid_task(hpriv->taskpid, PIDTYPE_PID); - if (task) { - dev_info(hdev->dev, "Killing user process pid=%d\n", - task_pid_nr(task)); - send_sig(SIGKILL, task, 1); - usleep_range(1000, 10000); - - put_task_struct(task); - } - } - - mutex_unlock(&hdev->fpriv_list_lock); - - /* We killed the open users, but because the driver cleans up after the - * user contexts are closed (e.g. mmu mappings), we need to wait again - * to make sure the cleaning phase is finished before continuing with - * the reset - */ - - pending_cnt = pending_total; - - while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) { - dev_info(hdev->dev, - "Waiting for all unmap operations to finish before hard reset\n"); - - pending_cnt--; - - ssleep(1); - } - - return list_empty(&hdev->fpriv_list) ? 
0 : -EBUSY; -} - -static void device_hard_reset_pending(struct work_struct *work) -{ - struct hl_device_reset_work *device_reset_work = - container_of(work, struct hl_device_reset_work, reset_work); - struct hl_device *hdev = device_reset_work->hdev; - - hl_device_reset(hdev, true, true); - - kfree(device_reset_work); -} - -/* - * hl_device_reset - reset the device - * - * @hdev: pointer to habanalabs device structure - * @hard_reset: should we do hard reset to all engines or just reset the - * compute/dma engines - * @from_hard_reset_thread: is the caller the hard-reset thread - * - * Block future CS and wait for pending CS to be enqueued - * Call ASIC H/W fini - * Flush all completions - * Re-initialize all internal data structures - * Call ASIC H/W init, late_init - * Test queues - * Enable device - * - * Returns 0 for success or an error on failure. - */ -int hl_device_reset(struct hl_device *hdev, bool hard_reset, - bool from_hard_reset_thread) -{ - int i, rc; - - if (!hdev->init_done) { - dev_err(hdev->dev, - "Can't reset before initialization is done\n"); - return 0; - } - - if ((!hard_reset) && (!hdev->supports_soft_reset)) { - dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n"); - hard_reset = true; - } - - /* - * Prevent concurrency in this function - only one reset should be - * done at any given time. Only need to perform this if we didn't - * get from the dedicated hard reset thread - */ - if (!from_hard_reset_thread) { - /* Block future CS/VM/JOB completion operations */ - rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); - if (rc) - return 0; - - if (hard_reset) { - /* Disable PCI access from device F/W so he won't send - * us additional interrupts. We disable MSI/MSI-X at - * the halt_engines function and we can't have the F/W - * sending us interrupts after that. We need to disable - * the access here because if the device is marked - * disable, the message won't be send. 
Also, in case - * of heartbeat, the device CPU is marked as disable - * so this message won't be sent - */ - if (hl_fw_send_pci_access_msg(hdev, - ARMCP_PACKET_DISABLE_PCI_ACCESS)) - dev_warn(hdev->dev, - "Failed to disable PCI access by F/W\n"); - } - - /* This also blocks future CS/VM/JOB completion operations */ - hdev->disabled = true; - - /* Flush anyone that is inside the critical section of enqueue - * jobs to the H/W - */ - hdev->asic_funcs->hw_queues_lock(hdev); - hdev->asic_funcs->hw_queues_unlock(hdev); - - /* Flush anyone that is inside device open */ - mutex_lock(&hdev->fpriv_list_lock); - mutex_unlock(&hdev->fpriv_list_lock); - - dev_err(hdev->dev, "Going to RESET device!\n"); - } - -again: - if ((hard_reset) && (!from_hard_reset_thread)) { - struct hl_device_reset_work *device_reset_work; - - hdev->hard_reset_pending = true; - - device_reset_work = kzalloc(sizeof(*device_reset_work), - GFP_ATOMIC); - if (!device_reset_work) { - rc = -ENOMEM; - goto out_err; - } - - /* - * Because the reset function can't run from interrupt or - * from heartbeat work, we need to call the reset function - * from a dedicated work - */ - INIT_WORK(&device_reset_work->reset_work, - device_hard_reset_pending); - device_reset_work->hdev = hdev; - schedule_work(&device_reset_work->reset_work); - - return 0; - } - - if (hard_reset) { - device_late_fini(hdev); - - /* - * Now that the heartbeat thread is closed, flush processes - * which are sending messages to CPU - */ - mutex_lock(&hdev->send_cpu_message_lock); - mutex_unlock(&hdev->send_cpu_message_lock); - } - - /* - * Halt the engines and disable interrupts so we won't get any more - * completions from H/W and we won't have any accesses from the - * H/W to the host machine - */ - hdev->asic_funcs->halt_engines(hdev, hard_reset); - - /* Go over all the queues, release all CS and their jobs */ - hl_cs_rollback_all(hdev); - - if (hard_reset) { - /* Kill processes here after CS rollback. This is because the - * process can't really exit until all its CSs are done, which - * is what we do in cs rollback - */ - rc = device_kill_open_processes(hdev); - if (rc) { - dev_crit(hdev->dev, - "Failed to kill all open processes, stopping hard reset\n"); - goto out_err; - } - - /* Flush the Event queue workers to make sure no other thread is - * reading or writing to registers during the reset - */ - flush_workqueue(hdev->eq_wq); - } - - /* Release kernel context */ - if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1)) - hdev->kernel_ctx = NULL; - - /* Reset the H/W. 
It will be in idle state after this returns */ - hdev->asic_funcs->hw_fini(hdev, hard_reset); - - if (hard_reset) { - hl_vm_fini(hdev); - hl_mmu_fini(hdev); - hl_eq_reset(hdev, &hdev->event_queue); - } - - /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */ - hl_hw_queue_reset(hdev, hard_reset); - for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) - hl_cq_reset(hdev, &hdev->completion_queue[i]); - - hdev->idle_busy_ts_idx = 0; - hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0); - hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0); - - if (hdev->cs_active_cnt) - dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n", - hdev->cs_active_cnt); - - mutex_lock(&hdev->fpriv_list_lock); - - /* Make sure the context switch phase will run again */ - if (hdev->compute_ctx) { - atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1); - hdev->compute_ctx->thread_ctx_switch_wait_token = 0; - } - - mutex_unlock(&hdev->fpriv_list_lock); - - /* Finished tear-down, starting to re-initialize */ - - if (hard_reset) { - hdev->device_cpu_disabled = false; - hdev->hard_reset_pending = false; - - if (hdev->kernel_ctx) { - dev_crit(hdev->dev, - "kernel ctx was alive during hard reset, something is terribly wrong\n"); - rc = -EBUSY; - goto out_err; - } - - rc = hl_mmu_init(hdev); - if (rc) { - dev_err(hdev->dev, - "Failed to initialize MMU S/W after hard reset\n"); - goto out_err; - } - - /* Allocate the kernel context */ - hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), - GFP_KERNEL); - if (!hdev->kernel_ctx) { - rc = -ENOMEM; - goto out_err; - } - - hdev->compute_ctx = NULL; - - rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); - if (rc) { - dev_err(hdev->dev, - "failed to init kernel ctx in hard reset\n"); - kfree(hdev->kernel_ctx); - hdev->kernel_ctx = NULL; - goto out_err; - } - } - - rc = hdev->asic_funcs->hw_init(hdev); - if (rc) { - dev_err(hdev->dev, - "failed to initialize the H/W after reset\n"); - goto out_err; - } - - hdev->disabled = false; - - /* Check that the communication with the device is working */ - rc = hdev->asic_funcs->test_queues(hdev); - if (rc) { - dev_err(hdev->dev, - "Failed to detect if device is alive after reset\n"); - goto out_err; - } - - if (hard_reset) { - rc = device_late_init(hdev); - if (rc) { - dev_err(hdev->dev, - "Failed late init after hard reset\n"); - goto out_err; - } - - rc = hl_vm_init(hdev); - if (rc) { - dev_err(hdev->dev, - "Failed to init memory module after hard reset\n"); - goto out_err; - } - - hl_set_max_power(hdev, hdev->max_power); - } else { - rc = hdev->asic_funcs->soft_reset_late_init(hdev); - if (rc) { - dev_err(hdev->dev, - "Failed late init after soft reset\n"); - goto out_err; - } - } - - atomic_set(&hdev->in_reset, 0); - - if (hard_reset) - hdev->hard_reset_cnt++; - else - hdev->soft_reset_cnt++; - - dev_warn(hdev->dev, "Successfully finished resetting the device\n"); - - return 0; - -out_err: - hdev->disabled = true; - - if (hard_reset) { - dev_err(hdev->dev, - "Failed to reset! Device is NOT usable\n"); - hdev->hard_reset_cnt++; - } else { - dev_err(hdev->dev, - "Failed to do soft-reset, trying hard reset\n"); - hdev->soft_reset_cnt++; - hard_reset = true; - goto again; - } - - atomic_set(&hdev->in_reset, 0); - - return rc; -} - -/* - * hl_device_init - main initialization function for habanalabs device - * - * @hdev: pointer to habanalabs device structure - * - * Allocate an id for the device, do early initialization and then call the - * ASIC specific initialization functions. 
Finally, create the cdev and the - * Linux device to expose it to the user - */ -int hl_device_init(struct hl_device *hdev, struct class *hclass) -{ - int i, rc, cq_cnt, cq_ready_cnt; - char *name; - bool add_cdev_sysfs_on_err = false; - - name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2); - if (!name) { - rc = -ENOMEM; - goto out_disabled; - } - - /* Initialize cdev and device structures */ - rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name, - &hdev->cdev, &hdev->dev); - - kfree(name); - - if (rc) - goto out_disabled; - - name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2); - if (!name) { - rc = -ENOMEM; - goto free_dev; - } - - /* Initialize cdev and device structures for control device */ - rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops, - name, &hdev->cdev_ctrl, &hdev->dev_ctrl); - - kfree(name); - - if (rc) - goto free_dev; - - /* Initialize ASIC function pointers and perform early init */ - rc = device_early_init(hdev); - if (rc) - goto free_dev_ctrl; - - /* - * Start calling ASIC initialization. First S/W then H/W and finally - * late init - */ - rc = hdev->asic_funcs->sw_init(hdev); - if (rc) - goto early_fini; - - /* - * Initialize the H/W queues. Must be done before hw_init, because - * there the addresses of the kernel queue are being written to the - * registers of the device - */ - rc = hl_hw_queues_create(hdev); - if (rc) { - dev_err(hdev->dev, "failed to initialize kernel queues\n"); - goto sw_fini; - } - - cq_cnt = hdev->asic_prop.completion_queues_count; - - /* - * Initialize the completion queues. Must be done before hw_init, - * because there the addresses of the completion queues are being - * passed as arguments to request_irq - */ - if (cq_cnt) { - hdev->completion_queue = kcalloc(cq_cnt, - sizeof(*hdev->completion_queue), - GFP_KERNEL); - - if (!hdev->completion_queue) { - dev_err(hdev->dev, - "failed to allocate completion queues\n"); - rc = -ENOMEM; - goto hw_queues_destroy; - } - } - - for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) { - rc = hl_cq_init(hdev, &hdev->completion_queue[i], - hdev->asic_funcs->get_queue_id_for_cq(hdev, i)); - if (rc) { - dev_err(hdev->dev, - "failed to initialize completion queue\n"); - goto cq_fini; - } - hdev->completion_queue[i].cq_idx = i; - } - - /* - * Initialize the event queue. 
Must be done before hw_init, - * because there the address of the event queue is being - * passed as argument to request_irq - */ - rc = hl_eq_init(hdev, &hdev->event_queue); - if (rc) { - dev_err(hdev->dev, "failed to initialize event queue\n"); - goto cq_fini; - } - - /* MMU S/W must be initialized before kernel context is created */ - rc = hl_mmu_init(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n"); - goto eq_fini; - } - - /* Allocate the kernel context */ - hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); - if (!hdev->kernel_ctx) { - rc = -ENOMEM; - goto mmu_fini; - } - - hdev->compute_ctx = NULL; - - rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); - if (rc) { - dev_err(hdev->dev, "failed to initialize kernel context\n"); - kfree(hdev->kernel_ctx); - goto mmu_fini; - } - - rc = hl_cb_pool_init(hdev); - if (rc) { - dev_err(hdev->dev, "failed to initialize CB pool\n"); - goto release_ctx; - } - - hl_debugfs_add_device(hdev); - - if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { - dev_info(hdev->dev, - "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->halt_engines(hdev, true); - hdev->asic_funcs->hw_fini(hdev, true); - } - - /* - * From this point, in case of an error, add char devices and create - * sysfs nodes as part of the error flow, to allow debugging. - */ - add_cdev_sysfs_on_err = true; - - rc = hdev->asic_funcs->hw_init(hdev); - if (rc) { - dev_err(hdev->dev, "failed to initialize the H/W\n"); - rc = 0; - goto out_disabled; - } - - hdev->disabled = false; - - /* Check that the communication with the device is working */ - rc = hdev->asic_funcs->test_queues(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to detect if device is alive\n"); - rc = 0; - goto out_disabled; - } - - rc = device_late_init(hdev); - if (rc) { - dev_err(hdev->dev, "Failed late initialization\n"); - rc = 0; - goto out_disabled; - } - - dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n", - hdev->asic_name, - hdev->asic_prop.dram_size / 1024 / 1024 / 1024); - - rc = hl_vm_init(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to initialize memory module\n"); - rc = 0; - goto out_disabled; - } - - /* - * Expose devices and sysfs nodes to user. - * From here there is no need to add char devices and create sysfs nodes - * in case of an error. - */ - add_cdev_sysfs_on_err = false; - rc = device_cdev_sysfs_add(hdev); - if (rc) { - dev_err(hdev->dev, - "Failed to add char devices and sysfs nodes\n"); - rc = 0; - goto out_disabled; - } - - /* - * hl_hwmon_init() must be called after device_late_init(), because only - * there we get the information from the device about which - * hwmon-related sensors the device supports. - * Furthermore, it must be done after adding the device to the system. 
- */ - rc = hl_hwmon_init(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to initialize hwmon\n"); - rc = 0; - goto out_disabled; - } - - dev_notice(hdev->dev, - "Successfully added device to habanalabs driver\n"); - - hdev->init_done = true; - - return 0; - -release_ctx: - if (hl_ctx_put(hdev->kernel_ctx) != 1) - dev_err(hdev->dev, - "kernel ctx is still alive on initialization failure\n"); -mmu_fini: - hl_mmu_fini(hdev); -eq_fini: - hl_eq_fini(hdev, &hdev->event_queue); -cq_fini: - for (i = 0 ; i < cq_ready_cnt ; i++) - hl_cq_fini(hdev, &hdev->completion_queue[i]); - kfree(hdev->completion_queue); -hw_queues_destroy: - hl_hw_queues_destroy(hdev); -sw_fini: - hdev->asic_funcs->sw_fini(hdev); -early_fini: - device_early_fini(hdev); -free_dev_ctrl: - kfree(hdev->dev_ctrl); -free_dev: - kfree(hdev->dev); -out_disabled: - hdev->disabled = true; - if (add_cdev_sysfs_on_err) - device_cdev_sysfs_add(hdev); - if (hdev->pdev) - dev_err(&hdev->pdev->dev, - "Failed to initialize hl%d. Device is NOT usable !\n", - hdev->id / 2); - else - pr_err("Failed to initialize hl%d. Device is NOT usable !\n", - hdev->id / 2); - - return rc; -} - -/* - * hl_device_fini - main tear-down function for habanalabs device - * - * @hdev: pointer to habanalabs device structure - * - * Destroy the device, call ASIC fini functions and release the id - */ -void hl_device_fini(struct hl_device *hdev) -{ - int i, rc; - ktime_t timeout; - - dev_info(hdev->dev, "Removing device\n"); - - /* - * This function is competing with the reset function, so try to - * take the reset atomic and if we are already in middle of reset, - * wait until reset function is finished. Reset function is designed - * to always finish. However, in Gaudi, because of all the network - * ports, the hard reset could take between 10-30 seconds - */ - - timeout = ktime_add_us(ktime_get(), - HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000); - rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); - while (rc) { - usleep_range(50, 200); - rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); - if (ktime_compare(ktime_get(), timeout) > 0) { - WARN(1, "Failed to remove device because reset function did not finish\n"); - return; - } - } - - /* Mark device as disabled */ - hdev->disabled = true; - - /* Flush anyone that is inside the critical section of enqueue - * jobs to the H/W - */ - hdev->asic_funcs->hw_queues_lock(hdev); - hdev->asic_funcs->hw_queues_unlock(hdev); - - /* Flush anyone that is inside device open */ - mutex_lock(&hdev->fpriv_list_lock); - mutex_unlock(&hdev->fpriv_list_lock); - - hdev->hard_reset_pending = true; - - hl_hwmon_fini(hdev); - - device_late_fini(hdev); - - hl_debugfs_remove_device(hdev); - - /* - * Halt the engines and disable interrupts so we won't get any more - * completions from H/W and we won't have any accesses from the - * H/W to the host machine - */ - hdev->asic_funcs->halt_engines(hdev, true); - - /* Go over all the queues, release all CS and their jobs */ - hl_cs_rollback_all(hdev); - - /* Kill processes here after CS rollback. This is because the process - * can't really exit until all its CSs are done, which is what we - * do in cs rollback - */ - rc = device_kill_open_processes(hdev); - if (rc) - dev_crit(hdev->dev, "Failed to kill all open processes\n"); - - hl_cb_pool_fini(hdev); - - /* Release kernel context */ - if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1)) - dev_err(hdev->dev, "kernel ctx is still alive\n"); - - /* Reset the H/W. 
It will be in idle state after this returns */ - hdev->asic_funcs->hw_fini(hdev, true); - - hl_vm_fini(hdev); - - hl_mmu_fini(hdev); - - hl_eq_fini(hdev, &hdev->event_queue); - - for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) - hl_cq_fini(hdev, &hdev->completion_queue[i]); - kfree(hdev->completion_queue); - - hl_hw_queues_destroy(hdev); - - /* Call ASIC S/W finalize function */ - hdev->asic_funcs->sw_fini(hdev); - - device_early_fini(hdev); - - /* Hide devices and sysfs nodes from user */ - device_cdev_sysfs_del(hdev); - - pr_info("removed device successfully\n"); -} - -/* - * MMIO register access helper functions. - */ - -/* - * hl_rreg - Read an MMIO register - * - * @hdev: pointer to habanalabs device structure - * @reg: MMIO register offset (in bytes) - * - * Returns the value of the MMIO register we are asked to read - * - */ -inline u32 hl_rreg(struct hl_device *hdev, u32 reg) -{ - return readl(hdev->rmmio + reg); -} - -/* - * hl_wreg - Write to an MMIO register - * - * @hdev: pointer to habanalabs device structure - * @reg: MMIO register offset (in bytes) - * @val: 32-bit value - * - * Writes the 32-bit value into the MMIO register - * - */ -inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val) -{ - writel(val, hdev->rmmio + reg); -} diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c deleted file mode 100644 index 3be1549cd137..000000000000 --- a/drivers/misc/habanalabs/firmware_if.c +++ /dev/null @@ -1,589 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. - */ - -#include "habanalabs.h" -#include "include/hl_boot_if.h" - -#include -#include -#include -#include - -/** - * hl_fw_load_fw_to_device() - Load F/W code to device's memory. - * - * @hdev: pointer to hl_device structure. - * @fw_name: the firmware image name - * @dst: IO memory mapped address space to copy firmware to - * - * Copy fw code from firmware file to device memory. - * - * Return: 0 on success, non-zero for failure. 
- */ -int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, - void __iomem *dst) -{ - const struct firmware *fw; - const u64 *fw_data; - size_t fw_size; - int rc; - - rc = request_firmware(&fw, fw_name, hdev->dev); - if (rc) { - dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name); - goto out; - } - - fw_size = fw->size; - if ((fw_size % 4) != 0) { - dev_err(hdev->dev, "Illegal %s firmware size %zu\n", - fw_name, fw_size); - rc = -EINVAL; - goto out; - } - - dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size); - - fw_data = (const u64 *) fw->data; - - memcpy_toio(dst, fw_data, fw_size); - -out: - release_firmware(fw); - return rc; -} - -int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) -{ - struct armcp_packet pkt = {}; - - pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT); - - return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, - sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); -} - -int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, - u16 len, u32 timeout, long *result) -{ - struct armcp_packet *pkt; - dma_addr_t pkt_dma_addr; - u32 tmp; - int rc = 0; - - pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, - &pkt_dma_addr); - if (!pkt) { - dev_err(hdev->dev, - "Failed to allocate DMA memory for packet to CPU\n"); - return -ENOMEM; - } - - memcpy(pkt, msg, len); - - mutex_lock(&hdev->send_cpu_message_lock); - - if (hdev->disabled) - goto out; - - if (hdev->device_cpu_disabled) { - rc = -EIO; - goto out; - } - - rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr); - if (rc) { - dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc); - goto out; - } - - rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, - (tmp == ARMCP_PACKET_FENCE_VAL), 1000, - timeout, true); - - hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); - - if (rc == -ETIMEDOUT) { - dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp); - hdev->device_cpu_disabled = true; - goto out; - } - - tmp = le32_to_cpu(pkt->ctl); - - rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT; - if (rc) { - dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n", - rc, - (tmp & ARMCP_PKT_CTL_OPCODE_MASK) - >> ARMCP_PKT_CTL_OPCODE_SHIFT); - rc = -EIO; - } else if (result) { - *result = (long) le64_to_cpu(pkt->result); - } - -out: - mutex_unlock(&hdev->send_cpu_message_lock); - - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt); - - return rc; -} - -int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) -{ - struct armcp_packet pkt; - long result; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.value = cpu_to_le64(event_type); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, &result); - - if (rc) - dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); - - return rc; -} - -int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, - size_t irq_arr_size) -{ - struct armcp_unmask_irq_arr_packet *pkt; - size_t total_pkt_size; - long result; - int rc; - - total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) + - irq_arr_size; - - /* data should be aligned to 8 bytes in order to ArmCP to copy it */ - total_pkt_size = (total_pkt_size + 0x7) & ~0x7; - - /* total_pkt_size is casted to u16 later on */ - if (total_pkt_size > USHRT_MAX) { - dev_err(hdev->dev, "too many elements in IRQ array\n"); - return -EINVAL; - } - - 
pkt = kzalloc(total_pkt_size, GFP_KERNEL); - if (!pkt) - return -ENOMEM; - - pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0])); - memcpy(&pkt->irqs, irq_arr, irq_arr_size); - - pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << - ARMCP_PKT_CTL_OPCODE_SHIFT); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, - total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result); - - if (rc) - dev_err(hdev->dev, "failed to unmask IRQ array\n"); - - kfree(pkt); - - return rc; -} - -int hl_fw_test_cpu_queue(struct hl_device *hdev) -{ - struct armcp_packet test_pkt = {}; - long result; - int rc; - - test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << - ARMCP_PKT_CTL_OPCODE_SHIFT); - test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, - sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); - - if (!rc) { - if (result != ARMCP_PACKET_FENCE_VAL) - dev_err(hdev->dev, - "CPU queue test failed (0x%08lX)\n", result); - } else { - dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc); - } - - return rc; -} - -void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, - dma_addr_t *dma_handle) -{ - u64 kernel_addr; - - kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size); - - *dma_handle = hdev->cpu_accessible_dma_address + - (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem); - - return (void *) (uintptr_t) kernel_addr; -} - -void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, - void *vaddr) -{ - gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr, - size); -} - -int hl_fw_send_heartbeat(struct hl_device *hdev) -{ - struct armcp_packet hb_pkt = {}; - long result; - int rc; - - hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << - ARMCP_PKT_CTL_OPCODE_SHIFT); - hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, - sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result); - - if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) - rc = -EIO; - - return rc; -} - -int hl_fw_armcp_info_get(struct hl_device *hdev) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct armcp_packet pkt = {}; - void *armcp_info_cpu_addr; - dma_addr_t armcp_info_dma_addr; - long result; - int rc; - - armcp_info_cpu_addr = - hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, - sizeof(struct armcp_info), - &armcp_info_dma_addr); - if (!armcp_info_cpu_addr) { - dev_err(hdev->dev, - "Failed to allocate DMA memory for ArmCP info packet\n"); - return -ENOMEM; - } - - memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.addr = cpu_to_le64(armcp_info_dma_addr); - pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info)); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_INFO_TIMEOUT_USEC, &result); - if (rc) { - dev_err(hdev->dev, - "Failed to handle ArmCP info pkt, error %d\n", rc); - goto out; - } - - memcpy(&prop->armcp_info, armcp_info_cpu_addr, - sizeof(prop->armcp_info)); - - rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors); - if (rc) { - dev_err(hdev->dev, - "Failed to build hwmon channel info, error %d\n", rc); - rc = -EFAULT; - goto out; - } - -out: - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, - sizeof(struct armcp_info), armcp_info_cpu_addr); - - return rc; -} - -int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, 
size_t max_size) -{ - struct armcp_packet pkt = {}; - void *eeprom_info_cpu_addr; - dma_addr_t eeprom_info_dma_addr; - long result; - int rc; - - eeprom_info_cpu_addr = - hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, - max_size, &eeprom_info_dma_addr); - if (!eeprom_info_cpu_addr) { - dev_err(hdev->dev, - "Failed to allocate DMA memory for ArmCP EEPROM packet\n"); - return -ENOMEM; - } - - memset(eeprom_info_cpu_addr, 0, max_size); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.addr = cpu_to_le64(eeprom_info_dma_addr); - pkt.data_max_size = cpu_to_le32(max_size); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_EEPROM_TIMEOUT_USEC, &result); - - if (rc) { - dev_err(hdev->dev, - "Failed to handle ArmCP EEPROM packet, error %d\n", rc); - goto out; - } - - /* result contains the actual size */ - memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size)); - -out: - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size, - eeprom_info_cpu_addr); - - return rc; -} - -static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg) -{ - u32 err_val; - - /* Some of the firmware status codes are deprecated in newer f/w - * versions. In those versions, the errors are reported - * in different registers. Therefore, we need to check those - * registers and print the exact errors. Moreover, there - * may be multiple errors, so we need to report on each error - * separately. Some of the error codes might indicate a state - * that is not an error per-se, but it is an error in production - * environment - */ - err_val = RREG32(boot_err0_reg); - if (!(err_val & CPU_BOOT_ERR0_ENABLED)) - return; - - if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) - dev_err(hdev->dev, - "Device boot error - DRAM initialization failed\n"); - if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) - dev_err(hdev->dev, "Device boot error - FIT image corrupted\n"); - if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) - dev_err(hdev->dev, - "Device boot error - Thermal Sensor initialization failed\n"); - if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) - dev_warn(hdev->dev, - "Device boot warning - Skipped DRAM initialization\n"); - if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) - dev_warn(hdev->dev, - "Device boot error - Skipped waiting for BMC\n"); - if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) - dev_err(hdev->dev, - "Device boot error - Serdes data from BMC not available\n"); - if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) - dev_err(hdev->dev, - "Device boot error - NIC F/W initialization failed\n"); -} - -static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status) -{ - switch (status) { - case CPU_BOOT_STATUS_NA: - dev_err(hdev->dev, - "Device boot error - BTL did NOT run\n"); - break; - case CPU_BOOT_STATUS_IN_WFE: - dev_err(hdev->dev, - "Device boot error - Stuck inside WFE loop\n"); - break; - case CPU_BOOT_STATUS_IN_BTL: - dev_err(hdev->dev, - "Device boot error - Stuck in BTL\n"); - break; - case CPU_BOOT_STATUS_IN_PREBOOT: - dev_err(hdev->dev, - "Device boot error - Stuck in Preboot\n"); - break; - case CPU_BOOT_STATUS_IN_SPL: - dev_err(hdev->dev, - "Device boot error - Stuck in SPL\n"); - break; - case CPU_BOOT_STATUS_IN_UBOOT: - dev_err(hdev->dev, - "Device boot error - Stuck in u-boot\n"); - break; - case CPU_BOOT_STATUS_DRAM_INIT_FAIL: - dev_err(hdev->dev, - "Device boot error - DRAM initialization failed\n"); - break; - case CPU_BOOT_STATUS_UBOOT_NOT_READY: - dev_err(hdev->dev, - "Device boot error - u-boot stopped by 
user\n"); - break; - case CPU_BOOT_STATUS_TS_INIT_FAIL: - dev_err(hdev->dev, - "Device boot error - Thermal Sensor initialization failed\n"); - break; - default: - dev_err(hdev->dev, - "Device boot error - Invalid status code %d\n", - status); - break; - } -} - -int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, - u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, - u32 boot_err0_reg, bool skip_bmc, - u32 cpu_timeout, u32 boot_fit_timeout) -{ - u32 status; - int rc; - - dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n", - cpu_timeout / USEC_PER_SEC); - - /* Wait for boot FIT request */ - rc = hl_poll_timeout( - hdev, - cpu_boot_status_reg, - status, - status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT, - 10000, - boot_fit_timeout); - - if (rc) { - dev_dbg(hdev->dev, - "No boot fit request received, resuming boot\n"); - } else { - rc = hdev->asic_funcs->load_boot_fit_to_device(hdev); - if (rc) - goto out; - - /* Clear device CPU message status */ - WREG32(cpu_msg_status_reg, CPU_MSG_CLR); - - /* Signal device CPU that boot loader is ready */ - WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY); - - /* Poll for CPU device ack */ - rc = hl_poll_timeout( - hdev, - cpu_msg_status_reg, - status, - status == CPU_MSG_OK, - 10000, - boot_fit_timeout); - - if (rc) { - dev_err(hdev->dev, - "Timeout waiting for boot fit load ack\n"); - goto out; - } - - /* Clear message */ - WREG32(msg_to_cpu_reg, KMD_MSG_NA); - } - - /* Make sure CPU boot-loader is running */ - rc = hl_poll_timeout( - hdev, - cpu_boot_status_reg, - status, - (status == CPU_BOOT_STATUS_DRAM_RDY) || - (status == CPU_BOOT_STATUS_NIC_FW_RDY) || - (status == CPU_BOOT_STATUS_READY_TO_BOOT) || - (status == CPU_BOOT_STATUS_SRAM_AVAIL), - 10000, - cpu_timeout); - - /* Read U-Boot, preboot versions now in case we will later fail */ - hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT); - hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT); - - /* Some of the status codes below are deprecated in newer f/w - * versions but we keep them here for backward compatibility - */ - if (rc) { - hl_detect_cpu_boot_status(hdev, status); - rc = -EIO; - goto out; - } - - if (!hdev->fw_loading) { - dev_info(hdev->dev, "Skip loading FW\n"); - goto out; - } - - if (status == CPU_BOOT_STATUS_SRAM_AVAIL) - goto out; - - dev_info(hdev->dev, - "Loading firmware to device, may take some time...\n"); - - rc = hdev->asic_funcs->load_firmware_to_device(hdev); - if (rc) - goto out; - - if (skip_bmc) { - WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC); - - rc = hl_poll_timeout( - hdev, - cpu_boot_status_reg, - status, - (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED), - 10000, - cpu_timeout); - - if (rc) { - dev_err(hdev->dev, - "Failed to get ACK on skipping BMC, %d\n", - status); - WREG32(msg_to_cpu_reg, KMD_MSG_NA); - rc = -EIO; - goto out; - } - } - - WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY); - - rc = hl_poll_timeout( - hdev, - cpu_boot_status_reg, - status, - (status == CPU_BOOT_STATUS_SRAM_AVAIL), - 10000, - cpu_timeout); - - /* Clear message */ - WREG32(msg_to_cpu_reg, KMD_MSG_NA); - - if (rc) { - if (status == CPU_BOOT_STATUS_FIT_CORRUPTED) - dev_err(hdev->dev, - "Device reports FIT image is corrupted\n"); - else - dev_err(hdev->dev, - "Failed to load firmware to device, %d\n", - status); - - rc = -EIO; - goto out; - } - - dev_info(hdev->dev, "Successfully loaded firmware to device\n"); - -out: - fw_read_errors(hdev, boot_err0_reg); - - return rc; -} diff --git a/drivers/misc/habanalabs/gaudi/Makefile 
b/drivers/misc/habanalabs/gaudi/Makefile index f802cdc980ca..75104ae74e2b 100644 --- a/drivers/misc/habanalabs/gaudi/Makefile +++ b/drivers/misc/habanalabs/gaudi/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -subdir-ccflags-y += -I$(src) +subdir-ccflags-y += -I$(src)/common HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_hwmgr.o gaudi/gaudi_security.o \ gaudi/gaudi_coresight.o diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index bdc5f96085a7..a94ab6a180f0 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -10,7 +10,7 @@ #include #include "habanalabs.h" -#include "include/hl_boot_if.h" +#include "include/common/hl_boot_if.h" #include "include/gaudi/gaudi_packets.h" #include "include/gaudi/gaudi.h" #include "include/gaudi/gaudi_async_events.h" diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 8265cc21b45a..9e674cf39fd9 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -10,7 +10,7 @@ #include #include "habanalabs.h" -#include "include/hl_boot_if.h" +#include "include/common/hl_boot_if.h" #include "include/goya/goya_packets.h" #include "include/goya/goya.h" #include "include/goya/goya_async_events.h" diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h deleted file mode 100644 index 14def0d26d2d..000000000000 --- a/drivers/misc/habanalabs/habanalabs.h +++ /dev/null @@ -1,1948 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. - * - */ - -#ifndef HABANALABSP_H_ -#define HABANALABSP_H_ - -#include "include/armcp_if.h" -#include "include/qman_if.h" -#include - -#include -#include -#include -#include -#include -#include -#include - -#define HL_NAME "habanalabs" - -#define HL_MMAP_CB_MASK (0x8000000000000000ull >> PAGE_SHIFT) - -#define HL_PENDING_RESET_PER_SEC 30 - -#define HL_HARD_RESET_MAX_TIMEOUT 120 - -#define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */ - -#define HL_HEARTBEAT_PER_USEC 5000000 /* 5 s */ - -#define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */ - -#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ -#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ - -#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ - -#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */ - -#define HL_IDLE_BUSY_TS_ARR_SIZE 4096 - -/* Memory */ -#define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ - -/* MMU */ -#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ - -/* - * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream - * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream - */ -#define HL_RSVD_SOBS 4 -#define HL_RSVD_MONS 2 - -#define HL_RSVD_SOBS_IN_USE 2 -#define HL_RSVD_MONS_IN_USE 1 - -#define HL_MAX_SOB_VAL (1 << 15) - -#define IS_POWER_OF_2(n) (n != 0 && ((n & (n - 1)) == 0)) -#define IS_MAX_PENDING_CS_VALID(n) (IS_POWER_OF_2(n) && (n > 1)) - -#define HL_PCI_NUM_BARS 6 - -/** - * struct pgt_info - MMU hop page info. - * @node: hash linked-list node for the pgts shadow hash of pgts. - * @phys_addr: physical address of the pgt. - * @shadow_addr: shadow hop in the host. - * @ctx: pointer to the owner ctx. - * @num_of_ptes: indicates how many ptes are used in the pgt. - * - * The MMU page tables hierarchy is placed on the DRAM. When a new level (hop) - * is needed during mapping, a new page is allocated and this structure holds - * its essential information. 
During unmapping, if no valid PTEs remained in the - * page, it is freed with its pgt_info structure. - */ -struct pgt_info { - struct hlist_node node; - u64 phys_addr; - u64 shadow_addr; - struct hl_ctx *ctx; - int num_of_ptes; -}; - -struct hl_device; -struct hl_fpriv; - -/** - * enum hl_pci_match_mode - pci match mode per region - * @PCI_ADDRESS_MATCH_MODE: address match mode - * @PCI_BAR_MATCH_MODE: bar match mode - */ -enum hl_pci_match_mode { - PCI_ADDRESS_MATCH_MODE, - PCI_BAR_MATCH_MODE -}; - -/** - * enum hl_fw_component - F/W components to read version through registers. - * @FW_COMP_UBOOT: u-boot. - * @FW_COMP_PREBOOT: preboot. - */ -enum hl_fw_component { - FW_COMP_UBOOT, - FW_COMP_PREBOOT -}; - -/** - * enum hl_queue_type - Supported QUEUE types. - * @QUEUE_TYPE_NA: queue is not available. - * @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the - * host. - * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's - * memories and/or operates the compute engines. - * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU. - * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion - * notifications are sent by H/W. - */ -enum hl_queue_type { - QUEUE_TYPE_NA, - QUEUE_TYPE_EXT, - QUEUE_TYPE_INT, - QUEUE_TYPE_CPU, - QUEUE_TYPE_HW -}; - -enum hl_cs_type { - CS_TYPE_DEFAULT, - CS_TYPE_SIGNAL, - CS_TYPE_WAIT -}; - -/* - * struct hl_inbound_pci_region - inbound region descriptor - * @mode: pci match mode for this region - * @addr: region target address - * @size: region size in bytes - * @offset_in_bar: offset within bar (address match mode) - * @bar: bar id - */ -struct hl_inbound_pci_region { - enum hl_pci_match_mode mode; - u64 addr; - u64 size; - u64 offset_in_bar; - u8 bar; -}; - -/* - * struct hl_outbound_pci_region - outbound region descriptor - * @addr: region target address - * @size: region size in bytes - */ -struct hl_outbound_pci_region { - u64 addr; - u64 size; -}; - -/* - * struct hl_hw_sob - H/W SOB info. - * @hdev: habanalabs device structure. - * @kref: refcount of this SOB. The SOB will reset once the refcount is zero. - * @sob_id: id of this SOB. - * @q_idx: the H/W queue that uses this SOB. - */ -struct hl_hw_sob { - struct hl_device *hdev; - struct kref kref; - u32 sob_id; - u32 q_idx; -}; - -/** - * struct hw_queue_properties - queue information. - * @type: queue type. - * @driver_only: true if only the driver is allowed to send a job to this queue, - * false otherwise. - * @requires_kernel_cb: true if a CB handle must be provided for jobs on this - * queue, false otherwise (a CB address must be provided). - * @supports_sync_stream: True if queue supports sync stream - */ -struct hw_queue_properties { - enum hl_queue_type type; - u8 driver_only; - u8 requires_kernel_cb; - u8 supports_sync_stream; -}; - -/** - * enum vm_type_t - virtual memory mapping request information. - * @VM_TYPE_USERPTR: mapping of user memory to device virtual address. - * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address. - */ -enum vm_type_t { - VM_TYPE_USERPTR = 0x1, - VM_TYPE_PHYS_PACK = 0x2 -}; - -/** - * enum hl_device_hw_state - H/W device state. use this to understand whether - * to do reset before hw_init or not - * @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean. i.e. after hard reset - * @HL_DEVICE_HW_STATE_DIRTY: H/W state is dirty. i.e. 
we started to execute - * hw_init - */ -enum hl_device_hw_state { - HL_DEVICE_HW_STATE_CLEAN = 0, - HL_DEVICE_HW_STATE_DIRTY -}; - -/** - * struct hl_mmu_properties - ASIC specific MMU address translation properties. - * @start_addr: virtual start address of the memory region. - * @end_addr: virtual end address of the memory region. - * @hop0_shift: shift of hop 0 mask. - * @hop1_shift: shift of hop 1 mask. - * @hop2_shift: shift of hop 2 mask. - * @hop3_shift: shift of hop 3 mask. - * @hop4_shift: shift of hop 4 mask. - * @hop0_mask: mask to get the PTE address in hop 0. - * @hop1_mask: mask to get the PTE address in hop 1. - * @hop2_mask: mask to get the PTE address in hop 2. - * @hop3_mask: mask to get the PTE address in hop 3. - * @hop4_mask: mask to get the PTE address in hop 4. - * @page_size: default page size used to allocate memory. - */ -struct hl_mmu_properties { - u64 start_addr; - u64 end_addr; - u64 hop0_shift; - u64 hop1_shift; - u64 hop2_shift; - u64 hop3_shift; - u64 hop4_shift; - u64 hop0_mask; - u64 hop1_mask; - u64 hop2_mask; - u64 hop3_mask; - u64 hop4_mask; - u32 page_size; -}; - -/** - * struct asic_fixed_properties - ASIC specific immutable properties. - * @hw_queues_props: H/W queues properties. - * @armcp_info: received various information from ArmCP regarding the H/W, e.g. - * available sensors. - * @uboot_ver: F/W U-boot version. - * @preboot_ver: F/W Preboot version. - * @dmmu: DRAM MMU address translation properties. - * @pmmu: PCI (host) MMU address translation properties. - * @pmmu_huge: PCI (host) MMU address translation properties for memory - * allocated with huge pages. - * @sram_base_address: SRAM physical start address. - * @sram_end_address: SRAM physical end address. - * @sram_user_base_address - SRAM physical start address for user access. - * @dram_base_address: DRAM physical start address. - * @dram_end_address: DRAM physical end address. - * @dram_user_base_address: DRAM physical start address for user access. - * @dram_size: DRAM total size. - * @dram_pci_bar_size: size of PCI bar towards DRAM. - * @max_power_default: max power of the device after reset - * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page - * fault. - * @pcie_dbi_base_address: Base address of the PCIE_DBI block. - * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register. - * @mmu_pgt_addr: base physical address in DRAM of MMU page tables. - * @mmu_dram_default_page_addr: DRAM default page physical address. - * @mmu_pgt_size: MMU page tables total size. - * @mmu_pte_size: PTE size in MMU page tables. - * @mmu_hop_table_size: MMU hop table size. - * @mmu_hop0_tables_total_size: total size of MMU hop0 tables. - * @dram_page_size: page size for MMU DRAM allocation. - * @cfg_size: configuration space size on SRAM. - * @sram_size: total size of SRAM. - * @max_asid: maximum number of open contexts (ASIDs). - * @num_of_events: number of possible internal H/W IRQs. - * @psoc_pci_pll_nr: PCI PLL NR value. - * @psoc_pci_pll_nf: PCI PLL NF value. - * @psoc_pci_pll_od: PCI PLL OD value. - * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value. - * @psoc_timestamp_frequency: frequency of the psoc timestamp clock. - * @high_pll: high PLL frequency used by the device. - * @cb_pool_cb_cnt: number of CBs in the CB pool. - * @cb_pool_cb_size: size of each CB in the CB pool. 
- * @max_pending_cs: maximum of concurrent pending command submissions - * @max_queues: maximum amount of queues in the system - * @sync_stream_first_sob: first sync object available for sync stream use - * @sync_stream_first_mon: first monitor available for sync stream use - * @tpc_enabled_mask: which TPCs are enabled. - * @completion_queues_count: number of completion queues. - */ -struct asic_fixed_properties { - struct hw_queue_properties *hw_queues_props; - struct armcp_info armcp_info; - char uboot_ver[VERSION_MAX_LEN]; - char preboot_ver[VERSION_MAX_LEN]; - struct hl_mmu_properties dmmu; - struct hl_mmu_properties pmmu; - struct hl_mmu_properties pmmu_huge; - u64 sram_base_address; - u64 sram_end_address; - u64 sram_user_base_address; - u64 dram_base_address; - u64 dram_end_address; - u64 dram_user_base_address; - u64 dram_size; - u64 dram_pci_bar_size; - u64 max_power_default; - u64 dram_size_for_default_page_mapping; - u64 pcie_dbi_base_address; - u64 pcie_aux_dbi_reg_addr; - u64 mmu_pgt_addr; - u64 mmu_dram_default_page_addr; - u32 mmu_pgt_size; - u32 mmu_pte_size; - u32 mmu_hop_table_size; - u32 mmu_hop0_tables_total_size; - u32 dram_page_size; - u32 cfg_size; - u32 sram_size; - u32 max_asid; - u32 num_of_events; - u32 psoc_pci_pll_nr; - u32 psoc_pci_pll_nf; - u32 psoc_pci_pll_od; - u32 psoc_pci_pll_div_factor; - u32 psoc_timestamp_frequency; - u32 high_pll; - u32 cb_pool_cb_cnt; - u32 cb_pool_cb_size; - u32 max_pending_cs; - u32 max_queues; - u16 sync_stream_first_sob; - u16 sync_stream_first_mon; - u8 tpc_enabled_mask; - u8 completion_queues_count; -}; - -/** - * struct hl_cs_compl - command submission completion object. - * @base_fence: kernel fence object. - * @lock: spinlock to protect fence. - * @hdev: habanalabs device structure. - * @hw_sob: the H/W SOB used in this signal/wait CS. - * @cs_seq: command submission sequence number. - * @type: type of the CS - signal/wait. - * @sob_val: the SOB value that is used in this signal/wait CS. - */ -struct hl_cs_compl { - struct dma_fence base_fence; - spinlock_t lock; - struct hl_device *hdev; - struct hl_hw_sob *hw_sob; - u64 cs_seq; - enum hl_cs_type type; - u16 sob_val; -}; - -/* - * Command Buffers - */ - -/** - * struct hl_cb_mgr - describes a Command Buffer Manager. - * @cb_lock: protects cb_handles. - * @cb_handles: an idr to hold all command buffer handles. - */ -struct hl_cb_mgr { - spinlock_t cb_lock; - struct idr cb_handles; /* protected by cb_lock */ -}; - -/** - * struct hl_cb - describes a Command Buffer. - * @refcount: reference counter for usage of the CB. - * @hdev: pointer to device this CB belongs to. - * @lock: spinlock to protect mmap/cs flows. - * @debugfs_list: node in debugfs list of command buffers. - * @pool_list: node in pool list of command buffers. - * @kernel_address: Holds the CB's kernel virtual address. - * @bus_address: Holds the CB's DMA address. - * @mmap_size: Holds the CB's size that was mmaped. - * @size: holds the CB's size. - * @id: the CB's ID. - * @cs_cnt: holds number of CS that this CB participates in. - * @ctx_id: holds the ID of the owner's context. - * @mmap: true if the CB is currently mmaped to user. - * @is_pool: true if CB was acquired from the pool, false otherwise. 
- */ -struct hl_cb { - struct kref refcount; - struct hl_device *hdev; - spinlock_t lock; - struct list_head debugfs_list; - struct list_head pool_list; - u64 kernel_address; - dma_addr_t bus_address; - u32 mmap_size; - u32 size; - u32 id; - u32 cs_cnt; - u32 ctx_id; - u8 mmap; - u8 is_pool; -}; - - -/* - * QUEUES - */ - -struct hl_cs_job; - -/* Queue length of external and HW queues */ -#define HL_QUEUE_LENGTH 4096 -#define HL_QUEUE_SIZE_IN_BYTES (HL_QUEUE_LENGTH * HL_BD_SIZE) - -#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH) -#error "HL_QUEUE_LENGTH must be greater than HL_MAX_JOBS_PER_CS" -#endif - -/* HL_CQ_LENGTH is in units of struct hl_cq_entry */ -#define HL_CQ_LENGTH HL_QUEUE_LENGTH -#define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE) - -/* Must be power of 2 */ -#define HL_EQ_LENGTH 64 -#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE) - -/* Host <-> ArmCP shared memory size */ -#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M - -/** - * struct hl_hw_queue - describes a H/W transport queue. - * @hw_sob: array of the used H/W SOBs by this H/W queue. - * @shadow_queue: pointer to a shadow queue that holds pointers to jobs. - * @queue_type: type of queue. - * @kernel_address: holds the queue's kernel virtual address. - * @bus_address: holds the queue's DMA address. - * @pi: holds the queue's pi value. - * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci). - * @hw_queue_id: the id of the H/W queue. - * @cq_id: the id for the corresponding CQ for this H/W queue. - * @msi_vec: the IRQ number of the H/W queue. - * @int_queue_len: length of internal queue (number of entries). - * @next_sob_val: the next value to use for the currently used SOB. - * @base_sob_id: the base SOB id of the SOBs used by this queue. - * @base_mon_id: the base MON id of the MONs used by this queue. - * @valid: is the queue valid (we have array of 32 queues, not all of them - * exist). - * @curr_sob_offset: the id offset to the currently used SOB from the - * HL_RSVD_SOBS that are being used by this queue. - * @supports_sync_stream: True if queue supports sync stream - */ -struct hl_hw_queue { - struct hl_hw_sob hw_sob[HL_RSVD_SOBS]; - struct hl_cs_job **shadow_queue; - enum hl_queue_type queue_type; - u64 kernel_address; - dma_addr_t bus_address; - u32 pi; - atomic_t ci; - u32 hw_queue_id; - u32 cq_id; - u32 msi_vec; - u16 int_queue_len; - u16 next_sob_val; - u16 base_sob_id; - u16 base_mon_id; - u8 valid; - u8 curr_sob_offset; - u8 supports_sync_stream; -}; - -/** - * struct hl_cq - describes a completion queue - * @hdev: pointer to the device structure - * @kernel_address: holds the queue's kernel virtual address - * @bus_address: holds the queue's DMA address - * @cq_idx: completion queue index in array - * @hw_queue_id: the id of the matching H/W queue - * @ci: ci inside the queue - * @pi: pi inside the queue - * @free_slots_cnt: counter of free slots in queue - */ -struct hl_cq { - struct hl_device *hdev; - u64 kernel_address; - dma_addr_t bus_address; - u32 cq_idx; - u32 hw_queue_id; - u32 ci; - u32 pi; - atomic_t free_slots_cnt; -}; - -/** - * struct hl_eq - describes the event queue (single one per device) - * @hdev: pointer to the device structure - * @kernel_address: holds the queue's kernel virtual address - * @bus_address: holds the queue's DMA address - * @ci: ci inside the queue - */ -struct hl_eq { - struct hl_device *hdev; - u64 kernel_address; - dma_addr_t bus_address; - u32 ci; -}; - - -/* - * ASICs - */ - -/** - * enum hl_asic_type - supported ASIC types. 
- * @ASIC_INVALID: Invalid ASIC type. - * @ASIC_GOYA: Goya device. - * @ASIC_GAUDI: Gaudi device. - */ -enum hl_asic_type { - ASIC_INVALID, - ASIC_GOYA, - ASIC_GAUDI -}; - -struct hl_cs_parser; - -/** - * enum hl_pm_mng_profile - power management profile. - * @PM_AUTO: internal clock is set by the Linux driver. - * @PM_MANUAL: internal clock is set by the user. - * @PM_LAST: last power management type. - */ -enum hl_pm_mng_profile { - PM_AUTO = 1, - PM_MANUAL, - PM_LAST -}; - -/** - * enum hl_pll_frequency - PLL frequency. - * @PLL_HIGH: high frequency. - * @PLL_LOW: low frequency. - * @PLL_LAST: last frequency values that were configured by the user. - */ -enum hl_pll_frequency { - PLL_HIGH = 1, - PLL_LOW, - PLL_LAST -}; - -#define PLL_REF_CLK 50 - -enum div_select_defs { - DIV_SEL_REF_CLK = 0, - DIV_SEL_PLL_CLK = 1, - DIV_SEL_DIVIDED_REF = 2, - DIV_SEL_DIVIDED_PLL = 3, -}; - -/** - * struct hl_asic_funcs - ASIC specific functions that are can be called from - * common code. - * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W. - * @early_fini: tears down what was done in early_init. - * @late_init: sets up late driver/hw state (post hw_init) - Optional. - * @late_fini: tears down what was done in late_init (pre hw_fini) - Optional. - * @sw_init: sets up driver state, does not configure H/W. - * @sw_fini: tears down driver state, does not configure H/W. - * @hw_init: sets up the H/W state. - * @hw_fini: tears down the H/W state. - * @halt_engines: halt engines, needed for reset sequence. This also disables - * interrupts from the device. Should be called before - * hw_fini and before CS rollback. - * @suspend: handles IP specific H/W or SW changes for suspend. - * @resume: handles IP specific H/W or SW changes for resume. - * @cb_mmap: maps a CB. - * @ring_doorbell: increment PI on a given QMAN. - * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific - * function because the PQs are located in different memory areas - * per ASIC (SRAM, DRAM, Host memory) and therefore, the method of - * writing the PQE must match the destination memory area - * properties. - * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling - * dma_alloc_coherent(). This is ASIC function because - * its implementation is not trivial when the driver - * is loaded in simulation mode (not upstreamed). - * @asic_dma_free_coherent: Free coherent DMA memory by calling - * dma_free_coherent(). This is ASIC function because - * its implementation is not trivial when the driver - * is loaded in simulation mode (not upstreamed). - * @get_int_queue_base: get the internal queue base address. - * @test_queues: run simple test on all queues for sanity check. - * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool. - * size of allocation is HL_DMA_POOL_BLK_SIZE. - * @asic_dma_pool_free: free small DMA allocation from pool. - * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool. - * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool. - * @hl_dma_unmap_sg: DMA unmap scatter-gather list. - * @cs_parser: parse Command Submission. - * @asic_dma_map_sg: DMA map scatter-gather list. - * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB. - * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it. - * @update_eq_ci: update event queue CI. - * @context_switch: called upon ASID context switch. - * @restore_phase_topology: clear all SOBs amd MONs. 
- * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM. - * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM. - * @add_device_attr: add ASIC specific device attributes. - * @handle_eqe: handle event queue entry (IRQ) from ArmCP. - * @set_pll_profile: change PLL profile (manual/automatic). - * @get_events_stat: retrieve event queue entries histogram. - * @read_pte: read MMU page table entry from DRAM. - * @write_pte: write MMU page table entry to DRAM. - * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft - * (L1 only) or hard (L0 & L1) flush. - * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with - * ASID-VA-size mask. - * @send_heartbeat: send is-alive packet to ArmCP and verify response. - * @enable_clock_gating: enable clock gating for reducing power consumption. - * @disable_clock_gating: disable clock for accessing registers on HBW. - * @debug_coresight: perform certain actions on Coresight for debugging. - * @is_device_idle: return true if device is idle, false otherwise. - * @soft_reset_late_init: perform certain actions needed after soft reset. - * @hw_queues_lock: acquire H/W queues lock. - * @hw_queues_unlock: release H/W queues lock. - * @get_pci_id: retrieve PCI ID. - * @get_eeprom_data: retrieve EEPROM data from F/W. - * @send_cpu_message: send buffer to ArmCP. - * @get_hw_state: retrieve the H/W state - * @pci_bars_map: Map PCI BARs. - * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns - * old address the bar pointed to or U64_MAX for failure - * @init_iatu: Initialize the iATU unit inside the PCI controller. - * @rreg: Read a register. Needed for simulator support. - * @wreg: Write a register. Needed for simulator support. - * @halt_coresight: stop the ETF and ETR traces. - * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz - * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index. - * @read_device_fw_version: read the device's firmware versions that are - * contained in registers - * @load_firmware_to_device: load the firmware to the device's memory - * @load_boot_fit_to_device: load boot fit to device's memory - * @get_signal_cb_size: Get signal CB size. - * @get_wait_cb_size: Get wait CB size. - * @gen_signal_cb: Generate a signal CB. - * @gen_wait_cb: Generate a wait CB. - * @reset_sob: Reset a SOB. - * @set_dma_mask_from_fw: set the DMA mask in the driver according to the - * firmware configuration - * @get_device_time: Get the device time. 
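Common code reaches the hardware only through this table. A minimal sketch of how an ASIC back-end would wire it up is shown below; the newasic_* names are hypothetical, while the field names and the hl_rreg()/hl_wreg() helpers are the ones declared further down in this header.

/* Hypothetical ASIC back-end, shown only to illustrate the dispatch pattern */
static const struct hl_asic_funcs newasic_funcs = {
	.early_init = newasic_early_init,
	.sw_init = newasic_sw_init,
	.hw_init = newasic_hw_init,
	.rreg = hl_rreg,	/* plain MMIO read helper declared below */
	.wreg = hl_wreg,	/* plain MMIO write helper declared below */
	/* ... every other callback is filled in the same way ... */
};

void newasic_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &newasic_funcs;
}

Once the table is installed, accessors such as RREG32()/WREG32() below simply expand to hdev->asic_funcs->rreg()/wreg() calls, so the common code never depends on a specific ASIC.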
- */ -struct hl_asic_funcs { - int (*early_init)(struct hl_device *hdev); - int (*early_fini)(struct hl_device *hdev); - int (*late_init)(struct hl_device *hdev); - void (*late_fini)(struct hl_device *hdev); - int (*sw_init)(struct hl_device *hdev); - int (*sw_fini)(struct hl_device *hdev); - int (*hw_init)(struct hl_device *hdev); - void (*hw_fini)(struct hl_device *hdev, bool hard_reset); - void (*halt_engines)(struct hl_device *hdev, bool hard_reset); - int (*suspend)(struct hl_device *hdev); - int (*resume)(struct hl_device *hdev); - int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, - u64 kaddress, phys_addr_t paddress, u32 size); - void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi); - void (*pqe_write)(struct hl_device *hdev, __le64 *pqe, - struct hl_bd *bd); - void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size, - dma_addr_t *dma_handle, gfp_t flag); - void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size, - void *cpu_addr, dma_addr_t dma_handle); - void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id, - dma_addr_t *dma_handle, u16 *queue_len); - int (*test_queues)(struct hl_device *hdev); - void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size, - gfp_t mem_flags, dma_addr_t *dma_handle); - void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr, - dma_addr_t dma_addr); - void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev, - size_t size, dma_addr_t *dma_handle); - void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev, - size_t size, void *vaddr); - void (*hl_dma_unmap_sg)(struct hl_device *hdev, - struct scatterlist *sgl, int nents, - enum dma_data_direction dir); - int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser); - int (*asic_dma_map_sg)(struct hl_device *hdev, - struct scatterlist *sgl, int nents, - enum dma_data_direction dir); - u32 (*get_dma_desc_list_size)(struct hl_device *hdev, - struct sg_table *sgt); - void (*add_end_of_cb_packets)(struct hl_device *hdev, - u64 kernel_address, u32 len, - u64 cq_addr, u32 cq_val, u32 msix_num, - bool eb); - void (*update_eq_ci)(struct hl_device *hdev, u32 val); - int (*context_switch)(struct hl_device *hdev, u32 asid); - void (*restore_phase_topology)(struct hl_device *hdev); - int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val); - int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val); - int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val); - int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val); - void (*add_device_attr)(struct hl_device *hdev, - struct attribute_group *dev_attr_grp); - void (*handle_eqe)(struct hl_device *hdev, - struct hl_eq_entry *eq_entry); - void (*set_pll_profile)(struct hl_device *hdev, - enum hl_pll_frequency freq); - void* (*get_events_stat)(struct hl_device *hdev, bool aggregate, - u32 *size); - u64 (*read_pte)(struct hl_device *hdev, u64 addr); - void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val); - int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard, - u32 flags); - int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, - u32 asid, u64 va, u64 size); - int (*send_heartbeat)(struct hl_device *hdev); - void (*enable_clock_gating)(struct hl_device *hdev); - void (*disable_clock_gating)(struct hl_device *hdev); - int (*debug_coresight)(struct hl_device *hdev, void *data); - bool (*is_device_idle)(struct hl_device *hdev, u32 *mask, - struct seq_file *s); - int (*soft_reset_late_init)(struct 
hl_device *hdev); - void (*hw_queues_lock)(struct hl_device *hdev); - void (*hw_queues_unlock)(struct hl_device *hdev); - u32 (*get_pci_id)(struct hl_device *hdev); - int (*get_eeprom_data)(struct hl_device *hdev, void *data, - size_t max_size); - int (*send_cpu_message)(struct hl_device *hdev, u32 *msg, - u16 len, u32 timeout, long *result); - enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev); - int (*pci_bars_map)(struct hl_device *hdev); - u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); - int (*init_iatu)(struct hl_device *hdev); - u32 (*rreg)(struct hl_device *hdev, u32 reg); - void (*wreg)(struct hl_device *hdev, u32 reg, u32 val); - void (*halt_coresight)(struct hl_device *hdev); - int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); - u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx); - void (*read_device_fw_version)(struct hl_device *hdev, - enum hl_fw_component fwc); - int (*load_firmware_to_device)(struct hl_device *hdev); - int (*load_boot_fit_to_device)(struct hl_device *hdev); - u32 (*get_signal_cb_size)(struct hl_device *hdev); - u32 (*get_wait_cb_size)(struct hl_device *hdev); - void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id); - void (*gen_wait_cb)(struct hl_device *hdev, void *data, u16 sob_id, - u16 sob_val, u16 mon_id, u32 q_idx); - void (*reset_sob)(struct hl_device *hdev, void *data); - void (*set_dma_mask_from_fw)(struct hl_device *hdev); - u64 (*get_device_time)(struct hl_device *hdev); -}; - - -/* - * CONTEXTS - */ - -#define HL_KERNEL_ASID_ID 0 - -/** - * struct hl_va_range - virtual addresses range. - * @lock: protects the virtual addresses list. - * @list: list of virtual addresses blocks available for mappings. - * @start_addr: range start address. - * @end_addr: range end address. - */ -struct hl_va_range { - struct mutex lock; - struct list_head list; - u64 start_addr; - u64 end_addr; -}; - -/** - * struct hl_ctx - user/kernel context. - * @mem_hash: holds mapping from virtual address to virtual memory area - * descriptor (hl_vm_phys_pg_list or hl_userptr). - * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure. - * @hpriv: pointer to the private (Kernel Driver) data of the process (fd). - * @hdev: pointer to the device structure. - * @refcount: reference counter for the context. Context is released only when - * this hits 0l. It is incremented on CS and CS_WAIT. - * @cs_pending: array of DMA fence objects representing pending CS. - * @host_va_range: holds available virtual addresses for host mappings. - * @host_huge_va_range: holds available virtual addresses for host mappings - * with huge pages. - * @dram_va_range: holds available virtual addresses for DRAM mappings. - * @mem_hash_lock: protects the mem_hash. - * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the - * MMU hash or walking the PGT requires talking this lock. - * @debugfs_list: node in debugfs list of contexts. - * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed - * to user so user could inquire about CS. It is used as - * index to cs_pending array. - * @dram_default_hops: array that holds all hops addresses needed for default - * DRAM mapping. - * @cs_lock: spinlock to protect cs_sequence. - * @dram_phys_mem: amount of used physical DRAM memory by this context. - * @thread_ctx_switch_token: token to prevent multiple threads of the same - * context from running the context switch phase. - * Only a single thread should run it. 
- * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run - * the context switch phase from moving to their - * execution phase before the context switch phase - * has finished. - * @asid: context's unique address space ID in the device's MMU. - * @handle: context's opaque handle for user - */ -struct hl_ctx { - DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS); - DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS); - struct hl_fpriv *hpriv; - struct hl_device *hdev; - struct kref refcount; - struct dma_fence **cs_pending; - struct hl_va_range *host_va_range; - struct hl_va_range *host_huge_va_range; - struct hl_va_range *dram_va_range; - struct mutex mem_hash_lock; - struct mutex mmu_lock; - struct list_head debugfs_list; - struct hl_cs_counters cs_counters; - u64 cs_sequence; - u64 *dram_default_hops; - spinlock_t cs_lock; - atomic64_t dram_phys_mem; - atomic_t thread_ctx_switch_token; - u32 thread_ctx_switch_wait_token; - u32 asid; - u32 handle; -}; - -/** - * struct hl_ctx_mgr - for handling multiple contexts. - * @ctx_lock: protects ctx_handles. - * @ctx_handles: idr to hold all ctx handles. - */ -struct hl_ctx_mgr { - struct mutex ctx_lock; - struct idr ctx_handles; -}; - - - -/* - * COMMAND SUBMISSIONS - */ - -/** - * struct hl_userptr - memory mapping chunk information - * @vm_type: type of the VM. - * @job_node: linked-list node for hanging the object on the Job's list. - * @vec: pointer to the frame vector. - * @sgt: pointer to the scatter-gather table that holds the pages. - * @dir: for DMA unmapping, the direction must be supplied, so save it. - * @debugfs_list: node in debugfs list of command submissions. - * @addr: user-space virtual address of the start of the memory area. - * @size: size of the memory area to pin & map. - * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise. - */ -struct hl_userptr { - enum vm_type_t vm_type; /* must be first */ - struct list_head job_node; - struct frame_vector *vec; - struct sg_table *sgt; - enum dma_data_direction dir; - struct list_head debugfs_list; - u64 addr; - u32 size; - u8 dma_mapped; -}; - -/** - * struct hl_cs - command submission. - * @jobs_in_queue_cnt: per each queue, maintain counter of submitted jobs. - * @ctx: the context this CS belongs to. - * @job_list: list of the CS's jobs in the various queues. - * @job_lock: spinlock for the CS's jobs list. Needed for free_job. - * @refcount: reference counter for usage of the CS. - * @fence: pointer to the fence object of this CS. - * @signal_fence: pointer to the fence object of the signal CS (used by wait - * CS only). - * @finish_work: workqueue object to run when CS is completed by H/W. - * @work_tdr: delayed work node for TDR. - * @mirror_node : node in device mirror list of command submissions. - * @debugfs_list: node in debugfs list of command submissions. - * @sequence: the sequence number of this CS. - * @type: CS_TYPE_*. - * @submitted: true if CS was submitted to H/W. - * @completed: true if CS was completed by device. - * @timedout : true if CS was timedout. - * @tdr_active: true if TDR was activated for this CS (to prevent - * double TDR activation). - * @aborted: true if CS was aborted due to some device error. 
- */ -struct hl_cs { - u16 *jobs_in_queue_cnt; - struct hl_ctx *ctx; - struct list_head job_list; - spinlock_t job_lock; - struct kref refcount; - struct dma_fence *fence; - struct dma_fence *signal_fence; - struct work_struct finish_work; - struct delayed_work work_tdr; - struct list_head mirror_node; - struct list_head debugfs_list; - u64 sequence; - enum hl_cs_type type; - u8 submitted; - u8 completed; - u8 timedout; - u8 tdr_active; - u8 aborted; -}; - -/** - * struct hl_cs_job - command submission job. - * @cs_node: the node to hang on the CS jobs list. - * @cs: the CS this job belongs to. - * @user_cb: the CB we got from the user. - * @patched_cb: in case of patching, this is internal CB which is submitted on - * the queue instead of the CB we got from the IOCTL. - * @finish_work: workqueue object to run when job is completed. - * @userptr_list: linked-list of userptr mappings that belong to this job and - * wait for completion. - * @debugfs_list: node in debugfs list of command submission jobs. - * @queue_type: the type of the H/W queue this job is submitted to. - * @id: the id of this job inside a CS. - * @hw_queue_id: the id of the H/W queue this job is submitted to. - * @user_cb_size: the actual size of the CB we got from the user. - * @job_cb_size: the actual size of the CB that we put on the queue. - * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a - * handle to a kernel-allocated CB object, false - * otherwise (SRAM/DRAM/host address). - * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This - * info is needed later, when adding the 2xMSG_PROT at the - * end of the JOB, to know which barriers to put in the - * MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't - * have streams so the engine can't be busy by another - * stream. - */ -struct hl_cs_job { - struct list_head cs_node; - struct hl_cs *cs; - struct hl_cb *user_cb; - struct hl_cb *patched_cb; - struct work_struct finish_work; - struct list_head userptr_list; - struct list_head debugfs_list; - enum hl_queue_type queue_type; - u32 id; - u32 hw_queue_id; - u32 user_cb_size; - u32 job_cb_size; - u8 is_kernel_allocated_cb; - u8 contains_dma_pkt; -}; - -/** - * struct hl_cs_parser - command submission parser properties. - * @user_cb: the CB we got from the user. - * @patched_cb: in case of patching, this is internal CB which is submitted on - * the queue instead of the CB we got from the IOCTL. - * @job_userptr_list: linked-list of userptr mappings that belong to the related - * job and wait for completion. - * @cs_sequence: the sequence number of the related CS. - * @queue_type: the type of the H/W queue this job is submitted to. - * @ctx_id: the ID of the context the related CS belongs to. - * @hw_queue_id: the id of the H/W queue this job is submitted to. - * @user_cb_size: the actual size of the CB we got from the user. - * @patched_cb_size: the size of the CB after parsing. - * @job_id: the id of the related job inside the related CS. - * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a - * handle to a kernel-allocated CB object, false - * otherwise (SRAM/DRAM/host address). - * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This - * info is needed later, when adding the 2xMSG_PROT at the - * end of the JOB, to know which barriers to put in the - * MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't - * have streams so the engine can't be busy by another - * stream. 
- */ -struct hl_cs_parser { - struct hl_cb *user_cb; - struct hl_cb *patched_cb; - struct list_head *job_userptr_list; - u64 cs_sequence; - enum hl_queue_type queue_type; - u32 ctx_id; - u32 hw_queue_id; - u32 user_cb_size; - u32 patched_cb_size; - u8 job_id; - u8 is_kernel_allocated_cb; - u8 contains_dma_pkt; -}; - - -/* - * MEMORY STRUCTURE - */ - -/** - * struct hl_vm_hash_node - hash element from virtual address to virtual - * memory area descriptor (hl_vm_phys_pg_list or - * hl_userptr). - * @node: node to hang on the hash table in context object. - * @vaddr: key virtual address. - * @ptr: value pointer (hl_vm_phys_pg_list or hl_userptr). - */ -struct hl_vm_hash_node { - struct hlist_node node; - u64 vaddr; - void *ptr; -}; - -/** - * struct hl_vm_phys_pg_pack - physical page pack. - * @vm_type: describes the type of the virtual area descriptor. - * @pages: the physical page array. - * @npages: num physical pages in the pack. - * @total_size: total size of all the pages in this list. - * @mapping_cnt: number of shared mappings. - * @asid: the context related to this list. - * @page_size: size of each page in the pack. - * @flags: HL_MEM_* flags related to this list. - * @handle: the provided handle related to this list. - * @offset: offset from the first page. - * @contiguous: is contiguous physical memory. - * @created_from_userptr: is product of host virtual address. - */ -struct hl_vm_phys_pg_pack { - enum vm_type_t vm_type; /* must be first */ - u64 *pages; - u64 npages; - u64 total_size; - atomic_t mapping_cnt; - u32 asid; - u32 page_size; - u32 flags; - u32 handle; - u32 offset; - u8 contiguous; - u8 created_from_userptr; -}; - -/** - * struct hl_vm_va_block - virtual range block information. - * @node: node to hang on the virtual range list in context object. - * @start: virtual range start address. - * @end: virtual range end address. - * @size: virtual range size. - */ -struct hl_vm_va_block { - struct list_head node; - u64 start; - u64 end; - u64 size; -}; - -/** - * struct hl_vm - virtual memory manager for MMU. - * @dram_pg_pool: pool for DRAM physical pages of 2MB. - * @dram_pg_pool_refcount: reference counter for the pool usage. - * @idr_lock: protects the phys_pg_list_handles. - * @phys_pg_pack_handles: idr to hold all device allocations handles. - * @init_done: whether initialization was done. We need this because VM - * initialization might be skipped during device initialization. - */ -struct hl_vm { - struct gen_pool *dram_pg_pool; - struct kref dram_pg_pool_refcount; - spinlock_t idr_lock; - struct idr phys_pg_pack_handles; - u8 init_done; -}; - - -/* - * DEBUG, PROFILING STRUCTURE - */ - -/** - * struct hl_debug_params - Coresight debug parameters. - * @input: pointer to component specific input parameters. - * @output: pointer to component specific output parameters. - * @output_size: size of output buffer. - * @reg_idx: relevant register ID. - * @op: component operation to execute. - * @enable: true if to enable component debugging, false otherwise. - */ -struct hl_debug_params { - void *input; - void *output; - u32 output_size; - u32 reg_idx; - u32 op; - bool enable; -}; - -/* - * FILE PRIVATE STRUCTURE - */ - -/** - * struct hl_fpriv - process information stored in FD private data. - * @hdev: habanalabs device structure. - * @filp: pointer to the given file structure. - * @taskpid: current process ID. - * @ctx: current executing context. TODO: remove for multiple ctx per process - * @ctx_mgr: context manager to handle multiple context for this FD. 
- * @cb_mgr: command buffer manager to handle multiple buffers for this FD. - * @debugfs_list: list of relevant ASIC debugfs. - * @dev_node: node in the device list of file private data - * @refcount: number of related contexts. - * @restore_phase_mutex: lock for context switch and restore phase. - * @is_control: true for control device, false otherwise - */ -struct hl_fpriv { - struct hl_device *hdev; - struct file *filp; - struct pid *taskpid; - struct hl_ctx *ctx; - struct hl_ctx_mgr ctx_mgr; - struct hl_cb_mgr cb_mgr; - struct list_head debugfs_list; - struct list_head dev_node; - struct kref refcount; - struct mutex restore_phase_mutex; - u8 is_control; -}; - - -/* - * DebugFS - */ - -/** - * struct hl_info_list - debugfs file ops. - * @name: file name. - * @show: function to output information. - * @write: function to write to the file. - */ -struct hl_info_list { - const char *name; - int (*show)(struct seq_file *s, void *data); - ssize_t (*write)(struct file *file, const char __user *buf, - size_t count, loff_t *f_pos); -}; - -/** - * struct hl_debugfs_entry - debugfs dentry wrapper. - * @dent: base debugfs entry structure. - * @info_ent: dentry realted ops. - * @dev_entry: ASIC specific debugfs manager. - */ -struct hl_debugfs_entry { - struct dentry *dent; - const struct hl_info_list *info_ent; - struct hl_dbg_device_entry *dev_entry; -}; - -/** - * struct hl_dbg_device_entry - ASIC specific debugfs manager. - * @root: root dentry. - * @hdev: habanalabs device structure. - * @entry_arr: array of available hl_debugfs_entry. - * @file_list: list of available debugfs files. - * @file_mutex: protects file_list. - * @cb_list: list of available CBs. - * @cb_spinlock: protects cb_list. - * @cs_list: list of available CSs. - * @cs_spinlock: protects cs_list. - * @cs_job_list: list of available CB jobs. - * @cs_job_spinlock: protects cs_job_list. - * @userptr_list: list of available userptrs (virtual memory chunk descriptor). - * @userptr_spinlock: protects userptr_list. - * @ctx_mem_hash_list: list of available contexts with MMU mappings. - * @ctx_mem_hash_spinlock: protects cb_list. - * @addr: next address to read/write from/to in read/write32. - * @mmu_addr: next virtual address to translate to physical address in mmu_show. - * @mmu_asid: ASID to use while translating in mmu_show. - * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read. - * @i2c_bus: generic u8 debugfs file for address value to use in i2c_data_read. - * @i2c_bus: generic u8 debugfs file for register value to use in i2c_data_read. - */ -struct hl_dbg_device_entry { - struct dentry *root; - struct hl_device *hdev; - struct hl_debugfs_entry *entry_arr; - struct list_head file_list; - struct mutex file_mutex; - struct list_head cb_list; - spinlock_t cb_spinlock; - struct list_head cs_list; - spinlock_t cs_spinlock; - struct list_head cs_job_list; - spinlock_t cs_job_spinlock; - struct list_head userptr_list; - spinlock_t userptr_spinlock; - struct list_head ctx_mem_hash_list; - spinlock_t ctx_mem_hash_spinlock; - u64 addr; - u64 mmu_addr; - u32 mmu_asid; - u8 i2c_bus; - u8 i2c_addr; - u8 i2c_reg; -}; - - -/* - * DEVICES - */ - -/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe - * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards. - */ -#define HL_MAX_MINORS 256 - -/* - * Registers read & write functions. 
- */ - -u32 hl_rreg(struct hl_device *hdev, u32 reg); -void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); - -#define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg)) -#define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v)) -#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n", \ - hdev->asic_funcs->rreg(hdev, (reg))) - -#define WREG32_P(reg, val, mask) \ - do { \ - u32 tmp_ = RREG32(reg); \ - tmp_ &= (mask); \ - tmp_ |= ((val) & ~(mask)); \ - WREG32(reg, tmp_); \ - } while (0) -#define WREG32_AND(reg, and) WREG32_P(reg, 0, and) -#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or)) - -#define RMWREG32(reg, val, mask) \ - do { \ - u32 tmp_ = RREG32(reg); \ - tmp_ &= ~(mask); \ - tmp_ |= ((val) << __ffs(mask)); \ - WREG32(reg, tmp_); \ - } while (0) - -#define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask)) - -#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT -#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK -#define WREG32_FIELD(reg, offset, field, val) \ - WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \ - ~REG_FIELD_MASK(reg, field)) | \ - (val) << REG_FIELD_SHIFT(reg, field)) - -/* Timeout should be longer when working with simulator but cap the - * increased timeout to some maximum - */ -#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \ -({ \ - ktime_t __timeout; \ - if (hdev->pdev) \ - __timeout = ktime_add_us(ktime_get(), timeout_us); \ - else \ - __timeout = ktime_add_us(ktime_get(),\ - min((u64)(timeout_us * 10), \ - (u64) HL_SIM_MAX_TIMEOUT_US)); \ - might_sleep_if(sleep_us); \ - for (;;) { \ - (val) = RREG32(addr); \ - if (cond) \ - break; \ - if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = RREG32(addr); \ - break; \ - } \ - if (sleep_us) \ - usleep_range((sleep_us >> 2) + 1, sleep_us); \ - } \ - (cond) ? 0 : -ETIMEDOUT; \ -}) - -/* - * address in this macro points always to a memory location in the - * host's (server's) memory. That location is updated asynchronously - * either by the direct access of the device or by another core. - * - * To work both in LE and BE architectures, we need to distinguish between the - * two states (device or another core updates the memory location). Therefore, - * if mem_written_by_device is true, the host memory being polled will be - * updated directly by the device. If false, the host memory being polled will - * be updated by host CPU. Required so host knows whether or not the memory - * might need to be byte-swapped before returning value to caller. - */ -#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \ - mem_written_by_device) \ -({ \ - ktime_t __timeout; \ - if (hdev->pdev) \ - __timeout = ktime_add_us(ktime_get(), timeout_us); \ - else \ - __timeout = ktime_add_us(ktime_get(),\ - min((u64)(timeout_us * 10), \ - (u64) HL_SIM_MAX_TIMEOUT_US)); \ - might_sleep_if(sleep_us); \ - for (;;) { \ - /* Verify we read updates done by other cores or by device */ \ - mb(); \ - (val) = *((u32 *) (uintptr_t) (addr)); \ - if (mem_written_by_device) \ - (val) = le32_to_cpu(*(__le32 *) &(val)); \ - if (cond) \ - break; \ - if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = *((u32 *) (uintptr_t) (addr)); \ - if (mem_written_by_device) \ - (val) = le32_to_cpu(*(__le32 *) &(val)); \ - break; \ - } \ - if (sleep_us) \ - usleep_range((sleep_us >> 2) + 1, sleep_us); \ - } \ - (cond) ? 
0 : -ETIMEDOUT; \ -}) - -#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \ - timeout_us) \ -({ \ - ktime_t __timeout; \ - if (hdev->pdev) \ - __timeout = ktime_add_us(ktime_get(), timeout_us); \ - else \ - __timeout = ktime_add_us(ktime_get(),\ - min((u64)(timeout_us * 10), \ - (u64) HL_SIM_MAX_TIMEOUT_US)); \ - might_sleep_if(sleep_us); \ - for (;;) { \ - (val) = readl(addr); \ - if (cond) \ - break; \ - if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = readl(addr); \ - break; \ - } \ - if (sleep_us) \ - usleep_range((sleep_us >> 2) + 1, sleep_us); \ - } \ - (cond) ? 0 : -ETIMEDOUT; \ -}) - -struct hwmon_chip_info; - -/** - * struct hl_device_reset_work - reset workqueue task wrapper. - * @reset_work: reset work to be done. - * @hdev: habanalabs device structure. - */ -struct hl_device_reset_work { - struct work_struct reset_work; - struct hl_device *hdev; -}; - -/** - * struct hl_device_idle_busy_ts - used for calculating device utilization rate. - * @idle_to_busy_ts: timestamp where device changed from idle to busy. - * @busy_to_idle_ts: timestamp where device changed from busy to idle. - */ -struct hl_device_idle_busy_ts { - ktime_t idle_to_busy_ts; - ktime_t busy_to_idle_ts; -}; - -/** - * struct hl_device - habanalabs device structure. - * @pdev: pointer to PCI device, can be NULL in case of simulator device. - * @pcie_bar_phys: array of available PCIe bars physical addresses. - * (required only for PCI address match mode) - * @pcie_bar: array of available PCIe bars virtual addresses. - * @rmmio: configuration area address on SRAM. - * @cdev: related char device. - * @cdev_ctrl: char device for control operations only (INFO IOCTL) - * @dev: related kernel basic device structure. - * @dev_ctrl: related kernel device structure for the control device - * @work_freq: delayed work to lower device frequency if possible. - * @work_heartbeat: delayed work for ArmCP is-alive check. - * @asic_name: ASIC specific nmae. - * @asic_type: ASIC specific type. - * @completion_queue: array of hl_cq. - * @cq_wq: work queues of completion queues for executing work in process - * context. - * @eq_wq: work queue of event queue for executing work in process context. - * @kernel_ctx: Kernel driver context structure. - * @kernel_queues: array of hl_hw_queue. - * @hw_queues_mirror_list: CS mirror list for TDR. - * @hw_queues_mirror_lock: protects hw_queues_mirror_list. - * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs. - * @event_queue: event queue for IRQ from ArmCP. - * @dma_pool: DMA pool for small allocations. - * @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address. - * @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address. - * @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool. - * @asid_bitmap: holds used/available ASIDs. - * @asid_mutex: protects asid_bitmap. - * @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue. - * @debug_lock: protects critical section of setting debug mode for device - * @asic_prop: ASIC specific immutable properties. - * @asic_funcs: ASIC specific functions. - * @asic_specific: ASIC specific information to use only from ASIC files. - * @mmu_pgt_pool: pool of available MMU hops. - * @vm: virtual memory manager for MMU. - * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context. - * @mmu_shadow_hop0: shadow mapping of the MMU hop 0 zone. - * @hwmon_dev: H/W monitor device. 
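All three polling macros above share one contract: they keep reading until the caller's condition becomes true or the timeout expires, leave the last value read in val, and evaluate to 0 on success or -ETIMEDOUT otherwise. A usage sketch, in which the register name and ready mask are hypothetical:

/* Illustrative only: poll a hypothetical status register for up to 1s */
static int example_wait_for_ready(struct hl_device *hdev)
{
	u32 status;
	int rc;

	rc = hl_poll_timeout(hdev,
			mmEXAMPLE_STATUS_REG,		/* hypothetical register */
			status,				/* receives the last value read */
			(status & EXAMPLE_READY_MASK),	/* stop condition */
			100,				/* sleep_us between reads */
			1000000);			/* timeout_us */
	if (rc)
		dev_err(hdev->dev, "status register never became ready\n");

	return rc;
}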
- * @pm_mng_profile: current power management profile. - * @hl_chip_info: ASIC's sensors information. - * @hl_debugfs: device's debugfs manager. - * @cb_pool: list of preallocated CBs. - * @cb_pool_lock: protects the CB pool. - * @fpriv_list: list of file private data structures. Each structure is created - * when a user opens the device - * @fpriv_list_lock: protects the fpriv_list - * @compute_ctx: current compute context executing. - * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy - * and vice-versa - * @aggregated_cs_counters: aggregated cs counters among all contexts - * @dram_used_mem: current DRAM memory consumption. - * @timeout_jiffies: device CS timeout value. - * @max_power: the max power of the device, as configured by the sysadmin. This - * value is saved so in case of hard-reset, the driver will restore - * this value and update the F/W after the re-initialization - * @in_reset: is device in reset flow. - * @curr_pll_profile: current PLL profile. - * @cs_active_cnt: number of active command submissions on this device (active - * means already in H/W queues) - * @major: habanalabs kernel driver major. - * @high_pll: high PLL profile frequency. - * @soft_reset_cnt: number of soft reset since the driver was loaded. - * @hard_reset_cnt: number of hard reset since the driver was loaded. - * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr - * @id: device minor. - * @id_control: minor of the control device - * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit - * addresses. - * @disabled: is device disabled. - * @late_init_done: is late init stage was done during initialization. - * @hwmon_initialized: is H/W monitor sensors was initialized. - * @hard_reset_pending: is there a hard reset work pending. - * @heartbeat: is heartbeat sanity check towards ArmCP enabled. - * @reset_on_lockup: true if a reset should be done in case of stuck CS, false - * otherwise. - * @dram_supports_virtual_memory: is MMU enabled towards DRAM. - * @dram_default_page_mapping: is DRAM default page mapping enabled. - * @pmmu_huge_range: is a different virtual addresses range used for PMMU with - * huge pages. - * @init_done: is the initialization of the device done. - * @mmu_enable: is MMU enabled. - * @mmu_huge_page_opt: is MMU huge pages optimization enabled. - * @clock_gating: is clock gating enabled. - * @device_cpu_disabled: is the device CPU disabled (due to timeouts) - * @dma_mask: the dma mask that was set for this device - * @in_debug: is device under debug. This, together with fpriv_list, enforces - * that only a single user is configuring the debug infrastructure. - * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant - * only to POWER9 machines. - * @cdev_sysfs_created: were char devices and sysfs nodes created. - * @stop_on_err: true if engines should stop on error. - * @supports_sync_stream: is sync stream supported. - * @sync_stream_queue_idx: helper index for sync stream queues initialization. - * @supports_coresight: is CoreSight supported. - * @supports_soft_reset: is soft reset supported. 
- */ -struct hl_device { - struct pci_dev *pdev; - u64 pcie_bar_phys[HL_PCI_NUM_BARS]; - void __iomem *pcie_bar[HL_PCI_NUM_BARS]; - void __iomem *rmmio; - struct cdev cdev; - struct cdev cdev_ctrl; - struct device *dev; - struct device *dev_ctrl; - struct delayed_work work_freq; - struct delayed_work work_heartbeat; - char asic_name[16]; - enum hl_asic_type asic_type; - struct hl_cq *completion_queue; - struct workqueue_struct **cq_wq; - struct workqueue_struct *eq_wq; - struct hl_ctx *kernel_ctx; - struct hl_hw_queue *kernel_queues; - struct list_head hw_queues_mirror_list; - spinlock_t hw_queues_mirror_lock; - struct hl_cb_mgr kernel_cb_mgr; - struct hl_eq event_queue; - struct dma_pool *dma_pool; - void *cpu_accessible_dma_mem; - dma_addr_t cpu_accessible_dma_address; - struct gen_pool *cpu_accessible_dma_pool; - unsigned long *asid_bitmap; - struct mutex asid_mutex; - struct mutex send_cpu_message_lock; - struct mutex debug_lock; - struct asic_fixed_properties asic_prop; - const struct hl_asic_funcs *asic_funcs; - void *asic_specific; - struct gen_pool *mmu_pgt_pool; - struct hl_vm vm; - struct mutex mmu_cache_lock; - void *mmu_shadow_hop0; - struct device *hwmon_dev; - enum hl_pm_mng_profile pm_mng_profile; - struct hwmon_chip_info *hl_chip_info; - - struct hl_dbg_device_entry hl_debugfs; - - struct list_head cb_pool; - spinlock_t cb_pool_lock; - - struct list_head fpriv_list; - struct mutex fpriv_list_lock; - - struct hl_ctx *compute_ctx; - - struct hl_device_idle_busy_ts *idle_busy_ts_arr; - - struct hl_cs_counters aggregated_cs_counters; - - atomic64_t dram_used_mem; - u64 timeout_jiffies; - u64 max_power; - atomic_t in_reset; - enum hl_pll_frequency curr_pll_profile; - int cs_active_cnt; - u32 major; - u32 high_pll; - u32 soft_reset_cnt; - u32 hard_reset_cnt; - u32 idle_busy_ts_idx; - u16 id; - u16 id_control; - u16 cpu_pci_msb_addr; - u8 disabled; - u8 late_init_done; - u8 hwmon_initialized; - u8 hard_reset_pending; - u8 heartbeat; - u8 reset_on_lockup; - u8 dram_supports_virtual_memory; - u8 dram_default_page_mapping; - u8 pmmu_huge_range; - u8 init_done; - u8 clock_gating; - u8 device_cpu_disabled; - u8 dma_mask; - u8 in_debug; - u8 power9_64bit_dma_enable; - u8 cdev_sysfs_created; - u8 stop_on_err; - u8 supports_sync_stream; - u8 sync_stream_queue_idx; - u8 supports_coresight; - u8 supports_soft_reset; - - /* Parameters for bring-up */ - u8 mmu_enable; - u8 mmu_huge_page_opt; - u8 cpu_enable; - u8 reset_pcilink; - u8 cpu_queues_enable; - u8 fw_loading; - u8 pldm; - u8 axi_drain; - u8 sram_scrambler_enable; - u8 dram_scrambler_enable; - u8 hard_reset_on_fw_events; - u8 bmc_enable; - u8 rl_enable; -}; - - -/* - * IOCTLs - */ - -/** - * typedef hl_ioctl_t - typedef for ioctl function in the driver - * @hpriv: pointer to the FD's private data, which contains state of - * user process - * @data: pointer to the input/output arguments structure of the IOCTL - * - * Return: 0 for success, negative value for error - */ -typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data); - -/** - * struct hl_ioctl_desc - describes an IOCTL entry of the driver. - * @cmd: the IOCTL code as created by the kernel macros. - * @func: pointer to the driver's function that should be called for this IOCTL. - */ -struct hl_ioctl_desc { - unsigned int cmd; - hl_ioctl_t *func; -}; - - -/* - * Kernel module functions that can be accessed by entire module - */ - -/** - * hl_mem_area_inside_range() - Checks whether address+size are inside a range. 
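The driver dispatches through an array of these descriptors indexed by the IOCTL number. A sketch of such a table is shown below; the HL_IOCTL_* command codes are assumed to come from the uapi header, while the handler functions are the ones declared later in this file.

/*
 * Illustrative dispatch table, indexed by _IOC_NR() of the command.
 * The HL_IOCTL_* codes are assumed to be defined in the uapi header.
 */
static const struct hl_ioctl_desc example_ioctls[] = {
	[_IOC_NR(HL_IOCTL_CB)]      = { .cmd = HL_IOCTL_CB,      .func = hl_cb_ioctl },
	[_IOC_NR(HL_IOCTL_CS)]      = { .cmd = HL_IOCTL_CS,      .func = hl_cs_ioctl },
	[_IOC_NR(HL_IOCTL_WAIT_CS)] = { .cmd = HL_IOCTL_WAIT_CS, .func = hl_cs_wait_ioctl },
	[_IOC_NR(HL_IOCTL_MEMORY)]  = { .cmd = HL_IOCTL_MEMORY,  .func = hl_mem_ioctl },
};

The top-level ioctl entry point can then look up example_ioctls[_IOC_NR(cmd)].func and invoke it with the FD's hl_fpriv and the copied-in argument structure.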
- * @address: The start address of the area we want to validate. - * @size: The size in bytes of the area we want to validate. - * @range_start_address: The start address of the valid range. - * @range_end_address: The end address of the valid range. - * - * Return: true if the area is inside the valid range, false otherwise. - */ -static inline bool hl_mem_area_inside_range(u64 address, u32 size, - u64 range_start_address, u64 range_end_address) -{ - u64 end_address = address + size; - - if ((address >= range_start_address) && - (end_address <= range_end_address) && - (end_address > address)) - return true; - - return false; -} - -/** - * hl_mem_area_crosses_range() - Checks whether address+size crossing a range. - * @address: The start address of the area we want to validate. - * @size: The size in bytes of the area we want to validate. - * @range_start_address: The start address of the valid range. - * @range_end_address: The end address of the valid range. - * - * Return: true if the area overlaps part or all of the valid range, - * false otherwise. - */ -static inline bool hl_mem_area_crosses_range(u64 address, u32 size, - u64 range_start_address, u64 range_end_address) -{ - u64 end_address = address + size; - - if ((address >= range_start_address) && - (address < range_end_address)) - return true; - - if ((end_address >= range_start_address) && - (end_address < range_end_address)) - return true; - - if ((address < range_start_address) && - (end_address >= range_end_address)) - return true; - - return false; -} - -int hl_device_open(struct inode *inode, struct file *filp); -int hl_device_open_ctrl(struct inode *inode, struct file *filp); -bool hl_device_disabled_or_in_reset(struct hl_device *hdev); -enum hl_device_status hl_device_status(struct hl_device *hdev); -int hl_device_set_debug_mode(struct hl_device *hdev, bool enable); -int create_hdev(struct hl_device **dev, struct pci_dev *pdev, - enum hl_asic_type asic_type, int minor); -void destroy_hdev(struct hl_device *hdev); -int hl_hw_queues_create(struct hl_device *hdev); -void hl_hw_queues_destroy(struct hl_device *hdev); -int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, - u32 cb_size, u64 cb_ptr); -int hl_hw_queue_schedule_cs(struct hl_cs *cs); -u32 hl_hw_queue_add_ptr(u32 ptr, u16 val); -void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id); -void hl_int_hw_queue_update_ci(struct hl_cs *cs); -void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset); - -#define hl_queue_inc_ptr(p) hl_hw_queue_add_ptr(p, 1) -#define hl_pi_2_offset(pi) ((pi) & (HL_QUEUE_LENGTH - 1)) - -int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id); -void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q); -int hl_eq_init(struct hl_device *hdev, struct hl_eq *q); -void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q); -void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q); -void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q); -irqreturn_t hl_irq_handler_cq(int irq, void *arg); -irqreturn_t hl_irq_handler_eq(int irq, void *arg); -u32 hl_cq_inc_ptr(u32 ptr); - -int hl_asid_init(struct hl_device *hdev); -void hl_asid_fini(struct hl_device *hdev); -unsigned long hl_asid_alloc(struct hl_device *hdev); -void hl_asid_free(struct hl_device *hdev, unsigned long asid); - -int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv); -void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx); -int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx); -void 
hl_ctx_do_release(struct kref *ref); -void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx); -int hl_ctx_put(struct hl_ctx *ctx); -struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq); -void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr); -void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr); - -int hl_device_init(struct hl_device *hdev, struct class *hclass); -void hl_device_fini(struct hl_device *hdev); -int hl_device_suspend(struct hl_device *hdev); -int hl_device_resume(struct hl_device *hdev); -int hl_device_reset(struct hl_device *hdev, bool hard_reset, - bool from_hard_reset_thread); -void hl_hpriv_get(struct hl_fpriv *hpriv); -void hl_hpriv_put(struct hl_fpriv *hpriv); -int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq); -uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms); - -int hl_build_hwmon_channel_info(struct hl_device *hdev, - struct armcp_sensor *sensors_arr); - -int hl_sysfs_init(struct hl_device *hdev); -void hl_sysfs_fini(struct hl_device *hdev); - -int hl_hwmon_init(struct hl_device *hdev); -void hl_hwmon_fini(struct hl_device *hdev); - -int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size, - u64 *handle, int ctx_id); -int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle); -int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); -struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u32 handle); -void hl_cb_put(struct hl_cb *cb); -void hl_cb_mgr_init(struct hl_cb_mgr *mgr); -void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr); -struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size); -int hl_cb_pool_init(struct hl_device *hdev); -int hl_cb_pool_fini(struct hl_device *hdev); - -void hl_cs_rollback_all(struct hl_device *hdev); -struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, - enum hl_queue_type queue_type, bool is_kernel_allocated_cb); -void hl_sob_reset_error(struct kref *ref); - -void goya_set_asic_funcs(struct hl_device *hdev); -void gaudi_set_asic_funcs(struct hl_device *hdev); - -int hl_vm_ctx_init(struct hl_ctx *ctx); -void hl_vm_ctx_fini(struct hl_ctx *ctx); - -int hl_vm_init(struct hl_device *hdev); -void hl_vm_fini(struct hl_device *hdev); - -int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, - struct hl_userptr *userptr); -void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr); -void hl_userptr_delete_list(struct hl_device *hdev, - struct list_head *userptr_list); -bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size, - struct list_head *userptr_list, - struct hl_userptr **userptr); - -int hl_mmu_init(struct hl_device *hdev); -void hl_mmu_fini(struct hl_device *hdev); -int hl_mmu_ctx_init(struct hl_ctx *ctx); -void hl_mmu_ctx_fini(struct hl_ctx *ctx); -int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, - u32 page_size, bool flush_pte); -int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, - bool flush_pte); -void hl_mmu_swap_out(struct hl_ctx *ctx); -void hl_mmu_swap_in(struct hl_ctx *ctx); - -int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, - void __iomem *dst); -int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode); -int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, - u16 len, u32 timeout, long *result); -int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type); -int hl_fw_unmask_irq_arr(struct hl_device *hdev, 
const u32 *irq_arr, - size_t irq_arr_size); -int hl_fw_test_cpu_queue(struct hl_device *hdev); -void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, - dma_addr_t *dma_handle); -void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, - void *vaddr); -int hl_fw_send_heartbeat(struct hl_device *hdev); -int hl_fw_armcp_info_get(struct hl_device *hdev); -int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); -int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, - u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, - u32 boot_err0_reg, bool skip_bmc, - u32 cpu_timeout, u32 boot_fit_timeout); - -int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], - bool is_wc[3]); -int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data); -int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar, - u64 addr); -int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, - struct hl_inbound_pci_region *pci_region); -int hl_pci_set_outbound_region(struct hl_device *hdev, - struct hl_outbound_pci_region *pci_region); -int hl_pci_init(struct hl_device *hdev); -void hl_pci_fini(struct hl_device *hdev); - -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); -int hl_get_temperature(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_set_temperature(struct hl_device *hdev, - int sensor_index, u32 attr, long value); -int hl_get_voltage(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_get_current(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_get_fan_speed(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -int hl_get_pwm_info(struct hl_device *hdev, - int sensor_index, u32 attr, long *value); -void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, - long value); -u64 hl_get_max_power(struct hl_device *hdev); -void hl_set_max_power(struct hl_device *hdev, u64 value); -int hl_set_voltage(struct hl_device *hdev, - int sensor_index, u32 attr, long value); -int hl_set_current(struct hl_device *hdev, - int sensor_index, u32 attr, long value); - -#ifdef CONFIG_DEBUG_FS - -void hl_debugfs_init(void); -void hl_debugfs_fini(void); -void hl_debugfs_add_device(struct hl_device *hdev); -void hl_debugfs_remove_device(struct hl_device *hdev); -void hl_debugfs_add_file(struct hl_fpriv *hpriv); -void hl_debugfs_remove_file(struct hl_fpriv *hpriv); -void hl_debugfs_add_cb(struct hl_cb *cb); -void hl_debugfs_remove_cb(struct hl_cb *cb); -void hl_debugfs_add_cs(struct hl_cs *cs); -void hl_debugfs_remove_cs(struct hl_cs *cs); -void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job); -void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job); -void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr); -void hl_debugfs_remove_userptr(struct hl_device *hdev, - struct hl_userptr *userptr); -void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); -void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); - -#else - -static inline void __init hl_debugfs_init(void) -{ -} - -static inline void hl_debugfs_fini(void) -{ -} - -static inline void hl_debugfs_add_device(struct hl_device *hdev) -{ -} - -static inline void hl_debugfs_remove_device(struct hl_device *hdev) -{ -} - -static inline void hl_debugfs_add_file(struct 
hl_fpriv *hpriv) -{ -} - -static inline void hl_debugfs_remove_file(struct hl_fpriv *hpriv) -{ -} - -static inline void hl_debugfs_add_cb(struct hl_cb *cb) -{ -} - -static inline void hl_debugfs_remove_cb(struct hl_cb *cb) -{ -} - -static inline void hl_debugfs_add_cs(struct hl_cs *cs) -{ -} - -static inline void hl_debugfs_remove_cs(struct hl_cs *cs) -{ -} - -static inline void hl_debugfs_add_job(struct hl_device *hdev, - struct hl_cs_job *job) -{ -} - -static inline void hl_debugfs_remove_job(struct hl_device *hdev, - struct hl_cs_job *job) -{ -} - -static inline void hl_debugfs_add_userptr(struct hl_device *hdev, - struct hl_userptr *userptr) -{ -} - -static inline void hl_debugfs_remove_userptr(struct hl_device *hdev, - struct hl_userptr *userptr) -{ -} - -static inline void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, - struct hl_ctx *ctx) -{ -} - -static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, - struct hl_ctx *ctx) -{ -} - -#endif - -/* IOCTLs */ -long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); -long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg); -int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data); - -#endif /* HABANALABSP_H_ */ diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c deleted file mode 100644 index f38664b03865..000000000000 --- a/drivers/misc/habanalabs/habanalabs_drv.c +++ /dev/null @@ -1,529 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. - * - */ - -#define pr_fmt(fmt) "habanalabs: " fmt - -#include "habanalabs.h" - -#include -#include - -#define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team" - -#define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators" - -MODULE_AUTHOR(HL_DRIVER_AUTHOR); -MODULE_DESCRIPTION(HL_DRIVER_DESC); -MODULE_LICENSE("GPL v2"); - -static int hl_major; -static struct class *hl_class; -static DEFINE_IDR(hl_devs_idr); -static DEFINE_MUTEX(hl_devs_idr_lock); - -static int timeout_locked = 5; -static int reset_on_lockup = 1; - -module_param(timeout_locked, int, 0444); -MODULE_PARM_DESC(timeout_locked, - "Device lockup timeout in seconds (0 = disabled, default 5s)"); - -module_param(reset_on_lockup, int, 0444); -MODULE_PARM_DESC(reset_on_lockup, - "Do device reset on lockup (0 = no, 1 = yes, default yes)"); - -#define PCI_VENDOR_ID_HABANALABS 0x1da3 - -#define PCI_IDS_GOYA 0x0001 -#define PCI_IDS_GAUDI 0x1000 - -static const struct pci_device_id ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, - { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, - { 0, } -}; -MODULE_DEVICE_TABLE(pci, ids); - -/* - * get_asic_type - translate device id to asic type - * - * @device: id of the PCI device - * - * Translate device id to asic type. 
- * In case of unidentified device, return -1 - */ -static enum hl_asic_type get_asic_type(u16 device) -{ - enum hl_asic_type asic_type; - - switch (device) { - case PCI_IDS_GOYA: - asic_type = ASIC_GOYA; - break; - case PCI_IDS_GAUDI: - asic_type = ASIC_GAUDI; - break; - default: - asic_type = ASIC_INVALID; - break; - } - - return asic_type; -} - -/* - * hl_device_open - open function for habanalabs device - * - * @inode: pointer to inode structure - * @filp: pointer to file structure - * - * Called when process opens an habanalabs device. - */ -int hl_device_open(struct inode *inode, struct file *filp) -{ - struct hl_device *hdev; - struct hl_fpriv *hpriv; - int rc; - - mutex_lock(&hl_devs_idr_lock); - hdev = idr_find(&hl_devs_idr, iminor(inode)); - mutex_unlock(&hl_devs_idr_lock); - - if (!hdev) { - pr_err("Couldn't find device %d:%d\n", - imajor(inode), iminor(inode)); - return -ENXIO; - } - - hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); - if (!hpriv) - return -ENOMEM; - - hpriv->hdev = hdev; - filp->private_data = hpriv; - hpriv->filp = filp; - mutex_init(&hpriv->restore_phase_mutex); - kref_init(&hpriv->refcount); - nonseekable_open(inode, filp); - - hl_cb_mgr_init(&hpriv->cb_mgr); - hl_ctx_mgr_init(&hpriv->ctx_mgr); - - hpriv->taskpid = find_get_pid(current->pid); - - mutex_lock(&hdev->fpriv_list_lock); - - if (hl_device_disabled_or_in_reset(hdev)) { - dev_err_ratelimited(hdev->dev, - "Can't open %s because it is disabled or in reset\n", - dev_name(hdev->dev)); - rc = -EPERM; - goto out_err; - } - - if (hdev->in_debug) { - dev_err_ratelimited(hdev->dev, - "Can't open %s because it is being debugged by another user\n", - dev_name(hdev->dev)); - rc = -EPERM; - goto out_err; - } - - if (hdev->compute_ctx) { - dev_dbg_ratelimited(hdev->dev, - "Can't open %s because another user is working on it\n", - dev_name(hdev->dev)); - rc = -EBUSY; - goto out_err; - } - - rc = hl_ctx_create(hdev, hpriv); - if (rc) { - dev_err(hdev->dev, "Failed to create context %d\n", rc); - goto out_err; - } - - /* Device is IDLE at this point so it is legal to change PLLs. 
- * There is no need to check anything because if the PLL is - * already HIGH, the set function will return without doing - * anything - */ - hl_device_set_frequency(hdev, PLL_HIGH); - - list_add(&hpriv->dev_node, &hdev->fpriv_list); - mutex_unlock(&hdev->fpriv_list_lock); - - hl_debugfs_add_file(hpriv); - - return 0; - -out_err: - mutex_unlock(&hdev->fpriv_list_lock); - - hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); - hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); - filp->private_data = NULL; - mutex_destroy(&hpriv->restore_phase_mutex); - put_pid(hpriv->taskpid); - - kfree(hpriv); - - return rc; -} - -int hl_device_open_ctrl(struct inode *inode, struct file *filp) -{ - struct hl_device *hdev; - struct hl_fpriv *hpriv; - int rc; - - mutex_lock(&hl_devs_idr_lock); - hdev = idr_find(&hl_devs_idr, iminor(inode)); - mutex_unlock(&hl_devs_idr_lock); - - if (!hdev) { - pr_err("Couldn't find device %d:%d\n", - imajor(inode), iminor(inode)); - return -ENXIO; - } - - hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); - if (!hpriv) - return -ENOMEM; - - mutex_lock(&hdev->fpriv_list_lock); - - if (hl_device_disabled_or_in_reset(hdev)) { - dev_err_ratelimited(hdev->dev_ctrl, - "Can't open %s because it is disabled or in reset\n", - dev_name(hdev->dev_ctrl)); - rc = -EPERM; - goto out_err; - } - - list_add(&hpriv->dev_node, &hdev->fpriv_list); - mutex_unlock(&hdev->fpriv_list_lock); - - hpriv->hdev = hdev; - filp->private_data = hpriv; - hpriv->filp = filp; - hpriv->is_control = true; - nonseekable_open(inode, filp); - - hpriv->taskpid = find_get_pid(current->pid); - - return 0; - -out_err: - mutex_unlock(&hdev->fpriv_list_lock); - kfree(hpriv); - return rc; -} - -static void set_driver_behavior_per_device(struct hl_device *hdev) -{ - hdev->mmu_enable = 1; - hdev->cpu_enable = 1; - hdev->fw_loading = 1; - hdev->cpu_queues_enable = 1; - hdev->heartbeat = 1; - hdev->clock_gating = 1; - - hdev->reset_pcilink = 0; - hdev->axi_drain = 0; - hdev->sram_scrambler_enable = 1; - hdev->dram_scrambler_enable = 1; - hdev->bmc_enable = 1; - hdev->hard_reset_on_fw_events = 1; -} - -/* - * create_hdev - create habanalabs device instance - * - * @dev: will hold the pointer to the new habanalabs device structure - * @pdev: pointer to the pci device - * @asic_type: in case of simulator device, which device is it - * @minor: in case of simulator device, the minor of the device - * - * Allocate memory for habanalabs device and initialize basic fields - * Identify the ASIC type - * Allocate ID (minor) for the device (only for real devices) - */ -int create_hdev(struct hl_device **dev, struct pci_dev *pdev, - enum hl_asic_type asic_type, int minor) -{ - struct hl_device *hdev; - int rc, main_id, ctrl_id = 0; - - *dev = NULL; - - hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); - if (!hdev) - return -ENOMEM; - - /* First, we must find out which ASIC are we handling. 
This is needed - * to configure the behavior of the driver (kernel parameters) - */ - if (pdev) { - hdev->asic_type = get_asic_type(pdev->device); - if (hdev->asic_type == ASIC_INVALID) { - dev_err(&pdev->dev, "Unsupported ASIC\n"); - rc = -ENODEV; - goto free_hdev; - } - } else { - hdev->asic_type = asic_type; - } - - hdev->major = hl_major; - hdev->reset_on_lockup = reset_on_lockup; - hdev->pldm = 0; - - set_driver_behavior_per_device(hdev); - - if (timeout_locked) - hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000); - else - hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; - - hdev->disabled = true; - hdev->pdev = pdev; /* can be NULL in case of simulator device */ - - /* Set default DMA mask to 32 bits */ - hdev->dma_mask = 32; - - mutex_lock(&hl_devs_idr_lock); - - /* Always save 2 numbers, 1 for main device and 1 for control. - * They must be consecutive - */ - main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, - GFP_KERNEL); - - if (main_id >= 0) - ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1, - main_id + 2, GFP_KERNEL); - - mutex_unlock(&hl_devs_idr_lock); - - if ((main_id < 0) || (ctrl_id < 0)) { - if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC)) - pr_err("too many devices in the system\n"); - - if (main_id >= 0) { - mutex_lock(&hl_devs_idr_lock); - idr_remove(&hl_devs_idr, main_id); - mutex_unlock(&hl_devs_idr_lock); - } - - rc = -EBUSY; - goto free_hdev; - } - - hdev->id = main_id; - hdev->id_control = ctrl_id; - - *dev = hdev; - - return 0; - -free_hdev: - kfree(hdev); - return rc; -} - -/* - * destroy_hdev - destroy habanalabs device instance - * - * @dev: pointer to the habanalabs device structure - * - */ -void destroy_hdev(struct hl_device *hdev) -{ - /* Remove device from the device list */ - mutex_lock(&hl_devs_idr_lock); - idr_remove(&hl_devs_idr, hdev->id); - idr_remove(&hl_devs_idr, hdev->id_control); - mutex_unlock(&hl_devs_idr_lock); - - kfree(hdev); -} - -static int hl_pmops_suspend(struct device *dev) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - pr_debug("Going to suspend PCI device\n"); - - if (!hdev) { - pr_err("device pointer is NULL in suspend\n"); - return 0; - } - - return hl_device_suspend(hdev); -} - -static int hl_pmops_resume(struct device *dev) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - pr_debug("Going to resume PCI device\n"); - - if (!hdev) { - pr_err("device pointer is NULL in resume\n"); - return 0; - } - - return hl_device_resume(hdev); -} - -/* - * hl_pci_probe - probe PCI habanalabs devices - * - * @pdev: pointer to pci device - * @id: pointer to pci device id structure - * - * Standard PCI probe function for habanalabs device. 
- * Create a new habanalabs device and initialize it according to the - * device's type - */ -static int hl_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *id) -{ - struct hl_device *hdev; - int rc; - - dev_info(&pdev->dev, HL_NAME - " device found [%04x:%04x] (rev %x)\n", - (int)pdev->vendor, (int)pdev->device, (int)pdev->revision); - - rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1); - if (rc) - return rc; - - pci_set_drvdata(pdev, hdev); - - rc = hl_device_init(hdev, hl_class); - if (rc) { - dev_err(&pdev->dev, "Fatal error during habanalabs device init\n"); - rc = -ENODEV; - goto disable_device; - } - - return 0; - -disable_device: - pci_set_drvdata(pdev, NULL); - destroy_hdev(hdev); - - return rc; -} - -/* - * hl_pci_remove - remove PCI habanalabs devices - * - * @pdev: pointer to pci device - * - * Standard PCI remove function for habanalabs device - */ -static void hl_pci_remove(struct pci_dev *pdev) -{ - struct hl_device *hdev; - - hdev = pci_get_drvdata(pdev); - if (!hdev) - return; - - hl_device_fini(hdev); - pci_set_drvdata(pdev, NULL); - - destroy_hdev(hdev); -} - -static const struct dev_pm_ops hl_pm_ops = { - .suspend = hl_pmops_suspend, - .resume = hl_pmops_resume, -}; - -static struct pci_driver hl_pci_driver = { - .name = HL_NAME, - .id_table = ids, - .probe = hl_pci_probe, - .remove = hl_pci_remove, - .driver.pm = &hl_pm_ops, -}; - -/* - * hl_init - Initialize the habanalabs kernel driver - */ -static int __init hl_init(void) -{ - int rc; - dev_t dev; - - pr_info("loading driver\n"); - - rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME); - if (rc < 0) { - pr_err("unable to get major\n"); - return rc; - } - - hl_major = MAJOR(dev); - - hl_class = class_create(THIS_MODULE, HL_NAME); - if (IS_ERR(hl_class)) { - pr_err("failed to allocate class\n"); - rc = PTR_ERR(hl_class); - goto remove_major; - } - - hl_debugfs_init(); - - rc = pci_register_driver(&hl_pci_driver); - if (rc) { - pr_err("failed to register pci device\n"); - goto remove_debugfs; - } - - pr_debug("driver loaded\n"); - - return 0; - -remove_debugfs: - hl_debugfs_fini(); - class_destroy(hl_class); -remove_major: - unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); - return rc; -} - -/* - * hl_exit - Release all resources of the habanalabs kernel driver - */ -static void __exit hl_exit(void) -{ - pci_unregister_driver(&hl_pci_driver); - - /* - * Removing debugfs must be after all devices or simulator devices - * have been removed because otherwise we get a bug in the - * debugfs module for referencing NULL objects - */ - hl_debugfs_fini(); - - class_destroy(hl_class); - unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); - - idr_destroy(&hl_devs_idr); - - pr_debug("driver removed\n"); -} - -module_init(hl_init); -module_exit(hl_exit); diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c deleted file mode 100644 index 5af1c03da473..000000000000 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ /dev/null @@ -1,546 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. 
- */ - -#include -#include "habanalabs.h" - -#include -#include -#include - -static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = { - [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr), - [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf), - [HL_DEBUG_OP_STM] = sizeof(struct hl_debug_params_stm), - [HL_DEBUG_OP_FUNNEL] = 0, - [HL_DEBUG_OP_BMON] = sizeof(struct hl_debug_params_bmon), - [HL_DEBUG_OP_SPMU] = sizeof(struct hl_debug_params_spmu), - [HL_DEBUG_OP_TIMESTAMP] = 0 - -}; - -static int device_status_info(struct hl_device *hdev, struct hl_info_args *args) -{ - struct hl_info_device_status dev_stat = {0}; - u32 size = args->return_size; - void __user *out = (void __user *) (uintptr_t) args->return_pointer; - - if ((!size) || (!out)) - return -EINVAL; - - dev_stat.status = hl_device_status(hdev); - - return copy_to_user(out, &dev_stat, - min((size_t)size, sizeof(dev_stat))) ? -EFAULT : 0; -} - -static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) -{ - struct hl_info_hw_ip_info hw_ip = {0}; - u32 size = args->return_size; - void __user *out = (void __user *) (uintptr_t) args->return_pointer; - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 sram_kmd_size, dram_kmd_size; - - if ((!size) || (!out)) - return -EINVAL; - - sram_kmd_size = (prop->sram_user_base_address - - prop->sram_base_address); - dram_kmd_size = (prop->dram_user_base_address - - prop->dram_base_address); - - hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev); - hw_ip.sram_base_address = prop->sram_user_base_address; - hw_ip.dram_base_address = prop->dram_user_base_address; - hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask; - hw_ip.sram_size = prop->sram_size - sram_kmd_size; - hw_ip.dram_size = prop->dram_size - dram_kmd_size; - if (hw_ip.dram_size > PAGE_SIZE) - hw_ip.dram_enabled = 1; - hw_ip.num_of_events = prop->num_of_events; - - memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version, - min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN)); - - memcpy(hw_ip.card_name, prop->armcp_info.card_name, - min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN)); - - hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version); - hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location); - - hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr; - hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf; - hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od; - hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor; - - return copy_to_user(out, &hw_ip, - min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0; -} - -static int hw_events_info(struct hl_device *hdev, bool aggregate, - struct hl_info_args *args) -{ - u32 size, max_size = args->return_size; - void __user *out = (void __user *) (uintptr_t) args->return_pointer; - void *arr; - - if ((!max_size) || (!out)) - return -EINVAL; - - arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size); - - return copy_to_user(out, arr, min(max_size, size)) ? 
-EFAULT : 0; -} - -static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args) -{ - struct hl_device *hdev = hpriv->hdev; - struct hl_info_dram_usage dram_usage = {0}; - u32 max_size = args->return_size; - void __user *out = (void __user *) (uintptr_t) args->return_pointer; - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 dram_kmd_size; - - if ((!max_size) || (!out)) - return -EINVAL; - - dram_kmd_size = (prop->dram_user_base_address - - prop->dram_base_address); - dram_usage.dram_free_mem = (prop->dram_size - dram_kmd_size) - - atomic64_read(&hdev->dram_used_mem); - if (hpriv->ctx) - dram_usage.ctx_dram_mem = - atomic64_read(&hpriv->ctx->dram_phys_mem); - - return copy_to_user(out, &dram_usage, - min((size_t) max_size, sizeof(dram_usage))) ? -EFAULT : 0; -} - -static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) -{ - struct hl_info_hw_idle hw_idle = {0}; - u32 max_size = args->return_size; - void __user *out = (void __user *) (uintptr_t) args->return_pointer; - - if ((!max_size) || (!out)) - return -EINVAL; - - hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, - &hw_idle.busy_engines_mask, NULL); - - return copy_to_user(out, &hw_idle, - min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; -} - -static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args) -{ - struct hl_debug_params *params; - void *input = NULL, *output = NULL; - int rc; - - params = kzalloc(sizeof(*params), GFP_KERNEL); - if (!params) - return -ENOMEM; - - params->reg_idx = args->reg_idx; - params->enable = args->enable; - params->op = args->op; - - if (args->input_ptr && args->input_size) { - input = kzalloc(hl_debug_struct_size[args->op], GFP_KERNEL); - if (!input) { - rc = -ENOMEM; - goto out; - } - - if (copy_from_user(input, u64_to_user_ptr(args->input_ptr), - args->input_size)) { - rc = -EFAULT; - dev_err(hdev->dev, "failed to copy input debug data\n"); - goto out; - } - - params->input = input; - } - - if (args->output_ptr && args->output_size) { - output = kzalloc(args->output_size, GFP_KERNEL); - if (!output) { - rc = -ENOMEM; - goto out; - } - - params->output = output; - params->output_size = args->output_size; - } - - rc = hdev->asic_funcs->debug_coresight(hdev, params); - if (rc) { - dev_err(hdev->dev, - "debug coresight operation failed %d\n", rc); - goto out; - } - - if (output && copy_to_user((void __user *) (uintptr_t) args->output_ptr, - output, args->output_size)) { - dev_err(hdev->dev, "copy to user failed in debug ioctl\n"); - rc = -EFAULT; - goto out; - } - - -out: - kfree(params); - kfree(output); - kfree(input); - - return rc; -} - -static int device_utilization(struct hl_device *hdev, struct hl_info_args *args) -{ - struct hl_info_device_utilization device_util = {0}; - u32 max_size = args->return_size; - void __user *out = (void __user *) (uintptr_t) args->return_pointer; - - if ((!max_size) || (!out)) - return -EINVAL; - - if ((args->period_ms < 100) || (args->period_ms > 1000) || - (args->period_ms % 100)) { - dev_err(hdev->dev, - "period %u must be between 100 - 1000 and must be divisible by 100\n", - args->period_ms); - return -EINVAL; - } - - device_util.utilization = hl_device_utilization(hdev, args->period_ms); - - return copy_to_user(out, &device_util, - min((size_t) max_size, sizeof(device_util))) ? 
-EFAULT : 0; -} - -static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args) -{ - struct hl_info_clk_rate clk_rate = {0}; - u32 max_size = args->return_size; - void __user *out = (void __user *) (uintptr_t) args->return_pointer; - int rc; - - if ((!max_size) || (!out)) - return -EINVAL; - - rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, - &clk_rate.max_clk_rate_mhz); - if (rc) - return rc; - - return copy_to_user(out, &clk_rate, - min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0; -} - -static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args) -{ - struct hl_info_reset_count reset_count = {0}; - u32 max_size = args->return_size; - void __user *out = (void __user *) (uintptr_t) args->return_pointer; - - if ((!max_size) || (!out)) - return -EINVAL; - - reset_count.hard_reset_cnt = hdev->hard_reset_cnt; - reset_count.soft_reset_cnt = hdev->soft_reset_cnt; - - return copy_to_user(out, &reset_count, - min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0; -} - -static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args) -{ - struct hl_info_time_sync time_sync = {0}; - u32 max_size = args->return_size; - void __user *out = (void __user *) (uintptr_t) args->return_pointer; - - if ((!max_size) || (!out)) - return -EINVAL; - - time_sync.device_time = hdev->asic_funcs->get_device_time(hdev); - time_sync.host_time = ktime_get_raw_ns(); - - return copy_to_user(out, &time_sync, - min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0; -} - -static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) -{ - struct hl_device *hdev = hpriv->hdev; - struct hl_info_cs_counters cs_counters = {0}; - u32 max_size = args->return_size; - void __user *out = (void __user *) (uintptr_t) args->return_pointer; - - if ((!max_size) || (!out)) - return -EINVAL; - - memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters, - sizeof(struct hl_cs_counters)); - - if (hpriv->ctx) - memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters, - sizeof(struct hl_cs_counters)); - - return copy_to_user(out, &cs_counters, - min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0; -} - -static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, - struct device *dev) -{ - struct hl_info_args *args = data; - struct hl_device *hdev = hpriv->hdev; - int rc; - - /* - * Information is returned for the following opcodes even if the device - * is disabled or in reset. - */ - switch (args->op) { - case HL_INFO_HW_IP_INFO: - return hw_ip_info(hdev, args); - - case HL_INFO_DEVICE_STATUS: - return device_status_info(hdev, args); - - case HL_INFO_RESET_COUNT: - return get_reset_count(hdev, args); - - default: - break; - } - - if (hl_device_disabled_or_in_reset(hdev)) { - dev_warn_ratelimited(dev, - "Device is %s. Can't execute INFO IOCTL\n", - atomic_read(&hdev->in_reset) ? 
"in_reset" : "disabled"); - return -EBUSY; - } - - switch (args->op) { - case HL_INFO_HW_EVENTS: - rc = hw_events_info(hdev, false, args); - break; - - case HL_INFO_DRAM_USAGE: - rc = dram_usage_info(hpriv, args); - break; - - case HL_INFO_HW_IDLE: - rc = hw_idle(hdev, args); - break; - - case HL_INFO_DEVICE_UTILIZATION: - rc = device_utilization(hdev, args); - break; - - case HL_INFO_HW_EVENTS_AGGREGATE: - rc = hw_events_info(hdev, true, args); - break; - - case HL_INFO_CLK_RATE: - rc = get_clk_rate(hdev, args); - break; - - case HL_INFO_TIME_SYNC: - return time_sync_info(hdev, args); - - case HL_INFO_CS_COUNTERS: - return cs_counters_info(hpriv, args); - - default: - dev_err(dev, "Invalid request %d\n", args->op); - rc = -ENOTTY; - break; - } - - return rc; -} - -static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) -{ - return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev); -} - -static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data) -{ - return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl); -} - -static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) -{ - struct hl_debug_args *args = data; - struct hl_device *hdev = hpriv->hdev; - int rc = 0; - - if (hl_device_disabled_or_in_reset(hdev)) { - dev_warn_ratelimited(hdev->dev, - "Device is %s. Can't execute DEBUG IOCTL\n", - atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); - return -EBUSY; - } - - switch (args->op) { - case HL_DEBUG_OP_ETR: - case HL_DEBUG_OP_ETF: - case HL_DEBUG_OP_STM: - case HL_DEBUG_OP_FUNNEL: - case HL_DEBUG_OP_BMON: - case HL_DEBUG_OP_SPMU: - case HL_DEBUG_OP_TIMESTAMP: - if (!hdev->in_debug) { - dev_err_ratelimited(hdev->dev, - "Rejecting debug configuration request because device not in debug mode\n"); - return -EFAULT; - } - args->input_size = - min(args->input_size, hl_debug_struct_size[args->op]); - rc = debug_coresight(hdev, args); - break; - case HL_DEBUG_OP_SET_MODE: - rc = hl_device_set_debug_mode(hdev, (bool) args->enable); - break; - default: - dev_err(hdev->dev, "Invalid request %d\n", args->op); - rc = -ENOTTY; - break; - } - - return rc; -} - -#define HL_IOCTL_DEF(ioctl, _func) \ - [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func} - -static const struct hl_ioctl_desc hl_ioctls[] = { - HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl), - HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl), - HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl), - HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl), - HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl), - HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl) -}; - -static const struct hl_ioctl_desc hl_ioctls_control[] = { - HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control) -}; - -static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, - const struct hl_ioctl_desc *ioctl, struct device *dev) -{ - struct hl_fpriv *hpriv = filep->private_data; - struct hl_device *hdev = hpriv->hdev; - unsigned int nr = _IOC_NR(cmd); - char stack_kdata[128] = {0}; - char *kdata = NULL; - unsigned int usize, asize; - hl_ioctl_t *func; - u32 hl_size; - int retcode; - - if (hdev->hard_reset_pending) { - dev_crit_ratelimited(hdev->dev_ctrl, - "Device HARD reset pending! 
Please close FD\n"); - return -ENODEV; - } - - /* Do not trust userspace, use our own definition */ - func = ioctl->func; - - if (unlikely(!func)) { - dev_dbg(dev, "no function\n"); - retcode = -ENOTTY; - goto out_err; - } - - hl_size = _IOC_SIZE(ioctl->cmd); - usize = asize = _IOC_SIZE(cmd); - if (hl_size > asize) - asize = hl_size; - - cmd = ioctl->cmd; - - if (cmd & (IOC_IN | IOC_OUT)) { - if (asize <= sizeof(stack_kdata)) { - kdata = stack_kdata; - } else { - kdata = kzalloc(asize, GFP_KERNEL); - if (!kdata) { - retcode = -ENOMEM; - goto out_err; - } - } - } - - if (cmd & IOC_IN) { - if (copy_from_user(kdata, (void __user *)arg, usize)) { - retcode = -EFAULT; - goto out_err; - } - } else if (cmd & IOC_OUT) { - memset(kdata, 0, usize); - } - - retcode = func(hpriv, kdata); - - if ((cmd & IOC_OUT) && copy_to_user((void __user *)arg, kdata, usize)) - retcode = -EFAULT; - -out_err: - if (retcode) - dev_dbg(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", - task_pid_nr(current), cmd, nr); - - if (kdata != stack_kdata) - kfree(kdata); - - return retcode; -} - -long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) -{ - struct hl_fpriv *hpriv = filep->private_data; - struct hl_device *hdev = hpriv->hdev; - const struct hl_ioctl_desc *ioctl = NULL; - unsigned int nr = _IOC_NR(cmd); - - if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) { - ioctl = &hl_ioctls[nr]; - } else { - dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n", - task_pid_nr(current), nr); - return -ENOTTY; - } - - return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev); -} - -long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) -{ - struct hl_fpriv *hpriv = filep->private_data; - struct hl_device *hdev = hpriv->hdev; - const struct hl_ioctl_desc *ioctl = NULL; - unsigned int nr = _IOC_NR(cmd); - - if (nr == _IOC_NR(HL_IOCTL_INFO)) { - ioctl = &hl_ioctls_control[nr]; - } else { - dev_err(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n", - task_pid_nr(current), nr); - return -ENOTTY; - } - - return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl); -} diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c deleted file mode 100644 index 287681646071..000000000000 --- a/drivers/misc/habanalabs/hw_queue.c +++ /dev/null @@ -1,918 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. - */ - -#include "habanalabs.h" - -#include - -/* - * hl_queue_add_ptr - add to pi or ci and checks if it wraps around - * - * @ptr: the current pi/ci value - * @val: the amount to add - * - * Add val to ptr. It can go until twice the queue length. 
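For reference, the INFO ioctl path removed above (habanalabs_ioctl.c) is driven from user space roughly as follows. This is a hypothetical, free-standing sketch, not part of the patch: it assumes the exported uapi header is available as <misc/habanalabs.h> and that the compute device node is /dev/hl0, both of which are assumptions. It fills struct hl_info_args the way hw_ip_info() expects and issues HL_IOCTL_INFO with the HL_INFO_HW_IP_INFO opcode.

    /* Hypothetical user-space caller; the header path and device node name
     * are assumptions, not taken from this patch.
     */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <misc/habanalabs.h>    /* uapi: HL_IOCTL_INFO, struct hl_info_args */

    int main(void)
    {
        struct hl_info_hw_ip_info hw_ip;
        struct hl_info_args args;
        int fd;

        fd = open("/dev/hl0", O_RDWR);          /* assumed device node */
        if (fd < 0) {
            perror("open");
            return 1;
        }

        memset(&args, 0, sizeof(args));
        memset(&hw_ip, 0, sizeof(hw_ip));
        args.op = HL_INFO_HW_IP_INFO;
        args.return_pointer = (uintptr_t) &hw_ip;
        args.return_size = sizeof(hw_ip);

        if (ioctl(fd, HL_IOCTL_INFO, &args))
            perror("HL_IOCTL_INFO");
        else
            printf("device id 0x%x, dram size %llu bytes\n",
                   (unsigned int) hw_ip.device_id,
                   (unsigned long long) hw_ip.dram_size);

        close(fd);
        return 0;
    }

Note that hw_ip_info() copies back at most min(return_size, sizeof(hw_ip)), so a short user buffer is truncated rather than rejected.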
- */ -inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val) -{ - ptr += val; - ptr &= ((HL_QUEUE_LENGTH << 1) - 1); - return ptr; -} -static inline int queue_ci_get(atomic_t *ci, u32 queue_len) -{ - return atomic_read(ci) & ((queue_len << 1) - 1); -} - -static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len) -{ - int delta = (q->pi - queue_ci_get(&q->ci, queue_len)); - - if (delta >= 0) - return (queue_len - delta); - else - return (abs(delta) - queue_len); -} - -void hl_int_hw_queue_update_ci(struct hl_cs *cs) -{ - struct hl_device *hdev = cs->ctx->hdev; - struct hl_hw_queue *q; - int i; - - if (hdev->disabled) - return; - - q = &hdev->kernel_queues[0]; - for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) { - if (q->queue_type == QUEUE_TYPE_INT) - atomic_add(cs->jobs_in_queue_cnt[i], &q->ci); - } -} - -/* - * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a - * H/W queue. - * @hdev: pointer to habanalabs device structure - * @q: pointer to habanalabs queue structure - * @ctl: BD's control word - * @len: BD's length - * @ptr: BD's pointer - * - * This function assumes there is enough space on the queue to submit a new - * BD to it. It initializes the next BD and calls the device specific - * function to set the pi (and doorbell) - * - * This function must be called when the scheduler mutex is taken - * - */ -static void ext_and_hw_queue_submit_bd(struct hl_device *hdev, - struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr) -{ - struct hl_bd *bd; - - bd = (struct hl_bd *) (uintptr_t) q->kernel_address; - bd += hl_pi_2_offset(q->pi); - bd->ctl = cpu_to_le32(ctl); - bd->len = cpu_to_le32(len); - bd->ptr = cpu_to_le64(ptr); - - q->pi = hl_queue_inc_ptr(q->pi); - hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); -} - -/* - * ext_queue_sanity_checks - perform some sanity checks on external queue - * - * @hdev : pointer to hl_device structure - * @q : pointer to hl_hw_queue structure - * @num_of_entries : how many entries to check for space - * @reserve_cq_entry : whether to reserve an entry in the cq - * - * H/W queues spinlock should be taken before calling this function - * - * Perform the following: - * - Make sure we have enough space in the h/w queue - * - Make sure we have enough space in the completion queue - * - Reserve space in the completion queue (needs to be reversed if there - * is a failure down the road before the actual submission of work). 
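The pi/ci arithmetic in hl_hw_queue_add_ptr() and queue_free_slots() above is easier to see in isolation: both pointers run from 0 up to twice the queue length, which is what keeps a full queue (pi - ci == length) distinguishable from an empty one (pi == ci). A minimal stand-alone sketch, using a made-up queue length of 256 in place of HL_QUEUE_LENGTH:

    #include <stdio.h>
    #include <stdlib.h>

    #define QUEUE_LENGTH 256    /* stand-in for HL_QUEUE_LENGTH */

    /* Same arithmetic as hl_hw_queue_add_ptr(): wrap at twice the length */
    static unsigned int add_ptr(unsigned int ptr, unsigned int val)
    {
        return (ptr + val) & ((QUEUE_LENGTH << 1) - 1);
    }

    /* Same arithmetic as queue_free_slots() */
    static int free_slots(unsigned int pi, unsigned int ci)
    {
        int delta = (int) pi - (int) ci;

        return delta >= 0 ? QUEUE_LENGTH - delta : abs(delta) - QUEUE_LENGTH;
    }

    int main(void)
    {
        unsigned int pi = 0, ci = 0;

        printf("empty: %d free\n", free_slots(pi, ci));   /* 256 */

        pi = add_ptr(pi, QUEUE_LENGTH);                   /* producer filled the queue */
        printf("full:  %d free\n", free_slots(pi, ci));   /* 0 */

        ci = add_ptr(ci, 16);                             /* consumer retired 16 entries */
        printf("drain: %d free\n", free_slots(pi, ci));   /* 16 */

        return 0;
    }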
Only - * do this action if reserve_cq_entry is true - * - */ -static int ext_queue_sanity_checks(struct hl_device *hdev, - struct hl_hw_queue *q, int num_of_entries, - bool reserve_cq_entry) -{ - atomic_t *free_slots = - &hdev->completion_queue[q->cq_id].free_slots_cnt; - int free_slots_cnt; - - /* Check we have enough space in the queue */ - free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH); - - if (free_slots_cnt < num_of_entries) { - dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", - q->hw_queue_id, num_of_entries); - return -EAGAIN; - } - - if (reserve_cq_entry) { - /* - * Check we have enough space in the completion queue - * Add -1 to counter (decrement) unless counter was already 0 - * In that case, CQ is full so we can't submit a new CB because - * we won't get ack on its completion - * atomic_add_unless will return 0 if counter was already 0 - */ - if (atomic_add_negative(num_of_entries * -1, free_slots)) { - dev_dbg(hdev->dev, "No space for %d on CQ %d\n", - num_of_entries, q->hw_queue_id); - atomic_add(num_of_entries, free_slots); - return -EAGAIN; - } - } - - return 0; -} - -/* - * int_queue_sanity_checks - perform some sanity checks on internal queue - * - * @hdev : pointer to hl_device structure - * @q : pointer to hl_hw_queue structure - * @num_of_entries : how many entries to check for space - * - * H/W queues spinlock should be taken before calling this function - * - * Perform the following: - * - Make sure we have enough space in the h/w queue - * - */ -static int int_queue_sanity_checks(struct hl_device *hdev, - struct hl_hw_queue *q, - int num_of_entries) -{ - int free_slots_cnt; - - if (num_of_entries > q->int_queue_len) { - dev_err(hdev->dev, - "Cannot populate queue %u with %u jobs\n", - q->hw_queue_id, num_of_entries); - return -ENOMEM; - } - - /* Check we have enough space in the queue */ - free_slots_cnt = queue_free_slots(q, q->int_queue_len); - - if (free_slots_cnt < num_of_entries) { - dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", - q->hw_queue_id, num_of_entries); - return -EAGAIN; - } - - return 0; -} - -/* - * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue - * @hdev: Pointer to hl_device structure. - * @q: Pointer to hl_hw_queue structure. - * @num_of_entries: How many entries to check for space. - * - * Notice: We do not reserve queue entries so this function mustn't be called - * more than once per CS for the same queue - * - */ -static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q, - int num_of_entries) -{ - int free_slots_cnt; - - /* Check we have enough space in the queue */ - free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH); - - if (free_slots_cnt < num_of_entries) { - dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", - q->hw_queue_id, num_of_entries); - return -EAGAIN; - } - - return 0; -} - -/* - * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion - * - * @hdev: pointer to hl_device structure - * @hw_queue_id: Queue's type - * @cb_size: size of CB - * @cb_ptr: pointer to CB location - * - * This function sends a single CB, that must NOT generate a completion entry - * - */ -int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, - u32 cb_size, u64 cb_ptr) -{ - struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; - int rc = 0; - - /* - * The CPU queue is a synchronous queue with an effective depth of - * a single entry (although it is allocated with room for multiple - * entries). 
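The completion-queue reservation in ext_queue_sanity_checks() above leans on atomic_add_negative() to decrement and detect underflow in one step, undoing the decrement when the CQ is already full. A rough user-space analogue with C11 atomics (a loose parallel for illustration only, not driver code):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int cq_free_slots = 4;    /* pretend the CQ has 4 free entries */

    /* Mirror the reserve-or-roll-back pattern: take the slots first,
     * then give them back if the counter would have gone negative.
     */
    static int reserve_cq(int n)
    {
        if (atomic_fetch_sub(&cq_free_slots, n) < n) {
            atomic_fetch_add(&cq_free_slots, n);    /* rollback */
            return -1;                              /* -EAGAIN in the driver */
        }
        return 0;
    }

    int main(void)
    {
        printf("reserve 3 -> %d\n", reserve_cq(3));    /* 0, one slot left  */
        printf("reserve 2 -> %d\n", reserve_cq(2));    /* -1, rolled back   */
        printf("reserve 1 -> %d\n", reserve_cq(1));    /* 0, queue now full */
        return 0;
    }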
Therefore, there is a different lock, called - * send_cpu_message_lock, that serializes accesses to the CPU queue. - * As a result, we don't need to lock the access to the entire H/W - * queues module when submitting a JOB to the CPU queue - */ - if (q->queue_type != QUEUE_TYPE_CPU) - hdev->asic_funcs->hw_queues_lock(hdev); - - if (hdev->disabled) { - rc = -EPERM; - goto out; - } - - /* - * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue - * type only on init phase, when the queues are empty and being tested, - * so there is no need for sanity checks. - */ - if (q->queue_type != QUEUE_TYPE_HW) { - rc = ext_queue_sanity_checks(hdev, q, 1, false); - if (rc) - goto out; - } - - ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); - -out: - if (q->queue_type != QUEUE_TYPE_CPU) - hdev->asic_funcs->hw_queues_unlock(hdev); - - return rc; -} - -/* - * ext_queue_schedule_job - submit a JOB to an external queue - * - * @job: pointer to the job that needs to be submitted to the queue - * - * This function must be called when the scheduler mutex is taken - * - */ -static void ext_queue_schedule_job(struct hl_cs_job *job) -{ - struct hl_device *hdev = job->cs->ctx->hdev; - struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; - struct hl_cq_entry cq_pkt; - struct hl_cq *cq; - u64 cq_addr; - struct hl_cb *cb; - u32 ctl; - u32 len; - u64 ptr; - - /* - * Update the JOB ID inside the BD CTL so the device would know what - * to write in the completion queue - */ - ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK); - - cb = job->patched_cb; - len = job->job_cb_size; - ptr = cb->bus_address; - - cq_pkt.data = cpu_to_le32( - ((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT) - & CQ_ENTRY_SHADOW_INDEX_MASK) | - (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) | - (1 << CQ_ENTRY_READY_SHIFT)); - - /* - * No need to protect pi_offset because scheduling to the - * H/W queues is done under the scheduler mutex - * - * No need to check if CQ is full because it was already - * checked in ext_queue_sanity_checks - */ - cq = &hdev->completion_queue[q->cq_id]; - cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry); - - hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len, - cq_addr, - le32_to_cpu(cq_pkt.data), - q->msi_vec, - job->contains_dma_pkt); - - q->shadow_queue[hl_pi_2_offset(q->pi)] = job; - - cq->pi = hl_cq_inc_ptr(cq->pi); - - ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); -} - -/* - * int_queue_schedule_job - submit a JOB to an internal queue - * - * @job: pointer to the job that needs to be submitted to the queue - * - * This function must be called when the scheduler mutex is taken - * - */ -static void int_queue_schedule_job(struct hl_cs_job *job) -{ - struct hl_device *hdev = job->cs->ctx->hdev; - struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; - struct hl_bd bd; - __le64 *pi; - - bd.ctl = 0; - bd.len = cpu_to_le32(job->job_cb_size); - bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb); - - pi = (__le64 *) (uintptr_t) (q->kernel_address + - ((q->pi & (q->int_queue_len - 1)) * sizeof(bd))); - - q->pi++; - q->pi &= ((q->int_queue_len << 1) - 1); - - hdev->asic_funcs->pqe_write(hdev, pi, &bd); - - hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); -} - -/* - * hw_queue_schedule_job - submit a JOB to a H/W queue - * - * @job: pointer to the job that needs to be submitted to the queue - * - * This function must be called when the scheduler mutex is taken - * - */ -static void hw_queue_schedule_job(struct hl_cs_job 
*job) -{ - struct hl_device *hdev = job->cs->ctx->hdev; - struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; - u64 ptr; - u32 offset, ctl, len; - - /* - * Upon PQE completion, COMP_DATA is used as the write data to the - * completion queue (QMAN HBW message), and COMP_OFFSET is used as the - * write address offset in the SM block (QMAN LBW message). - * The write address offset is calculated as "COMP_OFFSET << 2". - */ - offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1); - ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) | - ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK); - - len = job->job_cb_size; - - /* - * A patched CB is created only if a user CB was allocated by driver and - * MMU is disabled. If MMU is enabled, the user CB should be used - * instead. If the user CB wasn't allocated by driver, assume that it - * holds an address. - */ - if (job->patched_cb) - ptr = job->patched_cb->bus_address; - else if (job->is_kernel_allocated_cb) - ptr = job->user_cb->bus_address; - else - ptr = (u64) (uintptr_t) job->user_cb; - - ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); -} - -/* - * init_signal_wait_cs - initialize a signal/wait CS - * @cs: pointer to the signal/wait CS - * - * H/W queues spinlock should be taken before calling this function - */ -static void init_signal_wait_cs(struct hl_cs *cs) -{ - struct hl_ctx *ctx = cs->ctx; - struct hl_device *hdev = ctx->hdev; - struct hl_hw_queue *hw_queue; - struct hl_cs_compl *cs_cmpl = - container_of(cs->fence, struct hl_cs_compl, base_fence); - - struct hl_hw_sob *hw_sob; - struct hl_cs_job *job; - u32 q_idx; - - /* There is only one job in a signal/wait CS */ - job = list_first_entry(&cs->job_list, struct hl_cs_job, - cs_node); - q_idx = job->hw_queue_id; - hw_queue = &hdev->kernel_queues[q_idx]; - - if (cs->type & CS_TYPE_SIGNAL) { - hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset]; - - cs_cmpl->hw_sob = hw_sob; - cs_cmpl->sob_val = hw_queue->next_sob_val++; - - dev_dbg(hdev->dev, - "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n", - cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx); - - hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, - cs_cmpl->hw_sob->sob_id); - - kref_get(&hw_sob->kref); - - /* check for wraparound */ - if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) { - /* - * Decrement as we reached the max value. - * The release function won't be called here as we've - * just incremented the refcount. 
- */ - kref_put(&hw_sob->kref, hl_sob_reset_error); - hw_queue->next_sob_val = 1; - /* only two SOBs are currently in use */ - hw_queue->curr_sob_offset = - (hw_queue->curr_sob_offset + 1) % - HL_RSVD_SOBS_IN_USE; - - dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", - hw_queue->curr_sob_offset, q_idx); - } - } else if (cs->type & CS_TYPE_WAIT) { - struct hl_cs_compl *signal_cs_cmpl; - - signal_cs_cmpl = container_of(cs->signal_fence, - struct hl_cs_compl, - base_fence); - - /* copy the the SOB id and value of the signal CS */ - cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; - cs_cmpl->sob_val = signal_cs_cmpl->sob_val; - - dev_dbg(hdev->dev, - "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n", - cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, - hw_queue->base_mon_id, q_idx); - - hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb, - cs_cmpl->hw_sob->sob_id, - cs_cmpl->sob_val, - hw_queue->base_mon_id, - q_idx); - - kref_get(&cs_cmpl->hw_sob->kref); - /* - * Must put the signal fence after the SOB refcnt increment so - * the SOB refcnt won't turn 0 and reset the SOB before the - * wait CS was submitted. - */ - mb(); - dma_fence_put(cs->signal_fence); - cs->signal_fence = NULL; - } -} - -/* - * hl_hw_queue_schedule_cs - schedule a command submission - * @cs: pointer to the CS - */ -int hl_hw_queue_schedule_cs(struct hl_cs *cs) -{ - struct hl_ctx *ctx = cs->ctx; - struct hl_device *hdev = ctx->hdev; - struct hl_cs_job *job, *tmp; - struct hl_hw_queue *q; - u32 max_queues; - int rc = 0, i, cq_cnt; - - hdev->asic_funcs->hw_queues_lock(hdev); - - if (hl_device_disabled_or_in_reset(hdev)) { - ctx->cs_counters.device_in_reset_drop_cnt++; - dev_err(hdev->dev, - "device is disabled or in reset, CS rejected!\n"); - rc = -EPERM; - goto out; - } - - max_queues = hdev->asic_prop.max_queues; - - q = &hdev->kernel_queues[0]; - for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) { - if (cs->jobs_in_queue_cnt[i]) { - switch (q->queue_type) { - case QUEUE_TYPE_EXT: - rc = ext_queue_sanity_checks(hdev, q, - cs->jobs_in_queue_cnt[i], true); - break; - case QUEUE_TYPE_INT: - rc = int_queue_sanity_checks(hdev, q, - cs->jobs_in_queue_cnt[i]); - break; - case QUEUE_TYPE_HW: - rc = hw_queue_sanity_checks(hdev, q, - cs->jobs_in_queue_cnt[i]); - break; - default: - dev_err(hdev->dev, "Queue type %d is invalid\n", - q->queue_type); - rc = -EINVAL; - break; - } - - if (rc) { - ctx->cs_counters.queue_full_drop_cnt++; - goto unroll_cq_resv; - } - - if (q->queue_type == QUEUE_TYPE_EXT) - cq_cnt++; - } - } - - if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) - init_signal_wait_cs(cs); - - spin_lock(&hdev->hw_queues_mirror_lock); - list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list); - - /* Queue TDR if the CS is the first entry and if timeout is wanted */ - if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) && - (list_first_entry(&hdev->hw_queues_mirror_list, - struct hl_cs, mirror_node) == cs)) { - cs->tdr_active = true; - schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies); - spin_unlock(&hdev->hw_queues_mirror_lock); - } else { - spin_unlock(&hdev->hw_queues_mirror_lock); - } - - if (!hdev->cs_active_cnt++) { - struct hl_device_idle_busy_ts *ts; - - ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx]; - ts->busy_to_idle_ts = ktime_set(0, 0); - ts->idle_to_busy_ts = ktime_get(); - } - - list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) - switch (job->queue_type) { - case QUEUE_TYPE_EXT: - ext_queue_schedule_job(job); - break; - case QUEUE_TYPE_INT: - 
int_queue_schedule_job(job); - break; - case QUEUE_TYPE_HW: - hw_queue_schedule_job(job); - break; - default: - break; - } - - cs->submitted = true; - - goto out; - -unroll_cq_resv: - q = &hdev->kernel_queues[0]; - for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) { - if ((q->queue_type == QUEUE_TYPE_EXT) && - (cs->jobs_in_queue_cnt[i])) { - atomic_t *free_slots = - &hdev->completion_queue[i].free_slots_cnt; - atomic_add(cs->jobs_in_queue_cnt[i], free_slots); - cq_cnt--; - } - } - -out: - hdev->asic_funcs->hw_queues_unlock(hdev); - - return rc; -} - -/* - * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue - * - * @hdev: pointer to hl_device structure - * @hw_queue_id: which queue to increment its ci - */ -void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id) -{ - struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; - - atomic_inc(&q->ci); -} - -static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, - bool is_cpu_queue) -{ - void *p; - int rc; - - if (is_cpu_queue) - p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, - HL_QUEUE_SIZE_IN_BYTES, - &q->bus_address); - else - p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, - HL_QUEUE_SIZE_IN_BYTES, - &q->bus_address, - GFP_KERNEL | __GFP_ZERO); - if (!p) - return -ENOMEM; - - q->kernel_address = (u64) (uintptr_t) p; - - q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, - sizeof(*q->shadow_queue), - GFP_KERNEL); - if (!q->shadow_queue) { - dev_err(hdev->dev, - "Failed to allocate shadow queue for H/W queue %d\n", - q->hw_queue_id); - rc = -ENOMEM; - goto free_queue; - } - - /* Make sure read/write pointers are initialized to start of queue */ - atomic_set(&q->ci, 0); - q->pi = 0; - - return 0; - -free_queue: - if (is_cpu_queue) - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, - HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address); - else - hdev->asic_funcs->asic_dma_free_coherent(hdev, - HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address, - q->bus_address); - - return rc; -} - -static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) -{ - void *p; - - p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id, - &q->bus_address, &q->int_queue_len); - if (!p) { - dev_err(hdev->dev, - "Failed to get base address for internal queue %d\n", - q->hw_queue_id); - return -EFAULT; - } - - q->kernel_address = (u64) (uintptr_t) p; - q->pi = 0; - atomic_set(&q->ci, 0); - - return 0; -} - -static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) -{ - return ext_and_cpu_queue_init(hdev, q, true); -} - -static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) -{ - return ext_and_cpu_queue_init(hdev, q, false); -} - -static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) -{ - void *p; - - p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, - HL_QUEUE_SIZE_IN_BYTES, - &q->bus_address, - GFP_KERNEL | __GFP_ZERO); - if (!p) - return -ENOMEM; - - q->kernel_address = (u64) (uintptr_t) p; - - /* Make sure read/write pointers are initialized to start of queue */ - atomic_set(&q->ci, 0); - q->pi = 0; - - return 0; -} - -static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx) -{ - struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx]; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_hw_sob *hw_sob; - int sob, queue_idx = hdev->sync_stream_queue_idx++; - - hw_queue->base_sob_id = - prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS; - 
hw_queue->base_mon_id = - prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS; - hw_queue->next_sob_val = 1; - hw_queue->curr_sob_offset = 0; - - for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) { - hw_sob = &hw_queue->hw_sob[sob]; - hw_sob->hdev = hdev; - hw_sob->sob_id = hw_queue->base_sob_id + sob; - hw_sob->q_idx = q_idx; - kref_init(&hw_sob->kref); - } -} - -static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx) -{ - struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx]; - - /* - * In case we got here due to a stuck CS, the refcnt might be bigger - * than 1 and therefore we reset it. - */ - kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref); - hw_queue->curr_sob_offset = 0; - hw_queue->next_sob_val = 1; -} - -/* - * queue_init - main initialization function for H/W queue object - * - * @hdev: pointer to hl_device device structure - * @q: pointer to hl_hw_queue queue structure - * @hw_queue_id: The id of the H/W queue - * - * Allocate dma-able memory for the queue and initialize fields - * Returns 0 on success - */ -static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q, - u32 hw_queue_id) -{ - int rc; - - q->hw_queue_id = hw_queue_id; - - switch (q->queue_type) { - case QUEUE_TYPE_EXT: - rc = ext_queue_init(hdev, q); - break; - case QUEUE_TYPE_INT: - rc = int_queue_init(hdev, q); - break; - case QUEUE_TYPE_CPU: - rc = cpu_queue_init(hdev, q); - break; - case QUEUE_TYPE_HW: - rc = hw_queue_init(hdev, q); - break; - case QUEUE_TYPE_NA: - q->valid = 0; - return 0; - default: - dev_crit(hdev->dev, "wrong queue type %d during init\n", - q->queue_type); - rc = -EINVAL; - break; - } - - if (q->supports_sync_stream) - sync_stream_queue_init(hdev, q->hw_queue_id); - - if (rc) - return rc; - - q->valid = 1; - - return 0; -} - -/* - * hw_queue_fini - destroy queue - * - * @hdev: pointer to hl_device device structure - * @q: pointer to hl_hw_queue queue structure - * - * Free the queue memory - */ -static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q) -{ - if (!q->valid) - return; - - /* - * If we arrived here, there are no jobs waiting on this queue - * so we can safely remove it. - * This is because this function can only called when: - * 1. Either a context is deleted, which only can occur if all its - * jobs were finished - * 2. A context wasn't able to be created due to failure or timeout, - * which means there are no jobs on the queue yet - * - * The only exception are the queues of the kernel context, but - * if they are being destroyed, it means that the entire module is - * being removed. If the module is removed, it means there is no open - * user context. It also means that if a job was submitted by - * the kernel driver (e.g. context creation), the job itself was - * released by the kernel driver when a timeout occurred on its - * Completion. Thus, we don't need to release it again. 
- */ - - if (q->queue_type == QUEUE_TYPE_INT) - return; - - kfree(q->shadow_queue); - - if (q->queue_type == QUEUE_TYPE_CPU) - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, - HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address); - else - hdev->asic_funcs->asic_dma_free_coherent(hdev, - HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address, - q->bus_address); -} - -int hl_hw_queues_create(struct hl_device *hdev) -{ - struct asic_fixed_properties *asic = &hdev->asic_prop; - struct hl_hw_queue *q; - int i, rc, q_ready_cnt; - - hdev->kernel_queues = kcalloc(asic->max_queues, - sizeof(*hdev->kernel_queues), GFP_KERNEL); - - if (!hdev->kernel_queues) { - dev_err(hdev->dev, "Not enough memory for H/W queues\n"); - return -ENOMEM; - } - - /* Initialize the H/W queues */ - for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues; - i < asic->max_queues ; i++, q_ready_cnt++, q++) { - - q->queue_type = asic->hw_queues_props[i].type; - q->supports_sync_stream = - asic->hw_queues_props[i].supports_sync_stream; - rc = queue_init(hdev, q, i); - if (rc) { - dev_err(hdev->dev, - "failed to initialize queue %d\n", i); - goto release_queues; - } - } - - return 0; - -release_queues: - for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++) - queue_fini(hdev, q); - - kfree(hdev->kernel_queues); - - return rc; -} - -void hl_hw_queues_destroy(struct hl_device *hdev) -{ - struct hl_hw_queue *q; - u32 max_queues = hdev->asic_prop.max_queues; - int i; - - for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) - queue_fini(hdev, q); - - kfree(hdev->kernel_queues); -} - -void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset) -{ - struct hl_hw_queue *q; - u32 max_queues = hdev->asic_prop.max_queues; - int i; - - for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) { - if ((!q->valid) || - ((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU))) - continue; - q->pi = 0; - atomic_set(&q->ci, 0); - - if (q->supports_sync_stream) - sync_stream_queue_reset(hdev, q->hw_queue_id); - } -} diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c deleted file mode 100644 index 8c6cd77e6af6..000000000000 --- a/drivers/misc/habanalabs/hwmon.c +++ /dev/null @@ -1,579 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. 
- */ - -#include "habanalabs.h" - -#include -#include - -#define SENSORS_PKT_TIMEOUT 1000000 /* 1s */ -#define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1) - -int hl_build_hwmon_channel_info(struct hl_device *hdev, - struct armcp_sensor *sensors_arr) -{ - u32 counts[HWMON_NR_SENSOR_TYPES] = {0}; - u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL}; - u32 sensors_by_type_next_index[HWMON_NR_SENSOR_TYPES] = {0}; - struct hwmon_channel_info **channels_info; - u32 num_sensors_for_type, num_active_sensor_types = 0, - arr_size = 0, *curr_arr; - enum hwmon_sensor_types type; - int rc, i, j; - - for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) { - type = le32_to_cpu(sensors_arr[i].type); - - if ((type == 0) && (sensors_arr[i].flags == 0)) - break; - - if (type >= HWMON_NR_SENSOR_TYPES) { - dev_err(hdev->dev, - "Got wrong sensor type %d from device\n", type); - return -EINVAL; - } - - counts[type]++; - arr_size++; - } - - for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) { - if (counts[i] == 0) - continue; - - num_sensors_for_type = counts[i] + 1; - curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr), - GFP_KERNEL); - if (!curr_arr) { - rc = -ENOMEM; - goto sensors_type_err; - } - - num_active_sensor_types++; - sensors_by_type[i] = curr_arr; - } - - for (i = 0 ; i < arr_size ; i++) { - type = le32_to_cpu(sensors_arr[i].type); - curr_arr = sensors_by_type[type]; - curr_arr[sensors_by_type_next_index[type]++] = - le32_to_cpu(sensors_arr[i].flags); - } - - channels_info = kcalloc(num_active_sensor_types + 1, - sizeof(*channels_info), GFP_KERNEL); - if (!channels_info) { - rc = -ENOMEM; - goto channels_info_array_err; - } - - for (i = 0 ; i < num_active_sensor_types ; i++) { - channels_info[i] = kzalloc(sizeof(*channels_info[i]), - GFP_KERNEL); - if (!channels_info[i]) { - rc = -ENOMEM; - goto channel_info_err; - } - } - - for (i = 0, j = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) { - if (!sensors_by_type[i]) - continue; - - channels_info[j]->type = i; - channels_info[j]->config = sensors_by_type[i]; - j++; - } - - hdev->hl_chip_info->info = - (const struct hwmon_channel_info **)channels_info; - - return 0; - -channel_info_err: - for (i = 0 ; i < num_active_sensor_types ; i++) - if (channels_info[i]) { - kfree(channels_info[i]->config); - kfree(channels_info[i]); - } - kfree(channels_info); -channels_info_array_err: -sensors_type_err: - for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) - kfree(sensors_by_type[i]); - - return rc; -} - -static int hl_read(struct device *dev, enum hwmon_sensor_types type, - u32 attr, int channel, long *val) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - int rc; - - if (hl_device_disabled_or_in_reset(hdev)) - return -ENODEV; - - switch (type) { - case hwmon_temp: - switch (attr) { - case hwmon_temp_input: - case hwmon_temp_max: - case hwmon_temp_crit: - case hwmon_temp_max_hyst: - case hwmon_temp_crit_hyst: - case hwmon_temp_offset: - case hwmon_temp_highest: - break; - default: - return -EINVAL; - } - - rc = hl_get_temperature(hdev, channel, attr, val); - break; - case hwmon_in: - switch (attr) { - case hwmon_in_input: - case hwmon_in_min: - case hwmon_in_max: - case hwmon_in_highest: - break; - default: - return -EINVAL; - } - - rc = hl_get_voltage(hdev, channel, attr, val); - break; - case hwmon_curr: - switch (attr) { - case hwmon_curr_input: - case hwmon_curr_min: - case hwmon_curr_max: - case hwmon_curr_highest: - break; - default: - return -EINVAL; - } - - rc = hl_get_current(hdev, channel, attr, val); - break; - case hwmon_fan: - switch (attr) { - case hwmon_fan_input: - case 
hwmon_fan_min: - case hwmon_fan_max: - break; - default: - return -EINVAL; - } - rc = hl_get_fan_speed(hdev, channel, attr, val); - break; - case hwmon_pwm: - switch (attr) { - case hwmon_pwm_input: - case hwmon_pwm_enable: - break; - default: - return -EINVAL; - } - rc = hl_get_pwm_info(hdev, channel, attr, val); - break; - default: - return -EINVAL; - } - return rc; -} - -static int hl_write(struct device *dev, enum hwmon_sensor_types type, - u32 attr, int channel, long val) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - if (hl_device_disabled_or_in_reset(hdev)) - return -ENODEV; - - switch (type) { - case hwmon_temp: - switch (attr) { - case hwmon_temp_offset: - case hwmon_temp_reset_history: - break; - default: - return -EINVAL; - } - hl_set_temperature(hdev, channel, attr, val); - break; - case hwmon_pwm: - switch (attr) { - case hwmon_pwm_input: - case hwmon_pwm_enable: - break; - default: - return -EINVAL; - } - hl_set_pwm_info(hdev, channel, attr, val); - break; - case hwmon_in: - switch (attr) { - case hwmon_in_reset_history: - break; - default: - return -EINVAL; - } - hl_set_voltage(hdev, channel, attr, val); - break; - case hwmon_curr: - switch (attr) { - case hwmon_curr_reset_history: - break; - default: - return -EINVAL; - } - hl_set_current(hdev, channel, attr, val); - break; - default: - return -EINVAL; - } - return 0; -} - -static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type, - u32 attr, int channel) -{ - switch (type) { - case hwmon_temp: - switch (attr) { - case hwmon_temp_input: - case hwmon_temp_max: - case hwmon_temp_max_hyst: - case hwmon_temp_crit: - case hwmon_temp_crit_hyst: - case hwmon_temp_highest: - return 0444; - case hwmon_temp_offset: - return 0644; - case hwmon_temp_reset_history: - return 0200; - } - break; - case hwmon_in: - switch (attr) { - case hwmon_in_input: - case hwmon_in_min: - case hwmon_in_max: - case hwmon_in_highest: - return 0444; - case hwmon_in_reset_history: - return 0200; - } - break; - case hwmon_curr: - switch (attr) { - case hwmon_curr_input: - case hwmon_curr_min: - case hwmon_curr_max: - case hwmon_curr_highest: - return 0444; - case hwmon_curr_reset_history: - return 0200; - } - break; - case hwmon_fan: - switch (attr) { - case hwmon_fan_input: - case hwmon_fan_min: - case hwmon_fan_max: - return 0444; - } - break; - case hwmon_pwm: - switch (attr) { - case hwmon_pwm_input: - case hwmon_pwm_enable: - return 0644; - } - break; - default: - break; - } - return 0; -} - -static const struct hwmon_ops hl_hwmon_ops = { - .is_visible = hl_is_visible, - .read = hl_read, - .write = hl_write -}; - -int hl_get_temperature(struct hl_device *hdev, - int sensor_index, u32 attr, long *value) -{ - struct armcp_packet pkt; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.sensor_index = __cpu_to_le16(sensor_index); - pkt.type = __cpu_to_le16(attr); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); - - if (rc) { - dev_err(hdev->dev, - "Failed to get temperature from sensor %d, error %d\n", - sensor_index, rc); - *value = 0; - } - - return rc; -} - -int hl_set_temperature(struct hl_device *hdev, - int sensor_index, u32 attr, long value) -{ - struct armcp_packet pkt; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.sensor_index = __cpu_to_le16(sensor_index); - pkt.type = 
__cpu_to_le16(attr); - pkt.value = __cpu_to_le64(value); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); - - if (rc) - dev_err(hdev->dev, - "Failed to set temperature of sensor %d, error %d\n", - sensor_index, rc); - - return rc; -} - -int hl_get_voltage(struct hl_device *hdev, - int sensor_index, u32 attr, long *value) -{ - struct armcp_packet pkt; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.sensor_index = __cpu_to_le16(sensor_index); - pkt.type = __cpu_to_le16(attr); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); - - if (rc) { - dev_err(hdev->dev, - "Failed to get voltage from sensor %d, error %d\n", - sensor_index, rc); - *value = 0; - } - - return rc; -} - -int hl_get_current(struct hl_device *hdev, - int sensor_index, u32 attr, long *value) -{ - struct armcp_packet pkt; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.sensor_index = __cpu_to_le16(sensor_index); - pkt.type = __cpu_to_le16(attr); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); - - if (rc) { - dev_err(hdev->dev, - "Failed to get current from sensor %d, error %d\n", - sensor_index, rc); - *value = 0; - } - - return rc; -} - -int hl_get_fan_speed(struct hl_device *hdev, - int sensor_index, u32 attr, long *value) -{ - struct armcp_packet pkt; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.sensor_index = __cpu_to_le16(sensor_index); - pkt.type = __cpu_to_le16(attr); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); - - if (rc) { - dev_err(hdev->dev, - "Failed to get fan speed from sensor %d, error %d\n", - sensor_index, rc); - *value = 0; - } - - return rc; -} - -int hl_get_pwm_info(struct hl_device *hdev, - int sensor_index, u32 attr, long *value) -{ - struct armcp_packet pkt; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.sensor_index = __cpu_to_le16(sensor_index); - pkt.type = __cpu_to_le16(attr); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); - - if (rc) { - dev_err(hdev->dev, - "Failed to get pwm info from sensor %d, error %d\n", - sensor_index, rc); - *value = 0; - } - - return rc; -} - -void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, - long value) -{ - struct armcp_packet pkt; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.sensor_index = __cpu_to_le16(sensor_index); - pkt.type = __cpu_to_le16(attr); - pkt.value = cpu_to_le64(value); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); - - if (rc) - dev_err(hdev->dev, - "Failed to set pwm info to sensor %d, error %d\n", - sensor_index, rc); -} - -int hl_set_voltage(struct hl_device *hdev, - int sensor_index, u32 attr, long value) -{ - struct armcp_packet pkt; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.sensor_index = __cpu_to_le16(sensor_index); - pkt.type = __cpu_to_le16(attr); - 
pkt.value = __cpu_to_le64(value); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); - - if (rc) - dev_err(hdev->dev, - "Failed to set voltage of sensor %d, error %d\n", - sensor_index, rc); - - return rc; -} - -int hl_set_current(struct hl_device *hdev, - int sensor_index, u32 attr, long value) -{ - struct armcp_packet pkt; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.sensor_index = __cpu_to_le16(sensor_index); - pkt.type = __cpu_to_le16(attr); - pkt.value = __cpu_to_le64(value); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); - - if (rc) - dev_err(hdev->dev, - "Failed to set current of sensor %d, error %d\n", - sensor_index, rc); - - return rc; -} - -int hl_hwmon_init(struct hl_device *hdev) -{ - struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - int rc; - - if ((hdev->hwmon_initialized) || !(hdev->fw_loading)) - return 0; - - if (hdev->hl_chip_info->info) { - hdev->hl_chip_info->ops = &hl_hwmon_ops; - - hdev->hwmon_dev = hwmon_device_register_with_info(dev, - prop->armcp_info.card_name, hdev, - hdev->hl_chip_info, NULL); - if (IS_ERR(hdev->hwmon_dev)) { - rc = PTR_ERR(hdev->hwmon_dev); - dev_err(hdev->dev, - "Unable to register hwmon device: %d\n", rc); - return rc; - } - - dev_info(hdev->dev, "%s: add sensors information\n", - dev_name(hdev->hwmon_dev)); - - hdev->hwmon_initialized = true; - } else { - dev_info(hdev->dev, "no available sensors\n"); - } - - return 0; -} - -void hl_hwmon_fini(struct hl_device *hdev) -{ - if (!hdev->hwmon_initialized) - return; - - hwmon_device_unregister(hdev->hwmon_dev); -} diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h deleted file mode 100644 index 07f9972db28d..000000000000 --- a/drivers/misc/habanalabs/include/armcp_if.h +++ /dev/null @@ -1,407 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright 2016-2020 HabanaLabs, Ltd. - * All Rights Reserved. - * - */ - -#ifndef ARMCP_IF_H -#define ARMCP_IF_H - -#include - -/* - * EVENT QUEUE - */ - -struct hl_eq_header { - __le32 reserved; - __le32 ctl; -}; - -struct hl_eq_ecc_data { - __le64 ecc_address; - __le64 ecc_syndrom; - __u8 memory_wrapper_idx; - __u8 pad[7]; -}; - -struct hl_eq_entry { - struct hl_eq_header hdr; - union { - struct hl_eq_ecc_data ecc_data; - __le64 data[7]; - }; -}; - -#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry) - -#define EQ_CTL_READY_SHIFT 31 -#define EQ_CTL_READY_MASK 0x80000000 - -#define EQ_CTL_EVENT_TYPE_SHIFT 16 -#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000 - -enum pq_init_status { - PQ_INIT_STATUS_NA = 0, - PQ_INIT_STATUS_READY_FOR_CP, - PQ_INIT_STATUS_READY_FOR_HOST, - PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI -}; - -/* - * ArmCP Primary Queue Packets - * - * During normal operation, the host's kernel driver needs to send various - * messages to ArmCP, usually either to SET some value into a H/W periphery or - * to GET the current value of some H/W periphery. For example, SET the - * frequency of MME/TPC and GET the value of the thermal sensor. - * - * These messages can be initiated either by the User application or by the - * host's driver itself, e.g. power management code. 
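The channel table that hl_build_hwmon_channel_info() assembles above from the ArmCP sensor list is the runtime counterpart of the static tables most hwmon drivers declare at build time; hl_hwmon_init() then hands it to hwmon_device_register_with_info(), which is what creates the usual temp*/fan*/pwm* sysfs attributes. For illustration only (the habanalabs driver cannot declare this statically because the sensor set is reported by firmware), an equivalent static table would look roughly like this:

  /* Illustrative sketch, not part of this patch; requires <linux/hwmon.h>. */
  static const struct hwmon_channel_info *example_channel_info[] = {
          HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT),
          HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT | HWMON_F_MAX),
          NULL
  };

  static const struct hwmon_chip_info example_chip_info = {
          .ops  = &hl_hwmon_ops,
          .info = example_channel_info,
  };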
In either case, the - * communication from the host's driver to ArmCP will *always* be in - * synchronous mode, meaning that the host will send a single message and poll - * until the message was acknowledged and the results are ready (if results are - * needed). - * - * This means that only a single message can be sent at a time and the host's - * driver must wait for its result before sending the next message. Having said - * that, because these are control messages which are sent in a relatively low - * frequency, this limitation seems acceptable. It's important to note that - * in case of multiple devices, messages to different devices *can* be sent - * at the same time. - * - * The message, inputs/outputs (if relevant) and fence object will be located - * on the device DDR at an address that will be determined by the host's driver. - * During device initialization phase, the host will pass to ArmCP that address. - * Most of the message types will contain inputs/outputs inside the message - * itself. The common part of each message will contain the opcode of the - * message (its type) and a field representing a fence object. - * - * When the host's driver wishes to send a message to ArmCP, it will write the - * message contents to the device DDR, clear the fence object and then write the - * value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue - * the 484 interrupt-id to the ARM core. - * - * Upon receiving the 484 interrupt-id, ArmCP will read the message from the - * DDR. In case the message is a SET operation, ArmCP will first perform the - * operation and then write to the fence object on the device DDR. In case the - * message is a GET operation, ArmCP will first fill the results section on the - * device DDR and then write to the fence object. If an error occurred, ArmCP - * will fill the rc field with the right error code. - * - * In the meantime, the host's driver will poll on the fence object. Once the - * host sees that the fence object is signaled, it will read the results from - * the device DDR (if relevant) and resume the code execution in the host's - * driver. - * - * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 - * so the value being put by the host's driver matches the value read by ArmCP - * - * Non-QMAN packets should be limited to values 1 through (2^8 - 1) - * - * Detailed description: - * - * ARMCP_PACKET_DISABLE_PCI_ACCESS - - * After receiving this packet the embedded CPU must NOT issue PCI - * transactions (read/write) towards the Host CPU. This also include - * sending MSI-X interrupts. - * This packet is usually sent before the device is moved to D3Hot state. - * - * ARMCP_PACKET_ENABLE_PCI_ACCESS - - * After receiving this packet the embedded CPU is allowed to issue PCI - * transactions towards the Host CPU, including sending MSI-X interrupts. - * This packet is usually send after the device is moved to D0 state. - * - * ARMCP_PACKET_TEMPERATURE_GET - - * Fetch the current temperature / Max / Max Hyst / Critical / - * Critical Hyst of a specified thermal sensor. The packet's - * arguments specify the desired sensor and the field to get. - * - * ARMCP_PACKET_VOLTAGE_GET - - * Fetch the voltage / Max / Min of a specified sensor. The packet's - * arguments specify the sensor and type. - * - * ARMCP_PACKET_CURRENT_GET - - * Fetch the current / Max / Min of a specified sensor. The packet's - * arguments specify the sensor and type. - * - * ARMCP_PACKET_FAN_SPEED_GET - - * Fetch the speed / Max / Min of a specified fan. 
The packet's - * arguments specify the sensor and type. - * - * ARMCP_PACKET_PWM_GET - - * Fetch the pwm value / mode of a specified pwm. The packet's - * arguments specify the sensor and type. - * - * ARMCP_PACKET_PWM_SET - - * Set the pwm value / mode of a specified pwm. The packet's - * arguments specify the sensor, type and value. - * - * ARMCP_PACKET_FREQUENCY_SET - - * Set the frequency of a specified PLL. The packet's arguments specify - * the PLL and the desired frequency. The actual frequency in the device - * might differ from the requested frequency. - * - * ARMCP_PACKET_FREQUENCY_GET - - * Fetch the frequency of a specified PLL. The packet's arguments specify - * the PLL. - * - * ARMCP_PACKET_LED_SET - - * Set the state of a specified led. The packet's arguments - * specify the led and the desired state. - * - * ARMCP_PACKET_I2C_WR - - * Write 32-bit value to I2C device. The packet's arguments specify the - * I2C bus, address and value. - * - * ARMCP_PACKET_I2C_RD - - * Read 32-bit value from I2C device. The packet's arguments specify the - * I2C bus and address. - * - * ARMCP_PACKET_INFO_GET - - * Fetch information from the device as specified in the packet's - * structure. The host's driver passes the max size it allows the ArmCP to - * write to the structure, to prevent data corruption in case of - * mismatched driver/FW versions. - * - * ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed - * - * ARMCP_PACKET_UNMASK_RAZWI_IRQ - - * Unmask the given IRQ. The IRQ number is specified in the value field. - * The packet is sent after receiving an interrupt and printing its - * relevant information. - * - * ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY - - * Unmask the given IRQs. The IRQs numbers are specified in an array right - * after the armcp_packet structure, where its first element is the array - * length. The packet is sent after a soft reset was done in order to - * handle any interrupts that were sent during the reset process. - * - * ARMCP_PACKET_TEST - - * Test packet for ArmCP connectivity. The CPU will put the fence value - * in the result field. - * - * ARMCP_PACKET_FREQUENCY_CURR_GET - - * Fetch the current frequency of a specified PLL. The packet's arguments - * specify the PLL. - * - * ARMCP_PACKET_MAX_POWER_GET - - * Fetch the maximal power of the device. - * - * ARMCP_PACKET_MAX_POWER_SET - - * Set the maximal power of the device. The packet's arguments specify - * the power. - * - * ARMCP_PACKET_EEPROM_DATA_GET - - * Get EEPROM data from the ArmCP kernel. The buffer is specified in the - * addr field. The CPU will put the returned data size in the result - * field. In addition, the host's driver passes the max size it allows the - * ArmCP to write to the structure, to prevent data corruption in case of - * mismatched driver/FW versions. - * - * ARMCP_PACKET_TEMPERATURE_SET - - * Set the value of the offset property of a specified thermal sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - * - * ARMCP_PACKET_VOLTAGE_SET - - * Trigger the reset_history property of a specified voltage sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - * - * ARMCP_PACKET_CURRENT_SET - - * Trigger the reset_history property of a specified current sensor. - * The packet's arguments specify the desired sensor and the field to - * set. 
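Putting the handshake described above into code, a simplified host-side sketch (illustrative only: ring_doorbell() stands in for the ASIC-specific write of 484 to mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, pkt points at the packet image in device-accessible memory, the fence is assumed to be signaled by writing ARMCP_PACKET_FENCE_VAL, and timeout handling is omitted; the real sequence lives in each ASIC's send_cpu_message() implementation, not in this header):

  /* packet contents were already written to the device DDR through pkt */
  pkt->fence = 0;                                 /* clear the fence object */
  ring_doorbell(hdev);                            /* raise interrupt-id 484 */

  while (le32_to_cpu(READ_ONCE(pkt->fence)) != ARMCP_PACKET_FENCE_VAL)
          usleep_range(100, 200);                 /* poll until ArmCP signals */

  /* on error, ArmCP reports a code in the RC field of the ctl word */
  rc = (le32_to_cpu(pkt->ctl) & ARMCP_PKT_CTL_RC_MASK) >>
                  ARMCP_PKT_CTL_RC_SHIFT;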
- */ - -enum armcp_packet_id { - ARMCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */ - ARMCP_PACKET_ENABLE_PCI_ACCESS, /* internal */ - ARMCP_PACKET_TEMPERATURE_GET, /* sysfs */ - ARMCP_PACKET_VOLTAGE_GET, /* sysfs */ - ARMCP_PACKET_CURRENT_GET, /* sysfs */ - ARMCP_PACKET_FAN_SPEED_GET, /* sysfs */ - ARMCP_PACKET_PWM_GET, /* sysfs */ - ARMCP_PACKET_PWM_SET, /* sysfs */ - ARMCP_PACKET_FREQUENCY_SET, /* sysfs */ - ARMCP_PACKET_FREQUENCY_GET, /* sysfs */ - ARMCP_PACKET_LED_SET, /* debugfs */ - ARMCP_PACKET_I2C_WR, /* debugfs */ - ARMCP_PACKET_I2C_RD, /* debugfs */ - ARMCP_PACKET_INFO_GET, /* IOCTL */ - ARMCP_PACKET_FLASH_PROGRAM_REMOVED, - ARMCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */ - ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */ - ARMCP_PACKET_TEST, /* internal */ - ARMCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */ - ARMCP_PACKET_MAX_POWER_GET, /* sysfs */ - ARMCP_PACKET_MAX_POWER_SET, /* sysfs */ - ARMCP_PACKET_EEPROM_DATA_GET, /* sysfs */ - ARMCP_RESERVED, - ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */ - ARMCP_PACKET_VOLTAGE_SET, /* sysfs */ - ARMCP_PACKET_CURRENT_SET, /* sysfs */ -}; - -#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5 - -#define ARMCP_PKT_CTL_RC_SHIFT 12 -#define ARMCP_PKT_CTL_RC_MASK 0x0000F000 - -#define ARMCP_PKT_CTL_OPCODE_SHIFT 16 -#define ARMCP_PKT_CTL_OPCODE_MASK 0x1FFF0000 - -struct armcp_packet { - union { - __le64 value; /* For SET packets */ - __le64 result; /* For GET packets */ - __le64 addr; /* For PQ */ - }; - - __le32 ctl; - - __le32 fence; /* Signal to host that message is completed */ - - union { - struct {/* For temperature/current/voltage/fan/pwm get/set */ - __le16 sensor_index; - __le16 type; - }; - - struct { /* For I2C read/write */ - __u8 i2c_bus; - __u8 i2c_addr; - __u8 i2c_reg; - __u8 pad; /* unused */ - }; - - /* For frequency get/set */ - __le32 pll_index; - - /* For led set */ - __le32 led_index; - - /* For get Armcp info/EEPROM data */ - __le32 data_max_size; - }; - - __le32 reserved; -}; - -struct armcp_unmask_irq_arr_packet { - struct armcp_packet armcp_pkt; - __le32 length; - __le32 irqs[0]; -}; - -enum armcp_packet_rc { - armcp_packet_success, - armcp_packet_invalid, - armcp_packet_fault -}; - -/* - * armcp_temp_type should adhere to hwmon_temp_attributes - * defined in Linux kernel hwmon.h file - */ -enum armcp_temp_type { - armcp_temp_input, - armcp_temp_max = 6, - armcp_temp_max_hyst, - armcp_temp_crit, - armcp_temp_crit_hyst, - armcp_temp_offset = 19, - armcp_temp_highest = 22, - armcp_temp_reset_history = 23 -}; - -enum armcp_in_attributes { - armcp_in_input, - armcp_in_min, - armcp_in_max, - armcp_in_highest = 7, - armcp_in_reset_history -}; - -enum armcp_curr_attributes { - armcp_curr_input, - armcp_curr_min, - armcp_curr_max, - armcp_curr_highest = 7, - armcp_curr_reset_history -}; - -enum armcp_fan_attributes { - armcp_fan_input, - armcp_fan_min = 2, - armcp_fan_max -}; - -enum armcp_pwm_attributes { - armcp_pwm_input, - armcp_pwm_enable -}; - -/* Event Queue Packets */ - -struct eq_generic_event { - __le64 data[7]; -}; - -/* - * ArmCP info - */ - -#define CARD_NAME_MAX_LEN 16 -#define VERSION_MAX_LEN 128 -#define ARMCP_MAX_SENSORS 128 - -struct armcp_sensor { - __le32 type; - __le32 flags; -}; - -/** - * struct armcp_card_types - ASIC card type. - * @armcp_card_type_pci: PCI card. - * @armcp_card_type_pmc: PCI Mezzanine Card. - */ -enum armcp_card_types { - armcp_card_type_pci, - armcp_card_type_pmc -}; - -/** - * struct armcp_info - Info from ArmCP that is necessary to the host's driver - * @sensors: available sensors description. 
- * @kernel_version: ArmCP linux kernel version. - * @reserved: reserved field. - * @card_type: card configuration type. - * @card_location: in a server, each card has different connections topology - * depending on its location (relevant for PMC card type) - * @cpld_version: CPLD programmed F/W version. - * @infineon_version: Infineon main DC-DC version. - * @fuse_version: silicon production FUSE information. - * @thermal_version: thermald S/W version. - * @armcp_version: ArmCP S/W version. - * @dram_size: available DRAM size. - * @card_name: card name that will be displayed in HWMON subsystem on the host - */ -struct armcp_info { - struct armcp_sensor sensors[ARMCP_MAX_SENSORS]; - __u8 kernel_version[VERSION_MAX_LEN]; - __le32 reserved; - __le32 card_type; - __le32 card_location; - __le32 cpld_version; - __le32 infineon_version; - __u8 fuse_version[VERSION_MAX_LEN]; - __u8 thermal_version[VERSION_MAX_LEN]; - __u8 armcp_version[VERSION_MAX_LEN]; - __le64 dram_size; - char card_name[CARD_NAME_MAX_LEN]; -}; - -#endif /* ARMCP_IF_H */ diff --git a/drivers/misc/habanalabs/include/common/armcp_if.h b/drivers/misc/habanalabs/include/common/armcp_if.h new file mode 100644 index 000000000000..07f9972db28d --- /dev/null +++ b/drivers/misc/habanalabs/include/common/armcp_if.h @@ -0,0 +1,407 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2020 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef ARMCP_IF_H +#define ARMCP_IF_H + +#include + +/* + * EVENT QUEUE + */ + +struct hl_eq_header { + __le32 reserved; + __le32 ctl; +}; + +struct hl_eq_ecc_data { + __le64 ecc_address; + __le64 ecc_syndrom; + __u8 memory_wrapper_idx; + __u8 pad[7]; +}; + +struct hl_eq_entry { + struct hl_eq_header hdr; + union { + struct hl_eq_ecc_data ecc_data; + __le64 data[7]; + }; +}; + +#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry) + +#define EQ_CTL_READY_SHIFT 31 +#define EQ_CTL_READY_MASK 0x80000000 + +#define EQ_CTL_EVENT_TYPE_SHIFT 16 +#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000 + +enum pq_init_status { + PQ_INIT_STATUS_NA = 0, + PQ_INIT_STATUS_READY_FOR_CP, + PQ_INIT_STATUS_READY_FOR_HOST, + PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI +}; + +/* + * ArmCP Primary Queue Packets + * + * During normal operation, the host's kernel driver needs to send various + * messages to ArmCP, usually either to SET some value into a H/W periphery or + * to GET the current value of some H/W periphery. For example, SET the + * frequency of MME/TPC and GET the value of the thermal sensor. + * + * These messages can be initiated either by the User application or by the + * host's driver itself, e.g. power management code. In either case, the + * communication from the host's driver to ArmCP will *always* be in + * synchronous mode, meaning that the host will send a single message and poll + * until the message was acknowledged and the results are ready (if results are + * needed). + * + * This means that only a single message can be sent at a time and the host's + * driver must wait for its result before sending the next message. Having said + * that, because these are control messages which are sent in a relatively low + * frequency, this limitation seems acceptable. It's important to note that + * in case of multiple devices, messages to different devices *can* be sent + * at the same time. + * + * The message, inputs/outputs (if relevant) and fence object will be located + * on the device DDR at an address that will be determined by the host's driver. 
+ * During device initialization phase, the host will pass to ArmCP that address. + * Most of the message types will contain inputs/outputs inside the message + * itself. The common part of each message will contain the opcode of the + * message (its type) and a field representing a fence object. + * + * When the host's driver wishes to send a message to ArmCP, it will write the + * message contents to the device DDR, clear the fence object and then write the + * value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue + * the 484 interrupt-id to the ARM core. + * + * Upon receiving the 484 interrupt-id, ArmCP will read the message from the + * DDR. In case the message is a SET operation, ArmCP will first perform the + * operation and then write to the fence object on the device DDR. In case the + * message is a GET operation, ArmCP will first fill the results section on the + * device DDR and then write to the fence object. If an error occurred, ArmCP + * will fill the rc field with the right error code. + * + * In the meantime, the host's driver will poll on the fence object. Once the + * host sees that the fence object is signaled, it will read the results from + * the device DDR (if relevant) and resume the code execution in the host's + * driver. + * + * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 + * so the value being put by the host's driver matches the value read by ArmCP + * + * Non-QMAN packets should be limited to values 1 through (2^8 - 1) + * + * Detailed description: + * + * ARMCP_PACKET_DISABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU must NOT issue PCI + * transactions (read/write) towards the Host CPU. This also include + * sending MSI-X interrupts. + * This packet is usually sent before the device is moved to D3Hot state. + * + * ARMCP_PACKET_ENABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU is allowed to issue PCI + * transactions towards the Host CPU, including sending MSI-X interrupts. + * This packet is usually send after the device is moved to D0 state. + * + * ARMCP_PACKET_TEMPERATURE_GET - + * Fetch the current temperature / Max / Max Hyst / Critical / + * Critical Hyst of a specified thermal sensor. The packet's + * arguments specify the desired sensor and the field to get. + * + * ARMCP_PACKET_VOLTAGE_GET - + * Fetch the voltage / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * ARMCP_PACKET_CURRENT_GET - + * Fetch the current / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * ARMCP_PACKET_FAN_SPEED_GET - + * Fetch the speed / Max / Min of a specified fan. The packet's + * arguments specify the sensor and type. + * + * ARMCP_PACKET_PWM_GET - + * Fetch the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor and type. + * + * ARMCP_PACKET_PWM_SET - + * Set the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor, type and value. + * + * ARMCP_PACKET_FREQUENCY_SET - + * Set the frequency of a specified PLL. The packet's arguments specify + * the PLL and the desired frequency. The actual frequency in the device + * might differ from the requested frequency. + * + * ARMCP_PACKET_FREQUENCY_GET - + * Fetch the frequency of a specified PLL. The packet's arguments specify + * the PLL. + * + * ARMCP_PACKET_LED_SET - + * Set the state of a specified led. The packet's arguments + * specify the led and the desired state. 
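For the I2C packets described next, the third union of struct armcp_packet (defined further down in this header) carries a bus/address/register triplet instead of the sensor_index/type pair. An illustrative fill for an ARMCP_PACKET_I2C_RD request (the numeric values are arbitrary examples):

  pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD << ARMCP_PKT_CTL_OPCODE_SHIFT);
  pkt.i2c_bus  = 0;       /* which I2C controller on the card */
  pkt.i2c_addr = 0x50;    /* 7-bit device address */
  pkt.i2c_reg  = 0x00;    /* register to read */
  /* the 32-bit value read back is returned through the result field */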
+ * + * ARMCP_PACKET_I2C_WR - + * Write 32-bit value to I2C device. The packet's arguments specify the + * I2C bus, address and value. + * + * ARMCP_PACKET_I2C_RD - + * Read 32-bit value from I2C device. The packet's arguments specify the + * I2C bus and address. + * + * ARMCP_PACKET_INFO_GET - + * Fetch information from the device as specified in the packet's + * structure. The host's driver passes the max size it allows the ArmCP to + * write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed + * + * ARMCP_PACKET_UNMASK_RAZWI_IRQ - + * Unmask the given IRQ. The IRQ number is specified in the value field. + * The packet is sent after receiving an interrupt and printing its + * relevant information. + * + * ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY - + * Unmask the given IRQs. The IRQs numbers are specified in an array right + * after the armcp_packet structure, where its first element is the array + * length. The packet is sent after a soft reset was done in order to + * handle any interrupts that were sent during the reset process. + * + * ARMCP_PACKET_TEST - + * Test packet for ArmCP connectivity. The CPU will put the fence value + * in the result field. + * + * ARMCP_PACKET_FREQUENCY_CURR_GET - + * Fetch the current frequency of a specified PLL. The packet's arguments + * specify the PLL. + * + * ARMCP_PACKET_MAX_POWER_GET - + * Fetch the maximal power of the device. + * + * ARMCP_PACKET_MAX_POWER_SET - + * Set the maximal power of the device. The packet's arguments specify + * the power. + * + * ARMCP_PACKET_EEPROM_DATA_GET - + * Get EEPROM data from the ArmCP kernel. The buffer is specified in the + * addr field. The CPU will put the returned data size in the result + * field. In addition, the host's driver passes the max size it allows the + * ArmCP to write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * ARMCP_PACKET_TEMPERATURE_SET - + * Set the value of the offset property of a specified thermal sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * ARMCP_PACKET_VOLTAGE_SET - + * Trigger the reset_history property of a specified voltage sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * ARMCP_PACKET_CURRENT_SET - + * Trigger the reset_history property of a specified current sensor. + * The packet's arguments specify the desired sensor and the field to + * set. 
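The ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY payload described above is variable length: struct armcp_unmask_irq_arr_packet (declared below in this header) is a fixed header followed by the count and the IRQ numbers themselves. A minimal allocation sketch (irq_arr and irq_num are illustrative inputs, not from this patch):

  struct armcp_unmask_irq_arr_packet *pkt;
  size_t total_size = sizeof(*pkt) + irq_num * sizeof(__le32);
  int i;

  pkt = kzalloc(total_size, GFP_KERNEL);
  if (!pkt)
          return -ENOMEM;

  pkt->length = cpu_to_le32(irq_num);
  for (i = 0 ; i < irq_num ; i++)
          pkt->irqs[i] = cpu_to_le32(irq_arr[i]);

  pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
                                  ARMCP_PKT_CTL_OPCODE_SHIFT);

  /* the whole buffer (total_size bytes) is then sent via send_cpu_message() */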
+ */ + +enum armcp_packet_id { + ARMCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */ + ARMCP_PACKET_ENABLE_PCI_ACCESS, /* internal */ + ARMCP_PACKET_TEMPERATURE_GET, /* sysfs */ + ARMCP_PACKET_VOLTAGE_GET, /* sysfs */ + ARMCP_PACKET_CURRENT_GET, /* sysfs */ + ARMCP_PACKET_FAN_SPEED_GET, /* sysfs */ + ARMCP_PACKET_PWM_GET, /* sysfs */ + ARMCP_PACKET_PWM_SET, /* sysfs */ + ARMCP_PACKET_FREQUENCY_SET, /* sysfs */ + ARMCP_PACKET_FREQUENCY_GET, /* sysfs */ + ARMCP_PACKET_LED_SET, /* debugfs */ + ARMCP_PACKET_I2C_WR, /* debugfs */ + ARMCP_PACKET_I2C_RD, /* debugfs */ + ARMCP_PACKET_INFO_GET, /* IOCTL */ + ARMCP_PACKET_FLASH_PROGRAM_REMOVED, + ARMCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */ + ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */ + ARMCP_PACKET_TEST, /* internal */ + ARMCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */ + ARMCP_PACKET_MAX_POWER_GET, /* sysfs */ + ARMCP_PACKET_MAX_POWER_SET, /* sysfs */ + ARMCP_PACKET_EEPROM_DATA_GET, /* sysfs */ + ARMCP_RESERVED, + ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */ + ARMCP_PACKET_VOLTAGE_SET, /* sysfs */ + ARMCP_PACKET_CURRENT_SET, /* sysfs */ +}; + +#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5 + +#define ARMCP_PKT_CTL_RC_SHIFT 12 +#define ARMCP_PKT_CTL_RC_MASK 0x0000F000 + +#define ARMCP_PKT_CTL_OPCODE_SHIFT 16 +#define ARMCP_PKT_CTL_OPCODE_MASK 0x1FFF0000 + +struct armcp_packet { + union { + __le64 value; /* For SET packets */ + __le64 result; /* For GET packets */ + __le64 addr; /* For PQ */ + }; + + __le32 ctl; + + __le32 fence; /* Signal to host that message is completed */ + + union { + struct {/* For temperature/current/voltage/fan/pwm get/set */ + __le16 sensor_index; + __le16 type; + }; + + struct { /* For I2C read/write */ + __u8 i2c_bus; + __u8 i2c_addr; + __u8 i2c_reg; + __u8 pad; /* unused */ + }; + + /* For frequency get/set */ + __le32 pll_index; + + /* For led set */ + __le32 led_index; + + /* For get Armcp info/EEPROM data */ + __le32 data_max_size; + }; + + __le32 reserved; +}; + +struct armcp_unmask_irq_arr_packet { + struct armcp_packet armcp_pkt; + __le32 length; + __le32 irqs[0]; +}; + +enum armcp_packet_rc { + armcp_packet_success, + armcp_packet_invalid, + armcp_packet_fault +}; + +/* + * armcp_temp_type should adhere to hwmon_temp_attributes + * defined in Linux kernel hwmon.h file + */ +enum armcp_temp_type { + armcp_temp_input, + armcp_temp_max = 6, + armcp_temp_max_hyst, + armcp_temp_crit, + armcp_temp_crit_hyst, + armcp_temp_offset = 19, + armcp_temp_highest = 22, + armcp_temp_reset_history = 23 +}; + +enum armcp_in_attributes { + armcp_in_input, + armcp_in_min, + armcp_in_max, + armcp_in_highest = 7, + armcp_in_reset_history +}; + +enum armcp_curr_attributes { + armcp_curr_input, + armcp_curr_min, + armcp_curr_max, + armcp_curr_highest = 7, + armcp_curr_reset_history +}; + +enum armcp_fan_attributes { + armcp_fan_input, + armcp_fan_min = 2, + armcp_fan_max +}; + +enum armcp_pwm_attributes { + armcp_pwm_input, + armcp_pwm_enable +}; + +/* Event Queue Packets */ + +struct eq_generic_event { + __le64 data[7]; +}; + +/* + * ArmCP info + */ + +#define CARD_NAME_MAX_LEN 16 +#define VERSION_MAX_LEN 128 +#define ARMCP_MAX_SENSORS 128 + +struct armcp_sensor { + __le32 type; + __le32 flags; +}; + +/** + * struct armcp_card_types - ASIC card type. + * @armcp_card_type_pci: PCI card. + * @armcp_card_type_pmc: PCI Mezzanine Card. + */ +enum armcp_card_types { + armcp_card_type_pci, + armcp_card_type_pmc +}; + +/** + * struct armcp_info - Info from ArmCP that is necessary to the host's driver + * @sensors: available sensors description. 
+ * @kernel_version: ArmCP linux kernel version. + * @reserved: reserved field. + * @card_type: card configuration type. + * @card_location: in a server, each card has different connections topology + * depending on its location (relevant for PMC card type) + * @cpld_version: CPLD programmed F/W version. + * @infineon_version: Infineon main DC-DC version. + * @fuse_version: silicon production FUSE information. + * @thermal_version: thermald S/W version. + * @armcp_version: ArmCP S/W version. + * @dram_size: available DRAM size. + * @card_name: card name that will be displayed in HWMON subsystem on the host + */ +struct armcp_info { + struct armcp_sensor sensors[ARMCP_MAX_SENSORS]; + __u8 kernel_version[VERSION_MAX_LEN]; + __le32 reserved; + __le32 card_type; + __le32 card_location; + __le32 cpld_version; + __le32 infineon_version; + __u8 fuse_version[VERSION_MAX_LEN]; + __u8 thermal_version[VERSION_MAX_LEN]; + __u8 armcp_version[VERSION_MAX_LEN]; + __le64 dram_size; + char card_name[CARD_NAME_MAX_LEN]; +}; + +#endif /* ARMCP_IF_H */ diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h new file mode 100644 index 000000000000..c22d134e73af --- /dev/null +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2018-2020 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef HL_BOOT_IF_H +#define HL_BOOT_IF_H + +#define LKD_HARD_RESET_MAGIC 0xED7BD694 +#define HL_POWER9_HOST_MAGIC 0x1DA30009 + +#define BOOT_FIT_SRAM_OFFSET 0x200000 + +/* + * CPU error bits in BOOT_ERROR registers + * + * CPU_BOOT_ERR0_DRAM_INIT_FAIL DRAM initialization failed. + * DRAM is not reliable to use. + * + * CPU_BOOT_ERR0_FIT_CORRUPTED FIT data integrity verification of the + * image provided by the host has failed. + * + * CPU_BOOT_ERR0_TS_INIT_FAIL Thermal Sensor initialization failed. + * Boot continues as usual, but keep in + * mind this is a warning. + * + * CPU_BOOT_ERR0_DRAM_SKIPPED DRAM initialization has been skipped. + * Skipping DRAM initialization has been + * requested (e.g. strap, command, etc.) + * and FW skipped the DRAM initialization. + * Host can initialize the DRAM. + * + * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED Waiting for BMC data will be skipped. + * Meaning the BMC data might not be + * available until reset. + * + * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY NIC data from BMC is not ready. + * BMC has not provided the NIC data yet. + * Once provided this bit will be cleared. + * + * CPU_BOOT_ERR0_NIC_FW_FAIL NIC FW loading failed. + * The NIC FW loading and initialization + * failed. This means NICs are not usable. + * + * CPU_BOOT_ERR0_ENABLED Error registers enabled. + * This is a main indication that the + * running FW populates the error + * registers. Meaning the error bits are + * not garbage, but actual error statuses. 
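Only when the running firmware sets CPU_BOOT_ERR0_ENABLED are the other bits in this register meaningful, and they differ in severity: TS_INIT_FAIL is only a warning, while DRAM_INIT_FAIL leaves the DRAM unusable. An illustrative sketch of how a host-side check could consume the register value, using the bit definitions that follow (the helper name and the error mapping are examples, not taken from this patch):

  static int example_check_boot_err0(struct hl_device *hdev, u32 err0)
  {
          if (!(err0 & CPU_BOOT_ERR0_ENABLED))
                  return 0;       /* FW does not populate the error bits */

          if (err0 & CPU_BOOT_ERR0_TS_INIT_FAIL)
                  dev_warn(hdev->dev,
                          "thermal sensor init failed, continuing boot\n");

          if (err0 & CPU_BOOT_ERR0_DRAM_INIT_FAIL) {
                  dev_err(hdev->dev, "DRAM init failed, DRAM is unusable\n");
                  return -EIO;
          }

          return 0;
  }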
+ */ +#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << 0) +#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << 1) +#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << 2) +#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << 3) +#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << 4) +#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << 5) +#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << 6) +#define CPU_BOOT_ERR0_ENABLED (1 << 31) + +enum cpu_boot_status { + CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */ + CPU_BOOT_STATUS_IN_WFE = 1, + CPU_BOOT_STATUS_DRAM_RDY = 2, + CPU_BOOT_STATUS_SRAM_AVAIL = 3, + CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */ + CPU_BOOT_STATUS_IN_PREBOOT = 5, + CPU_BOOT_STATUS_IN_SPL, /* deprecated - not reported */ + CPU_BOOT_STATUS_IN_UBOOT = 7, + CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */ + CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */ + /* U-Boot console prompt activated, commands are not processed */ + CPU_BOOT_STATUS_UBOOT_NOT_READY = 10, + /* Finished NICs init, reported after DRAM and NICs */ + CPU_BOOT_STATUS_NIC_FW_RDY = 11, + CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */ + CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */ + CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */ + /* Last boot loader progress status, ready to receive commands */ + CPU_BOOT_STATUS_READY_TO_BOOT = 15, + CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16, +}; + +enum kmd_msg { + KMD_MSG_NA = 0, + KMD_MSG_GOTO_WFE, + KMD_MSG_FIT_RDY, + KMD_MSG_SKIP_BMC, +}; + +enum cpu_msg_status { + CPU_MSG_CLR = 0, + CPU_MSG_OK, + CPU_MSG_ERR, +}; + +#endif /* HL_BOOT_IF_H */ diff --git a/drivers/misc/habanalabs/include/common/qman_if.h b/drivers/misc/habanalabs/include/common/qman_if.h new file mode 100644 index 000000000000..0fdb49188ed7 --- /dev/null +++ b/drivers/misc/habanalabs/include/common/qman_if.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2018 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef QMAN_IF_H +#define QMAN_IF_H + +#include + +/* + * PRIMARY QUEUE + */ + +struct hl_bd { + __le64 ptr; + __le32 len; + __le32 ctl; +}; + +#define HL_BD_SIZE sizeof(struct hl_bd) + +/* + * S/W CTL FIELDS. + * + * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is + * valid. 1 means the repeat field is valid, 0 means not-valid, + * i.e. repeat == 1 + */ +#define BD_CTL_REPEAT_VALID_SHIFT 24 +#define BD_CTL_REPEAT_VALID_MASK 0x01000000 + +#define BD_CTL_SHADOW_INDEX_SHIFT 0 +#define BD_CTL_SHADOW_INDEX_MASK 0x00000FFF + +/* + * H/W CTL FIELDS + */ + +#define BD_CTL_COMP_OFFSET_SHIFT 16 +#define BD_CTL_COMP_OFFSET_MASK 0x00FF0000 + +#define BD_CTL_COMP_DATA_SHIFT 0 +#define BD_CTL_COMP_DATA_MASK 0x0000FFFF + +/* + * COMPLETION QUEUE + */ + +struct hl_cq_entry { + __le32 data; +}; + +#define HL_CQ_ENTRY_SIZE sizeof(struct hl_cq_entry) + +#define CQ_ENTRY_READY_SHIFT 31 +#define CQ_ENTRY_READY_MASK 0x80000000 + +#define CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT 30 +#define CQ_ENTRY_SHADOW_INDEX_VALID_MASK 0x40000000 + +#define CQ_ENTRY_SHADOW_INDEX_SHIFT BD_CTL_SHADOW_INDEX_SHIFT +#define CQ_ENTRY_SHADOW_INDEX_MASK BD_CTL_SHADOW_INDEX_MASK + + +#endif /* QMAN_IF_H */ diff --git a/drivers/misc/habanalabs/include/hl_boot_if.h b/drivers/misc/habanalabs/include/hl_boot_if.h deleted file mode 100644 index c22d134e73af..000000000000 --- a/drivers/misc/habanalabs/include/hl_boot_if.h +++ /dev/null @@ -1,98 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright 2018-2020 HabanaLabs, Ltd. 
- * All Rights Reserved. - * - */ - -#ifndef HL_BOOT_IF_H -#define HL_BOOT_IF_H - -#define LKD_HARD_RESET_MAGIC 0xED7BD694 -#define HL_POWER9_HOST_MAGIC 0x1DA30009 - -#define BOOT_FIT_SRAM_OFFSET 0x200000 - -/* - * CPU error bits in BOOT_ERROR registers - * - * CPU_BOOT_ERR0_DRAM_INIT_FAIL DRAM initialization failed. - * DRAM is not reliable to use. - * - * CPU_BOOT_ERR0_FIT_CORRUPTED FIT data integrity verification of the - * image provided by the host has failed. - * - * CPU_BOOT_ERR0_TS_INIT_FAIL Thermal Sensor initialization failed. - * Boot continues as usual, but keep in - * mind this is a warning. - * - * CPU_BOOT_ERR0_DRAM_SKIPPED DRAM initialization has been skipped. - * Skipping DRAM initialization has been - * requested (e.g. strap, command, etc.) - * and FW skipped the DRAM initialization. - * Host can initialize the DRAM. - * - * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED Waiting for BMC data will be skipped. - * Meaning the BMC data might not be - * available until reset. - * - * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY NIC data from BMC is not ready. - * BMC has not provided the NIC data yet. - * Once provided this bit will be cleared. - * - * CPU_BOOT_ERR0_NIC_FW_FAIL NIC FW loading failed. - * The NIC FW loading and initialization - * failed. This means NICs are not usable. - * - * CPU_BOOT_ERR0_ENABLED Error registers enabled. - * This is a main indication that the - * running FW populates the error - * registers. Meaning the error bits are - * not garbage, but actual error statuses. - */ -#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << 0) -#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << 1) -#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << 2) -#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << 3) -#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << 4) -#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << 5) -#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << 6) -#define CPU_BOOT_ERR0_ENABLED (1 << 31) - -enum cpu_boot_status { - CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */ - CPU_BOOT_STATUS_IN_WFE = 1, - CPU_BOOT_STATUS_DRAM_RDY = 2, - CPU_BOOT_STATUS_SRAM_AVAIL = 3, - CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */ - CPU_BOOT_STATUS_IN_PREBOOT = 5, - CPU_BOOT_STATUS_IN_SPL, /* deprecated - not reported */ - CPU_BOOT_STATUS_IN_UBOOT = 7, - CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */ - CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */ - /* U-Boot console prompt activated, commands are not processed */ - CPU_BOOT_STATUS_UBOOT_NOT_READY = 10, - /* Finished NICs init, reported after DRAM and NICs */ - CPU_BOOT_STATUS_NIC_FW_RDY = 11, - CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */ - CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */ - CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */ - /* Last boot loader progress status, ready to receive commands */ - CPU_BOOT_STATUS_READY_TO_BOOT = 15, - CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16, -}; - -enum kmd_msg { - KMD_MSG_NA = 0, - KMD_MSG_GOTO_WFE, - KMD_MSG_FIT_RDY, - KMD_MSG_SKIP_BMC, -}; - -enum cpu_msg_status { - CPU_MSG_CLR = 0, - CPU_MSG_OK, - CPU_MSG_ERR, -}; - -#endif /* HL_BOOT_IF_H */ diff --git a/drivers/misc/habanalabs/include/qman_if.h b/drivers/misc/habanalabs/include/qman_if.h deleted file mode 100644 index 0fdb49188ed7..000000000000 --- a/drivers/misc/habanalabs/include/qman_if.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright 2016-2018 HabanaLabs, Ltd. - * All Rights Reserved. 
- * - */ - -#ifndef QMAN_IF_H -#define QMAN_IF_H - -#include - -/* - * PRIMARY QUEUE - */ - -struct hl_bd { - __le64 ptr; - __le32 len; - __le32 ctl; -}; - -#define HL_BD_SIZE sizeof(struct hl_bd) - -/* - * S/W CTL FIELDS. - * - * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is - * valid. 1 means the repeat field is valid, 0 means not-valid, - * i.e. repeat == 1 - */ -#define BD_CTL_REPEAT_VALID_SHIFT 24 -#define BD_CTL_REPEAT_VALID_MASK 0x01000000 - -#define BD_CTL_SHADOW_INDEX_SHIFT 0 -#define BD_CTL_SHADOW_INDEX_MASK 0x00000FFF - -/* - * H/W CTL FIELDS - */ - -#define BD_CTL_COMP_OFFSET_SHIFT 16 -#define BD_CTL_COMP_OFFSET_MASK 0x00FF0000 - -#define BD_CTL_COMP_DATA_SHIFT 0 -#define BD_CTL_COMP_DATA_MASK 0x0000FFFF - -/* - * COMPLETION QUEUE - */ - -struct hl_cq_entry { - __le32 data; -}; - -#define HL_CQ_ENTRY_SIZE sizeof(struct hl_cq_entry) - -#define CQ_ENTRY_READY_SHIFT 31 -#define CQ_ENTRY_READY_MASK 0x80000000 - -#define CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT 30 -#define CQ_ENTRY_SHADOW_INDEX_VALID_MASK 0x40000000 - -#define CQ_ENTRY_SHADOW_INDEX_SHIFT BD_CTL_SHADOW_INDEX_SHIFT -#define CQ_ENTRY_SHADOW_INDEX_MASK BD_CTL_SHADOW_INDEX_MASK - - -#endif /* QMAN_IF_H */ diff --git a/drivers/misc/habanalabs/irq.c b/drivers/misc/habanalabs/irq.c deleted file mode 100644 index c8db717023f5..000000000000 --- a/drivers/misc/habanalabs/irq.c +++ /dev/null @@ -1,320 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. - */ - -#include "habanalabs.h" - -#include - -/** - * struct hl_eqe_work - This structure is used to schedule work of EQ - * entry and armcp_reset event - * - * @eq_work: workqueue object to run when EQ entry is received - * @hdev: pointer to device structure - * @eq_entry: copy of the EQ entry - */ -struct hl_eqe_work { - struct work_struct eq_work; - struct hl_device *hdev; - struct hl_eq_entry eq_entry; -}; - -/** - * hl_cq_inc_ptr - increment ci or pi of cq - * - * @ptr: the current ci or pi value of the completion queue - * - * Increment ptr by 1. If it reaches the number of completion queue - * entries, set it to 0 - */ -inline u32 hl_cq_inc_ptr(u32 ptr) -{ - ptr++; - if (unlikely(ptr == HL_CQ_LENGTH)) - ptr = 0; - return ptr; -} - -/** - * hl_eq_inc_ptr - increment ci of eq - * - * @ptr: the current ci value of the event queue - * - * Increment ptr by 1. 
If it reaches the number of event queue - * entries, set it to 0 - */ -inline u32 hl_eq_inc_ptr(u32 ptr) -{ - ptr++; - if (unlikely(ptr == HL_EQ_LENGTH)) - ptr = 0; - return ptr; -} - -static void irq_handle_eqe(struct work_struct *work) -{ - struct hl_eqe_work *eqe_work = container_of(work, struct hl_eqe_work, - eq_work); - struct hl_device *hdev = eqe_work->hdev; - - hdev->asic_funcs->handle_eqe(hdev, &eqe_work->eq_entry); - - kfree(eqe_work); -} - -/** - * hl_irq_handler_cq - irq handler for completion queue - * - * @irq: irq number - * @arg: pointer to completion queue structure - * - */ -irqreturn_t hl_irq_handler_cq(int irq, void *arg) -{ - struct hl_cq *cq = arg; - struct hl_device *hdev = cq->hdev; - struct hl_hw_queue *queue; - struct hl_cs_job *job; - bool shadow_index_valid; - u16 shadow_index; - struct hl_cq_entry *cq_entry, *cq_base; - - if (hdev->disabled) { - dev_dbg(hdev->dev, - "Device disabled but received IRQ %d for CQ %d\n", - irq, cq->hw_queue_id); - return IRQ_HANDLED; - } - - cq_base = (struct hl_cq_entry *) (uintptr_t) cq->kernel_address; - - while (1) { - bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) & - CQ_ENTRY_READY_MASK) - >> CQ_ENTRY_READY_SHIFT); - - if (!entry_ready) - break; - - cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci]; - - /* Make sure we read CQ entry contents after we've - * checked the ownership bit. - */ - dma_rmb(); - - shadow_index_valid = ((le32_to_cpu(cq_entry->data) & - CQ_ENTRY_SHADOW_INDEX_VALID_MASK) - >> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT); - - shadow_index = (u16) ((le32_to_cpu(cq_entry->data) & - CQ_ENTRY_SHADOW_INDEX_MASK) - >> CQ_ENTRY_SHADOW_INDEX_SHIFT); - - queue = &hdev->kernel_queues[cq->hw_queue_id]; - - if ((shadow_index_valid) && (!hdev->disabled)) { - job = queue->shadow_queue[hl_pi_2_offset(shadow_index)]; - queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work); - } - - atomic_inc(&queue->ci); - - /* Clear CQ entry ready bit */ - cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) & - ~CQ_ENTRY_READY_MASK); - - cq->ci = hl_cq_inc_ptr(cq->ci); - - /* Increment free slots */ - atomic_inc(&cq->free_slots_cnt); - } - - return IRQ_HANDLED; -} - -/** - * hl_irq_handler_eq - irq handler for event queue - * - * @irq: irq number - * @arg: pointer to event queue structure - * - */ -irqreturn_t hl_irq_handler_eq(int irq, void *arg) -{ - struct hl_eq *eq = arg; - struct hl_device *hdev = eq->hdev; - struct hl_eq_entry *eq_entry; - struct hl_eq_entry *eq_base; - struct hl_eqe_work *handle_eqe_work; - - eq_base = (struct hl_eq_entry *) (uintptr_t) eq->kernel_address; - - while (1) { - bool entry_ready = - ((le32_to_cpu(eq_base[eq->ci].hdr.ctl) & - EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT); - - if (!entry_ready) - break; - - eq_entry = &eq_base[eq->ci]; - - /* - * Make sure we read EQ entry contents after we've - * checked the ownership bit. 
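For reference, the 32-bit data word of a completion queue entry, which hl_irq_handler_cq() above decodes field by field, is laid out by the CQ_ENTRY_* masks from qman_if.h. A flat restatement of the same decode (no new logic, just the bit layout in one place):

  u32 data = le32_to_cpu(cq_entry->data);

  bool ready      = (data & CQ_ENTRY_READY_MASK) >>
                          CQ_ENTRY_READY_SHIFT;                 /* bit 31 */
  bool idx_valid  = (data & CQ_ENTRY_SHADOW_INDEX_VALID_MASK) >>
                          CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT;    /* bit 30 */
  u16  shadow_idx = (data & CQ_ENTRY_SHADOW_INDEX_MASK) >>
                          CQ_ENTRY_SHADOW_INDEX_SHIFT;          /* bits 11:0 */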
- */ - dma_rmb(); - - if (hdev->disabled) { - dev_warn(hdev->dev, - "Device disabled but received IRQ %d for EQ\n", - irq); - goto skip_irq; - } - - handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC); - if (handle_eqe_work) { - INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe); - handle_eqe_work->hdev = hdev; - - memcpy(&handle_eqe_work->eq_entry, eq_entry, - sizeof(*eq_entry)); - - queue_work(hdev->eq_wq, &handle_eqe_work->eq_work); - } -skip_irq: - /* Clear EQ entry ready bit */ - eq_entry->hdr.ctl = - cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) & - ~EQ_CTL_READY_MASK); - - eq->ci = hl_eq_inc_ptr(eq->ci); - - hdev->asic_funcs->update_eq_ci(hdev, eq->ci); - } - - return IRQ_HANDLED; -} - -/** - * hl_cq_init - main initialization function for an cq object - * - * @hdev: pointer to device structure - * @q: pointer to cq structure - * @hw_queue_id: The H/W queue ID this completion queue belongs to - * - * Allocate dma-able memory for the completion queue and initialize fields - * Returns 0 on success - */ -int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id) -{ - void *p; - - p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES, - &q->bus_address, GFP_KERNEL | __GFP_ZERO); - if (!p) - return -ENOMEM; - - q->hdev = hdev; - q->kernel_address = (u64) (uintptr_t) p; - q->hw_queue_id = hw_queue_id; - q->ci = 0; - q->pi = 0; - - atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH); - - return 0; -} - -/** - * hl_cq_fini - destroy completion queue - * - * @hdev: pointer to device structure - * @q: pointer to cq structure - * - * Free the completion queue memory - */ -void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q) -{ - hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address, q->bus_address); -} - -void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q) -{ - q->ci = 0; - q->pi = 0; - - atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH); - - /* - * It's not enough to just reset the PI/CI because the H/W may have - * written valid completion entries before it was halted and therefore - * we need to clean the actual queues so we won't process old entries - * when the device is operational again - */ - - memset((void *) (uintptr_t) q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES); -} - -/** - * hl_eq_init - main initialization function for an event queue object - * - * @hdev: pointer to device structure - * @q: pointer to eq structure - * - * Allocate dma-able memory for the event queue and initialize fields - * Returns 0 on success - */ -int hl_eq_init(struct hl_device *hdev, struct hl_eq *q) -{ - void *p; - - p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, - HL_EQ_SIZE_IN_BYTES, - &q->bus_address); - if (!p) - return -ENOMEM; - - q->hdev = hdev; - q->kernel_address = (u64) (uintptr_t) p; - q->ci = 0; - - return 0; -} - -/** - * hl_eq_fini - destroy event queue - * - * @hdev: pointer to device structure - * @q: pointer to eq structure - * - * Free the event queue memory - */ -void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q) -{ - flush_workqueue(hdev->eq_wq); - - hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, - HL_EQ_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address); -} - -void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) -{ - q->ci = 0; - - /* - * It's not enough to just reset the PI/CI because the H/W may have - * written valid completion entries before it was halted and therefore - * we need to clean the actual queues so we won't process old entries - * when the 
device is operational again - */ - - memset((void *) (uintptr_t) q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES); -} diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c deleted file mode 100644 index e4e1693e5c6c..000000000000 --- a/drivers/misc/habanalabs/memory.c +++ /dev/null @@ -1,1843 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. - */ - -#include -#include "habanalabs.h" -#include "include/hw_ip/mmu/mmu_general.h" - -#include -#include -#include - -#define HL_MMU_DEBUG 0 - -/* - * The va ranges in context object contain a list with the available chunks of - * device virtual memory. - * There is one range for host allocations and one for DRAM allocations. - * - * On initialization each range contains one chunk of all of its available - * virtual range which is a half of the total device virtual range. - * - * On each mapping of physical pages, a suitable virtual range chunk (with a - * minimum size) is selected from the list. If the chunk size equals the - * requested size, the chunk is returned. Otherwise, the chunk is split into - * two chunks - one to return as result and a remainder to stay in the list. - * - * On each Unmapping of a virtual address, the relevant virtual chunk is - * returned to the list. The chunk is added to the list and if its edges match - * the edges of the adjacent chunks (means a contiguous chunk can be created), - * the chunks are merged. - * - * On finish, the list is checked to have only one chunk of all the relevant - * virtual range (which is a half of the device total virtual range). - * If not (means not all mappings were unmapped), a warning is printed. - */ - -/* - * alloc_device_memory - allocate device memory - * - * @ctx : current context - * @args : host parameters containing the requested size - * @ret_handle : result handle - * - * This function does the following: - * - Allocate the requested size rounded up to 2MB pages - * - Return unique handle - */ -static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, - u32 *ret_handle) -{ - struct hl_device *hdev = ctx->hdev; - struct hl_vm *vm = &hdev->vm; - struct hl_vm_phys_pg_pack *phys_pg_pack; - u64 paddr = 0, total_size, num_pgs, i; - u32 num_curr_pgs, page_size, page_shift; - int handle, rc; - bool contiguous; - - num_curr_pgs = 0; - page_size = hdev->asic_prop.dram_page_size; - page_shift = __ffs(page_size); - num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift; - total_size = num_pgs << page_shift; - - contiguous = args->flags & HL_MEM_CONTIGUOUS; - - if (contiguous) { - paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size); - if (!paddr) { - dev_err(hdev->dev, - "failed to allocate %llu huge contiguous pages\n", - num_pgs); - return -ENOMEM; - } - } - - phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); - if (!phys_pg_pack) { - rc = -ENOMEM; - goto pages_pack_err; - } - - phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK; - phys_pg_pack->asid = ctx->asid; - phys_pg_pack->npages = num_pgs; - phys_pg_pack->page_size = page_size; - phys_pg_pack->total_size = total_size; - phys_pg_pack->flags = args->flags; - phys_pg_pack->contiguous = contiguous; - - phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL); - if (!phys_pg_pack->pages) { - rc = -ENOMEM; - goto pages_arr_err; - } - - if (phys_pg_pack->contiguous) { - for (i = 0 ; i < num_pgs ; i++) - phys_pg_pack->pages[i] = paddr + i * page_size; - } else { - for (i = 0 ; i < num_pgs ; i++) { - 
phys_pg_pack->pages[i] = (u64) gen_pool_alloc( - vm->dram_pg_pool, - page_size); - if (!phys_pg_pack->pages[i]) { - dev_err(hdev->dev, - "Failed to allocate device memory (out of memory)\n"); - rc = -ENOMEM; - goto page_err; - } - - num_curr_pgs++; - } - } - - spin_lock(&vm->idr_lock); - handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0, - GFP_ATOMIC); - spin_unlock(&vm->idr_lock); - - if (handle < 0) { - dev_err(hdev->dev, "Failed to get handle for page\n"); - rc = -EFAULT; - goto idr_err; - } - - for (i = 0 ; i < num_pgs ; i++) - kref_get(&vm->dram_pg_pool_refcount); - - phys_pg_pack->handle = handle; - - atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem); - atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem); - - *ret_handle = handle; - - return 0; - -idr_err: -page_err: - if (!phys_pg_pack->contiguous) - for (i = 0 ; i < num_curr_pgs ; i++) - gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i], - page_size); - - kvfree(phys_pg_pack->pages); -pages_arr_err: - kfree(phys_pg_pack); -pages_pack_err: - if (contiguous) - gen_pool_free(vm->dram_pg_pool, paddr, total_size); - - return rc; -} - -/* - * dma_map_host_va - DMA mapping of the given host virtual address. - * @hdev: habanalabs device structure - * @addr: the host virtual address of the memory area - * @size: the size of the memory area - * @p_userptr: pointer to result userptr structure - * - * This function does the following: - * - Allocate userptr structure - * - Pin the given host memory using the userptr structure - * - Perform DMA mapping to have the DMA addresses of the pages - */ -static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, - struct hl_userptr **p_userptr) -{ - struct hl_userptr *userptr; - int rc; - - userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); - if (!userptr) { - rc = -ENOMEM; - goto userptr_err; - } - - rc = hl_pin_host_memory(hdev, addr, size, userptr); - if (rc) { - dev_err(hdev->dev, "Failed to pin host memory\n"); - goto pin_err; - } - - rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, - userptr->sgt->nents, DMA_BIDIRECTIONAL); - if (rc) { - dev_err(hdev->dev, "failed to map sgt with DMA region\n"); - goto dma_map_err; - } - - userptr->dma_mapped = true; - userptr->dir = DMA_BIDIRECTIONAL; - userptr->vm_type = VM_TYPE_USERPTR; - - *p_userptr = userptr; - - return 0; - -dma_map_err: - hl_unpin_host_memory(hdev, userptr); -pin_err: - kfree(userptr); -userptr_err: - - return rc; -} - -/* - * dma_unmap_host_va - DMA unmapping of the given host virtual address. 
- * @hdev: habanalabs device structure - * @userptr: userptr to free - * - * This function does the following: - * - Unpins the physical pages - * - Frees the userptr structure - */ -static void dma_unmap_host_va(struct hl_device *hdev, - struct hl_userptr *userptr) -{ - hl_unpin_host_memory(hdev, userptr); - kfree(userptr); -} - -/* - * dram_pg_pool_do_release - free DRAM pages pool - * - * @ref : pointer to reference object - * - * This function does the following: - * - Frees the idr structure of physical pages handles - * - Frees the generic pool of DRAM physical pages - */ -static void dram_pg_pool_do_release(struct kref *ref) -{ - struct hl_vm *vm = container_of(ref, struct hl_vm, - dram_pg_pool_refcount); - - /* - * free the idr here as only here we know for sure that there are no - * allocated physical pages and hence there are no handles in use - */ - idr_destroy(&vm->phys_pg_pack_handles); - gen_pool_destroy(vm->dram_pg_pool); -} - -/* - * free_phys_pg_pack - free physical page pack - * @hdev: habanalabs device structure - * @phys_pg_pack: physical page pack to free - * - * This function does the following: - * - For DRAM memory only, iterate over the pack and free each physical block - * structure by returning it to the general pool - * - Free the hl_vm_phys_pg_pack structure - */ -static void free_phys_pg_pack(struct hl_device *hdev, - struct hl_vm_phys_pg_pack *phys_pg_pack) -{ - struct hl_vm *vm = &hdev->vm; - u64 i; - - if (!phys_pg_pack->created_from_userptr) { - if (phys_pg_pack->contiguous) { - gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0], - phys_pg_pack->total_size); - - for (i = 0; i < phys_pg_pack->npages ; i++) - kref_put(&vm->dram_pg_pool_refcount, - dram_pg_pool_do_release); - } else { - for (i = 0 ; i < phys_pg_pack->npages ; i++) { - gen_pool_free(vm->dram_pg_pool, - phys_pg_pack->pages[i], - phys_pg_pack->page_size); - kref_put(&vm->dram_pg_pool_refcount, - dram_pg_pool_do_release); - } - } - } - - kvfree(phys_pg_pack->pages); - kfree(phys_pg_pack); -} - -/* - * free_device_memory - free device memory - * - * @ctx : current context - * @handle : handle of the memory chunk to free - * - * This function does the following: - * - Free the device memory related to the given handle - */ -static int free_device_memory(struct hl_ctx *ctx, u32 handle) -{ - struct hl_device *hdev = ctx->hdev; - struct hl_vm *vm = &hdev->vm; - struct hl_vm_phys_pg_pack *phys_pg_pack; - - spin_lock(&vm->idr_lock); - phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); - if (phys_pg_pack) { - if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) { - dev_err(hdev->dev, "handle %u is mapped, cannot free\n", - handle); - spin_unlock(&vm->idr_lock); - return -EINVAL; - } - - /* - * must remove from idr before the freeing of the physical - * pages as the refcount of the pool is also the trigger of the - * idr destroy - */ - idr_remove(&vm->phys_pg_pack_handles, handle); - spin_unlock(&vm->idr_lock); - - atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem); - atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem); - - free_phys_pg_pack(hdev, phys_pg_pack); - } else { - spin_unlock(&vm->idr_lock); - dev_err(hdev->dev, - "free device memory failed, no match for handle %u\n", - handle); - return -EINVAL; - } - - return 0; -} - -/* - * clear_va_list_locked - free virtual addresses list - * - * @hdev : habanalabs device structure - * @va_list : list of virtual addresses to free - * - * This function does the following: - * - Iterate over the list and free each virtual addresses 
block - * - * This function should be called only when va_list lock is taken - */ -static void clear_va_list_locked(struct hl_device *hdev, - struct list_head *va_list) -{ - struct hl_vm_va_block *va_block, *tmp; - - list_for_each_entry_safe(va_block, tmp, va_list, node) { - list_del(&va_block->node); - kfree(va_block); - } -} - -/* - * print_va_list_locked - print virtual addresses list - * - * @hdev : habanalabs device structure - * @va_list : list of virtual addresses to print - * - * This function does the following: - * - Iterate over the list and print each virtual addresses block - * - * This function should be called only when va_list lock is taken - */ -static void print_va_list_locked(struct hl_device *hdev, - struct list_head *va_list) -{ -#if HL_MMU_DEBUG - struct hl_vm_va_block *va_block; - - dev_dbg(hdev->dev, "print va list:\n"); - - list_for_each_entry(va_block, va_list, node) - dev_dbg(hdev->dev, - "va block, start: 0x%llx, end: 0x%llx, size: %llu\n", - va_block->start, va_block->end, va_block->size); -#endif -} - -/* - * merge_va_blocks_locked - merge a virtual block if possible - * - * @hdev : pointer to the habanalabs device structure - * @va_list : pointer to the virtual addresses block list - * @va_block : virtual block to merge with adjacent blocks - * - * This function does the following: - * - Merge the given blocks with the adjacent blocks if their virtual ranges - * create a contiguous virtual range - * - * This Function should be called only when va_list lock is taken - */ -static void merge_va_blocks_locked(struct hl_device *hdev, - struct list_head *va_list, struct hl_vm_va_block *va_block) -{ - struct hl_vm_va_block *prev, *next; - - prev = list_prev_entry(va_block, node); - if (&prev->node != va_list && prev->end + 1 == va_block->start) { - prev->end = va_block->end; - prev->size = prev->end - prev->start; - list_del(&va_block->node); - kfree(va_block); - va_block = prev; - } - - next = list_next_entry(va_block, node); - if (&next->node != va_list && va_block->end + 1 == next->start) { - next->start = va_block->start; - next->size = next->end - next->start; - list_del(&va_block->node); - kfree(va_block); - } -} - -/* - * add_va_block_locked - add a virtual block to the virtual addresses list - * - * @hdev : pointer to the habanalabs device structure - * @va_list : pointer to the virtual addresses block list - * @start : start virtual address - * @end : end virtual address - * - * This function does the following: - * - Add the given block to the virtual blocks list and merge with other - * blocks if a contiguous virtual block can be created - * - * This Function should be called only when va_list lock is taken - */ -static int add_va_block_locked(struct hl_device *hdev, - struct list_head *va_list, u64 start, u64 end) -{ - struct hl_vm_va_block *va_block, *res = NULL; - u64 size = end - start; - - print_va_list_locked(hdev, va_list); - - list_for_each_entry(va_block, va_list, node) { - /* TODO: remove upon matureness */ - if (hl_mem_area_crosses_range(start, size, va_block->start, - va_block->end)) { - dev_err(hdev->dev, - "block crossing ranges at start 0x%llx, end 0x%llx\n", - va_block->start, va_block->end); - return -EINVAL; - } - - if (va_block->end < start) - res = va_block; - } - - va_block = kmalloc(sizeof(*va_block), GFP_KERNEL); - if (!va_block) - return -ENOMEM; - - va_block->start = start; - va_block->end = end; - va_block->size = size; - - if (!res) - list_add(&va_block->node, va_list); - else - list_add(&va_block->node, &res->node); - - 
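/*
 * The merge step just below coalesces the newly inserted block with its
 * neighbours once their ranges touch. A minimal user-space sketch of that
 * sorted-insert-plus-coalesce idea; the singly linked list and the names
 * here are simplifications, not the driver's struct hl_vm_va_block code.
 */
#include <stdio.h>
#include <stdlib.h>

struct block {
	unsigned long long start, end;
	struct block *next;
};

static struct block *add_block(struct block *head,
			       unsigned long long start, unsigned long long end)
{
	struct block *blk = malloc(sizeof(*blk)), **pos = &head, *it;

	if (!blk)
		return head;

	blk->start = start;
	blk->end = end;

	/* keep the free list sorted by address */
	while (*pos && (*pos)->end < start)
		pos = &(*pos)->next;
	blk->next = *pos;
	*pos = blk;

	/* coalesce any neighbours whose ranges are now adjacent */
	for (it = head; it && it->next; ) {
		if (it->end + 1 == it->next->start) {
			struct block *victim = it->next;

			it->end = victim->end;
			it->next = victim->next;
			free(victim);
		} else {
			it = it->next;
		}
	}

	return head;
}

int main(void)
{
	struct block *head = NULL, *it;

	head = add_block(head, 0x1000, 0x1fff);
	head = add_block(head, 0x3000, 0x3fff);
	head = add_block(head, 0x2000, 0x2fff);	/* bridges the two above */

	for (it = head; it; it = it->next)
		printf("free block: 0x%llx - 0x%llx\n", it->start, it->end);
	return 0;
}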
merge_va_blocks_locked(hdev, va_list, va_block); - - print_va_list_locked(hdev, va_list); - - return 0; -} - -/* - * add_va_block - wrapper for add_va_block_locked - * - * @hdev : pointer to the habanalabs device structure - * @va_list : pointer to the virtual addresses block list - * @start : start virtual address - * @end : end virtual address - * - * This function does the following: - * - Takes the list lock and calls add_va_block_locked - */ -static inline int add_va_block(struct hl_device *hdev, - struct hl_va_range *va_range, u64 start, u64 end) -{ - int rc; - - mutex_lock(&va_range->lock); - rc = add_va_block_locked(hdev, &va_range->list, start, end); - mutex_unlock(&va_range->lock); - - return rc; -} - -/* - * get_va_block - get a virtual block with the requested size - * - * @hdev : pointer to the habanalabs device structure - * @va_range : pointer to the virtual addresses range - * @size : requested block size - * @hint_addr : hint for request address by the user - * @is_userptr : is host or DRAM memory - * - * This function does the following: - * - Iterate on the virtual block list to find a suitable virtual block for the - * requested size - * - Reserve the requested block and update the list - * - Return the start address of the virtual block - */ -static u64 get_va_block(struct hl_device *hdev, - struct hl_va_range *va_range, u64 size, u64 hint_addr, - bool is_userptr) -{ - struct hl_vm_va_block *va_block, *new_va_block = NULL; - u64 valid_start, valid_size, prev_start, prev_end, page_mask, - res_valid_start = 0, res_valid_size = 0; - u32 page_size; - bool add_prev = false; - - if (is_userptr) - /* - * We cannot know if the user allocated memory with huge pages - * or not, hence we continue with the biggest possible - * granularity. 
- */ - page_size = hdev->asic_prop.pmmu_huge.page_size; - else - page_size = hdev->asic_prop.dmmu.page_size; - - page_mask = ~((u64)page_size - 1); - - mutex_lock(&va_range->lock); - - print_va_list_locked(hdev, &va_range->list); - - list_for_each_entry(va_block, &va_range->list, node) { - /* calc the first possible aligned addr */ - valid_start = va_block->start; - - if (valid_start & (page_size - 1)) { - valid_start &= page_mask; - valid_start += page_size; - if (valid_start > va_block->end) - continue; - } - - valid_size = va_block->end - valid_start; - - if (valid_size >= size && - (!new_va_block || valid_size < res_valid_size)) { - new_va_block = va_block; - res_valid_start = valid_start; - res_valid_size = valid_size; - } - - if (hint_addr && hint_addr >= valid_start && - ((hint_addr + size) <= va_block->end)) { - new_va_block = va_block; - res_valid_start = hint_addr; - res_valid_size = valid_size; - break; - } - } - - if (!new_va_block) { - dev_err(hdev->dev, "no available va block for size %llu\n", - size); - goto out; - } - - if (res_valid_start > new_va_block->start) { - prev_start = new_va_block->start; - prev_end = res_valid_start - 1; - - new_va_block->start = res_valid_start; - new_va_block->size = res_valid_size; - - add_prev = true; - } - - if (new_va_block->size > size) { - new_va_block->start += size; - new_va_block->size = new_va_block->end - new_va_block->start; - } else { - list_del(&new_va_block->node); - kfree(new_va_block); - } - - if (add_prev) - add_va_block_locked(hdev, &va_range->list, prev_start, - prev_end); - - print_va_list_locked(hdev, &va_range->list); -out: - mutex_unlock(&va_range->lock); - - return res_valid_start; -} - -/* - * get_sg_info - get number of pages and the DMA address from SG list - * - * @sg : the SG list - * @dma_addr : pointer to DMA address to return - * - * Calculate the number of consecutive pages described by the SG list. Take the - * offset of the address in the first page, add to it the length and round it up - * to the number of needed pages. - */ -static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) -{ - *dma_addr = sg_dma_address(sg); - - return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) + - (PAGE_SIZE - 1)) >> PAGE_SHIFT; -} - -/* - * init_phys_pg_pack_from_userptr - initialize physical page pack from host - * memory - * @ctx: current context - * @userptr: userptr to initialize from - * @pphys_pg_pack: result pointer - * - * This function does the following: - * - Pin the physical pages related to the given virtual block - * - Create a physical page pack from the physical pages related to the given - * virtual block - */ -static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, - struct hl_userptr *userptr, - struct hl_vm_phys_pg_pack **pphys_pg_pack) -{ - struct hl_vm_phys_pg_pack *phys_pg_pack; - struct scatterlist *sg; - dma_addr_t dma_addr; - u64 page_mask, total_npages; - u32 npages, page_size = PAGE_SIZE, - huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size; - bool first = true, is_huge_page_opt = true; - int rc, i, j; - u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); - - phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); - if (!phys_pg_pack) - return -ENOMEM; - - phys_pg_pack->vm_type = userptr->vm_type; - phys_pg_pack->created_from_userptr = true; - phys_pg_pack->asid = ctx->asid; - atomic_set(&phys_pg_pack->mapping_cnt, 1); - - /* Only if all dma_addrs are aligned to 2MB and their - * sizes is at least 2MB, we can use huge page mapping. 
- * We limit the 2MB optimization to this condition, - * since later on we acquire the related VA range as one - * consecutive block. - */ - total_npages = 0; - for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { - npages = get_sg_info(sg, &dma_addr); - - total_npages += npages; - - if ((npages % pgs_in_huge_page) || - (dma_addr & (huge_page_size - 1))) - is_huge_page_opt = false; - } - - if (is_huge_page_opt) { - page_size = huge_page_size; - do_div(total_npages, pgs_in_huge_page); - } - - page_mask = ~(((u64) page_size) - 1); - - phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64), - GFP_KERNEL); - if (!phys_pg_pack->pages) { - rc = -ENOMEM; - goto page_pack_arr_mem_err; - } - - phys_pg_pack->npages = total_npages; - phys_pg_pack->page_size = page_size; - phys_pg_pack->total_size = total_npages * page_size; - - j = 0; - for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { - npages = get_sg_info(sg, &dma_addr); - - /* align down to physical page size and save the offset */ - if (first) { - first = false; - phys_pg_pack->offset = dma_addr & (page_size - 1); - dma_addr &= page_mask; - } - - while (npages) { - phys_pg_pack->pages[j++] = dma_addr; - dma_addr += page_size; - - if (is_huge_page_opt) - npages -= pgs_in_huge_page; - else - npages--; - } - } - - *pphys_pg_pack = phys_pg_pack; - - return 0; - -page_pack_arr_mem_err: - kfree(phys_pg_pack); - - return rc; -} - -/* - * map_phys_pg_pack - maps the physical page pack. - * @ctx: current context - * @vaddr: start address of the virtual area to map from - * @phys_pg_pack: the pack of physical pages to map to - * - * This function does the following: - * - Maps each chunk of virtual memory to matching physical chunk - * - Stores number of successful mappings in the given argument - * - Returns 0 on success, error code otherwise - */ -static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, - struct hl_vm_phys_pg_pack *phys_pg_pack) -{ - struct hl_device *hdev = ctx->hdev; - u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; - u32 page_size = phys_pg_pack->page_size; - int rc = 0; - - for (i = 0 ; i < phys_pg_pack->npages ; i++) { - paddr = phys_pg_pack->pages[i]; - - rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size, - (i + 1) == phys_pg_pack->npages); - if (rc) { - dev_err(hdev->dev, - "map failed for handle %u, npages: %llu, mapped: %llu", - phys_pg_pack->handle, phys_pg_pack->npages, - mapped_pg_cnt); - goto err; - } - - mapped_pg_cnt++; - next_vaddr += page_size; - } - - return 0; - -err: - next_vaddr = vaddr; - for (i = 0 ; i < mapped_pg_cnt ; i++) { - if (hl_mmu_unmap(ctx, next_vaddr, page_size, - (i + 1) == mapped_pg_cnt)) - dev_warn_ratelimited(hdev->dev, - "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n", - phys_pg_pack->handle, next_vaddr, - phys_pg_pack->pages[i], page_size); - - next_vaddr += page_size; - } - - return rc; -} - -/* - * unmap_phys_pg_pack - unmaps the physical page pack - * @ctx: current context - * @vaddr: start address of the virtual area to unmap - * @phys_pg_pack: the pack of physical pages to unmap - */ -static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, - struct hl_vm_phys_pg_pack *phys_pg_pack) -{ - struct hl_device *hdev = ctx->hdev; - u64 next_vaddr, i; - u32 page_size; - - page_size = phys_pg_pack->page_size; - next_vaddr = vaddr; - - for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { - if (hl_mmu_unmap(ctx, next_vaddr, page_size, - (i + 1) == phys_pg_pack->npages)) - dev_warn_ratelimited(hdev->dev, - "unmap failed for 
vaddr: 0x%llx\n", next_vaddr); - - /* - * unmapping on Palladium can be really long, so avoid a CPU - * soft lockup bug by sleeping a little between unmapping pages - */ - if (hdev->pldm) - usleep_range(500, 1000); - } -} - -static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args, - u64 *paddr) -{ - struct hl_device *hdev = ctx->hdev; - struct hl_vm *vm = &hdev->vm; - struct hl_vm_phys_pg_pack *phys_pg_pack; - u32 handle; - - handle = lower_32_bits(args->map_device.handle); - spin_lock(&vm->idr_lock); - phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); - if (!phys_pg_pack) { - spin_unlock(&vm->idr_lock); - dev_err(hdev->dev, "no match for handle %u\n", handle); - return -EINVAL; - } - - *paddr = phys_pg_pack->pages[0]; - - spin_unlock(&vm->idr_lock); - - return 0; -} - -/* - * map_device_va - map the given memory - * - * @ctx : current context - * @args : host parameters with handle/host virtual address - * @device_addr : pointer to result device virtual address - * - * This function does the following: - * - If given a physical device memory handle, map to a device virtual block - * and return the start address of this block - * - If given a host virtual address and size, find the related physical pages, - * map a device virtual block to this pages and return the start address of - * this block - */ -static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, - u64 *device_addr) -{ - struct hl_device *hdev = ctx->hdev; - struct hl_vm *vm = &hdev->vm; - struct hl_vm_phys_pg_pack *phys_pg_pack; - struct hl_userptr *userptr = NULL; - struct hl_vm_hash_node *hnode; - struct hl_va_range *va_range; - enum vm_type_t *vm_type; - u64 ret_vaddr, hint_addr; - u32 handle = 0; - int rc; - bool is_userptr = args->flags & HL_MEM_USERPTR; - - /* Assume failure */ - *device_addr = 0; - - if (is_userptr) { - u64 addr = args->map_host.host_virt_addr, - size = args->map_host.mem_size; - - rc = dma_map_host_va(hdev, addr, size, &userptr); - if (rc) { - dev_err(hdev->dev, "failed to get userptr from va\n"); - return rc; - } - - rc = init_phys_pg_pack_from_userptr(ctx, userptr, - &phys_pg_pack); - if (rc) { - dev_err(hdev->dev, - "unable to init page pack for vaddr 0x%llx\n", - addr); - goto init_page_pack_err; - } - - vm_type = (enum vm_type_t *) userptr; - hint_addr = args->map_host.hint_addr; - handle = phys_pg_pack->handle; - } else { - handle = lower_32_bits(args->map_device.handle); - - spin_lock(&vm->idr_lock); - phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); - if (!phys_pg_pack) { - spin_unlock(&vm->idr_lock); - dev_err(hdev->dev, - "no match for handle %u\n", handle); - return -EINVAL; - } - - /* increment now to avoid freeing device memory while mapping */ - atomic_inc(&phys_pg_pack->mapping_cnt); - - spin_unlock(&vm->idr_lock); - - vm_type = (enum vm_type_t *) phys_pg_pack; - - hint_addr = args->map_device.hint_addr; - } - - /* - * relevant for mapping device physical memory only, as host memory is - * implicitly shared - */ - if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) && - phys_pg_pack->asid != ctx->asid) { - dev_err(hdev->dev, - "Failed to map memory, handle %u is not shared\n", - handle); - rc = -EPERM; - goto shared_err; - } - - hnode = kzalloc(sizeof(*hnode), GFP_KERNEL); - if (!hnode) { - rc = -ENOMEM; - goto hnode_err; - } - - if (is_userptr) - if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size) - va_range = ctx->host_va_range; - else - va_range = ctx->host_huge_va_range; - else - va_range = ctx->dram_va_range; - - 
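/*
 * get_va_block(), invoked just below, rounds each candidate block start up
 * to the MMU page size before checking whether the block can still hold the
 * requested size. A standalone sketch of that round-up; the 4 KB page size
 * is an illustrative value, not a device property.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long page_size = 1ULL << 12;
	unsigned long long page_mask = ~(page_size - 1);
	unsigned long long start = 0x1000123ULL;

	if (start & (page_size - 1)) {
		start &= page_mask;	/* align down to a page boundary */
		start += page_size;	/* then bump to the next page */
	}

	printf("aligned start: 0x%llx\n", start);	/* 0x1001000 */
	return 0;
}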
ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size, - hint_addr, is_userptr); - if (!ret_vaddr) { - dev_err(hdev->dev, "no available va block for handle %u\n", - handle); - rc = -ENOMEM; - goto va_block_err; - } - - mutex_lock(&ctx->mmu_lock); - - rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); - if (rc) { - mutex_unlock(&ctx->mmu_lock); - dev_err(hdev->dev, "mapping page pack failed for handle %u\n", - handle); - goto map_err; - } - - rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type); - - mutex_unlock(&ctx->mmu_lock); - - if (rc) { - dev_err(hdev->dev, - "mapping handle %u failed due to MMU cache invalidation\n", - handle); - goto map_err; - } - - ret_vaddr += phys_pg_pack->offset; - - hnode->ptr = vm_type; - hnode->vaddr = ret_vaddr; - - mutex_lock(&ctx->mem_hash_lock); - hash_add(ctx->mem_hash, &hnode->node, ret_vaddr); - mutex_unlock(&ctx->mem_hash_lock); - - *device_addr = ret_vaddr; - - if (is_userptr) - free_phys_pg_pack(hdev, phys_pg_pack); - - return 0; - -map_err: - if (add_va_block(hdev, va_range, ret_vaddr, - ret_vaddr + phys_pg_pack->total_size - 1)) - dev_warn(hdev->dev, - "release va block failed for handle 0x%x, vaddr: 0x%llx\n", - handle, ret_vaddr); - -va_block_err: - kfree(hnode); -hnode_err: -shared_err: - atomic_dec(&phys_pg_pack->mapping_cnt); - if (is_userptr) - free_phys_pg_pack(hdev, phys_pg_pack); -init_page_pack_err: - if (is_userptr) - dma_unmap_host_va(hdev, userptr); - - return rc; -} - -/* - * unmap_device_va - unmap the given device virtual address - * - * @ctx : current context - * @vaddr : device virtual address to unmap - * @ctx_free : true if in context free flow, false otherwise. - * - * This function does the following: - * - Unmap the physical pages related to the given virtual address - * - return the device virtual block to the virtual block list - */ -static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) -{ - struct hl_device *hdev = ctx->hdev; - struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; - struct hl_vm_hash_node *hnode = NULL; - struct hl_userptr *userptr = NULL; - struct hl_va_range *va_range; - enum vm_type_t *vm_type; - bool is_userptr; - int rc = 0; - - /* protect from double entrance */ - mutex_lock(&ctx->mem_hash_lock); - hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr) - if (vaddr == hnode->vaddr) - break; - - if (!hnode) { - mutex_unlock(&ctx->mem_hash_lock); - dev_err(hdev->dev, - "unmap failed, no mem hnode for vaddr 0x%llx\n", - vaddr); - return -EINVAL; - } - - hash_del(&hnode->node); - mutex_unlock(&ctx->mem_hash_lock); - - vm_type = hnode->ptr; - - if (*vm_type == VM_TYPE_USERPTR) { - is_userptr = true; - userptr = hnode->ptr; - rc = init_phys_pg_pack_from_userptr(ctx, userptr, - &phys_pg_pack); - if (rc) { - dev_err(hdev->dev, - "unable to init page pack for vaddr 0x%llx\n", - vaddr); - goto vm_type_err; - } - - if (phys_pg_pack->page_size == - hdev->asic_prop.pmmu.page_size) - va_range = ctx->host_va_range; - else - va_range = ctx->host_huge_va_range; - } else if (*vm_type == VM_TYPE_PHYS_PACK) { - is_userptr = false; - va_range = ctx->dram_va_range; - phys_pg_pack = hnode->ptr; - } else { - dev_warn(hdev->dev, - "unmap failed, unknown vm desc for vaddr 0x%llx\n", - vaddr); - rc = -EFAULT; - goto vm_type_err; - } - - if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) { - dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr); - rc = -EINVAL; - goto mapping_cnt_err; - } - - vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); - - 
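/*
 * map_device_va() above unwinds failures through a chain of goto labels so
 * that each error path releases only what was already acquired, in reverse
 * order. A self-contained sketch of that idiom; malloc() stands in for the
 * driver's resources (userptr, page pack, hash node, VA block).
 */
#include <stdio.h>
#include <stdlib.h>

static int setup(void)
{
	void *a, *b, *c;

	a = malloc(16);
	if (!a)
		return -1;

	b = malloc(16);
	if (!b)
		goto free_a;

	c = malloc(16);
	if (!c)
		goto free_b;

	/* success: nothing more to do in this sketch, so release everything */
	free(c);
	free(b);
	free(a);
	return 0;

free_b:
	free(b);
free_a:
	free(a);
	return -1;
}

int main(void)
{
	printf("setup: %d\n", setup());
	return 0;
}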
mutex_lock(&ctx->mmu_lock); - - unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); - - /* - * During context free this function is called in a loop to clean all - * the context mappings. Hence the cache invalidation can be called once - * at the loop end rather than for each iteration - */ - if (!ctx_free) - rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true, - *vm_type); - - mutex_unlock(&ctx->mmu_lock); - - /* - * If the context is closing we don't need to check for the MMU cache - * invalidation return code and update the VA free list as in this flow - * we invalidate the MMU cache outside of this unmap function and the VA - * free list will be freed anyway. - */ - if (!ctx_free) { - int tmp_rc; - - if (rc) - dev_err(hdev->dev, - "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n", - vaddr); - - tmp_rc = add_va_block(hdev, va_range, vaddr, - vaddr + phys_pg_pack->total_size - 1); - if (tmp_rc) { - dev_warn(hdev->dev, - "add va block failed for vaddr: 0x%llx\n", - vaddr); - if (!rc) - rc = tmp_rc; - } - } - - atomic_dec(&phys_pg_pack->mapping_cnt); - kfree(hnode); - - if (is_userptr) { - free_phys_pg_pack(hdev, phys_pg_pack); - dma_unmap_host_va(hdev, userptr); - } - - return rc; - -mapping_cnt_err: - if (is_userptr) - free_phys_pg_pack(hdev, phys_pg_pack); -vm_type_err: - mutex_lock(&ctx->mem_hash_lock); - hash_add(ctx->mem_hash, &hnode->node, vaddr); - mutex_unlock(&ctx->mem_hash_lock); - - return rc; -} - -static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) -{ - struct hl_device *hdev = hpriv->hdev; - struct hl_ctx *ctx = hpriv->ctx; - u64 device_addr = 0; - u32 handle = 0; - int rc; - - switch (args->in.op) { - case HL_MEM_OP_ALLOC: - if (args->in.alloc.mem_size == 0) { - dev_err(hdev->dev, - "alloc size must be larger than 0\n"); - rc = -EINVAL; - goto out; - } - - /* Force contiguous as there are no real MMU - * translations to overcome physical memory gaps - */ - args->in.flags |= HL_MEM_CONTIGUOUS; - rc = alloc_device_memory(ctx, &args->in, &handle); - - memset(args, 0, sizeof(*args)); - args->out.handle = (__u64) handle; - break; - - case HL_MEM_OP_FREE: - rc = free_device_memory(ctx, args->in.free.handle); - break; - - case HL_MEM_OP_MAP: - if (args->in.flags & HL_MEM_USERPTR) { - device_addr = args->in.map_host.host_virt_addr; - rc = 0; - } else { - rc = get_paddr_from_handle(ctx, &args->in, - &device_addr); - } - - memset(args, 0, sizeof(*args)); - args->out.device_virt_addr = device_addr; - break; - - case HL_MEM_OP_UNMAP: - rc = 0; - break; - - default: - dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); - rc = -ENOTTY; - break; - } - -out: - return rc; -} - -int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) -{ - union hl_mem_args *args = data; - struct hl_device *hdev = hpriv->hdev; - struct hl_ctx *ctx = hpriv->ctx; - u64 device_addr = 0; - u32 handle = 0; - int rc; - - if (hl_device_disabled_or_in_reset(hdev)) { - dev_warn_ratelimited(hdev->dev, - "Device is %s. Can't execute MEMORY IOCTL\n", - atomic_read(&hdev->in_reset) ? 
"in_reset" : "disabled"); - return -EBUSY; - } - - if (!hdev->mmu_enable) - return mem_ioctl_no_mmu(hpriv, args); - - switch (args->in.op) { - case HL_MEM_OP_ALLOC: - if (!hdev->dram_supports_virtual_memory) { - dev_err(hdev->dev, "DRAM alloc is not supported\n"); - rc = -EINVAL; - goto out; - } - - if (args->in.alloc.mem_size == 0) { - dev_err(hdev->dev, - "alloc size must be larger than 0\n"); - rc = -EINVAL; - goto out; - } - rc = alloc_device_memory(ctx, &args->in, &handle); - - memset(args, 0, sizeof(*args)); - args->out.handle = (__u64) handle; - break; - - case HL_MEM_OP_FREE: - rc = free_device_memory(ctx, args->in.free.handle); - break; - - case HL_MEM_OP_MAP: - rc = map_device_va(ctx, &args->in, &device_addr); - - memset(args, 0, sizeof(*args)); - args->out.device_virt_addr = device_addr; - break; - - case HL_MEM_OP_UNMAP: - rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr, - false); - break; - - default: - dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); - rc = -ENOTTY; - break; - } - -out: - return rc; -} - -static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, - u32 npages, u64 start, u32 offset, - struct hl_userptr *userptr) -{ - int rc; - - if (!access_ok((void __user *) (uintptr_t) addr, size)) { - dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); - return -EFAULT; - } - - userptr->vec = frame_vector_create(npages); - if (!userptr->vec) { - dev_err(hdev->dev, "Failed to create frame vector\n"); - return -ENOMEM; - } - - rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, - userptr->vec); - - if (rc != npages) { - dev_err(hdev->dev, - "Failed to map host memory, user ptr probably wrong\n"); - if (rc < 0) - goto destroy_framevec; - rc = -EFAULT; - goto put_framevec; - } - - if (frame_vector_to_pages(userptr->vec) < 0) { - dev_err(hdev->dev, - "Failed to translate frame vector to pages\n"); - rc = -EFAULT; - goto put_framevec; - } - - rc = sg_alloc_table_from_pages(userptr->sgt, - frame_vector_pages(userptr->vec), - npages, offset, size, GFP_ATOMIC); - if (rc < 0) { - dev_err(hdev->dev, "failed to create SG table from pages\n"); - goto put_framevec; - } - - return 0; - -put_framevec: - put_vaddr_frames(userptr->vec); -destroy_framevec: - frame_vector_destroy(userptr->vec); - return rc; -} - -/* - * hl_pin_host_memory - pins a chunk of host memory. - * @hdev: pointer to the habanalabs device structure - * @addr: the host virtual address of the memory area - * @size: the size of the memory area - * @userptr: pointer to hl_userptr structure - * - * This function does the following: - * - Pins the physical pages - * - Create an SG list from those pages - */ -int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, - struct hl_userptr *userptr) -{ - u64 start, end; - u32 npages, offset; - int rc; - - if (!size) { - dev_err(hdev->dev, "size to pin is invalid - %llu\n", size); - return -EINVAL; - } - - /* - * If the combination of the address and size requested for this memory - * region causes an integer overflow, return error. - */ - if (((addr + size) < addr) || - PAGE_ALIGN(addr + size) < (addr + size)) { - dev_err(hdev->dev, - "user pointer 0x%llx + %llu causes integer overflow\n", - addr, size); - return -EINVAL; - } - - /* - * This function can be called also from data path, hence use atomic - * always as it is not a big allocation. 
- */ - userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); - if (!userptr->sgt) - return -ENOMEM; - - start = addr & PAGE_MASK; - offset = addr & ~PAGE_MASK; - end = PAGE_ALIGN(addr + size); - npages = (end - start) >> PAGE_SHIFT; - - userptr->size = size; - userptr->addr = addr; - userptr->dma_mapped = false; - INIT_LIST_HEAD(&userptr->job_node); - - rc = get_user_memory(hdev, addr, size, npages, start, offset, - userptr); - if (rc) { - dev_err(hdev->dev, - "failed to get user memory for address 0x%llx\n", - addr); - goto free_sgt; - } - - hl_debugfs_add_userptr(hdev, userptr); - - return 0; - -free_sgt: - kfree(userptr->sgt); - return rc; -} - -/* - * hl_unpin_host_memory - unpins a chunk of host memory. - * @hdev: pointer to the habanalabs device structure - * @userptr: pointer to hl_userptr structure - * - * This function does the following: - * - Unpins the physical pages related to the host memory - * - Free the SG list - */ -void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) -{ - struct page **pages; - - hl_debugfs_remove_userptr(hdev, userptr); - - if (userptr->dma_mapped) - hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl, - userptr->sgt->nents, - userptr->dir); - - pages = frame_vector_pages(userptr->vec); - if (!IS_ERR(pages)) { - int i; - - for (i = 0; i < frame_vector_count(userptr->vec); i++) - set_page_dirty_lock(pages[i]); - } - put_vaddr_frames(userptr->vec); - frame_vector_destroy(userptr->vec); - - list_del(&userptr->job_node); - - sg_free_table(userptr->sgt); - kfree(userptr->sgt); -} - -/* - * hl_userptr_delete_list - clear userptr list - * - * @hdev : pointer to the habanalabs device structure - * @userptr_list : pointer to the list to clear - * - * This function does the following: - * - Iterates over the list and unpins the host memory and frees the userptr - * structure. - */ -void hl_userptr_delete_list(struct hl_device *hdev, - struct list_head *userptr_list) -{ - struct hl_userptr *userptr, *tmp; - - list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) { - hl_unpin_host_memory(hdev, userptr); - kfree(userptr); - } - - INIT_LIST_HEAD(userptr_list); -} - -/* - * hl_userptr_is_pinned - returns whether the given userptr is pinned - * - * @hdev : pointer to the habanalabs device structure - * @userptr_list : pointer to the list to clear - * @userptr : pointer to userptr to check - * - * This function does the following: - * - Iterates over the list and checks if the given userptr is in it, means is - * pinned. If so, returns true, otherwise returns false. - */ -bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, - u32 size, struct list_head *userptr_list, - struct hl_userptr **userptr) -{ - list_for_each_entry((*userptr), userptr_list, job_node) { - if ((addr == (*userptr)->addr) && (size == (*userptr)->size)) - return true; - } - - return false; -} - -/* - * va_range_init - initialize virtual addresses range - * @hdev: pointer to the habanalabs device structure - * @va_range: pointer to the range to initialize - * @start: range start address - * @end: range end address - * - * This function does the following: - * - Initializes the virtual addresses list of the given range with the given - * addresses. 
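/*
 * Worked example of how hl_pin_host_memory() above derives (start, offset,
 * npages) from a user address and size, assuming 4 KB pages: a 0x2100-byte
 * range that begins 0x180 bytes into a page spans three pages.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1ULL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
	unsigned long long addr = 0x7f0000000180ULL, size = 0x2100ULL;
	unsigned long long start = addr & PAGE_MASK;
	unsigned long long offset = addr & ~PAGE_MASK;
	unsigned long long end = PAGE_ALIGN(addr + size);
	unsigned long long npages = (end - start) >> PAGE_SHIFT;

	printf("start=0x%llx offset=0x%llx npages=%llu\n",
	       start, offset, npages);	/* npages == 3 */
	return 0;
}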
- */ -static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range, - u64 start, u64 end) -{ - int rc; - - INIT_LIST_HEAD(&va_range->list); - - /* PAGE_SIZE alignment */ - - if (start & (PAGE_SIZE - 1)) { - start &= PAGE_MASK; - start += PAGE_SIZE; - } - - if (end & (PAGE_SIZE - 1)) - end &= PAGE_MASK; - - if (start >= end) { - dev_err(hdev->dev, "too small vm range for va list\n"); - return -EFAULT; - } - - rc = add_va_block(hdev, va_range, start, end); - - if (rc) { - dev_err(hdev->dev, "Failed to init host va list\n"); - return rc; - } - - va_range->start_addr = start; - va_range->end_addr = end; - - return 0; -} - -/* - * va_range_fini() - clear a virtual addresses range - * @hdev: pointer to the habanalabs structure - * va_range: pointer to virtual addresses range - * - * This function does the following: - * - Frees the virtual addresses block list and its lock - */ -static void va_range_fini(struct hl_device *hdev, - struct hl_va_range *va_range) -{ - mutex_lock(&va_range->lock); - clear_va_list_locked(hdev, &va_range->list); - mutex_unlock(&va_range->lock); - - mutex_destroy(&va_range->lock); - kfree(va_range); -} - -/* - * vm_ctx_init_with_ranges() - initialize virtual memory for context - * @ctx: pointer to the habanalabs context structure - * @host_range_start: host virtual addresses range start. - * @host_range_end: host virtual addresses range end. - * @host_huge_range_start: host virtual addresses range start for memory - * allocated with huge pages. - * @host_huge_range_end: host virtual addresses range end for memory allocated - * with huge pages. - * @dram_range_start: dram virtual addresses range start. - * @dram_range_end: dram virtual addresses range end. - * - * This function initializes the following: - * - MMU for context - * - Virtual address to area descriptor hashtable - * - Virtual block list of available virtual memory - */ -static int vm_ctx_init_with_ranges(struct hl_ctx *ctx, - u64 host_range_start, - u64 host_range_end, - u64 host_huge_range_start, - u64 host_huge_range_end, - u64 dram_range_start, - u64 dram_range_end) -{ - struct hl_device *hdev = ctx->hdev; - int rc; - - ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL); - if (!ctx->host_va_range) - return -ENOMEM; - - ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range), - GFP_KERNEL); - if (!ctx->host_huge_va_range) { - rc = -ENOMEM; - goto host_huge_va_range_err; - } - - ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL); - if (!ctx->dram_va_range) { - rc = -ENOMEM; - goto dram_va_range_err; - } - - rc = hl_mmu_ctx_init(ctx); - if (rc) { - dev_err(hdev->dev, "failed to init context %d\n", ctx->asid); - goto mmu_ctx_err; - } - - mutex_init(&ctx->mem_hash_lock); - hash_init(ctx->mem_hash); - - mutex_init(&ctx->host_va_range->lock); - - rc = va_range_init(hdev, ctx->host_va_range, host_range_start, - host_range_end); - if (rc) { - dev_err(hdev->dev, "failed to init host vm range\n"); - goto host_page_range_err; - } - - if (hdev->pmmu_huge_range) { - mutex_init(&ctx->host_huge_va_range->lock); - - rc = va_range_init(hdev, ctx->host_huge_va_range, - host_huge_range_start, - host_huge_range_end); - if (rc) { - dev_err(hdev->dev, - "failed to init host huge vm range\n"); - goto host_hpage_range_err; - } - } else { - ctx->host_huge_va_range = ctx->host_va_range; - } - - mutex_init(&ctx->dram_va_range->lock); - - rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start, - dram_range_end); - if (rc) { - dev_err(hdev->dev, "failed to init 
dram vm range\n"); - goto dram_vm_err; - } - - hl_debugfs_add_ctx_mem_hash(hdev, ctx); - - return 0; - -dram_vm_err: - mutex_destroy(&ctx->dram_va_range->lock); - - if (hdev->pmmu_huge_range) { - mutex_lock(&ctx->host_huge_va_range->lock); - clear_va_list_locked(hdev, &ctx->host_huge_va_range->list); - mutex_unlock(&ctx->host_huge_va_range->lock); - } -host_hpage_range_err: - if (hdev->pmmu_huge_range) - mutex_destroy(&ctx->host_huge_va_range->lock); - mutex_lock(&ctx->host_va_range->lock); - clear_va_list_locked(hdev, &ctx->host_va_range->list); - mutex_unlock(&ctx->host_va_range->lock); -host_page_range_err: - mutex_destroy(&ctx->host_va_range->lock); - mutex_destroy(&ctx->mem_hash_lock); - hl_mmu_ctx_fini(ctx); -mmu_ctx_err: - kfree(ctx->dram_va_range); -dram_va_range_err: - kfree(ctx->host_huge_va_range); -host_huge_va_range_err: - kfree(ctx->host_va_range); - - return rc; -} - -int hl_vm_ctx_init(struct hl_ctx *ctx) -{ - struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; - u64 host_range_start, host_range_end, host_huge_range_start, - host_huge_range_end, dram_range_start, dram_range_end; - - atomic64_set(&ctx->dram_phys_mem, 0); - - /* - * - If MMU is enabled, init the ranges as usual. - * - If MMU is disabled, in case of host mapping, the returned address - * is the given one. - * In case of DRAM mapping, the returned address is the physical - * address of the memory related to the given handle. - */ - if (ctx->hdev->mmu_enable) { - dram_range_start = prop->dmmu.start_addr; - dram_range_end = prop->dmmu.end_addr; - host_range_start = prop->pmmu.start_addr; - host_range_end = prop->pmmu.end_addr; - host_huge_range_start = prop->pmmu_huge.start_addr; - host_huge_range_end = prop->pmmu_huge.end_addr; - } else { - dram_range_start = prop->dram_user_base_address; - dram_range_end = prop->dram_end_address; - host_range_start = prop->dram_user_base_address; - host_range_end = prop->dram_end_address; - host_huge_range_start = prop->dram_user_base_address; - host_huge_range_end = prop->dram_end_address; - } - - return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, - host_huge_range_start, - host_huge_range_end, - dram_range_start, - dram_range_end); -} - -/* - * hl_vm_ctx_fini - virtual memory teardown of context - * - * @ctx : pointer to the habanalabs context structure - * - * This function perform teardown the following: - * - Virtual block list of available virtual memory - * - Virtual address to area descriptor hashtable - * - MMU for context - * - * In addition this function does the following: - * - Unmaps the existing hashtable nodes if the hashtable is not empty. The - * hashtable should be empty as no valid mappings should exist at this - * point. - * - Frees any existing physical page list from the idr which relates to the - * current context asid. - * - This function checks the virtual block list for correctness. At this point - * the list should contain one element which describes the whole virtual - * memory range of the context. Otherwise, a warning is printed. 
- */ -void hl_vm_ctx_fini(struct hl_ctx *ctx) -{ - struct hl_device *hdev = ctx->hdev; - struct hl_vm *vm = &hdev->vm; - struct hl_vm_phys_pg_pack *phys_pg_list; - struct hl_vm_hash_node *hnode; - struct hlist_node *tmp_node; - int i; - - hl_debugfs_remove_ctx_mem_hash(hdev, ctx); - - /* - * Clearly something went wrong on hard reset so no point in printing - * another side effect error - */ - if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash)) - dev_notice(hdev->dev, - "user released device without removing its memory mappings\n"); - - hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) { - dev_dbg(hdev->dev, - "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n", - hnode->vaddr, ctx->asid); - unmap_device_va(ctx, hnode->vaddr, true); - } - - /* invalidate the cache once after the unmapping loop */ - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK); - - spin_lock(&vm->idr_lock); - idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i) - if (phys_pg_list->asid == ctx->asid) { - dev_dbg(hdev->dev, - "page list 0x%px of asid %d is still alive\n", - phys_pg_list, ctx->asid); - atomic64_sub(phys_pg_list->total_size, - &hdev->dram_used_mem); - free_phys_pg_pack(hdev, phys_pg_list); - idr_remove(&vm->phys_pg_pack_handles, i); - } - spin_unlock(&vm->idr_lock); - - va_range_fini(hdev, ctx->dram_va_range); - if (hdev->pmmu_huge_range) - va_range_fini(hdev, ctx->host_huge_va_range); - va_range_fini(hdev, ctx->host_va_range); - - mutex_destroy(&ctx->mem_hash_lock); - hl_mmu_ctx_fini(ctx); -} - -/* - * hl_vm_init - initialize virtual memory module - * - * @hdev : pointer to the habanalabs device structure - * - * This function initializes the following: - * - MMU module - * - DRAM physical pages pool of 2MB - * - Idr for device memory allocation handles - */ -int hl_vm_init(struct hl_device *hdev) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_vm *vm = &hdev->vm; - int rc; - - vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1); - if (!vm->dram_pg_pool) { - dev_err(hdev->dev, "Failed to create dram page pool\n"); - return -ENOMEM; - } - - kref_init(&vm->dram_pg_pool_refcount); - - rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address, - prop->dram_end_address - prop->dram_user_base_address, - -1); - - if (rc) { - dev_err(hdev->dev, - "Failed to add memory to dram page pool %d\n", rc); - goto pool_add_err; - } - - spin_lock_init(&vm->idr_lock); - idr_init(&vm->phys_pg_pack_handles); - - atomic64_set(&hdev->dram_used_mem, 0); - - vm->init_done = true; - - return 0; - -pool_add_err: - gen_pool_destroy(vm->dram_pg_pool); - - return rc; -} - -/* - * hl_vm_fini - virtual memory module teardown - * - * @hdev : pointer to the habanalabs device structure - * - * This function perform teardown to the following: - * - Idr for device memory allocation handles - * - DRAM physical pages pool of 2MB - * - MMU module - */ -void hl_vm_fini(struct hl_device *hdev) -{ - struct hl_vm *vm = &hdev->vm; - - if (!vm->init_done) - return; - - /* - * At this point all the contexts should be freed and hence no DRAM - * memory should be in use. Hence the DRAM pool should be freed here. 
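/*
 * hl_vm_init() above creates the DRAM pool with an allocation order of
 * __ffs(dram_page_size), i.e. log2 of the power-of-two page size, so the
 * pool only hands out whole DRAM pages. A sketch of that order computation
 * using a compiler builtin in place of the kernel's __ffs(); the 2 MB page
 * size follows the "pool of 2MB" comment above.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long dram_page_size = 2ULL * 1024 * 1024;  /* 2 MB */
	unsigned int order = __builtin_ctzll(dram_page_size);	  /* lowest set bit */

	printf("gen_pool allocation order: %u\n", order);	  /* 21 */
	return 0;
}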
- */ - if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1) - dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n", - __func__); - - vm->init_done = false; -} diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c deleted file mode 100644 index 04303950e630..000000000000 --- a/drivers/misc/habanalabs/mmu.c +++ /dev/null @@ -1,1037 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. - */ - -#include "habanalabs.h" -#include "include/hw_ip/mmu/mmu_general.h" - -#include -#include - -static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); - -static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) -{ - struct pgt_info *pgt_info = NULL; - - hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node, - (unsigned long) hop_addr) - if (hop_addr == pgt_info->shadow_addr) - break; - - return pgt_info; -} - -static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info) -{ - struct hl_device *hdev = ctx->hdev; - - gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr, - hdev->asic_prop.mmu_hop_table_size); - hash_del(&pgt_info->node); - kfree((u64 *) (uintptr_t) pgt_info->shadow_addr); - kfree(pgt_info); -} - -static void free_hop(struct hl_ctx *ctx, u64 hop_addr) -{ - struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); - - _free_hop(ctx, pgt_info); -} - -static u64 alloc_hop(struct hl_ctx *ctx) -{ - struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct pgt_info *pgt_info; - u64 phys_addr, shadow_addr; - - pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL); - if (!pgt_info) - return ULLONG_MAX; - - phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool, - prop->mmu_hop_table_size); - if (!phys_addr) { - dev_err(hdev->dev, "failed to allocate page\n"); - goto pool_add_err; - } - - shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size, - GFP_KERNEL); - if (!shadow_addr) - goto shadow_err; - - pgt_info->phys_addr = phys_addr; - pgt_info->shadow_addr = shadow_addr; - pgt_info->ctx = ctx; - pgt_info->num_of_ptes = 0; - hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr); - - return shadow_addr; - -shadow_err: - gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size); -pool_add_err: - kfree(pgt_info); - - return ULLONG_MAX; -} - -static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx) -{ - return ctx->hdev->asic_prop.mmu_pgt_addr + - (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); -} - -static inline u64 get_hop0_addr(struct hl_ctx *ctx) -{ - return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 + - (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); -} - -static inline void flush(struct hl_ctx *ctx) -{ - /* flush all writes from all cores to reach PCI */ - mb(); - ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx)); -} - -/* transform the value to physical address when writing to H/W */ -static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) -{ - /* - * The value to write is actually the address of the next shadow hop + - * flags at the 12 LSBs. - * Hence in order to get the value to write to the physical PTE, we - * clear the 12 LSBs and translate the shadow hop to its associated - * physical hop, and add back the original 12 LSBs. 
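/*
 * Sketch of the shadow-to-physical translation described above and carried
 * out just below: the low 12 bits of a PTE are flags and are kept as-is,
 * while the remaining bits are the next hop's shadow address and get
 * translated. translate() is a stand-in for get_phys_addr(); the 12-bit
 * flag width is taken from the comment above, the offset is arbitrary.
 */
#include <stdio.h>

#define SK_FLAGS_MASK		0xfffULL
#define SK_HOP_PHYS_ADDR_MASK	(~SK_FLAGS_MASK)

static unsigned long long translate(unsigned long long shadow_addr)
{
	/* pretend the physical copy lives at a fixed offset from the shadow */
	return shadow_addr + 0x100000000ULL;
}

int main(void)
{
	unsigned long long shadow_pte = 0x7f00aa000ULL | 0x3ULL; /* addr | flags */
	unsigned long long phys_pte =
		translate(shadow_pte & SK_HOP_PHYS_ADDR_MASK) |
		(shadow_pte & SK_FLAGS_MASK);

	printf("shadow 0x%llx -> phys 0x%llx\n", shadow_pte, phys_pte);
	return 0;
}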
- */ - u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) | - (val & FLAGS_MASK); - - ctx->hdev->asic_funcs->write_pte(ctx->hdev, - get_phys_addr(ctx, shadow_pte_addr), - phys_val); - - *(u64 *) (uintptr_t) shadow_pte_addr = val; -} - -/* do not transform the value to physical address when writing to H/W */ -static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, - u64 val) -{ - ctx->hdev->asic_funcs->write_pte(ctx->hdev, - get_phys_addr(ctx, shadow_pte_addr), - val); - *(u64 *) (uintptr_t) shadow_pte_addr = val; -} - -/* clear the last and present bits */ -static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr) -{ - /* no need to transform the value to physical address */ - write_final_pte(ctx, pte_addr, 0); -} - -static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr) -{ - get_pgt_info(ctx, hop_addr)->num_of_ptes++; -} - -/* - * put_pte - decrement the num of ptes and free the hop if possible - * - * @ctx: pointer to the context structure - * @hop_addr: addr of the hop - * - * This function returns the number of ptes left on this hop. If the number is - * 0, it means the pte was freed. - */ -static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr) -{ - struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); - int num_of_ptes_left; - - pgt_info->num_of_ptes--; - - /* - * Need to save the number of ptes left because free_hop might free - * the pgt_info - */ - num_of_ptes_left = pgt_info->num_of_ptes; - if (!num_of_ptes_left) - _free_hop(ctx, pgt_info); - - return num_of_ptes_left; -} - -static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr, u64 mask, u64 shift) -{ - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & mask) >> shift); -} - -static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask, - mmu_prop->hop0_shift); -} - -static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask, - mmu_prop->hop1_shift); -} - -static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask, - mmu_prop->hop2_shift); -} - -static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask, - mmu_prop->hop3_shift); -} - -static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask, - mmu_prop->hop4_shift); -} - -static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) -{ - if (curr_pte & PAGE_PRESENT_MASK) - return curr_pte & HOP_PHYS_ADDR_MASK; - else - return ULLONG_MAX; -} - -static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, - bool *is_new_hop) -{ - u64 hop_addr = get_next_hop_addr(ctx, curr_pte); - - if (hop_addr == ULLONG_MAX) { - hop_addr = alloc_hop(ctx); - *is_new_hop = (hop_addr != ULLONG_MAX); - } - - return hop_addr; -} - -/* translates shadow address inside hop to a physical address */ -static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr) -{ - u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size 
- 1); - u64 shadow_hop_addr = shadow_addr & ~page_mask; - u64 pte_offset = shadow_addr & page_mask; - u64 phys_hop_addr; - - if (shadow_hop_addr != get_hop0_addr(ctx)) - phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr; - else - phys_hop_addr = get_phys_hop0_addr(ctx); - - return phys_hop_addr + pte_offset; -} - -static bool is_dram_va(struct hl_device *hdev, u64 virt_addr) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - - return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->dmmu.start_addr, - prop->dmmu.end_addr); -} - -static int dram_default_mapping_init(struct hl_ctx *ctx) -{ - struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr, - hop2_pte_addr, hop3_pte_addr, pte_val; - int rc, i, j, hop3_allocated = 0; - - if ((!hdev->dram_supports_virtual_memory) || - (!hdev->dram_default_page_mapping) || - (ctx->asid == HL_KERNEL_ASID_ID)) - return 0; - - num_of_hop3 = prop->dram_size_for_default_page_mapping; - do_div(num_of_hop3, prop->dram_page_size); - do_div(num_of_hop3, PTE_ENTRIES_IN_HOP); - - /* add hop1 and hop2 */ - total_hops = num_of_hop3 + 2; - - ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL); - if (!ctx->dram_default_hops) - return -ENOMEM; - - hop0_addr = get_hop0_addr(ctx); - - hop1_addr = alloc_hop(ctx); - if (hop1_addr == ULLONG_MAX) { - dev_err(hdev->dev, "failed to alloc hop 1\n"); - rc = -ENOMEM; - goto hop1_err; - } - - ctx->dram_default_hops[total_hops - 1] = hop1_addr; - - hop2_addr = alloc_hop(ctx); - if (hop2_addr == ULLONG_MAX) { - dev_err(hdev->dev, "failed to alloc hop 2\n"); - rc = -ENOMEM; - goto hop2_err; - } - - ctx->dram_default_hops[total_hops - 2] = hop2_addr; - - for (i = 0 ; i < num_of_hop3 ; i++) { - ctx->dram_default_hops[i] = alloc_hop(ctx); - if (ctx->dram_default_hops[i] == ULLONG_MAX) { - dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i); - rc = -ENOMEM; - goto hop3_err; - } - hop3_allocated++; - } - - /* need only pte 0 in hops 0 and 1 */ - pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop0_addr, pte_val); - - pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop1_addr, pte_val); - get_pte(ctx, hop1_addr); - - hop2_pte_addr = hop2_addr; - for (i = 0 ; i < num_of_hop3 ; i++) { - pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) | - PAGE_PRESENT_MASK; - write_pte(ctx, hop2_pte_addr, pte_val); - get_pte(ctx, hop2_addr); - hop2_pte_addr += HL_PTE_SIZE; - } - - pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) | - LAST_MASK | PAGE_PRESENT_MASK; - - for (i = 0 ; i < num_of_hop3 ; i++) { - hop3_pte_addr = ctx->dram_default_hops[i]; - for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) { - write_final_pte(ctx, hop3_pte_addr, pte_val); - get_pte(ctx, ctx->dram_default_hops[i]); - hop3_pte_addr += HL_PTE_SIZE; - } - } - - flush(ctx); - - return 0; - -hop3_err: - for (i = 0 ; i < hop3_allocated ; i++) - free_hop(ctx, ctx->dram_default_hops[i]); - - free_hop(ctx, hop2_addr); -hop2_err: - free_hop(ctx, hop1_addr); -hop1_err: - kfree(ctx->dram_default_hops); - - return rc; -} - -static void dram_default_mapping_fini(struct hl_ctx *ctx) -{ - struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr, - hop2_pte_addr, hop3_pte_addr; - int i, j; - - if ((!hdev->dram_supports_virtual_memory) || - 
(!hdev->dram_default_page_mapping) || - (ctx->asid == HL_KERNEL_ASID_ID)) - return; - - num_of_hop3 = prop->dram_size_for_default_page_mapping; - do_div(num_of_hop3, prop->dram_page_size); - do_div(num_of_hop3, PTE_ENTRIES_IN_HOP); - - hop0_addr = get_hop0_addr(ctx); - /* add hop1 and hop2 */ - total_hops = num_of_hop3 + 2; - hop1_addr = ctx->dram_default_hops[total_hops - 1]; - hop2_addr = ctx->dram_default_hops[total_hops - 2]; - - for (i = 0 ; i < num_of_hop3 ; i++) { - hop3_pte_addr = ctx->dram_default_hops[i]; - for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) { - clear_pte(ctx, hop3_pte_addr); - put_pte(ctx, ctx->dram_default_hops[i]); - hop3_pte_addr += HL_PTE_SIZE; - } - } - - hop2_pte_addr = hop2_addr; - hop2_pte_addr = hop2_addr; - for (i = 0 ; i < num_of_hop3 ; i++) { - clear_pte(ctx, hop2_pte_addr); - put_pte(ctx, hop2_addr); - hop2_pte_addr += HL_PTE_SIZE; - } - - clear_pte(ctx, hop1_addr); - put_pte(ctx, hop1_addr); - clear_pte(ctx, hop0_addr); - - kfree(ctx->dram_default_hops); - - flush(ctx); -} - -/** - * hl_mmu_init() - initialize the MMU module. - * @hdev: habanalabs device structure. - * - * This function does the following: - * - Create a pool of pages for pgt_infos. - * - Create a shadow table for pgt - * - * Return: 0 for success, non-zero for failure. - */ -int hl_mmu_init(struct hl_device *hdev) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - int rc; - - if (!hdev->mmu_enable) - return 0; - - hdev->mmu_pgt_pool = - gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); - - if (!hdev->mmu_pgt_pool) { - dev_err(hdev->dev, "Failed to create page gen pool\n"); - return -ENOMEM; - } - - rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr + - prop->mmu_hop0_tables_total_size, - prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size, - -1); - if (rc) { - dev_err(hdev->dev, "Failed to add memory to page gen pool\n"); - goto err_pool_add; - } - - hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid, - prop->mmu_hop_table_size, - GFP_KERNEL | __GFP_ZERO); - if (!hdev->mmu_shadow_hop0) { - rc = -ENOMEM; - goto err_pool_add; - } - - /* MMU H/W init will be done in device hw_init() */ - - return 0; - -err_pool_add: - gen_pool_destroy(hdev->mmu_pgt_pool); - - return rc; -} - -/** - * hl_mmu_fini() - release the MMU module. - * @hdev: habanalabs device structure. - * - * This function does the following: - * - Disable MMU in H/W. - * - Free the pgt_infos pool. - * - * All contexts should be freed before calling this function. - */ -void hl_mmu_fini(struct hl_device *hdev) -{ - if (!hdev->mmu_enable) - return; - - /* MMU H/W fini was already done in device hw_fini() */ - - kvfree(hdev->mmu_shadow_hop0); - gen_pool_destroy(hdev->mmu_pgt_pool); -} - -/** - * hl_mmu_ctx_init() - initialize a context for using the MMU module. - * @ctx: pointer to the context structure to initialize. - * - * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all - * page tables hops related to this context. - * Return: 0 on success, non-zero otherwise. 
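/*
 * Worked example of the hop bookkeeping used by dram_default_mapping_init()
 * and _fini() above: the number of hop3 tables is the default-mapping DRAM
 * size divided by the DRAM page size and then by the PTEs per hop, plus two
 * extra slots to remember hop1 and hop2. The sizes below are illustrative,
 * not actual Goya/Gaudi properties.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long dram_size = 4ULL << 30;		/* 4 GB */
	unsigned long long dram_page_size = 2ULL << 20;		/* 2 MB */
	unsigned long long pte_entries_in_hop = 512;		/* assumed */

	unsigned long long num_of_hop3 =
		dram_size / dram_page_size / pte_entries_in_hop;
	unsigned long long total_hops = num_of_hop3 + 2;	/* + hop1, hop2 */

	printf("hop3 tables: %llu, tracked hops: %llu\n",
	       num_of_hop3, total_hops);			/* 4 and 6 */
	return 0;
}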
- */ -int hl_mmu_ctx_init(struct hl_ctx *ctx) -{ - struct hl_device *hdev = ctx->hdev; - - if (!hdev->mmu_enable) - return 0; - - mutex_init(&ctx->mmu_lock); - hash_init(ctx->mmu_shadow_hash); - - return dram_default_mapping_init(ctx); -} - -/* - * hl_mmu_ctx_fini - disable a ctx from using the mmu module - * - * @ctx: pointer to the context structure - * - * This function does the following: - * - Free any pgts which were not freed yet - * - Free the mutex - * - Free DRAM default page mapping hops - */ -void hl_mmu_ctx_fini(struct hl_ctx *ctx) -{ - struct hl_device *hdev = ctx->hdev; - struct pgt_info *pgt_info; - struct hlist_node *tmp; - int i; - - if (!hdev->mmu_enable) - return; - - dram_default_mapping_fini(ctx); - - if (!hash_empty(ctx->mmu_shadow_hash)) - dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n", - ctx->asid); - - hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) { - dev_err_ratelimited(hdev->dev, - "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n", - pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); - _free_hop(ctx, pgt_info); - } - - mutex_destroy(&ctx->mmu_lock); -} - -static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) -{ - struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_mmu_properties *mmu_prop; - u64 hop0_addr = 0, hop0_pte_addr = 0, - hop1_addr = 0, hop1_pte_addr = 0, - hop2_addr = 0, hop2_pte_addr = 0, - hop3_addr = 0, hop3_pte_addr = 0, - hop4_addr = 0, hop4_pte_addr = 0, - curr_pte; - bool is_huge, clear_hop3 = true; - - /* shifts and masks are the same in PMMU and HPMMU, use one of them */ - mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; - - hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; - - hop1_addr = get_next_hop_addr(ctx, curr_pte); - - if (hop1_addr == ULLONG_MAX) - goto not_mapped; - - hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; - - hop2_addr = get_next_hop_addr(ctx, curr_pte); - - if (hop2_addr == ULLONG_MAX) - goto not_mapped; - - hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; - - hop3_addr = get_next_hop_addr(ctx, curr_pte); - - if (hop3_addr == ULLONG_MAX) - goto not_mapped; - - hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; - - is_huge = curr_pte & LAST_MASK; - - if (is_dram_addr && !is_huge) { - dev_err(hdev->dev, - "DRAM unmapping should use huge pages only\n"); - return -EFAULT; - } - - if (!is_huge) { - hop4_addr = get_next_hop_addr(ctx, curr_pte); - - if (hop4_addr == ULLONG_MAX) - goto not_mapped; - - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, - virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; - - clear_hop3 = false; - } - - if (hdev->dram_default_page_mapping && is_dram_addr) { - u64 default_pte = (prop->mmu_dram_default_page_addr & - HOP_PHYS_ADDR_MASK) | LAST_MASK | - PAGE_PRESENT_MASK; - if (curr_pte == default_pte) { - dev_err(hdev->dev, - "DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n", - virt_addr); - goto not_mapped; - } - - if (!(curr_pte & PAGE_PRESENT_MASK)) { - dev_err(hdev->dev, - "DRAM: hop3 PTE is cleared! 
can't unmap, va: 0x%llx\n", - virt_addr); - goto not_mapped; - } - - write_final_pte(ctx, hop3_pte_addr, default_pte); - put_pte(ctx, hop3_addr); - } else { - if (!(curr_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - - if (hop4_addr) - clear_pte(ctx, hop4_pte_addr); - else - clear_pte(ctx, hop3_pte_addr); - - if (hop4_addr && !put_pte(ctx, hop4_addr)) - clear_hop3 = true; - - if (!clear_hop3) - goto mapped; - - clear_pte(ctx, hop3_pte_addr); - - if (put_pte(ctx, hop3_addr)) - goto mapped; - - clear_pte(ctx, hop2_pte_addr); - - if (put_pte(ctx, hop2_addr)) - goto mapped; - - clear_pte(ctx, hop1_pte_addr); - - if (put_pte(ctx, hop1_addr)) - goto mapped; - - clear_pte(ctx, hop0_pte_addr); - } - -mapped: - return 0; - -not_mapped: - dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", - virt_addr); - - return -EINVAL; -} - -/* - * hl_mmu_unmap - unmaps a virtual addr - * - * @ctx: pointer to the context structure - * @virt_addr: virt addr to map from - * @page_size: size of the page to unmap - * @flush_pte: whether to do a PCI flush - * - * This function does the following: - * - Check that the virt addr is mapped - * - Unmap the virt addr and frees pgts if possible - * - Returns 0 on success, -EINVAL if the given addr is not mapped - * - * Because this function changes the page tables in the device and because it - * changes the MMU hash, it must be protected by a lock. - * However, because it maps only a single page, the lock should be implemented - * in a higher level in order to protect the entire mapping of the memory area - * - * For optimization reasons PCI flush may be requested once after unmapping of - * large area. - */ -int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, - bool flush_pte) -{ - struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_mmu_properties *mmu_prop; - u64 real_virt_addr; - u32 real_page_size, npages; - int i, rc = 0; - bool is_dram_addr; - - if (!hdev->mmu_enable) - return 0; - - is_dram_addr = is_dram_va(hdev, virt_addr); - - if (is_dram_addr) - mmu_prop = &prop->dmmu; - else if ((page_size % prop->pmmu_huge.page_size) == 0) - mmu_prop = &prop->pmmu_huge; - else - mmu_prop = &prop->pmmu; - - /* - * The H/W handles mapping of specific page sizes. Hence if the page - * size is bigger, we break it to sub-pages and unmap them separately. - */ - if ((page_size % mmu_prop->page_size) == 0) { - real_page_size = mmu_prop->page_size; - } else { - dev_err(hdev->dev, - "page size of %u is not %uKB aligned, can't unmap\n", - page_size, mmu_prop->page_size >> 10); - - return -EFAULT; - } - - npages = page_size / real_page_size; - real_virt_addr = virt_addr; - - for (i = 0 ; i < npages ; i++) { - rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr); - if (rc) - break; - - real_virt_addr += real_page_size; - } - - if (flush_pte) - flush(ctx); - - return rc; -} - -static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, - u32 page_size, bool is_dram_addr) -{ - struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_mmu_properties *mmu_prop; - u64 hop0_addr = 0, hop0_pte_addr = 0, - hop1_addr = 0, hop1_pte_addr = 0, - hop2_addr = 0, hop2_pte_addr = 0, - hop3_addr = 0, hop3_pte_addr = 0, - hop4_addr = 0, hop4_pte_addr = 0, - curr_pte = 0; - bool hop1_new = false, hop2_new = false, hop3_new = false, - hop4_new = false, is_huge; - int rc = -ENOMEM; - - /* - * This mapping function can map a page or a huge page. 
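/*
 * Sketch of the sub-page split in hl_mmu_unmap() above: a request that is a
 * multiple of the H/W page size is broken into npages individual unmaps and
 * the PTE flush is issued once at the end of the area, as the comment above
 * explains. unmap_one() and flush_all() are placeholders, not driver calls.
 */
#include <stdio.h>

static void unmap_one(unsigned long long va)
{
	printf("unmap 0x%llx\n", va);
}

static void flush_all(void)
{
	printf("flush\n");
}

int main(void)
{
	unsigned long long virt_addr = 0x20000000ULL;
	unsigned int page_size = 16 * 4096;	/* caller's request: 64 KB */
	unsigned int real_page_size = 4096;	/* what the H/W can unmap */
	unsigned int i, npages;

	if (page_size % real_page_size)
		return 1;			/* not page aligned, reject */

	npages = page_size / real_page_size;

	for (i = 0; i < npages; i++)
		unmap_one(virt_addr + (unsigned long long)i * real_page_size);

	flush_all();	/* one flush covers the whole unmapped area */
	return 0;
}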
For huge page - * there are only 3 hops rather than 4. Currently the DRAM allocation - * uses huge pages only but user memory could have been allocated with - * one of the two page sizes. Since this is a common code for all the - * three cases, we need this hugs page check. - */ - if (is_dram_addr) { - mmu_prop = &prop->dmmu; - is_huge = true; - } else if (page_size == prop->pmmu_huge.page_size) { - mmu_prop = &prop->pmmu_huge; - is_huge = true; - } else { - mmu_prop = &prop->pmmu; - is_huge = false; - } - - hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; - - hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new); - if (hop1_addr == ULLONG_MAX) - goto err; - - hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; - - hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new); - if (hop2_addr == ULLONG_MAX) - goto err; - - hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; - - hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new); - if (hop3_addr == ULLONG_MAX) - goto err; - - hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; - - if (!is_huge) { - hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new); - if (hop4_addr == ULLONG_MAX) - goto err; - - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, - virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; - } - - if (hdev->dram_default_page_mapping && is_dram_addr) { - u64 default_pte = (prop->mmu_dram_default_page_addr & - HOP_PHYS_ADDR_MASK) | LAST_MASK | - PAGE_PRESENT_MASK; - - if (curr_pte != default_pte) { - dev_err(hdev->dev, - "DRAM: mapping already exists for virt_addr 0x%llx\n", - virt_addr); - rc = -EINVAL; - goto err; - } - - if (hop1_new || hop2_new || hop3_new || hop4_new) { - dev_err(hdev->dev, - "DRAM mapping should not allocate more hops\n"); - rc = -EFAULT; - goto err; - } - } else if (curr_pte & PAGE_PRESENT_MASK) { - dev_err(hdev->dev, - "mapping already exists for virt_addr 0x%llx\n", - virt_addr); - - dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr); - dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr); - dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr); - dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr); - - if (!is_huge) - dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop4_pte_addr, - hop4_pte_addr); - - rc = -EINVAL; - goto err; - } - - curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK - | PAGE_PRESENT_MASK; - - if (is_huge) - write_final_pte(ctx, hop3_pte_addr, curr_pte); - else - write_final_pte(ctx, hop4_pte_addr, curr_pte); - - if (hop1_new) { - curr_pte = - (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop0_pte_addr, curr_pte); - } - if (hop2_new) { - curr_pte = - (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop1_pte_addr, curr_pte); - get_pte(ctx, hop1_addr); - } - if (hop3_new) { - curr_pte = - (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop2_pte_addr, curr_pte); - get_pte(ctx, hop2_addr); - } - - if (!is_huge) { - if (hop4_new) { - curr_pte = 
(hop4_addr & HOP_PHYS_ADDR_MASK) | - PAGE_PRESENT_MASK; - write_pte(ctx, hop3_pte_addr, curr_pte); - get_pte(ctx, hop3_addr); - } - - get_pte(ctx, hop4_addr); - } else { - get_pte(ctx, hop3_addr); - } - - return 0; - -err: - if (hop4_new) - free_hop(ctx, hop4_addr); - if (hop3_new) - free_hop(ctx, hop3_addr); - if (hop2_new) - free_hop(ctx, hop2_addr); - if (hop1_new) - free_hop(ctx, hop1_addr); - - return rc; -} - -/* - * hl_mmu_map - maps a virtual addr to physical addr - * - * @ctx: pointer to the context structure - * @virt_addr: virt addr to map from - * @phys_addr: phys addr to map to - * @page_size: physical page size - * @flush_pte: whether to do a PCI flush - * - * This function does the following: - * - Check that the virt addr is not mapped - * - Allocate pgts as necessary in order to map the virt addr to the phys - * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM. - * - * Because this function changes the page tables in the device and because it - * changes the MMU hash, it must be protected by a lock. - * However, because it maps only a single page, the lock should be implemented - * in a higher level in order to protect the entire mapping of the memory area - * - * For optimization reasons PCI flush may be requested once after mapping of - * large area. - */ -int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, - bool flush_pte) -{ - struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_mmu_properties *mmu_prop; - u64 real_virt_addr, real_phys_addr; - u32 real_page_size, npages; - int i, rc, mapped_cnt = 0; - bool is_dram_addr; - - if (!hdev->mmu_enable) - return 0; - - is_dram_addr = is_dram_va(hdev, virt_addr); - - if (is_dram_addr) - mmu_prop = &prop->dmmu; - else if ((page_size % prop->pmmu_huge.page_size) == 0) - mmu_prop = &prop->pmmu_huge; - else - mmu_prop = &prop->pmmu; - - /* - * The H/W handles mapping of specific page sizes. Hence if the page - * size is bigger, we break it to sub-pages and map them separately. - */ - if ((page_size % mmu_prop->page_size) == 0) { - real_page_size = mmu_prop->page_size; - } else { - dev_err(hdev->dev, - "page size of %u is not %uKB aligned, can't unmap\n", - page_size, mmu_prop->page_size >> 10); - - return -EFAULT; - } - - WARN_ONCE((phys_addr & (real_page_size - 1)), - "Mapping 0x%llx with page size of 0x%x is erroneous! 
Address must be divisible by page size", - phys_addr, real_page_size); - - npages = page_size / real_page_size; - real_virt_addr = virt_addr; - real_phys_addr = phys_addr; - - for (i = 0 ; i < npages ; i++) { - rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr, - real_page_size, is_dram_addr); - if (rc) - goto err; - - real_virt_addr += real_page_size; - real_phys_addr += real_page_size; - mapped_cnt++; - } - - if (flush_pte) - flush(ctx); - - return 0; - -err: - real_virt_addr = virt_addr; - for (i = 0 ; i < mapped_cnt ; i++) { - if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr)) - dev_warn_ratelimited(hdev->dev, - "failed to unmap va: 0x%llx\n", real_virt_addr); - - real_virt_addr += real_page_size; - } - - flush(ctx); - - return rc; -} - -/* - * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out - * - * @ctx: pointer to the context structure - * - */ -void hl_mmu_swap_out(struct hl_ctx *ctx) -{ - -} - -/* - * hl_mmu_swap_in - marks all mapping of the given ctx as swapped in - * - * @ctx: pointer to the context structure - * - */ -void hl_mmu_swap_in(struct hl_ctx *ctx) -{ - -} diff --git a/drivers/misc/habanalabs/pci.c b/drivers/misc/habanalabs/pci.c deleted file mode 100644 index 1791f6623c69..000000000000 --- a/drivers/misc/habanalabs/pci.c +++ /dev/null @@ -1,400 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. - */ - -#include "habanalabs.h" -#include "include/hw_ip/pci/pci_general.h" - -#include -#include - -#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 10) - -#define IATU_REGION_CTRL_REGION_EN_MASK BIT(31) -#define IATU_REGION_CTRL_MATCH_MODE_MASK BIT(30) -#define IATU_REGION_CTRL_NUM_MATCH_EN_MASK BIT(19) -#define IATU_REGION_CTRL_BAR_NUM_MASK GENMASK(10, 8) - -/** - * hl_pci_bars_map() - Map PCI BARs. - * @hdev: Pointer to hl_device structure. - * @name: Array of BAR names. - * @is_wc: Array with flag per BAR whether a write-combined mapping is needed. - * - * Request PCI regions and map them to kernel virtual addresses. - * - * Return: 0 on success, non-zero for failure. - */ -int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], - bool is_wc[3]) -{ - struct pci_dev *pdev = hdev->pdev; - int rc, i, bar; - - rc = pci_request_regions(pdev, HL_NAME); - if (rc) { - dev_err(hdev->dev, "Cannot obtain PCI resources\n"); - return rc; - } - - for (i = 0 ; i < 3 ; i++) { - bar = i * 2; /* 64-bit BARs */ - hdev->pcie_bar[bar] = is_wc[i] ? - pci_ioremap_wc_bar(pdev, bar) : - pci_ioremap_bar(pdev, bar); - if (!hdev->pcie_bar[bar]) { - dev_err(hdev->dev, "pci_ioremap%s_bar failed for %s\n", - is_wc[i] ? "_wc" : "", name[i]); - rc = -ENODEV; - goto err; - } - } - - return 0; - -err: - for (i = 2 ; i >= 0 ; i--) { - bar = i * 2; /* 64-bit BARs */ - if (hdev->pcie_bar[bar]) - iounmap(hdev->pcie_bar[bar]); - } - - pci_release_regions(pdev); - - return rc; -} - -/** - * hl_pci_bars_unmap() - Unmap PCI BARS. - * @hdev: Pointer to hl_device structure. - * - * Release all PCI BARs and unmap their virtual addresses. - */ -static void hl_pci_bars_unmap(struct hl_device *hdev) -{ - struct pci_dev *pdev = hdev->pdev; - int i, bar; - - for (i = 2 ; i >= 0 ; i--) { - bar = i * 2; /* 64-bit BARs */ - iounmap(hdev->pcie_bar[bar]); - } - - pci_release_regions(pdev); -} - -/** - * hl_pci_elbi_write() - Write through the ELBI interface. - * @hdev: Pointer to hl_device structure. 
- * @addr: Address to write to - * @data: Data to write - * - * Return: 0 on success, negative value for failure. - */ -static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data) -{ - struct pci_dev *pdev = hdev->pdev; - ktime_t timeout; - u64 msec; - u32 val; - - if (hdev->pldm) - msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC; - else - msec = HL_PCI_ELBI_TIMEOUT_MSEC; - - /* Clear previous status */ - pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0); - - pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr); - pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data); - pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, - PCI_CONFIG_ELBI_CTRL_WRITE); - - timeout = ktime_add_ms(ktime_get(), msec); - for (;;) { - pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val); - if (val & PCI_CONFIG_ELBI_STS_MASK) - break; - if (ktime_compare(ktime_get(), timeout) > 0) { - pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, - &val); - break; - } - - usleep_range(300, 500); - } - - if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) - return 0; - - if (val & PCI_CONFIG_ELBI_STS_ERR) { - dev_err(hdev->dev, "Error writing to ELBI\n"); - return -EIO; - } - - if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { - dev_err(hdev->dev, "ELBI write didn't finish in time\n"); - return -EIO; - } - - dev_err(hdev->dev, "ELBI write has undefined bits in status\n"); - return -EIO; -} - -/** - * hl_pci_iatu_write() - iatu write routine. - * @hdev: Pointer to hl_device structure. - * @addr: Address to write to - * @data: Data to write - * - * Return: 0 on success, negative value for failure. - */ -int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u32 dbi_offset; - int rc; - - dbi_offset = addr & 0xFFF; - - rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); - rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, - data); - - if (rc) - return -EIO; - - return 0; -} - -/** - * hl_pci_reset_link_through_bridge() - Reset PCI link. - * @hdev: Pointer to hl_device structure. - */ -static void hl_pci_reset_link_through_bridge(struct hl_device *hdev) -{ - struct pci_dev *pdev = hdev->pdev; - struct pci_dev *parent_port; - u16 val; - - parent_port = pdev->bus->self; - pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val); - val |= PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); - ssleep(1); - - val &= ~(PCI_BRIDGE_CTL_BUS_RESET); - pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); - ssleep(3); -} - -/** - * hl_pci_set_inbound_region() - Configure inbound region - * @hdev: Pointer to hl_device structure. - * @region: Inbound region number. - * @pci_region: Inbound region parameters. - * - * Configure the iATU inbound region. - * - * Return: 0 on success, negative value for failure. 
- */ -int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, - struct hl_inbound_pci_region *pci_region) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 bar_phys_base, region_base, region_end_address; - u32 offset, ctrl_reg_val; - int rc = 0; - - /* region offset */ - offset = (0x200 * region) + 0x100; - - if (pci_region->mode == PCI_ADDRESS_MATCH_MODE) { - bar_phys_base = hdev->pcie_bar_phys[pci_region->bar]; - region_base = bar_phys_base + pci_region->offset_in_bar; - region_end_address = region_base + pci_region->size - 1; - - rc |= hl_pci_iatu_write(hdev, offset + 0x8, - lower_32_bits(region_base)); - rc |= hl_pci_iatu_write(hdev, offset + 0xC, - upper_32_bits(region_base)); - rc |= hl_pci_iatu_write(hdev, offset + 0x10, - lower_32_bits(region_end_address)); - } - - /* Point to the specified address */ - rc = hl_pci_iatu_write(hdev, offset + 0x14, - lower_32_bits(pci_region->addr)); - rc |= hl_pci_iatu_write(hdev, offset + 0x18, - upper_32_bits(pci_region->addr)); - rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0); - - /* Enable + bar/address match + match enable + bar number */ - ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1); - ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK, - pci_region->mode); - ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1); - - if (pci_region->mode == PCI_BAR_MATCH_MODE) - ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK, - pci_region->bar); - - rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val); - - /* Return the DBI window to the default location */ - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); - - if (rc) - dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n", - pci_region->bar, pci_region->addr); - - return rc; -} - -/** - * hl_pci_set_outbound_region() - Configure outbound region 0 - * @hdev: Pointer to hl_device structure. - * @pci_region: Outbound region parameters. - * - * Configure the iATU outbound region 0. - * - * Return: 0 on success, negative value for failure. - */ -int hl_pci_set_outbound_region(struct hl_device *hdev, - struct hl_outbound_pci_region *pci_region) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 outbound_region_end_address; - int rc = 0; - - /* Outbound Region 0 */ - outbound_region_end_address = - pci_region->addr + pci_region->size - 1; - rc |= hl_pci_iatu_write(hdev, 0x008, - lower_32_bits(pci_region->addr)); - rc |= hl_pci_iatu_write(hdev, 0x00C, - upper_32_bits(pci_region->addr)); - rc |= hl_pci_iatu_write(hdev, 0x010, - lower_32_bits(outbound_region_end_address)); - rc |= hl_pci_iatu_write(hdev, 0x014, 0); - - if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64)) - rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000); - else - rc |= hl_pci_iatu_write(hdev, 0x018, 0); - - rc |= hl_pci_iatu_write(hdev, 0x020, - upper_32_bits(outbound_region_end_address)); - /* Increase region size */ - rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000); - /* Enable */ - rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000); - - /* Return the DBI window to the default location */ - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); - - return rc; -} - -/** - * hl_pci_set_dma_mask() - Set DMA masks for the device. - * @hdev: Pointer to hl_device structure. - * - * This function sets the DMA masks (regular and consistent) for a specified - * value. 
If it doesn't succeed, it tries to set it to a fall-back value - * - * Return: 0 on success, non-zero for failure. - */ -static int hl_pci_set_dma_mask(struct hl_device *hdev) -{ - struct pci_dev *pdev = hdev->pdev; - int rc; - - /* set DMA mask */ - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask)); - if (rc) { - dev_err(hdev->dev, - "Failed to set pci dma mask to %d bits, error %d\n", - hdev->dma_mask, rc); - return rc; - } - - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask)); - if (rc) { - dev_err(hdev->dev, - "Failed to set pci consistent dma mask to %d bits, error %d\n", - hdev->dma_mask, rc); - return rc; - } - - return 0; -} - -/** - * hl_pci_init() - PCI initialization code. - * @hdev: Pointer to hl_device structure. - * - * Set DMA masks, initialize the PCI controller and map the PCI BARs. - * - * Return: 0 on success, non-zero for failure. - */ -int hl_pci_init(struct hl_device *hdev) -{ - struct pci_dev *pdev = hdev->pdev; - int rc; - - if (hdev->reset_pcilink) - hl_pci_reset_link_through_bridge(hdev); - - rc = pci_enable_device_mem(pdev); - if (rc) { - dev_err(hdev->dev, "can't enable PCI device\n"); - return rc; - } - - pci_set_master(pdev); - - rc = hdev->asic_funcs->pci_bars_map(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to initialize PCI BARs\n"); - goto disable_device; - } - - rc = hdev->asic_funcs->init_iatu(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to initialize iATU\n"); - goto disable_device; - } - - rc = hl_pci_set_dma_mask(hdev); - if (rc) - goto disable_device; - - return 0; - -disable_device: - pci_clear_master(pdev); - pci_disable_device(pdev); - - return rc; -} - -/** - * hl_fw_fini() - PCI finalization code. - * @hdev: Pointer to hl_device structure - * - * Unmap PCI bars and disable PCI device. - */ -void hl_pci_fini(struct hl_device *hdev) -{ - hl_pci_bars_unmap(hdev); - - pci_clear_master(hdev->pdev); - pci_disable_device(hdev->pdev); -} diff --git a/drivers/misc/habanalabs/sysfs.c b/drivers/misc/habanalabs/sysfs.c deleted file mode 100644 index 5d78d5e1c782..000000000000 --- a/drivers/misc/habanalabs/sysfs.c +++ /dev/null @@ -1,442 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright 2016-2019 HabanaLabs, Ltd. - * All Rights Reserved. 
- */ - -#include "habanalabs.h" - -#include - -#define SET_CLK_PKT_TIMEOUT 1000000 /* 1s */ -#define SET_PWR_PKT_TIMEOUT 1000000 /* 1s */ - -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) -{ - struct armcp_packet pkt; - long result; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - if (curr) - pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - else - pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_index = cpu_to_le32(pll_index); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_CLK_PKT_TIMEOUT, &result); - - if (rc) { - dev_err(hdev->dev, - "Failed to get frequency of PLL %d, error %d\n", - pll_index, rc); - result = rc; - } - - return result; -} - -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) -{ - struct armcp_packet pkt; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_index = cpu_to_le32(pll_index); - pkt.value = cpu_to_le64(freq); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_CLK_PKT_TIMEOUT, NULL); - - if (rc) - dev_err(hdev->dev, - "Failed to set frequency to PLL %d, error %d\n", - pll_index, rc); -} - -u64 hl_get_max_power(struct hl_device *hdev) -{ - struct armcp_packet pkt; - long result; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_PWR_PKT_TIMEOUT, &result); - - if (rc) { - dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); - result = rc; - } - - return result; -} - -void hl_set_max_power(struct hl_device *hdev, u64 value) -{ - struct armcp_packet pkt; - int rc; - - memset(&pkt, 0, sizeof(pkt)); - - pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.value = cpu_to_le64(value); - - rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_PWR_PKT_TIMEOUT, NULL); - - if (rc) - dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); -} - -static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - return sprintf(buf, "%s\n", hdev->asic_prop.uboot_ver); -} - -static ssize_t armcp_kernel_ver_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version); -} - -static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version); -} - -static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - return sprintf(buf, "0x%08x\n", - hdev->asic_prop.armcp_info.cpld_version); -} - -static ssize_t infineon_ver_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - return sprintf(buf, "0x%04x\n", - hdev->asic_prop.armcp_info.infineon_version); -} - -static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - return sprintf(buf, "%s\n", 
hdev->asic_prop.armcp_info.fuse_version); -} - -static ssize_t thermal_ver_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version); -} - -static ssize_t preboot_btl_ver_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - return sprintf(buf, "%s\n", hdev->asic_prop.preboot_ver); -} - -static ssize_t soft_reset_store(struct device *dev, - struct device_attribute *attr, const char *buf, - size_t count) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - long value; - int rc; - - rc = kstrtoul(buf, 0, &value); - - if (rc) { - count = -EINVAL; - goto out; - } - - if (!hdev->supports_soft_reset) { - dev_err(hdev->dev, "Device does not support soft-reset\n"); - goto out; - } - - dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n"); - - hl_device_reset(hdev, false, false); - -out: - return count; -} - -static ssize_t hard_reset_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - long value; - int rc; - - rc = kstrtoul(buf, 0, &value); - - if (rc) { - count = -EINVAL; - goto out; - } - - dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n"); - - hl_device_reset(hdev, true, false); - -out: - return count; -} - -static ssize_t device_type_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - char *str; - - switch (hdev->asic_type) { - case ASIC_GOYA: - str = "GOYA"; - break; - case ASIC_GAUDI: - str = "GAUDI"; - break; - default: - dev_err(hdev->dev, "Unrecognized ASIC type %d\n", - hdev->asic_type); - return -EINVAL; - } - - return sprintf(buf, "%s\n", str); -} - -static ssize_t pci_addr_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - return sprintf(buf, "%04x:%02x:%02x.%x\n", - pci_domain_nr(hdev->pdev->bus), - hdev->pdev->bus->number, - PCI_SLOT(hdev->pdev->devfn), - PCI_FUNC(hdev->pdev->devfn)); -} - -static ssize_t status_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - char *str; - - if (atomic_read(&hdev->in_reset)) - str = "In reset"; - else if (hdev->disabled) - str = "Malfunction"; - else - str = "Operational"; - - return sprintf(buf, "%s\n", str); -} - -static ssize_t soft_reset_cnt_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - return sprintf(buf, "%d\n", hdev->soft_reset_cnt); -} - -static ssize_t hard_reset_cnt_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - return sprintf(buf, "%d\n", hdev->hard_reset_cnt); -} - -static ssize_t max_power_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - long val; - - if (hl_device_disabled_or_in_reset(hdev)) - return -ENODEV; - - val = hl_get_max_power(hdev); - - return sprintf(buf, "%lu\n", val); -} - -static ssize_t max_power_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - unsigned long value; - int rc; - - if (hl_device_disabled_or_in_reset(hdev)) { - count = -ENODEV; - goto out; - } - - rc = 
kstrtoul(buf, 0, &value); - - if (rc) { - count = -EINVAL; - goto out; - } - - hdev->max_power = value; - hl_set_max_power(hdev, value); - -out: - return count; -} - -static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t offset, - size_t max_size) -{ - struct device *dev = container_of(kobj, struct device, kobj); - struct hl_device *hdev = dev_get_drvdata(dev); - char *data; - int rc; - - if (!max_size) - return -EINVAL; - - data = kzalloc(max_size, GFP_KERNEL); - if (!data) - return -ENOMEM; - - rc = hdev->asic_funcs->get_eeprom_data(hdev, data, max_size); - if (rc) - goto out; - - memcpy(buf, data, max_size); - -out: - kfree(data); - - return max_size; -} - -static DEVICE_ATTR_RO(armcp_kernel_ver); -static DEVICE_ATTR_RO(armcp_ver); -static DEVICE_ATTR_RO(cpld_ver); -static DEVICE_ATTR_RO(device_type); -static DEVICE_ATTR_RO(fuse_ver); -static DEVICE_ATTR_WO(hard_reset); -static DEVICE_ATTR_RO(hard_reset_cnt); -static DEVICE_ATTR_RO(infineon_ver); -static DEVICE_ATTR_RW(max_power); -static DEVICE_ATTR_RO(pci_addr); -static DEVICE_ATTR_RO(preboot_btl_ver); -static DEVICE_ATTR_WO(soft_reset); -static DEVICE_ATTR_RO(soft_reset_cnt); -static DEVICE_ATTR_RO(status); -static DEVICE_ATTR_RO(thermal_ver); -static DEVICE_ATTR_RO(uboot_ver); - -static struct bin_attribute bin_attr_eeprom = { - .attr = {.name = "eeprom", .mode = (0444)}, - .size = PAGE_SIZE, - .read = eeprom_read_handler -}; - -static struct attribute *hl_dev_attrs[] = { - &dev_attr_armcp_kernel_ver.attr, - &dev_attr_armcp_ver.attr, - &dev_attr_cpld_ver.attr, - &dev_attr_device_type.attr, - &dev_attr_fuse_ver.attr, - &dev_attr_hard_reset.attr, - &dev_attr_hard_reset_cnt.attr, - &dev_attr_infineon_ver.attr, - &dev_attr_max_power.attr, - &dev_attr_pci_addr.attr, - &dev_attr_preboot_btl_ver.attr, - &dev_attr_soft_reset.attr, - &dev_attr_soft_reset_cnt.attr, - &dev_attr_status.attr, - &dev_attr_thermal_ver.attr, - &dev_attr_uboot_ver.attr, - NULL, -}; - -static struct bin_attribute *hl_dev_bin_attrs[] = { - &bin_attr_eeprom, - NULL -}; - -static struct attribute_group hl_dev_attr_group = { - .attrs = hl_dev_attrs, - .bin_attrs = hl_dev_bin_attrs, -}; - -static struct attribute_group hl_dev_clks_attr_group; - -static const struct attribute_group *hl_dev_attr_groups[] = { - &hl_dev_attr_group, - &hl_dev_clks_attr_group, - NULL, -}; - -int hl_sysfs_init(struct hl_device *hdev) -{ - int rc; - - if (hdev->asic_type == ASIC_GOYA) - hdev->pm_mng_profile = PM_AUTO; - else - hdev->pm_mng_profile = PM_MANUAL; - hdev->max_power = hdev->asic_prop.max_power_default; - - hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group); - - rc = device_add_groups(hdev->dev, hl_dev_attr_groups); - if (rc) { - dev_err(hdev->dev, - "Failed to add groups to device, error %d\n", rc); - return rc; - } - - return 0; -} - -void hl_sysfs_fini(struct hl_device *hdev) -{ - device_remove_groups(hdev->dev, hl_dev_attr_groups); -} -- cgit v1.2.3-59-g8ed1b
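
Note on the MMU code moved above: the kernel-doc comments on hl_mmu_map() and hl_mmu_unmap() state that the caller must hold a higher-level lock and may defer the PTE/PCI flush until an entire region has been mapped or unmapped. A minimal caller-side sketch of that pattern follows; it is illustrative only and not part of this patch. Apart from hl_mmu_map(), hl_mmu_unmap() and ctx->mmu_lock, every name in it is hypothetical — the driver's real callers of these functions live in the (also moved) memory.c.

/*
 * Illustrative sketch only (not part of the patch): map a multi-page region
 * while holding the lock at a higher level and requesting the PTE flush just
 * once, as the hl_mmu_map()/hl_mmu_unmap() comments require.
 * example_map_region() and its parameters are hypothetical names.
 */
#include "habanalabs.h"

static int example_map_region(struct hl_ctx *ctx, u64 vaddr, u64 paddr,
				u32 page_size, u32 npages)
{
	u64 va = vaddr, pa = paddr;
	int i, rc;

	/* Serialize against other map/unmap calls on this context */
	mutex_lock(&ctx->mmu_lock);

	for (i = 0 ; i < npages ; i++) {
		/* Ask for the PCI flush only on the last page */
		rc = hl_mmu_map(ctx, va, pa, page_size,
				(i + 1) == npages);
		if (rc)
			goto unroll;

		va += page_size;
		pa += page_size;
	}

	mutex_unlock(&ctx->mmu_lock);

	return 0;

unroll:
	/* Undo the pages mapped so far, again flushing only once */
	while (i--) {
		va -= page_size;
		hl_mmu_unmap(ctx, va, page_size, i == 0);
	}

	mutex_unlock(&ctx->mmu_lock);

	return rc;
}

Passing flush_pte only on the final page keeps the number of PCI flushes constant regardless of the region size, which is the optimization the function comments refer to.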