diff options
author | 2021-12-23 13:24:34 +0200 | |
---|---|---|
committer | 2022-02-28 14:22:04 +0200 | |
commit | 9158bf69e74f98fea6847cca93bbf33a589bebcd (patch) | |
tree | 5905089780ae092461eb5c3a440280317037096c /drivers/misc/habanalabs/common/command_submission.c | |
parent | habanalabs: fix race when waiting on encaps signal (diff) | |
download | wireguard-linux-9158bf69e74f98fea6847cca93bbf33a589bebcd.tar.xz wireguard-linux-9158bf69e74f98fea6847cca93bbf33a589bebcd.zip |
habanalabs: Timestamps buffers registration
Timestamp registration API allows the user to register
a timestamp record event which will make the driver set
timestamp when CQ counter reaches the target value
and write it to a specific location specified
by the user.
This is a non blocking API, unlike the wait_for_interrupt
which is a blocking one.
Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/misc/habanalabs/common/command_submission.c')
-rw-r--r-- | drivers/misc/habanalabs/common/command_submission.c | 197 |
1 files changed, 163 insertions, 34 deletions
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index ba5215b77852..c7757c78d0b1 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -14,6 +14,8 @@ #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ HL_CS_FLAGS_COLLECTIVE_WAIT) +#define MAX_TS_ITER_NUM 10 + /** * enum hl_cs_wait_status - cs wait status * @CS_WAIT_STATUS_BUSY: cs was not completed yet @@ -924,7 +926,7 @@ void hl_cs_rollback_all(struct hl_device *hdev) int i; struct hl_cs *cs, *tmp; - flush_workqueue(hdev->sob_reset_wq); + flush_workqueue(hdev->ts_free_obj_wq); /* flush all completions before iterating over the CS mirror list in * order to avoid a race with the release functions @@ -948,13 +950,19 @@ void hl_cs_rollback_all(struct hl_device *hdev) static void wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) { - struct hl_user_pending_interrupt *pend; + struct hl_user_pending_interrupt *pend, *temp; unsigned long flags; spin_lock_irqsave(&interrupt->wait_list_lock, flags); - list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) { - pend->fence.error = -EIO; - complete_all(&pend->fence.completion); + list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) { + if (pend->ts_reg_info.ts_buff) { + list_del(&pend->wait_list_node); + hl_ts_put(pend->ts_reg_info.ts_buff); + hl_cb_put(pend->ts_reg_info.cq_cb); + } else { + pend->fence.error = -EIO; + complete_all(&pend->fence.completion); + } } spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); } @@ -2857,43 +2865,133 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) return 0; } +static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff, + struct hl_cb *cq_cb, + u64 ts_offset, u64 cq_offset, u64 target_value, + spinlock_t *wait_list_lock, + struct hl_user_pending_interrupt **pend) +{ + struct hl_user_pending_interrupt *requested_offset_record = + (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + + ts_offset; + struct hl_user_pending_interrupt *cb_last = + (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + + (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); + unsigned long flags, iter_counter = 0; + u64 current_cq_counter; + + /* Validate ts_offset not exceeding last max */ + if (requested_offset_record > cb_last) { + dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n", + (u64)(uintptr_t)cb_last); + return -EINVAL; + } + +start_over: + spin_lock_irqsave(wait_list_lock, flags); + + /* Unregister only if we didn't reach the target value + * since in this case there will be no handling in irq context + * and then it's safe to delete the node out of the interrupt list + * then re-use it on other interrupt + */ + if (requested_offset_record->ts_reg_info.in_use) { + current_cq_counter = *requested_offset_record->cq_kernel_addr; + if (current_cq_counter < requested_offset_record->cq_target_value) { + list_del(&requested_offset_record->wait_list_node); + spin_unlock_irqrestore(wait_list_lock, flags); + + hl_ts_put(requested_offset_record->ts_reg_info.ts_buff); + hl_cb_put(requested_offset_record->ts_reg_info.cq_cb); + + dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n"); + } else { + dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n"); + + /* irq handling in the middle give it time to finish */ + spin_unlock_irqrestore(wait_list_lock, flags); + usleep_range(1, 10); + if (++iter_counter == MAX_TS_ITER_NUM) { + dev_err(ts_buff->hdev->dev, "handling registration interrupt took too long!!\n"); + return -EINVAL; + } + + goto start_over; + } + } else { + spin_unlock_irqrestore(wait_list_lock, flags); + } + + /* Fill up the new registration node info */ + requested_offset_record->ts_reg_info.in_use = 1; + requested_offset_record->ts_reg_info.ts_buff = ts_buff; + requested_offset_record->ts_reg_info.cq_cb = cq_cb; + requested_offset_record->ts_reg_info.timestamp_kernel_addr = + (u64 *) ts_buff->user_buff_address + ts_offset; + requested_offset_record->cq_kernel_addr = + (u64 *) cq_cb->kernel_address + cq_offset; + requested_offset_record->cq_target_value = target_value; + + *pend = requested_offset_record; + + dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n", + (u64)(uintptr_t)requested_offset_record); + return 0; +} + static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, - struct hl_cb_mgr *cb_mgr, u64 timeout_us, - u64 cq_counters_handle, u64 cq_counters_offset, + struct hl_cb_mgr *cb_mgr, struct hl_ts_mgr *ts_mgr, + u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset, u64 target_value, struct hl_user_interrupt *interrupt, + bool register_ts_record, u64 ts_handle, u64 ts_offset, u32 *status, u64 *timestamp) { + u32 cq_patched_handle, ts_patched_handle; struct hl_user_pending_interrupt *pend; + struct hl_ts_buff *ts_buff; + struct hl_cb *cq_cb; unsigned long timeout, flags; long completion_rc; - struct hl_cb *cb; int rc = 0; - u32 handle; timeout = hl_usecs64_to_jiffies(timeout_us); hl_ctx_get(hdev, ctx); - cq_counters_handle >>= PAGE_SHIFT; - handle = (u32) cq_counters_handle; - - cb = hl_cb_get(hdev, cb_mgr, handle); - if (!cb) { - hl_ctx_put(ctx); - return -EINVAL; + cq_patched_handle = lower_32_bits(cq_counters_handle >> PAGE_SHIFT); + cq_cb = hl_cb_get(hdev, cb_mgr, cq_patched_handle); + if (!cq_cb) { + rc = -EINVAL; + goto put_ctx; } - pend = kzalloc(sizeof(*pend), GFP_KERNEL); - if (!pend) { - hl_cb_put(cb); - hl_ctx_put(ctx); - return -ENOMEM; - } + if (register_ts_record) { + dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n", + interrupt->interrupt_id, ts_offset, cq_counters_offset); - hl_fence_init(&pend->fence, ULONG_MAX); + ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT); + ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle); + if (!ts_buff) { + rc = -EINVAL; + goto put_cq_cb; + } - pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset; - pend->cq_target_value = target_value; + /* Find first available record */ + rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset, + cq_counters_offset, target_value, + &interrupt->wait_list_lock, &pend); + if (rc) + goto put_ts_buff; + } else { + pend = kzalloc(sizeof(*pend), GFP_KERNEL); + if (!pend) { + rc = -ENOMEM; + goto put_cq_cb; + } + hl_fence_init(&pend->fence, ULONG_MAX); + pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset; + pend->cq_target_value = target_value; + } spin_lock_irqsave(&interrupt->wait_list_lock, flags); @@ -2901,13 +2999,19 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, * before we added the node to the wait list */ if (*pend->cq_kernel_addr >= target_value) { + if (register_ts_record) + pend->ts_reg_info.in_use = 0; spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); *status = HL_WAIT_CS_STATUS_COMPLETED; - /* There was no interrupt, we assume the completion is now. */ - pend->fence.timestamp = ktime_get(); - goto set_timestamp; + if (register_ts_record) { + *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); + goto put_ts_buff; + } else { + pend->fence.timestamp = ktime_get(); + goto set_timestamp; + } } else if (!timeout_us) { spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); *status = HL_WAIT_CS_STATUS_BUSY; @@ -2916,11 +3020,19 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, } /* Add pending user interrupt to relevant list for the interrupt - * handler to monitor + * handler to monitor. + * Note that we cannot have sorted list by target value, + * in order to shorten the list pass loop, since + * same list could have nodes for different cq counter handle. */ list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + if (register_ts_record) { + rc = *status = HL_WAIT_CS_STATUS_COMPLETED; + goto ts_registration_exit; + } + /* Wait for interrupt handler to signal completion */ completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, timeout); @@ -2952,15 +3064,30 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, } } + /* + * We keep removing the node from list here, and not at the irq handler + * for completion timeout case. and if it's a registration + * for ts record, the node will be deleted in the irq handler after + * we reach the target value. + */ spin_lock_irqsave(&interrupt->wait_list_lock, flags); list_del(&pend->wait_list_node); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); set_timestamp: *timestamp = ktime_to_ns(pend->fence.timestamp); - kfree(pend); - hl_cb_put(cb); + hl_cb_put(cq_cb); +ts_registration_exit: + hl_ctx_put(ctx); + + return rc; + +put_ts_buff: + hl_ts_put(ts_buff); +put_cq_cb: + hl_cb_put(cq_cb); +put_ctx: hl_ctx_put(ctx); return rc; @@ -3119,11 +3246,13 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt]; if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) - rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, + rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr, args->in.interrupt_timeout_us, args->in.cq_counters_handle, args->in.cq_counters_offset, - args->in.target, interrupt, &status, - ×tamp); + args->in.target, interrupt, + !!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT), + args->in.timestamp_handle, args->in.timestamp_offset, + &status, ×tamp); else rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, args->in.interrupt_timeout_us, args->in.addr, |